pickle-to-skops / app.py
adrin's picture
Update app.py
b43a211
import os
import pickle
import tempfile
import warnings
from io import BytesIO
from pathlib import Path
from uuid import uuid4
import gradio as gr
import joblib
from huggingface_hub import upload_file
from skops import io as sio
# Page title handed to `gr.Blocks(title=...)` when the UI is built.
title = "skops converter"

# Markdown shown at the top of the page through `gr.Markdown(desc)`. It
# explains what the converter does, how to reproduce the conversion locally,
# which library versions are assumed, and where to report problems.
desc = """
# Pickle to skops converter
This space converts your pickle files to skops format. You can read more on the
skops format [here]( https://skops.readthedocs.io/en/stable/persistence.html).
You can use `skops.io.dump(joblib.load(in_file), out_file)` to do the
conversion yourself, where `in_file` is your source pickle file and `out_file`
is where you want to save the skops file. But only do that **if you trust the
source of the pickle file**.
You can then use `skops.io.load(skops_file, trusted=unknown_types)` to load the
file, where `skops_file` is the converted skops format file, and the
`unknown_types` is what you see in the "Unknown Types" box below. You can also
locally reproduce this list using
`skops.io.get_untrusted_types(file=skops_file)`. You should only load a `skops`
file that you trust all the types included in the `unknown_types` list.
## Requirements
This space assumes you have used the latest `joblib` and `scikit-learn`
versions installed on your environment to create the pickle file.
## Reporting issues
If you encounter an issue, please open an issue on the project's repository
on the [issue tracker](
https://github.com/skops-dev/skops/issues/new?title=CONVERSION+error+from+hf.space&body=Paste+the+error+message+and+a+link+to+your+pickle+file+here+please)
"""
def convert(file, store):
    """Convert an uploaded pickle/joblib file into a skops file.

    Parameters
    ----------
    file : str, os.PathLike, or object with a ``name`` attribute
        The uploaded file. Either a filesystem path, or an object whose
        ``name`` attribute holds the path of the uploaded file on disk.
    store : bool
        When truthy, a copy of the uploaded file is pushed to the
        ``scikit-learn/pickle-to-skops`` dataset repository (under a fresh
        UUID folder) before the conversion starts. This requires the
        ``HF_TOKEN`` environment variable.

    Returns
    -------
    tuple
        ``(out_file, unknown_types, msg)`` on success: the path of the
        generated ``.skops`` file, the value returned by
        ``skops.io.get_untrusted_types`` for that file, and the ``repr`` of
        every warning recorded during the conversion joined by newlines
        (empty string when there were none).
        ``(None, None, repr(error))`` when any step raises an ``Exception``;
        the function itself never propagates it.
    """
    msg = ""
    try:
        with warnings.catch_warnings(record=True) as record:
            # Accept both a plain path and an upload wrapper exposing `.name`.
            if isinstance(file, (str, os.PathLike)):
                in_file = Path(file)
            else:
                in_file = Path(file.name)

            if store:
                upload_file(
                    path_or_fileobj=str(in_file),
                    path_in_repo=f"{uuid4()}/{in_file.name}",
                    repo_id="scikit-learn/pickle-to-skops",
                    repo_type="dataset",
                    token=os.environ["HF_TOKEN"],
                )

            # SECURITY: unpickling runs arbitrary code embedded in the file.
            # Loading user uploads is the very purpose of this app, so it must
            # only ever run in a sandboxed/disposable environment.
            try:
                obj = joblib.load(in_file)
            except Exception:
                # Not loadable through joblib: fall back to plain pickle.
                # `Exception` (rather than a bare `except`) lets
                # KeyboardInterrupt/SystemExit propagate.
                with open(in_file, "rb") as f:
                    obj = pickle.load(f)

            # Swap only the last extension for `.skops`
            # (`a.b.pkl` -> `a.b.skops`, `model` -> `model.skops`).
            out_dir = tempfile.mkdtemp(prefix="gradio-convert-")
            out_file = Path(out_dir) / f"{in_file.stem}.skops"

            sio.dump(obj, out_file)
            unknown_types = sio.get_untrusted_types(file=out_file)

            if record:
                msg = "\n".join(repr(w.message) for w in record)
    except Exception as e:
        # Report the failure in the "Errors and Warnings" output instead of
        # raising.
        return None, None, repr(e)

    return out_file, unknown_types, msg
# Build the page: description, opt-in storage checkbox, upload button and the
# three outputs that `convert` fills in (file, unknown types, messages).
with gr.Blocks(title=title) as iface:
    gr.Markdown(desc)

    store_label = (
        "Store a copy: if you leave this box checked, we store a copy of your"
        " pickle file in a private place, only used for us to find issues and"
        " improve the skops format. Please uncheck this box if your pickle file"
        " includes any personal or sensitive data."
    )
    store = gr.Checkbox(label=store_label, value=True)

    upload_button = gr.UploadButton(
        "Click to Upload a File", file_types=None, file_count="single"
    )

    # Output widgets, created in the same order as the values returned by
    # `convert`.
    file_output = gr.File(label="Converted File")
    unknown_types_output = gr.Text(label="Unknown Types")
    messages_output = gr.Text(label="Errors and Warnings")

    # Run the conversion as soon as a file has been uploaded.
    upload_button.upload(
        fn=convert,
        inputs=[upload_button, store],
        outputs=[file_output, unknown_types_output, messages_output],
        api_name="upload-file",
    )

iface.launch(debug=True)