|
|
|
import os |
|
from huggingface_hub import logging, login |
|
# Authenticate against the Hub with the token read from the HF_TOKEN environment variable.
# `write_permission` is deliberately not passed: huggingface_hub deprecated that argument
# (fine-grained tokens made the "write" check meaningless) and newer releases reject it.
login(token=os.environ.get("HF_TOKEN"))

# Turn on debug-level logging for huggingface_hub.
logging.set_verbosity_debug()
|
|
|
|
|
from pathlib import Path |
|
|
|
import gradio as gr |
|
|
|
from app_1M_image import get_demo as get_demo_1M_image |
|
from app_image import get_demo as get_demo_image |
|
from app_json import get_demo as get_demo_json |
|
|
|
def _get_demo_code(path: str) -> str:
    """Return a demo module's source rewritten as a standalone Gradio script.

    The text of the file at ``path`` is read, every occurrence of the
    ``def get_demo():`` header is replaced with ``with gr.Blocks() as demo:``,
    and a trailing ``demo.launch()`` call is appended.

    Args:
        path: Location of the Python source file to read.

    Returns:
        The rewritten source code as a single string.
    """
    # Decode explicitly as UTF-8: the default encoding is locale-dependent and
    # may fail on (or garble) non-ASCII characters in the source file.
    source = Path(path).read_text(encoding="utf-8")
    standalone = source.replace("def get_demo():", "with gr.Blocks() as demo:")
    return standalone + "\n\ndemo.launch()"
|
|
|
|
|
# Introductory Markdown/HTML for the page; one literal per rendered source line,
# with the leading and trailing newlines spelled out explicitly.
DEMO_EXPLANATION = (
    "\n"
    "<h1 style='text-align: center; margin-bottom: 1rem'> How to persist data from a Space to a Dataset? </h1>\n"
    "\n"
    "This demo shows how to leverage `gradio` and `huggingface_hub` to save data from a Space to a Dataset on the Hub.\n"
    "When doing so, a few things must be taken care of: file formats, concurrent writes, name collision, number of commits,\n"
    'number of files, and more. The tabs below show different ways of implementing a "save to dataset" feature. Depending on the\n'
    "complexity and usage of your app, you might want to use one or the other.\n"
    "\n"
    "This Space comes as a demo for this `huggingface_hub` [guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads). Please check it out if you need more technical details.\n"
)
|
|
|
# Markdown describing the JSON demo: use case, data, robustness and limitations.
JSON_DEMO_EXPLANATION = (
    "\n"
    "## Use case\n"
    "\n"
    "- Save inputs and outputs\n"
    "- Build an annotation platform\n"
    "\n"
    "## Data\n"
    "\n"
    "Json-able only: text and numeric but no binaries.\n"
    "\n"
    "## Robustness\n"
    "\n"
    "Works with concurrent users and replicas.\n"
    "\n"
    "## Limitations\n"
    "\n"
    "If you expect millions of lines, you must split the local JSON file into multiple files to avoid getting your file tracked as LFS (5MB) on the Hub.\n"
    "\n"
    "## Demo\n"
)
|
|
|
# Markdown describing the image demo: use case, robustness and limitations.
IMAGE_DEMO_EXPLANATION = (
    "\n"
    "## Use case\n"
    "\n"
    "Save images with metadata (caption, parameters, datetime, etc.).\n"
    "\n"
    "## Robustness\n"
    "\n"
    "Works with concurrent users and replicas.\n"
    "\n"
    "## Limitations\n"
    "\n"
    "- only 10k images/folder are supported on the Hub. If you expect more usage, you must save data in subfolders.\n"
    "- only 1M images/repo supported on the Hub. If you expect more usage, you can zip your data before uploading. See the _1M images Dataset_ demo.\n"
    "\n"
    "## Demo\n"
)
|
|
|
# Markdown describing the 1M-images demo: use case, robustness and limitations.
IMAGE_1M_DEMO_EXPLANATION = (
    "\n"
    "## Use case:\n"
    "\n"
    "Save 1M images with metadata (caption, parameters, datetime, etc.).\n"
    "\n"
    "## Robustness\n"
    "\n"
    "Works with concurrent users and replicas.\n"
    "\n"
    "## Limitations\n"
    "\n"
    "None.\n"
    "\n"
    "## Demo\n"
)
|
|
|
def _render_tab(title, intro_md, build_demo, result_md, source_file):
    """Add one tab to the Blocks context that is currently open.

    The tab contains, in order: the introductory Markdown, the components
    created by ``build_demo()``, the ``result_md`` Markdown, and an expanded
    "Source code" accordion showing the rewritten contents of ``source_file``.
    """
    with gr.Tab(title):
        gr.Markdown(intro_md)
        build_demo()
        gr.Markdown(result_md)
        with gr.Accordion("Source code", open=True):
            gr.Code(_get_demo_code(source_file), language="python")


# Assemble the page: general explanation first, then one tab per demo.
with gr.Blocks() as demo:
    gr.Markdown(DEMO_EXPLANATION)

    _render_tab(
        "JSON Dataset",
        JSON_DEMO_EXPLANATION,
        get_demo_json,
        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-json\n\n## Code",
        "app_json.py",
    )
    _render_tab(
        "Image Dataset",
        IMAGE_DEMO_EXPLANATION,
        get_demo_image,
        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-image\n\n## Code",
        "app_image.py",
    )
    _render_tab(
        "1M images Dataset",
        IMAGE_1M_DEMO_EXPLANATION,
        get_demo_1M_image,
        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-image-zip\n\n## Code",
        "app_1M_image.py",
    )

# Start the app once the layout has been fully declared.
demo.launch()
|
|