File size: 3,596 Bytes
e987d7b
b47dcdb
e987d7b
 
 
 
 
3531f81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
899ca8c
3531f81
899ca8c
3531f81
 
899ca8c
3531f81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
899ca8c
3531f81
 
 
 
 
 
 
899ca8c
3531f81
 
 
 
 
 
 
899ca8c
 
3531f81
 
 
 
 
 
 
899ca8c
3531f81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3eab446
3531f81
 
 
 
 
 
3eab446
3531f81
 
 
 
 
 
 
3eab446
3531f81
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Start by setting token and debug mode before starting schedulers
import os
from huggingface_hub import logging, login
# Authenticate against the Hub with the token read from the HF_TOKEN
# environment variable, then enable debug-level logs for huggingface_hub.
# Order matters: this runs before the app modules are imported below.
login(token=os.getenv("HF_TOKEN"), write_permission=True)
logging.set_verbosity_debug()

# Start apps
from pathlib import Path

import gradio as gr

from app_1M_image import get_demo as get_demo_1M_image
from app_image import get_demo as get_demo_image
from app_json import get_demo as get_demo_json

def _get_demo_code(path: str) -> str:
    """Return the source of a demo module rewritten as a standalone script.

    The file at ``path`` is read, its ``def get_demo():`` header is replaced
    by ``with gr.Blocks() as demo:`` and a trailing ``demo.launch()`` call is
    appended.

    Args:
        path: Path of the Python source file to read.

    Returns:
        The transformed source code as a single string.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale's default encoding.
    code = Path(path).read_text(encoding="utf-8")
    code = code.replace("def get_demo():", "with gr.Blocks() as demo:")
    return code + "\n\ndemo.launch()"


# Introductory Markdown (with an inline HTML title) rendered at the top of the
# app, above the tabs.
DEMO_EXPLANATION = """
<h1 style='text-align: center; margin-bottom: 1rem'> How to persist data from a Space to a Dataset? </h1>

This demo shows how to leverage `gradio` and `huggingface_hub` to save data from a Space to a Dataset on the Hub.
When doing so, a few things must be taken care of: file formats, concurrent writes, name collision, number of commits,
number of files, and more. The tabs below show different ways of implementing a "save to dataset" feature. Depending on the
complexity and usage of your app, you might want to use one or the other.

This Space comes as a demo for this `huggingface_hub` [guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads). Please check it out if you need more technical details.
"""

# Markdown rendered at the top of the "JSON Dataset" tab, just above the
# embedded JSON demo.
JSON_DEMO_EXPLANATION = """
## Use case

- Save inputs and outputs
- Build an annotation platform

## Data

Json-able only: text and numeric but no binaries.

## Robustness

Works with concurrent users and replicas.

## Limitations

If you expect millions of lines, you must split the local JSON file into multiple files to avoid getting your file tracked as LFS (5MB) on the Hub.

## Demo
"""

# Markdown rendered at the top of the "Image Dataset" tab, just above the
# embedded image demo.
IMAGE_DEMO_EXPLANATION = """
## Use case

Save images with metadata (caption, parameters, datetime, etc.).

## Robustness

Works with concurrent users and replicas.

## Limitations

  - only 10k images/folder are supported on the Hub. If you expect more usage, you must save data in subfolders.
  - only 1M images/repo supported on the Hub. If you expect more usage, you can zip your data before uploading. See the _1M images Dataset_ demo.

## Demo
"""

# Markdown rendered at the top of the "1M images Dataset" tab, just above the
# embedded zipped-images demo.
IMAGE_1M_DEMO_EXPLANATION = """
## Use case:

Save 1M images with metadata (caption, parameters, datetime, etc.).

## Robustness

Works with concurrent users and replicas.

## Limitations

None.

## Demo
"""

# One entry per tab, in display order:
# (tab title, intro markdown, demo builder, result markdown, demo source file).
_TABS = (
    (
        "JSON Dataset",
        JSON_DEMO_EXPLANATION,
        get_demo_json,
        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-json\n\n## Code",
        "app_json.py",
    ),
    (
        "Image Dataset",
        IMAGE_DEMO_EXPLANATION,
        get_demo_image,
        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-image\n\n## Code",
        "app_image.py",
    ),
    (
        "1M images Dataset",
        IMAGE_1M_DEMO_EXPLANATION,
        get_demo_1M_image,
        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-image-zip\n\n## Code",
        "app_1M_image.py",
    ),
)

with gr.Blocks() as demo:
    gr.Markdown(DEMO_EXPLANATION)

    # Every tab has the same layout: explanation, live demo, link to the
    # resulting dataset, then the demo's source code in an open accordion.
    for tab_title, tab_intro, build_demo, result_markdown, source_file in _TABS:
        with gr.Tab(tab_title):
            gr.Markdown(tab_intro)
            build_demo()
            gr.Markdown(result_markdown)
            with gr.Accordion("Source code", open=True):
                gr.Code(_get_demo_code(source_file), language="python")
demo.launch()