annotations among languages ready
- .gitignore +80 -0
- README.md +3 -5
- app.py +248 -0
- requirements.txt +72 -0
.gitignore
ADDED
@@ -0,0 +1,80 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# IDEs and editors
+.idea/
+.vscode/
+*.sublime-project
+*.sublime-workspace
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Sphinx documentation
+docs/_build/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+.python-version
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# mkdocs documentation
+/site
+
+# Other
+*.log
+*.swp
+.DS_Store
README.md
CHANGED
@@ -1,13 +1,11 @@
 ---
-title:
-emoji:
+title: Multilingual Dashboard - Multilingual Prompt Evaluation Project Dashboard
+emoji: π
 colorFrom: green
 colorTo: green
 sdk: gradio
 sdk_version: 4.22.0
 app_file: app.py
-pinned:
+pinned: true
 license: apache-2.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,248 @@
+import datetime
+import os
+from typing import Dict, Tuple
+from uuid import UUID
+
+import altair as alt
+import argilla as rg
+from argilla.feedback import FeedbackDataset
+from argilla.client.feedback.dataset.remote.dataset import RemoteFeedbackDataset
+import gradio as gr
+import pandas as pd
+
+# Translation of legends and titles
+ANNOTATED = "Annotated"
+NUMBER_ANNOTATED = "Total Annotations"
+PENDING = "Pending Annotations"
+
+NUMBER_ANNOTATORS = "Number of Annotators"
+NAME = "Username"
+NUMBER_ANNOTATIONS = "Number of Annotations"
+
+CATEGORY = "Category"
+
+SUPPORTED_LANGUAGES = [
+    "Spanish",
+]
+
+
+def get_user_annotations_dictionary(
+    dataset: FeedbackDataset | RemoteFeedbackDataset,
+) -> Dict[str, int]:
+    """
+    This function returns a dictionary with the username as the key and the number of annotations as the value.
+
+    Args:
+        dataset: The dataset to be analyzed.
+    Returns:
+        A dictionary with the username as the key and the number of annotations as the value.
+    """
+    output = {}
+    for record in dataset:
+        for response in record.responses:
+            if str(response.user_id) not in output.keys():
+                output[str(response.user_id)] = 1
+            else:
+                output[str(response.user_id)] += 1
+
+    # Changing the name of the keys, from the id to the username
+    for key in list(output.keys()):
+        output[rg.User.from_id(UUID(key)).username] = output.pop(key)
+
+    return output
+
+
+def fetch_data() -> Tuple[Dict[str, int], Dict[str, dict]]:
+    """
+    This function fetches the data from all the datasets and stores the annotation information in two dictionaries.
+    To do so, it looks for all the environment variables that follow this pattern:
+    - SPANISH_API_URL
+    - SPANISH_API_KEY
+    - SPANISH_DATASET
+    - SPANISH_WORKSPACE
+    If the language name matches one of the languages in our SUPPORTED_LANGUAGES list, it will fetch the data
+    with the total amount of annotations and the total annotators.
+
+    Returns:
+        Tuple[Dict[str, int], Dict[str, dict]]: A tuple with two dictionaries. The first one contains the total amount of annotations
+        for each language. The second one contains the total annotators for each language.
+    """
+
+    print(f"Starting to fetch data: {datetime.datetime.now()}")
+
+    # Obtain all the environment variables
+    environment_variables_languages = {}
+
+    for language in SUPPORTED_LANGUAGES:
+
+        print("Fetching data for: ", language)
+
+        if not os.getenv(f"{language.upper()}_API_URL"):
+            print(f"Missing environment variables for {language}")
+            continue
+
+        environment_variables_languages[language] = {
+            "api_url": os.getenv(f"{language.upper()}_API_URL"),
+            "api_key": os.getenv(f"{language.upper()}_API_KEY"),
+            "dataset_name": os.getenv(f"{language.upper()}_DATASET"),
+            "workspace_name": os.getenv(f"{language.upper()}_WORKSPACE"),
+        }
+
+    global annotations, annotators
+    annotations = {}
+    annotators = {}
+
+    # Connect to each space and obtain the total amount of annotations and annotators
+    for language, environment_variables in environment_variables_languages.items():
+        rg.init(
+            api_url=environment_variables["api_url"],
+            api_key=environment_variables["api_key"],
+        )
+
+        # Obtain the dataset and see how many pending records there are
+        dataset = rg.FeedbackDataset.from_argilla(
+            environment_variables["dataset_name"],
+            workspace=environment_variables["workspace_name"],
+        )
+
+        # filtered_source_dataset = source_dataset.filter_by(response_status=["pending"])
+
+        target_dataset = dataset.filter_by(response_status=["submitted"])
+
+        annotations[language.lower()] = len(target_dataset)
+        annotators[language.lower()] = {
+            "annotators": get_user_annotations_dictionary(target_dataset)
+        }
+
+    # Print the current date and time
+    print(f"Data fetched: {datetime.datetime.now()}")
+
+    return annotations, annotators
+
+
+def kpi_chart_total_annotations() -> alt.Chart:
+    """
+    This function returns a KPI chart with the total amount of annotations.
+    Returns:
+        An altair chart with the KPI chart.
+    """
+
+    total_annotations = 0
+    for language in annotations.keys():
+        total_annotations += annotations[language]
+
+    # Build a one-row DataFrame with the total to display
+    data = pd.DataFrame({"Category": [NUMBER_ANNOTATED], "Value": [total_annotations]})
+
+    # Create Altair chart
+    chart = (
+        alt.Chart(data)
+        .mark_text(fontSize=100, align="center", baseline="middle", color="#e68b39")
+        .encode(text="Value:N")
+        .properties(title=NUMBER_ANNOTATED, width=250, height=200)
+    )
+
+    return chart
+
+
+def donut_chart_total() -> alt.Chart:
+    """
+    This function returns a donut chart with the progress of the total annotations in each language.
+
+    Returns:
+        An altair chart with the donut chart.
+    """
+
+    # Load your data
+    annotated_records = [annotation for annotation in annotations.values()]
+    languages = [language for language in annotations.keys()]
+
+    # Prepare data for the donut chart
+    source = pd.DataFrame(
+        {
+            "values": annotated_records,
+            "category": languages,
+            # "colors": ["#4682b4", "#e68c39"],  # Blue for Completed, Orange for Remaining
+        }
+    )
+
+    base = alt.Chart(source).encode(
+        theta=alt.Theta("values:Q", stack=True),
+        radius=alt.Radius(
+            "values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)
+        ),
+        color=alt.Color(
+            field="category",
+            type="nominal",
+            legend=alt.Legend(title=CATEGORY),
+        ),
+    )
+
+    c1 = base.mark_arc(innerRadius=20, stroke="#fff")
+
+    c2 = base.mark_text(radiusOffset=20).encode(text="values:Q")
+
+    chart = c1 + c2
+
+    return chart
+
+
+def main() -> None:
+
+    fetch_data()
+
+    # To avoid the orange border for the Gradio elements that are in constant loading
+    css = """
+    .generating {
+        border: none;
+    }
+    """
+
+    with gr.Blocks(css=css) as demo:
+        gr.Markdown(
+            """
+            # π Translation Efforts Dashboard - Multilingual Prompt Evaluation Project
+            You can check the progress made in each language for the Multilingual Prompt Evaluation Project in this dashboard. If you want to add a new language to this dashboard, please open an issue and we will contact you to obtain the necessary API keys and URLs to include your language in this dashboard.
+
+            ## How to participate
+            Participating is easy. Go to the [annotation space](https://somosnlp-dibt-prompt-translation-for-es.hf.space/), log in or create a Hugging Face account, and you can start working.
+            """
+        )
+
+        gr.Markdown(
+            f"""
+            ## π Annotations among Languages
+            Here you can see the progress of the annotations among the different languages.
+            """
+        )
+
+        with gr.Row():
+
+            kpi_chart_annotations = gr.Plot(label="Plot")
+            demo.load(
+                kpi_chart_total_annotations,
+                inputs=[],
+                outputs=[kpi_chart_annotations],
+            )
+
+            donut_languages = gr.Plot(label="Plot")
+            demo.load(
+                donut_chart_total,
+                inputs=[],
+                outputs=[donut_languages],
+            )
+
+        gr.Markdown(
+            """
+            ## πΎ Hall of Fame
+            Check out the users with the most contributions among the different translation efforts.
+
+            """
+        )
+
+    # Launch the Gradio interface
+    demo.launch()
+
+
+if __name__ == "__main__":
+    main()
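For reference, fetch_data() in app.py above discovers each language's dataset purely through the four environment variables named after the entries in SUPPORTED_LANGUAGES. The following is a minimal sketch of how a deployment might provide them before the dashboard starts; the URL, key, dataset, and workspace values are placeholders (not the real Space secrets), and the import assumes the snippet lives next to app.py:

import os

# Placeholder values for illustration only; real values come from the Space secrets.
os.environ["SPANISH_API_URL"] = "https://example-argilla-instance.hf.space"
os.environ["SPANISH_API_KEY"] = "example-api-key"
os.environ["SPANISH_DATASET"] = "example-dataset-name"
os.environ["SPANISH_WORKSPACE"] = "example-workspace"

from app import fetch_data  # assumes this snippet sits alongside app.py

# fetch_data() picks up the variables and returns per-language totals, e.g.:
# annotations == {"spanish": <number of submitted records>}
# annotators == {"spanish": {"annotators": {<username>: <count>, ...}}}
annotations, annotators = fetch_data()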
requirements.txt
ADDED
@@ -0,0 +1,72 @@
+aiofiles==23.2.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.2.0
+apscheduler==3.10.4
+argilla==1.23.0
+attrs==23.2.0
+backoff==2.2.1
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+contourpy==1.2.0
+cycler==0.12.1
+Deprecated==1.2.14
+exceptiongroup==1.2.0
+fastapi==0.109.2
+ffmpy==0.3.1
+filelock==3.13.1
+fonttools==4.48.1
+fsspec==2024.2.0
+gradio==4.17.0
+gradio_client==0.9.0
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.26.0
+huggingface-hub==0.20.3
+idna==3.6
+importlib-resources==6.1.1
+Jinja2==3.1.3
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.8.2
+mdurl==0.1.2
+monotonic==1.6
+numpy==1.23.5
+orjson==3.9.13
+packaging==23.2
+pandas==1.5.3
+pillow==10.2.0
+pydantic==2.6.1
+pydantic_core==2.16.2
+pydub==0.25.1
+Pygments==2.17.2
+pyparsing==3.1.1
+python-dateutil==2.8.2
+python-multipart==0.0.7
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.33.0
+requests==2.31.0
+rich==13.7.0
+rpds-py==0.17.1
+ruff==0.2.1
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.0
+starlette==0.36.3
+tomlkit==0.12.0
+toolz==0.12.1
+tqdm==4.66.1
+typer==0.9.0
+typing_extensions==4.9.0
+urllib3==2.2.0
+uvicorn==0.27.0.post1
+vega-datasets==0.9.0
+websockets==11.0.3
+wrapt==1.14.1