annotations among languages ready
- .gitignore +80 -0
- README.md +3 -5
- app.py +248 -0
- requirements.txt +72 -0
.gitignore
ADDED
@@ -0,0 +1,80 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# IDEs and editors
+.idea/
+.vscode/
+*.sublime-project
+*.sublime-workspace
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Sphinx documentation
+docs/_build/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+.python-version
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# mkdocs documentation
+/site
+
+# Other
+*.log
+*.swp
+.DS_Store
README.md
CHANGED
@@ -1,13 +1,11 @@
 ---
-title:
-emoji:
+title: Multilingual Dashboard - Multilingual Prompt Evaluation Project Dashboard
+emoji: π
 colorFrom: green
 colorTo: green
 sdk: gradio
 sdk_version: 4.22.0
 app_file: app.py
-pinned:
+pinned: true
 license: apache-2.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,248 @@
+import datetime
+import os
+from typing import Dict, Tuple
+from uuid import UUID
+
+import altair as alt
+import argilla as rg
+from argilla.feedback import FeedbackDataset
+from argilla.client.feedback.dataset.remote.dataset import RemoteFeedbackDataset
+import gradio as gr
+import pandas as pd
+
+# Translation of legends and titles
+ANNOTATED = "Annotated"
+NUMBER_ANNOTATED = "Total Annotations"
+PENDING = "Pending Annotations"
+
+NUMBER_ANNOTATORS = "Number of Annotators"
+NAME = "Username"
+NUMBER_ANNOTATIONS = "Number of Annotations"
+
+CATEGORY = "Category"
+
+SUPPORTED_LANGUAGES = [
+    "Spanish",
+]
+
+
+def get_user_annotations_dictionary(
+    dataset: FeedbackDataset | RemoteFeedbackDataset,
+) -> Dict[str, int]:
+    """
+    This function returns a dictionary with the username as the key and the number of annotations as the value.
+
+    Args:
+        dataset: The dataset to be analyzed.
+    Returns:
+        A dictionary with the username as the key and the number of annotations as the value.
+    """
+    output = {}
+    for record in dataset:
+        for response in record.responses:
+            if str(response.user_id) not in output.keys():
+                output[str(response.user_id)] = 1
+            else:
+                output[str(response.user_id)] += 1
+
+    # Changing the name of the keys, from the id to the username
+    for key in list(output.keys()):
+        output[rg.User.from_id(UUID(key)).username] = output.pop(key)
+
+    return output
+
+
+def fetch_data() -> Tuple[Dict[str, int], Dict[str, dict]]:
+    """
+    This function fetches the data from all the datasets and stores the annotation information in two dictionaries.
+    To do so, it looks for all the environment variables that follow this pattern:
+    - SPANISH_API_URL
+    - SPANISH_API_KEY
+    - SPANISH_DATASET
+    - SPANISH_WORKSPACE
+    If the language name matches one of the languages in our SUPPORTED_LANGUAGES list, it will fetch the data
+    with the total amount of annotations and the total annotators.
+
+    Returns:
+        Tuple[Dict[str, int], Dict[str, dict]]: A tuple with two dictionaries. The first one contains the total amount of annotations
+        for each language. The second one contains the total annotators for each language.
+    """
+
+    print(f"Starting to fetch data: {datetime.datetime.now()}")
+
+    # Obtain all the environment variables
+    environment_variables_languages = {}
+
+    for language in SUPPORTED_LANGUAGES:
+
+        print("Fetching data for: ", language)
+
+        if not os.getenv(f"{language.upper()}_API_URL"):
+            print(f"Missing environment variables for {language}")
+            continue
+
+        environment_variables_languages[language] = {
+            "api_url": os.getenv(f"{language.upper()}_API_URL"),
+            "api_key": os.getenv(f"{language.upper()}_API_KEY"),
+            "dataset_name": os.getenv(f"{language.upper()}_DATASET"),
+            "workspace_name": os.getenv(f"{language.upper()}_WORKSPACE"),
+        }
+
+    global annotations, annotators
+    annotations = {}
+    annotators = {}
+
+    # Connect to each space and obtain the total amount of annotations and annotators
+    for language, environment_variables in environment_variables_languages.items():
+        rg.init(
+            api_url=environment_variables["api_url"],
+            api_key=environment_variables["api_key"],
+        )
+
+        # Obtain the dataset and see how many pending records there are
+        dataset = rg.FeedbackDataset.from_argilla(
+            environment_variables["dataset_name"],
+            workspace=environment_variables["workspace_name"],
+        )
+
+        # filtered_source_dataset = source_dataset.filter_by(response_status=["pending"])
+
+        target_dataset = dataset.filter_by(response_status=["submitted"])
+
+        annotations[language.lower()] = len(target_dataset)
+        annotators[language.lower()] = {
+            "annotators": get_user_annotations_dictionary(target_dataset)
+        }
+
+    # Print the current date and time
+    print(f"Data fetched: {datetime.datetime.now()}")
+
+    return annotations, annotators
+
+
+def kpi_chart_total_annotations() -> alt.Chart:
+    """
+    This function returns a KPI chart with the total amount of annotations.
+    Returns:
+        An altair chart with the KPI chart.
+    """
+
+    total_annotations = 0
+    for language in annotations.keys():
+        total_annotations += annotations[language]
+
+    # Build a one-row DataFrame with the total to display
+    data = pd.DataFrame({"Category": [NUMBER_ANNOTATED], "Value": [total_annotations]})
+
+    # Create Altair chart
+    chart = (
+        alt.Chart(data)
+        .mark_text(fontSize=100, align="center", baseline="middle", color="#e68b39")
+        .encode(text="Value:N")
+        .properties(title=NUMBER_ANNOTATED, width=250, height=200)
+    )
+
+    return chart
+
+
+def donut_chart_total() -> alt.Chart:
+    """
+    This function returns a donut chart with the progress of the total annotations in each language.
+
+    Returns:
+        An altair chart with the donut chart.
+    """
+
+    # Load your data
+    annotated_records = [annotation for annotation in annotations.values()]
+    languages = [language for language in annotations.keys()]
+
+    # Prepare data for the donut chart
+    source = pd.DataFrame(
+        {
+            "values": annotated_records,
+            "category": languages,
+            # "colors": ["#4682b4", "#e68c39"],  # Blue for Completed, Orange for Remaining
+        }
+    )
+
+    base = alt.Chart(source).encode(
+        theta=alt.Theta("values:Q", stack=True),
+        radius=alt.Radius(
+            "values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)
+        ),
+        color=alt.Color(
+            field="category",
+            type="nominal",
+            legend=alt.Legend(title=CATEGORY),
+        ),
+    )
+
+    c1 = base.mark_arc(innerRadius=20, stroke="#fff")
+
+    c2 = base.mark_text(radiusOffset=20).encode(text="values:Q")
+
+    chart = c1 + c2
+
+    return chart
+
+
+def main() -> None:
+
+    fetch_data()
+
+    # To avoid the orange border for the Gradio elements that are in constant loading
+    css = """
+    .generating {
+        border: none;
+    }
+    """
+
+    with gr.Blocks(css=css) as demo:
+        gr.Markdown(
+            """
+            # π Translation Efforts Dashboard - Multilingual Prompt Evaluation Project
+            You can check the progress made in each language for the Multilingual Prompt Evaluation Project in this dashboard. If you want to add a new language to this dashboard, please open an issue and we will contact you to obtain the necessary API keys and URLs to include your language in this dashboard.
+
+            ## How to participate
+            Participating is easy. Go to the [annotation space](https://somosnlp-dibt-prompt-translation-for-es.hf.space/), log in or create a Hugging Face account, and you can start working.
+            """
+        )
+
+        gr.Markdown(
+            f"""
+            ## π Annotations among Languages
+            Here you can see the progress of the annotations among the different languages.
+            """
+        )
+
+        with gr.Row():
+
+            kpi_chart_annotations = gr.Plot(label="Plot")
+            demo.load(
+                kpi_chart_total_annotations,
+                inputs=[],
+                outputs=[kpi_chart_annotations],
+            )
+
+            donut_languages = gr.Plot(label="Plot")
+            demo.load(
+                donut_chart_total,
+                inputs=[],
+                outputs=[donut_languages],
+            )
+
+        gr.Markdown(
+            """
+            ## πΎ Hall of Fame
+            Check out the users with the most contributions among the different translation efforts.
+
+            """
+        )
+
+    # Launch the Gradio interface
+    demo.launch()
+
+
+if __name__ == "__main__":
+    main()
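For reference, fetch_data() in app.py above discovers each language's dataset purely through the four environment variables named after the entries in SUPPORTED_LANGUAGES. The following is a minimal sketch of how a deployment might provide them before the dashboard starts; the URL, key, dataset, and workspace values are placeholders (not the real Space secrets), and the import assumes the snippet lives next to app.py:

import os

# Placeholder values for illustration only; real values come from the Space secrets.
os.environ["SPANISH_API_URL"] = "https://example-argilla-instance.hf.space"
os.environ["SPANISH_API_KEY"] = "example-api-key"
os.environ["SPANISH_DATASET"] = "example-dataset-name"
os.environ["SPANISH_WORKSPACE"] = "example-workspace"

from app import fetch_data  # assumes this snippet sits alongside app.py

# fetch_data() picks up the variables and returns per-language totals, e.g.:
# annotations == {"spanish": <number of submitted records>}
# annotators == {"spanish": {"annotators": {<username>: <count>, ...}}}
annotations, annotators = fetch_data()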
requirements.txt
ADDED
@@ -0,0 +1,72 @@
+aiofiles==23.2.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.2.0
+apscheduler==3.10.4
+argilla==1.23.0
+attrs==23.2.0
+backoff==2.2.1
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+contourpy==1.2.0
+cycler==0.12.1
+Deprecated==1.2.14
+exceptiongroup==1.2.0
+fastapi==0.109.2
+ffmpy==0.3.1
+filelock==3.13.1
+fonttools==4.48.1
+fsspec==2024.2.0
+gradio==4.17.0
+gradio_client==0.9.0
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.26.0
+huggingface-hub==0.20.3
+idna==3.6
+importlib-resources==6.1.1
+Jinja2==3.1.3
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.8.2
+mdurl==0.1.2
+monotonic==1.6
+numpy==1.23.5
+orjson==3.9.13
+packaging==23.2
+pandas==1.5.3
+pillow==10.2.0
+pydantic==2.6.1
+pydantic_core==2.16.2
+pydub==0.25.1
+Pygments==2.17.2
+pyparsing==3.1.1
+python-dateutil==2.8.2
+python-multipart==0.0.7
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.33.0
+requests==2.31.0
+rich==13.7.0
+rpds-py==0.17.1
+ruff==0.2.1
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.0
+starlette==0.36.3
+tomlkit==0.12.0
+toolz==0.12.1
+tqdm==4.66.1
+typer==0.9.0
+typing_extensions==4.9.0
+urllib3==2.2.0
+uvicorn==0.27.0.post1
+vega-datasets==0.9.0
+websockets==11.0.3
+wrapt==1.14.1