Make file-submission instructions task-specific & other small changes
- app.py +29 -27
- requirements.txt +2 -1
- src/content.py +4 -2
- src/evaluation/base_task_metrics.py +1 -3
- src/evaluation/commit_message_generation/cmg_metrics.py +6 -17
- src/formatting.py +1 -3
- src/get_results_for_task.py +13 -21
- src/leaderboard_formatting.py +1 -5
- src/submission_uploader.py +23 -66
- src/{tasks.py → tasks_content.py} +20 -6
app.py
CHANGED
```diff
@@ -2,17 +2,27 @@ import logging
 import os
 
 import gradio as gr  # type: ignore[import]
-from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import HfApi
 
-from src.content import (
-    …
-)
+from src.content import (
+    INTRODUCTION_TEXT,
+    INTRODUCTION_TITLE,
+    LEADERBOARD_TEXT,
+    LEADERBOARD_TITLE,
+    SUBMISSION_TEXT_FILES,
+    SUBMISSION_TEXT_INTRO,
+    SUBMISSION_TEXT_METADATA,
+    SUBMISSION_TEXT_SUBMIT,
+    SUBMISSION_TEXT_TASK,
+    SUBMISSION_TITLE,
+)
 from src.get_results_for_task import get_results_for_task
 from src.submission_uploader import SubmissionUploader
-from src.tasks import …
+from src.tasks_content import (
+    TASKS_DESCRIPTIONS,
+    TASKS_PRETTY,
+    TASKS_PRETTY_REVERSE,
+    get_submission_text_files_for_task,
+)
 
 logging.basicConfig(
     level=logging.INFO,
@@ -23,35 +33,28 @@ logging.basicConfig(
 submission_uploader = SubmissionUploader(os.environ["DATASET_ID"])
 
 
-def restart_space():
-    HfApi(token=os.environ["HF_TOKEN"]).restart_space(
-        repo_id="JetBrains-Research/long-code-arena", token=os.environ["HF_TOKEN"]
-    )
-
-
 with gr.Blocks() as demo:
+    # intro
     gr.HTML(INTRODUCTION_TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
+    # leaderboard
     gr.HTML(LEADERBOARD_TITLE)
     gr.Markdown(LEADERBOARD_TEXT, elem_classes="markdown-text")
-
     with gr.Tabs():
-        for …
-            with gr.TabItem(…
+        for task_pretty in TASKS_PRETTY_REVERSE:
+            with gr.TabItem(task_pretty):
                 with gr.Row():
-                    gr.Markdown(TASKS_DESCRIPTIONS[…
+                    gr.Markdown(TASKS_DESCRIPTIONS[TASKS_PRETTY_REVERSE[task_pretty]])
 
-                leaderboard_table = gr.components.Dataframe(
-                    value=get_results_for_task(task), interactive=False
-                )
+                leaderboard_table = gr.components.Dataframe(value=get_results_for_task(task_pretty), interactive=False)
 
+    # submission
     gr.HTML(SUBMISSION_TITLE)
     gr.Markdown(SUBMISSION_TEXT_INTRO, elem_classes="markdown-text")
-
     with gr.Accordion("🚀 Submit new results"):
         gr.Markdown(SUBMISSION_TEXT_TASK, elem_classes="markdown-text")
-        …
+        task_selection = gr.Radio(TASKS_PRETTY_REVERSE.keys(), label="Task")
 
         gr.Markdown(SUBMISSION_TEXT_METADATA, elem_classes="markdown-text")
         with gr.Row():
@@ -91,6 +94,8 @@ with gr.Blocks() as demo:
             )
 
         gr.Markdown(SUBMISSION_TEXT_FILES, elem_classes="markdown-text")
+        task_specific_instructions = gr.Markdown(get_submission_text_files_for_task(None))
+        task_selection.select(get_submission_text_files_for_task, [task_selection], task_specific_instructions)
         file_output = gr.File(file_count="multiple")
 
         gr.Markdown(SUBMISSION_TEXT_SUBMIT, elem_classes="markdown-text")
@@ -99,7 +104,7 @@ with gr.Blocks() as demo:
         submit_button.click(
             submission_uploader.upload_files,
             [
-                …
+                task_selection,
                 model_folder_textbox,
                 model_name_textbox,
                 model_availability_textbox,
@@ -112,7 +117,4 @@ with gr.Blocks() as demo:
         )
 
 if __name__ == "__main__":
-    scheduler = BackgroundScheduler()
-    scheduler.add_job(restart_space, "interval", seconds=30 * 60)
-    scheduler.start()
     demo.launch()
```
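The heart of this change is the pair of new `app.py` lines wiring a `gr.Radio` to a `gr.Markdown`: selecting a task re-renders the file-submission instructions. Below is a minimal standalone sketch of that pattern, assuming a recent Gradio release; `TASKS` and `instructions_for` are illustrative stand-ins for `TASKS_PRETTY_REVERSE` and `get_submission_text_files_for_task`, not the real implementations.

```python
import gradio as gr

# Hypothetical stand-in for TASKS_PRETTY_REVERSE.keys()
TASKS = ["Commit Message Generation", "Bug Localization on Issue"]


def instructions_for(task_pretty):
    # Stand-in for get_submission_text_files_for_task: returns the Markdown
    # shown under the "Attach files" step.
    if not task_pretty:
        return "Please, select a task to see submission instructions."
    return f"**{task_pretty} Instructions:** attach predictions in JSONLines format."


with gr.Blocks() as demo:
    task_selection = gr.Radio(TASKS, label="Task")
    task_specific_instructions = gr.Markdown(instructions_for(None))
    # On selection, Gradio passes the Radio's current value to the callback and
    # writes the returned string back into the Markdown component.
    task_selection.select(instructions_for, [task_selection], task_specific_instructions)

if __name__ == "__main__":
    demo.launch()
```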
requirements.txt
CHANGED
```diff
@@ -1,8 +1,9 @@
 huggingface_hub
 jsonlines
 pandas
+gradio
+datasets
 tqdm
-apscheduler
 # CMG metrics
 evaluate
 bert-score
```
src/content.py
CHANGED
```diff
@@ -1,3 +1,5 @@
+from .formatting import styled_warning
+
 # ================================
 # =            ABOUT             =
 # ================================
@@ -25,9 +27,9 @@ SUBMISSION_TEXT_TASK = """1. Select a task you want to submit results for."""
 SUBMISSION_TEXT_METADATA = """2. Fill in some metadata about your submission."""
 
 SUBMISSION_TEXT_FILES = """3. Attach one or more files with your model's predictions.
-* If several files are attached, they will be treated as separate runs of the submitted model (e.g., with different seeds), and the metrics will be averaged across runs. For baselines provided by 🏟️ Long Code Arena Team, the results are averaged across 3 runs.
-* Please, attach files in [JSONLines format](https://jsonlines.org/). For an example, check the predictions provided by 🏟️ Long Code Arena Team in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results). Make sure to include `"prediction"` and `"reference"` fields for each example, the rest are optional.
+* If several files are attached, they will be treated as separate runs of the submitted model (e.g., with different seeds), and the metrics will be averaged across runs. For baselines provided by 🏟️ Long Code Arena Team, the results are averaged across 3 runs.
 """
+
 SUBMISSION_TEXT_SUBMIT = """All set! A new PR to 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results) should be opened when you press "Submit" button. 🏟️ Long Code Arena Team will review it shortly, and the results will appear in the leaderboard.
 
 ⏳ **Note:** It might take some time (up to 40 minutes) for PR to get created, since it involves computing metrics for your submission."""
```
src/evaluation/base_task_metrics.py
CHANGED
```diff
@@ -7,9 +7,7 @@ class BaseTaskMetrics(ABC):
         pass
 
     @abstractmethod
-    def add_batch(
-        self, predictions: List[str], references: List[str], *args, **kwargs
-    ) -> None:
+    def add_batch(self, predictions: List[str], references: List[str], *args, **kwargs) -> None:
         pass
 
     @abstractmethod
```
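For context, `BaseTaskMetrics` is the abstract interface that task metrics such as `CMGMetrics` below implement. A hedged sketch of a custom implementation, based only on the methods visible in this diff (`reset`, `add_batch`, `compute`); the exact abstract method set may differ, and `ExactMatchMetrics` is a made-up example, not part of the repository.

```python
from typing import Dict, List

from src.evaluation.base_task_metrics import BaseTaskMetrics


class ExactMatchMetrics(BaseTaskMetrics):
    """Hypothetical metric: percentage of predictions that match the reference exactly."""

    def __init__(self):
        self._hits = 0
        self._total = 0

    def reset(self) -> None:
        self._hits = 0
        self._total = 0

    def add_batch(self, predictions: List[str], references: List[str], *args, **kwargs) -> None:
        # Accumulate exact-match counts batch by batch.
        for pred, ref in zip(predictions, references):
            self._hits += int(pred.strip() == ref.strip())
            self._total += 1

    def compute(self, *args, **kwargs) -> Dict[str, float]:
        return {"exact_match": 100.0 * self._hits / max(self._total, 1)}
```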
src/evaluation/commit_message_generation/cmg_metrics.py
CHANGED
```diff
@@ -13,27 +13,17 @@ class CMGMetrics(BaseTaskMetrics):
         self.bertscore = evaluate.load("bertscore")
         self.bertscore_normalized = evaluate.load("bertscore")
 
-    def add_batch(
-        self, predictions: List[str], references: List[str], *args, **kwargs
-    ) -> None:
-        self.bleu.add_batch(
-            predictions=predictions, references=[[ref] for ref in references]
-        )
-        self.chrf.add_batch(
-            predictions=predictions, references=[[ref] for ref in references]
-        )
+    def add_batch(self, predictions: List[str], references: List[str], *args, **kwargs) -> None:
+        self.bleu.add_batch(predictions=predictions, references=[[ref] for ref in references])
+        self.chrf.add_batch(predictions=predictions, references=[[ref] for ref in references])
         self.rouge.add_batch(predictions=predictions, references=references)
         self.bertscore.add_batch(predictions=predictions, references=references)
-        self.bertscore_normalized.add_batch(
-            predictions=predictions, references=references
-        )
+        self.bertscore_normalized.add_batch(predictions=predictions, references=references)
 
     def compute(self, *args, **kwargs) -> Dict[str, float]:
         rouge = self.rouge.compute()
         bertscore = self.bertscore.compute(lang="en")
-        bertscore_normalized = self.bertscore_normalized.compute(
-            lang="en", rescale_with_baseline=True
-        )
+        bertscore_normalized = self.bertscore_normalized.compute(lang="en", rescale_with_baseline=True)
         return {
             "bleu": self.bleu.compute(tokenize="13a")["score"],
             "chrf": self.chrf.compute()["score"],
@@ -41,6 +31,5 @@ class CMGMetrics(BaseTaskMetrics):
             "rouge2": rouge["rouge2"] * 100,
             "rougeL": rouge["rougeL"] * 100,
             "bertscore": sum(bertscore["f1"]) / len(bertscore["f1"]),
-            "bertscore_normalized": sum(bertscore_normalized["f1"])
-            / len(bertscore_normalized["f1"]),
+            "bertscore_normalized": sum(bertscore_normalized["f1"]) / len(bertscore_normalized["f1"]),
         }
```
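Usage stays the same after the reflow: feed prediction/reference pairs with `add_batch`, then call `compute` once. A hedged sketch follows; the zero-argument constructor is assumed from this diff, and loading the underlying `evaluate` metrics (sacrebleu, chrF, rouge, bertscore) downloads them on first use, so this is illustrative rather than something to run in CI.

```python
from src.evaluation.commit_message_generation.cmg_metrics import CMGMetrics

# CMGMetrics() with no arguments is an assumption based on the diff above.
metrics = CMGMetrics()
metrics.add_batch(
    predictions=["Fix off-by-one error in pagination"],
    references=["Fix off-by-one bug in pagination logic"],
)
scores = metrics.compute()
# e.g. {"bleu": ..., "chrf": ..., "rouge2": ..., "rougeL": ..., "bertscore": ..., "bertscore_normalized": ...}
print(scores)
```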
src/formatting.py
CHANGED
```diff
@@ -7,6 +7,4 @@ def styled_warning(warn):
 
 
 def styled_message(message):
-    return (
-        f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
-    )
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
```
src/get_results_for_task.py
CHANGED
```diff
@@ -2,13 +2,15 @@ import logging
 import os
 
 import pandas as pd  # type: ignore[import]
-from datasets import (get_dataset_config_names,
-                      load_dataset)
+from datasets import get_dataset_config_names, load_dataset  # type: ignore[import]
 
-from .leaderboard_formatting import (
-    …
-)
+from .leaderboard_formatting import (
+    COLUMNS_PRETTY,
+    METRICS_PER_TASK,
+    SORT_COLUMN_PER_TASK,
+    get_columns_per_task,
+)
+from .tasks_content import TASKS_PRETTY_REVERSE
 
 AVAILABLE_TASKS = get_dataset_config_names(os.environ["DATASET_ID"])
 
@@ -44,27 +46,17 @@ def _get_results_stub() -> pd.DataFrame:
 
 
 def _get_results_dataset(task_id: str) -> pd.DataFrame:
-    results_df = load_dataset(
-        os.environ["DATASET_ID"], task_id, split="test"
-    ).to_pandas()
+    results_df = load_dataset(os.environ["DATASET_ID"], task_id, split="test").to_pandas()
     results_df = results_df.rename(columns=COLUMNS_PRETTY, errors="ignore")
-    results_df["Context Size"] = results_df["Context Size"].map(
-        lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x
-    )
+    results_df["Context Size"] = results_df["Context Size"].map(lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x)
 
-    results_df = results_df.sort_values(
-        by=SORT_COLUMN_PER_TASK[task_id], ascending=False
-    )
+    results_df = results_df.sort_values(by=SORT_COLUMN_PER_TASK[task_id], ascending=False)
 
     for metric_column in METRICS_PER_TASK[task_id]:
         if "BERTScore" in metric_column:
-            results_df[metric_column] = results_df[metric_column].map(
-                lambda x: f"{x:.5f}"
-            )
+            results_df[metric_column] = results_df[metric_column].map(lambda x: f"{x:.5f}")
         else:
-            results_df[metric_column] = results_df[metric_column].map(
-                lambda x: f"{x:.2f}"
-            )
+            results_df[metric_column] = results_df[metric_column].map(lambda x: f"{x:.2f}")
 
     results_df = results_df[get_columns_per_task(task_id)]
     return results_df
```
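The inlined lambdas are behavior-preserving; for illustration, here is what the same formatting does on toy data (not the real leaderboard dataset):

```python
import pandas as pd

# Toy frame; column names mirror the leaderboard, values are invented.
df = pd.DataFrame({"Context Size": [512, 16000], "ROUGE-1": [24.318, 27.9]})
# Context sizes of 1000+ become "Nk"; metric columns get fixed precision.
df["Context Size"] = df["Context Size"].map(lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x)
df["ROUGE-1"] = df["ROUGE-1"].map(lambda x: f"{x:.2f}")
print(df.to_dict(orient="list"))
# {'Context Size': [512, '16k'], 'ROUGE-1': ['24.32', '27.90']}
```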
src/leaderboard_formatting.py
CHANGED
```diff
@@ -35,8 +35,4 @@ SORT_COLUMN_PER_TASK = {"commit_message_generation": "ROUGE-1"}
 def get_columns_per_task(task_id: str) -> List[str]:
     metrics_per_task = METRICS_PER_TASK[task_id]
 
-    return (
-        ["Model Name", "Availability", "Context Size"]
-        + metrics_per_task
-        + ["Submitted By"]
-    )
+    return ["Model Name", "Availability", "Context Size"] + metrics_per_task + ["Submitted By"]
```
src/submission_uploader.py
CHANGED
```diff
@@ -11,7 +11,7 @@ from tqdm import tqdm
 
 from .evaluation import METRICS
 from .formatting import styled_error, styled_message, styled_warning
-from .tasks import TASKS_PRETTY_REVERSE
+from .tasks_content import TASKS_PRETTY_REVERSE
 
 
 class AlreadyExists(Exception):
@@ -34,17 +34,11 @@ class SubmissionUploader:
     def _get_previous_pr(self, pr_title: str) -> Optional[Discussion]:
         """Searches among discussions of dataset repo for a PR with the given title."""
         try:
-            discussions = self._api.get_repo_discussions(
-                repo_id=self._dataset_id, repo_type="dataset"
-            )
+            discussions = self._api.get_repo_discussions(repo_id=self._dataset_id, repo_type="dataset")
         except Exception:
             return None
         for discussion in discussions:
-            if (
-                discussion.status == "open"
-                and discussion.is_pull_request
-                and discussion.title == pr_title
-            ):
+            if discussion.status == "open" and discussion.is_pull_request and discussion.title == pr_title:
                 return discussion
         return None
 
@@ -79,41 +73,30 @@ class SubmissionUploader:
         ]
         return commit_operations
 
-    def _compute_metrics_for_predictions(
-        self, task_id: str, filenames: Optional[List[str]], temp_directory: str
-    ) -> None:
+    def _compute_metrics_for_predictions(self, task_id: str, filenames: List[str], temp_directory: str) -> None:
         metrics_module = METRICS[task_id]
-        assert (
-            metrics_module is not None
-        ), f"Computing metrics for {task_id} is not supported."
+        assert metrics_module is not None, f"Computing metrics for {task_id} is not supported."
         metrics_module.reset()
         open(os.path.join(temp_directory, "metrics.jsonl"), "w").close()
 
         # compute the metrics for each submitted file
         for filename in filenames:
             with jsonlines.open(filename, "r") as reader:
-                for example in tqdm(
-                    reader, desc=f"Computing metrics for {os.path.basename(filename)}"
-                ):
+                for example in tqdm(reader, desc=f"Computing metrics for {os.path.basename(filename)}"):
                     metrics_module.add_batch(
                         predictions=[example["prediction"]],
                         references=[example["reference"]],
                     )
             computed_metrics = metrics_module.compute()
             metrics_module.reset()
-            with jsonlines.open(
-                os.path.join(temp_directory, "metrics.jsonl"), "a"
-            ) as writer:
+            with jsonlines.open(os.path.join(temp_directory, "metrics.jsonl"), "a") as writer:
                 writer.write(computed_metrics)
 
         # aggregate the metrics over submitted files
-        with jsonlines.open(
-            os.path.join(temp_directory, "metrics.jsonl"), "r"
-        ) as reader:
+        with jsonlines.open(os.path.join(temp_directory, "metrics.jsonl"), "r") as reader:
             metrics_results = [line for line in reader]
         final_metrics_results = {
-            key: sum(entry[key] for entry in metrics_results) / len(metrics_results)
-            for key in metrics_results[0]
+            key: sum(entry[key] for entry in metrics_results) / len(metrics_results) for key in metrics_results[0]
         }
         with open(os.path.join(temp_directory, "final_metrics.json"), "w") as f:
             json.dump(final_metrics_results, f)
@@ -142,9 +125,7 @@ class SubmissionUploader:
             )
         final_results.update(metadata_dict)
 
-        with jsonlines.open(
-            os.path.join(temp_directory, "final_results.jsonl"), "w"
-        ) as writer:
+        with jsonlines.open(os.path.join(temp_directory, "final_results.jsonl"), "w") as writer:
             writer.write(final_results)
 
         return [
@@ -165,29 +146,17 @@ class SubmissionUploader:
         submitted_by: str,
         filenames: Optional[List[str]],
     ):
-        assert (
-            task_pretty and task_pretty in TASKS_PRETTY_REVERSE
-        ), "Please, select one of the supported tasks."
-        assert (
-            model_folder
-        ), "Please, specify non-empty name for a directory with a model's results."
+        assert task_pretty and task_pretty in TASKS_PRETTY_REVERSE, "Please, select one of the supported tasks."
+        assert model_folder, "Please, specify non-empty name for a directory with a model's results."
         assert model_name_pretty, "Please, specify non-empty name for a model."
-        assert (
-            model_availability
-        ), "Please, specify non-empty information about a model's availability."
-        assert (
-            context_size
-        ), "Please, specify non-empty information about a model's context size."
+        assert model_availability, "Please, specify non-empty information about a model's availability."
+        assert context_size, "Please, specify non-empty information about a model's context size."
         try:
            _ = int(context_size)
         except:
-            raise ValueError(
-                "Please, specify a model's context size as an integer (e.g., 16000)."
-            )
+            raise ValueError("Please, specify a model's context size as an integer (e.g., 16000).")
 
-        assert (
-            submitted_by
-        ), "Please, specify non-empty information about a submission's author(s)."
+        assert submitted_by, "Please, specify non-empty information about a submission's author(s)."
         assert filenames, "Please, attach at least one file with predictions."
 
     def upload_files(
@@ -221,25 +190,16 @@ class SubmissionUploader:
 
         logging.info("Checking if this request has already been submitted...")
         if not force:
-            if model_name_pretty in self._fs.ls(
-                f"datasets/{self._dataset_id}/{task_id}/predictions"
-            ) and all(
-                filename
-                in self._fs.ls(
-                    f"datasets/{self._dataset_id}/{task_id}/predictions/{model_name_pretty}"
-                )
-                for filename in filenames + ["metadata.json"]
+            if model_name_pretty in self._fs.ls(f"datasets/{self._dataset_id}/{task_id}/predictions") and all(
+                filename in self._fs.ls(f"datasets/{self._dataset_id}/{task_id}/predictions/{model_name_pretty}")
+                for filename in filenames
             ):
-                return styled_warning(
-                    f"{model_name_pretty} is already present in {self._dataset_id}."
-                )
+                return styled_warning(f"{model_name_pretty} is already present in {self._dataset_id}.")
 
             prev_pr = self._get_previous_pr(pr_title)
             if prev_pr is not None:
                 url = f"https://huggingface.co/datasets/{self._dataset_id}/discussions/{prev_pr.num}"
-                return styled_warning(
-                    f"{self._dataset_id} already has an open PR for this submission: {url}."
-                )
+                return styled_warning(f"{self._dataset_id} already has an open PR for this submission: {url}.")
 
         logging.info("Processing predictions...")
         predictions_commit_operations = self._upload_predictions(
@@ -250,9 +210,7 @@ class SubmissionUploader:
 
         with TemporaryDirectory() as d:
             logging.info("Computing metrics...")
-            self._compute_metrics_for_predictions(
-                task_id=task_id, filenames=filenames, temp_directory=str(d)
-            )
+            self._compute_metrics_for_predictions(task_id=task_id, filenames=filenames, temp_directory=str(d))
 
             logging.info("Processing results...")
             results_commit_operations = self._upload_results(
@@ -269,8 +227,7 @@ class SubmissionUploader:
             logging.info("Creating commit...")
             new_pr = self._api.create_commit(
                 repo_id=self._dataset_id,
-                operations=predictions_commit_operations
-                + results_commit_operations,
+                operations=predictions_commit_operations + results_commit_operations,
                 commit_message=pr_title,
                 commit_description=f"""New submission to {task_pretty} task in 🏟️ Long Code Arena benchmark!\n* Model name: {model_name_pretty}\n* Model availability: {model_availability}\n* Context Size: {context_size}\n* Relevant URLs: {urls}\n* Submitted By: {submitted_by}""",
                 create_pr=True,
```
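Two details worth noting for submitters: each attached predictions file must be JSONLines with at least a `"prediction"` and a `"reference"` field per record, and the per-file (per-run) metric dicts are averaged key-by-key before being written to `final_metrics.json`. A toy illustration of that aggregation step, with invented values:

```python
# One line of a valid predictions file (JSONLines):
# {"prediction": "Fix typo in README", "reference": "Fix typo in README.md"}

metrics_results = [
    {"bleu": 10.0, "chrf": 30.0},  # metrics computed for run 1 (invented)
    {"bleu": 12.0, "chrf": 34.0},  # metrics computed for run 2 (invented)
]
# Same dict comprehension as in _compute_metrics_for_predictions above.
final_metrics_results = {
    key: sum(entry[key] for entry in metrics_results) / len(metrics_results) for key in metrics_results[0]
}
print(final_metrics_results)  # {'bleu': 11.0, 'chrf': 32.0}
```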
src/{tasks.py → tasks_content.py}
RENAMED
```diff
@@ -1,3 +1,5 @@
+from typing import Optional
+
 TASKS_PRETTY = {
     "commit_message_generation": "Commit Message Generation",
     "bug_localization": "Bug Localization on Issue",
@@ -9,7 +11,7 @@ TASKS_PRETTY = {
 TASKS_PRETTY_REVERSE = {value: key for key, value in TASKS_PRETTY.items()}
 
 TASKS_DESCRIPTIONS = {
-    "commit_message_generation": …
+    "commit_message_generation": """# Commit Message Generation\n
 
 Our Commit Message Generation benchmark 🤗 [JetBrains-Research/lca-cmg](https://huggingface.co/datasets/JetBrains-Research/lca-cmg) includes 163 manually curated commits from Python projects.
 
@@ -21,9 +23,21 @@ TASKS_DESCRIPTIONS = {
 
 For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
 """,
-    "bug_localization": …,
-    "module_to_text": …,
-    "library_usage": …,
-    "project_code_completion": …,
-    "bug_localization_build_logs": …,
+    "bug_localization": "cool description for Bug Localization on Issue task",
+    "module_to_text": "cool description for Module-to-Text task",
+    "library_usage": "cool description for Library Usage Examples Generation task",
+    "project_code_completion": "cool description for Project-level Code Completion task",
+    "bug_localization_build_logs": "cool description for Bug Localization on Build Logs task",
 }
+
+
+def get_submission_text_files_for_task(task_pretty: Optional[str]) -> str:
+    if not task_pretty:
+        return "Please, select a specific task to see more detailed instructions regarding submitting files."
+
+    task_id = TASKS_PRETTY_REVERSE[task_pretty]
+
+    if task_id == "commit_message_generation":
+        return f"""**{task_pretty} Instructions:**\n\n* Please, attach files in [JSONLines format](https://jsonlines.org/). For an example, check the predictions provided by 🏟️ Long Code Arena Team in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results/tree/main/commit_message_generation/predictions). Make sure to include `"prediction"` and `"reference"` fields for each example, the rest are optional."""
+
+    return f"**{task_pretty} Instructions:**\n\n* 🚧 There are no instructions for the current task yet."
```