Fix intro and sort order
Browse files- app.py +3 -3
- content.py +11 -3
app.py
CHANGED
@@ -99,12 +99,12 @@ def get_leaderboard_df(performance_dict, pretrained_models):
|
|
99 |
if arc_perf * hellaswag_perf * mmlu_perf * truthfulqa_perf == 0:
|
100 |
continue
|
101 |
avg = round((arc_perf + hellaswag_perf + mmlu_perf + truthfulqa_perf) / 4, 1)
|
102 |
-
notes = ' '.join([pretrained, lang_name
|
103 |
row = [pretrained, lang_name, lang, avg, arc_perf, hellaswag_perf, mmlu_perf, truthfulqa_perf, notes]
|
104 |
df.append(row)
|
105 |
|
106 |
df = pd.DataFrame.from_records(df, columns=COLS)
|
107 |
-
df = df.sort_values(by=[AVERAGE_COL], ascending=False)
|
108 |
df = df[COLS]
|
109 |
|
110 |
return df
|
@@ -140,7 +140,7 @@ with demo:
|
|
140 |
|
141 |
with gr.Box():
|
142 |
search_bar = gr.Textbox(
|
143 |
-
placeholder="Search models...", show_label=False, elem_id="search-bar"
|
144 |
)
|
145 |
|
146 |
leaderboard_table = gr.components.Dataframe(
|
|
|
99 |
if arc_perf * hellaswag_perf * mmlu_perf * truthfulqa_perf == 0:
|
100 |
continue
|
101 |
avg = round((arc_perf + hellaswag_perf + mmlu_perf + truthfulqa_perf) / 4, 1)
|
102 |
+
notes = ' '.join([pretrained, lang_name])
|
103 |
row = [pretrained, lang_name, lang, avg, arc_perf, hellaswag_perf, mmlu_perf, truthfulqa_perf, notes]
|
104 |
df.append(row)
|
105 |
|
106 |
df = pd.DataFrame.from_records(df, columns=COLS)
|
107 |
+
df = df.sort_values(by=[LANG_COL, AVERAGE_COL], ascending=False)
|
108 |
df = df[COLS]
|
109 |
|
110 |
return df
|
|
|
140 |
|
141 |
with gr.Box():
|
142 |
search_bar = gr.Textbox(
|
143 |
+
placeholder="Search models and languages...", show_label=False, elem_id="search-bar"
|
144 |
)
|
145 |
|
146 |
leaderboard_table = gr.components.Dataframe(
|
content.py
CHANGED
@@ -3,21 +3,29 @@ TITLE = '<h1 align="center" id="space-title">Open Multilingual LLM Evaluation Le
|
|
3 |
INTRO_TEXT = f"""
|
4 |
## About
|
5 |
|
6 |
-
This leaderboard
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
- <a href="https://arxiv.org/abs/1803.05457" target="_blank"> AI2 Reasoning Challenge </a> (25-shot)
|
9 |
- <a href="https://arxiv.org/abs/1905.07830" target="_blank"> HellaSwag </a> (10-shot)
|
10 |
- <a href="https://arxiv.org/abs/2009.03300" target="_blank"> MMLU </a> (5-shot)
|
11 |
- <a href="https://arxiv.org/abs/2109.07958" target="_blank"> TruthfulQA </a> (0-shot)
|
12 |
|
13 |
-
The evaluation data was translated into
|
14 |
|
15 |
"""
|
16 |
|
17 |
HOW_TO = f"""
|
18 |
## How to list your model performance on this leaderboard:
|
19 |
|
20 |
-
Send an email with title [Open mLLM Leaderboard] to [email protected] with the
|
21 |
|
22 |
We will run your model on the four benchmarks and add it to the leaderboard.
|
23 |
"""
|
|
|
3 |
INTRO_TEXT = f"""
|
4 |
## About
|
5 |
|
6 |
+
This leaderboard tracks progress and ranks performance of large language models (LLMs) developed for different languages,
|
7 |
+
emphasizing non-English languages to democratize the benefits of LLMs for broader society.
|
8 |
+
Our current leaderboard provides evaluation data for 29 languages, i.e.,
|
9 |
+
Arabic, Armenian, Basque, Bengali, Catalan, Chinese, Croatian, Danish, Dutch,
|
10 |
+
French, German, Gujarati, Hindi, Hungarian, Indonesian, Italian, Kannada, Malayalam,
|
11 |
+
Marathi, Nepali, Portuguese, Romanian, Russian, Serbian, Slovak, Spanish, Swedish,
|
12 |
+
Tamil, Telugu, Ukrainian, and Vietnamese, that will be expanded along the way.
|
13 |
+
Both multilingual and language-specific LLMs are welcome in this leaderboard.
|
14 |
+
We currently evaluate models over four benchmarks:
|
15 |
|
16 |
- <a href="https://arxiv.org/abs/1803.05457" target="_blank"> AI2 Reasoning Challenge </a> (25-shot)
|
17 |
- <a href="https://arxiv.org/abs/1905.07830" target="_blank"> HellaSwag </a> (10-shot)
|
18 |
- <a href="https://arxiv.org/abs/2009.03300" target="_blank"> MMLU </a> (5-shot)
|
19 |
- <a href="https://arxiv.org/abs/2109.07958" target="_blank"> TruthfulQA </a> (0-shot)
|
20 |
|
21 |
+
The evaluation data was translated into these languages using ChatGPT (gpt-35-turbo).
|
22 |
|
23 |
"""
|
24 |
|
25 |
HOW_TO = f"""
|
26 |
## How to list your model performance on this leaderboard:
|
27 |
|
28 |
+
Send an email with title [Open mLLM Leaderboard] to [email protected] with the Hugging Face model name.
|
29 |
|
30 |
We will run your model on the four benchmarks and add it to the leaderboard.
|
31 |
"""
|