Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Muennighoff
committed on
Commit
•
3ffdc42
1
Parent(s):
003d24d
Updates
Browse files
README.md
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
---
|
3 |
title: leaderboard
|
4 |
emoji: 🔥
|
|
|
|
|
1 |
---
|
2 |
title: leaderboard
|
3 |
emoji: 🔥
|
app.py
CHANGED
@@ -96,19 +96,6 @@ TASK_LIST_SUMMARIZATION = [
|
|
96 |
|
97 |
TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
|
98 |
|
99 |
-
TASK_TO_TASK_LIST = {}
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
def make_clickable_model(model_name):
|
104 |
-
# Remove user from model name
|
105 |
-
model_name_show = " ".join(model_name.split("/")[1:])
|
106 |
-
link = "https://huggingface.co/" + model_name
|
107 |
-
return (
|
108 |
-
f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name_show}</a>'
|
109 |
-
)
|
110 |
-
|
111 |
-
|
112 |
TASK_TO_METRIC = {
|
113 |
"BitextMining": "f1",
|
114 |
"Clustering": "v_measure",
|
@@ -120,7 +107,16 @@ TASK_TO_METRIC = {
|
|
120 |
"Summarization": "cos_sim_spearman",
|
121 |
}
|
122 |
|
123 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
api = HfApi()
|
125 |
models = api.list_models(filter="mteb")
|
126 |
df_list = []
|
@@ -141,9 +137,7 @@ def get_mteb_data(tasks=["Clustering"], metric="v_measure", langs=[], cast_to_st
|
|
141 |
# {"type": "f1", "value": 38.809586587791664},
|
142 |
# ],
|
143 |
# },
|
144 |
-
|
145 |
# Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
|
146 |
-
#if langs is None:
|
147 |
task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and (sub_res.get("dataset", {}).get("config", "default") in ("default", *langs))]
|
148 |
out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
|
149 |
#else:
|
@@ -170,53 +164,60 @@ def get_mteb_data(tasks=["Clustering"], metric="v_measure", langs=[], cast_to_st
|
|
170 |
cols = sorted(list(df.columns))
|
171 |
cols.insert(0, cols.pop(cols.index("Model")))
|
172 |
df = df[cols]
|
173 |
-
# df.insert(1, "Average", df.mean(axis=1, skipna=False))
|
174 |
df.fillna("", inplace=True)
|
175 |
if cast_to_str:
|
176 |
return df.astype(str) # Cast to str as Gradio does not accept floats
|
177 |
return df
|
178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
|
180 |
-
DATA_OVERALL =
|
181 |
-
tasks=[
|
182 |
-
"Classification",
|
183 |
-
"Clustering",
|
184 |
-
"PairClassification",
|
185 |
-
"Reranking",
|
186 |
-
"Retrieval",
|
187 |
-
"STS",
|
188 |
-
"Summarization",
|
189 |
-
],
|
190 |
-
langs=["en", "en-en"],
|
191 |
-
cast_to_str=False
|
192 |
-
)
|
193 |
-
|
194 |
-
DATA_OVERALL.insert(1, "Average", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
|
195 |
-
DATA_OVERALL.insert(2, "Classification Average", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
|
196 |
-
DATA_OVERALL.insert(3, "Clustering Average", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
|
197 |
-
DATA_OVERALL.insert(4, "Pair Classification Average", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
|
198 |
-
DATA_OVERALL.insert(5, "Reranking Average", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
|
199 |
-
DATA_OVERALL.insert(6, "Retrieval Average", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
|
200 |
-
DATA_OVERALL.insert(7, "STS Average", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
|
201 |
-
DATA_OVERALL.insert(8, "Summarization Average", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
|
202 |
-
DATA_OVERALL = DATA_OVERALL.round(2).astype(str)
|
203 |
|
204 |
-
DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
|
205 |
-
DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
|
206 |
-
DATA_PAIR_CLASSIFICATION = DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION]
|
207 |
-
DATA_RERANKING = DATA_OVERALL[["Model"] + TASK_LIST_RERANKING]
|
208 |
-
DATA_RETRIEVAL = DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL]
|
209 |
-
DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
|
210 |
-
DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
|
211 |
|
212 |
-
DATA_OVERALL = DATA_OVERALL[["Model", "Average", "Classification Average", "Clustering Average", "Pair Classification Average", "Reranking Average", "Retrieval Average", "STS Average", "Summarization Average"]]
|
213 |
|
|
|
214 |
|
|
|
215 |
block = gr.Blocks()
|
216 |
|
|
|
217 |
with block:
|
218 |
gr.Markdown(
|
219 |
-
"""Leaderboard
|
220 |
)
|
221 |
with gr.Tabs():
|
222 |
with gr.TabItem("Overall"):
|
@@ -225,11 +226,30 @@ with block:
|
|
225 |
with gr.Row():
|
226 |
data_overall = gr.components.Dataframe(
|
227 |
DATA_OVERALL,
|
228 |
-
datatype="markdown",
|
229 |
type="pandas",
|
230 |
-
col_count=(len(DATA_OVERALL.columns), "fixed"),
|
231 |
wrap=True,
|
232 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
with gr.TabItem("Classification"):
|
234 |
with gr.TabItem("English"):
|
235 |
with gr.Row():
|
@@ -237,20 +257,17 @@ with block:
|
|
237 |
with gr.Row():
|
238 |
data_classification_en = gr.components.Dataframe(
|
239 |
DATA_CLASSIFICATION_EN,
|
240 |
-
datatype="markdown",
|
241 |
type="pandas",
|
242 |
-
col_count=(len(DATA_CLASSIFICATION_EN.columns), "fixed"),
|
243 |
)
|
244 |
with gr.Row():
|
245 |
-
|
246 |
task_classification_en = gr.Variable(value="Classification")
|
247 |
-
metric_classification_en = gr.Variable(value="accuracy")
|
248 |
lang_classification_en = gr.Variable(value=["en"])
|
249 |
-
|
250 |
get_mteb_data,
|
251 |
inputs=[
|
252 |
task_classification_en,
|
253 |
-
metric_classification_en,
|
254 |
lang_classification_en,
|
255 |
],
|
256 |
outputs=data_classification_en,
|
@@ -260,16 +277,15 @@ with block:
|
|
260 |
gr.Markdown("""Multilingual Classification""")
|
261 |
with gr.Row():
|
262 |
data_classification = gr.components.Dataframe(
|
263 |
-
datatype=["markdown"] * 500,
|
264 |
type="pandas",
|
265 |
)
|
266 |
with gr.Row():
|
267 |
data_run = gr.Button("Refresh")
|
268 |
task_classification = gr.Variable(value="Classification")
|
269 |
-
metric_classification = gr.Variable(value="accuracy")
|
270 |
data_run.click(
|
271 |
get_mteb_data,
|
272 |
-
inputs=[task_classification
|
273 |
outputs=data_classification,
|
274 |
)
|
275 |
with gr.TabItem("Clustering"):
|
@@ -277,48 +293,68 @@ with block:
|
|
277 |
gr.Markdown("""Leaderboard for Clustering""")
|
278 |
with gr.Row():
|
279 |
data_clustering = gr.components.Dataframe(
|
280 |
-
|
|
|
281 |
type="pandas",
|
|
|
282 |
)
|
283 |
with gr.Row():
|
284 |
data_run = gr.Button("Refresh")
|
285 |
task_clustering = gr.Variable(value="Clustering")
|
286 |
-
metric_clustering = gr.Variable(value="v_measure")
|
287 |
data_run.click(
|
288 |
get_mteb_data,
|
289 |
-
inputs=[task_clustering
|
290 |
outputs=data_clustering,
|
291 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
with gr.TabItem("Retrieval"):
|
293 |
with gr.Row():
|
294 |
gr.Markdown("""Leaderboard for Retrieval""")
|
295 |
with gr.Row():
|
296 |
data_retrieval = gr.components.Dataframe(
|
297 |
-
|
|
|
298 |
type="pandas",
|
299 |
)
|
300 |
with gr.Row():
|
301 |
data_run = gr.Button("Refresh")
|
302 |
task_retrieval = gr.Variable(value="Retrieval")
|
303 |
-
metric_retrieval = gr.Variable(value="ndcg_at_10")
|
304 |
data_run.click(
|
305 |
-
get_mteb_data, inputs=[task_retrieval
|
306 |
)
|
307 |
with gr.TabItem("Reranking"):
|
308 |
with gr.Row():
|
309 |
gr.Markdown("""Leaderboard for Reranking""")
|
310 |
with gr.Row():
|
311 |
data_reranking = gr.components.Dataframe(
|
312 |
-
|
|
|
313 |
type="pandas",
|
314 |
-
|
315 |
)
|
316 |
with gr.Row():
|
317 |
data_run = gr.Button("Refresh")
|
318 |
task_reranking = gr.Variable(value="Reranking")
|
319 |
metric_reranking = gr.Variable(value="map")
|
320 |
data_run.click(
|
321 |
-
get_mteb_data, inputs=[task_reranking
|
322 |
)
|
323 |
with gr.TabItem("STS"):
|
324 |
with gr.TabItem("English"):
|
@@ -326,17 +362,18 @@ with block:
|
|
326 |
gr.Markdown("""Leaderboard for STS""")
|
327 |
with gr.Row():
|
328 |
data_sts_en = gr.components.Dataframe(
|
329 |
-
|
|
|
330 |
type="pandas",
|
|
|
331 |
)
|
332 |
with gr.Row():
|
333 |
data_run_en = gr.Button("Refresh")
|
334 |
task_sts_en = gr.Variable(value="STS")
|
335 |
-
metric_sts_en = gr.Variable(value="cos_sim_spearman")
|
336 |
lang_sts_en = gr.Variable(value=["en", "en-en"])
|
337 |
data_run.click(
|
338 |
get_mteb_data,
|
339 |
-
inputs=[task_sts_en,
|
340 |
outputs=data_sts_en,
|
341 |
)
|
342 |
with gr.TabItem("Multilingual"):
|
@@ -344,49 +381,49 @@ with block:
|
|
344 |
gr.Markdown("""Leaderboard for STS""")
|
345 |
with gr.Row():
|
346 |
data_sts = gr.components.Dataframe(
|
347 |
-
datatype=["markdown"] *
|
348 |
type="pandas",
|
349 |
)
|
350 |
with gr.Row():
|
351 |
data_run = gr.Button("Refresh")
|
352 |
task_sts = gr.Variable(value="STS")
|
353 |
-
|
354 |
-
data_run.click(get_mteb_data, inputs=[task_sts, metric_sts], outputs=data_sts)
|
355 |
with gr.TabItem("Summarization"):
|
356 |
with gr.Row():
|
357 |
gr.Markdown("""Leaderboard for Summarization""")
|
358 |
with gr.Row():
|
359 |
data_summarization = gr.components.Dataframe(
|
360 |
-
|
|
|
361 |
type="pandas",
|
|
|
362 |
)
|
363 |
with gr.Row():
|
364 |
data_run = gr.Button("Refresh")
|
365 |
task_summarization = gr.Variable(value="Summarization")
|
366 |
-
metric_summarization = gr.Variable(value="cos_sim_spearman")
|
367 |
data_run.click(
|
368 |
get_mteb_data,
|
369 |
-
inputs=[task_summarization
|
370 |
outputs=data_summarization,
|
371 |
)
|
372 |
# running the function on page load in addition to when the button is clicked
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
block.load(
|
380 |
-
|
381 |
-
inputs=[task_classification, metric_classification],
|
382 |
-
outputs=data_classification,
|
383 |
-
)
|
384 |
-
block.load(get_mteb_data, inputs=[task_clustering, metric_clustering], outputs=data_clustering)
|
385 |
-
block.load(get_mteb_data, inputs=[task_retrieval, metric_retrieval], outputs=data_retrieval)
|
386 |
-
block.load(get_mteb_data, inputs=[task_reranking, metric_reranking], outputs=data_reranking)
|
387 |
-
block.load(get_mteb_data, inputs=[task_sts, metric_sts], outputs=data_sts)
|
388 |
-
block.load(
|
389 |
-
get_mteb_data, inputs=[task_summarization, metric_summarization], outputs=data_summarization
|
390 |
-
)
|
391 |
|
392 |
block.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
TASK_TO_METRIC = {
|
100 |
"BitextMining": "f1",
|
101 |
"Clustering": "v_measure",
|
|
|
107 |
"Summarization": "cos_sim_spearman",
|
108 |
}
|
109 |
|
110 |
+
def make_clickable_model(model_name):
|
111 |
+
# Remove user from model name
|
112 |
+
model_name_show = " ".join(model_name.split("/")[1:])
|
113 |
+
link = "https://huggingface.co/" + model_name
|
114 |
+
return (
|
115 |
+
f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name_show}</a>'
|
116 |
+
)
|
117 |
+
|
118 |
+
|
119 |
+
def get_mteb_data(tasks=["Clustering"], langs=[], cast_to_str=True, task_to_metric=TASK_TO_METRIC):
|
120 |
api = HfApi()
|
121 |
models = api.list_models(filter="mteb")
|
122 |
df_list = []
|
|
|
137 |
# {"type": "f1", "value": 38.809586587791664},
|
138 |
# ],
|
139 |
# },
|
|
|
140 |
# Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
|
|
|
141 |
task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and (sub_res.get("dataset", {}).get("config", "default") in ("default", *langs))]
|
142 |
out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
|
143 |
#else:
|
|
|
164 |
cols = sorted(list(df.columns))
|
165 |
cols.insert(0, cols.pop(cols.index("Model")))
|
166 |
df = df[cols]
|
|
|
167 |
df.fillna("", inplace=True)
|
168 |
if cast_to_str:
|
169 |
return df.astype(str) # Cast to str as Gradio does not accept floats
|
170 |
return df
|
171 |
|
172 |
+
def get_mteb_average(get_all_avgs=False):
|
173 |
+
global DATA_OVERALL, DATA_CLASSIFICATION_EN, DATA_CLUSTERING, DATA_PAIR_CLASSIFICATION, DATA_RERANKING, DATA_RETRIEVAL, DATA_STS_EN, DATA_SUMMARIZATION
|
174 |
+
DATA_OVERALL = get_mteb_data(
|
175 |
+
tasks=[
|
176 |
+
"Classification",
|
177 |
+
"Clustering",
|
178 |
+
"PairClassification",
|
179 |
+
"Reranking",
|
180 |
+
"Retrieval",
|
181 |
+
"STS",
|
182 |
+
"Summarization",
|
183 |
+
],
|
184 |
+
langs=["en", "en-en"],
|
185 |
+
cast_to_str=False
|
186 |
+
)
|
187 |
+
|
188 |
+
DATA_OVERALL.insert(1, "Average", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
|
189 |
+
DATA_OVERALL.insert(2, "Classification Average", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
|
190 |
+
DATA_OVERALL.insert(3, "Clustering Average", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
|
191 |
+
DATA_OVERALL.insert(4, "Pair Classification Average", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
|
192 |
+
DATA_OVERALL.insert(5, "Reranking Average", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
|
193 |
+
DATA_OVERALL.insert(6, "Retrieval Average", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
|
194 |
+
DATA_OVERALL.insert(7, "STS Average", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
|
195 |
+
DATA_OVERALL.insert(8, "Summarization Average", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
|
196 |
+
DATA_OVERALL.sort_values("Average", ascending=False, inplace=True)
|
197 |
+
# Start ranking from 1
|
198 |
+
DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))
|
199 |
|
200 |
+
DATA_OVERALL = DATA_OVERALL.round(2).astype(str)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
+
DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
|
203 |
+
DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
|
204 |
+
DATA_PAIR_CLASSIFICATION = DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION]
|
205 |
+
DATA_RERANKING = DATA_OVERALL[["Model"] + TASK_LIST_RERANKING]
|
206 |
+
DATA_RETRIEVAL = DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL]
|
207 |
+
DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
|
208 |
+
DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
|
209 |
|
210 |
+
DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Average", "Classification Average", "Clustering Average", "Pair Classification Average", "Reranking Average", "Retrieval Average", "STS Average", "Summarization Average"]]
|
211 |
|
212 |
+
return DATA_OVERALL
|
213 |
|
214 |
+
get_mteb_average()
|
215 |
block = gr.Blocks()
|
216 |
|
217 |
+
|
218 |
with block:
|
219 |
gr.Markdown(
|
220 |
+
"""MTEB Leaderboard. See <a href="https://huggingface.co/Gradio-Blocks" target="_blank" style="text-decoration: underline">Blocks Party Event</a>"""
|
221 |
)
|
222 |
with gr.Tabs():
|
223 |
with gr.TabItem("Overall"):
|
|
|
226 |
with gr.Row():
|
227 |
data_overall = gr.components.Dataframe(
|
228 |
DATA_OVERALL,
|
229 |
+
datatype=["markdown"] * len(DATA_OVERALL.columns) * 2,
|
230 |
type="pandas",
|
231 |
+
#col_count=(len(DATA_OVERALL.columns), "fixed"),
|
232 |
wrap=True,
|
233 |
)
|
234 |
+
with gr.Row():
|
235 |
+
data_run = gr.Button("Refresh")
|
236 |
+
data_run.click(get_mteb_average, inputs=None, outputs=data_overall)
|
237 |
+
with gr.TabItem("BitextMining"):
|
238 |
+
with gr.Row():
|
239 |
+
gr.Markdown("""Leaderboard for Clustering""")
|
240 |
+
with gr.Row():
|
241 |
+
data_bitext_mining = gr.components.Dataframe(
|
242 |
+
datatype=["markdown"] * 500, # hack when we don't know how many columns
|
243 |
+
type="pandas",
|
244 |
+
)
|
245 |
+
with gr.Row():
|
246 |
+
data_run = gr.Button("Refresh")
|
247 |
+
task_bitext_mining = gr.Variable(value="BitextMining")
|
248 |
+
data_run.click(
|
249 |
+
get_mteb_data,
|
250 |
+
inputs=[task_bitext_mining],
|
251 |
+
outputs=data_bitext_mining,
|
252 |
+
)
|
253 |
with gr.TabItem("Classification"):
|
254 |
with gr.TabItem("English"):
|
255 |
with gr.Row():
|
|
|
257 |
with gr.Row():
|
258 |
data_classification_en = gr.components.Dataframe(
|
259 |
DATA_CLASSIFICATION_EN,
|
260 |
+
datatype=["markdown"] * len(DATA_CLASSIFICATION_EN.columns) * 20,
|
261 |
type="pandas",
|
|
|
262 |
)
|
263 |
with gr.Row():
|
264 |
+
data_run_classification_en = gr.Button("Refresh")
|
265 |
task_classification_en = gr.Variable(value="Classification")
|
|
|
266 |
lang_classification_en = gr.Variable(value=["en"])
|
267 |
+
data_run_classification_en.click(
|
268 |
get_mteb_data,
|
269 |
inputs=[
|
270 |
task_classification_en,
|
|
|
271 |
lang_classification_en,
|
272 |
],
|
273 |
outputs=data_classification_en,
|
|
|
277 |
gr.Markdown("""Multilingual Classification""")
|
278 |
with gr.Row():
|
279 |
data_classification = gr.components.Dataframe(
|
280 |
+
datatype=["markdown"] * 500, # hack when we don't know how many columns
|
281 |
type="pandas",
|
282 |
)
|
283 |
with gr.Row():
|
284 |
data_run = gr.Button("Refresh")
|
285 |
task_classification = gr.Variable(value="Classification")
|
|
|
286 |
data_run.click(
|
287 |
get_mteb_data,
|
288 |
+
inputs=[task_classification],
|
289 |
outputs=data_classification,
|
290 |
)
|
291 |
with gr.TabItem("Clustering"):
|
|
|
293 |
gr.Markdown("""Leaderboard for Clustering""")
|
294 |
with gr.Row():
|
295 |
data_clustering = gr.components.Dataframe(
|
296 |
+
DATA_CLUSTERING,
|
297 |
+
datatype="markdown",
|
298 |
type="pandas",
|
299 |
+
col_count=(len(DATA_CLUSTERING.columns), "fixed"),
|
300 |
)
|
301 |
with gr.Row():
|
302 |
data_run = gr.Button("Refresh")
|
303 |
task_clustering = gr.Variable(value="Clustering")
|
|
|
304 |
data_run.click(
|
305 |
get_mteb_data,
|
306 |
+
inputs=[task_clustering],
|
307 |
outputs=data_clustering,
|
308 |
)
|
309 |
+
with gr.TabItem("Pair Classification"):
|
310 |
+
with gr.Row():
|
311 |
+
gr.Markdown("""Leaderboard for Pair Classification""")
|
312 |
+
with gr.Row():
|
313 |
+
data_pair_classification = gr.components.Dataframe(
|
314 |
+
DATA_PAIR_CLASSIFICATION,
|
315 |
+
datatype="markdown",
|
316 |
+
type="pandas",
|
317 |
+
col_count=(len(DATA_PAIR_CLASSIFICATION.columns), "fixed"),
|
318 |
+
)
|
319 |
+
with gr.Row():
|
320 |
+
data_run = gr.Button("Refresh")
|
321 |
+
task_pair_classification = gr.Variable(value="Clustering")
|
322 |
+
data_run.click(
|
323 |
+
get_mteb_data,
|
324 |
+
inputs=[task_pair_classification],
|
325 |
+
outputs=data_pair_classification,
|
326 |
+
)
|
327 |
with gr.TabItem("Retrieval"):
|
328 |
with gr.Row():
|
329 |
gr.Markdown("""Leaderboard for Retrieval""")
|
330 |
with gr.Row():
|
331 |
data_retrieval = gr.components.Dataframe(
|
332 |
+
DATA_RETRIEVAL,
|
333 |
+
datatype=["markdown"] * len(DATA_RETRIEVAL.columns) * 2,
|
334 |
type="pandas",
|
335 |
)
|
336 |
with gr.Row():
|
337 |
data_run = gr.Button("Refresh")
|
338 |
task_retrieval = gr.Variable(value="Retrieval")
|
|
|
339 |
data_run.click(
|
340 |
+
get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval
|
341 |
)
|
342 |
with gr.TabItem("Reranking"):
|
343 |
with gr.Row():
|
344 |
gr.Markdown("""Leaderboard for Reranking""")
|
345 |
with gr.Row():
|
346 |
data_reranking = gr.components.Dataframe(
|
347 |
+
DATA_RERANKING,
|
348 |
+
datatype="markdown",
|
349 |
type="pandas",
|
350 |
+
col_count=(len(DATA_RERANKING.columns), "fixed"),
|
351 |
)
|
352 |
with gr.Row():
|
353 |
data_run = gr.Button("Refresh")
|
354 |
task_reranking = gr.Variable(value="Reranking")
|
355 |
metric_reranking = gr.Variable(value="map")
|
356 |
data_run.click(
|
357 |
+
get_mteb_data, inputs=[task_reranking], outputs=data_reranking
|
358 |
)
|
359 |
with gr.TabItem("STS"):
|
360 |
with gr.TabItem("English"):
|
|
|
362 |
gr.Markdown("""Leaderboard for STS""")
|
363 |
with gr.Row():
|
364 |
data_sts_en = gr.components.Dataframe(
|
365 |
+
DATA_STS_EN,
|
366 |
+
datatype="markdown",
|
367 |
type="pandas",
|
368 |
+
col_count=(len(DATA_STS_EN.columns), "fixed"),
|
369 |
)
|
370 |
with gr.Row():
|
371 |
data_run_en = gr.Button("Refresh")
|
372 |
task_sts_en = gr.Variable(value="STS")
|
|
|
373 |
lang_sts_en = gr.Variable(value=["en", "en-en"])
|
374 |
data_run.click(
|
375 |
get_mteb_data,
|
376 |
+
inputs=[task_sts_en, lang_sts_en],
|
377 |
outputs=data_sts_en,
|
378 |
)
|
379 |
with gr.TabItem("Multilingual"):
|
|
|
381 |
gr.Markdown("""Leaderboard for STS""")
|
382 |
with gr.Row():
|
383 |
data_sts = gr.components.Dataframe(
|
384 |
+
datatype=["markdown"] * 50, # hack when we don't know how many columns
|
385 |
type="pandas",
|
386 |
)
|
387 |
with gr.Row():
|
388 |
data_run = gr.Button("Refresh")
|
389 |
task_sts = gr.Variable(value="STS")
|
390 |
+
data_run.click(get_mteb_data, inputs=[task_sts], outputs=data_sts)
|
|
|
391 |
with gr.TabItem("Summarization"):
|
392 |
with gr.Row():
|
393 |
gr.Markdown("""Leaderboard for Summarization""")
|
394 |
with gr.Row():
|
395 |
data_summarization = gr.components.Dataframe(
|
396 |
+
DATA_SUMMARIZATION,
|
397 |
+
datatype="markdown",
|
398 |
type="pandas",
|
399 |
+
col_count=(len(DATA_SUMMARIZATION.columns), "fixed"),
|
400 |
)
|
401 |
with gr.Row():
|
402 |
data_run = gr.Button("Refresh")
|
403 |
task_summarization = gr.Variable(value="Summarization")
|
|
|
404 |
data_run.click(
|
405 |
get_mteb_data,
|
406 |
+
inputs=[task_summarization],
|
407 |
outputs=data_summarization,
|
408 |
)
|
409 |
# running the function on page load in addition to when the button is clicked
|
410 |
+
block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
|
411 |
+
block.load(get_mteb_data, inputs=[task_classification_en, lang_classification_en], outputs=data_classification_en)
|
412 |
+
block.load(get_mteb_data, inputs=[task_classification], outputs=data_classification)
|
413 |
+
block.load(get_mteb_data, inputs=[task_clustering], outputs=data_clustering)
|
414 |
+
block.load(get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval)
|
415 |
+
block.load(get_mteb_data, inputs=[task_reranking], outputs=data_reranking)
|
416 |
+
block.load(get_mteb_data, inputs=[task_sts], outputs=data_sts)
|
417 |
+
block.load(get_mteb_data, inputs=[task_summarization], outputs=data_summarization)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
418 |
|
419 |
block.launch()
|
420 |
+
|
421 |
+
|
422 |
+
# Possible changes:
|
423 |
+
# Could check if tasks are valid (Currently users could just invent new tasks - similar for languages)
|
424 |
+
# Could make it load in the background without the Gradio logo closer to the Deep RL space
|
425 |
+
# Could add graphs / other visual content
|
426 |
+
|
427 |
+
# Sources:
|
428 |
+
# https://huggingface.co/spaces/gradio/leaderboard
|
429 |
+
# https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard
|