Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Muennighoff
committed on
Commit
•
64dd40c
1
Parent(s):
4af9e8d
Add embedding dimensions
Browse files
app.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
from datasets import load_dataset
|
2 |
import gradio as gr
|
3 |
from huggingface_hub import HfApi, hf_hub_download
|
@@ -193,6 +195,35 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
193 |
"paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
194 |
}
|
195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
|
197 |
EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
|
198 |
|
@@ -236,8 +267,22 @@ for model in EXTERNAL_MODELS:
|
|
236 |
ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
|
237 |
EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
|
238 |
|
239 |
-
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
api = HfApi()
|
242 |
models = api.list_models(filter="mteb")
|
243 |
# Initialize list to models that we cannot fetch metadata from
|
@@ -252,6 +297,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, task_to_metric=TA
|
|
252 |
res = {k: v for d in results_list for k, v in d.items()}
|
253 |
# Model & at least one result
|
254 |
if len(res) > 1:
|
|
|
255 |
df_list.append(res)
|
256 |
|
257 |
for model in models:
|
@@ -279,6 +325,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, task_to_metric=TA
|
|
279 |
out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
|
280 |
out = {k: v for d in out for k, v in d.items()}
|
281 |
out["Model"] = make_clickable_model(model.modelId)
|
|
|
|
|
282 |
df_list.append(out)
|
283 |
df = pd.DataFrame(df_list)
|
284 |
# Put 'Model' column first
|
@@ -302,7 +350,8 @@ def get_mteb_average():
|
|
302 |
"Summarization",
|
303 |
],
|
304 |
langs=["en", "en-en"],
|
305 |
-
fillna=False
|
|
|
306 |
)
|
307 |
# Approximation (Missing Bitext Mining & including some nans)
|
308 |
NUM_SCORES = DATA_OVERALL.shape[0] * DATA_OVERALL.shape[1]
|
@@ -335,7 +384,7 @@ def get_mteb_average():
|
|
335 |
DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
|
336 |
DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
|
337 |
|
338 |
-
DATA_OVERALL = DATA_OVERALL[["Rank", "Model", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
|
339 |
|
340 |
return DATA_OVERALL
|
341 |
|
@@ -377,7 +426,7 @@ with block:
|
|
377 |
**Bitext Mining Leaderboard π**
|
378 |
|
379 |
- **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
|
380 |
-
- **Languages:**
|
381 |
""")
|
382 |
with gr.Row():
|
383 |
data_bitext_mining = gr.components.Dataframe(
|
|
|
1 |
+
import json
|
2 |
+
|
3 |
from datasets import load_dataset
|
4 |
import gradio as gr
|
5 |
from huggingface_hub import HfApi, hf_hub_download
|
|
|
195 |
"paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
196 |
}
|
197 |
|
198 |
+
EXTERNAL_MODEL_TO_DIM = {
|
199 |
+
"LASER2": 1024,
|
200 |
+
"LaBSE": 768,
|
201 |
+
"all-MiniLM-L12-v2": 384,
|
202 |
+
"all-MiniLM-L6-v2": 384,
|
203 |
+
"all-mpnet-base-v2": 768,
|
204 |
+
"allenai-specter": 768,
|
205 |
+
"bert-base-uncased": 768,
|
206 |
+
"contriever-base-msmarco": 768,
|
207 |
+
"glove.6B.300d": 300,
|
208 |
+
"gtr-t5-base": 768,
|
209 |
+
"gtr-t5-large": 768,
|
210 |
+
"gtr-t5-xl": 768,
|
211 |
+
"gtr-t5-xxl": 768,
|
212 |
+
"komninos": 300,
|
213 |
+
"msmarco-bert-co-condensor": 768,
|
214 |
+
"paraphrase-multilingual-MiniLM-L12-v2": 384,
|
215 |
+
"paraphrase-multilingual-mpnet-base-v2": 768,
|
216 |
+
"sentence-t5-base": 768,
|
217 |
+
"sentence-t5-large": 768,
|
218 |
+
"sentence-t5-xl": 768,
|
219 |
+
"sentence-t5-xxl": 768,
|
220 |
+
"sup-simcse-bert-base-uncased": 768,
|
221 |
+
"text-similarity-ada-001": 1024,
|
222 |
+
"text-search-ada-query-001": 1024,
|
223 |
+
"text-search-ada-doc-001": 1024,
|
224 |
+
"unsup-simcse-bert-base-uncased": 768,
|
225 |
+
}
|
226 |
+
|
227 |
|
228 |
EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
|
229 |
|
|
|
267 |
ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
|
268 |
EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
|
269 |
|
270 |
+
def get_emb_dim(model):
|
271 |
+
filenames = [sib.rfilename for sib in model.siblings]
|
272 |
+
dim = ""
|
273 |
+
if "1_Pooling/config.json" in filenames:
|
274 |
+
st_config_path = hf_hub_download(model.modelId, filename="1_Pooling/config.json")
|
275 |
+
dim = json.load(open(st_config_path)).get("word_embedding_dimension", "")
|
276 |
+
elif "2_Pooling/config.json" in filenames:
|
277 |
+
st_config_path = hf_hub_download(model.modelId, filename="2_Pooling/config.json")
|
278 |
+
dim = json.load(open(st_config_path)).get("word_embedding_dimension", "")
|
279 |
+
elif "config.json" in filenames:
|
280 |
+
config_path = hf_hub_download(model.modelId, filename="config.json")
|
281 |
+
dim = json.load(open(config_path)).get("hidden_dim", "")
|
282 |
+
return dim
|
283 |
+
|
284 |
+
|
285 |
+
def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, add_emb_dim=False, task_to_metric=TASK_TO_METRIC):
|
286 |
api = HfApi()
|
287 |
models = api.list_models(filter="mteb")
|
288 |
# Initialize list to models that we cannot fetch metadata from
|
|
|
297 |
res = {k: v for d in results_list for k, v in d.items()}
|
298 |
# Model & at least one result
|
299 |
if len(res) > 1:
|
300 |
+
res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
|
301 |
df_list.append(res)
|
302 |
|
303 |
for model in models:
|
|
|
325 |
out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
|
326 |
out = {k: v for d in out for k, v in d.items()}
|
327 |
out["Model"] = make_clickable_model(model.modelId)
|
328 |
+
if add_emb_dim:
|
329 |
+
out["Embedding Dimensions"] = get_emb_dim(model)
|
330 |
df_list.append(out)
|
331 |
df = pd.DataFrame(df_list)
|
332 |
# Put 'Model' column first
|
|
|
350 |
"Summarization",
|
351 |
],
|
352 |
langs=["en", "en-en"],
|
353 |
+
fillna=False,
|
354 |
+
add_emb_dim=True,
|
355 |
)
|
356 |
# Approximation (Missing Bitext Mining & including some nans)
|
357 |
NUM_SCORES = DATA_OVERALL.shape[0] * DATA_OVERALL.shape[1]
|
|
|
384 |
DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
|
385 |
DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
|
386 |
|
387 |
+
DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Embedding Dimensions", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
|
388 |
|
389 |
return DATA_OVERALL
|
390 |
|
|
|
426 |
**Bitext Mining Leaderboard π**
|
427 |
|
428 |
- **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
|
429 |
+
- **Languages:** 117
|
430 |
""")
|
431 |
with gr.Row():
|
432 |
data_bitext_mining = gr.components.Dataframe(
|