Muennighoff committed on
Commit 64dd40c
1 Parent(s): 4af9e8d

Add embedding dimensions

Files changed (1)
  1. app.py +54 -5
app.py CHANGED
@@ -1,3 +1,5 @@
+import json
+
 from datasets import load_dataset
 import gradio as gr
 from huggingface_hub import HfApi, hf_hub_download
@@ -193,6 +195,35 @@ EXTERNAL_MODEL_TO_LINK = {
     "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
 }
 
+EXTERNAL_MODEL_TO_DIM = {
+    "LASER2": 1024,
+    "LaBSE": 768,
+    "all-MiniLM-L12-v2": 384,
+    "all-MiniLM-L6-v2": 384,
+    "all-mpnet-base-v2": 768,
+    "allenai-specter": 768,
+    "bert-base-uncased": 768,
+    "contriever-base-msmarco": 768,
+    "glove.6B.300d": 300,
+    "gtr-t5-base": 768,
+    "gtr-t5-large": 768,
+    "gtr-t5-xl": 768,
+    "gtr-t5-xxl": 768,
+    "komninos": 300,
+    "msmarco-bert-co-condensor": 768,
+    "paraphrase-multilingual-MiniLM-L12-v2": 384,
+    "paraphrase-multilingual-mpnet-base-v2": 768,
+    "sentence-t5-base": 768,
+    "sentence-t5-large": 768,
+    "sentence-t5-xl": 768,
+    "sentence-t5-xxl": 768,
+    "sup-simcse-bert-base-uncased": 768,
+    "text-similarity-ada-001": 1024,
+    "text-search-ada-query-001": 1024,
+    "text-search-ada-doc-001": 1024,
+    "unsup-simcse-bert-base-uncased": 768,
+}
+
 
 EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
 
@@ -236,8 +267,22 @@ for model in EXTERNAL_MODELS:
         ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
         EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
 
-
-def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, task_to_metric=TASK_TO_METRIC):
+def get_emb_dim(model):
+    filenames = [sib.rfilename for sib in model.siblings]
+    dim = ""
+    if "1_Pooling/config.json" in filenames:
+        st_config_path = hf_hub_download(model.modelId, filename="1_Pooling/config.json")
+        dim = json.load(open(st_config_path)).get("word_embedding_dimension", "")
+    elif "2_Pooling/config.json" in filenames:
+        st_config_path = hf_hub_download(model.modelId, filename="2_Pooling/config.json")
+        dim = json.load(open(st_config_path)).get("word_embedding_dimension", "")
+    elif "config.json" in filenames:
+        config_path = hf_hub_download(model.modelId, filename="config.json")
+        dim = json.load(open(config_path)).get("hidden_dim", "")
+    return dim
+
+
+def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, add_emb_dim=False, task_to_metric=TASK_TO_METRIC):
     api = HfApi()
     models = api.list_models(filter="mteb")
     # Initialize list to models that we cannot fetch metadata from
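For sentence-transformers repositories, the lookup that get_emb_dim performs reduces to fetching one JSON file from the Hub. A minimal standalone sketch of that path (not part of the commit; the model id is an arbitrary public example):

```python
import json

from huggingface_hub import hf_hub_download

# The pooling config of a sentence-transformers repo records the output
# embedding width under "word_embedding_dimension".
path = hf_hub_download(
    "sentence-transformers/all-MiniLM-L6-v2", filename="1_Pooling/config.json"
)
with open(path) as f:
    print(json.load(f).get("word_embedding_dimension"))  # 384
```

Note that the plain config.json fallback reads `hidden_dim`, a key most BERT-style configs do not define (they use `hidden_size`; on DistilBERT, `hidden_dim` is the feed-forward size), so that branch may often come back empty.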
@@ -252,6 +297,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, task_to_metric=TA
         res = {k: v for d in results_list for k, v in d.items()}
         # Model & at least one result
         if len(res) > 1:
+            res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
             df_list.append(res)
 
     for model in models:
@@ -279,6 +325,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, task_to_metric=TA
         out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
         out = {k: v for d in out for k, v in d.items()}
         out["Model"] = make_clickable_model(model.modelId)
+        if add_emb_dim:
+            out["Embedding Dimensions"] = get_emb_dim(model)
         df_list.append(out)
     df = pd.DataFrame(df_list)
     # Put 'Model' column first
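Each row dict only carries the keys resolved for that model, so a model without a resolvable dimension simply lacks the entry, and pandas fills the gap with NaN when the rows are assembled; that is what the fillna flag controls downstream. A toy illustration (not from the commit; model and dataset names are made up):

```python
import pandas as pd

# Rows with differing keys: pandas unions the columns and inserts NaN
# where a row has no value for a column.
rows = [
    {"Model": "model-a", "Embedding Dimensions": 768, "SomeDataset": 80.1},
    {"Model": "model-b", "SomeDataset": 79.3},
]
df = pd.DataFrame(rows)
print(df["Embedding Dimensions"].tolist())  # [768.0, nan]
```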
@@ -302,7 +350,8 @@ def get_mteb_average():
             "Summarization",
         ],
         langs=["en", "en-en"],
-        fillna=False
+        fillna=False,
+        add_emb_dim=True,
     )
     # Approximation (Missing Bitext Mining & including some nans)
     NUM_SCORES = DATA_OVERALL.shape[0] * DATA_OVERALL.shape[1]
@@ -335,7 +384,7 @@ def get_mteb_average():
     DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
     DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
 
-    DATA_OVERALL = DATA_OVERALL[["Rank", "Model", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
+    DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Embedding Dimensions", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
 
     return DATA_OVERALL
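The overview headers are f-strings, so the dataset counts in the column labels track the task lists automatically; a hypothetical two-entry list, for illustration only:

```python
# Hypothetical short task list, purely to show how the header labels are built.
TASK_LIST_CLASSIFICATION = ["Banking77Classification", "ImdbClassification"]
print(f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)")
# -> Classification Average (2 datasets)
```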
 
@@ -377,7 +426,7 @@ with block:
     **Bitext Mining Leaderboard 🎌**
 
     - **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
-    - **Languages:** 112
+    - **Languages:** 117
     """)
     with gr.Row():
         data_bitext_mining = gr.components.Dataframe(
 