orionweller committed
Commit cf7ddc6
1 Parent(s): 807cc67

add instruction following

Files changed (2)
  1. EXTERNAL_MODEL_RESULTS.json +0 -0
  2. app.py +114 -2
EXTERNAL_MODEL_RESULTS.json CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -226,6 +226,12 @@ TASK_LIST_RETRIEVAL_LAW = [
     "LegalSummarization",
 ]
 
+TASK_LIST_RETRIEVAL_INSTRUCTIONS = [
+    "Robust04InstructionRetrieval",
+    "News21InstructionRetrieval",
+    "Core17InstructionRetrieval",
+]
+
 TASK_LIST_RETRIEVAL_PL = [
     "ArguAna-PL",
     "DBPedia-PL",
@@ -322,6 +328,7 @@ TASK_TO_METRIC = {
     "Retrieval": "ndcg_at_10",
     "STS": "cos_sim_spearman",
     "Summarization": "cos_sim_spearman",
+    "InstructionRetrieval": "p-MRR",
 }
 
 def make_clickable_model(model_name, link=None):
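The new task type is scored with p-MRR (paired mean reciprocal rank) rather than nDCG@10: each query is run twice, once with its original instruction and once with an altered one, and the score rewards models whose rankings react to the change. The exact paired, ratio-based formula is defined in the FollowIR paper credited below; the snippet here is only a simplified stand-in to show the paired idea.

```python
# Illustrative only: a paired rank-change score in the spirit of p-MRR.
# Each pair is (rank_with_original_instruction, rank_with_altered_instruction)
# for a judged document; ranks start at 1. This is NOT the exact p-MRR
# formula -- see the FollowIR paper for the reference definition.
def paired_rank_delta(pairs: list[tuple[int, int]]) -> float:
    # Positive when documents move up after the instruction changes,
    # negative when they move down, 0 when the ranking does not react.
    return sum(1 / new - 1 / old for old, new in pairs) / len(pairs)

# A document that climbs from rank 4 to rank 2 under the new instruction:
print(paired_rank_delta([(4, 2)]))  # 0.25
# A model that ignores the instruction entirely:
print(paired_rank_delta([(4, 4)]))  # 0.0
```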
@@ -339,6 +346,8 @@ EXTERNAL_MODELS = [
     "Cohere-embed-multilingual-v3.0",
     "Cohere-embed-multilingual-light-v3.0",
     "DanskBERT",
+    "FollowIR-7B",
+    "GritLM-7B",
     "LASER2",
     "LLM2Vec-Llama-supervised",
     "LLM2Vec-Llama-unsupervised",
@@ -364,17 +373,22 @@ EXTERNAL_MODELS = [
     "bge-large-zh-v1.5",
     "bge-large-zh-noinstruct",
     "bge-small-zh-v1.5",
+    "bm25",
     "contriever-base-msmarco",
     "cross-en-de-roberta-sentence-transformer",
     "dfm-encoder-large-v1",
     "dfm-sentence-encoder-large-1",
     "distiluse-base-multilingual-cased-v2",
     "e5-base",
+    "e5-base-v2",
     "e5-large",
+    "e5-large-v2",
     "e5-mistral-7b-instruct",
     "e5-small",
     "electra-small-nordic",
     "electra-small-swedish-cased-discriminator",
+    "flan-t5-base",
+    "flan-t5-large",
     "flaubert_base_cased",
     "flaubert_base_uncased",
     "flaubert_large_cased",
@@ -391,11 +405,18 @@ EXTERNAL_MODELS = [
     "gtr-t5-xl",
     "gtr-t5-xxl",
     "herbert-base-retrieval-v2",
+    "instructor-base",
+    "instructor-xl",
     "komninos",
+    "llama-2-7b-chat",
     "luotuo-bert-medium",
     "m3e-base",
     "m3e-large",
+    "mistral-7b-instruct-v0.2",
     "mistral-embed",
+    "monobert-large-msmarco",
+    "monot5-3b-msmarco-10k",
+    "monot5-base-msmarco-10k",
     "msmarco-bert-co-condensor",
     "multi-qa-MiniLM-L6-cos-v1",
     "multilingual-e5-base",
@@ -423,6 +444,8 @@ EXTERNAL_MODELS = [
     "sup-simcse-bert-base-uncased",
     "st-polish-paraphrase-from-distilroberta",
     "st-polish-paraphrase-from-mpnet",
+    "tart-dual-contriever-msmarco",
+    "tart-full-flan-t5-xl",
     "text2vec-base-chinese",
     "text2vec-base-multilingual",
     "text2vec-large-chinese",
@@ -470,6 +493,8 @@ EXTERNAL_MODEL_TO_LINK = {
     "LLM2Vec-Sheared-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
     "LLM2Vec-Sheared-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
     "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
+    "FollowIR-7B": "https://huggingface.co/jhu-clsp/FollowIR-7B",
+    "GritLM-7B": "https://huggingface.co/GritLM/GritLM-7B",
     "OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
     "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
     "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
@@ -488,6 +513,7 @@ EXTERNAL_MODEL_TO_LINK = {
     "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
     "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
     "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
+    "bm25": "https://en.wikipedia.org/wiki/Okapi_BM25",
     "camembert-base": "https://huggingface.co/almanach/camembert-base",
     "camembert-large": "https://huggingface.co/almanach/camembert-large",
     "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
@@ -501,11 +527,15 @@ EXTERNAL_MODEL_TO_LINK = {
     "dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
     "dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
     "e5-base": "https://huggingface.co/intfloat/e5-base",
+    "e5-base-v2": "https://huggingface.co/intfloat/e5-base-v2",
     "e5-large": "https://huggingface.co/intfloat/e5-large",
+    "e5-large-v2": "https://huggingface.co/intfloat/e5-large-v2",
     "e5-mistral-7b-instruct": "https://huggingface.co/intfloat/e5-mistral-7b-instruct",
     "e5-small": "https://huggingface.co/intfloat/e5-small",
     "electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic",
     "electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator",
+    "flan-t5-base": "https://huggingface.co/google/flan-t5-base",
+    "flan-t5-large": "https://huggingface.co/google/flan-t5-large",
     "flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased",
     "flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased",
     "flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased",
@@ -522,11 +552,18 @@ EXTERNAL_MODEL_TO_LINK = {
     "gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl",
     "gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl",
     "herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
+    "instructor-base": "https://huggingface.co/hkunlp/instructor-base",
+    "instructor-xl": "https://huggingface.co/hkunlp/instructor-xl",
     "komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
+    "llama-2-7b-chat": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf",
     "luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
     "m3e-base": "https://huggingface.co/moka-ai/m3e-base",
     "m3e-large": "https://huggingface.co/moka-ai/m3e-large",
+    "mistral-7b-instruct-v0.2": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
     "mistral-embed": "https://docs.mistral.ai/guides/embeddings",
+    "monobert-large-msmarco": "https://huggingface.co/castorini/monobert-large-msmarco",
+    "monot5-3b-msmarco-10k": "https://huggingface.co/castorini/monot5-3b-msmarco-10k",
+    "monot5-base-msmarco-10k": "https://huggingface.co/castorini/monot5-base-msmarco-10k",
     "msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor",
     "multi-qa-MiniLM-L6-cos-v1": "https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
     "multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base",
@@ -554,6 +591,8 @@ EXTERNAL_MODEL_TO_LINK = {
     "sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased",
     "st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta",
     "st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
+    "tart-dual-contriever-msmarco": "https://huggingface.co/orionweller/tart-dual-contriever-msmarco",
+    "tart-full-flan-t5-xl": "https://huggingface.co/facebook/tart-full-flan-t5-xl",
     "text2vec-base-chinese": "https://huggingface.co/shibing624/text2vec-base-chinese",
     "text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
     "text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates",
@@ -593,6 +632,8 @@ EXTERNAL_MODEL_TO_DIM = {
     "Cohere-embed-multilingual-v3.0": 1024,
     "Cohere-embed-multilingual-light-v3.0": 384,
     "DanskBERT": 768,
+    "FollowIR-7B": -1,
+    "GritLM-7B": 4096,
     "LASER2": 1024,
     "LLM2Vec-Llama-supervised": 4096,
     "LLM2Vec-Llama-unsupervised": 4096,
@@ -617,6 +658,7 @@ EXTERNAL_MODEL_TO_DIM = {
     "bge-large-zh-v1.5": 1024,
     "bge-large-zh-noinstruct": 1024,
     "bge-small-zh-v1.5": 512,
+    "bm25": -1,
     "camembert-base": 512,
     "camembert-large": 768,
     "contriever-base-msmarco": 768,
@@ -630,11 +672,15 @@ EXTERNAL_MODEL_TO_DIM = {
     "dfm-encoder-large-v1": 1024,
     "dfm-sentence-encoder-large-1": 1024,
     "e5-base": 768,
+    "e5-base-v2": 768,
     "e5-large": 1024,
+    "e5-large-v2": 1024,
     "e5-mistral-7b-instruct": 4096,
     "e5-small": 384,
     "electra-small-nordic": 256,
     "electra-small-swedish-cased-discriminator": 256,
+    "flan-t5-base": -1,
+    "flan-t5-large": -1,
     "flaubert_base_cased": 768,
     "flaubert_base_uncased": 768,
     "flaubert_large_cased": 1024,
@@ -652,10 +698,17 @@ EXTERNAL_MODEL_TO_DIM = {
     "gtr-t5-xl": 768,
     "gtr-t5-xxl": 768,
     "herbert-base-retrieval-v2": 768,
+    "instructor-base": 768,
+    "instructor-xl": 768,
     "komninos": 300,
+    "llama-2-7b-chat": -1,
     "m3e-base": 768,
     "m3e-large": 768,
+    "mistral-7b-instruct-v0.2": -1,
     "mistral-embed": 1024,
+    "monobert-large-msmarco": -1,
+    "monot5-3b-msmarco-10k": -1,
+    "monot5-base-msmarco-10k": -1,
     "msmarco-bert-co-condensor": 768,
     "multi-qa-MiniLM-L6-cos-v1": 384,
     "multilingual-e5-base": 768,
@@ -684,6 +737,8 @@ EXTERNAL_MODEL_TO_DIM = {
     "sup-simcse-bert-base-uncased": 768,
     "st-polish-paraphrase-from-distilroberta": 768,
     "st-polish-paraphrase-from-mpnet": 768,
+    "tart-dual-contriever-msmarco": 768,
+    "tart-full-flan-t5-xl": -1,
     "text2vec-base-chinese": 768,
     "text2vec-large-chinese": 1024,
     "text-embedding-3-large": 3072,
@@ -723,6 +778,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "Cohere-embed-multilingual-v3.0": 512,
     "Cohere-embed-multilingual-light-v3.0": 512,
     "DanskBERT": 514,
+    "FollowIR-7B": 32768,
+    "GritLM-7B": 32768,
     "LASER2": "N/A",
     "LLM2Vec-Llama-supervised": 4096,
     "LLM2Vec-Llama-unsupervised": 4096,
@@ -760,11 +817,15 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "dfm-sentence-encoder-large-1": 512,
     "distiluse-base-multilingual-cased-v2": 512,
     "e5-base": 512,
+    "e5-base-v2": 512,
     "e5-large": 512,
+    "e5-large-v2": 512,
     "e5-mistral-7b-instruct": 32768,
     "e5-small": 512,
     "electra-small-nordic": 512,
     "electra-small-swedish-cased-discriminator": 512,
+    "flan-t5-base": 512,
+    "flan-t5-large": 512,
     "flaubert_base_cased": 512,
     "flaubert_base_uncased": 512,
     "flaubert_large_cased": 512,
@@ -781,11 +842,18 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "gtr-t5-xl": 512,
     "gtr-t5-xxl": 512,
     "herbert-base-retrieval-v2": 514,
+    "instructor-base": 512,
+    "instructor-xl": 512,
     "komninos": "N/A",
+    "llama-2-7b-chat": 4096,
     "luotuo-bert-medium": 512,
     "m3e-base": 512,
     "m3e-large": 512,
+    "mistral-7b-instruct-v0.2": 32768,
     # "mistral-embed": "?",
+    "monobert-large-msmarco": 512,
+    "monot5-3b-msmarco-10k": 512,
+    "monot5-base-msmarco-10k": 512,
     "msmarco-bert-co-condensor": 512,
     "multi-qa-MiniLM-L6-cos-v1": 512,
     "multilingual-e5-base": 514,
@@ -814,6 +882,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "sup-simcse-bert-base-uncased": 512,
     "st-polish-paraphrase-from-distilroberta": 514,
     "st-polish-paraphrase-from-mpnet": 514,
+    "tart-dual-contriever-msmarco": 512,
+    "tart-full-flan-t5-xl": 512,
     "text2vec-base-chinese": 512,
     "text2vec-large-chinese": 512,
     "text-embedding-3-large": 8191,
@@ -849,6 +919,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
 
 EXTERNAL_MODEL_TO_SIZE = {
     "DanskBERT": 125,
+    "FollowIR-7B": 7242,
+    "GritLM-7B": 7242,
     "LASER2": 43,
     "LLM2Vec-Llama-supervised": 6607,
     "LLM2Vec-Llama-unsupervised": 6607,
@@ -872,6 +944,7 @@ EXTERNAL_MODEL_TO_SIZE = {
     "bge-large-zh-v1.5": 326,
     "bge-large-zh-noinstruct": 326,
     "bge-small-zh-v1.5": 24,
+    "bm25": 0,
     "camembert-base": 111,
     "camembert-large": 338,
     "cross-en-de-roberta-sentence-transformer": 278,
@@ -885,11 +958,15 @@ EXTERNAL_MODEL_TO_SIZE = {
     "dfm-encoder-large-v1": 355,
     "dfm-sentence-encoder-large-1": 355,
     "e5-base": 110,
+    "e5-base-v2": 110,
     "e5-large": 335,
+    "e5-large-v2": 335,
     "e5-mistral-7b-instruct": 7111,
     "e5-small": 33,
     "electra-small-nordic": 23,
     "electra-small-swedish-cased-discriminator": 16,
+    "flan-t5-base": 220,
+    "flan-t5-large": 770,
     "flaubert_base_cased": 138,
     "flaubert_base_uncased": 138,
     "flaubert_large_cased": 372,
@@ -906,11 +983,18 @@ EXTERNAL_MODEL_TO_SIZE = {
     "gtr-t5-xl": 1240,
     "gtr-t5-xxl": 4865,
     "herbert-base-retrieval-v2": 125,
+    "instructor-base": 110,
+    "instructor-xl": 1241,
     "komninos": 134,
+    "llama-2-7b-chat": 7000,
     "luotuo-bert-medium": 328,
     "m3e-base": 102,
     "m3e-large": 102,
+    "mistral-7b-instruct-v0.2": 7111,
     "msmarco-bert-co-condensor": 110,
+    "monobert-large-msmarco": 335,
+    "monot5-3b-msmarco-10k": 2480,
+    "monot5-base-msmarco-10k": 220,
     "multi-qa-MiniLM-L6-cos-v1": 23,
     "multilingual-e5-base": 278,
     "multilingual-e5-small": 118,
@@ -936,7 +1020,9 @@ EXTERNAL_MODEL_TO_SIZE = {
     "silver-retriever-base-v1": 125,
     "sup-simcse-bert-base-uncased": 110,
     "st-polish-paraphrase-from-distilroberta": 125,
-    "st-polish-paraphrase-from-mpnet": 125,
+    "st-polish-paraphrase-from-mpnet": 125,
+    "tart-dual-contriever-msmarco": 110,
+    "tart-full-flan-t5-xl": 2480,
     "text2vec-base-chinese": 102,
     "text2vec-large-chinese": 326,
     "unsup-simcse-bert-base-uncased": 110,
@@ -1014,7 +1100,9 @@ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
     "dfm-encoder-large-v1",
     "dfm-sentence-encoder-large-1",
     "e5-base",
+    "e5-base-v2",
     "e5-large",
+    "e5-large-v2",
     "e5-mistral-7b-instruct",
     "e5-small",
     "electra-small-nordic",
@@ -1065,6 +1153,7 @@ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
     "sup-simcse-bert-base-uncased",
     "st-polish-paraphrase-from-distilroberta",
     "st-polish-paraphrase-from-mpnet",
+    "tart-dual-contriever-msmarco",
     "text2vec-base-chinese",
     "text2vec-large-chinese",
     "udever-bloom-1b1",
@@ -1247,6 +1336,8 @@ def add_task(examples):
         examples["mteb_task"] = "Summarization"
     elif examples["mteb_dataset_name"] in norm(TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_DA):
         examples["mteb_task"] = "BitextMining"
+    elif examples["mteb_dataset_name"] in norm(TASK_LIST_RETRIEVAL_INSTRUCTIONS):
+        examples["mteb_task"] = "InstructionRetrieval"
     else:
         print("WARNING: Task not found for dataset", examples["mteb_dataset_name"])
         examples["mteb_task"] = "Unknown"
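With this branch in place, results from the three instruction datasets are tagged `InstructionRetrieval` before aggregation instead of falling through to the `Unknown` warning. Roughly (a hypothetical direct call; in the app `add_task` is applied per example via a dataset `map`, and `norm` is assumed to canonicalize dataset names):

```python
example = {"mteb_dataset_name": "Core17InstructionRetrieval"}
example = add_task(example)
assert example["mteb_task"] == "InstructionRetrieval"
```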
@@ -1333,7 +1424,13 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
     # Initialize list to models that we cannot fetch metadata from
     df_list = []
     for model in EXTERNAL_MODEL_RESULTS:
-        results_list = [res for task in tasks for res in EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]]
+        results_list = []
+        for task in tasks:
+            # Not all models have InstructionRetrieval, other new tasks
+            if task not in EXTERNAL_MODEL_RESULTS[model]:
+                continue
+            results_list += EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]
+
         if len(datasets) > 0:
             res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])}
         elif langs:
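This rewritten loop is the functional core of the commit: the old comprehension indexed `EXTERNAL_MODEL_RESULTS[model][task]` unconditionally, which only works while every external model has results for every requested task. Since only a handful of models have `InstructionRetrieval` scores, that indexing would now raise a `KeyError`. A minimal reproduction with stand-in data (the real results come from EXTERNAL_MODEL_RESULTS.json):

```python
# Stand-in data: only one of the two models has the new task.
EXTERNAL_MODEL_RESULTS = {
    "FollowIR-7B": {"InstructionRetrieval": {"p-MRR": [{"Model": "FollowIR-7B"}]}},
    "e5-base": {"Retrieval": {"ndcg_at_10": [{"Model": "e5-base"}]}},
}
task_to_metric = {"InstructionRetrieval": "p-MRR", "Retrieval": "ndcg_at_10"}
tasks = ["InstructionRetrieval"]

for model in EXTERNAL_MODEL_RESULTS:
    # Old one-liner: raises KeyError("InstructionRetrieval") for "e5-base".
    # results_list = [res for task in tasks
    #                 for res in EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]]
    # New guarded version: models without the task are skipped instead.
    results_list = []
    for task in tasks:
        if task not in EXTERNAL_MODEL_RESULTS[model]:
            continue
        results_list += EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]
    print(model, results_list)  # e5-base yields an empty list, no crash
```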
@@ -1659,6 +1756,7 @@ DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLAS
 DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLUSTERING_DE]
 DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_STS_OTHER]
 DATA_RETRIEVAL_LAW = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_LAW)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_RETRIEVAL_LAW]
+DATA_RETRIEVAL_INSTRUCTIONS = get_mteb_data(["InstructionRetrieval"], [], TASK_LIST_RETRIEVAL_INSTRUCTIONS)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_RETRIEVAL_INSTRUCTIONS]
 
 # Exact, add all non-nan integer values for every dataset
 NUM_SCORES = 0
@@ -1693,6 +1791,7 @@ for d in [
     DATA_RETRIEVAL_PL,
     DATA_RETRIEVAL_ZH,
     DATA_RETRIEVAL_LAW,
+    DATA_RETRIEVAL_INSTRUCTIONS,
     DATA_STS_EN,
     DATA_STS_FR,
     DATA_STS_PL,
@@ -1751,6 +1850,7 @@ french_credits = "[Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](ht
 danish_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
 norwegian_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
 polish_credits = "[Rafał Poświata](https://github.com/rafalposwiata)"
+instruction_credits = "[Orion Weller, FollowIR paper](https://arxiv.org/abs/2403.15246)"
 
 data = {
     "Overall": {
@@ -2057,6 +2157,18 @@ data = {
             "refresh": partial(get_mteb_data, tasks=TASK_LIST_SUMMARIZATION_FR)
         }
     ]
+    },
+    "Retrieval w/Instructions": {
+        "metric": "paired mean reciprocal rank (p-MRR)",
+        "data": [
+            {
+                "language": "English",
+                "description": "**Retrieval with Instructions Leaderboard** 🔎📋",
+                "credits": instruction_credits,
+                "data": DATA_RETRIEVAL_INSTRUCTIONS,
+                "refresh": partial(get_mteb_data, tasks=TASK_LIST_RETRIEVAL_INSTRUCTIONS)
+            }
+        ]
     }
 }
 
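The closing hunk registers the new leaderboard tab. Because "Retrieval w/Instructions" follows the same shape as the existing entries (a `metric` label plus a list of per-language boards, each carrying a `data` DataFrame and a `refresh` callable), the code that iterates over `data` should pick it up without further changes. A sketch of that consumption pattern; the actual Gradio layout code is outside this diff, so the walk below is an assumption about how the dict is used:

```python
# Hypothetical walk over the `data` dict defined in app.py.
for tab_name, tab in data.items():
    print(f"{tab_name} (metric: {tab['metric']})")
    for board in tab["data"]:
        df = board["data"]          # e.g. DATA_RETRIEVAL_INSTRUCTIONS, a DataFrame
        refresh = board["refresh"]  # partial(get_mteb_data, tasks=...) to re-fetch
        print(" ", board["language"], board["description"], df.shape)
```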