Spaces (Running)

orionweller committed cf7ddc6 (parent: 807cc67)

add instruction following

Files changed:
- EXTERNAL_MODEL_RESULTS.json (+0 -0)
- app.py (+114 -2)
EXTERNAL_MODEL_RESULTS.json: CHANGED
The diff for this file is too large to render; see the raw diff.
app.py: CHANGED
@@ -226,6 +226,12 @@ TASK_LIST_RETRIEVAL_LAW = [
     "LegalSummarization",
 ]
 
+TASK_LIST_RETRIEVAL_INSTRUCTIONS = [
+    "Robust04InstructionRetrieval",
+    "News21InstructionRetrieval",
+    "Core17InstructionRetrieval",
+]
+
 TASK_LIST_RETRIEVAL_PL = [
     "ArguAna-PL",
     "DBPedia-PL",
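(Note: the three task names above correspond to the TREC Robust 2004, TREC News 2021, and TREC Common Core 2017 collections, re-annotated with instructions in the FollowIR paper.)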
@@ -322,6 +328,7 @@ TASK_TO_METRIC = {
     "Retrieval": "ndcg_at_10",
     "STS": "cos_sim_spearman",
     "Summarization": "cos_sim_spearman",
+    "InstructionRetrieval": "p-MRR",
 }
 
 def make_clickable_model(model_name, link=None):
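Note: p-MRR is the paired metric from the FollowIR paper (https://arxiv.org/abs/2403.15246). It compares each relevant document's rank under the original query against its rank once the instruction is changed. A minimal sketch of the per-document score, with illustrative names that are not part of app.py:

def p_mrr_per_doc(rank_original: int, rank_changed: int) -> float:
    # Positive when the document moves up after the instruction changes,
    # negative when it moves down, 0.0 when its rank is unchanged.
    if rank_original > rank_changed:
        return rank_original / rank_changed - 1
    return 1 - rank_changed / rank_original

Per-document scores are averaged within and then across queries, giving a value in [-1, 1]; higher means the model followed the instruction change better.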
@@ -339,6 +346,8 @@ EXTERNAL_MODELS = [
     "Cohere-embed-multilingual-v3.0",
     "Cohere-embed-multilingual-light-v3.0",
     "DanskBERT",
+    "FollowIR-7B",
+    "GritLM-7B",
     "LASER2",
     "LLM2Vec-Llama-supervised",
     "LLM2Vec-Llama-unsupervised",
@@ -364,17 +373,22 @@ EXTERNAL_MODELS = [
     "bge-large-zh-v1.5",
     "bge-large-zh-noinstruct",
     "bge-small-zh-v1.5",
+    "bm25",
     "contriever-base-msmarco",
     "cross-en-de-roberta-sentence-transformer",
     "dfm-encoder-large-v1",
     "dfm-sentence-encoder-large-1",
     "distiluse-base-multilingual-cased-v2",
     "e5-base",
+    "e5-base-v2",
     "e5-large",
+    "e5-large-v2",
     "e5-mistral-7b-instruct",
     "e5-small",
     "electra-small-nordic",
     "electra-small-swedish-cased-discriminator",
+    "flan-t5-base",
+    "flan-t5-large",
     "flaubert_base_cased",
     "flaubert_base_uncased",
     "flaubert_large_cased",
@@ -391,11 +405,18 @@ EXTERNAL_MODELS = [
     "gtr-t5-xl",
     "gtr-t5-xxl",
     "herbert-base-retrieval-v2",
+    "instructor-base",
+    "instructor-xl",
     "komninos",
+    "llama-2-7b-chat",
     "luotuo-bert-medium",
     "m3e-base",
     "m3e-large",
+    "mistral-7b-instruct-v0.2",
     "mistral-embed",
+    "monobert-large-msmarco",
+    "monot5-3b-msmarco-10k",
+    "monot5-base-msmarco-10k",
     "msmarco-bert-co-condensor",
     "multi-qa-MiniLM-L6-cos-v1",
     "multilingual-e5-base",
@@ -423,6 +444,8 @@ EXTERNAL_MODELS = [
     "sup-simcse-bert-base-uncased",
     "st-polish-paraphrase-from-distilroberta",
     "st-polish-paraphrase-from-mpnet",
+    "tart-dual-contriever-msmarco",
+    "tart-full-flan-t5-xl",
     "text2vec-base-chinese",
     "text2vec-base-multilingual",
     "text2vec-large-chinese",
@@ -470,6 +493,8 @@ EXTERNAL_MODEL_TO_LINK = {
     "LLM2Vec-Sheared-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
     "LLM2Vec-Sheared-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
     "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
+    "FollowIR-7B": "https://huggingface.co/jhu-clsp/FollowIR-7B",
+    "GritLM-7B": "https://huggingface.co/GritLM/GritLM-7B",
     "OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
     "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
     "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
@@ -488,6 +513,7 @@ EXTERNAL_MODEL_TO_LINK = {
     "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
     "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
     "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
+    "bm25": "https://en.wikipedia.org/wiki/Okapi_BM25",
     "camembert-base": "https://huggingface.co/almanach/camembert-base",
     "camembert-large": "https://huggingface.co/almanach/camembert-large",
     "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
@@ -501,11 +527,15 @@ EXTERNAL_MODEL_TO_LINK = {
     "dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
     "dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
     "e5-base": "https://huggingface.co/intfloat/e5-base",
+    "e5-base-v2": "https://huggingface.co/intfloat/e5-base-v2",
     "e5-large": "https://huggingface.co/intfloat/e5-large",
+    "e5-large-v2": "https://huggingface.co/intfloat/e5-large-v2",
     "e5-mistral-7b-instruct": "https://huggingface.co/intfloat/e5-mistral-7b-instruct",
     "e5-small": "https://huggingface.co/intfloat/e5-small",
     "electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic",
     "electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator",
+    "flan-t5-base": "https://huggingface.co/google/flan-t5-base",
+    "flan-t5-large": "https://huggingface.co/google/flan-t5-large",
     "flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased",
     "flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased",
     "flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased",
@@ -522,11 +552,18 @@ EXTERNAL_MODEL_TO_LINK = {
     "gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl",
     "gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl",
     "herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
+    "instructor-base": "https://huggingface.co/hkunlp/instructor-base",
+    "instructor-xl": "https://huggingface.co/hkunlp/instructor-xl",
     "komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
+    "llama-2-7b-chat": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf",
     "luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
     "m3e-base": "https://huggingface.co/moka-ai/m3e-base",
     "m3e-large": "https://huggingface.co/moka-ai/m3e-large",
+    "mistral-7b-instruct-v0.2": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
     "mistral-embed": "https://docs.mistral.ai/guides/embeddings",
+    "monobert-large-msmarco": "https://huggingface.co/castorini/monobert-large-msmarco",
+    "monot5-3b-msmarco-10k": "https://huggingface.co/castorini/monot5-3b-msmarco-10k",
+    "monot5-base-msmarco-10k": "https://huggingface.co/castorini/monot5-base-msmarco-10k",
     "msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor",
     "multi-qa-MiniLM-L6-cos-v1": "https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
     "multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base",
@@ -554,6 +591,8 @@ EXTERNAL_MODEL_TO_LINK = {
     "sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased",
     "st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta",
     "st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
+    "tart-dual-contriever-msmarco": "https://huggingface.co/orionweller/tart-dual-contriever-msmarco",
+    "tart-full-flan-t5-xl": "https://huggingface.co/facebook/tart-full-flan-t5-xl",
     "text2vec-base-chinese": "https://huggingface.co/shibing624/text2vec-base-chinese",
     "text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
     "text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates",
@@ -593,6 +632,8 @@ EXTERNAL_MODEL_TO_DIM = {
     "Cohere-embed-multilingual-v3.0": 1024,
     "Cohere-embed-multilingual-light-v3.0": 384,
     "DanskBERT": 768,
+    "FollowIR-7B": -1,
+    "GritLM-7B": 4096,
     "LASER2": 1024,
     "LLM2Vec-Llama-supervised": 4096,
     "LLM2Vec-Llama-unsupervised": 4096,
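Note: -1 appears to act as a sentinel dimension for entries that are not embedding models (rerankers and generative baselines such as FollowIR-7B, bm25, or the monot5 family). A sketch of the guard a consumer of this table would presumably need, assuming a blank display value is the desired fallback:

dim = EXTERNAL_MODEL_TO_DIM.get(model, "")
if dim == -1:
    dim = ""  # no fixed embedding dimension to report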
@@ -617,6 +658,7 @@ EXTERNAL_MODEL_TO_DIM = {
     "bge-large-zh-v1.5": 1024,
     "bge-large-zh-noinstruct": 1024,
     "bge-small-zh-v1.5": 512,
+    "bm25": -1,
     "camembert-base": 512,
     "camembert-large": 768,
     "contriever-base-msmarco": 768,
@@ -630,11 +672,15 @@ EXTERNAL_MODEL_TO_DIM = {
     "dfm-encoder-large-v1": 1024,
     "dfm-sentence-encoder-large-1": 1024,
     "e5-base": 768,
+    "e5-base-v2": 768,
     "e5-large": 1024,
+    "e5-large-v2": 1024,
     "e5-mistral-7b-instruct": 4096,
     "e5-small": 384,
     "electra-small-nordic": 256,
     "electra-small-swedish-cased-discriminator": 256,
+    "flan-t5-base": -1,
+    "flan-t5-large": -1,
     "flaubert_base_cased": 768,
     "flaubert_base_uncased": 768,
     "flaubert_large_cased": 1024,
@@ -652,10 +698,17 @@ EXTERNAL_MODEL_TO_DIM = {
     "gtr-t5-xl": 768,
     "gtr-t5-xxl": 768,
     "herbert-base-retrieval-v2": 768,
+    "instructor-base": 768,
+    "instructor-xl": 768,
     "komninos": 300,
+    "llama-2-7b-chat": -1,
     "m3e-base": 768,
     "m3e-large": 768,
+    "mistral-7b-instruct-v0.2": -1,
     "mistral-embed": 1024,
+    "monobert-large-msmarco": -1,
+    "monot5-3b-msmarco-10k": -1,
+    "monot5-base-msmarco-10k": -1,
     "msmarco-bert-co-condensor": 768,
     "multi-qa-MiniLM-L6-cos-v1": 384,
     "multilingual-e5-base": 768,
@@ -684,6 +737,8 @@ EXTERNAL_MODEL_TO_DIM = {
     "sup-simcse-bert-base-uncased": 768,
     "st-polish-paraphrase-from-distilroberta": 768,
     "st-polish-paraphrase-from-mpnet": 768,
+    "tart-dual-contriever-msmarco": 768,
+    "tart-full-flan-t5-xl": -1,
     "text2vec-base-chinese": 768,
     "text2vec-large-chinese": 1024,
     "text-embedding-3-large": 3072,
@@ -723,6 +778,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "Cohere-embed-multilingual-v3.0": 512,
     "Cohere-embed-multilingual-light-v3.0": 512,
     "DanskBERT": 514,
+    "FollowIR-7B": 32768,
+    "GritLM-7B": 32768,
     "LASER2": "N/A",
     "LLM2Vec-Llama-supervised": 4096,
     "LLM2Vec-Llama-unsupervised": 4096,
@@ -760,11 +817,15 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "dfm-sentence-encoder-large-1": 512,
     "distiluse-base-multilingual-cased-v2": 512,
     "e5-base": 512,
+    "e5-base-v2": 512,
     "e5-large": 512,
+    "e5-large-v2": 512,
     "e5-mistral-7b-instruct": 32768,
     "e5-small": 512,
     "electra-small-nordic": 512,
     "electra-small-swedish-cased-discriminator": 512,
+    "flan-t5-base": 512,
+    "flan-t5-large": 512,
     "flaubert_base_cased": 512,
     "flaubert_base_uncased": 512,
     "flaubert_large_cased": 512,
@@ -781,11 +842,18 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "gtr-t5-xl": 512,
     "gtr-t5-xxl": 512,
     "herbert-base-retrieval-v2": 514,
+    "instructor-base": 512,
+    "instructor-xl": 512,
     "komninos": "N/A",
+    "llama-2-7b-chat": 4096,
     "luotuo-bert-medium": 512,
     "m3e-base": 512,
     "m3e-large": 512,
+    "mistral-7b-instruct-v0.2": 32768,
     # "mistral-embed": "?",
+    "monobert-large-msmarco": 512,
+    "monot5-3b-msmarco-10k": 512,
+    "monot5-base-msmarco-10k": 512,
     "msmarco-bert-co-condensor": 512,
     "multi-qa-MiniLM-L6-cos-v1": 512,
     "multilingual-e5-base": 514,
@@ -814,6 +882,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "sup-simcse-bert-base-uncased": 512,
     "st-polish-paraphrase-from-distilroberta": 514,
     "st-polish-paraphrase-from-mpnet": 514,
+    "tart-dual-contriever-msmarco": 512,
+    "tart-full-flan-t5-xl": 512,
     "text2vec-base-chinese": 512,
     "text2vec-large-chinese": 512,
     "text-embedding-3-large": 8191,
@@ -849,6 +919,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
 
 EXTERNAL_MODEL_TO_SIZE = {
     "DanskBERT": 125,
+    "FollowIR-7B": 7242,
+    "GritLM-7B": 7242,
     "LASER2": 43,
     "LLM2Vec-Llama-supervised": 6607,
     "LLM2Vec-Llama-unsupervised": 6607,
@@ -872,6 +944,7 @@ EXTERNAL_MODEL_TO_SIZE = {
     "bge-large-zh-v1.5": 326,
     "bge-large-zh-noinstruct": 326,
     "bge-small-zh-v1.5": 24,
+    "bm25": 0,
     "camembert-base": 111,
     "camembert-large": 338,
     "cross-en-de-roberta-sentence-transformer": 278,
@@ -885,11 +958,15 @@ EXTERNAL_MODEL_TO_SIZE = {
     "dfm-encoder-large-v1": 355,
     "dfm-sentence-encoder-large-1": 355,
     "e5-base": 110,
+    "e5-base-v2": 110,
     "e5-large": 335,
+    "e5-large-v2": 335,
     "e5-mistral-7b-instruct": 7111,
     "e5-small": 33,
     "electra-small-nordic": 23,
     "electra-small-swedish-cased-discriminator": 16,
+    "flan-t5-base": 220,
+    "flan-t5-large": 770,
     "flaubert_base_cased": 138,
     "flaubert_base_uncased": 138,
     "flaubert_large_cased": 372,
@@ -906,11 +983,18 @@ EXTERNAL_MODEL_TO_SIZE = {
     "gtr-t5-xl": 1240,
     "gtr-t5-xxl": 4865,
     "herbert-base-retrieval-v2": 125,
+    "instructor-base": 110,
+    "instructor-xl": 1241,
     "komninos": 134,
+    "llama-2-7b-chat": 7000,
     "luotuo-bert-medium": 328,
     "m3e-base": 102,
     "m3e-large": 102,
+    "mistral-7b-instruct-v0.2": 7111,
     "msmarco-bert-co-condensor": 110,
+    "monobert-large-msmarco": 335,
+    "monot5-3b-msmarco-10k": 2480,
+    "monot5-base-msmarco-10k": 220,
     "multi-qa-MiniLM-L6-cos-v1": 23,
     "multilingual-e5-base": 278,
     "multilingual-e5-small": 118,
@@ -936,7 +1020,9 @@ EXTERNAL_MODEL_TO_SIZE = {
     "silver-retriever-base-v1": 125,
     "sup-simcse-bert-base-uncased": 110,
     "st-polish-paraphrase-from-distilroberta": 125,
-    "st-polish-paraphrase-from-mpnet": 125,
+    "st-polish-paraphrase-from-mpnet": 125,
+    "tart-dual-contriever-msmarco": 110,
+    "tart-full-flan-t5-xl": 2480,
     "text2vec-base-chinese": 102,
     "text2vec-large-chinese": 326,
     "unsup-simcse-bert-base-uncased": 110,
@@ -1014,7 +1100,9 @@ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
     "dfm-encoder-large-v1",
     "dfm-sentence-encoder-large-1",
     "e5-base",
+    "e5-base-v2",
     "e5-large",
+    "e5-large-v2",
     "e5-mistral-7b-instruct",
     "e5-small",
     "electra-small-nordic",
@@ -1065,6 +1153,7 @@ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
     "sup-simcse-bert-base-uncased",
     "st-polish-paraphrase-from-distilroberta",
     "st-polish-paraphrase-from-mpnet",
+    "tart-dual-contriever-msmarco",
     "text2vec-base-chinese",
     "text2vec-large-chinese",
     "udever-bloom-1b1",
@@ -1247,6 +1336,8 @@ def add_task(examples):
         examples["mteb_task"] = "Summarization"
     elif examples["mteb_dataset_name"] in norm(TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_DA):
         examples["mteb_task"] = "BitextMining"
+    elif examples["mteb_dataset_name"] in norm(TASK_LIST_RETRIEVAL_INSTRUCTIONS):
+        examples["mteb_task"] = "InstructionRetrieval"
     else:
         print("WARNING: Task not found for dataset", examples["mteb_dataset_name"])
         examples["mteb_task"] = "Unknown"
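Note: a toy check of the new routing (illustrative only; norm() is the existing name-normalization helper in app.py, assumed here to keep these dataset names matchable):

example = {"mteb_dataset_name": "Core17InstructionRetrieval"}
add_task(example)  # sets example["mteb_task"] in place
assert example["mteb_task"] == "InstructionRetrieval"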
@@ -1333,7 +1424,13 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
     # Initialize list to models that we cannot fetch metadata from
     df_list = []
     for model in EXTERNAL_MODEL_RESULTS:
-        results_list = [
+        results_list = []
+        for task in tasks:
+            # Not all models have InstructionRetrieval, other new tasks
+            if task not in EXTERNAL_MODEL_RESULTS[model]:
+                continue
+            results_list += EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]
+
         if len(datasets) > 0:
             res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])}
         elif langs:
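Note: the continue guard matters because entries cached in EXTERNAL_MODEL_RESULTS.json before this commit have no "InstructionRetrieval" key, so indexing them directly would raise a KeyError. A self-contained sketch of the same skip-missing-task aggregation on a toy results dict:

results = {"Retrieval": {"ndcg_at_10": [{"Model": "e5-base", "SciFact": 71.0}]}}
task_to_metric = {"Retrieval": "ndcg_at_10", "InstructionRetrieval": "p-MRR"}

rows = []
for task in ["Retrieval", "InstructionRetrieval"]:
    if task not in results:  # model was scored before the task existed
        continue
    rows += results[task][task_to_metric[task]]
# rows == [{"Model": "e5-base", "SciFact": 71.0}]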
@@ -1659,6 +1756,7 @@ DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLAS
 DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLUSTERING_DE]
 DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_STS_OTHER]
 DATA_RETRIEVAL_LAW = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_LAW)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_RETRIEVAL_LAW]
+DATA_RETRIEVAL_INSTRUCTIONS = get_mteb_data(["InstructionRetrieval"], [], TASK_LIST_RETRIEVAL_INSTRUCTIONS)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_RETRIEVAL_INSTRUCTIONS]
 
 # Exact, add all non-nan integer values for every dataset
 NUM_SCORES = 0
@@ -1693,6 +1791,7 @@ for d in [
     DATA_RETRIEVAL_PL,
     DATA_RETRIEVAL_ZH,
     DATA_RETRIEVAL_LAW,
+    DATA_RETRIEVAL_INSTRUCTIONS,
     DATA_STS_EN,
     DATA_STS_FR,
     DATA_STS_PL,
@@ -1751,6 +1850,7 @@ french_credits = "[Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](ht
 danish_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
 norwegian_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
 polish_credits = "[Rafał Poświata](https://github.com/rafalposwiata)"
+instruction_credits = "[Orion Weller, FollowIR paper](https://arxiv.org/abs/2403.15246)"
 
 data = {
     "Overall": {
@@ -2057,6 +2157,18 @@ data = {
             "refresh": partial(get_mteb_data, tasks=TASK_LIST_SUMMARIZATION_FR)
         }
     ]
+    },
+    "Retrieval w/Instructions": {
+        "metric": "paired mean reciprocal rank (p-MRR)",
+        "data": [
+            {
+                "language": "English",
+                "description": "**Retrieval with Instructions Leaderboard** 🔎📋",
+                "credits": instruction_credits,
+                "data": DATA_RETRIEVAL_INSTRUCTIONS,
+                "refresh": partial(get_mteb_data, tasks=TASK_LIST_RETRIEVAL_INSTRUCTIONS)
+            }
+        ]
     }
 }
 
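Note: roughly how an entry like this is presumably consumed by the tab-building code further down in app.py; the precomputed DataFrame seeds the table and the stored partial rebuilds it on refresh (a sketch, not the actual UI code):

entry = data["Retrieval w/Instructions"]["data"][0]
initial_df = entry["data"]         # DATA_RETRIEVAL_INSTRUCTIONS, computed at startup
refreshed_df = entry["refresh"]()  # calls get_mteb_data(tasks=TASK_LIST_RETRIEVAL_INSTRUCTIONS)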