diff --git "a/EXTERNAL_MODEL_RESULTS.json" "b/EXTERNAL_MODEL_RESULTS.json"
--- "a/EXTERNAL_MODEL_RESULTS.json"
+++ "b/EXTERNAL_MODEL_RESULTS.json"
@@ -879,127 +879,6 @@
]
}
},
- "all-mpnet-base-v2": {
- "BitextMining": {
- "f1": [
- {
- "Model": "all-mpnet-base-v2"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "all-mpnet-base-v2",
- "AmazonCounterfactualClassification (en)": 65.27,
- "AmazonPolarityClassification": 67.13,
- "AmazonReviewsClassification (en)": 31.92,
- "Banking77Classification": 81.86,
- "EmotionClassification": 39.72,
- "ImdbClassification": 70.72,
- "MTOPDomainClassification (en)": 92.08,
- "MTOPIntentClassification (en)": 70.21,
- "MassiveIntentClassification (en)": 69.57,
- "MassiveScenarioClassification (en)": 76.01,
- "ToxicConversationsClassification": 60.86,
- "TweetSentimentExtractionClassification": 55.46
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "all-mpnet-base-v2",
- "ArxivClusteringP2P": 48.38,
- "ArxivClusteringS2S": 39.72,
- "BiorxivClusteringP2P": 39.62,
- "BiorxivClusteringS2S": 35.02,
- "MedrxivClusteringP2P": 35.58,
- "MedrxivClusteringS2S": 32.87,
- "RedditClustering": 54.82,
- "RedditClusteringP2P": 56.77,
- "StackExchangeClustering": 53.8,
- "StackExchangeClusteringP2P": 34.28,
- "TwentyNewsgroupsClustering": 49.74
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "all-mpnet-base-v2",
- "SprintDuplicateQuestions": 90.15,
- "TwitterSemEval2015": 73.85,
- "TwitterURLCorpus": 85.11
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "all-mpnet-base-v2",
- "AskUbuntuDupQuestions": 65.85,
- "MindSmallReranking": 30.97,
- "SciDocsRR": 88.65,
- "StackOverflowDupQuestions": 51.98
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "all-mpnet-base-v2",
- "ArguAna": 46.52,
- "CQADupstackRetrieval": 44.96,
- "ClimateFEVER": 21.97,
- "DBPedia": 32.09,
- "FEVER": 50.86,
- "FiQA2018": 49.96,
- "HotpotQA": 39.29,
- "MSMARCO": 39.75,
- "NFCorpus": 33.29,
- "NQ": 50.45,
- "QuoraRetrieval": 87.46,
- "SCIDOCS": 23.76,
- "SciFact": 65.57,
- "TRECCOVID": 51.33,
- "Touche2020": 19.93
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "all-mpnet-base-v2",
- "BIOSSES": 80.43,
- "SICK-R": 80.59,
- "STS12": 72.63,
- "STS13": 83.48,
- "STS14": 78.0,
- "STS15": 85.66,
- "STS16": 80.03,
- "STS17 (en-en)": 90.6,
- "STS22 (en)": 67.95,
- "STSBenchmark": 83.42
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "all-mpnet-base-v2",
- "SummEval": 27.49
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "all-mpnet-base-v2"
- }
- ]
- }
- },
"nb-bert-large": {
"BitextMining": {
"f1": [
@@ -1365,82 +1244,6 @@
]
}
},
- "Cohere-embed-english-v3.0": {
- "BitextMining": {
- "f1": [
- {
- "Model": "Cohere-embed-english-v3.0"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "Cohere-embed-english-v3.0"
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "Cohere-embed-english-v3.0"
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "Cohere-embed-english-v3.0"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "Cohere-embed-english-v3.0"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "Cohere-embed-english-v3.0",
- "AILACasedocs": 31.54,
- "AILAStatutes": 27.15,
- "GerDaLIRSmall": 6.05,
- "LeCaRDv2": 21.02,
- "LegalBenchConsumerContractsQA": 77.12,
- "LegalBenchCorporateLobbying": 93.68,
- "LegalQuAD": 26.08,
- "LegalSummarization": 61.7
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "Cohere-embed-english-v3.0"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "Cohere-embed-english-v3.0"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "Cohere-embed-english-v3.0",
- "Core17InstructionRetrieval": 2.8,
- "News21InstructionRetrieval": 0.2,
- "Robust04InstructionRetrieval": -3.63
- }
- ]
- }
- },
"LLM2Vec-Meta-Llama-3-supervised": {
"BitextMining": {
"f1": [
@@ -2777,79 +2580,6 @@
]
}
},
- "bge-large-en-v1.5": {
- "BitextMining": {
- "f1": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "bge-large-en-v1.5",
- "AILACasedocs": 25.15,
- "AILAStatutes": 20.74,
- "GerDaLIRSmall": 3.96,
- "LeCaRDv2": 22.68,
- "LegalBenchConsumerContractsQA": 73.52,
- "LegalBenchCorporateLobbying": 91.51,
- "LegalQuAD": 16.22,
- "LegalSummarization": 59.99
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- }
- },
"tart-full-flan-t5-xl": {
"BitextMining": {
"f1": [
@@ -6811,132 +6541,11 @@
]
}
},
- "voyage-large-2-instruct": {
+ "udever-bloom-560m": {
"BitextMining": {
"f1": [
{
- "Model": "voyage-large-2-instruct"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "voyage-large-2-instruct",
- "AmazonCounterfactualClassification (en)": 77.6,
- "AmazonPolarityClassification": 96.58,
- "AmazonReviewsClassification (en)": 50.77,
- "Banking77Classification": 86.96,
- "EmotionClassification": 59.81,
- "ImdbClassification": 96.13,
- "MTOPDomainClassification (en)": 98.86,
- "MTOPIntentClassification (en)": 86.97,
- "MassiveIntentClassification (en)": 81.08,
- "MassiveScenarioClassification (en)": 87.95,
- "ToxicConversationsClassification": 83.58,
- "TweetSentimentExtractionClassification": 71.55
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "voyage-large-2-instruct",
- "ArxivClusteringP2P": 51.81,
- "ArxivClusteringS2S": 44.73,
- "BiorxivClusteringP2P": 46.07,
- "BiorxivClusteringS2S": 40.64,
- "MedrxivClusteringP2P": 42.94,
- "MedrxivClusteringS2S": 41.44,
- "RedditClustering": 68.5,
- "RedditClusteringP2P": 64.86,
- "StackExchangeClustering": 74.16,
- "StackExchangeClusteringP2P": 45.1,
- "TwentyNewsgroupsClustering": 66.62
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "voyage-large-2-instruct",
- "SprintDuplicateQuestions": 94.5,
- "TwitterSemEval2015": 86.32,
- "TwitterURLCorpus": 86.9
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "voyage-large-2-instruct",
- "AskUbuntuDupQuestions": 64.92,
- "MindSmallReranking": 30.97,
- "SciDocsRR": 89.34,
- "StackOverflowDupQuestions": 55.11
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "voyage-large-2-instruct",
- "ArguAna": 64.06,
- "CQADupstackRetrieval": 46.6,
- "ClimateFEVER": 32.65,
- "DBPedia": 46.03,
- "FEVER": 91.47,
- "FiQA2018": 59.76,
- "HotpotQA": 70.86,
- "MSMARCO": 40.6,
- "NFCorpus": 40.32,
- "NQ": 65.92,
- "QuoraRetrieval": 87.4,
- "SCIDOCS": 24.32,
- "SciFact": 79.99,
- "TRECCOVID": 85.07,
- "Touche2020": 39.16
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "voyage-large-2-instruct",
- "BIOSSES": 89.12,
- "SICK-R": 83.16,
- "STS12": 76.15,
- "STS13": 88.49,
- "STS14": 86.49,
- "STS15": 91.13,
- "STS16": 85.68,
- "STS17 (en-en)": 90.06,
- "STS22 (en)": 66.32,
- "STSBenchmark": 89.22
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "voyage-large-2-instruct",
- "SummEval": 30.84
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "voyage-large-2-instruct"
- }
- ]
- }
- },
- "udever-bloom-560m": {
- "BitextMining": {
- "f1": [
- {
- "Model": "udever-bloom-560m"
+ "Model": "udever-bloom-560m"
}
]
},
@@ -8225,130 +7834,6 @@
]
}
},
- "google-gecko.text-embedding-preview-0409": {
- "BitextMining": {
- "f1": [
- {
- "Model": "google-gecko.text-embedding-preview-0409"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "AmazonCounterfactualClassification (en)": 75.34,
- "AmazonPolarityClassification": 97.34,
- "AmazonReviewsClassification (en)": 51.17,
- "Banking77Classification": 88.62,
- "EmotionClassification": 52.51,
- "ImdbClassification": 95.65,
- "MTOPDomainClassification (en)": 98.35,
- "MTOPIntentClassification (en)": 83.43,
- "MassiveIntentClassification (en)": 80.22,
- "MassiveScenarioClassification (en)": 87.19,
- "ToxicConversationsClassification": 89.67,
- "TweetSentimentExtractionClassification": 74.52
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "ArxivClusteringP2P": 46.27,
- "ArxivClusteringS2S": 38.36,
- "BiorxivClusteringP2P": 37.87,
- "BiorxivClusteringS2S": 35.67,
- "MedrxivClusteringP2P": 33.11,
- "MedrxivClusteringS2S": 31.54,
- "RedditClustering": 65.81,
- "RedditClusteringP2P": 66.62,
- "StackExchangeClustering": 74.52,
- "StackExchangeClusteringP2P": 37.63,
- "TwentyNewsgroupsClustering": 54.87
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "SprintDuplicateQuestions": 96.26,
- "TwitterSemEval2015": 79.04,
- "TwitterURLCorpus": 87.53
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "AskUbuntuDupQuestions": 64.4,
- "MindSmallReranking": 33.07,
- "SciDocsRR": 83.59,
- "StackOverflowDupQuestions": 54.56
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "ArguAna": 62.18,
- "CQADupstackRetrieval": 48.89,
- "ClimateFEVER": 33.21,
- "DBPedia": 47.12,
- "FEVER": 86.96,
- "FiQA2018": 59.24,
- "HotpotQA": 71.33,
- "MSMARCO": 32.58,
- "NFCorpus": 40.33,
- "NQ": 61.28,
- "QuoraRetrieval": 88.18,
- "SCIDOCS": 20.34,
- "SciFact": 75.42,
- "TRECCOVID": 82.62,
- "Touche2020": 25.86
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "BIOSSES": 89.46,
- "SICK-R": 81.93,
- "STS12": 77.59,
- "STS13": 90.36,
- "STS14": 85.25,
- "STS15": 89.66,
- "STS16": 87.34,
- "STS17 (en-en)": 92.06,
- "STS22 (en)": 68.02,
- "STSBenchmark": 88.99
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "SummEval": 32.63
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "Core17InstructionRetrieval": 5.44,
- "News21InstructionRetrieval": 3.94,
- "Robust04InstructionRetrieval": -2.4
- }
- ]
- }
- },
"voyage-code-2": {
"BitextMining": {
"f1": [
@@ -11313,115 +10798,47 @@
]
}
},
- "GritLM-7B": {
+ "text2vec-base-multilingual": {
"BitextMining": {
"f1": [
{
- "Model": "GritLM-7B"
+ "Model": "text2vec-base-multilingual"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "GritLM-7B"
+ "Model": "text2vec-base-multilingual",
+ "AmazonReviewsClassification (fr)": 34.25,
+ "MTOPDomainClassification (fr)": 71.83,
+ "MTOPIntentClassification (fr)": 44.53,
+ "MasakhaNEWSClassification (fra)": 73.84,
+ "MassiveIntentClassification (fr)": 51.93,
+ "MassiveScenarioClassification (fr)": 58.31
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "GritLM-7B"
+ "Model": "text2vec-base-multilingual",
+ "AlloProfClusteringP2P": 49.11,
+ "AlloProfClusteringS2S": 32.72,
+ "HALClusteringS2S": 16.19,
+ "MLSUMClusteringP2P": 36.19,
+ "MLSUMClusteringS2S": 30.39,
+ "MasakhaNEWSClusteringP2P (fra)": 38.51,
+ "MasakhaNEWSClusteringS2S (fra)": 32.51
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "GritLM-7B"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "GritLM-7B",
- "Core17InstructionRetrieval": 2.62,
- "News21InstructionRetrieval": -1.01,
- "Robust04InstructionRetrieval": -1.68
- }
- ]
- }
- },
- "text2vec-base-multilingual": {
- "BitextMining": {
- "f1": [
- {
- "Model": "text2vec-base-multilingual"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "text2vec-base-multilingual",
- "AmazonReviewsClassification (fr)": 34.25,
- "MTOPDomainClassification (fr)": 71.83,
- "MTOPIntentClassification (fr)": 44.53,
- "MasakhaNEWSClassification (fra)": 73.84,
- "MassiveIntentClassification (fr)": 51.93,
- "MassiveScenarioClassification (fr)": 58.31
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "text2vec-base-multilingual",
- "AlloProfClusteringP2P": 49.11,
- "AlloProfClusteringS2S": 32.72,
- "HALClusteringS2S": 16.19,
- "MLSUMClusteringP2P": 36.19,
- "MLSUMClusteringS2S": 30.39,
- "MasakhaNEWSClusteringP2P (fra)": 38.51,
- "MasakhaNEWSClusteringS2S (fra)": 32.51
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "text2vec-base-multilingual",
- "OpusparcusPC (fr)": 92.04,
- "PawsX (fr)": 65.57
+ "Model": "text2vec-base-multilingual",
+ "OpusparcusPC (fr)": 92.04,
+ "PawsX (fr)": 65.57
}
]
},
@@ -12616,74 +12033,6 @@
]
}
},
- "instructor-xl": {
- "BitextMining": {
- "f1": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "instructor-xl",
- "Core17InstructionRetrieval": 0.69,
- "News21InstructionRetrieval": -0.9,
- "Robust04InstructionRetrieval": -8.08
- }
- ]
- }
- },
"instructor-base": {
"BitextMining": {
"f1": [
@@ -13464,74 +12813,6 @@
]
}
},
- "bm25": {
- "BitextMining": {
- "f1": [
- {
- "Model": "bm25"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "bm25"
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "bm25"
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "bm25"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "bm25"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "bm25"
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "bm25"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "bm25"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "bm25",
- "Core17InstructionRetrieval": -1.06,
- "News21InstructionRetrieval": -2.15,
- "Robust04InstructionRetrieval": -3.06
- }
- ]
- }
- },
"nomic-embed-text-v1.5-256": {
"BitextMining": {
"f1": [
@@ -15969,185 +15250,30 @@
]
}
},
- "gte-Qwen1.5-7B-instruct": {
+ "elser-v2": {
"BitextMining": {
"f1": [
{
- "Model": "gte-Qwen1.5-7B-instruct"
+ "Model": "elser-v2"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "gte-Qwen1.5-7B-instruct",
- "AmazonCounterfactualClassification (en)": 83.16,
- "AmazonPolarityClassification": 96.7,
- "AmazonReviewsClassification (en)": 62.17,
- "AmazonReviewsClassification (zh)": 52.95,
- "Banking77Classification": 81.68,
- "EmotionClassification": 54.53,
- "IFlyTek": 53.77,
- "ImdbClassification": 95.58,
- "JDReview": 88.2,
- "MTOPDomainClassification (en)": 95.75,
- "MTOPIntentClassification (en)": 84.26,
- "MassiveIntentClassification (zh-CN)": 76.25,
- "MassiveIntentClassification (en)": 78.47,
- "MassiveScenarioClassification (en)": 78.19,
- "MassiveScenarioClassification (zh-CN)": 77.26,
- "MultilingualSentiment": 77.42,
- "OnlineShopping": 94.48,
- "TNews": 51.24,
- "ToxicConversationsClassification": 78.75,
- "TweetSentimentExtractionClassification": 66.0,
- "Waimai": 88.63
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "ArxivClusteringP2P": 56.4,
- "ArxivClusteringS2S": 51.45,
- "BiorxivClusteringP2P": 49.01,
- "BiorxivClusteringS2S": 45.06,
- "CLSClusteringP2P": 47.21,
- "CLSClusteringS2S": 45.79,
- "MedrxivClusteringP2P": 44.37,
- "MedrxivClusteringS2S": 42.0,
- "RedditClustering": 73.37,
- "RedditClusteringP2P": 72.51,
- "StackExchangeClustering": 79.07,
- "StackExchangeClusteringP2P": 49.57,
- "ThuNewsClusteringP2P": 87.43,
- "ThuNewsClusteringS2S": 87.9,
- "TwentyNewsgroupsClustering": 51.31
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "Cmnli": 91.81,
- "Ocnli": 85.22,
- "SprintDuplicateQuestions": 95.99,
- "TwitterSemEval2015": 79.36,
- "TwitterURLCorpus": 86.79
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "AskUbuntuDupQuestions": 66.0,
- "CMedQAv1": 86.37,
- "CMedQAv2": 87.41,
- "MindSmallReranking": 32.71,
- "SciDocsRR": 87.89,
- "StackOverflowDupQuestions": 53.93,
- "T2Reranking": 68.11
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "ArguAna": 62.65,
- "CQADupstackRetrieval": 40.64,
- "ClimateFEVER": 44.0,
- "CmedqaRetrieval": 43.47,
- "CovidRetrieval": 80.87,
- "DBPedia": 48.04,
- "DuRetrieval": 86.01,
- "EcomRetrieval": 66.46,
- "FEVER": 93.35,
- "FiQA2018": 55.31,
- "HotpotQA": 72.25,
- "MMarcoRetrieval": 73.83,
- "MSMARCO": 41.68,
- "MedicalRetrieval": 61.33,
- "NFCorpus": 38.25,
- "NQ": 61.79,
- "QuoraRetrieval": 89.61,
- "SCIDOCS": 27.69,
- "SciFact": 75.31,
- "T2Retrieval": 83.58,
- "TRECCOVID": 72.72,
- "Touche2020": 20.3,
- "VideoRetrieval": 69.41
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "AFQMC": 58.47,
- "ATEC": 55.46,
- "BIOSSES": 81.12,
- "BQ": 77.59,
- "LCQMC": 76.29,
- "PAWSX": 50.22,
- "QBQTC": 31.82,
- "SICK-R": 79.15,
- "STS12": 76.52,
- "STS13": 88.63,
- "STS14": 83.32,
- "STS15": 87.5,
- "STS16": 86.39,
- "STS17 (en-en)": 87.79,
- "STS22 (en)": 66.4,
- "STS22 (zh)": 67.36,
- "STSB": 81.37,
- "STSBenchmark": 87.35
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "SummEval": 31.46
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "gte-Qwen1.5-7B-instruct"
- }
- ]
- }
- },
- "elser-v2": {
- "BitextMining": {
- "f1": [
- {
- "Model": "elser-v2"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "elser-v2",
- "AmazonCounterfactualClassification (en)": 74.16,
- "AmazonPolarityClassification": 61.91,
- "AmazonReviewsClassification (en)": 32.06,
- "Banking77Classification": 82.05,
- "EmotionClassification": 46.65,
- "ImdbClassification": 65.02,
- "MTOPDomainClassification (en)": 93.17,
- "MTOPIntentClassification (en)": 71.1,
- "MassiveIntentClassification (en)": 68.48,
- "MassiveScenarioClassification (en)": 74.98,
- "ToxicConversationsClassification": 68.15,
- "TweetSentimentExtractionClassification": 53.57
+ "Model": "elser-v2",
+ "AmazonCounterfactualClassification (en)": 74.16,
+ "AmazonPolarityClassification": 61.91,
+ "AmazonReviewsClassification (en)": 32.06,
+ "Banking77Classification": 82.05,
+ "EmotionClassification": 46.65,
+ "ImdbClassification": 65.02,
+ "MTOPDomainClassification (en)": 93.17,
+ "MTOPIntentClassification (en)": 71.1,
+ "MassiveIntentClassification (en)": 68.48,
+ "MassiveScenarioClassification (en)": 74.98,
+ "ToxicConversationsClassification": 68.15,
+ "TweetSentimentExtractionClassification": 53.57
}
]
},
@@ -16245,114 +15371,6 @@
]
}
},
- "e5-mistral-7b-instruct": {
- "BitextMining": {
- "f1": [
- {
- "Model": "e5-mistral-7b-instruct"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "e5-mistral-7b-instruct",
- "AmazonReviewsClassification (fr)": 36.71,
- "MTOPDomainClassification (fr)": 74.8,
- "MTOPIntentClassification (fr)": 53.97,
- "MasakhaNEWSClassification (fra)": 80.59,
- "MassiveIntentClassification (fr)": 46.39,
- "MassiveScenarioClassification (fr)": 53.86
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "e5-mistral-7b-instruct",
- "AlloProfClusteringP2P": 61.06,
- "AlloProfClusteringS2S": 28.12,
- "HALClusteringS2S": 19.69,
- "MLSUMClusteringP2P": 45.59,
- "MLSUMClusteringS2S": 32.0,
- "MasakhaNEWSClusteringP2P (fra)": 52.47,
- "MasakhaNEWSClusteringS2S (fra)": 49.2
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "e5-mistral-7b-instruct",
- "OpusparcusPC (fr)": 88.5,
- "PawsX (fr)": 63.65
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "e5-mistral-7b-instruct",
- "AlloprofReranking": 47.36,
- "SyntecReranking": 77.05
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "e5-mistral-7b-instruct",
- "AILACasedocs": 38.76,
- "AILAStatutes": 38.07,
- "AlloprofRetrieval": 16.46,
- "BSARDRetrieval": 0.0,
- "GerDaLIRSmall": 37.18,
- "LEMBNarrativeQARetrieval": 44.62,
- "LEMBNeedleRetrieval": 48.25,
- "LEMBPasskeyRetrieval": 71.0,
- "LEMBQMSumRetrieval": 43.63,
- "LEMBSummScreenFDRetrieval": 96.82,
- "LEMBWikimQARetrieval": 82.11,
- "LeCaRDv2": 68.56,
- "LegalBenchConsumerContractsQA": 75.46,
- "LegalBenchCorporateLobbying": 94.01,
- "LegalQuAD": 59.64,
- "LegalSummarization": 66.51,
- "MintakaRetrieval (fr)": 3.57,
- "SyntecRetrieval": 55.9,
- "XPQARetrieval (fr)": 41.29
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "e5-mistral-7b-instruct",
- "SICKFr": 64.39,
- "STS22 (fr)": 69.82,
- "STSBenchmarkMultilingualSTS (fr)": 61.87
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "e5-mistral-7b-instruct",
- "SummEvalFr": 32.22
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "e5-mistral-7b-instruct",
- "Core17InstructionRetrieval": 0.09,
- "News21InstructionRetrieval": -0.86,
- "Robust04InstructionRetrieval": -9.59
- }
- ]
- }
- },
"voyage-multilingual-2": {
"BitextMining": {
"f1": [
@@ -16663,150 +15681,12 @@
]
}
},
- "text-embedding-3-large": {
+ "e5-base": {
"BitextMining": {
"f1": [
{
- "Model": "text-embedding-3-large"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "text-embedding-3-large",
- "AmazonCounterfactualClassification (en)": 78.93,
- "AmazonPolarityClassification": 92.85,
- "AmazonReviewsClassification (en)": 48.7,
- "Banking77Classification": 85.69,
- "EmotionClassification": 51.58,
- "ImdbClassification": 87.67,
- "MTOPDomainClassification (en)": 95.36,
- "MTOPIntentClassification (en)": 75.07,
- "MassiveIntentClassification (en)": 74.64,
- "MassiveScenarioClassification (en)": 79.79,
- "ToxicConversationsClassification": 72.92,
- "TweetSentimentExtractionClassification": 62.22
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "text-embedding-3-large",
- "ArxivClusteringP2P": 49.01,
- "ArxivClusteringS2S": 44.45,
- "BiorxivClusteringP2P": 38.03,
- "BiorxivClusteringS2S": 36.53,
- "MedrxivClusteringP2P": 32.7,
- "MedrxivClusteringS2S": 31.27,
- "RedditClustering": 67.84,
- "RedditClusteringP2P": 67.96,
- "StackExchangeClustering": 76.26,
- "StackExchangeClusteringP2P": 36.88,
- "TwentyNewsgroupsClustering": 58.14
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "text-embedding-3-large",
- "SprintDuplicateQuestions": 92.25,
- "TwitterSemEval2015": 77.13,
- "TwitterURLCorpus": 87.78
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "text-embedding-3-large",
- "AskUbuntuDupQuestions": 65.03,
- "MindSmallReranking": 29.86,
- "SciDocsRR": 86.66,
- "StackOverflowDupQuestions": 55.08
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "text-embedding-3-large",
- "AILACasedocs": 39.0,
- "AILAStatutes": 41.31,
- "ArguAna": 58.05,
- "CQADupstackRetrieval": 47.54,
- "ClimateFEVER": 30.27,
- "DBPedia": 44.76,
- "FEVER": 87.94,
- "FiQA2018": 55.0,
- "GerDaLIRSmall": 32.77,
- "HotpotQA": 71.58,
- "LEMBNarrativeQARetrieval": 44.09,
- "LEMBNeedleRetrieval": 29.25,
- "LEMBPasskeyRetrieval": 63.0,
- "LEMBQMSumRetrieval": 32.49,
- "LEMBSummScreenFDRetrieval": 84.8,
- "LEMBWikimQARetrieval": 54.16,
- "LeCaRDv2": 57.2,
- "LegalBenchConsumerContractsQA": 79.39,
- "LegalBenchCorporateLobbying": 95.09,
- "LegalQuAD": 57.47,
- "LegalSummarization": 71.55,
- "MSMARCO": 40.24,
- "NFCorpus": 42.07,
- "NQ": 61.27,
- "QuoraRetrieval": 89.05,
- "SCIDOCS": 23.11,
- "SciFact": 77.77,
- "TRECCOVID": 79.56,
- "Touche2020": 23.35
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "text-embedding-3-large",
- "BIOSSES": 84.68,
- "SICK-R": 79.0,
- "STS12": 72.84,
- "STS13": 86.1,
- "STS14": 81.15,
- "STS15": 88.49,
- "STS16": 85.08,
- "STS17 (en-en)": 90.22,
- "STS22 (en)": 66.14,
- "STSBenchmark": 83.56
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "text-embedding-3-large",
- "SummEval": 29.92
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "text-embedding-3-large",
- "Core17InstructionRetrieval": -0.2,
- "News21InstructionRetrieval": -2.03,
- "Robust04InstructionRetrieval": -5.81
- }
- ]
- }
- },
- "e5-base": {
- "BitextMining": {
- "f1": [
- {
- "Model": "e5-base",
- "BornholmBitextMining": 40.09
+ "Model": "e5-base",
+ "BornholmBitextMining": 40.09
}
]
},
@@ -17063,5 +15943,2408 @@
}
]
}
+ },
+ "SFR-Embedding-Mistral": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "SFR-Embedding-Mistral"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "SFR-Embedding-Mistral"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "SFR-Embedding-Mistral"
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "SFR-Embedding-Mistral"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "SFR-Embedding-Mistral"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "BrightRetrieval (sustainable_living)": 19.79,
+ "BrightRetrieval (economics)": 17.84,
+ "BrightRetrieval (theoremqa_theorems)": 24.05,
+ "BrightRetrieval (aops)": 7.43,
+ "BrightRetrieval (theoremqa_questions)": 23.05,
+ "BrightRetrieval (psychology)": 18.97,
+ "BrightRetrieval (stackoverflow)": 12.72,
+ "BrightRetrieval (pony)": 1.97,
+ "BrightRetrieval (leetcode)": 27.35,
+ "BrightRetrieval (biology)": 19.49,
+ "BrightRetrieval (earth_science)": 26.63,
+ "BrightRetrieval (robotics)": 16.7
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "SFR-Embedding-Mistral"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "SFR-Embedding-Mistral"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "SFR-Embedding-Mistral"
+ }
+ ]
+ }
+ },
+ "gte-Qwen2-7B-instruct": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "gte-Qwen2-7B-instruct"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "gte-Qwen2-7B-instruct"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "gte-Qwen2-7B-instruct"
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "gte-Qwen2-7B-instruct"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "gte-Qwen2-7B-instruct"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "BrightRetrieval (earth_science)": 40.66,
+ "BrightRetrieval (sustainable_living)": 20.82,
+ "BrightRetrieval (theoremqa_theorems)": 28.15,
+ "BrightRetrieval (aops)": 15.1,
+ "BrightRetrieval (economics)": 16.18,
+ "BrightRetrieval (pony)": 1.25,
+ "BrightRetrieval (stackoverflow)": 13.95,
+ "BrightRetrieval (leetcode)": 31.07,
+ "BrightRetrieval (biology)": 32.09,
+ "BrightRetrieval (theoremqa_questions)": 29.9,
+ "BrightRetrieval (robotics)": 12.82,
+ "BrightRetrieval (psychology)": 26.58
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "gte-Qwen2-7B-instruct"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "gte-Qwen2-7B-instruct"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "gte-Qwen2-7B-instruct"
+ }
+ ]
+ }
+ },
+ "google-gecko.text-embedding-preview-0409": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "google-gecko.text-embedding-preview-0409"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "AmazonCounterfactualClassification (en)": 75.34,
+ "AmazonPolarityClassification": 97.34,
+ "AmazonReviewsClassification (en)": 51.17,
+ "Banking77Classification": 88.62,
+ "EmotionClassification": 52.51,
+ "ImdbClassification": 95.65,
+ "MTOPDomainClassification (en)": 98.35,
+ "MTOPIntentClassification (en)": 83.43,
+ "MassiveIntentClassification (en)": 80.22,
+ "MassiveScenarioClassification (en)": 87.19,
+ "ToxicConversationsClassification": 89.67,
+ "TweetSentimentExtractionClassification": 74.52
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "ArxivClusteringP2P": 46.27,
+ "ArxivClusteringS2S": 38.36,
+ "BiorxivClusteringP2P": 37.87,
+ "BiorxivClusteringS2S": 35.67,
+ "MedrxivClusteringP2P": 33.11,
+ "MedrxivClusteringS2S": 31.54,
+ "RedditClustering": 65.81,
+ "RedditClusteringP2P": 66.62,
+ "StackExchangeClustering": 74.52,
+ "StackExchangeClusteringP2P": 37.63,
+ "TwentyNewsgroupsClustering": 54.87
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "SprintDuplicateQuestions": 96.26,
+ "TwitterSemEval2015": 79.04,
+ "TwitterURLCorpus": 87.53
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "AskUbuntuDupQuestions": 64.4,
+ "MindSmallReranking": 33.07,
+ "SciDocsRR": 83.59,
+ "StackOverflowDupQuestions": 54.56
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "ArguAna": 62.18,
+ "BrightRetrieval (earth_science)": 34.38,
+ "BrightRetrieval (leetcode)": 29.64,
+ "BrightRetrieval (theoremqa_questions)": 21.51,
+ "BrightRetrieval (aops)": 9.33,
+ "BrightRetrieval (sustainable_living)": 17.25,
+ "BrightRetrieval (pony)": 3.59,
+ "BrightRetrieval (theoremqa_theorems)": 16.77,
+ "BrightRetrieval (stackoverflow)": 17.93,
+ "BrightRetrieval (biology)": 22.98,
+ "BrightRetrieval (robotics)": 15.98,
+ "BrightRetrieval (economics)": 19.5,
+ "BrightRetrieval (psychology)": 27.86,
+ "CQADupstackRetrieval": 48.89,
+ "ClimateFEVER": 33.21,
+ "DBPedia": 47.12,
+ "FEVER": 86.96,
+ "FiQA2018": 59.24,
+ "HotpotQA": 71.33,
+ "MSMARCO": 32.58,
+ "NFCorpus": 40.33,
+ "NQ": 61.28,
+ "QuoraRetrieval": 88.18,
+ "SCIDOCS": 20.34,
+ "SciFact": 75.42,
+ "TRECCOVID": 82.62,
+ "Touche2020": 25.86
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "BIOSSES": 89.46,
+ "SICK-R": 81.93,
+ "STS12": 77.59,
+ "STS13": 90.36,
+ "STS14": 85.25,
+ "STS15": 89.66,
+ "STS16": 87.34,
+ "STS17 (en-en)": 92.06,
+ "STS22 (en)": 68.02,
+ "STSBenchmark": 88.99
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "SummEval": 32.63
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "Core17InstructionRetrieval": 5.44,
+ "News21InstructionRetrieval": 3.94,
+ "Robust04InstructionRetrieval": -2.4
+ }
+ ]
+ }
+ },
+ "instructor-xl": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "instructor-xl"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "instructor-xl"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "instructor-xl"
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "instructor-xl"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "instructor-xl"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "instructor-xl",
+ "BrightRetrieval (aops)": 8.26,
+ "BrightRetrieval (robotics)": 17.39,
+ "BrightRetrieval (economics)": 22.81,
+ "BrightRetrieval (stackoverflow)": 19.06,
+ "BrightRetrieval (leetcode)": 27.5,
+ "BrightRetrieval (theoremqa_questions)": 14.59,
+ "BrightRetrieval (psychology)": 27.43,
+ "BrightRetrieval (biology)": 21.91,
+ "BrightRetrieval (theoremqa_theorems)": 6.5,
+ "BrightRetrieval (earth_science)": 34.35,
+ "BrightRetrieval (sustainable_living)": 18.82,
+ "BrightRetrieval (pony)": 5.02
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "instructor-xl"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "instructor-xl"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "instructor-xl",
+ "Core17InstructionRetrieval": 0.69,
+ "News21InstructionRetrieval": -0.9,
+ "Robust04InstructionRetrieval": -8.08
+ }
+ ]
+ }
+ },
+ "all-mpnet-base-v2": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "all-mpnet-base-v2",
+ "BornholmBitextMining (dan-Latn)": 27.44,
+ "Tatoeba (pol-Latn_eng-Latn)": 4.09,
+ "Tatoeba (ita-Latn_eng-Latn)": 11.1,
+ "Tatoeba (cat-Latn_eng-Latn)": 9.44,
+ "Tatoeba (aze-Latn_eng-Latn)": 1.49,
+ "Tatoeba (eus-Latn_eng-Latn)": 3.94,
+ "Tatoeba (epo-Latn_eng-Latn)": 7.15,
+ "Tatoeba (lit-Latn_eng-Latn)": 1.02,
+ "Tatoeba (ast-Latn_eng-Latn)": 9.78,
+ "Tatoeba (bul-Cyrl_eng-Latn)": 0.35,
+ "Tatoeba (ceb-Latn_eng-Latn)": 4.41,
+ "Tatoeba (mkd-Cyrl_eng-Latn)": 0.0,
+ "Tatoeba (tzl-Latn_eng-Latn)": 3.55,
+ "Tatoeba (zsm-Latn_eng-Latn)": 4.75,
+ "Tatoeba (mhr-Cyrl_eng-Latn)": 0.17,
+ "Tatoeba (pam-Latn_eng-Latn)": 4.32,
+ "Tatoeba (amh-Ethi_eng-Latn)": 0.0,
+ "Tatoeba (slv-Latn_eng-Latn)": 3.73,
+ "Tatoeba (lvs-Latn_eng-Latn)": 2.98,
+ "Tatoeba (sqi-Latn_eng-Latn)": 3.45,
+ "Tatoeba (orv-Cyrl_eng-Latn)": 0.0,
+ "Tatoeba (vie-Latn_eng-Latn)": 4.96,
+ "Tatoeba (pes-Arab_eng-Latn)": 0.2,
+ "Tatoeba (por-Latn_eng-Latn)": 10.48,
+ "Tatoeba (dtp-Latn_eng-Latn)": 3.54,
+ "Tatoeba (yid-Hebr_eng-Latn)": 0.08,
+ "Tatoeba (isl-Latn_eng-Latn)": 3.86,
+ "Tatoeba (cha-Latn_eng-Latn)": 12.2,
+ "Tatoeba (ron-Latn_eng-Latn)": 7.34,
+ "Tatoeba (hye-Armn_eng-Latn)": 0.14,
+ "Tatoeba (mar-Deva_eng-Latn)": 0.11,
+ "Tatoeba (hin-Deva_eng-Latn)": 0.02,
+ "Tatoeba (kor-Hang_eng-Latn)": 0.32,
+ "Tatoeba (srp-Cyrl_eng-Latn)": 1.89,
+ "Tatoeba (csb-Latn_eng-Latn)": 4.19,
+ "Tatoeba (jpn-Jpan_eng-Latn)": 1.71,
+ "Tatoeba (ber-Tfng_eng-Latn)": 4.56,
+ "Tatoeba (wuu-Hans_eng-Latn)": 0.91,
+ "Tatoeba (jav-Latn_eng-Latn)": 3.17,
+ "Tatoeba (nob-Latn_eng-Latn)": 4.37,
+ "Tatoeba (bre-Latn_eng-Latn)": 3.65,
+ "Tatoeba (kzj-Latn_eng-Latn)": 3.62,
+ "Tatoeba (urd-Arab_eng-Latn)": 0.0,
+ "Tatoeba (ces-Latn_eng-Latn)": 3.56,
+ "Tatoeba (cbk-Latn_eng-Latn)": 9.33,
+ "Tatoeba (gla-Latn_eng-Latn)": 2.04,
+ "Tatoeba (war-Latn_eng-Latn)": 5.14,
+ "Tatoeba (swh-Latn_eng-Latn)": 6.01,
+ "Tatoeba (swg-Latn_eng-Latn)": 7.86,
+ "Tatoeba (glg-Latn_eng-Latn)": 12.0,
+ "Tatoeba (fao-Latn_eng-Latn)": 7.08,
+ "Tatoeba (gsw-Latn_eng-Latn)": 10.67,
+ "Tatoeba (rus-Cyrl_eng-Latn)": 0.14,
+ "Tatoeba (kaz-Cyrl_eng-Latn)": 0.52,
+ "Tatoeba (gle-Latn_eng-Latn)": 2.19,
+ "Tatoeba (slk-Latn_eng-Latn)": 3.4,
+ "Tatoeba (nno-Latn_eng-Latn)": 5.75,
+ "Tatoeba (cor-Latn_eng-Latn)": 2.42,
+ "Tatoeba (nov-Latn_eng-Latn)": 16.61,
+ "Tatoeba (swe-Latn_eng-Latn)": 6.55,
+ "Tatoeba (max-Deva_eng-Latn)": 6.46,
+ "Tatoeba (oci-Latn_eng-Latn)": 8.57,
+ "Tatoeba (lfn-Latn_eng-Latn)": 6.1,
+ "Tatoeba (fra-Latn_eng-Latn)": 16.9,
+ "Tatoeba (ben-Beng_eng-Latn)": 0.0,
+ "Tatoeba (bel-Cyrl_eng-Latn)": 0.65,
+ "Tatoeba (lat-Latn_eng-Latn)": 5.78,
+ "Tatoeba (cmn-Hans_eng-Latn)": 2.22,
+ "Tatoeba (kat-Geor_eng-Latn)": 0.43,
+ "Tatoeba (bos-Latn_eng-Latn)": 4.6,
+ "Tatoeba (xho-Latn_eng-Latn)": 3.3,
+ "Tatoeba (tha-Thai_eng-Latn)": 0.0,
+ "Tatoeba (cym-Latn_eng-Latn)": 4.88,
+ "Tatoeba (deu-Latn_eng-Latn)": 11.46,
+ "Tatoeba (awa-Deva_eng-Latn)": 0.44,
+ "Tatoeba (ido-Latn_eng-Latn)": 9.84,
+ "Tatoeba (tat-Cyrl_eng-Latn)": 0.24,
+ "Tatoeba (kab-Latn_eng-Latn)": 1.31,
+ "Tatoeba (uzb-Latn_eng-Latn)": 1.98,
+ "Tatoeba (heb-Hebr_eng-Latn)": 0.28,
+ "Tatoeba (ara-Arab_eng-Latn)": 0.1,
+ "Tatoeba (fry-Latn_eng-Latn)": 12.43,
+ "Tatoeba (afr-Latn_eng-Latn)": 6.08,
+ "Tatoeba (kur-Latn_eng-Latn)": 3.65,
+ "Tatoeba (pms-Latn_eng-Latn)": 7.63,
+ "Tatoeba (ell-Grek_eng-Latn)": 0.0,
+ "Tatoeba (spa-Latn_eng-Latn)": 10.12,
+ "Tatoeba (dsb-Latn_eng-Latn)": 2.96,
+ "Tatoeba (uig-Arab_eng-Latn)": 0.33,
+ "Tatoeba (nld-Latn_eng-Latn)": 9.29,
+ "Tatoeba (tel-Telu_eng-Latn)": 0.73,
+ "Tatoeba (hrv-Latn_eng-Latn)": 3.77,
+ "Tatoeba (nds-Latn_eng-Latn)": 10.96,
+ "Tatoeba (hun-Latn_eng-Latn)": 3.23,
+ "Tatoeba (est-Latn_eng-Latn)": 2.35,
+ "Tatoeba (mal-Mlym_eng-Latn)": 0.15,
+ "Tatoeba (khm-Khmr_eng-Latn)": 0.28,
+ "Tatoeba (hsb-Latn_eng-Latn)": 3.12,
+ "Tatoeba (tgl-Latn_eng-Latn)": 4.06,
+ "Tatoeba (ang-Latn_eng-Latn)": 9.77,
+ "Tatoeba (tur-Latn_eng-Latn)": 3.16,
+ "Tatoeba (tuk-Latn_eng-Latn)": 2.23,
+ "Tatoeba (ile-Latn_eng-Latn)": 17.84,
+ "Tatoeba (mon-Cyrl_eng-Latn)": 0.81,
+ "Tatoeba (yue-Hant_eng-Latn)": 1.16,
+ "Tatoeba (ina-Latn_eng-Latn)": 22.55,
+ "Tatoeba (tam-Taml_eng-Latn)": 0.73,
+ "Tatoeba (ukr-Cyrl_eng-Latn)": 0.5,
+ "Tatoeba (dan-Latn_eng-Latn)": 10.01,
+ "Tatoeba (arq-Arab_eng-Latn)": 0.33,
+ "Tatoeba (arz-Arab_eng-Latn)": 0.0,
+ "Tatoeba (fin-Latn_eng-Latn)": 3.82,
+ "Tatoeba (ind-Latn_eng-Latn)": 4.88
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "all-mpnet-base-v2",
+ "AllegroReviews (pol-Latn)": 22.99,
+ "AmazonCounterfactualClassification (en-ext)": 67.5,
+ "AmazonCounterfactualClassification (en)": 65.03,
+ "AmazonCounterfactualClassification (deu-Latn)": 55.66,
+ "AmazonCounterfactualClassification (jpn-Jpan)": 60.69,
+ "AmazonPolarityClassification (default)": 67.14,
+ "AmazonReviewsClassification (en)": 31.44,
+ "AmazonReviewsClassification (deu-Latn)": 26.05,
+ "AmazonReviewsClassification (spa-Latn)": 27.73,
+ "AmazonReviewsClassification (fra-Latn)": 28.49,
+ "AmazonReviewsClassification (jpn-Jpan)": 23.65,
+ "AmazonReviewsClassification (cmn-Hans)": 23.62,
+ "AngryTweetsClassification (dan-Latn)": 44.13,
+ "Banking77Classification (default)": 81.7,
+ "CBD (pol-Latn)": 50.25,
+ "DanishPoliticalCommentsClassification (dan-Latn)": 28.31,
+ "EmotionClassification (default)": 42.22,
+ "IFlyTek (cmn-Hans)": 17.18,
+ "ImdbClassification (default)": 71.17,
+ "JDReview (cmn-Hans)": 60.19,
+ "LccSentimentClassification (dan-Latn)": 39.27,
+ "MTOPDomainClassification (en)": 91.89,
+ "MTOPDomainClassification (deu-Latn)": 71.86,
+ "MTOPDomainClassification (spa-Latn)": 71.3,
+ "MTOPDomainClassification (fra-Latn)": 74.88,
+ "MTOPDomainClassification (hin-Deva)": 39.93,
+ "MTOPDomainClassification (tha-Thai)": 17.54,
+ "MTOPIntentClassification (en)": 68.27,
+ "MTOPIntentClassification (deu-Latn)": 44.36,
+ "MTOPIntentClassification (spa-Latn)": 39.48,
+ "MTOPIntentClassification (fra-Latn)": 37.57,
+ "MTOPIntentClassification (hin-Deva)": 18.63,
+ "MTOPIntentClassification (tha-Thai)": 5.42,
+ "MasakhaNEWSClassification (amh-Ethi)": 36.49,
+ "MasakhaNEWSClassification (eng)": 79.75,
+ "MasakhaNEWSClassification (fra-Latn)": 77.77,
+ "MasakhaNEWSClassification (hau-Latn)": 59.22,
+ "MasakhaNEWSClassification (ibo-Latn)": 61.64,
+ "MasakhaNEWSClassification (lin-Latn)": 74.0,
+ "MasakhaNEWSClassification (lug-Latn)": 58.43,
+ "MasakhaNEWSClassification (orm-Ethi)": 48.15,
+ "MasakhaNEWSClassification (pcm-Latn)": 92.2,
+ "MasakhaNEWSClassification (run-Latn)": 64.72,
+ "MasakhaNEWSClassification (sna-Latn)": 73.69,
+ "MasakhaNEWSClassification (som-Latn)": 49.97,
+ "MasakhaNEWSClassification (swa-Latn)": 55.15,
+ "MasakhaNEWSClassification (tir-Ethi)": 27.46,
+ "MasakhaNEWSClassification (xho-Latn)": 60.98,
+ "MasakhaNEWSClassification (yor-Latn)": 63.33,
+ "MassiveIntentClassification (en)": 69.76,
+ "MassiveIntentClassification (jav-Latn)": 31.75,
+ "MassiveIntentClassification (fra-Latn)": 44.27,
+ "MassiveIntentClassification (msa-Latn)": 30.53,
+ "MassiveIntentClassification (hun-Latn)": 34.38,
+ "MassiveIntentClassification (pol-Latn)": 34.26,
+ "MassiveIntentClassification (nld-Latn)": 38.49,
+ "MassiveIntentClassification (tha-Thai)": 8.51,
+ "MassiveIntentClassification (tur-Latn)": 32.02,
+ "MassiveIntentClassification (tam-Taml)": 9.25,
+ "MassiveIntentClassification (hye-Armn)": 10.11,
+ "MassiveIntentClassification (khm-Khmr)": 4.74,
+ "MassiveIntentClassification (lav-Latn)": 35.08,
+ "MassiveIntentClassification (deu-Latn)": 44.54,
+ "MassiveIntentClassification (spa-Latn)": 39.75,
+ "MassiveIntentClassification (ben-Beng)": 12.35,
+ "MassiveIntentClassification (por-Latn)": 42.83,
+ "MassiveIntentClassification (ara-Arab)": 20.42,
+ "MassiveIntentClassification (cym-Latn)": 30.82,
+ "MassiveIntentClassification (dan-Latn)": 42.36,
+ "MassiveIntentClassification (mya-Mymr)": 4.6,
+ "MassiveIntentClassification (heb-Hebr)": 23.6,
+ "MassiveIntentClassification (kan-Knda)": 3.76,
+ "MassiveIntentClassification (swa-Latn)": 31.82,
+ "MassiveIntentClassification (fas-Arab)": 22.45,
+ "MassiveIntentClassification (hin-Deva)": 17.68,
+ "MassiveIntentClassification (kat-Geor)": 7.66,
+ "MassiveIntentClassification (mal-Mlym)": 2.64,
+ "MassiveIntentClassification (fin-Latn)": 34.58,
+ "MassiveIntentClassification (slv-Latn)": 34.49,
+ "MassiveIntentClassification (afr-Latn)": 36.49,
+ "MassiveIntentClassification (urd-Arab)": 12.86,
+ "MassiveIntentClassification (ron-Latn)": 38.07,
+ "MassiveIntentClassification (sqi-Latn)": 37.26,
+ "MassiveIntentClassification (cmo-Hant)": 22.43,
+ "MassiveIntentClassification (ita-Latn)": 40.29,
+ "MassiveIntentClassification (ind-Latn)": 36.31,
+ "MassiveIntentClassification (nob-Latn)": 39.3,
+ "MassiveIntentClassification (jpn-Jpan)": 33.13,
+ "MassiveIntentClassification (aze-Latn)": 28.92,
+ "MassiveIntentClassification (mon-Cyrl)": 19.65,
+ "MassiveIntentClassification (ell-Grek)": 24.52,
+ "MassiveIntentClassification (rus-Cyrl)": 23.98,
+ "MassiveIntentClassification (kor-Kore)": 13.35,
+ "MassiveIntentClassification (cmo-Hans)": 24.36,
+ "MassiveIntentClassification (isl-Latn)": 31.46,
+ "MassiveIntentClassification (swe-Latn)": 39.02,
+ "MassiveIntentClassification (tel-Telu)": 2.26,
+ "MassiveIntentClassification (vie-Latn)": 31.47,
+ "MassiveIntentClassification (tgl-Latn)": 36.33,
+ "MassiveIntentClassification (amh-Ethi)": 2.39,
+ "MassiveScenarioClassification (en)": 75.67,
+ "MassiveScenarioClassification (tur-Latn)": 39.11,
+ "MassiveScenarioClassification (kat-Geor)": 13.45,
+ "MassiveScenarioClassification (jpn-Jpan)": 40.57,
+ "MassiveScenarioClassification (spa-Latn)": 50.92,
+ "MassiveScenarioClassification (fas-Arab)": 27.8,
+ "MassiveScenarioClassification (hun-Latn)": 41.01,
+ "MassiveScenarioClassification (jav-Latn)": 40.0,
+ "MassiveScenarioClassification (por-Latn)": 52.06,
+ "MassiveScenarioClassification (sqi-Latn)": 44.67,
+ "MassiveScenarioClassification (lav-Latn)": 39.28,
+ "MassiveScenarioClassification (deu-Latn)": 54.09,
+ "MassiveScenarioClassification (nld-Latn)": 47.79,
+ "MassiveScenarioClassification (mon-Cyrl)": 25.58,
+ "MassiveScenarioClassification (swa-Latn)": 40.34,
+ "MassiveScenarioClassification (ben-Beng)": 17.49,
+ "MassiveScenarioClassification (cym-Latn)": 34.82,
+ "MassiveScenarioClassification (swe-Latn)": 44.53,
+ "MassiveScenarioClassification (rus-Cyrl)": 28.71,
+ "MassiveScenarioClassification (fra-Latn)": 54.26,
+ "MassiveScenarioClassification (dan-Latn)": 49.45,
+ "MassiveScenarioClassification (mya-Mymr)": 10.8,
+ "MassiveScenarioClassification (ron-Latn)": 47.86,
+ "MassiveScenarioClassification (cmo-Hans)": 35.33,
+ "MassiveScenarioClassification (hin-Deva)": 23.13,
+ "MassiveScenarioClassification (cmo-Hant)": 31.7,
+ "MassiveScenarioClassification (afr-Latn)": 43.63,
+ "MassiveScenarioClassification (aze-Latn)": 36.42,
+ "MassiveScenarioClassification (msa-Latn)": 37.28,
+ "MassiveScenarioClassification (ell-Grek)": 33.85,
+ "MassiveScenarioClassification (isl-Latn)": 39.36,
+ "MassiveScenarioClassification (fin-Latn)": 38.41,
+ "MassiveScenarioClassification (ind-Latn)": 43.05,
+ "MassiveScenarioClassification (pol-Latn)": 42.66,
+ "MassiveScenarioClassification (tam-Taml)": 14.55,
+ "MassiveScenarioClassification (ita-Latn)": 51.37,
+ "MassiveScenarioClassification (urd-Arab)": 20.0,
+ "MassiveScenarioClassification (kan-Knda)": 8.34,
+ "MassiveScenarioClassification (tel-Telu)": 7.81,
+ "MassiveScenarioClassification (mal-Mlym)": 7.69,
+ "MassiveScenarioClassification (ara-Arab)": 27.8,
+ "MassiveScenarioClassification (kor-Kore)": 17.28,
+ "MassiveScenarioClassification (vie-Latn)": 35.9,
+ "MassiveScenarioClassification (amh-Ethi)": 7.43,
+ "MassiveScenarioClassification (heb-Hebr)": 25.49,
+ "MassiveScenarioClassification (hye-Armn)": 16.86,
+ "MassiveScenarioClassification (khm-Khmr)": 9.63,
+ "MassiveScenarioClassification (slv-Latn)": 39.88,
+ "MassiveScenarioClassification (tgl-Latn)": 47.04,
+ "MassiveScenarioClassification (nob-Latn)": 45.75,
+ "MassiveScenarioClassification (tha-Thai)": 17.01,
+ "MultilingualSentiment (cmn-Hans)": 41.2,
+ "NoRecClassification (nob-Latn)": 38.34,
+ "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 50.15,
+ "OnlineShopping (cmn-Hans)": 56.94,
+ "PAC (pol-Latn)": 62.1,
+ "PolEmo2.0-IN (pol-Latn)": 41.63,
+ "PolEmo2.0-OUT (pol-Latn)": 25.0,
+ "TNews (cmn-Hans)": 21.05,
+ "ToxicConversationsClassification (default)": 61.05,
+ "TweetSentimentExtractionClassification (default)": 55.05,
+ "Waimai (cmn-Hans)": 63.31
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "all-mpnet-base-v2",
+ "ArxivClusteringP2P": 48.38,
+ "ArxivClusteringS2S": 39.72,
+ "BiorxivClusteringP2P": 39.62,
+ "BiorxivClusteringS2S": 35.02,
+ "MasakhaNEWSClusteringP2P (amh-Ethi)": 42.49,
+ "MasakhaNEWSClusteringP2P (eng)": 67.24,
+ "MasakhaNEWSClusteringP2P (fra-Latn)": 61.99,
+ "MasakhaNEWSClusteringP2P (hau-Latn)": 37.17,
+ "MasakhaNEWSClusteringP2P (ibo-Latn)": 52.0,
+ "MasakhaNEWSClusteringP2P (lin-Latn)": 69.68,
+ "MasakhaNEWSClusteringP2P (lug-Latn)": 50.96,
+ "MasakhaNEWSClusteringP2P (orm-Ethi)": 28.42,
+ "MasakhaNEWSClusteringP2P (pcm-Latn)": 64.01,
+ "MasakhaNEWSClusteringP2P (run-Latn)": 57.6,
+ "MasakhaNEWSClusteringP2P (sna-Latn)": 54.99,
+ "MasakhaNEWSClusteringP2P (som-Latn)": 31.16,
+ "MasakhaNEWSClusteringP2P (swa-Latn)": 28.29,
+ "MasakhaNEWSClusteringP2P (tir-Ethi)": 41.85,
+ "MasakhaNEWSClusteringP2P (xho-Latn)": 35.24,
+ "MasakhaNEWSClusteringP2P (yor-Latn)": 42.15,
+ "MasakhaNEWSClusteringS2S (amh-Ethi)": 44.48,
+ "MasakhaNEWSClusteringS2S (eng)": 35.69,
+ "MasakhaNEWSClusteringS2S (fra-Latn)": 41.05,
+ "MasakhaNEWSClusteringS2S (hau-Latn)": 16.64,
+ "MasakhaNEWSClusteringS2S (ibo-Latn)": 38.63,
+ "MasakhaNEWSClusteringS2S (lin-Latn)": 70.72,
+ "MasakhaNEWSClusteringS2S (lug-Latn)": 46.97,
+ "MasakhaNEWSClusteringS2S (orm-Ethi)": 23.85,
+ "MasakhaNEWSClusteringS2S (pcm-Latn)": 68.7,
+ "MasakhaNEWSClusteringS2S (run-Latn)": 52.27,
+ "MasakhaNEWSClusteringS2S (sna-Latn)": 47.64,
+ "MasakhaNEWSClusteringS2S (som-Latn)": 30.94,
+ "MasakhaNEWSClusteringS2S (swa-Latn)": 17.12,
+ "MasakhaNEWSClusteringS2S (tir-Ethi)": 42.01,
+ "MasakhaNEWSClusteringS2S (xho-Latn)": 24.16,
+ "MasakhaNEWSClusteringS2S (yor-Latn)": 35.04,
+ "MedrxivClusteringP2P": 35.58,
+ "MedrxivClusteringS2S": 32.87,
+ "RedditClustering": 54.82,
+ "RedditClusteringP2P": 56.77,
+ "StackExchangeClustering": 53.8,
+ "StackExchangeClusteringP2P": 34.28,
+ "TwentyNewsgroupsClustering": 49.74
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "all-mpnet-base-v2",
+ "CDSC-E (pol-Latn)": 45.37,
+ "OpusparcusPC (deu-Latn)": 89.78,
+ "OpusparcusPC (en)": 97.75,
+ "OpusparcusPC (fin-Latn)": 85.82,
+ "OpusparcusPC (fra-Latn)": 86.61,
+ "OpusparcusPC (rus-Cyrl)": 79.85,
+ "OpusparcusPC (swe-Latn)": 81.81,
+ "PSC (pol-Latn)": 83.28,
+ "PawsXPairClassification (deu-Latn)": 52.17,
+ "PawsXPairClassification (en)": 61.99,
+ "PawsXPairClassification (spa-Latn)": 55.06,
+ "PawsXPairClassification (fra-Latn)": 56.42,
+ "PawsXPairClassification (jpn-Hira)": 47.43,
+ "PawsXPairClassification (kor-Hang)": 49.75,
+ "PawsXPairClassification (cmn-Hans)": 52.47,
+ "SICK-E-PL (pol-Latn)": 46.51,
+ "SprintDuplicateQuestions (default)": 90.15,
+ "TwitterSemEval2015 (default)": 73.85,
+ "TwitterURLCorpus (default)": 85.11
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "all-mpnet-base-v2",
+ "AlloprofReranking (fra-Latn)": 69.63,
+ "AskUbuntuDupQuestions (default)": 65.85,
+ "MMarcoReranking (cmn-Hans)": 4.65,
+ "MindSmallReranking (default)": 30.97,
+ "SciDocsRR (default)": 88.65,
+ "StackOverflowDupQuestions (default)": 51.98,
+ "SyntecReranking (fra-Latn)": 66.12,
+ "T2Reranking (cmn-Hans)": 58.3
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "all-mpnet-base-v2",
+ "AILACasedocs (default)": 22.51,
+ "AILAStatutes (default)": 21.27,
+ "AlloprofRetrieval (fra-Latn)": 34.27,
+ "ArguAna": 46.52,
+ "ArguAna (default)": 46.52,
+ "ArguAna-PL (pol-Latn)": 14.72,
+ "BSARDRetrieval (fra-Latn)": 6.98,
+ "BrightRetrieval (robotics)": 8.36,
+ "BrightRetrieval (psychology)": 22.63,
+ "BrightRetrieval (leetcode)": 26.4,
+ "BrightRetrieval (biology)": 15.52,
+ "BrightRetrieval (theoremqa_questions)": 18.49,
+ "BrightRetrieval (economics)": 16.64,
+ "BrightRetrieval (stackoverflow)": 9.48,
+ "BrightRetrieval (pony)": 6.95,
+ "BrightRetrieval (earth_science)": 20.11,
+ "BrightRetrieval (theoremqa_theorems)": 12.38,
+ "BrightRetrieval (sustainable_living)": 15.34,
+ "BrightRetrieval (aops)": 5.32,
+ "CQADupstackRetrieval": 44.96,
+ "ClimateFEVER": 21.97,
+ "CmedqaRetrieval (cmn-Hans)": 2.0,
+ "CovidRetrieval (cmn-Hans)": 3.7,
+ "DBPedia": 32.09,
+ "DuRetrieval (cmn-Hans)": 4.92,
+ "EcomRetrieval (cmn-Hans)": 3.94,
+ "FEVER": 50.86,
+ "FiQA-PL (pol-Latn)": 3.6,
+ "FiQA2018": 49.96,
+ "FiQA2018 (default)": 49.96,
+ "GerDaLIRSmall (deu-Latn)": 3.78,
+ "HotpotQA": 39.29,
+ "LEMBNarrativeQARetrieval (default)": 19.34,
+ "LEMBNeedleRetrieval": 16.0,
+ "LEMBPasskeyRetrieval": 24.5,
+ "LEMBQMSumRetrieval (default)": 21.54,
+ "LEMBSummScreenFDRetrieval (default)": 60.43,
+ "LEMBWikimQARetrieval (default)": 44.92,
+ "LeCaRDv2 (zho-Hans)": 18.09,
+ "LegalBenchConsumerContractsQA (default)": 75.25,
+ "LegalBenchCorporateLobbying (default)": 89.04,
+ "LegalQuAD (deu-Latn)": 10.67,
+ "LegalSummarization (default)": 58.55,
+ "MMarcoRetrieval (cmn-Hans)": 7.13,
+ "MSMARCO": 39.75,
+ "MedicalRetrieval (cmn-Hans)": 1.71,
+ "MintakaRetrieval (ara-Arab)": 1.97,
+ "MintakaRetrieval (deu-Latn)": 17.21,
+ "MintakaRetrieval (spa-Latn)": 10.11,
+ "MintakaRetrieval (fra-Latn)": 12.93,
+ "MintakaRetrieval (hin-Deva)": 2.05,
+ "MintakaRetrieval (ita-Latn)": 5.63,
+ "MintakaRetrieval (jpn-Hira)": 6.72,
+ "MintakaRetrieval (por-Latn)": 8.05,
+ "NFCorpus": 33.29,
+ "NFCorpus (default)": 33.29,
+ "NFCorpus-PL (pol-Latn)": 8.77,
+ "NQ": 50.45,
+ "QuoraRetrieval": 87.46,
+ "SCIDOCS": 23.76,
+ "SCIDOCS (default)": 23.76,
+ "SCIDOCS-PL (pol-Latn)": 4.02,
+ "SciFact": 65.57,
+ "SciFact (default)": 65.57,
+ "SciFact-PL (pol-Latn)": 13.31,
+ "SyntecRetrieval (fra-Latn)": 57.39,
+ "T2Retrieval (cmn-Hans)": 2.98,
+ "TRECCOVID": 51.33,
+ "TRECCOVID (default)": 51.33,
+ "TRECCOVID-PL (pol-Latn)": 12.12,
+ "Touche2020": 19.93,
+ "Touche2020 (default)": 19.93,
+ "VideoRetrieval (cmn-Hans)": 8.48,
+ "XPQARetrieval (ara-Arab_ara-Arab)": 9.42,
+ "XPQARetrieval (eng-Latn_ara-Arab)": 2.39,
+ "XPQARetrieval (ara-Arab_eng-Latn)": 8.98,
+ "XPQARetrieval (deu-Latn_deu-Latn)": 55.82,
+ "XPQARetrieval (eng-Latn_deu-Latn)": 11.74,
+ "XPQARetrieval (deu-Latn_eng-Latn)": 30.44,
+ "XPQARetrieval (spa-Latn_spa-Latn)": 40.01,
+ "XPQARetrieval (eng-Latn_spa-Latn)": 6.12,
+ "XPQARetrieval (spa-Latn_eng-Latn)": 29.44,
+ "XPQARetrieval (fra-Latn_fra-Latn)": 51.94,
+ "XPQARetrieval (eng-Latn_fra-Latn)": 11.48,
+ "XPQARetrieval (fra-Latn_eng-Latn)": 32.52,
+ "XPQARetrieval (hin-Deva_hin-Deva)": 37.48,
+ "XPQARetrieval (eng-Latn_hin-Deva)": 5.11,
+ "XPQARetrieval (hin-Deva_eng-Latn)": 7.37,
+ "XPQARetrieval (ita-Latn_ita-Latn)": 54.2,
+ "XPQARetrieval (eng-Latn_ita-Latn)": 6.08,
+ "XPQARetrieval (ita-Latn_eng-Latn)": 30.32,
+ "XPQARetrieval (jpn-Hira_jpn-Hira)": 37.45,
+ "XPQARetrieval (eng-Latn_jpn-Hira)": 5.79,
+ "XPQARetrieval (jpn-Hira_eng-Latn)": 14.77,
+ "XPQARetrieval (kor-Hang_kor-Hang)": 10.4,
+ "XPQARetrieval (eng-Latn_kor-Hang)": 7.09,
+ "XPQARetrieval (kor-Hang_eng-Latn)": 6.95,
+ "XPQARetrieval (pol-Latn_pol-Latn)": 23.67,
+ "XPQARetrieval (eng-Latn_pol-Latn)": 8.83,
+ "XPQARetrieval (pol-Latn_eng-Latn)": 15.94,
+ "XPQARetrieval (por-Latn_por-Latn)": 33.56,
+ "XPQARetrieval (eng-Latn_por-Latn)": 3.76,
+ "XPQARetrieval (por-Latn_eng-Latn)": 23.45,
+ "XPQARetrieval (tam-Taml_tam-Taml)": 5.53,
+ "XPQARetrieval (eng-Latn_tam-Taml)": 3.3,
+ "XPQARetrieval (tam-Taml_eng-Latn)": 4.0,
+ "XPQARetrieval (cmn-Hans_cmn-Hans)": 23.84,
+ "XPQARetrieval (eng-Latn_cmn-Hans)": 7.2,
+ "XPQARetrieval (cmn-Hans_eng-Latn)": 12.84
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "all-mpnet-base-v2",
+ "AFQMC (cmn-Hans)": 8.01,
+ "ATEC (cmn-Hans)": 14.03,
+ "BIOSSES (default)": 80.43,
+ "BQ (cmn-Hans)": 21.39,
+ "CDSC-R (pol-Latn)": 77.04,
+ "LCQMC (cmn-Hans)": 22.84,
+ "PAWSX (cmn-Hans)": 6.44,
+ "SICK-R (default)": 80.59,
+ "SICK-R-PL (pol-Latn)": 50.2,
+ "SICKFr (fra-Latn)": 67.05,
+ "STS12 (default)": 72.63,
+ "STS13 (default)": 83.48,
+ "STS14 (default)": 78.0,
+ "STS15 (default)": 85.66,
+ "STS16 (default)": 80.03,
+ "STS17 (en-en)": 90.6,
+ "STS17 (eng-Latn_ara-Arab)": 6.76,
+ "STS17 (fra-Latn_eng-Latn)": 41.64,
+ "STS17 (eng-Latn_tur-Latn)": -4.58,
+ "STS17 (eng-Latn_deu-Latn)": 35.5,
+ "STS17 (spa-Latn_eng-Latn)": 25.28,
+ "STS17 (ita-Latn_eng-Latn)": 31.8,
+ "STS17 (spa-Latn)": 78.4,
+ "STS17 (kor-Hang)": 39.11,
+ "STS17 (ara-Arab)": 55.42,
+ "STS17 (nld-Latn_eng-Latn)": 32.89,
+ "STS22 (en)": 68.39,
+ "STS22 (spa-Latn_eng-Latn)": 55.09,
+ "STS22 (deu-Latn_pol-Latn)": 23.53,
+ "STS22 (cmn-Hans_eng-Latn)": 40.47,
+ "STS22 (pol-Latn)": 24.21,
+ "STS22 (tur-Latn)": 29.35,
+ "STS22 (spa-Latn_ita-Latn)": 41.61,
+ "STS22 (fra-Latn_pol-Latn)": 73.25,
+ "STS22 (rus-Cyrl)": 15.83,
+ "STS22 (deu-Latn)": 27.0,
+ "STS22 (spa-Latn)": 55.98,
+ "STS22 (pol-Latn_eng-Latn)": 51.07,
+ "STS22 (fra-Latn)": 77.1,
+ "STS22 (deu-Latn_eng-Latn)": 49.73,
+ "STS22 (ara-Arab)": 38.96,
+ "STS22 (deu-Latn_fra-Latn)": 31.39,
+ "STS22 (ita-Latn)": 58.02,
+ "STS22 (cmn-Hans)": 42.24,
+ "STSB (cmn-Hans)": 37.7,
+ "STSBenchmark (default)": 83.42,
+ "STSBenchmarkMultilingualSTS (nld-Latn)": 57.01,
+ "STSBenchmarkMultilingualSTS (rus-Cyrl)": 55.54,
+ "STSBenchmarkMultilingualSTS (fra-Latn)": 65.15,
+ "STSBenchmarkMultilingualSTS (ita-Latn)": 62.72,
+ "STSBenchmarkMultilingualSTS (spa-Latn)": 65.78,
+ "STSBenchmarkMultilingualSTS (en)": 83.42,
+ "STSBenchmarkMultilingualSTS (deu-Latn)": 61.43,
+ "STSBenchmarkMultilingualSTS (por-Latn)": 62.12,
+ "STSBenchmarkMultilingualSTS (cmn-Hans)": 39.43,
+ "STSBenchmarkMultilingualSTS (pol-Latn)": 52.36
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "all-mpnet-base-v2",
+ "SummEval (default)": 27.49,
+ "SummEvalFr (fra-Latn)": 28.11
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "all-mpnet-base-v2"
+ }
+ ]
+ }
+ },
+ "Cohere-embed-english-v3.0": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "Cohere-embed-english-v3.0"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "Cohere-embed-english-v3.0"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "Cohere-embed-english-v3.0"
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "Cohere-embed-english-v3.0"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "Cohere-embed-english-v3.0"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "AILACasedocs": 31.54,
+ "AILAStatutes": 27.15,
+ "BrightRetrieval (psychology)": 21.82,
+ "BrightRetrieval (economics)": 20.18,
+ "BrightRetrieval (robotics)": 16.21,
+ "BrightRetrieval (biology)": 18.98,
+ "BrightRetrieval (stackoverflow)": 16.47,
+ "BrightRetrieval (theoremqa_theorems)": 6.04,
+ "BrightRetrieval (pony)": 1.77,
+ "BrightRetrieval (sustainable_living)": 17.69,
+ "BrightRetrieval (aops)": 6.46,
+ "BrightRetrieval (theoremqa_questions)": 15.07,
+ "BrightRetrieval (leetcode)": 26.78,
+ "BrightRetrieval (earth_science)": 27.45,
+ "GerDaLIRSmall": 6.05,
+ "LeCaRDv2": 21.02,
+ "LegalBenchConsumerContractsQA": 77.12,
+ "LegalBenchCorporateLobbying": 93.68,
+ "LegalQuAD": 26.08,
+ "LegalSummarization": 61.7
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "Cohere-embed-english-v3.0"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "Cohere-embed-english-v3.0"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "Core17InstructionRetrieval": 2.8,
+ "News21InstructionRetrieval": 0.2,
+ "Robust04InstructionRetrieval": -3.63
+ }
+ ]
+ }
+ },
+ "bge-large-en-v1.5": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "bge-large-en-v1.5"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "bge-large-en-v1.5"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "bge-large-en-v1.5"
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "bge-large-en-v1.5"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "bge-large-en-v1.5"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "bge-large-en-v1.5",
+ "AILACasedocs": 25.15,
+ "AILAStatutes": 20.74,
+ "BrightRetrieval (stackoverflow)": 9.51,
+ "BrightRetrieval (earth_science)": 24.15,
+ "BrightRetrieval (aops)": 6.08,
+ "BrightRetrieval (sustainable_living)": 13.27,
+ "BrightRetrieval (psychology)": 17.44,
+ "BrightRetrieval (robotics)": 12.21,
+ "BrightRetrieval (theoremqa_theorems)": 5.51,
+ "BrightRetrieval (pony)": 5.64,
+ "BrightRetrieval (biology)": 11.96,
+ "BrightRetrieval (theoremqa_questions)": 12.56,
+ "BrightRetrieval (leetcode)": 26.68,
+ "BrightRetrieval (economics)": 16.59,
+ "GerDaLIRSmall": 3.96,
+ "LeCaRDv2": 22.68,
+ "LegalBenchConsumerContractsQA": 73.52,
+ "LegalBenchCorporateLobbying": 91.51,
+ "LegalQuAD": 16.22,
+ "LegalSummarization": 59.99
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "bge-large-en-v1.5"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "bge-large-en-v1.5"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "bge-large-en-v1.5"
+ }
+ ]
+ }
+ },
+ "GritLM-7B": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "GritLM-7B",
+ "BornholmBitextMining (dan-Latn)": 45.13,
+ "Tatoeba (csb-Latn_eng-Latn)": 50.13,
+ "Tatoeba (ceb-Latn_eng-Latn)": 33.5,
+ "Tatoeba (cmn-Hans_eng-Latn)": 94.08,
+ "Tatoeba (uzb-Latn_eng-Latn)": 41.69,
+ "Tatoeba (kur-Latn_eng-Latn)": 27.94,
+ "Tatoeba (ita-Latn_eng-Latn)": 91.2,
+ "Tatoeba (lvs-Latn_eng-Latn)": 53.54,
+ "Tatoeba (yid-Hebr_eng-Latn)": 17.13,
+ "Tatoeba (gle-Latn_eng-Latn)": 48.14,
+ "Tatoeba (ast-Latn_eng-Latn)": 79.11,
+ "Tatoeba (ang-Latn_eng-Latn)": 76.84,
+ "Tatoeba (jav-Latn_eng-Latn)": 26.6,
+ "Tatoeba (ina-Latn_eng-Latn)": 91.24,
+ "Tatoeba (nob-Latn_eng-Latn)": 93.53,
+ "Tatoeba (swe-Latn_eng-Latn)": 90.43,
+ "Tatoeba (lfn-Latn_eng-Latn)": 62.23,
+ "Tatoeba (fin-Latn_eng-Latn)": 85.76,
+ "Tatoeba (fry-Latn_eng-Latn)": 61.16,
+ "Tatoeba (gsw-Latn_eng-Latn)": 53.28,
+ "Tatoeba (rus-Cyrl_eng-Latn)": 91.82,
+ "Tatoeba (tat-Cyrl_eng-Latn)": 24.46,
+ "Tatoeba (mal-Mlym_eng-Latn)": 33.79,
+ "Tatoeba (hrv-Latn_eng-Latn)": 91.04,
+ "Tatoeba (ind-Latn_eng-Latn)": 90.05,
+ "Tatoeba (tam-Taml_eng-Latn)": 46.27,
+ "Tatoeba (kaz-Cyrl_eng-Latn)": 36.27,
+ "Tatoeba (uig-Arab_eng-Latn)": 22.6,
+ "Tatoeba (slv-Latn_eng-Latn)": 82.71,
+ "Tatoeba (pms-Latn_eng-Latn)": 50.41,
+ "Tatoeba (lit-Latn_eng-Latn)": 56.36,
+ "Tatoeba (cha-Latn_eng-Latn)": 34.69,
+ "Tatoeba (est-Latn_eng-Latn)": 46.73,
+ "Tatoeba (mhr-Cyrl_eng-Latn)": 10.8,
+ "Tatoeba (dan-Latn_eng-Latn)": 92.01,
+ "Tatoeba (pol-Latn_eng-Latn)": 95.6,
+ "Tatoeba (nov-Latn_eng-Latn)": 64.85,
+ "Tatoeba (swh-Latn_eng-Latn)": 46.09,
+ "Tatoeba (tha-Thai_eng-Latn)": 81.25,
+ "Tatoeba (arz-Arab_eng-Latn)": 52.97,
+ "Tatoeba (epo-Latn_eng-Latn)": 76.87,
+ "Tatoeba (deu-Latn_eng-Latn)": 98.02,
+ "Tatoeba (hye-Armn_eng-Latn)": 35.94,
+ "Tatoeba (afr-Latn_eng-Latn)": 79.17,
+ "Tatoeba (gla-Latn_eng-Latn)": 40.8,
+ "Tatoeba (isl-Latn_eng-Latn)": 74.94,
+ "Tatoeba (awa-Deva_eng-Latn)": 44.31,
+ "Tatoeba (ido-Latn_eng-Latn)": 65.69,
+ "Tatoeba (kor-Hang_eng-Latn)": 87.43,
+ "Tatoeba (amh-Ethi_eng-Latn)": 6.18,
+ "Tatoeba (eus-Latn_eng-Latn)": 31.88,
+ "Tatoeba (mkd-Cyrl_eng-Latn)": 73.82,
+ "Tatoeba (tur-Latn_eng-Latn)": 86.62,
+ "Tatoeba (pes-Arab_eng-Latn)": 78.98,
+ "Tatoeba (heb-Hebr_eng-Latn)": 61.75,
+ "Tatoeba (aze-Latn_eng-Latn)": 64.11,
+ "Tatoeba (hun-Latn_eng-Latn)": 88.54,
+ "Tatoeba (bul-Cyrl_eng-Latn)": 90.37,
+ "Tatoeba (kab-Latn_eng-Latn)": 2.9,
+ "Tatoeba (cat-Latn_eng-Latn)": 90.66,
+ "Tatoeba (dsb-Latn_eng-Latn)": 51.72,
+ "Tatoeba (kat-Geor_eng-Latn)": 38.42,
+ "Tatoeba (urd-Arab_eng-Latn)": 68.02,
+ "Tatoeba (wuu-Hans_eng-Latn)": 80.28,
+ "Tatoeba (oci-Latn_eng-Latn)": 58.12,
+ "Tatoeba (arq-Arab_eng-Latn)": 30.52,
+ "Tatoeba (ron-Latn_eng-Latn)": 90.29,
+ "Tatoeba (bos-Latn_eng-Latn)": 87.33,
+ "Tatoeba (nds-Latn_eng-Latn)": 64.54,
+ "Tatoeba (tgl-Latn_eng-Latn)": 83.24,
+ "Tatoeba (glg-Latn_eng-Latn)": 86.69,
+ "Tatoeba (ben-Beng_eng-Latn)": 61.32,
+ "Tatoeba (khm-Khmr_eng-Latn)": 16.4,
+ "Tatoeba (ukr-Cyrl_eng-Latn)": 90.19,
+ "Tatoeba (max-Deva_eng-Latn)": 51.87,
+ "Tatoeba (lat-Latn_eng-Latn)": 80.43,
+ "Tatoeba (xho-Latn_eng-Latn)": 28.43,
+ "Tatoeba (spa-Latn_eng-Latn)": 96.75,
+ "Tatoeba (tzl-Latn_eng-Latn)": 42.85,
+ "Tatoeba (ara-Arab_eng-Latn)": 76.77,
+ "Tatoeba (vie-Latn_eng-Latn)": 91.32,
+ "Tatoeba (ces-Latn_eng-Latn)": 92.02,
+ "Tatoeba (jpn-Jpan_eng-Latn)": 91.9,
+ "Tatoeba (bel-Cyrl_eng-Latn)": 76.21,
+ "Tatoeba (mon-Cyrl_eng-Latn)": 27.38,
+ "Tatoeba (nld-Latn_eng-Latn)": 94.96,
+ "Tatoeba (war-Latn_eng-Latn)": 27.75,
+ "Tatoeba (bre-Latn_eng-Latn)": 12.59,
+ "Tatoeba (por-Latn_eng-Latn)": 93.41,
+ "Tatoeba (ile-Latn_eng-Latn)": 76.72,
+ "Tatoeba (mar-Deva_eng-Latn)": 51.54,
+ "Tatoeba (fao-Latn_eng-Latn)": 62.03,
+ "Tatoeba (slk-Latn_eng-Latn)": 84.96,
+ "Tatoeba (tel-Telu_eng-Latn)": 24.26,
+ "Tatoeba (cym-Latn_eng-Latn)": 50.03,
+ "Tatoeba (srp-Cyrl_eng-Latn)": 88.45,
+ "Tatoeba (swg-Latn_eng-Latn)": 52.09,
+ "Tatoeba (hin-Deva_eng-Latn)": 84.19,
+ "Tatoeba (yue-Hant_eng-Latn)": 79.5,
+ "Tatoeba (fra-Latn_eng-Latn)": 92.47,
+ "Tatoeba (cor-Latn_eng-Latn)": 6.97,
+ "Tatoeba (hsb-Latn_eng-Latn)": 64.48,
+ "Tatoeba (zsm-Latn_eng-Latn)": 90.06,
+ "Tatoeba (ber-Tfng_eng-Latn)": 6.2,
+ "Tatoeba (pam-Latn_eng-Latn)": 12.11,
+ "Tatoeba (kzj-Latn_eng-Latn)": 9.61,
+ "Tatoeba (dtp-Latn_eng-Latn)": 8.37,
+ "Tatoeba (nno-Latn_eng-Latn)": 80.89,
+ "Tatoeba (ell-Grek_eng-Latn)": 80.13,
+ "Tatoeba (orv-Cyrl_eng-Latn)": 45.88,
+ "Tatoeba (sqi-Latn_eng-Latn)": 54.37,
+ "Tatoeba (tuk-Latn_eng-Latn)": 30.47,
+ "Tatoeba (cbk-Latn_eng-Latn)": 67.64
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "GritLM-7B",
+ "AllegroReviews (pol-Latn)": 37.32,
+ "AmazonCounterfactualClassification (en-ext)": 70.34,
+ "AmazonCounterfactualClassification (en)": 71.1,
+ "AmazonCounterfactualClassification (deu-Latn)": 67.63,
+ "AmazonCounterfactualClassification (jpn-Jpan)": 73.3,
+ "AmazonPolarityClassification (default)": 86.69,
+ "AmazonReviewsClassification (en)": 45.51,
+ "AmazonReviewsClassification (deu-Latn)": 43.77,
+ "AmazonReviewsClassification (spa-Latn)": 43.0,
+ "AmazonReviewsClassification (fra-Latn)": 44.15,
+ "AmazonReviewsClassification (jpn-Jpan)": 41.49,
+ "AmazonReviewsClassification (cmn-Hans)": 35.34,
+ "AngryTweetsClassification (dan-Latn)": 54.68,
+ "Banking77Classification (default)": 79.36,
+ "CBD (pol-Latn)": 70.98,
+ "DanishPoliticalCommentsClassification (dan-Latn)": 37.69,
+ "EmotionClassification (default)": 48.79,
+ "IFlyTek (cmn-Hans)": 48.49,
+ "ImdbClassification (default)": 82.25,
+ "JDReview (cmn-Hans)": 84.02,
+ "LccSentimentClassification (dan-Latn)": 57.2,
+ "MTOPDomainClassification (en)": 92.67,
+ "MTOPDomainClassification (deu-Latn)": 88.32,
+ "MTOPDomainClassification (spa-Latn)": 88.45,
+ "MTOPDomainClassification (fra-Latn)": 88.44,
+ "MTOPDomainClassification (hin-Deva)": 86.89,
+ "MTOPDomainClassification (tha-Thai)": 82.97,
+ "MTOPIntentClassification (en)": 69.77,
+ "MTOPIntentClassification (deu-Latn)": 69.53,
+ "MTOPIntentClassification (spa-Latn)": 67.49,
+ "MTOPIntentClassification (fra-Latn)": 65.93,
+ "MTOPIntentClassification (hin-Deva)": 59.47,
+ "MTOPIntentClassification (tha-Thai)": 65.14,
+ "MasakhaNEWSClassification (amh-Ethi)": 53.06,
+ "MasakhaNEWSClassification (eng)": 77.57,
+ "MasakhaNEWSClassification (fra-Latn)": 77.39,
+ "MasakhaNEWSClassification (hau-Latn)": 74.66,
+ "MasakhaNEWSClassification (ibo-Latn)": 68.64,
+ "MasakhaNEWSClassification (lin-Latn)": 74.23,
+ "MasakhaNEWSClassification (lug-Latn)": 72.33,
+ "MasakhaNEWSClassification (orm-Ethi)": 77.6,
+ "MasakhaNEWSClassification (pcm-Latn)": 91.28,
+ "MasakhaNEWSClassification (run-Latn)": 76.3,
+ "MasakhaNEWSClassification (sna-Latn)": 85.99,
+ "MasakhaNEWSClassification (som-Latn)": 63.71,
+ "MasakhaNEWSClassification (swa-Latn)": 73.4,
+ "MasakhaNEWSClassification (tir-Ethi)": 34.41,
+ "MasakhaNEWSClassification (xho-Latn)": 83.27,
+ "MasakhaNEWSClassification (yor-Latn)": 80.92,
+ "MassiveIntentClassification (mya-Mymr)": 36.92,
+ "MassiveIntentClassification (en)": 71.52,
+ "MassiveIntentClassification (slv-Latn)": 63.08,
+ "MassiveIntentClassification (sqi-Latn)": 50.98,
+ "MassiveIntentClassification (kor-Kore)": 65.71,
+ "MassiveIntentClassification (aze-Latn)": 56.24,
+ "MassiveIntentClassification (isl-Latn)": 51.96,
+ "MassiveIntentClassification (hin-Deva)": 61.18,
+ "MassiveIntentClassification (dan-Latn)": 65.39,
+ "MassiveIntentClassification (vie-Latn)": 62.05,
+ "MassiveIntentClassification (heb-Hebr)": 57.71,
+ "MassiveIntentClassification (tur-Latn)": 65.26,
+ "MassiveIntentClassification (cmo-Hans)": 67.43,
+ "MassiveIntentClassification (khm-Khmr)": 38.86,
+ "MassiveIntentClassification (deu-Latn)": 67.75,
+ "MassiveIntentClassification (fas-Arab)": 65.98,
+ "MassiveIntentClassification (jav-Latn)": 50.25,
+ "MassiveIntentClassification (nld-Latn)": 66.82,
+ "MassiveIntentClassification (jpn-Jpan)": 68.56,
+ "MassiveIntentClassification (ita-Latn)": 68.04,
+ "MassiveIntentClassification (cym-Latn)": 48.59,
+ "MassiveIntentClassification (pol-Latn)": 67.97,
+ "MassiveIntentClassification (fin-Latn)": 60.55,
+ "MassiveIntentClassification (tha-Thai)": 58.99,
+ "MassiveIntentClassification (lav-Latn)": 51.12,
+ "MassiveIntentClassification (mal-Mlym)": 43.57,
+ "MassiveIntentClassification (hun-Latn)": 63.48,
+ "MassiveIntentClassification (ind-Latn)": 65.58,
+ "MassiveIntentClassification (por-Latn)": 67.76,
+ "MassiveIntentClassification (tel-Telu)": 44.73,
+ "MassiveIntentClassification (amh-Ethi)": 34.73,
+ "MassiveIntentClassification (kan-Knda)": 44.51,
+ "MassiveIntentClassification (spa-Latn)": 66.45,
+ "MassiveIntentClassification (urd-Arab)": 54.11,
+ "MassiveIntentClassification (kat-Geor)": 42.01,
+ "MassiveIntentClassification (tam-Taml)": 43.48,
+ "MassiveIntentClassification (afr-Latn)": 59.48,
+ "MassiveIntentClassification (rus-Cyrl)": 69.41,
+ "MassiveIntentClassification (tgl-Latn)": 61.83,
+ "MassiveIntentClassification (ell-Grek)": 60.45,
+ "MassiveIntentClassification (hye-Armn)": 43.12,
+ "MassiveIntentClassification (ara-Arab)": 54.46,
+ "MassiveIntentClassification (fra-Latn)": 67.69,
+ "MassiveIntentClassification (mon-Cyrl)": 40.84,
+ "MassiveIntentClassification (msa-Latn)": 62.61,
+ "MassiveIntentClassification (nob-Latn)": 63.58,
+ "MassiveIntentClassification (ben-Beng)": 52.6,
+ "MassiveIntentClassification (cmo-Hant)": 62.06,
+ "MassiveIntentClassification (ron-Latn)": 62.45,
+ "MassiveIntentClassification (swe-Latn)": 67.73,
+ "MassiveIntentClassification (swa-Latn)": 50.1,
+ "MassiveScenarioClassification (cmo-Hant)": 67.7,
+ "MassiveScenarioClassification (kat-Geor)": 49.31,
+ "MassiveScenarioClassification (ind-Latn)": 72.36,
+ "MassiveScenarioClassification (amh-Ethi)": 42.0,
+ "MassiveScenarioClassification (ita-Latn)": 71.86,
+ "MassiveScenarioClassification (tur-Latn)": 68.71,
+ "MassiveScenarioClassification (tel-Telu)": 50.8,
+ "MassiveScenarioClassification (ell-Grek)": 67.42,
+ "MassiveScenarioClassification (deu-Latn)": 73.64,
+ "MassiveScenarioClassification (sqi-Latn)": 57.5,
+ "MassiveScenarioClassification (cym-Latn)": 57.36,
+ "MassiveScenarioClassification (spa-Latn)": 71.12,
+ "MassiveScenarioClassification (nld-Latn)": 72.47,
+ "MassiveScenarioClassification (swa-Latn)": 58.93,
+ "MassiveScenarioClassification (cmo-Hans)": 71.91,
+ "MassiveScenarioClassification (fin-Latn)": 65.91,
+ "MassiveScenarioClassification (por-Latn)": 70.99,
+ "MassiveScenarioClassification (hun-Latn)": 69.68,
+ "MassiveScenarioClassification (slv-Latn)": 70.25,
+ "MassiveScenarioClassification (urd-Arab)": 62.48,
+ "MassiveScenarioClassification (hye-Armn)": 49.32,
+ "MassiveScenarioClassification (pol-Latn)": 71.86,
+ "MassiveScenarioClassification (khm-Khmr)": 45.52,
+ "MassiveScenarioClassification (kan-Knda)": 49.51,
+ "MassiveScenarioClassification (hin-Deva)": 66.18,
+ "MassiveScenarioClassification (heb-Hebr)": 63.3,
+ "MassiveScenarioClassification (rus-Cyrl)": 73.87,
+ "MassiveScenarioClassification (mal-Mlym)": 48.53,
+ "MassiveScenarioClassification (afr-Latn)": 67.34,
+ "MassiveScenarioClassification (vie-Latn)": 69.19,
+ "MassiveScenarioClassification (fra-Latn)": 70.79,
+ "MassiveScenarioClassification (ben-Beng)": 58.75,
+ "MassiveScenarioClassification (lav-Latn)": 57.3,
+ "MassiveScenarioClassification (tam-Taml)": 50.9,
+ "MassiveScenarioClassification (en)": 73.87,
+ "MassiveScenarioClassification (aze-Latn)": 61.74,
+ "MassiveScenarioClassification (swe-Latn)": 73.24,
+ "MassiveScenarioClassification (kor-Kore)": 70.76,
+ "MassiveScenarioClassification (ron-Latn)": 68.54,
+ "MassiveScenarioClassification (msa-Latn)": 69.72,
+ "MassiveScenarioClassification (mya-Mymr)": 44.25,
+ "MassiveScenarioClassification (fas-Arab)": 70.5,
+ "MassiveScenarioClassification (tha-Thai)": 64.51,
+ "MassiveScenarioClassification (jpn-Jpan)": 72.81,
+ "MassiveScenarioClassification (nob-Latn)": 69.75,
+ "MassiveScenarioClassification (tgl-Latn)": 69.0,
+ "MassiveScenarioClassification (dan-Latn)": 71.51,
+ "MassiveScenarioClassification (ara-Arab)": 61.51,
+ "MassiveScenarioClassification (jav-Latn)": 58.24,
+ "MassiveScenarioClassification (isl-Latn)": 61.61,
+ "MassiveScenarioClassification (mon-Cyrl)": 46.6,
+ "MultilingualSentiment (cmn-Hans)": 68.13,
+ "NoRecClassification (nob-Latn)": 52.05,
+ "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 63.6,
+ "OnlineShopping (cmn-Hans)": 86.99,
+ "PAC (pol-Latn)": 68.09,
+ "PolEmo2.0-IN (pol-Latn)": 66.07,
+ "PolEmo2.0-OUT (pol-Latn)": 32.94,
+ "TNews (cmn-Hans)": 49.94,
+ "ToxicConversationsClassification (default)": 63.9,
+ "TweetSentimentExtractionClassification (default)": 57.14,
+ "Waimai (cmn-Hans)": 84.92
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "GritLM-7B",
+ "MasakhaNEWSClusteringP2P (amh-Ethi)": 45.2,
+ "MasakhaNEWSClusteringP2P (eng)": 70.5,
+ "MasakhaNEWSClusteringP2P (fra-Latn)": 73.54,
+ "MasakhaNEWSClusteringP2P (hau-Latn)": 51.33,
+ "MasakhaNEWSClusteringP2P (ibo-Latn)": 66.75,
+ "MasakhaNEWSClusteringP2P (lin-Latn)": 59.57,
+ "MasakhaNEWSClusteringP2P (lug-Latn)": 58.93,
+ "MasakhaNEWSClusteringP2P (orm-Ethi)": 54.38,
+ "MasakhaNEWSClusteringP2P (pcm-Latn)": 92.67,
+ "MasakhaNEWSClusteringP2P (run-Latn)": 59.51,
+ "MasakhaNEWSClusteringP2P (sna-Latn)": 68.86,
+ "MasakhaNEWSClusteringP2P (som-Latn)": 41.42,
+ "MasakhaNEWSClusteringP2P (swa-Latn)": 33.61,
+ "MasakhaNEWSClusteringP2P (tir-Ethi)": 51.68,
+ "MasakhaNEWSClusteringP2P (xho-Latn)": 46.65,
+ "MasakhaNEWSClusteringP2P (yor-Latn)": 52.39,
+ "MasakhaNEWSClusteringS2S (amh-Ethi)": 43.39,
+ "MasakhaNEWSClusteringS2S (eng)": 65.85,
+ "MasakhaNEWSClusteringS2S (fra-Latn)": 68.87,
+ "MasakhaNEWSClusteringS2S (hau-Latn)": 33.02,
+ "MasakhaNEWSClusteringS2S (ibo-Latn)": 64.55,
+ "MasakhaNEWSClusteringS2S (lin-Latn)": 72.01,
+ "MasakhaNEWSClusteringS2S (lug-Latn)": 47.42,
+ "MasakhaNEWSClusteringS2S (orm-Ethi)": 32.59,
+ "MasakhaNEWSClusteringS2S (pcm-Latn)": 97.82,
+ "MasakhaNEWSClusteringS2S (run-Latn)": 59.41,
+ "MasakhaNEWSClusteringS2S (sna-Latn)": 71.58,
+ "MasakhaNEWSClusteringS2S (som-Latn)": 40.91,
+ "MasakhaNEWSClusteringS2S (swa-Latn)": 33.54,
+ "MasakhaNEWSClusteringS2S (tir-Ethi)": 45.32,
+ "MasakhaNEWSClusteringS2S (xho-Latn)": 28.94,
+ "MasakhaNEWSClusteringS2S (yor-Latn)": 63.26
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "GritLM-7B",
+ "CDSC-E (pol-Latn)": 72.65,
+ "OpusparcusPC (deu-Latn)": 96.65,
+ "OpusparcusPC (en)": 98.57,
+ "OpusparcusPC (fin-Latn)": 90.41,
+ "OpusparcusPC (fra-Latn)": 93.41,
+ "OpusparcusPC (rus-Cyrl)": 88.63,
+ "OpusparcusPC (swe-Latn)": 94.04,
+ "PSC (pol-Latn)": 99.43,
+ "PawsXPairClassification (deu-Latn)": 58.5,
+ "PawsXPairClassification (en)": 63.78,
+ "PawsXPairClassification (spa-Latn)": 59.15,
+ "PawsXPairClassification (fra-Latn)": 61.89,
+ "PawsXPairClassification (jpn-Hira)": 51.46,
+ "PawsXPairClassification (kor-Hang)": 52.15,
+ "PawsXPairClassification (cmn-Hans)": 57.66,
+ "SICK-E-PL (pol-Latn)": 75.98,
+ "SprintDuplicateQuestions (default)": 93.06,
+ "TwitterSemEval2015 (default)": 71.24,
+ "TwitterURLCorpus (default)": 84.54
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "GritLM-7B",
+ "AlloprofReranking (fra-Latn)": 77.95,
+ "AskUbuntuDupQuestions (default)": 61.11,
+ "MMarcoReranking (cmn-Hans)": 21.7,
+ "MindSmallReranking (default)": 31.53,
+ "SciDocsRR (default)": 84.78,
+ "StackOverflowDupQuestions (default)": 50.95,
+ "SyntecReranking (fra-Latn)": 83.32,
+ "T2Reranking (cmn-Hans)": 65.63
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "GritLM-7B",
+ "AILACasedocs (default)": 35.31,
+ "AILAStatutes (default)": 41.8,
+ "AlloprofRetrieval (fra-Latn)": 55.42,
+ "ArguAna (default)": 63.17,
+ "ArguAna-PL (pol-Latn)": 48.89,
+ "BSARDRetrieval (fra-Latn)": 26.63,
+ "BrightRetrieval (pony)": 21.98,
+ "BrightRetrieval (robotics)": 17.31,
+ "BrightRetrieval (economics)": 19.0,
+ "BrightRetrieval (theoremqa_questions)": 23.34,
+ "BrightRetrieval (leetcode)": 29.85,
+ "BrightRetrieval (earth_science)": 32.77,
+ "BrightRetrieval (stackoverflow)": 11.62,
+ "BrightRetrieval (sustainable_living)": 18.04,
+ "BrightRetrieval (biology)": 25.04,
+ "BrightRetrieval (psychology)": 19.92,
+ "BrightRetrieval (theoremqa_theorems)": 17.41,
+ "BrightRetrieval (aops)": 8.91,
+ "CmedqaRetrieval (cmn-Hans)": 35.58,
+ "CovidRetrieval (cmn-Hans)": 73.47,
+ "DuRetrieval (cmn-Hans)": 88.18,
+ "EcomRetrieval (cmn-Hans)": 54.33,
+ "FiQA-PL (pol-Latn)": 38.04,
+ "FiQA2018 (default)": 59.91,
+ "GerDaLIRSmall (deu-Latn)": 20.61,
+ "LEMBNarrativeQARetrieval (default)": 41.46,
+ "LEMBNeedleRetrieval": 33.25,
+ "LEMBPasskeyRetrieval": 38.25,
+ "LEMBQMSumRetrieval (default)": 30.32,
+ "LEMBSummScreenFDRetrieval (default)": 78.49,
+ "LEMBWikimQARetrieval (default)": 60.8,
+ "LeCaRDv2 (zho-Hans)": 64.05,
+ "LegalBenchConsumerContractsQA (default)": 82.1,
+ "LegalBenchCorporateLobbying (default)": 95.0,
+ "LegalQuAD (deu-Latn)": 44.18,
+ "LegalSummarization (default)": 70.64,
+ "MMarcoRetrieval (cmn-Hans)": 76.54,
+ "MedicalRetrieval (cmn-Hans)": 55.81,
+ "MintakaRetrieval (ara-Arab)": 25.88,
+ "MintakaRetrieval (deu-Latn)": 55.66,
+ "MintakaRetrieval (spa-Latn)": 53.36,
+ "MintakaRetrieval (fra-Latn)": 51.68,
+ "MintakaRetrieval (hin-Deva)": 26.06,
+ "MintakaRetrieval (ita-Latn)": 54.91,
+ "MintakaRetrieval (jpn-Hira)": 34.1,
+ "MintakaRetrieval (por-Latn)": 54.91,
+ "NFCorpus (default)": 40.86,
+ "NFCorpus-PL (pol-Latn)": 32.88,
+ "SCIDOCS (default)": 24.4,
+ "SCIDOCS-PL (pol-Latn)": 18.39,
+ "SciFact (default)": 79.13,
+ "SciFact-PL (pol-Latn)": 73.22,
+ "SyntecRetrieval (fra-Latn)": 89.48,
+ "T2Retrieval (cmn-Hans)": 82.96,
+ "TRECCOVID (default)": 74.36,
+ "TRECCOVID-PL (pol-Latn)": 58.01,
+ "Touche2020 (default)": 27.81,
+ "VideoRetrieval (cmn-Hans)": 53.85,
+ "XPQARetrieval (ara-Arab_ara-Arab)": 45.21,
+ "XPQARetrieval (eng-Latn_ara-Arab)": 27.32,
+ "XPQARetrieval (ara-Arab_eng-Latn)": 39.43,
+ "XPQARetrieval (deu-Latn_deu-Latn)": 76.58,
+ "XPQARetrieval (eng-Latn_deu-Latn)": 55.44,
+ "XPQARetrieval (deu-Latn_eng-Latn)": 72.56,
+ "XPQARetrieval (spa-Latn_spa-Latn)": 64.55,
+ "XPQARetrieval (eng-Latn_spa-Latn)": 45.49,
+ "XPQARetrieval (spa-Latn_eng-Latn)": 61.03,
+ "XPQARetrieval (fra-Latn_fra-Latn)": 70.85,
+ "XPQARetrieval (eng-Latn_fra-Latn)": 48.14,
+ "XPQARetrieval (fra-Latn_eng-Latn)": 66.96,
+ "XPQARetrieval (hin-Deva_hin-Deva)": 74.75,
+ "XPQARetrieval (eng-Latn_hin-Deva)": 25.61,
+ "XPQARetrieval (hin-Deva_eng-Latn)": 63.9,
+ "XPQARetrieval (ita-Latn_ita-Latn)": 76.53,
+ "XPQARetrieval (eng-Latn_ita-Latn)": 46.88,
+ "XPQARetrieval (ita-Latn_eng-Latn)": 71.03,
+ "XPQARetrieval (jpn-Hira_jpn-Hira)": 72.27,
+ "XPQARetrieval (eng-Latn_jpn-Hira)": 41.94,
+ "XPQARetrieval (jpn-Hira_eng-Latn)": 69.42,
+ "XPQARetrieval (kor-Hang_kor-Hang)": 40.64,
+ "XPQARetrieval (eng-Latn_kor-Hang)": 32.68,
+ "XPQARetrieval (kor-Hang_eng-Latn)": 36.0,
+ "XPQARetrieval (pol-Latn_pol-Latn)": 50.74,
+ "XPQARetrieval (eng-Latn_pol-Latn)": 33.14,
+ "XPQARetrieval (pol-Latn_eng-Latn)": 48.06,
+ "XPQARetrieval (por-Latn_por-Latn)": 49.86,
+ "XPQARetrieval (eng-Latn_por-Latn)": 33.01,
+ "XPQARetrieval (por-Latn_eng-Latn)": 48.45,
+ "XPQARetrieval (tam-Taml_tam-Taml)": 41.78,
+ "XPQARetrieval (eng-Latn_tam-Taml)": 10.95,
+ "XPQARetrieval (tam-Taml_eng-Latn)": 21.28,
+ "XPQARetrieval (cmn-Hans_cmn-Hans)": 65.29,
+ "XPQARetrieval (eng-Latn_cmn-Hans)": 35.86,
+ "XPQARetrieval (cmn-Hans_eng-Latn)": 58.12
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "GritLM-7B",
+ "AFQMC (cmn-Hans)": 32.65,
+ "ATEC (cmn-Hans)": 37.34,
+ "BIOSSES (default)": 85.01,
+ "BQ (cmn-Hans)": 38.03,
+ "CDSC-R (pol-Latn)": 92.23,
+ "LCQMC (cmn-Hans)": 71.38,
+ "PAWSX (cmn-Hans)": 16.4,
+ "SICK-R (default)": 81.47,
+ "SICK-R-PL (pol-Latn)": 72.78,
+ "SICKFr (fra-Latn)": 76.91,
+ "STS12 (default)": 65.84,
+ "STS13 (default)": 78.37,
+ "STS14 (default)": 77.52,
+ "STS15 (default)": 85.43,
+ "STS16 (default)": 79.94,
+ "STS17 (ita-Latn_eng-Latn)": 88.42,
+ "STS17 (fra-Latn_eng-Latn)": 87.9,
+ "STS17 (kor-Hang)": 78.74,
+ "STS17 (en-en)": 90.12,
+ "STS17 (nld-Latn_eng-Latn)": 88.29,
+ "STS17 (ara-Arab)": 79.28,
+ "STS17 (eng-Latn_deu-Latn)": 88.92,
+ "STS17 (spa-Latn)": 87.12,
+ "STS17 (eng-Latn_tur-Latn)": 77.47,
+ "STS17 (spa-Latn_eng-Latn)": 87.47,
+ "STS17 (eng-Latn_ara-Arab)": 74.45,
+ "STS22 (spa-Latn_eng-Latn)": 80.76,
+ "STS22 (ara-Arab)": 55.45,
+ "STS22 (pol-Latn_eng-Latn)": 77.77,
+ "STS22 (deu-Latn_pol-Latn)": 55.09,
+ "STS22 (en)": 68.59,
+ "STS22 (rus-Cyrl)": 68.46,
+ "STS22 (deu-Latn_eng-Latn)": 62.33,
+ "STS22 (cmn-Hans)": 72.29,
+ "STS22 (pol-Latn)": 48.07,
+ "STS22 (fra-Latn)": 83.09,
+ "STS22 (cmn-Hans_eng-Latn)": 72.73,
+ "STS22 (deu-Latn_fra-Latn)": 62.14,
+ "STS22 (spa-Latn_ita-Latn)": 77.63,
+ "STS22 (fra-Latn_pol-Latn)": 84.52,
+ "STS22 (ita-Latn)": 77.58,
+ "STS22 (spa-Latn)": 72.24,
+ "STS22 (deu-Latn)": 59.34,
+ "STS22 (tur-Latn)": 70.83,
+ "STSB (cmn-Hans)": 74.11,
+ "STSBenchmark (default)": 83.1,
+ "STSBenchmarkMultilingualSTS (spa-Latn)": 79.51,
+ "STSBenchmarkMultilingualSTS (ita-Latn)": 76.24,
+ "STSBenchmarkMultilingualSTS (por-Latn)": 76.61,
+ "STSBenchmarkMultilingualSTS (fra-Latn)": 77.48,
+ "STSBenchmarkMultilingualSTS (deu-Latn)": 77.57,
+ "STSBenchmarkMultilingualSTS (en)": 83.12,
+ "STSBenchmarkMultilingualSTS (nld-Latn)": 74.83,
+ "STSBenchmarkMultilingualSTS (pol-Latn)": 74.67,
+ "STSBenchmarkMultilingualSTS (cmn-Hans)": 75.27,
+ "STSBenchmarkMultilingualSTS (rus-Cyrl)": 76.19
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "GritLM-7B",
+ "SummEval (default)": 30.26,
+ "SummEvalFr (fra-Latn)": 29.97
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "GritLM-7B",
+ "Core17InstructionRetrieval": 2.62,
+ "News21InstructionRetrieval": -1.01,
+ "Robust04InstructionRetrieval": -1.68
+ }
+ ]
+ }
+ },
+ "gte-Qwen1.5-7B-instruct": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "AmazonCounterfactualClassification (en)": 83.16,
+ "AmazonPolarityClassification": 96.7,
+ "AmazonReviewsClassification (en)": 62.17,
+ "AmazonReviewsClassification (zh)": 52.95,
+ "Banking77Classification": 81.68,
+ "EmotionClassification": 54.53,
+ "IFlyTek": 53.77,
+ "ImdbClassification": 95.58,
+ "JDReview": 88.2,
+ "MTOPDomainClassification (en)": 95.75,
+ "MTOPIntentClassification (en)": 84.26,
+ "MassiveIntentClassification (zh-CN)": 76.25,
+ "MassiveIntentClassification (en)": 78.47,
+ "MassiveScenarioClassification (en)": 78.19,
+ "MassiveScenarioClassification (zh-CN)": 77.26,
+ "MultilingualSentiment": 77.42,
+ "OnlineShopping": 94.48,
+ "TNews": 51.24,
+ "ToxicConversationsClassification": 78.75,
+ "TweetSentimentExtractionClassification": 66.0,
+ "Waimai": 88.63
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "ArxivClusteringP2P": 56.4,
+ "ArxivClusteringS2S": 51.45,
+ "BiorxivClusteringP2P": 49.01,
+ "BiorxivClusteringS2S": 45.06,
+ "CLSClusteringP2P": 47.21,
+ "CLSClusteringS2S": 45.79,
+ "MedrxivClusteringP2P": 44.37,
+ "MedrxivClusteringS2S": 42.0,
+ "RedditClustering": 73.37,
+ "RedditClusteringP2P": 72.51,
+ "StackExchangeClustering": 79.07,
+ "StackExchangeClusteringP2P": 49.57,
+ "ThuNewsClusteringP2P": 87.43,
+ "ThuNewsClusteringS2S": 87.9,
+ "TwentyNewsgroupsClustering": 51.31
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "Cmnli": 91.81,
+ "Ocnli": 85.22,
+ "SprintDuplicateQuestions": 95.99,
+ "TwitterSemEval2015": 79.36,
+ "TwitterURLCorpus": 86.79
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "AskUbuntuDupQuestions": 66.0,
+ "CMedQAv1": 86.37,
+ "CMedQAv2": 87.41,
+ "MindSmallReranking": 32.71,
+ "SciDocsRR": 87.89,
+ "StackOverflowDupQuestions": 53.93,
+ "T2Reranking": 68.11
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "ArguAna": 62.65,
+ "BrightRetrieval (stackoverflow)": 19.85,
+ "BrightRetrieval (earth_science)": 36.22,
+ "BrightRetrieval (leetcode)": 25.46,
+ "BrightRetrieval (theoremqa_questions)": 26.97,
+ "BrightRetrieval (economics)": 17.72,
+ "BrightRetrieval (robotics)": 13.47,
+ "BrightRetrieval (pony)": 9.79,
+ "BrightRetrieval (aops)": 14.36,
+ "BrightRetrieval (psychology)": 24.61,
+ "BrightRetrieval (theoremqa_theorems)": 26.66,
+ "BrightRetrieval (biology)": 30.92,
+ "BrightRetrieval (sustainable_living)": 14.93,
+ "CQADupstackRetrieval": 40.64,
+ "ClimateFEVER": 44.0,
+ "CmedqaRetrieval": 43.47,
+ "CovidRetrieval": 80.87,
+ "DBPedia": 48.04,
+ "DuRetrieval": 86.01,
+ "EcomRetrieval": 66.46,
+ "FEVER": 93.35,
+ "FiQA2018": 55.31,
+ "HotpotQA": 72.25,
+ "MMarcoRetrieval": 73.83,
+ "MSMARCO": 41.68,
+ "MedicalRetrieval": 61.33,
+ "NFCorpus": 38.25,
+ "NQ": 61.79,
+ "QuoraRetrieval": 89.61,
+ "SCIDOCS": 27.69,
+ "SciFact": 75.31,
+ "T2Retrieval": 83.58,
+ "TRECCOVID": 72.72,
+ "Touche2020": 20.3,
+ "VideoRetrieval": 69.41
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "AFQMC": 58.47,
+ "ATEC": 55.46,
+ "BIOSSES": 81.12,
+ "BQ": 77.59,
+ "LCQMC": 76.29,
+ "PAWSX": 50.22,
+ "QBQTC": 31.82,
+ "SICK-R": 79.15,
+ "STS12": 76.52,
+ "STS13": 88.63,
+ "STS14": 83.32,
+ "STS15": 87.5,
+ "STS16": 86.39,
+ "STS17 (en-en)": 87.79,
+ "STS22 (en)": 66.4,
+ "STS22 (zh)": 67.36,
+ "STSB": 81.37,
+ "STSBenchmark": 87.35
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "SummEval": 31.46
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct"
+ }
+ ]
+ }
+ },
+ "voyage-large-2-instruct": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "voyage-large-2-instruct"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "voyage-large-2-instruct",
+ "AmazonCounterfactualClassification (en)": 77.6,
+ "AmazonPolarityClassification": 96.58,
+ "AmazonReviewsClassification (en)": 50.77,
+ "Banking77Classification": 86.96,
+ "EmotionClassification": 59.81,
+ "ImdbClassification": 96.13,
+ "MTOPDomainClassification (en)": 98.86,
+ "MTOPIntentClassification (en)": 86.97,
+ "MassiveIntentClassification (en)": 81.08,
+ "MassiveScenarioClassification (en)": 87.95,
+ "ToxicConversationsClassification": 83.58,
+ "TweetSentimentExtractionClassification": 71.55
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "voyage-large-2-instruct",
+ "ArxivClusteringP2P": 51.81,
+ "ArxivClusteringS2S": 44.73,
+ "BiorxivClusteringP2P": 46.07,
+ "BiorxivClusteringS2S": 40.64,
+ "MedrxivClusteringP2P": 42.94,
+ "MedrxivClusteringS2S": 41.44,
+ "RedditClustering": 68.5,
+ "RedditClusteringP2P": 64.86,
+ "StackExchangeClustering": 74.16,
+ "StackExchangeClusteringP2P": 45.1,
+ "TwentyNewsgroupsClustering": 66.62
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "voyage-large-2-instruct",
+ "SprintDuplicateQuestions": 94.5,
+ "TwitterSemEval2015": 86.32,
+ "TwitterURLCorpus": 86.9
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "voyage-large-2-instruct",
+ "AskUbuntuDupQuestions": 64.92,
+ "MindSmallReranking": 30.97,
+ "SciDocsRR": 89.34,
+ "StackOverflowDupQuestions": 55.11
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "voyage-large-2-instruct",
+ "ArguAna": 64.06,
+ "BrightRetrieval (theoremqa_questions)": 26.06,
+ "BrightRetrieval (earth_science)": 25.09,
+ "BrightRetrieval (leetcode)": 30.6,
+ "BrightRetrieval (economics)": 19.85,
+ "BrightRetrieval (robotics)": 11.21,
+ "BrightRetrieval (psychology)": 24.79,
+ "BrightRetrieval (aops)": 7.45,
+ "BrightRetrieval (sustainable_living)": 15.58,
+ "BrightRetrieval (pony)": 1.48,
+ "BrightRetrieval (theoremqa_theorems)": 10.13,
+ "BrightRetrieval (biology)": 23.55,
+ "BrightRetrieval (stackoverflow)": 15.03,
+ "CQADupstackRetrieval": 46.6,
+ "ClimateFEVER": 32.65,
+ "DBPedia": 46.03,
+ "FEVER": 91.47,
+ "FiQA2018": 59.76,
+ "HotpotQA": 70.86,
+ "MSMARCO": 40.6,
+ "NFCorpus": 40.32,
+ "NQ": 65.92,
+ "QuoraRetrieval": 87.4,
+ "SCIDOCS": 24.32,
+ "SciFact": 79.99,
+ "TRECCOVID": 85.07,
+ "Touche2020": 39.16
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "voyage-large-2-instruct",
+ "BIOSSES": 89.12,
+ "SICK-R": 83.16,
+ "STS12": 76.15,
+ "STS13": 88.49,
+ "STS14": 86.49,
+ "STS15": 91.13,
+ "STS16": 85.68,
+ "STS17 (en-en)": 90.06,
+ "STS22 (en)": 66.32,
+ "STSBenchmark": 89.22
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "voyage-large-2-instruct",
+ "SummEval": 30.84
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "voyage-large-2-instruct"
+ }
+ ]
+ }
+ },
+ "e5-mistral-7b-instruct": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "e5-mistral-7b-instruct"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "e5-mistral-7b-instruct",
+ "AmazonReviewsClassification (fr)": 36.71,
+ "MTOPDomainClassification (fr)": 74.8,
+ "MTOPIntentClassification (fr)": 53.97,
+ "MasakhaNEWSClassification (fra)": 80.59,
+ "MassiveIntentClassification (fr)": 46.39,
+ "MassiveScenarioClassification (fr)": 53.86
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "e5-mistral-7b-instruct",
+ "AlloProfClusteringP2P": 61.06,
+ "AlloProfClusteringS2S": 28.12,
+ "HALClusteringS2S": 19.69,
+ "MLSUMClusteringP2P": 45.59,
+ "MLSUMClusteringS2S": 32.0,
+ "MasakhaNEWSClusteringP2P (fra)": 52.47,
+ "MasakhaNEWSClusteringS2S (fra)": 49.2
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "e5-mistral-7b-instruct",
+ "OpusparcusPC (fr)": 88.5,
+ "PawsXPairClassification (fr)": 63.65
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "e5-mistral-7b-instruct",
+ "AlloprofReranking": 47.36,
+ "SyntecReranking": 77.05
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "e5-mistral-7b-instruct",
+ "AILACasedocs": 38.76,
+ "AILAStatutes": 38.07,
+ "AlloprofRetrieval": 16.46,
+ "BSARDRetrieval": 0.0,
+ "BrightRetrieval (sustainable_living)": 18.51,
+ "BrightRetrieval (economics)": 15.49,
+ "BrightRetrieval (theoremqa_theorems)": 23.78,
+ "BrightRetrieval (aops)": 7.1,
+ "BrightRetrieval (theoremqa_questions)": 23.94,
+ "BrightRetrieval (stackoverflow)": 9.83,
+ "BrightRetrieval (psychology)": 15.79,
+ "BrightRetrieval (pony)": 4.81,
+ "BrightRetrieval (leetcode)": 28.72,
+ "BrightRetrieval (biology)": 18.84,
+ "BrightRetrieval (earth_science)": 25.96,
+ "BrightRetrieval (robotics)": 16.37,
+ "GerDaLIRSmall": 37.18,
+ "LEMBNarrativeQARetrieval": 44.62,
+ "LEMBNeedleRetrieval": 48.25,
+ "LEMBPasskeyRetrieval": 71.0,
+ "LEMBQMSumRetrieval": 43.63,
+ "LEMBSummScreenFDRetrieval": 96.82,
+ "LEMBWikimQARetrieval": 82.11,
+ "LeCaRDv2": 68.56,
+ "LegalBenchConsumerContractsQA": 75.46,
+ "LegalBenchCorporateLobbying": 94.01,
+ "LegalQuAD": 59.64,
+ "LegalSummarization": 66.51,
+ "MintakaRetrieval (fr)": 3.57,
+ "SyntecRetrieval": 55.9,
+ "XPQARetrieval (fr)": 41.29
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "e5-mistral-7b-instruct",
+ "SICKFr": 64.39,
+ "STS22 (fr)": 69.82,
+ "STSBenchmarkMultilingualSTS (fr)": 61.87
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "e5-mistral-7b-instruct",
+ "SummEvalFr": 32.22
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "e5-mistral-7b-instruct",
+ "Core17InstructionRetrieval": 0.09,
+ "News21InstructionRetrieval": -0.86,
+ "Robust04InstructionRetrieval": -9.59
+ }
+ ]
+ }
+ },
+ "bm25": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "bm25"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "bm25"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "bm25"
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "bm25"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "bm25"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "bm25",
+ "BrightRetrieval (robotics)": 13.53,
+ "BrightRetrieval (pony)": 7.93,
+ "BrightRetrieval (leetcode)": 24.37,
+ "BrightRetrieval (earth_science)": 27.06,
+ "BrightRetrieval (stackoverflow)": 16.55,
+ "BrightRetrieval (economics)": 14.87,
+ "BrightRetrieval (theoremqa_questions)": 9.78,
+ "BrightRetrieval (theoremqa_theorems)": 4.25,
+ "BrightRetrieval (psychology)": 12.51,
+ "BrightRetrieval (sustainable_living)": 15.22,
+ "BrightRetrieval (biology)": 19.19,
+ "BrightRetrieval (aops)": 6.2
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "bm25"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "bm25"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "bm25",
+ "Core17InstructionRetrieval": -1.06,
+ "News21InstructionRetrieval": -2.15,
+ "Robust04InstructionRetrieval": -3.06
+ }
+ ]
+ }
+ },
+ "text-embedding-3-large": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "text-embedding-3-large"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "text-embedding-3-large",
+ "AmazonCounterfactualClassification (en)": 78.93,
+ "AmazonPolarityClassification": 92.85,
+ "AmazonReviewsClassification (en)": 48.7,
+ "Banking77Classification": 85.69,
+ "EmotionClassification": 51.58,
+ "ImdbClassification": 87.67,
+ "MTOPDomainClassification (en)": 95.36,
+ "MTOPIntentClassification (en)": 75.07,
+ "MassiveIntentClassification (en)": 74.64,
+ "MassiveScenarioClassification (en)": 79.79,
+ "ToxicConversationsClassification": 72.92,
+ "TweetSentimentExtractionClassification": 62.22
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "text-embedding-3-large",
+ "ArxivClusteringP2P": 49.01,
+ "ArxivClusteringS2S": 44.45,
+ "BiorxivClusteringP2P": 38.03,
+ "BiorxivClusteringS2S": 36.53,
+ "MedrxivClusteringP2P": 32.7,
+ "MedrxivClusteringS2S": 31.27,
+ "RedditClustering": 67.84,
+ "RedditClusteringP2P": 67.96,
+ "StackExchangeClustering": 76.26,
+ "StackExchangeClusteringP2P": 36.88,
+ "TwentyNewsgroupsClustering": 58.14
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "text-embedding-3-large",
+ "SprintDuplicateQuestions": 92.25,
+ "TwitterSemEval2015": 77.13,
+ "TwitterURLCorpus": 87.78
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "text-embedding-3-large",
+ "AskUbuntuDupQuestions": 65.03,
+ "MindSmallReranking": 29.86,
+ "SciDocsRR": 86.66,
+ "StackOverflowDupQuestions": 55.08
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "text-embedding-3-large",
+ "AILACasedocs": 39.0,
+ "AILAStatutes": 41.31,
+ "ArguAna": 58.05,
+ "BrightRetrieval (theoremqa_questions)": 22.22,
+ "BrightRetrieval (leetcode)": 23.65,
+ "BrightRetrieval (earth_science)": 26.27,
+ "BrightRetrieval (psychology)": 27.52,
+ "BrightRetrieval (robotics)": 12.93,
+ "BrightRetrieval (economics)": 19.98,
+ "BrightRetrieval (stackoverflow)": 12.49,
+ "BrightRetrieval (biology)": 23.67,
+ "BrightRetrieval (theoremqa_theorems)": 9.25,
+ "BrightRetrieval (pony)": 2.45,
+ "BrightRetrieval (sustainable_living)": 20.32,
+ "BrightRetrieval (aops)": 8.45,
+ "CQADupstackRetrieval": 47.54,
+ "ClimateFEVER": 30.27,
+ "DBPedia": 44.76,
+ "FEVER": 87.94,
+ "FiQA2018": 55.0,
+ "GerDaLIRSmall": 32.77,
+ "HotpotQA": 71.58,
+ "LEMBNarrativeQARetrieval": 44.09,
+ "LEMBNeedleRetrieval": 29.25,
+ "LEMBPasskeyRetrieval": 63.0,
+ "LEMBQMSumRetrieval": 32.49,
+ "LEMBSummScreenFDRetrieval": 84.8,
+ "LEMBWikimQARetrieval": 54.16,
+ "LeCaRDv2": 57.2,
+ "LegalBenchConsumerContractsQA": 79.39,
+ "LegalBenchCorporateLobbying": 95.09,
+ "LegalQuAD": 57.47,
+ "LegalSummarization": 71.55,
+ "MSMARCO": 40.24,
+ "NFCorpus": 42.07,
+ "NQ": 61.27,
+ "QuoraRetrieval": 89.05,
+ "SCIDOCS": 23.11,
+ "SciFact": 77.77,
+ "TRECCOVID": 79.56,
+ "Touche2020": 23.35
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "text-embedding-3-large",
+ "BIOSSES": 84.68,
+ "SICK-R": 79.0,
+ "STS12": 72.84,
+ "STS13": 86.1,
+ "STS14": 81.15,
+ "STS15": 88.49,
+ "STS16": 85.08,
+ "STS17 (en-en)": 90.22,
+ "STS22 (en)": 66.14,
+ "STSBenchmark": 83.56
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "text-embedding-3-large",
+ "SummEval": 29.92
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "text-embedding-3-large",
+ "Core17InstructionRetrieval": -0.2,
+ "News21InstructionRetrieval": -2.03,
+ "Robust04InstructionRetrieval": -5.81
+ }
+ ]
+ }
+ },
+ "instructor-large": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "instructor-large"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "instructor-large"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "instructor-large"
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "instructor-large"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "instructor-large"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "instructor-large",
+ "BrightRetrieval (pony)": 1.32,
+ "BrightRetrieval (sustainable_living)": 13.16,
+ "BrightRetrieval (aops)": 7.94,
+ "BrightRetrieval (biology)": 15.61,
+ "BrightRetrieval (stackoverflow)": 11.21,
+ "BrightRetrieval (theoremqa_theorems)": 9.29,
+ "BrightRetrieval (psychology)": 21.94,
+ "BrightRetrieval (economics)": 15.99,
+ "BrightRetrieval (robotics)": 11.45,
+ "BrightRetrieval (leetcode)": 20.0,
+ "BrightRetrieval (earth_science)": 21.52,
+ "BrightRetrieval (theoremqa_questions)": 20.07
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "instructor-large"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "instructor-large"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "instructor-large"
+ }
+ ]
+ }
}
}
\ No newline at end of file