diff --git "a/EXTERNAL_MODEL_RESULTS.json" "b/EXTERNAL_MODEL_RESULTS.json"
--- "a/EXTERNAL_MODEL_RESULTS.json"
+++ "b/EXTERNAL_MODEL_RESULTS.json"
@@ -1457,173 +1457,6 @@
]
}
},
- "gte-Qwen1.5-7B-instruct": {
- "BitextMining": {
- "f1": [
- {
- "Model": "gte-Qwen1.5-7B-instruct"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "AmazonCounterfactualClassification (en)": 83.16,
- "AmazonPolarityClassification": 96.7,
- "AmazonReviewsClassification (en)": 62.17,
- "AmazonReviewsClassification (zh)": 52.95,
- "Banking77Classification": 81.68,
- "EmotionClassification": 54.53,
- "IFlyTek": 53.77,
- "ImdbClassification": 95.58,
- "JDReview": 88.2,
- "MTOPDomainClassification (en)": 95.75,
- "MTOPIntentClassification (en)": 84.26,
- "MassiveIntentClassification (zh-CN)": 76.25,
- "MassiveIntentClassification (en)": 78.47,
- "MassiveScenarioClassification (en)": 78.19,
- "MassiveScenarioClassification (zh-CN)": 77.26,
- "MultilingualSentiment": 77.42,
- "OnlineShopping": 94.48,
- "TNews": 51.24,
- "ToxicConversationsClassification": 78.75,
- "TweetSentimentExtractionClassification": 66.0,
- "Waimai": 88.63
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "ArxivClusteringP2P": 56.4,
- "ArxivClusteringS2S": 51.45,
- "BiorxivClusteringP2P": 49.01,
- "BiorxivClusteringS2S": 45.06,
- "CLSClusteringP2P": 47.21,
- "CLSClusteringS2S": 45.79,
- "MedrxivClusteringP2P": 44.37,
- "MedrxivClusteringS2S": 42.0,
- "RedditClustering": 73.37,
- "RedditClusteringP2P": 72.51,
- "StackExchangeClustering": 79.07,
- "StackExchangeClusteringP2P": 49.57,
- "ThuNewsClusteringP2P": 87.43,
- "ThuNewsClusteringS2S": 87.9,
- "TwentyNewsgroupsClustering": 51.31
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "Cmnli": 91.81,
- "Ocnli": 85.22,
- "SprintDuplicateQuestions": 95.99,
- "TwitterSemEval2015": 79.36,
- "TwitterURLCorpus": 86.79
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "AskUbuntuDupQuestions": 66.0,
- "CMedQAv1": 86.37,
- "CMedQAv2": 87.41,
- "MindSmallReranking": 32.71,
- "SciDocsRR": 87.89,
- "StackOverflowDupQuestions": 53.93,
- "T2Reranking": 68.11
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "ArguAna": 62.65,
- "BrightRetrieval (stackoverflow)": 19.85,
- "BrightRetrieval (earth_science)": 36.22,
- "BrightRetrieval (leetcode)": 25.46,
- "BrightRetrieval (theoremqa_questions)": 26.97,
- "BrightRetrieval (economics)": 17.72,
- "BrightRetrieval (robotics)": 13.47,
- "BrightRetrieval (pony)": 9.79,
- "BrightRetrieval (aops)": 14.36,
- "BrightRetrieval (psychology)": 24.61,
- "BrightRetrieval (theoremqa_theorems)": 26.66,
- "BrightRetrieval (biology)": 30.92,
- "BrightRetrieval (sustainable_living)": 14.93,
- "CQADupstackRetrieval": 40.64,
- "ClimateFEVER": 44.0,
- "CmedqaRetrieval": 43.47,
- "CovidRetrieval": 80.87,
- "DBPedia": 48.04,
- "DuRetrieval": 86.01,
- "EcomRetrieval": 66.46,
- "FEVER": 93.35,
- "FiQA2018": 55.31,
- "HotpotQA": 72.25,
- "MMarcoRetrieval": 73.83,
- "MSMARCO": 41.68,
- "MedicalRetrieval": 61.33,
- "NFCorpus": 38.25,
- "NQ": 61.79,
- "QuoraRetrieval": 89.61,
- "SCIDOCS": 27.69,
- "SciFact": 75.31,
- "T2Retrieval": 83.58,
- "TRECCOVID": 72.72,
- "Touche2020": 20.3,
- "VideoRetrieval": 69.41
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "AFQMC": 58.47,
- "ATEC": 55.46,
- "BIOSSES": 81.12,
- "BQ": 77.59,
- "LCQMC": 76.29,
- "PAWSX": 50.22,
- "QBQTC": 31.82,
- "SICK-R": 79.15,
- "STS12": 76.52,
- "STS13": 88.63,
- "STS14": 83.32,
- "STS15": 87.5,
- "STS16": 86.39,
- "STS17 (en-en)": 87.79,
- "STS22 (en)": 66.4,
- "STS22 (zh)": 67.36,
- "STSB": 81.37,
- "STSBenchmark": 87.35
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "gte-Qwen1.5-7B-instruct",
- "SummEval": 31.46
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "gte-Qwen1.5-7B-instruct"
- }
- ]
- }
- },
"text-similarity-curie-001": {
"BitextMining": {
"f1": [
@@ -2719,83 +2552,6 @@
]
}
},
- "SFR-Embedding-Mistral": {
- "BitextMining": {
- "f1": [
- {
- "Model": "SFR-Embedding-Mistral"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "SFR-Embedding-Mistral"
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "SFR-Embedding-Mistral"
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "SFR-Embedding-Mistral"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "SFR-Embedding-Mistral"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "SFR-Embedding-Mistral",
- "BrightRetrieval (sustainable_living)": 19.79,
- "BrightRetrieval (economics)": 17.84,
- "BrightRetrieval (theoremqa_theorems)": 24.05,
- "BrightRetrieval (aops)": 7.43,
- "BrightRetrieval (theoremqa_questions)": 23.05,
- "BrightRetrieval (psychology)": 18.97,
- "BrightRetrieval (stackoverflow)": 12.72,
- "BrightRetrieval (pony)": 1.97,
- "BrightRetrieval (leetcode)": 27.35,
- "BrightRetrieval (biology)": 19.49,
- "BrightRetrieval (earth_science)": 26.63,
- "BrightRetrieval (robotics)": 16.7
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "SFR-Embedding-Mistral"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "SFR-Embedding-Mistral"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "SFR-Embedding-Mistral"
- }
- ]
- }
- },
"glove.6B.300d": {
"BitextMining": {
"f1": [
@@ -4171,145 +3927,12 @@
]
}
},
- "voyage-large-2-instruct": {
+ "rubert-base-cased": {
"BitextMining": {
"f1": [
{
- "Model": "voyage-large-2-instruct"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "voyage-large-2-instruct",
- "AmazonCounterfactualClassification (en)": 77.6,
- "AmazonPolarityClassification": 96.58,
- "AmazonReviewsClassification (en)": 50.77,
- "Banking77Classification": 86.96,
- "EmotionClassification": 59.81,
- "ImdbClassification": 96.13,
- "MTOPDomainClassification (en)": 98.86,
- "MTOPIntentClassification (en)": 86.97,
- "MassiveIntentClassification (en)": 81.08,
- "MassiveScenarioClassification (en)": 87.95,
- "ToxicConversationsClassification": 83.58,
- "TweetSentimentExtractionClassification": 71.55
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "voyage-large-2-instruct",
- "ArxivClusteringP2P": 51.81,
- "ArxivClusteringS2S": 44.73,
- "BiorxivClusteringP2P": 46.07,
- "BiorxivClusteringS2S": 40.64,
- "MedrxivClusteringP2P": 42.94,
- "MedrxivClusteringS2S": 41.44,
- "RedditClustering": 68.5,
- "RedditClusteringP2P": 64.86,
- "StackExchangeClustering": 74.16,
- "StackExchangeClusteringP2P": 45.1,
- "TwentyNewsgroupsClustering": 66.62
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "voyage-large-2-instruct",
- "SprintDuplicateQuestions": 94.5,
- "TwitterSemEval2015": 86.32,
- "TwitterURLCorpus": 86.9
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "voyage-large-2-instruct",
- "AskUbuntuDupQuestions": 64.92,
- "MindSmallReranking": 30.97,
- "SciDocsRR": 89.34,
- "StackOverflowDupQuestions": 55.11
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "voyage-large-2-instruct",
- "ArguAna": 64.06,
- "BrightRetrieval (theoremqa_questions)": 26.06,
- "BrightRetrieval (earth_science)": 25.09,
- "BrightRetrieval (leetcode)": 30.6,
- "BrightRetrieval (economics)": 19.85,
- "BrightRetrieval (robotics)": 11.21,
- "BrightRetrieval (psychology)": 24.79,
- "BrightRetrieval (aops)": 7.45,
- "BrightRetrieval (sustainable_living)": 15.58,
- "BrightRetrieval (pony)": 1.48,
- "BrightRetrieval (theoremqa_theorems)": 10.13,
- "BrightRetrieval (biology)": 23.55,
- "BrightRetrieval (stackoverflow)": 15.03,
- "CQADupstackRetrieval": 46.6,
- "ClimateFEVER": 32.65,
- "DBPedia": 46.03,
- "FEVER": 91.47,
- "FiQA2018": 59.76,
- "HotpotQA": 70.86,
- "MSMARCO": 40.6,
- "NFCorpus": 40.32,
- "NQ": 65.92,
- "QuoraRetrieval": 87.4,
- "SCIDOCS": 24.32,
- "SciFact": 79.99,
- "TRECCOVID": 85.07,
- "Touche2020": 39.16
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "voyage-large-2-instruct",
- "BIOSSES": 89.24,
- "SICK-R": 83.16,
- "STS12": 73.34,
- "STS13": 88.49,
- "STS14": 86.49,
- "STS15": 91.13,
- "STS16": 85.68,
- "STS17 (en-en)": 90.06,
- "STS22 (en)": 66.32,
- "STSBenchmark": 89.22
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "voyage-large-2-instruct",
- "SummEval": 30.84
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "voyage-large-2-instruct"
- }
- ]
- }
- },
- "rubert-base-cased": {
- "BitextMining": {
- "f1": [
- {
- "Model": "rubert-base-cased",
- "Tatoeba (rus-Cyrl_eng-Latn)": 16.76
+ "Model": "rubert-base-cased",
+ "Tatoeba (rus-Cyrl_eng-Latn)": 16.76
}
]
},
@@ -7556,83 +7179,6 @@
]
}
},
- "instructor-large": {
- "BitextMining": {
- "f1": [
- {
- "Model": "instructor-large"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "instructor-large"
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "instructor-large"
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "instructor-large"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "instructor-large"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "instructor-large",
- "BrightRetrieval (pony)": 1.32,
- "BrightRetrieval (sustainable_living)": 13.16,
- "BrightRetrieval (aops)": 7.94,
- "BrightRetrieval (biology)": 15.61,
- "BrightRetrieval (stackoverflow)": 11.21,
- "BrightRetrieval (theoremqa_theorems)": 9.29,
- "BrightRetrieval (psychology)": 21.94,
- "BrightRetrieval (economics)": 15.99,
- "BrightRetrieval (robotics)": 11.45,
- "BrightRetrieval (leetcode)": 20.0,
- "BrightRetrieval (earth_science)": 21.52,
- "BrightRetrieval (theoremqa_questions)": 20.07
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "instructor-large"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "instructor-large"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "instructor-large"
- }
- ]
- }
- },
"bert-base-multilingual-uncased": {
"BitextMining": {
"f1": [
@@ -10137,111 +9683,11 @@
]
}
},
- "bge-large-en-v1.5": {
+ "allenai-specter": {
"BitextMining": {
"f1": [
{
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "bge-large-en-v1.5",
- "AILACasedocs": 25.15,
- "AILAStatutes": 20.74,
- "ARCChallenge": 9.99,
- "AlphaNLI": 13.13,
- "BrightRetrieval (stackoverflow)": 9.51,
- "BrightRetrieval (earth_science)": 24.15,
- "BrightRetrieval (aops)": 6.08,
- "BrightRetrieval (sustainable_living)": 13.27,
- "BrightRetrieval (psychology)": 17.44,
- "BrightRetrieval (robotics)": 12.21,
- "BrightRetrieval (theoremqa_theorems)": 5.51,
- "BrightRetrieval (pony)": 5.64,
- "BrightRetrieval (biology)": 11.96,
- "BrightRetrieval (theoremqa_questions)": 12.56,
- "BrightRetrieval (leetcode)": 26.68,
- "BrightRetrieval (economics)": 16.59,
- "GerDaLIRSmall": 3.96,
- "HellaSwag": 28.5,
- "LeCaRDv2": 22.68,
- "LegalBenchConsumerContractsQA": 73.52,
- "LegalBenchCorporateLobbying": 91.51,
- "LegalQuAD": 16.22,
- "LegalSummarization": 59.99,
- "PIQA": 27.99,
- "Quail": 1.83,
- "RARbCode": 48.12,
- "RARbMath": 57.36,
- "SIQA": 1.04,
- "SpartQA": 2.99,
- "TempReasonL1": 1.46,
- "TempReasonL2Fact": 24.25,
- "TempReasonL2Pure": 2.35,
- "TempReasonL3Fact": 20.64,
- "TempReasonL3Pure": 6.67,
- "WinoGrande": 19.18
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "bge-large-en-v1.5"
- }
- ]
- }
- },
- "allenai-specter": {
- "BitextMining": {
- "f1": [
- {
- "Model": "allenai-specter"
+ "Model": "allenai-specter"
}
]
},
@@ -11312,101 +10758,6 @@
]
}
},
- "GritLM-7B": {
- "BitextMining": {
- "f1": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "GritLM-7B",
- "ARCChallenge": 26.68,
- "AlphaNLI": 34.0,
- "BrightRetrieval (pony)": 21.98,
- "BrightRetrieval (robotics)": 17.31,
- "BrightRetrieval (economics)": 19.0,
- "BrightRetrieval (theoremqa_questions)": 23.34,
- "BrightRetrieval (leetcode)": 29.85,
- "BrightRetrieval (earth_science)": 32.77,
- "BrightRetrieval (stackoverflow)": 11.62,
- "BrightRetrieval (sustainable_living)": 18.04,
- "BrightRetrieval (biology)": 25.04,
- "BrightRetrieval (psychology)": 19.92,
- "BrightRetrieval (theoremqa_theorems)": 17.41,
- "BrightRetrieval (aops)": 8.91,
- "HellaSwag": 39.45,
- "PIQA": 44.35,
- "Quail": 11.69,
- "RARbCode": 84.0,
- "RARbMath": 82.35,
- "SIQA": 7.23,
- "SpartQA": 9.29,
- "TempReasonL1": 7.15,
- "TempReasonL2Fact": 58.38,
- "TempReasonL2Pure": 11.22,
- "TempReasonL3Fact": 44.29,
- "TempReasonL3Pure": 14.15,
- "WinoGrande": 53.74
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "GritLM-7B"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "GritLM-7B",
- "Core17InstructionRetrieval": 2.62,
- "News21InstructionRetrieval": -1.01,
- "Robust04InstructionRetrieval": -1.68
- }
- ]
- }
- },
"bge-m3": {
"BitextMining": {
"f1": [
@@ -13236,163 +12587,6 @@
]
}
},
- "gte-Qwen2-7B-instruct": {
- "BitextMining": {
- "f1": [
- {
- "Model": "gte-Qwen2-7B-instruct"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "gte-Qwen2-7B-instruct"
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "gte-Qwen2-7B-instruct"
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "gte-Qwen2-7B-instruct"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "gte-Qwen2-7B-instruct"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "gte-Qwen2-7B-instruct",
- "BrightRetrieval (earth_science)": 40.66,
- "BrightRetrieval (sustainable_living)": 20.82,
- "BrightRetrieval (theoremqa_theorems)": 28.15,
- "BrightRetrieval (aops)": 15.1,
- "BrightRetrieval (economics)": 16.18,
- "BrightRetrieval (pony)": 1.25,
- "BrightRetrieval (stackoverflow)": 13.95,
- "BrightRetrieval (leetcode)": 31.07,
- "BrightRetrieval (biology)": 32.09,
- "BrightRetrieval (theoremqa_questions)": 29.9,
- "BrightRetrieval (robotics)": 12.82,
- "BrightRetrieval (psychology)": 26.58
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "gte-Qwen2-7B-instruct"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "gte-Qwen2-7B-instruct"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "gte-Qwen2-7B-instruct"
- }
- ]
- }
- },
- "instructor-xl": {
- "BitextMining": {
- "f1": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "instructor-xl",
- "BrightRetrieval (aops)": 8.26,
- "BrightRetrieval (robotics)": 17.39,
- "BrightRetrieval (economics)": 22.81,
- "BrightRetrieval (stackoverflow)": 19.06,
- "BrightRetrieval (leetcode)": 27.5,
- "BrightRetrieval (theoremqa_questions)": 14.59,
- "BrightRetrieval (psychology)": 27.43,
- "BrightRetrieval (biology)": 21.91,
- "BrightRetrieval (theoremqa_theorems)": 6.5,
- "BrightRetrieval (earth_science)": 34.35,
- "BrightRetrieval (sustainable_living)": 18.82,
- "BrightRetrieval (pony)": 5.02
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "instructor-xl"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "instructor-xl",
- "Core17InstructionRetrieval": 0.69,
- "News21InstructionRetrieval": -0.9,
- "Robust04InstructionRetrieval": -8.08
- }
- ]
- }
- },
"mistral-embed": {
"BitextMining": {
"f1": [
@@ -13734,154 +12928,18 @@
]
}
},
- "google-gecko.text-embedding-preview-0409": {
+ "jina-embeddings-v2-base-en": {
"BitextMining": {
"f1": [
{
- "Model": "google-gecko.text-embedding-preview-0409"
+ "Model": "jina-embeddings-v2-base-en"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "google-gecko.text-embedding-preview-0409",
- "AmazonCounterfactualClassification (en)": 75.34,
- "AmazonPolarityClassification": 97.34,
- "AmazonReviewsClassification (en)": 51.17,
- "Banking77Classification": 88.62,
- "EmotionClassification": 52.51,
- "ImdbClassification": 95.65,
- "MTOPDomainClassification (en)": 98.35,
- "MTOPIntentClassification (en)": 83.43,
- "MassiveIntentClassification (en)": 80.22,
- "MassiveScenarioClassification (en)": 87.19,
- "ToxicConversationsClassification": 89.67,
- "TweetSentimentExtractionClassification": 74.52
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "ArxivClusteringP2P": 46.27,
- "ArxivClusteringS2S": 38.36,
- "BiorxivClusteringP2P": 37.87,
- "BiorxivClusteringS2S": 35.67,
- "MedrxivClusteringP2P": 33.11,
- "MedrxivClusteringS2S": 31.54,
- "RedditClustering": 65.81,
- "RedditClusteringP2P": 66.62,
- "StackExchangeClustering": 74.52,
- "StackExchangeClusteringP2P": 37.63,
- "TwentyNewsgroupsClustering": 54.87
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "SprintDuplicateQuestions": 96.26,
- "TwitterSemEval2015": 79.04,
- "TwitterURLCorpus": 87.53
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "AskUbuntuDupQuestions": 64.4,
- "MindSmallReranking": 33.07,
- "SciDocsRR": 83.59,
- "StackOverflowDupQuestions": 54.56
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "ArguAna": 62.18,
- "BrightRetrieval (earth_science)": 34.38,
- "BrightRetrieval (leetcode)": 29.64,
- "BrightRetrieval (theoremqa_questions)": 21.51,
- "BrightRetrieval (aops)": 9.33,
- "BrightRetrieval (sustainable_living)": 17.25,
- "BrightRetrieval (pony)": 3.59,
- "BrightRetrieval (theoremqa_theorems)": 16.77,
- "BrightRetrieval (stackoverflow)": 17.93,
- "BrightRetrieval (biology)": 22.98,
- "BrightRetrieval (robotics)": 15.98,
- "BrightRetrieval (economics)": 19.5,
- "BrightRetrieval (psychology)": 27.86,
- "CQADupstackRetrieval": 48.89,
- "ClimateFEVER": 33.21,
- "DBPedia": 47.12,
- "FEVER": 86.96,
- "FiQA2018": 59.24,
- "HotpotQA": 71.33,
- "MSMARCO": 32.58,
- "NFCorpus": 40.33,
- "NQ": 61.28,
- "QuoraRetrieval": 88.18,
- "SCIDOCS": 20.34,
- "SciFact": 75.42,
- "TRECCOVID": 82.62,
- "Touche2020": 25.86
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "BIOSSES": 89.46,
- "SICK-R": 81.93,
- "STS12": 77.59,
- "STS13": 90.36,
- "STS14": 85.25,
- "STS15": 89.66,
- "STS16": 87.34,
- "STS17 (en-en)": 92.06,
- "STS22 (en)": 68.02,
- "STSBenchmark": 88.99
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "SummEval": 32.63
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "google-gecko.text-embedding-preview-0409",
- "Core17InstructionRetrieval": 5.44,
- "News21InstructionRetrieval": 3.94,
- "Robust04InstructionRetrieval": -2.4
- }
- ]
- }
- },
- "jina-embeddings-v2-base-en": {
- "BitextMining": {
- "f1": [
- {
- "Model": "jina-embeddings-v2-base-en"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "jina-embeddings-v2-base-en"
+ "Model": "jina-embeddings-v2-base-en"
}
]
},
@@ -15850,162 +14908,6 @@
]
}
},
- "e5-mistral-7b-instruct": {
- "BitextMining": {
- "f1": [
- {
- "Model": "e5-mistral-7b-instruct",
- "Tatoeba (rus-Cyrl_eng-Latn)": 93.75
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "e5-mistral-7b-instruct",
- "AmazonReviewsClassification (fr)": 36.71,
- "GeoreviewClassification (rus-Cyrl)": 50.25,
- "HeadlineClassification (rus-Cyrl)": 85.68,
- "InappropriatenessClassification (rus-Cyrl)": 67.19,
- "KinopoiskClassification (rus-Cyrl)": 65.49,
- "MTOPDomainClassification (fr)": 74.8,
- "MTOPIntentClassification (fr)": 53.97,
- "MasakhaNEWSClassification (fra)": 80.59,
- "MassiveIntentClassification (rus-Cyrl)": 76.08,
- "MassiveIntentClassification (fr)": 46.39,
- "MassiveScenarioClassification (rus-Cyrl)": 79.61,
- "MassiveScenarioClassification (fr)": 53.86,
- "RuReviewsClassification (rus-Cyrl)": 67.68,
- "RuSciBenchGRNTIClassification (rus-Cyrl)": 64.59,
- "RuSciBenchOECDClassification (rus-Cyrl)": 51.13
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "e5-mistral-7b-instruct",
- "AlloProfClusteringP2P": 61.06,
- "AlloProfClusteringS2S": 28.12,
- "GeoreviewClusteringP2P (rus-Cyrl)": 65.68,
- "HALClusteringS2S": 19.69,
- "MLSUMClusteringP2P": 45.59,
- "MLSUMClusteringS2S": 32.0,
- "MasakhaNEWSClusteringP2P (fra)": 52.47,
- "MasakhaNEWSClusteringS2S (fra)": 49.2,
- "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 61.55,
- "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 52.72
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "e5-mistral-7b-instruct",
- "OpusparcusPC (rus-Cyrl)": 91.44,
- "OpusparcusPC (fr)": 88.5,
- "PawsXPairClassification (fr)": 63.65,
- "TERRa (rus-Cyrl)": 59.38
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "e5-mistral-7b-instruct",
- "AlloprofReranking": 47.36,
- "RuBQReranking (rus-Cyrl)": 74.61,
- "SyntecReranking": 77.05
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "e5-mistral-7b-instruct",
- "AILACasedocs": 38.76,
- "AILAStatutes": 38.07,
- "ARCChallenge": 17.81,
- "AlloprofRetrieval": 16.46,
- "AlphaNLI": 26.12,
- "BSARDRetrieval": 0.0,
- "BrightRetrieval (sustainable_living)": 18.51,
- "BrightRetrieval (economics)": 15.49,
- "BrightRetrieval (theoremqa_theorems)": 23.78,
- "BrightRetrieval (aops)": 7.1,
- "BrightRetrieval (theoremqa_questions)": 23.94,
- "BrightRetrieval (stackoverflow)": 9.83,
- "BrightRetrieval (psychology)": 15.79,
- "BrightRetrieval (pony)": 4.81,
- "BrightRetrieval (leetcode)": 28.72,
- "BrightRetrieval (biology)": 18.84,
- "BrightRetrieval (earth_science)": 25.96,
- "BrightRetrieval (robotics)": 16.37,
- "GerDaLIRSmall": 37.18,
- "HellaSwag": 34.85,
- "LEMBNarrativeQARetrieval": 44.62,
- "LEMBNeedleRetrieval": 48.25,
- "LEMBPasskeyRetrieval": 71.0,
- "LEMBQMSumRetrieval": 43.63,
- "LEMBSummScreenFDRetrieval": 96.82,
- "LEMBWikimQARetrieval": 82.11,
- "LeCaRDv2": 68.56,
- "LegalBenchConsumerContractsQA": 75.46,
- "LegalBenchCorporateLobbying": 94.01,
- "LegalQuAD": 59.64,
- "LegalSummarization": 66.51,
- "MintakaRetrieval (fr)": 3.57,
- "PIQA": 39.37,
- "Quail": 7.01,
- "RARbCode": 78.46,
- "RARbMath": 72.16,
- "RiaNewsRetrieval (rus-Cyrl)": 81.94,
- "RuBQRetrieval (rus-Cyrl)": 73.98,
- "SIQA": 5.42,
- "SpartQA": 9.92,
- "SyntecRetrieval": 55.9,
- "TempReasonL1": 3.31,
- "TempReasonL2Fact": 36.9,
- "TempReasonL2Pure": 9.18,
- "TempReasonL3Fact": 30.18,
- "TempReasonL3Pure": 14.31,
- "WinoGrande": 41.21,
- "XPQARetrieval (fr)": 41.29
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "e5-mistral-7b-instruct",
- "RUParaPhraserSTS (rus-Cyrl)": 76.17,
- "RuSTSBenchmarkSTS (rus-Cyrl)": 84.13,
- "SICKFr": 64.39,
- "STS22 (fr)": 69.82,
- "STSBenchmarkMultilingualSTS (rus-Cyrl)": 84.25,
- "STSBenchmarkMultilingualSTS (fr)": 61.87
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "e5-mistral-7b-instruct",
- "SummEvalFr": 32.22
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "e5-mistral-7b-instruct",
- "Core17InstructionRetrieval": 0.09,
- "News21InstructionRetrieval": -0.86,
- "Robust04InstructionRetrieval": -9.59
- }
- ]
- }
- },
"gbert-large": {
"BitextMining": {
"f1": [
@@ -19942,4872 +18844,5868 @@
]
}
},
- "all-mpnet-base-v2": {
+ "e5-base-v2": {
"BitextMining": {
"f1": [
{
- "Model": "all-mpnet-base-v2",
- "BornholmBitextMining (dan-Latn)": 27.44,
- "Tatoeba (pol-Latn_eng-Latn)": 4.09,
- "Tatoeba (ita-Latn_eng-Latn)": 11.1,
- "Tatoeba (cat-Latn_eng-Latn)": 9.44,
- "Tatoeba (aze-Latn_eng-Latn)": 1.49,
- "Tatoeba (eus-Latn_eng-Latn)": 3.94,
- "Tatoeba (epo-Latn_eng-Latn)": 7.15,
- "Tatoeba (lit-Latn_eng-Latn)": 1.02,
- "Tatoeba (ast-Latn_eng-Latn)": 9.78,
- "Tatoeba (bul-Cyrl_eng-Latn)": 0.35,
- "Tatoeba (ceb-Latn_eng-Latn)": 4.41,
- "Tatoeba (mkd-Cyrl_eng-Latn)": 0.0,
- "Tatoeba (tzl-Latn_eng-Latn)": 3.55,
- "Tatoeba (zsm-Latn_eng-Latn)": 4.75,
- "Tatoeba (mhr-Cyrl_eng-Latn)": 0.17,
- "Tatoeba (pam-Latn_eng-Latn)": 4.32,
- "Tatoeba (amh-Ethi_eng-Latn)": 0.0,
- "Tatoeba (slv-Latn_eng-Latn)": 3.73,
- "Tatoeba (lvs-Latn_eng-Latn)": 2.98,
- "Tatoeba (sqi-Latn_eng-Latn)": 3.45,
- "Tatoeba (orv-Cyrl_eng-Latn)": 0.0,
- "Tatoeba (vie-Latn_eng-Latn)": 4.96,
- "Tatoeba (pes-Arab_eng-Latn)": 0.2,
- "Tatoeba (por-Latn_eng-Latn)": 10.48,
- "Tatoeba (dtp-Latn_eng-Latn)": 3.54,
- "Tatoeba (yid-Hebr_eng-Latn)": 0.08,
- "Tatoeba (isl-Latn_eng-Latn)": 3.86,
- "Tatoeba (cha-Latn_eng-Latn)": 12.2,
- "Tatoeba (ron-Latn_eng-Latn)": 7.34,
- "Tatoeba (hye-Armn_eng-Latn)": 0.14,
- "Tatoeba (mar-Deva_eng-Latn)": 0.11,
- "Tatoeba (hin-Deva_eng-Latn)": 0.02,
- "Tatoeba (kor-Hang_eng-Latn)": 0.32,
- "Tatoeba (srp-Cyrl_eng-Latn)": 1.89,
- "Tatoeba (csb-Latn_eng-Latn)": 4.19,
- "Tatoeba (jpn-Jpan_eng-Latn)": 1.71,
- "Tatoeba (ber-Tfng_eng-Latn)": 4.56,
- "Tatoeba (wuu-Hans_eng-Latn)": 0.91,
- "Tatoeba (jav-Latn_eng-Latn)": 3.17,
- "Tatoeba (nob-Latn_eng-Latn)": 4.37,
- "Tatoeba (bre-Latn_eng-Latn)": 3.65,
- "Tatoeba (kzj-Latn_eng-Latn)": 3.62,
- "Tatoeba (urd-Arab_eng-Latn)": 0.0,
- "Tatoeba (ces-Latn_eng-Latn)": 3.56,
- "Tatoeba (cbk-Latn_eng-Latn)": 9.33,
- "Tatoeba (gla-Latn_eng-Latn)": 2.04,
- "Tatoeba (war-Latn_eng-Latn)": 5.14,
- "Tatoeba (swh-Latn_eng-Latn)": 6.01,
- "Tatoeba (swg-Latn_eng-Latn)": 7.86,
- "Tatoeba (glg-Latn_eng-Latn)": 12.0,
- "Tatoeba (fao-Latn_eng-Latn)": 7.08,
- "Tatoeba (gsw-Latn_eng-Latn)": 10.67,
- "Tatoeba (rus-Cyrl_eng-Latn)": 0.14,
- "Tatoeba (kaz-Cyrl_eng-Latn)": 0.52,
- "Tatoeba (gle-Latn_eng-Latn)": 2.19,
- "Tatoeba (slk-Latn_eng-Latn)": 3.4,
- "Tatoeba (nno-Latn_eng-Latn)": 5.75,
- "Tatoeba (cor-Latn_eng-Latn)": 2.42,
- "Tatoeba (nov-Latn_eng-Latn)": 16.61,
- "Tatoeba (swe-Latn_eng-Latn)": 6.55,
- "Tatoeba (max-Deva_eng-Latn)": 6.46,
- "Tatoeba (oci-Latn_eng-Latn)": 8.57,
- "Tatoeba (lfn-Latn_eng-Latn)": 6.1,
- "Tatoeba (fra-Latn_eng-Latn)": 16.9,
- "Tatoeba (ben-Beng_eng-Latn)": 0.0,
- "Tatoeba (bel-Cyrl_eng-Latn)": 0.65,
- "Tatoeba (lat-Latn_eng-Latn)": 5.78,
- "Tatoeba (cmn-Hans_eng-Latn)": 2.22,
- "Tatoeba (kat-Geor_eng-Latn)": 0.43,
- "Tatoeba (bos-Latn_eng-Latn)": 4.6,
- "Tatoeba (xho-Latn_eng-Latn)": 3.3,
- "Tatoeba (tha-Thai_eng-Latn)": 0.0,
- "Tatoeba (cym-Latn_eng-Latn)": 4.88,
- "Tatoeba (deu-Latn_eng-Latn)": 11.46,
- "Tatoeba (awa-Deva_eng-Latn)": 0.44,
- "Tatoeba (ido-Latn_eng-Latn)": 9.84,
- "Tatoeba (tat-Cyrl_eng-Latn)": 0.24,
- "Tatoeba (kab-Latn_eng-Latn)": 1.31,
- "Tatoeba (uzb-Latn_eng-Latn)": 1.98,
- "Tatoeba (heb-Hebr_eng-Latn)": 0.28,
- "Tatoeba (ara-Arab_eng-Latn)": 0.1,
- "Tatoeba (fry-Latn_eng-Latn)": 12.43,
- "Tatoeba (afr-Latn_eng-Latn)": 6.08,
- "Tatoeba (kur-Latn_eng-Latn)": 3.65,
- "Tatoeba (pms-Latn_eng-Latn)": 7.63,
- "Tatoeba (ell-Grek_eng-Latn)": 0.0,
- "Tatoeba (spa-Latn_eng-Latn)": 10.12,
- "Tatoeba (dsb-Latn_eng-Latn)": 2.96,
- "Tatoeba (uig-Arab_eng-Latn)": 0.33,
- "Tatoeba (nld-Latn_eng-Latn)": 9.29,
- "Tatoeba (tel-Telu_eng-Latn)": 0.73,
- "Tatoeba (hrv-Latn_eng-Latn)": 3.77,
- "Tatoeba (nds-Latn_eng-Latn)": 10.96,
- "Tatoeba (hun-Latn_eng-Latn)": 3.23,
- "Tatoeba (est-Latn_eng-Latn)": 2.35,
- "Tatoeba (mal-Mlym_eng-Latn)": 0.15,
- "Tatoeba (khm-Khmr_eng-Latn)": 0.28,
- "Tatoeba (hsb-Latn_eng-Latn)": 3.12,
- "Tatoeba (tgl-Latn_eng-Latn)": 4.06,
- "Tatoeba (ang-Latn_eng-Latn)": 9.77,
- "Tatoeba (tur-Latn_eng-Latn)": 3.16,
- "Tatoeba (tuk-Latn_eng-Latn)": 2.23,
- "Tatoeba (ile-Latn_eng-Latn)": 17.84,
- "Tatoeba (mon-Cyrl_eng-Latn)": 0.81,
- "Tatoeba (yue-Hant_eng-Latn)": 1.16,
- "Tatoeba (ina-Latn_eng-Latn)": 22.55,
- "Tatoeba (tam-Taml_eng-Latn)": 0.73,
- "Tatoeba (ukr-Cyrl_eng-Latn)": 0.5,
- "Tatoeba (dan-Latn_eng-Latn)": 10.01,
- "Tatoeba (arq-Arab_eng-Latn)": 0.33,
- "Tatoeba (arz-Arab_eng-Latn)": 0.0,
- "Tatoeba (fin-Latn_eng-Latn)": 3.82,
- "Tatoeba (ind-Latn_eng-Latn)": 4.88
+ "Model": "e5-base-v2"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "e5-base-v2"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "e5-base-v2",
+ "BiorxivClusteringP2P": 37.12,
+ "BiorxivClusteringS2S": 33.41,
+ "MedrxivClusteringP2P": 31.82,
+ "MedrxivClusteringS2S": 29.68,
+ "RedditClustering": 56.54,
+ "RedditClusteringP2P": 63.23,
+ "StackExchangeClustering": 64.6,
+ "StackExchangeClusteringP2P": 33.02,
+ "TwentyNewsgroupsClustering": 49.86
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "e5-base-v2"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "e5-base-v2"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "e5-base-v2"
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "e5-base-v2"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "e5-base-v2"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "e5-base-v2",
+ "Core17InstructionRetrieval": -2.9,
+ "News21InstructionRetrieval": -2.0,
+ "Robust04InstructionRetrieval": -6.73
+ }
+ ]
+ }
+ },
+ "text-search-babbage-001": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "text-search-babbage-001"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "text-search-babbage-001"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "text-search-babbage-001"
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "text-search-babbage-001"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "text-search-babbage-001"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "text-search-babbage-001",
+ "ArguAna": 49.2,
+ "ClimateFEVER": 19.9,
+ "FEVER": 77.0,
+ "FiQA2018": 42.2,
+ "HotpotQA": 63.1,
+ "NFCorpus": 36.7,
+ "QuoraRetrieval": 69.7,
+ "SciFact": 70.4,
+ "TRECCOVID": 58.5,
+ "Touche2020": 29.7
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "text-search-babbage-001"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "text-search-babbage-001"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "text-search-babbage-001"
+ }
+ ]
+ }
+ },
+ "all-mpnet-base-v2-instruct": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "all-mpnet-base-v2-instruct"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "all-mpnet-base-v2-instruct"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "all-mpnet-base-v2-instruct"
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "all-mpnet-base-v2-instruct"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "all-mpnet-base-v2-instruct"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "all-mpnet-base-v2-instruct",
+ "ARCChallenge": 10.35,
+ "AlphaNLI": 1.96,
+ "HellaSwag": 13.01,
+ "PIQA": 27.18,
+ "Quail": 3.02,
+ "RARbCode": 48.95,
+ "RARbMath": 69.21,
+ "SIQA": 1.29,
+ "SpartQA": 1.01,
+ "TempReasonL1": 1.52,
+ "TempReasonL2Fact": 7.28,
+ "TempReasonL2Pure": 1.03,
+ "TempReasonL3Fact": 7.03,
+ "TempReasonL3Pure": 5.16,
+ "WinoGrande": 9.66
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "all-mpnet-base-v2-instruct"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "all-mpnet-base-v2-instruct"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "all-mpnet-base-v2-instruct"
+ }
+ ]
+ }
+ },
+ "bge-small-en-v1.5-instruct": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "bge-small-en-v1.5-instruct"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "bge-small-en-v1.5-instruct"
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "bge-small-en-v1.5-instruct"
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "bge-small-en-v1.5-instruct"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "bge-small-en-v1.5-instruct"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "bge-small-en-v1.5-instruct",
+ "ARCChallenge": 7.72,
+ "AlphaNLI": 1.26,
+ "HellaSwag": 23.41,
+ "PIQA": 20.79,
+ "Quail": 2.01,
+ "RARbCode": 41.52,
+ "RARbMath": 46.5,
+ "SIQA": 0.98,
+ "SpartQA": 2.86,
+ "TempReasonL1": 1.27,
+ "TempReasonL2Fact": 16.72,
+ "TempReasonL2Pure": 1.1,
+ "TempReasonL3Fact": 12.81,
+ "TempReasonL3Pure": 4.63,
+ "WinoGrande": 5.35
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "bge-small-en-v1.5-instruct"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "bge-small-en-v1.5-instruct"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "bge-small-en-v1.5-instruct"
+ }
+ ]
+ }
+ },
+ "LaBSE-en-ru": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "LaBSE-en-ru",
+ "Tatoeba (rus-Cyrl_eng-Latn)": 93.62
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "LaBSE-en-ru",
+ "GeoreviewClassification (rus-Cyrl)": 40.89,
+ "HeadlineClassification (rus-Cyrl)": 68.75,
+ "InappropriatenessClassification (rus-Cyrl)": 58.48,
+ "KinopoiskClassification (rus-Cyrl)": 49.85,
+ "MassiveIntentClassification (swa-Latn)": 19.98,
+ "MassiveIntentClassification (aze-Latn)": 19.52,
+ "MassiveIntentClassification (tur-Latn)": 24.12,
+ "MassiveIntentClassification (cmo-Hans)": 3.96,
+ "MassiveIntentClassification (amh-Ethi)": 2.76,
+ "MassiveIntentClassification (kan-Knda)": 2.86,
+ "MassiveIntentClassification (hin-Deva)": 3.29,
+ "MassiveIntentClassification (tgl-Latn)": 27.08,
+ "MassiveIntentClassification (tha-Thai)": 4.0,
+ "MassiveIntentClassification (swe-Latn)": 32.01,
+ "MassiveIntentClassification (deu-Latn)": 35.14,
+ "MassiveIntentClassification (spa-Latn)": 37.67,
+ "MassiveIntentClassification (por-Latn)": 39.84,
+ "MassiveIntentClassification (jpn-Jpan)": 4.78,
+ "MassiveIntentClassification (fin-Latn)": 31.11,
+ "MassiveIntentClassification (kat-Geor)": 2.87,
+ "MassiveIntentClassification (slv-Latn)": 35.66,
+ "MassiveIntentClassification (rus-Cyrl)": 60.53,
+ "MassiveIntentClassification (ita-Latn)": 43.32,
+ "MassiveIntentClassification (tel-Telu)": 2.72,
+ "MassiveIntentClassification (afr-Latn)": 30.59,
+ "MassiveIntentClassification (isl-Latn)": 25.61,
+ "MassiveIntentClassification (fas-Arab)": 3.71,
+ "MassiveIntentClassification (vie-Latn)": 23.0,
+ "MassiveIntentClassification (ben-Beng)": 3.35,
+ "MassiveIntentClassification (hye-Armn)": 2.8,
+ "MassiveIntentClassification (pol-Latn)": 31.3,
+ "MassiveIntentClassification (cym-Latn)": 26.59,
+ "MassiveIntentClassification (jav-Latn)": 26.84,
+ "MassiveIntentClassification (mon-Cyrl)": 35.97,
+ "MassiveIntentClassification (en)": 60.48,
+ "MassiveIntentClassification (msa-Latn)": 27.82,
+ "MassiveIntentClassification (nob-Latn)": 35.78,
+ "MassiveIntentClassification (heb-Hebr)": 2.33,
+ "MassiveIntentClassification (khm-Khmr)": 4.6,
+ "MassiveIntentClassification (nld-Latn)": 34.66,
+ "MassiveIntentClassification (ind-Latn)": 33.31,
+ "MassiveIntentClassification (mal-Mlym)": 2.63,
+ "MassiveIntentClassification (tam-Taml)": 2.22,
+ "MassiveIntentClassification (mya-Mymr)": 3.57,
+ "MassiveIntentClassification (urd-Arab)": 3.36,
+ "MassiveIntentClassification (dan-Latn)": 38.66,
+ "MassiveIntentClassification (cmo-Hant)": 5.29,
+ "MassiveIntentClassification (ron-Latn)": 37.45,
+ "MassiveIntentClassification (lav-Latn)": 23.92,
+ "MassiveIntentClassification (fra-Latn)": 40.29,
+ "MassiveIntentClassification (ell-Grek)": 11.14,
+ "MassiveIntentClassification (sqi-Latn)": 35.84,
+ "MassiveIntentClassification (hun-Latn)": 26.74,
+ "MassiveIntentClassification (kor-Kore)": 2.69,
+ "MassiveIntentClassification (ara-Arab)": 5.19,
+ "MassiveScenarioClassification (swa-Latn)": 25.61,
+ "MassiveScenarioClassification (aze-Latn)": 24.48,
+ "MassiveScenarioClassification (tur-Latn)": 31.38,
+ "MassiveScenarioClassification (cmo-Hans)": 9.98,
+ "MassiveScenarioClassification (amh-Ethi)": 7.59,
+ "MassiveScenarioClassification (kan-Knda)": 8.73,
+ "MassiveScenarioClassification (hin-Deva)": 8.77,
+ "MassiveScenarioClassification (tgl-Latn)": 35.12,
+ "MassiveScenarioClassification (tha-Thai)": 8.69,
+ "MassiveScenarioClassification (swe-Latn)": 35.83,
+ "MassiveScenarioClassification (deu-Latn)": 41.72,
+ "MassiveScenarioClassification (spa-Latn)": 43.33,
+ "MassiveScenarioClassification (por-Latn)": 44.62,
+ "MassiveScenarioClassification (jpn-Jpan)": 9.51,
+ "MassiveScenarioClassification (fin-Latn)": 33.79,
+ "MassiveScenarioClassification (kat-Geor)": 7.32,
+ "MassiveScenarioClassification (slv-Latn)": 37.6,
+ "MassiveScenarioClassification (rus-Cyrl)": 65.15,
+ "MassiveScenarioClassification (ita-Latn)": 47.28,
+ "MassiveScenarioClassification (tel-Telu)": 7.53,
+ "MassiveScenarioClassification (afr-Latn)": 37.27,
+ "MassiveScenarioClassification (isl-Latn)": 30.32,
+ "MassiveScenarioClassification (fas-Arab)": 6.83,
+ "MassiveScenarioClassification (vie-Latn)": 28.92,
+ "MassiveScenarioClassification (ben-Beng)": 8.57,
+ "MassiveScenarioClassification (hye-Armn)": 8.91,
+ "MassiveScenarioClassification (pol-Latn)": 33.75,
+ "MassiveScenarioClassification (cym-Latn)": 30.38,
+ "MassiveScenarioClassification (jav-Latn)": 33.94,
+ "MassiveScenarioClassification (mon-Cyrl)": 41.53,
+ "MassiveScenarioClassification (en)": 65.43,
+ "MassiveScenarioClassification (msa-Latn)": 36.28,
+ "MassiveScenarioClassification (nob-Latn)": 42.43,
+ "MassiveScenarioClassification (heb-Hebr)": 8.64,
+ "MassiveScenarioClassification (khm-Khmr)": 9.99,
+ "MassiveScenarioClassification (nld-Latn)": 41.47,
+ "MassiveScenarioClassification (ind-Latn)": 39.05,
+ "MassiveScenarioClassification (mal-Mlym)": 7.24,
+ "MassiveScenarioClassification (tam-Taml)": 7.71,
+ "MassiveScenarioClassification (mya-Mymr)": 9.94,
+ "MassiveScenarioClassification (urd-Arab)": 9.16,
+ "MassiveScenarioClassification (dan-Latn)": 44.69,
+ "MassiveScenarioClassification (cmo-Hant)": 10.48,
+ "MassiveScenarioClassification (ron-Latn)": 44.55,
+ "MassiveScenarioClassification (lav-Latn)": 26.26,
+ "MassiveScenarioClassification (fra-Latn)": 45.08,
+ "MassiveScenarioClassification (ell-Grek)": 19.46,
+ "MassiveScenarioClassification (sqi-Latn)": 40.9,
+ "MassiveScenarioClassification (hun-Latn)": 33.92,
+ "MassiveScenarioClassification (kor-Kore)": 7.37,
+ "MassiveScenarioClassification (ara-Arab)": 12.43,
+ "RuReviewsClassification (rus-Cyrl)": 58.01,
+ "RuSciBenchGRNTIClassification (rus-Cyrl)": 52.8,
+ "RuSciBenchOECDClassification (rus-Cyrl)": 40.36
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "LaBSE-en-ru",
+ "GeoreviewClusteringP2P (rus-Cyrl)": 51.89,
+ "MLSUMClusteringP2P (rus-Cyrl)": 37.87,
+ "MLSUMClusteringS2S (rus-Cyrl)": 41.24,
+ "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 47.48,
+ "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 41.16
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "LaBSE-en-ru",
+ "OpusparcusPC (rus-Cyrl)": 87.18,
+ "TERRa (rus-Cyrl)": 55.61
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "LaBSE-en-ru",
+ "RuBQReranking (rus-Cyrl)": 54.83
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "LaBSE-en-ru",
+ "RiaNewsRetrieval (rus-Cyrl)": 34.73,
+ "RuBQRetrieval (rus-Cyrl)": 29.03
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "LaBSE-en-ru",
+ "RUParaPhraserSTS (rus-Cyrl)": 65.87,
+ "RuSTSBenchmarkSTS (rus-Cyrl)": 73.32,
+ "STS22 (deu-Latn)": 38.9,
+ "STS22 (en)": 59.47,
+ "STS22 (pol-Latn_eng-Latn)": 58.73,
+ "STS22 (spa-Latn)": 60.85,
+ "STS22 (fra-Latn)": 74.98,
+ "STS22 (deu-Latn_eng-Latn)": 47.98,
+ "STS22 (deu-Latn_fra-Latn)": 59.4,
+ "STS22 (deu-Latn_pol-Latn)": 39.48,
+ "STS22 (pol-Latn)": 32.74,
+ "STS22 (tur-Latn)": 55.04,
+ "STS22 (spa-Latn_eng-Latn)": 70.8,
+ "STS22 (rus-Cyrl)": 58.53,
+ "STS22 (ita-Latn)": 68.58,
+ "STS22 (fra-Latn_pol-Latn)": 61.98,
+ "STS22 (spa-Latn_ita-Latn)": 66.83,
+ "STS22 (cmn-Hans_eng-Latn)": 24.98,
+ "STS22 (ara-Arab)": 31.85,
+ "STS22 (cmn-Hans)": 35.1,
+ "STSBenchmarkMultilingualSTS (rus-Cyrl)": 73.02
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "LaBSE-en-ru"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "LaBSE-en-ru"
+ }
+ ]
+ }
+ },
+ "text2vec-base-chinese": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "text2vec-base-chinese"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "text2vec-base-chinese",
+ "AmazonReviewsClassification (zh)": 34.12,
+ "IFlyTek": 42.05,
+ "JDReview": 82.14,
+ "MassiveIntentClassification (zh-CN)": 63.98,
+ "MassiveScenarioClassification (zh-CN)": 70.52,
+ "MultilingualSentiment": 60.98,
+ "OnlineShopping": 85.69,
+ "TNews": 43.01,
+ "Waimai": 77.22
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "text2vec-base-chinese",
+ "CLSClusteringP2P": 35.27,
+ "CLSClusteringS2S": 32.42,
+ "ThuNewsClusteringP2P": 42.92,
+ "ThuNewsClusteringS2S": 40.01
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "text2vec-base-chinese",
+ "Cmnli": 73.87,
+ "Ocnli": 60.95
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "text2vec-base-chinese",
+ "CMedQAv1": 59.26,
+ "CMedQAv2": 59.82,
+ "MMarcoReranking": 12.76,
+ "T2Reranking": 65.95
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "text2vec-base-chinese",
+ "CmedqaRetrieval": 15.91,
+ "CovidRetrieval": 44.81,
+ "DuRetrieval": 52.23,
+ "EcomRetrieval": 34.6,
+ "MMarcoRetrieval": 44.06,
+ "MedicalRetrieval": 27.56,
+ "T2Retrieval": 51.67,
+ "VideoRetrieval": 39.52
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "text2vec-base-chinese",
+ "AFQMC": 26.06,
+ "ATEC": 31.93,
+ "BQ": 42.67,
+ "LCQMC": 70.16,
+ "PAWSX": 17.21,
+ "QBQTC": 24.62,
+ "STS22 (zh)": 55.35,
+ "STSB": 79.3
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "text2vec-base-chinese"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "text2vec-base-chinese"
+ }
+ ]
+ }
+ },
+ "herbert-base-retrieval-v2": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "herbert-base-retrieval-v2"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "herbert-base-retrieval-v2",
+ "AllegroReviews": 34.11,
+ "CBD": 68.35,
+ "MassiveIntentClassification (pl)": 65.53,
+ "MassiveScenarioClassification (pl)": 68.51,
+ "PAC": 68.4,
+ "PolEmo2.0-IN": 64.18,
+ "PolEmo2.0-OUT": 45.73
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "herbert-base-retrieval-v2",
+ "8TagsClustering": 28.15
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "herbert-base-retrieval-v2",
+ "CDSC-E": 63.31,
+ "PPC": 84.18,
+ "PSC": 98.87,
+ "SICK-E-PL": 54.93
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "herbert-base-retrieval-v2"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "herbert-base-retrieval-v2",
+ "ArguAna-PL": 41.97,
+ "DBPedia-PL": 24.07,
+ "FiQA-PL": 24.25,
+ "HotpotQA-PL": 43.41,
+ "MSMARCO-PL": 51.56,
+ "NFCorpus-PL": 25.95,
+ "NQ-PL": 35.09,
+ "Quora-PL": 78.86,
+ "SCIDOCS-PL": 11.0,
+ "SciFact-PL": 51.92,
+ "TRECCOVID-PL": 42.64
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "herbert-base-retrieval-v2",
+ "CDSC-R": 86.18,
+ "SICK-R-PL": 64.67,
+ "STS22 (pl)": 39.73
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "herbert-base-retrieval-v2"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "herbert-base-retrieval-v2"
+ }
+ ]
+ }
+ },
+ "voyage-law-2": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "voyage-law-2"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "all-mpnet-base-v2",
- "AllegroReviews (pol-Latn)": 22.99,
- "AmazonCounterfactualClassification (en-ext)": 67.5,
- "AmazonCounterfactualClassification (en)": 65.03,
- "AmazonCounterfactualClassification (deu-Latn)": 55.66,
- "AmazonCounterfactualClassification (jpn-Jpan)": 60.69,
- "AmazonPolarityClassification": 67.14,
- "AmazonReviewsClassification (en)": 31.44,
- "AmazonReviewsClassification (deu-Latn)": 26.05,
- "AmazonReviewsClassification (spa-Latn)": 27.73,
- "AmazonReviewsClassification (fra-Latn)": 28.49,
- "AmazonReviewsClassification (jpn-Jpan)": 23.65,
- "AmazonReviewsClassification (cmn-Hans)": 23.62,
- "AngryTweetsClassification (dan-Latn)": 44.13,
- "Banking77Classification": 81.7,
- "CBD (pol-Latn)": 50.25,
- "DanishPoliticalCommentsClassification (dan-Latn)": 28.31,
- "EmotionClassification": 42.22,
- "GeoreviewClassification (rus-Cyrl)": 25.93,
- "HeadlineClassification (rus-Cyrl)": 28.53,
- "IFlyTek (cmn-Hans)": 17.18,
- "ImdbClassification": 71.17,
- "InappropriatenessClassification (rus-Cyrl)": 51.82,
- "JDReview (cmn-Hans)": 60.19,
- "KinopoiskClassification (rus-Cyrl)": 34.18,
- "LccSentimentClassification (dan-Latn)": 39.27,
- "MTOPDomainClassification (en)": 91.89,
- "MTOPDomainClassification (deu-Latn)": 71.86,
- "MTOPDomainClassification (spa-Latn)": 71.3,
- "MTOPDomainClassification (fra-Latn)": 74.88,
- "MTOPDomainClassification (hin-Deva)": 39.93,
- "MTOPDomainClassification (tha-Thai)": 17.54,
- "MTOPIntentClassification (en)": 68.27,
- "MTOPIntentClassification (deu-Latn)": 44.36,
- "MTOPIntentClassification (spa-Latn)": 39.48,
- "MTOPIntentClassification (fra-Latn)": 37.57,
- "MTOPIntentClassification (hin-Deva)": 18.63,
- "MTOPIntentClassification (tha-Thai)": 5.42,
- "MasakhaNEWSClassification (amh-Ethi)": 36.49,
- "MasakhaNEWSClassification (eng)": 79.75,
- "MasakhaNEWSClassification (fra-Latn)": 77.77,
- "MasakhaNEWSClassification (hau-Latn)": 59.22,
- "MasakhaNEWSClassification (ibo-Latn)": 61.64,
- "MasakhaNEWSClassification (lin-Latn)": 74.0,
- "MasakhaNEWSClassification (lug-Latn)": 58.43,
- "MasakhaNEWSClassification (orm-Ethi)": 48.15,
- "MasakhaNEWSClassification (pcm-Latn)": 92.2,
- "MasakhaNEWSClassification (run-Latn)": 64.72,
- "MasakhaNEWSClassification (sna-Latn)": 73.69,
- "MasakhaNEWSClassification (som-Latn)": 49.97,
- "MasakhaNEWSClassification (swa-Latn)": 55.15,
- "MasakhaNEWSClassification (tir-Ethi)": 27.46,
- "MasakhaNEWSClassification (xho-Latn)": 60.98,
- "MasakhaNEWSClassification (yor-Latn)": 63.33,
- "MassiveIntentClassification (en)": 69.76,
- "MassiveIntentClassification (jav-Latn)": 31.75,
- "MassiveIntentClassification (fra-Latn)": 44.27,
- "MassiveIntentClassification (msa-Latn)": 30.53,
- "MassiveIntentClassification (hun-Latn)": 34.38,
- "MassiveIntentClassification (pol-Latn)": 34.26,
- "MassiveIntentClassification (nld-Latn)": 38.49,
- "MassiveIntentClassification (tha-Thai)": 8.51,
- "MassiveIntentClassification (tur-Latn)": 32.02,
- "MassiveIntentClassification (tam-Taml)": 9.25,
- "MassiveIntentClassification (hye-Armn)": 10.11,
- "MassiveIntentClassification (khm-Khmr)": 4.74,
- "MassiveIntentClassification (lav-Latn)": 35.08,
- "MassiveIntentClassification (deu-Latn)": 44.54,
- "MassiveIntentClassification (spa-Latn)": 39.75,
- "MassiveIntentClassification (ben-Beng)": 12.35,
- "MassiveIntentClassification (por-Latn)": 42.83,
- "MassiveIntentClassification (ara-Arab)": 20.42,
- "MassiveIntentClassification (cym-Latn)": 30.82,
- "MassiveIntentClassification (dan-Latn)": 42.36,
- "MassiveIntentClassification (mya-Mymr)": 4.6,
- "MassiveIntentClassification (heb-Hebr)": 23.6,
- "MassiveIntentClassification (kan-Knda)": 3.76,
- "MassiveIntentClassification (swa-Latn)": 31.82,
- "MassiveIntentClassification (fas-Arab)": 22.45,
- "MassiveIntentClassification (hin-Deva)": 17.68,
- "MassiveIntentClassification (kat-Geor)": 7.66,
- "MassiveIntentClassification (mal-Mlym)": 2.64,
- "MassiveIntentClassification (fin-Latn)": 34.58,
- "MassiveIntentClassification (slv-Latn)": 34.49,
- "MassiveIntentClassification (afr-Latn)": 36.49,
- "MassiveIntentClassification (urd-Arab)": 12.86,
- "MassiveIntentClassification (ron-Latn)": 38.07,
- "MassiveIntentClassification (sqi-Latn)": 37.26,
- "MassiveIntentClassification (cmo-Hant)": 22.43,
- "MassiveIntentClassification (ita-Latn)": 40.29,
- "MassiveIntentClassification (ind-Latn)": 36.31,
- "MassiveIntentClassification (nob-Latn)": 39.3,
- "MassiveIntentClassification (jpn-Jpan)": 33.13,
- "MassiveIntentClassification (aze-Latn)": 28.92,
- "MassiveIntentClassification (mon-Cyrl)": 19.65,
- "MassiveIntentClassification (ell-Grek)": 24.52,
- "MassiveIntentClassification (rus-Cyrl)": 23.98,
- "MassiveIntentClassification (kor-Kore)": 13.35,
- "MassiveIntentClassification (cmo-Hans)": 24.36,
- "MassiveIntentClassification (isl-Latn)": 31.46,
- "MassiveIntentClassification (swe-Latn)": 39.02,
- "MassiveIntentClassification (tel-Telu)": 2.26,
- "MassiveIntentClassification (vie-Latn)": 31.47,
- "MassiveIntentClassification (tgl-Latn)": 36.33,
- "MassiveIntentClassification (amh-Ethi)": 2.39,
- "MassiveScenarioClassification (en)": 75.67,
- "MassiveScenarioClassification (tur-Latn)": 39.11,
- "MassiveScenarioClassification (kat-Geor)": 13.45,
- "MassiveScenarioClassification (jpn-Jpan)": 40.57,
- "MassiveScenarioClassification (spa-Latn)": 50.92,
- "MassiveScenarioClassification (fas-Arab)": 27.8,
- "MassiveScenarioClassification (hun-Latn)": 41.01,
- "MassiveScenarioClassification (jav-Latn)": 40.0,
- "MassiveScenarioClassification (por-Latn)": 52.06,
- "MassiveScenarioClassification (sqi-Latn)": 44.67,
- "MassiveScenarioClassification (lav-Latn)": 39.28,
- "MassiveScenarioClassification (deu-Latn)": 54.09,
- "MassiveScenarioClassification (nld-Latn)": 47.79,
- "MassiveScenarioClassification (mon-Cyrl)": 25.58,
- "MassiveScenarioClassification (swa-Latn)": 40.34,
- "MassiveScenarioClassification (ben-Beng)": 17.49,
- "MassiveScenarioClassification (cym-Latn)": 34.82,
- "MassiveScenarioClassification (swe-Latn)": 44.53,
- "MassiveScenarioClassification (rus-Cyrl)": 28.71,
- "MassiveScenarioClassification (fra-Latn)": 54.26,
- "MassiveScenarioClassification (dan-Latn)": 49.45,
- "MassiveScenarioClassification (mya-Mymr)": 10.8,
- "MassiveScenarioClassification (ron-Latn)": 47.86,
- "MassiveScenarioClassification (cmo-Hans)": 35.33,
- "MassiveScenarioClassification (hin-Deva)": 23.13,
- "MassiveScenarioClassification (cmo-Hant)": 31.7,
- "MassiveScenarioClassification (afr-Latn)": 43.63,
- "MassiveScenarioClassification (aze-Latn)": 36.42,
- "MassiveScenarioClassification (msa-Latn)": 37.28,
- "MassiveScenarioClassification (ell-Grek)": 33.85,
- "MassiveScenarioClassification (isl-Latn)": 39.36,
- "MassiveScenarioClassification (fin-Latn)": 38.41,
- "MassiveScenarioClassification (ind-Latn)": 43.05,
- "MassiveScenarioClassification (pol-Latn)": 42.66,
- "MassiveScenarioClassification (tam-Taml)": 14.55,
- "MassiveScenarioClassification (ita-Latn)": 51.37,
- "MassiveScenarioClassification (urd-Arab)": 20.0,
- "MassiveScenarioClassification (kan-Knda)": 8.34,
- "MassiveScenarioClassification (tel-Telu)": 7.81,
- "MassiveScenarioClassification (mal-Mlym)": 7.69,
- "MassiveScenarioClassification (ara-Arab)": 27.8,
- "MassiveScenarioClassification (kor-Kore)": 17.28,
- "MassiveScenarioClassification (vie-Latn)": 35.9,
- "MassiveScenarioClassification (amh-Ethi)": 7.43,
- "MassiveScenarioClassification (heb-Hebr)": 25.49,
- "MassiveScenarioClassification (hye-Armn)": 16.86,
- "MassiveScenarioClassification (khm-Khmr)": 9.63,
- "MassiveScenarioClassification (slv-Latn)": 39.88,
- "MassiveScenarioClassification (tgl-Latn)": 47.04,
- "MassiveScenarioClassification (nob-Latn)": 45.75,
- "MassiveScenarioClassification (tha-Thai)": 17.01,
- "MultilingualSentiment (cmn-Hans)": 41.2,
- "NoRecClassification (nob-Latn)": 38.34,
- "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 50.15,
- "OnlineShopping (cmn-Hans)": 56.94,
- "PAC (pol-Latn)": 62.1,
- "PolEmo2.0-IN (pol-Latn)": 41.63,
- "PolEmo2.0-OUT (pol-Latn)": 25.0,
- "RuReviewsClassification (rus-Cyrl)": 42.33,
- "RuSciBenchGRNTIClassification (rus-Cyrl)": 13.29,
- "RuSciBenchOECDClassification (rus-Cyrl)": 10.62,
- "TNews (cmn-Hans)": 21.05,
- "ToxicConversationsClassification": 61.05,
- "TweetSentimentExtractionClassification": 55.05,
- "Waimai (cmn-Hans)": 63.31
+ "Model": "voyage-law-2",
+ "AmazonReviewsClassification (fr)": 41.98,
+ "MTOPDomainClassification (fr)": 90.12,
+ "MTOPIntentClassification (fr)": 62.44,
+ "MasakhaNEWSClassification (fra)": 76.42,
+ "MassiveIntentClassification (fr)": 66.94,
+ "MassiveScenarioClassification (fr)": 72.78
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "all-mpnet-base-v2",
- "ArxivClusteringP2P": 48.38,
- "ArxivClusteringS2S": 39.72,
- "BiorxivClusteringP2P": 39.62,
- "BiorxivClusteringS2S": 35.02,
- "GeoreviewClusteringP2P (rus-Cyrl)": 20.33,
- "MasakhaNEWSClusteringP2P (amh-Ethi)": 42.49,
- "MasakhaNEWSClusteringP2P (eng)": 67.24,
- "MasakhaNEWSClusteringP2P (fra-Latn)": 61.99,
- "MasakhaNEWSClusteringP2P (hau-Latn)": 37.17,
- "MasakhaNEWSClusteringP2P (ibo-Latn)": 52.0,
- "MasakhaNEWSClusteringP2P (lin-Latn)": 69.68,
- "MasakhaNEWSClusteringP2P (lug-Latn)": 50.96,
- "MasakhaNEWSClusteringP2P (orm-Ethi)": 28.42,
- "MasakhaNEWSClusteringP2P (pcm-Latn)": 64.01,
- "MasakhaNEWSClusteringP2P (run-Latn)": 57.6,
- "MasakhaNEWSClusteringP2P (sna-Latn)": 54.99,
- "MasakhaNEWSClusteringP2P (som-Latn)": 31.16,
- "MasakhaNEWSClusteringP2P (swa-Latn)": 28.29,
- "MasakhaNEWSClusteringP2P (tir-Ethi)": 41.85,
- "MasakhaNEWSClusteringP2P (xho-Latn)": 35.24,
- "MasakhaNEWSClusteringP2P (yor-Latn)": 42.15,
- "MasakhaNEWSClusteringS2S (amh-Ethi)": 44.48,
- "MasakhaNEWSClusteringS2S (eng)": 35.69,
- "MasakhaNEWSClusteringS2S (fra-Latn)": 41.05,
- "MasakhaNEWSClusteringS2S (hau-Latn)": 16.64,
- "MasakhaNEWSClusteringS2S (ibo-Latn)": 38.63,
- "MasakhaNEWSClusteringS2S (lin-Latn)": 70.72,
- "MasakhaNEWSClusteringS2S (lug-Latn)": 46.97,
- "MasakhaNEWSClusteringS2S (orm-Ethi)": 23.85,
- "MasakhaNEWSClusteringS2S (pcm-Latn)": 68.7,
- "MasakhaNEWSClusteringS2S (run-Latn)": 52.27,
- "MasakhaNEWSClusteringS2S (sna-Latn)": 47.64,
- "MasakhaNEWSClusteringS2S (som-Latn)": 30.94,
- "MasakhaNEWSClusteringS2S (swa-Latn)": 17.12,
- "MasakhaNEWSClusteringS2S (tir-Ethi)": 42.01,
- "MasakhaNEWSClusteringS2S (xho-Latn)": 24.16,
- "MasakhaNEWSClusteringS2S (yor-Latn)": 35.04,
- "MedrxivClusteringP2P": 35.58,
- "MedrxivClusteringS2S": 32.87,
- "RedditClustering": 54.82,
- "RedditClusteringP2P": 56.77,
- "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 14.66,
- "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 12.49,
- "StackExchangeClustering": 53.8,
- "StackExchangeClusteringP2P": 34.28,
- "TwentyNewsgroupsClustering": 49.74
+ "Model": "voyage-law-2",
+ "AlloProfClusteringP2P": 62.5,
+ "AlloProfClusteringS2S": 44.28,
+ "HALClusteringS2S": 26.36,
+ "MLSUMClusteringP2P (fr)": 44.03,
+ "MLSUMClusteringS2S (fr)": 42.95,
+ "MasakhaNEWSClusteringP2P (fra)": 50.68,
+ "MasakhaNEWSClusteringS2S (fra)": 38.79
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "all-mpnet-base-v2",
- "CDSC-E (pol-Latn)": 45.37,
- "OpusparcusPC (deu-Latn)": 89.78,
- "OpusparcusPC (en)": 97.75,
- "OpusparcusPC (fin-Latn)": 85.82,
- "OpusparcusPC (fra-Latn)": 86.61,
- "OpusparcusPC (rus-Cyrl)": 79.85,
- "OpusparcusPC (swe-Latn)": 81.81,
- "PSC (pol-Latn)": 83.28,
- "PawsXPairClassification (deu-Latn)": 52.17,
- "PawsXPairClassification (en)": 61.99,
- "PawsXPairClassification (spa-Latn)": 55.06,
- "PawsXPairClassification (fra-Latn)": 56.42,
- "PawsXPairClassification (jpn-Hira)": 47.43,
- "PawsXPairClassification (kor-Hang)": 49.75,
- "PawsXPairClassification (cmn-Hans)": 52.47,
- "SICK-E-PL (pol-Latn)": 46.51,
- "SprintDuplicateQuestions": 90.15,
- "TERRa (rus-Cyrl)": 44.52,
- "TwitterSemEval2015": 73.85,
- "TwitterURLCorpus": 85.11
+ "Model": "voyage-law-2",
+ "OpusparcusPC (fr)": 93.06,
+ "PawsXPairClassification (fr)": 61.54
}
]
},
"Reranking": {
"map": [
{
- "Model": "all-mpnet-base-v2",
- "AlloprofReranking (fra-Latn)": 69.63,
- "AskUbuntuDupQuestions": 65.85,
- "MMarcoReranking (cmn-Hans)": 4.65,
- "MindSmallReranking": 30.97,
- "RuBQReranking (rus-Cyrl)": 30.96,
- "SciDocsRR": 88.65,
- "StackOverflowDupQuestions": 51.98,
- "SyntecReranking (fra-Latn)": 66.12,
- "T2Reranking (cmn-Hans)": 58.3
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "all-mpnet-base-v2",
- "AILACasedocs": 22.51,
- "AILAStatutes": 21.27,
- "ARCChallenge": 11.8,
- "AlloprofRetrieval (fra-Latn)": 34.27,
- "AlphaNLI": 22.41,
- "ArguAna": 46.52,
- "ArguAna-PL (pol-Latn)": 14.72,
- "BSARDRetrieval (fra-Latn)": 6.98,
- "BrightRetrieval (robotics)": 8.36,
- "BrightRetrieval (psychology)": 22.63,
- "BrightRetrieval (leetcode)": 26.4,
- "BrightRetrieval (biology)": 15.52,
- "BrightRetrieval (theoremqa_questions)": 18.49,
- "BrightRetrieval (economics)": 16.64,
- "BrightRetrieval (stackoverflow)": 9.48,
- "BrightRetrieval (pony)": 6.95,
- "BrightRetrieval (earth_science)": 20.11,
- "BrightRetrieval (theoremqa_theorems)": 12.38,
- "BrightRetrieval (sustainable_living)": 15.34,
- "BrightRetrieval (aops)": 5.32,
- "CQADupstackRetrieval": 44.96,
- "ClimateFEVER": 21.97,
- "CmedqaRetrieval (cmn-Hans)": 2.0,
- "CovidRetrieval (cmn-Hans)": 3.7,
- "DBPedia": 32.09,
- "DuRetrieval (cmn-Hans)": 4.92,
- "EcomRetrieval (cmn-Hans)": 3.94,
- "FEVER": 50.86,
- "FiQA-PL (pol-Latn)": 3.6,
- "FiQA2018": 49.96,
- "GerDaLIRSmall (deu-Latn)": 3.78,
- "HellaSwag": 26.27,
- "HotpotQA": 39.29,
- "LEMBNarrativeQARetrieval": 19.34,
- "LEMBNeedleRetrieval": 16.0,
- "LEMBPasskeyRetrieval": 24.5,
- "LEMBQMSumRetrieval": 21.54,
- "LEMBSummScreenFDRetrieval": 60.43,
- "LEMBWikimQARetrieval": 44.92,
- "LeCaRDv2 (zho-Hans)": 18.09,
- "LegalBenchConsumerContractsQA": 75.25,
- "LegalBenchCorporateLobbying": 89.04,
- "LegalQuAD (deu-Latn)": 10.67,
- "LegalSummarization": 58.55,
- "MMarcoRetrieval (cmn-Hans)": 7.13,
- "MSMARCO": 39.75,
- "MedicalRetrieval (cmn-Hans)": 1.71,
- "MintakaRetrieval (ara-Arab)": 1.97,
- "MintakaRetrieval (deu-Latn)": 17.21,
- "MintakaRetrieval (spa-Latn)": 10.11,
- "MintakaRetrieval (fra-Latn)": 12.93,
- "MintakaRetrieval (hin-Deva)": 2.05,
- "MintakaRetrieval (ita-Latn)": 5.63,
- "MintakaRetrieval (jpn-Hira)": 6.72,
- "MintakaRetrieval (por-Latn)": 8.05,
- "NFCorpus": 33.29,
- "NFCorpus-PL (pol-Latn)": 8.77,
- "NQ": 50.45,
- "PIQA": 29.03,
- "Quail": 3.41,
- "QuoraRetrieval": 87.46,
- "RARbCode": 53.21,
- "RARbMath": 71.85,
- "RuBQRetrieval (rus-Cyrl)": 4.75,
- "SCIDOCS": 23.76,
- "SCIDOCS-PL (pol-Latn)": 4.02,
- "SIQA": 2.38,
- "SciFact": 65.57,
- "SciFact-PL (pol-Latn)": 13.31,
- "SpartQA": 0.22,
- "SyntecRetrieval (fra-Latn)": 57.39,
- "T2Retrieval (cmn-Hans)": 2.98,
- "TRECCOVID": 51.33,
- "TRECCOVID-PL (pol-Latn)": 12.12,
- "TempReasonL1": 1.77,
- "TempReasonL2Fact": 11.2,
- "TempReasonL2Pure": 1.15,
- "TempReasonL3Fact": 9.42,
- "TempReasonL3Pure": 5.59,
- "Touche2020": 19.93,
- "VideoRetrieval (cmn-Hans)": 8.48,
- "WinoGrande": 20.8,
- "XPQARetrieval (ara-Arab_ara-Arab)": 9.42,
- "XPQARetrieval (eng-Latn_ara-Arab)": 2.39,
- "XPQARetrieval (ara-Arab_eng-Latn)": 8.98,
- "XPQARetrieval (deu-Latn_deu-Latn)": 55.82,
- "XPQARetrieval (eng-Latn_deu-Latn)": 11.74,
- "XPQARetrieval (deu-Latn_eng-Latn)": 30.44,
- "XPQARetrieval (spa-Latn_spa-Latn)": 40.01,
- "XPQARetrieval (eng-Latn_spa-Latn)": 6.12,
- "XPQARetrieval (spa-Latn_eng-Latn)": 29.44,
- "XPQARetrieval (fra-Latn_fra-Latn)": 51.94,
- "XPQARetrieval (eng-Latn_fra-Latn)": 11.48,
- "XPQARetrieval (fra-Latn_eng-Latn)": 32.52,
- "XPQARetrieval (hin-Deva_hin-Deva)": 37.48,
- "XPQARetrieval (eng-Latn_hin-Deva)": 5.11,
- "XPQARetrieval (hin-Deva_eng-Latn)": 7.37,
- "XPQARetrieval (ita-Latn_ita-Latn)": 54.2,
- "XPQARetrieval (eng-Latn_ita-Latn)": 6.08,
- "XPQARetrieval (ita-Latn_eng-Latn)": 30.32,
- "XPQARetrieval (jpn-Hira_jpn-Hira)": 37.45,
- "XPQARetrieval (eng-Latn_jpn-Hira)": 5.79,
- "XPQARetrieval (jpn-Hira_eng-Latn)": 14.77,
- "XPQARetrieval (kor-Hang_kor-Hang)": 10.4,
- "XPQARetrieval (eng-Latn_kor-Hang)": 7.09,
- "XPQARetrieval (kor-Hang_eng-Latn)": 6.95,
- "XPQARetrieval (pol-Latn_pol-Latn)": 23.67,
- "XPQARetrieval (eng-Latn_pol-Latn)": 8.83,
- "XPQARetrieval (pol-Latn_eng-Latn)": 15.94,
- "XPQARetrieval (por-Latn_por-Latn)": 33.56,
- "XPQARetrieval (eng-Latn_por-Latn)": 3.76,
- "XPQARetrieval (por-Latn_eng-Latn)": 23.45,
- "XPQARetrieval (tam-Taml_tam-Taml)": 5.53,
- "XPQARetrieval (eng-Latn_tam-Taml)": 3.3,
- "XPQARetrieval (tam-Taml_eng-Latn)": 4.0,
- "XPQARetrieval (cmn-Hans_cmn-Hans)": 23.84,
- "XPQARetrieval (eng-Latn_cmn-Hans)": 7.2,
- "XPQARetrieval (cmn-Hans_eng-Latn)": 12.84
+ "Model": "voyage-law-2",
+ "AlloprofReranking": 72.92,
+ "SyntecReranking": 91.2
}
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "all-mpnet-base-v2",
- "AFQMC (cmn-Hans)": 8.01,
- "ATEC (cmn-Hans)": 14.03,
- "BIOSSES": 80.43,
- "BQ (cmn-Hans)": 21.39,
- "CDSC-R (pol-Latn)": 77.04,
- "LCQMC (cmn-Hans)": 22.84,
- "PAWSX (cmn-Hans)": 6.44,
- "RUParaPhraserSTS (rus-Cyrl)": 42.15,
- "RuSTSBenchmarkSTS (rus-Cyrl)": 55.68,
- "SICK-R": 80.59,
- "SICK-R-PL (pol-Latn)": 50.2,
- "SICKFr (fra-Latn)": 67.05,
- "STS12": 72.63,
- "STS13": 83.48,
- "STS14": 78.0,
- "STS15": 85.66,
- "STS16": 80.03,
- "STS17 (en-en)": 90.6,
- "STS17 (eng-Latn_ara-Arab)": 6.76,
- "STS17 (fra-Latn_eng-Latn)": 41.64,
- "STS17 (eng-Latn_tur-Latn)": -4.58,
- "STS17 (eng-Latn_deu-Latn)": 35.5,
- "STS17 (spa-Latn_eng-Latn)": 25.28,
- "STS17 (ita-Latn_eng-Latn)": 31.8,
- "STS17 (spa-Latn)": 78.4,
- "STS17 (kor-Hang)": 39.11,
- "STS17 (ara-Arab)": 55.42,
- "STS17 (nld-Latn_eng-Latn)": 32.89,
- "STS22 (en)": 68.39,
- "STS22 (spa-Latn_eng-Latn)": 55.09,
- "STS22 (deu-Latn_pol-Latn)": 23.53,
- "STS22 (cmn-Hans_eng-Latn)": 40.47,
- "STS22 (pol-Latn)": 24.21,
- "STS22 (tur-Latn)": 29.35,
- "STS22 (spa-Latn_ita-Latn)": 41.61,
- "STS22 (fra-Latn_pol-Latn)": 73.25,
- "STS22 (rus-Cyrl)": 15.83,
- "STS22 (deu-Latn)": 27.0,
- "STS22 (spa-Latn)": 55.98,
- "STS22 (pol-Latn_eng-Latn)": 51.07,
- "STS22 (fra-Latn)": 77.1,
- "STS22 (deu-Latn_eng-Latn)": 49.73,
- "STS22 (ara-Arab)": 38.96,
- "STS22 (deu-Latn_fra-Latn)": 31.39,
- "STS22 (ita-Latn)": 58.02,
- "STS22 (cmn-Hans)": 42.24,
- "STSB (cmn-Hans)": 37.7,
- "STSBenchmark": 83.42,
- "STSBenchmarkMultilingualSTS (nld-Latn)": 57.01,
- "STSBenchmarkMultilingualSTS (rus-Cyrl)": 55.54,
- "STSBenchmarkMultilingualSTS (fra-Latn)": 65.15,
- "STSBenchmarkMultilingualSTS (ita-Latn)": 62.72,
- "STSBenchmarkMultilingualSTS (spa-Latn)": 65.78,
- "STSBenchmarkMultilingualSTS (en)": 83.42,
- "STSBenchmarkMultilingualSTS (deu-Latn)": 61.43,
- "STSBenchmarkMultilingualSTS (por-Latn)": 62.12,
- "STSBenchmarkMultilingualSTS (cmn-Hans)": 39.43,
- "STSBenchmarkMultilingualSTS (pol-Latn)": 52.36
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "voyage-law-2",
+ "AILACasedocs": 44.56,
+ "AILAStatutes": 45.51,
+ "AlloprofRetrieval": 57.28,
+ "BSARDRetrieval": 11.83,
+ "GerDaLIRSmall": 44.91,
+ "LEMBNarrativeQARetrieval": 55.78,
+ "LEMBNeedleRetrieval": 80.5,
+ "LEMBPasskeyRetrieval": 93.75,
+ "LEMBQMSumRetrieval": 57.26,
+ "LEMBSummScreenFDRetrieval": 98.72,
+ "LEMBWikimQARetrieval": 87.08,
+ "LeCaRDv2": 72.75,
+ "LegalBenchConsumerContractsQA": 83.27,
+ "LegalBenchCorporateLobbying": 95.66,
+ "LegalQuAD": 67.47,
+ "LegalSummarization": 68.96,
+ "MintakaRetrieval (fr)": 34.92,
+ "SyntecRetrieval": 87.33,
+ "XPQARetrieval (fr)": 73.56
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "voyage-law-2",
+ "SICKFr": 74.09,
+ "STS22 (fr)": 83.75,
+ "STSBenchmarkMultilingualSTS (fr)": 83.02
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "all-mpnet-base-v2",
- "SummEval": 27.49,
- "SummEvalFr (fra-Latn)": 28.11
+ "Model": "voyage-law-2",
+ "SummEvalFr": 30.34
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "all-mpnet-base-v2"
+ "Model": "voyage-law-2"
}
]
}
},
- "e5-base-v2": {
+ "bert-base-multilingual-cased": {
"BitextMining": {
"f1": [
{
- "Model": "e5-base-v2"
+ "Model": "bert-base-multilingual-cased"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "e5-base-v2"
+ "Model": "bert-base-multilingual-cased",
+ "AmazonReviewsClassification (fr)": 29.39,
+ "MTOPDomainClassification (fr)": 63.61,
+ "MTOPIntentClassification (fr)": 37.84,
+ "MasakhaNEWSClassification (fra)": 64.0,
+ "MassiveIntentClassification (fr)": 37.3,
+ "MassiveScenarioClassification (fr)": 44.47
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "e5-base-v2",
- "BiorxivClusteringP2P": 37.12,
- "BiorxivClusteringS2S": 33.41,
- "MedrxivClusteringP2P": 31.82,
- "MedrxivClusteringS2S": 29.68,
- "RedditClustering": 56.54,
- "RedditClusteringP2P": 63.23,
- "StackExchangeClustering": 64.6,
- "StackExchangeClusteringP2P": 33.02,
- "TwentyNewsgroupsClustering": 49.86
+ "Model": "bert-base-multilingual-cased",
+ "AlloProfClusteringP2P": 51.5,
+ "AlloProfClusteringS2S": 43.06,
+ "HALClusteringS2S": 20.81,
+ "MLSUMClusteringP2P": 40.9,
+ "MLSUMClusteringS2S": 31.8,
+ "MasakhaNEWSClusteringP2P (fra)": 24.23,
+ "MasakhaNEWSClusteringS2S (fra)": 24.46
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "e5-base-v2"
+ "Model": "bert-base-multilingual-cased",
+ "OpusparcusPC (fr)": 86.77,
+ "PawsXPairClassification (fr)": 53.39
}
]
},
"Reranking": {
"map": [
{
- "Model": "e5-base-v2"
+ "Model": "bert-base-multilingual-cased",
+ "AlloprofReranking": 36.23,
+ "SyntecReranking": 53.25
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "e5-base-v2"
+ "Model": "bert-base-multilingual-cased",
+ "AlloprofRetrieval": 1.63,
+ "BSARDRetrieval": 0.0,
+ "MintakaRetrieval (fr)": 3.55,
+ "SyntecRetrieval": 18.95,
+ "XPQARetrieval (fr)": 18.49
}
]
},
"STS": {
"spearman": [
{
- "Model": "e5-base-v2"
+ "Model": "bert-base-multilingual-cased",
+ "SICKFr": 58.75,
+ "STS22 (fr)": 39.05,
+ "STSBenchmarkMultilingualSTS (fr)": 52.25
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "e5-base-v2"
+ "Model": "bert-base-multilingual-cased",
+ "SummEvalFr": 28.81
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "e5-base-v2",
- "Core17InstructionRetrieval": -2.9,
- "News21InstructionRetrieval": -2.0,
- "Robust04InstructionRetrieval": -6.73
+ "Model": "bert-base-multilingual-cased"
}
]
}
},
- "text-search-babbage-001": {
+ "LLM2Vec-Meta-Llama-3-unsupervised": {
"BitextMining": {
"f1": [
{
- "Model": "text-search-babbage-001"
+ "Model": "LLM2Vec-Meta-Llama-3-unsupervised"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "text-search-babbage-001"
+ "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
+ "AmazonCounterfactualClassification (en)": 75.7,
+ "AmazonPolarityClassification": 80.68,
+ "AmazonReviewsClassification (en)": 40.0,
+ "Banking77Classification": 84.77,
+ "EmotionClassification": 47.08,
+ "ImdbClassification": 75.19,
+ "MTOPDomainClassification (en)": 94.47,
+ "MTOPIntentClassification (en)": 81.09,
+ "MassiveIntentClassification (en)": 75.01,
+ "MassiveScenarioClassification (en)": 79.16,
+ "ToxicConversationsClassification": 71.85,
+ "TweetSentimentExtractionClassification": 57.61
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "text-search-babbage-001"
+ "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
+ "ArxivClusteringP2P": 49.22,
+ "ArxivClusteringS2S": 41.71,
+ "BiorxivClusteringP2P": 38.39,
+ "BiorxivClusteringS2S": 31.31,
+ "MedrxivClusteringP2P": 31.47,
+ "MedrxivClusteringS2S": 27.87,
+ "RedditClustering": 43.67,
+ "RedditClusteringP2P": 61.67,
+ "StackExchangeClustering": 68.2,
+ "StackExchangeClusteringP2P": 36.36,
+ "TwentyNewsgroupsClustering": 32.01
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "text-search-babbage-001"
+ "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
+ "SprintDuplicateQuestions": 88.14,
+ "TwitterSemEval2015": 66.6,
+ "TwitterURLCorpus": 79.3
}
]
},
"Reranking": {
"map": [
{
- "Model": "text-search-babbage-001"
+ "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
+ "AskUbuntuDupQuestions": 57.16,
+ "MindSmallReranking": 30.1,
+ "SciDocsRR": 76.28,
+ "StackOverflowDupQuestions": 48.82
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "text-search-babbage-001",
- "ArguAna": 49.2,
- "ClimateFEVER": 19.9,
- "FEVER": 77.0,
- "FiQA2018": 42.2,
- "HotpotQA": 63.1,
- "NFCorpus": 36.7,
- "QuoraRetrieval": 69.7,
- "SciFact": 70.4,
- "TRECCOVID": 58.5,
- "Touche2020": 29.7
+ "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
+ "ArguAna": 51.73,
+ "CQADupstackRetrieval": 32.4,
+ "ClimateFEVER": 23.58,
+ "DBPedia": 26.78,
+ "FEVER": 53.42,
+ "FiQA2018": 28.56,
+ "HotpotQA": 52.37,
+ "MSMARCO": 17.47,
+ "NFCorpus": 26.28,
+ "NQ": 37.65,
+ "QuoraRetrieval": 84.64,
+ "SCIDOCS": 10.39,
+ "SciFact": 66.36,
+ "TRECCOVID": 63.34,
+ "Touche2020": 12.82
}
]
},
"STS": {
"spearman": [
{
- "Model": "text-search-babbage-001"
+ "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
+ "BIOSSES": 84.67,
+ "SICK-R": 72.16,
+ "STS12": 61.6,
+ "STS13": 79.71,
+ "STS14": 72.11,
+ "STS15": 82.18,
+ "STS16": 79.41,
+ "STS17 (en-en)": 85.44,
+ "STS22 (en)": 63.9,
+ "STSBenchmark": 77.44
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "text-search-babbage-001"
+ "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
+ "SummEval": 31.45
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "text-search-babbage-001"
+ "Model": "LLM2Vec-Meta-Llama-3-unsupervised"
}
]
}
},
- "all-mpnet-base-v2-instruct": {
+ "text-search-davinci-001": {
"BitextMining": {
"f1": [
{
- "Model": "all-mpnet-base-v2-instruct"
+ "Model": "text-search-davinci-001"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "all-mpnet-base-v2-instruct"
+ "Model": "text-search-davinci-001"
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "all-mpnet-base-v2-instruct"
+ "Model": "text-search-davinci-001"
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "all-mpnet-base-v2-instruct"
+ "Model": "text-search-davinci-001"
}
]
},
"Reranking": {
"map": [
{
- "Model": "all-mpnet-base-v2-instruct"
+ "Model": "text-search-davinci-001"
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "all-mpnet-base-v2-instruct",
- "ARCChallenge": 10.35,
- "AlphaNLI": 1.96,
- "HellaSwag": 13.01,
- "PIQA": 27.18,
- "Quail": 3.02,
- "RARbCode": 48.95,
- "RARbMath": 69.21,
- "SIQA": 1.29,
- "SpartQA": 1.01,
- "TempReasonL1": 1.52,
- "TempReasonL2Fact": 7.28,
- "TempReasonL2Pure": 1.03,
- "TempReasonL3Fact": 7.03,
- "TempReasonL3Pure": 5.16,
- "WinoGrande": 9.66
+ "Model": "text-search-davinci-001",
+ "ArguAna": 43.5,
+ "ClimateFEVER": 22.3,
+ "FEVER": 77.5,
+ "FiQA2018": 51.2,
+ "HotpotQA": 68.8,
+ "NFCorpus": 40.7,
+ "QuoraRetrieval": 63.8,
+ "SciFact": 75.4,
+ "TRECCOVID": 64.9,
+ "Touche2020": 29.1
}
]
},
"STS": {
"spearman": [
{
- "Model": "all-mpnet-base-v2-instruct"
+ "Model": "text-search-davinci-001"
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "all-mpnet-base-v2-instruct"
+ "Model": "text-search-davinci-001"
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "all-mpnet-base-v2-instruct"
+ "Model": "text-search-davinci-001"
}
]
}
},
- "bge-small-en-v1.5-instruct": {
+ "m3e-base": {
"BitextMining": {
"f1": [
{
- "Model": "bge-small-en-v1.5-instruct"
+ "Model": "m3e-base"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "bge-small-en-v1.5-instruct"
+ "Model": "m3e-base",
+ "AmazonReviewsClassification (zh)": 43.02,
+ "IFlyTek": 44.42,
+ "JDReview": 85.33,
+ "MassiveIntentClassification (zh-CN)": 68.4,
+ "MassiveScenarioClassification (zh-CN)": 74.6,
+ "MultilingualSentiment": 71.9,
+ "OnlineShopping": 87.77,
+ "TNews": 48.28,
+ "Waimai": 83.99
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "bge-small-en-v1.5-instruct"
+ "Model": "m3e-base",
+ "CLSClusteringP2P": 39.81,
+ "CLSClusteringS2S": 37.34,
+ "ThuNewsClusteringP2P": 59.77,
+ "ThuNewsClusteringS2S": 53.78
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "bge-small-en-v1.5-instruct"
+ "Model": "m3e-base",
+ "Cmnli": 69.98,
+ "Ocnli": 58.0
}
]
},
"Reranking": {
"map": [
{
- "Model": "bge-small-en-v1.5-instruct"
+ "Model": "m3e-base",
+ "CMedQAv1": 77.05,
+ "CMedQAv2": 76.76,
+ "MMarcoReranking": 17.51,
+ "T2Reranking": 66.03
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "bge-small-en-v1.5-instruct",
- "ARCChallenge": 7.72,
- "AlphaNLI": 1.26,
- "HellaSwag": 23.41,
- "PIQA": 20.79,
- "Quail": 2.01,
- "RARbCode": 41.52,
- "RARbMath": 46.5,
- "SIQA": 0.98,
- "SpartQA": 2.86,
- "TempReasonL1": 1.27,
- "TempReasonL2Fact": 16.72,
- "TempReasonL2Pure": 1.1,
- "TempReasonL3Fact": 12.81,
- "TempReasonL3Pure": 4.63,
- "WinoGrande": 5.35
+ "Model": "m3e-base",
+ "CmedqaRetrieval": 30.33,
+ "CovidRetrieval": 66.42,
+ "DuRetrieval": 75.76,
+ "EcomRetrieval": 50.27,
+ "MMarcoRetrieval": 65.46,
+ "MedicalRetrieval": 42.79,
+ "T2Retrieval": 73.14,
+ "VideoRetrieval": 51.11
}
]
},
"STS": {
"spearman": [
{
- "Model": "bge-small-en-v1.5-instruct"
+ "Model": "m3e-base",
+ "AFQMC": 35.87,
+ "ATEC": 41.27,
+ "BQ": 63.81,
+ "LCQMC": 74.88,
+ "PAWSX": 12.19,
+ "QBQTC": 32.07,
+ "STS22 (zh)": 66.73,
+ "STSB": 76.97
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "bge-small-en-v1.5-instruct"
+ "Model": "m3e-base"
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "bge-small-en-v1.5-instruct"
+ "Model": "m3e-base"
}
]
}
},
- "LaBSE-en-ru": {
+ "multilingual-e5-large": {
"BitextMining": {
"f1": [
{
- "Model": "LaBSE-en-ru",
- "Tatoeba (rus-Cyrl_eng-Latn)": 93.62
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "LaBSE-en-ru",
- "GeoreviewClassification (rus-Cyrl)": 40.89,
- "HeadlineClassification (rus-Cyrl)": 68.75,
- "InappropriatenessClassification (rus-Cyrl)": 58.48,
- "KinopoiskClassification (rus-Cyrl)": 49.85,
- "MassiveIntentClassification (swa-Latn)": 19.98,
- "MassiveIntentClassification (aze-Latn)": 19.52,
- "MassiveIntentClassification (tur-Latn)": 24.12,
- "MassiveIntentClassification (cmo-Hans)": 3.96,
- "MassiveIntentClassification (amh-Ethi)": 2.76,
- "MassiveIntentClassification (kan-Knda)": 2.86,
- "MassiveIntentClassification (hin-Deva)": 3.29,
- "MassiveIntentClassification (tgl-Latn)": 27.08,
- "MassiveIntentClassification (tha-Thai)": 4.0,
- "MassiveIntentClassification (swe-Latn)": 32.01,
- "MassiveIntentClassification (deu-Latn)": 35.14,
- "MassiveIntentClassification (spa-Latn)": 37.67,
- "MassiveIntentClassification (por-Latn)": 39.84,
- "MassiveIntentClassification (jpn-Jpan)": 4.78,
- "MassiveIntentClassification (fin-Latn)": 31.11,
- "MassiveIntentClassification (kat-Geor)": 2.87,
- "MassiveIntentClassification (slv-Latn)": 35.66,
- "MassiveIntentClassification (rus-Cyrl)": 60.53,
- "MassiveIntentClassification (ita-Latn)": 43.32,
- "MassiveIntentClassification (tel-Telu)": 2.72,
- "MassiveIntentClassification (afr-Latn)": 30.59,
- "MassiveIntentClassification (isl-Latn)": 25.61,
- "MassiveIntentClassification (fas-Arab)": 3.71,
- "MassiveIntentClassification (vie-Latn)": 23.0,
- "MassiveIntentClassification (ben-Beng)": 3.35,
- "MassiveIntentClassification (hye-Armn)": 2.8,
- "MassiveIntentClassification (pol-Latn)": 31.3,
- "MassiveIntentClassification (cym-Latn)": 26.59,
- "MassiveIntentClassification (jav-Latn)": 26.84,
- "MassiveIntentClassification (mon-Cyrl)": 35.97,
- "MassiveIntentClassification (en)": 60.48,
- "MassiveIntentClassification (msa-Latn)": 27.82,
- "MassiveIntentClassification (nob-Latn)": 35.78,
- "MassiveIntentClassification (heb-Hebr)": 2.33,
- "MassiveIntentClassification (khm-Khmr)": 4.6,
- "MassiveIntentClassification (nld-Latn)": 34.66,
- "MassiveIntentClassification (ind-Latn)": 33.31,
- "MassiveIntentClassification (mal-Mlym)": 2.63,
- "MassiveIntentClassification (tam-Taml)": 2.22,
- "MassiveIntentClassification (mya-Mymr)": 3.57,
- "MassiveIntentClassification (urd-Arab)": 3.36,
- "MassiveIntentClassification (dan-Latn)": 38.66,
- "MassiveIntentClassification (cmo-Hant)": 5.29,
- "MassiveIntentClassification (ron-Latn)": 37.45,
- "MassiveIntentClassification (lav-Latn)": 23.92,
- "MassiveIntentClassification (fra-Latn)": 40.29,
- "MassiveIntentClassification (ell-Grek)": 11.14,
- "MassiveIntentClassification (sqi-Latn)": 35.84,
- "MassiveIntentClassification (hun-Latn)": 26.74,
- "MassiveIntentClassification (kor-Kore)": 2.69,
- "MassiveIntentClassification (ara-Arab)": 5.19,
- "MassiveScenarioClassification (swa-Latn)": 25.61,
- "MassiveScenarioClassification (aze-Latn)": 24.48,
- "MassiveScenarioClassification (tur-Latn)": 31.38,
- "MassiveScenarioClassification (cmo-Hans)": 9.98,
- "MassiveScenarioClassification (amh-Ethi)": 7.59,
- "MassiveScenarioClassification (kan-Knda)": 8.73,
- "MassiveScenarioClassification (hin-Deva)": 8.77,
- "MassiveScenarioClassification (tgl-Latn)": 35.12,
- "MassiveScenarioClassification (tha-Thai)": 8.69,
- "MassiveScenarioClassification (swe-Latn)": 35.83,
- "MassiveScenarioClassification (deu-Latn)": 41.72,
- "MassiveScenarioClassification (spa-Latn)": 43.33,
- "MassiveScenarioClassification (por-Latn)": 44.62,
- "MassiveScenarioClassification (jpn-Jpan)": 9.51,
- "MassiveScenarioClassification (fin-Latn)": 33.79,
- "MassiveScenarioClassification (kat-Geor)": 7.32,
- "MassiveScenarioClassification (slv-Latn)": 37.6,
- "MassiveScenarioClassification (rus-Cyrl)": 65.15,
- "MassiveScenarioClassification (ita-Latn)": 47.28,
- "MassiveScenarioClassification (tel-Telu)": 7.53,
- "MassiveScenarioClassification (afr-Latn)": 37.27,
- "MassiveScenarioClassification (isl-Latn)": 30.32,
- "MassiveScenarioClassification (fas-Arab)": 6.83,
- "MassiveScenarioClassification (vie-Latn)": 28.92,
- "MassiveScenarioClassification (ben-Beng)": 8.57,
- "MassiveScenarioClassification (hye-Armn)": 8.91,
- "MassiveScenarioClassification (pol-Latn)": 33.75,
- "MassiveScenarioClassification (cym-Latn)": 30.38,
- "MassiveScenarioClassification (jav-Latn)": 33.94,
- "MassiveScenarioClassification (mon-Cyrl)": 41.53,
- "MassiveScenarioClassification (en)": 65.43,
- "MassiveScenarioClassification (msa-Latn)": 36.28,
- "MassiveScenarioClassification (nob-Latn)": 42.43,
- "MassiveScenarioClassification (heb-Hebr)": 8.64,
- "MassiveScenarioClassification (khm-Khmr)": 9.99,
- "MassiveScenarioClassification (nld-Latn)": 41.47,
- "MassiveScenarioClassification (ind-Latn)": 39.05,
- "MassiveScenarioClassification (mal-Mlym)": 7.24,
- "MassiveScenarioClassification (tam-Taml)": 7.71,
- "MassiveScenarioClassification (mya-Mymr)": 9.94,
- "MassiveScenarioClassification (urd-Arab)": 9.16,
- "MassiveScenarioClassification (dan-Latn)": 44.69,
- "MassiveScenarioClassification (cmo-Hant)": 10.48,
- "MassiveScenarioClassification (ron-Latn)": 44.55,
- "MassiveScenarioClassification (lav-Latn)": 26.26,
- "MassiveScenarioClassification (fra-Latn)": 45.08,
- "MassiveScenarioClassification (ell-Grek)": 19.46,
- "MassiveScenarioClassification (sqi-Latn)": 40.9,
- "MassiveScenarioClassification (hun-Latn)": 33.92,
- "MassiveScenarioClassification (kor-Kore)": 7.37,
- "MassiveScenarioClassification (ara-Arab)": 12.43,
- "RuReviewsClassification (rus-Cyrl)": 58.01,
- "RuSciBenchGRNTIClassification (rus-Cyrl)": 52.8,
- "RuSciBenchOECDClassification (rus-Cyrl)": 40.36
+ "Model": "multilingual-e5-large",
+ "BornholmBitextMining (dan-Latn)": 29.61,
+ "BornholmBitextMining": 44.16,
+ "Tatoeba (tgl-Latn_eng-Latn)": 92.0,
+ "Tatoeba (gsw-Latn_eng-Latn)": 51.65,
+ "Tatoeba (tzl-Latn_eng-Latn)": 53.16,
+ "Tatoeba (slv-Latn_eng-Latn)": 89.57,
+ "Tatoeba (jav-Latn_eng-Latn)": 75.46,
+ "Tatoeba (uig-Arab_eng-Latn)": 72.17,
+ "Tatoeba (ind-Latn_eng-Latn)": 92.9,
+ "Tatoeba (rus-Cyrl_eng-Latn)": 92.32,
+ "Tatoeba (war-Latn_eng-Latn)": 62.02,
+ "Tatoeba (mar-Deva_eng-Latn)": 88.58,
+ "Tatoeba (mkd-Cyrl_eng-Latn)": 85.63,
+ "Tatoeba (jpn-Jpan_eng-Latn)": 95.28,
+ "Tatoeba (hun-Latn_eng-Latn)": 94.01,
+ "Tatoeba (slk-Latn_eng-Latn)": 93.13,
+ "Tatoeba (tha-Thai_eng-Latn)": 95.38,
+ "Tatoeba (fra-Latn_eng-Latn)": 93.42,
+ "Tatoeba (ukr-Cyrl_eng-Latn)": 93.32,
+ "Tatoeba (kat-Geor_eng-Latn)": 84.09,
+ "Tatoeba (nov-Latn_eng-Latn)": 71.62,
+ "Tatoeba (kor-Hang_eng-Latn)": 90.65,
+ "Tatoeba (ben-Beng_eng-Latn)": 83.02,
+ "Tatoeba (cor-Latn_eng-Latn)": 6.28,
+ "Tatoeba (lfn-Latn_eng-Latn)": 62.91,
+ "Tatoeba (swh-Latn_eng-Latn)": 71.61,
+ "Tatoeba (tur-Latn_eng-Latn)": 96.27,
+ "Tatoeba (cbk-Latn_eng-Latn)": 69.26,
+ "Tatoeba (kur-Latn_eng-Latn)": 66.83,
+ "Tatoeba (arq-Arab_eng-Latn)": 41.56,
+ "Tatoeba (ceb-Latn_eng-Latn)": 55.31,
+ "Tatoeba (max-Deva_eng-Latn)": 63.41,
+ "Tatoeba (ang-Latn_eng-Latn)": 40.18,
+ "Tatoeba (nds-Latn_eng-Latn)": 69.28,
+ "Tatoeba (epo-Latn_eng-Latn)": 96.01,
+ "Tatoeba (heb-Hebr_eng-Latn)": 86.61,
+ "Tatoeba (yue-Hant_eng-Latn)": 88.71,
+ "Tatoeba (dan-Latn_eng-Latn)": 95.08,
+ "Tatoeba (swe-Latn_eng-Latn)": 95.3,
+ "Tatoeba (lvs-Latn_eng-Latn)": 90.06,
+ "Tatoeba (ast-Latn_eng-Latn)": 81.76,
+ "Tatoeba (dsb-Latn_eng-Latn)": 48.44,
+ "Tatoeba (pes-Arab_eng-Latn)": 92.14,
+ "Tatoeba (dtp-Latn_eng-Latn)": 7.03,
+ "Tatoeba (tuk-Latn_eng-Latn)": 33.15,
+ "Tatoeba (isl-Latn_eng-Latn)": 92.09,
+ "Tatoeba (khm-Khmr_eng-Latn)": 59.96,
+ "Tatoeba (pam-Latn_eng-Latn)": 9.32,
+ "Tatoeba (tat-Cyrl_eng-Latn)": 73.51,
+ "Tatoeba (bos-Latn_eng-Latn)": 92.86,
+ "Tatoeba (spa-Latn_eng-Latn)": 97.1,
+ "Tatoeba (kaz-Cyrl_eng-Latn)": 79.67,
+ "Tatoeba (bel-Cyrl_eng-Latn)": 91.08,
+ "Tatoeba (zsm-Latn_eng-Latn)": 94.53,
+ "Tatoeba (cat-Latn_eng-Latn)": 91.03,
+ "Tatoeba (urd-Arab_eng-Latn)": 89.21,
+ "Tatoeba (mon-Cyrl_eng-Latn)": 87.53,
+ "Tatoeba (tam-Taml_eng-Latn)": 88.23,
+ "Tatoeba (fry-Latn_eng-Latn)": 63.43,
+ "Tatoeba (nob-Latn_eng-Latn)": 97.2,
+ "Tatoeba (tel-Telu_eng-Latn)": 91.34,
+ "Tatoeba (hye-Armn_eng-Latn)": 90.92,
+ "Tatoeba (awa-Deva_eng-Latn)": 72.27,
+ "Tatoeba (hrv-Latn_eng-Latn)": 96.15,
+ "Tatoeba (ile-Latn_eng-Latn)": 79.16,
+ "Tatoeba (amh-Ethi_eng-Latn)": 80.69,
+ "Tatoeba (orv-Cyrl_eng-Latn)": 39.87,
+ "Tatoeba (ara-Arab_eng-Latn)": 85.48,
+ "Tatoeba (ido-Latn_eng-Latn)": 83.52,
+ "Tatoeba (hin-Deva_eng-Latn)": 94.48,
+ "Tatoeba (por-Latn_eng-Latn)": 93.63,
+ "Tatoeba (ron-Latn_eng-Latn)": 94.87,
+ "Tatoeba (swg-Latn_eng-Latn)": 55.64,
+ "Tatoeba (cmn-Hans_eng-Latn)": 95.28,
+ "Tatoeba (pol-Latn_eng-Latn)": 96.6,
+ "Tatoeba (bul-Cyrl_eng-Latn)": 92.93,
+ "Tatoeba (ina-Latn_eng-Latn)": 93.47,
+ "Tatoeba (bre-Latn_eng-Latn)": 11.1,
+ "Tatoeba (wuu-Hans_eng-Latn)": 86.37,
+ "Tatoeba (lit-Latn_eng-Latn)": 88.48,
+ "Tatoeba (csb-Latn_eng-Latn)": 36.98,
+ "Tatoeba (lat-Latn_eng-Latn)": 53.37,
+ "Tatoeba (gle-Latn_eng-Latn)": 71.48,
+ "Tatoeba (ita-Latn_eng-Latn)": 93.29,
+ "Tatoeba (srp-Cyrl_eng-Latn)": 93.1,
+ "Tatoeba (arz-Arab_eng-Latn)": 74.73,
+ "Tatoeba (cym-Latn_eng-Latn)": 76.21,
+ "Tatoeba (ber-Tfng_eng-Latn)": 38.9,
+ "Tatoeba (xho-Latn_eng-Latn)": 80.87,
+ "Tatoeba (uzb-Latn_eng-Latn)": 72.35,
+ "Tatoeba (pms-Latn_eng-Latn)": 59.85,
+ "Tatoeba (est-Latn_eng-Latn)": 85.03,
+ "Tatoeba (deu-Latn_eng-Latn)": 99.07,
+ "Tatoeba (yid-Hebr_eng-Latn)": 76.33,
+ "Tatoeba (ell-Grek_eng-Latn)": 93.88,
+ "Tatoeba (afr-Latn_eng-Latn)": 90.22,
+ "Tatoeba (fao-Latn_eng-Latn)": 72.62,
+ "Tatoeba (nld-Latn_eng-Latn)": 96.63,
+ "Tatoeba (hsb-Latn_eng-Latn)": 58.9,
+ "Tatoeba (aze-Latn_eng-Latn)": 87.61,
+ "Tatoeba (kzj-Latn_eng-Latn)": 7.91,
+ "Tatoeba (kab-Latn_eng-Latn)": 36.54,
+ "Tatoeba (mal-Mlym_eng-Latn)": 97.7,
+ "Tatoeba (mhr-Cyrl_eng-Latn)": 6.79,
+ "Tatoeba (ces-Latn_eng-Latn)": 94.89,
+ "Tatoeba (gla-Latn_eng-Latn)": 59.0,
+ "Tatoeba (cha-Latn_eng-Latn)": 27.16,
+ "Tatoeba (glg-Latn_eng-Latn)": 93.34,
+ "Tatoeba (vie-Latn_eng-Latn)": 97.0,
+ "Tatoeba (oci-Latn_eng-Latn)": 54.91,
+ "Tatoeba (nno-Latn_eng-Latn)": 91.4,
+ "Tatoeba (fin-Latn_eng-Latn)": 95.44,
+ "Tatoeba (eus-Latn_eng-Latn)": 77.82,
+ "Tatoeba (sqi-Latn_eng-Latn)": 94.7
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "multilingual-e5-large",
+ "AllegroReviews (pol-Latn)": 41.04,
+ "AllegroReviews": 41.14,
+ "AmazonCounterfactualClassification (en-ext)": 78.73,
+ "AmazonCounterfactualClassification (en)": 78.67,
+ "AmazonCounterfactualClassification (deu-Latn)": 68.66,
+ "AmazonCounterfactualClassification (jpn-Jpan)": 78.8,
+ "AmazonPolarityClassification": 93.26,
+ "AmazonReviewsClassification (en)": 49.2,
+ "AmazonReviewsClassification (deu-Latn)": 46.5,
+ "AmazonReviewsClassification (spa-Latn)": 44.35,
+ "AmazonReviewsClassification (fra-Latn)": 42.55,
+ "AmazonReviewsClassification (jpn-Jpan)": 41.71,
+ "AmazonReviewsClassification (cmn-Hans)": 38.87,
+ "AmazonReviewsClassification (fr)": 41.91,
+ "AngryTweetsClassification (dan-Latn)": 57.69,
+ "AngryTweetsClassification": 54.95,
+ "Banking77Classification": 75.88,
+ "CBD (pol-Latn)": 69.84,
+ "CBD": 69.9,
+ "DKHateClassification": 66.02,
+ "DanishPoliticalCommentsClassification (dan-Latn)": 39.43,
+ "DanishPoliticalCommentsClassification": 38.27,
+ "EmotionClassification": 47.58,
+ "GeoreviewClassification (rus-Cyrl)": 49.69,
+ "HeadlineClassification (rus-Cyrl)": 77.19,
+ "IFlyTek (cmn-Hans)": 41.86,
+ "IFlyTek": 45.47,
+ "ImdbClassification": 90.23,
+ "InappropriatenessClassification (rus-Cyrl)": 61.6,
+ "JDReview (cmn-Hans)": 80.54,
+ "JDReview": 80.99,
+ "KinopoiskClassification (rus-Cyrl)": 56.59,
+ "LccSentimentClassification (dan-Latn)": 61.53,
+ "LccSentimentClassification": 59.6,
+ "MTOPDomainClassification (en)": 91.81,
+ "MTOPDomainClassification (deu-Latn)": 90.44,
+ "MTOPDomainClassification (spa-Latn)": 88.34,
+ "MTOPDomainClassification (fra-Latn)": 86.23,
+ "MTOPDomainClassification (hin-Deva)": 86.84,
+ "MTOPDomainClassification (tha-Thai)": 86.88,
+ "MTOPDomainClassification (fr)": 86.41,
+ "MTOPIntentClassification (en)": 64.29,
+ "MTOPIntentClassification (deu-Latn)": 65.97,
+ "MTOPIntentClassification (spa-Latn)": 61.9,
+ "MTOPIntentClassification (fra-Latn)": 56.25,
+ "MTOPIntentClassification (hin-Deva)": 59.17,
+ "MTOPIntentClassification (tha-Thai)": 62.59,
+ "MTOPIntentClassification (fr)": 59.43,
+ "MasakhaNEWSClassification (amh-Ethi)": 83.7,
+ "MasakhaNEWSClassification (eng)": 78.26,
+ "MasakhaNEWSClassification (fra-Latn)": 76.11,
+ "MasakhaNEWSClassification (hau-Latn)": 76.17,
+ "MasakhaNEWSClassification (ibo-Latn)": 70.05,
+ "MasakhaNEWSClassification (lin-Latn)": 75.89,
+ "MasakhaNEWSClassification (lug-Latn)": 73.63,
+ "MasakhaNEWSClassification (orm-Ethi)": 80.31,
+ "MasakhaNEWSClassification (pcm-Latn)": 89.15,
+ "MasakhaNEWSClassification (run-Latn)": 76.55,
+ "MasakhaNEWSClassification (sna-Latn)": 86.99,
+ "MasakhaNEWSClassification (som-Latn)": 64.63,
+ "MasakhaNEWSClassification (swa-Latn)": 73.42,
+ "MasakhaNEWSClassification (tir-Ethi)": 72.06,
+ "MasakhaNEWSClassification (xho-Latn)": 82.56,
+ "MasakhaNEWSClassification (yor-Latn)": 81.09,
+ "MasakhaNEWSClassification (fra)": 79.38,
+ "MassiveIntentClassification (kor-Kore)": 63.92,
+ "MassiveIntentClassification (lav-Latn)": 58.31,
+ "MassiveIntentClassification (isl-Latn)": 53.3,
+ "MassiveIntentClassification (tel-Telu)": 53.96,
+ "MassiveIntentClassification (mya-Mymr)": 49.73,
+ "MassiveIntentClassification (nob-Latn)": 64.54,
+ "MassiveIntentClassification (en)": 68.51,
+ "MassiveIntentClassification (spa-Latn)": 64.01,
+ "MassiveIntentClassification (swe-Latn)": 66.52,
+ "MassiveIntentClassification (cmo-Hant)": 58.78,
+ "MassiveIntentClassification (pol-Latn)": 65.09,
+ "MassiveIntentClassification (rus-Cyrl)": 65.76,
+ "MassiveIntentClassification (aze-Latn)": 54.68,
+ "MassiveIntentClassification (fin-Latn)": 64.28,
+ "MassiveIntentClassification (cmo-Hans)": 66.23,
+ "MassiveIntentClassification (urd-Arab)": 54.6,
+ "MassiveIntentClassification (tam-Taml)": 53.41,
+ "MassiveIntentClassification (hin-Deva)": 60.93,
+ "MassiveIntentClassification (deu-Latn)": 63.82,
+ "MassiveIntentClassification (ell-Grek)": 64.34,
+ "MassiveIntentClassification (hye-Armn)": 50.89,
+ "MassiveIntentClassification (por-Latn)": 65.6,
+ "MassiveIntentClassification (nld-Latn)": 65.0,
+ "MassiveIntentClassification (fas-Arab)": 63.74,
+ "MassiveIntentClassification (ron-Latn)": 59.76,
+ "MassiveIntentClassification (slv-Latn)": 59.38,
+ "MassiveIntentClassification (heb-Hebr)": 62.44,
+ "MassiveIntentClassification (vie-Latn)": 63.39,
+ "MassiveIntentClassification (sqi-Latn)": 57.3,
+ "MassiveIntentClassification (khm-Khmr)": 34.88,
+ "MassiveIntentClassification (ben-Beng)": 55.6,
+ "MassiveIntentClassification (tgl-Latn)": 54.77,
+ "MassiveIntentClassification (jpn-Jpan)": 67.11,
+ "MassiveIntentClassification (kat-Geor)": 41.45,
+ "MassiveIntentClassification (afr-Latn)": 53.69,
+ "MassiveIntentClassification (cym-Latn)": 44.22,
+ "MassiveIntentClassification (amh-Ethi)": 45.48,
+ "MassiveIntentClassification (ita-Latn)": 63.89,
+ "MassiveIntentClassification (mal-Mlym)": 57.58,
+ "MassiveIntentClassification (tha-Thai)": 62.75,
+ "MassiveIntentClassification (ind-Latn)": 63.51,
+ "MassiveIntentClassification (jav-Latn)": 48.96,
+ "MassiveIntentClassification (dan-Latn)": 63.7,
+ "MassiveIntentClassification (ara-Arab)": 54.1,
+ "MassiveIntentClassification (kan-Knda)": 53.45,
+ "MassiveIntentClassification (hun-Latn)": 64.0,
+ "MassiveIntentClassification (tur-Latn)": 64.61,
+ "MassiveIntentClassification (msa-Latn)": 58.49,
+ "MassiveIntentClassification (mon-Cyrl)": 49.6,
+ "MassiveIntentClassification (swa-Latn)": 47.69,
+ "MassiveIntentClassification (fra-Latn)": 63.37,
+ "MassiveIntentClassification (da)": 60.16,
+ "MassiveIntentClassification (nb)": 59.83,
+ "MassiveIntentClassification (sv)": 61.78,
+ "MassiveIntentClassification (pl)": 65.07,
+ "MassiveScenarioClassification (heb-Hebr)": 67.72,
+ "MassiveScenarioClassification (vie-Latn)": 68.91,
+ "MassiveScenarioClassification (cmo-Hant)": 64.35,
+ "MassiveScenarioClassification (urd-Arab)": 60.89,
+ "MassiveScenarioClassification (isl-Latn)": 60.74,
+ "MassiveScenarioClassification (ell-Grek)": 69.74,
+ "MassiveScenarioClassification (mon-Cyrl)": 55.37,
+ "MassiveScenarioClassification (swa-Latn)": 56.27,
+ "MassiveScenarioClassification (tam-Taml)": 58.76,
+ "MassiveScenarioClassification (hye-Armn)": 55.76,
+ "MassiveScenarioClassification (amh-Ethi)": 52.69,
+ "MassiveScenarioClassification (ben-Beng)": 61.85,
+ "MassiveScenarioClassification (tel-Telu)": 59.49,
+ "MassiveScenarioClassification (dan-Latn)": 71.18,
+ "MassiveScenarioClassification (slv-Latn)": 65.33,
+ "MassiveScenarioClassification (en)": 73.04,
+ "MassiveScenarioClassification (rus-Cyrl)": 70.85,
+ "MassiveScenarioClassification (mal-Mlym)": 63.17,
+ "MassiveScenarioClassification (sqi-Latn)": 63.79,
+ "MassiveScenarioClassification (ita-Latn)": 69.45,
+ "MassiveScenarioClassification (kor-Kore)": 70.54,
+ "MassiveScenarioClassification (cmo-Hans)": 72.25,
+ "MassiveScenarioClassification (cym-Latn)": 51.25,
+ "MassiveScenarioClassification (pol-Latn)": 69.83,
+ "MassiveScenarioClassification (ind-Latn)": 69.43,
+ "MassiveScenarioClassification (tur-Latn)": 68.12,
+ "MassiveScenarioClassification (tgl-Latn)": 60.71,
+ "MassiveScenarioClassification (hin-Deva)": 66.85,
+ "MassiveScenarioClassification (spa-Latn)": 69.07,
+ "MassiveScenarioClassification (lav-Latn)": 64.28,
+ "MassiveScenarioClassification (mya-Mymr)": 54.03,
+ "MassiveScenarioClassification (ara-Arab)": 61.0,
+ "MassiveScenarioClassification (kan-Knda)": 59.36,
+ "MassiveScenarioClassification (jav-Latn)": 56.24,
+ "MassiveScenarioClassification (por-Latn)": 68.33,
+ "MassiveScenarioClassification (tha-Thai)": 69.06,
+ "MassiveScenarioClassification (aze-Latn)": 58.49,
+ "MassiveScenarioClassification (fra-Latn)": 68.74,
+ "MassiveScenarioClassification (ron-Latn)": 66.06,
+ "MassiveScenarioClassification (nld-Latn)": 71.11,
+ "MassiveScenarioClassification (fas-Arab)": 67.55,
+ "MassiveScenarioClassification (deu-Latn)": 71.25,
+ "MassiveScenarioClassification (nob-Latn)": 70.44,
+ "MassiveScenarioClassification (msa-Latn)": 63.55,
+ "MassiveScenarioClassification (afr-Latn)": 62.35,
+ "MassiveScenarioClassification (hun-Latn)": 70.53,
+ "MassiveScenarioClassification (swe-Latn)": 72.77,
+ "MassiveScenarioClassification (kat-Geor)": 47.82,
+ "MassiveScenarioClassification (jpn-Jpan)": 73.16,
+ "MassiveScenarioClassification (khm-Khmr)": 41.14,
+ "MassiveScenarioClassification (fin-Latn)": 68.62,
+ "MassiveScenarioClassification (da)": 67.46,
+ "MassiveScenarioClassification (nb)": 66.18,
+ "MassiveScenarioClassification (sv)": 69.15,
+ "MassiveScenarioClassification (pl)": 69.82,
+ "MultilingualSentiment (cmn-Hans)": 70.81,
+ "MultilingualSentiment": 68.58,
+ "NoRecClassification (nob-Latn)": 58.43,
+ "NoRecClassification": 62.76,
+ "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 80.15,
+ "NordicLangClassification": 82.29,
+ "NorwegianParliament": 60.36,
+ "OnlineShopping (cmn-Hans)": 90.45,
+ "OnlineShopping": 90.81,
+ "PAC (pol-Latn)": 70.33,
+ "PAC": 70.37,
+ "PolEmo2.0-IN (pol-Latn)": 77.06,
+ "PolEmo2.0-IN": 77.06,
+ "PolEmo2.0-OUT (pol-Latn)": 53.48,
+ "PolEmo2.0-OUT": 53.38,
+ "RuReviewsClassification (rus-Cyrl)": 65.28,
+ "RuSciBenchGRNTIClassification (rus-Cyrl)": 58.2,
+ "RuSciBenchOECDClassification (rus-Cyrl)": 43.91,
+ "ScalaDaClassification": 50.77,
+ "ScalaNbClassification": 50.44,
+ "TNews (cmn-Hans)": 48.8,
+ "TNews": 48.38,
+ "ToxicConversationsClassification": 66.01,
+ "TweetSentimentExtractionClassification": 62.8,
+ "Waimai (cmn-Hans)": 86.3,
+ "Waimai": 85.02
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "LaBSE-en-ru",
- "GeoreviewClusteringP2P (rus-Cyrl)": 51.89,
- "MLSUMClusteringP2P (rus-Cyrl)": 37.87,
- "MLSUMClusteringS2S (rus-Cyrl)": 41.24,
- "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 47.48,
- "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 41.16
+ "Model": "multilingual-e5-large",
+ "8TagsClustering": 33.88,
+ "AlloProfClusteringP2P": 62.99,
+ "AlloProfClusteringS2S": 32.26,
+ "BiorxivClusteringP2P": 35.5,
+ "BiorxivClusteringS2S": 33.3,
+ "CLSClusteringP2P": 40.68,
+ "CLSClusteringS2S": 38.59,
+ "GeoreviewClusteringP2P (rus-Cyrl)": 60.51,
+ "HALClusteringS2S": 22.44,
+ "MLSUMClusteringP2P (rus-Cyrl)": 42.79,
+ "MLSUMClusteringP2P": 44.04,
+ "MLSUMClusteringS2S (rus-Cyrl)": 44.32,
+ "MLSUMClusteringS2S": 37.65,
+ "MasakhaNEWSClusteringP2P (amh-Ethi)": 67.16,
+ "MasakhaNEWSClusteringP2P (eng)": 61.1,
+ "MasakhaNEWSClusteringP2P (fra-Latn)": 41.66,
+ "MasakhaNEWSClusteringP2P (hau-Latn)": 60.7,
+ "MasakhaNEWSClusteringP2P (ibo-Latn)": 48.41,
+ "MasakhaNEWSClusteringP2P (lin-Latn)": 57.69,
+ "MasakhaNEWSClusteringP2P (lug-Latn)": 71.95,
+ "MasakhaNEWSClusteringP2P (orm-Ethi)": 60.14,
+ "MasakhaNEWSClusteringP2P (pcm-Latn)": 80.84,
+ "MasakhaNEWSClusteringP2P (run-Latn)": 59.91,
+ "MasakhaNEWSClusteringP2P (sna-Latn)": 53.3,
+ "MasakhaNEWSClusteringP2P (som-Latn)": 34.38,
+ "MasakhaNEWSClusteringP2P (swa-Latn)": 33.25,
+ "MasakhaNEWSClusteringP2P (tir-Ethi)": 54.21,
+ "MasakhaNEWSClusteringP2P (xho-Latn)": 41.12,
+ "MasakhaNEWSClusteringP2P (yor-Latn)": 36.22,
+ "MasakhaNEWSClusteringP2P (fra)": 40.94,
+ "MasakhaNEWSClusteringS2S (amh-Ethi)": 47.24,
+ "MasakhaNEWSClusteringS2S (eng)": 53.93,
+ "MasakhaNEWSClusteringS2S (fra-Latn)": 39.84,
+ "MasakhaNEWSClusteringS2S (hau-Latn)": 19.24,
+ "MasakhaNEWSClusteringS2S (ibo-Latn)": 28.88,
+ "MasakhaNEWSClusteringS2S (lin-Latn)": 42.22,
+ "MasakhaNEWSClusteringS2S (lug-Latn)": 43.63,
+ "MasakhaNEWSClusteringS2S (orm-Ethi)": 26.29,
+ "MasakhaNEWSClusteringS2S (pcm-Latn)": 59.77,
+ "MasakhaNEWSClusteringS2S (run-Latn)": 51.46,
+ "MasakhaNEWSClusteringS2S (sna-Latn)": 48.14,
+ "MasakhaNEWSClusteringS2S (som-Latn)": 25.14,
+ "MasakhaNEWSClusteringS2S (swa-Latn)": 7.28,
+ "MasakhaNEWSClusteringS2S (tir-Ethi)": 50.51,
+ "MasakhaNEWSClusteringS2S (xho-Latn)": 30.98,
+ "MasakhaNEWSClusteringS2S (yor-Latn)": 34.09,
+ "MasakhaNEWSClusteringS2S (fra)": 30.56,
+ "MedrxivClusteringP2P": 31.7,
+ "MedrxivClusteringS2S": 29.76,
+ "RedditClustering": 46.91,
+ "RedditClusteringP2P": 63.0,
+ "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 52.03,
+ "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 45.11,
+ "StackExchangeClustering": 58.37,
+ "StackExchangeClusteringP2P": 32.9,
+ "ThuNewsClusteringP2P": 58.05,
+ "ThuNewsClusteringS2S": 55.59,
+ "TwentyNewsgroupsClustering": 39.4
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "LaBSE-en-ru",
- "OpusparcusPC (rus-Cyrl)": 87.18,
- "TERRa (rus-Cyrl)": 55.61
+ "Model": "multilingual-e5-large",
+ "CDSC-E (pol-Latn)": 74.47,
+ "CDSC-E": 74.47,
+ "Cmnli": 78.18,
+ "Ocnli": 61.6,
+ "OpusparcusPC (deu-Latn)": 97.27,
+ "OpusparcusPC (en)": 98.74,
+ "OpusparcusPC (fin-Latn)": 94.26,
+ "OpusparcusPC (fra-Latn)": 93.68,
+ "OpusparcusPC (rus-Cyrl)": 89.64,
+ "OpusparcusPC (swe-Latn)": 94.98,
+ "OpusparcusPC (fr)": 93.89,
+ "PPC": 92.18,
+ "PSC (pol-Latn)": 99.4,
+ "PSC": 99.39,
+ "PawsXPairClassification (deu-Latn)": 56.81,
+ "PawsXPairClassification (en)": 62.97,
+ "PawsXPairClassification (spa-Latn)": 56.85,
+ "PawsXPairClassification (fra-Latn)": 58.68,
+ "PawsXPairClassification (jpn-Hira)": 50.7,
+ "PawsXPairClassification (kor-Hang)": 52.08,
+ "PawsXPairClassification (cmn-Hans)": 56.82,
+ "PawsXPairClassification (fr)": 58.5,
+ "SICK-E-PL (pol-Latn)": 75.95,
+ "SICK-E-PL": 75.96,
+ "SprintDuplicateQuestions": 93.14,
+ "TERRa (rus-Cyrl)": 58.4,
+ "TwitterSemEval2015": 75.28,
+ "TwitterURLCorpus": 85.83
}
]
},
"Reranking": {
"map": [
{
- "Model": "LaBSE-en-ru",
- "RuBQReranking (rus-Cyrl)": 54.83
+ "Model": "multilingual-e5-large",
+ "AlloprofReranking (fra-Latn)": 69.44,
+ "AlloprofReranking": 57.37,
+ "AskUbuntuDupQuestions": 59.24,
+ "CMedQAv1": 68.25,
+ "CMedQAv2": 68.56,
+ "MMarcoReranking (cmn-Hans)": 29.12,
+ "MMarcoReranking": 21.34,
+ "MindSmallReranking": 30.24,
+ "RuBQReranking (rus-Cyrl)": 75.58,
+ "SciDocsRR": 84.22,
+ "StackOverflowDupQuestions": 50.14,
+ "SyntecReranking (fra-Latn)": 85.45,
+ "SyntecReranking": 86.9,
+ "T2Reranking (cmn-Hans)": 66.32,
+ "T2Reranking": 65.83
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "LaBSE-en-ru",
- "RiaNewsRetrieval (rus-Cyrl)": 34.73,
- "RuBQRetrieval (rus-Cyrl)": 29.03
+ "Model": "multilingual-e5-large",
+ "AILACasedocs": 26.43,
+ "AILAStatutes": 20.84,
+ "ARCChallenge": 10.83,
+ "AlloprofRetrieval (fra-Latn)": 39.34,
+ "AlloprofRetrieval": 38.15,
+ "AlphaNLI": 13.59,
+ "ArguAna": 54.36,
+ "ArguAna-PL (pol-Latn)": 52.99,
+ "ArguAna-PL": 53.02,
+ "BSARDRetrieval (fra-Latn)": 21.28,
+ "BSARDRetrieval": 0.27,
+ "CmedqaRetrieval (cmn-Hans)": 28.66,
+ "CmedqaRetrieval": 28.67,
+ "CovidRetrieval (cmn-Hans)": 75.61,
+ "CovidRetrieval": 75.51,
+ "DBPedia-PL": 35.82,
+ "DuRetrieval (cmn-Hans)": 85.3,
+ "DuRetrieval": 85.32,
+ "EcomRetrieval (cmn-Hans)": 54.67,
+ "EcomRetrieval": 54.75,
+ "FiQA-PL (pol-Latn)": 32.97,
+ "FiQA-PL": 33.0,
+ "FiQA2018": 43.81,
+ "GerDaLIRSmall (deu-Latn)": 15.72,
+ "HellaSwag": 27.35,
+ "HotpotQA-PL": 67.41,
+ "LEMBNarrativeQARetrieval": 24.22,
+ "LEMBNeedleRetrieval": 28.0,
+ "LEMBPasskeyRetrieval": 38.25,
+ "LEMBQMSumRetrieval": 24.26,
+ "LEMBSummScreenFDRetrieval": 71.12,
+ "LEMBWikimQARetrieval": 56.8,
+ "LeCaRDv2 (zho-Hans)": 55.83,
+ "LegalBenchConsumerContractsQA": 73.3,
+ "LegalBenchCorporateLobbying": 89.72,
+ "LegalQuAD (deu-Latn)": 43.17,
+ "LegalSummarization": 62.1,
+ "MMarcoRetrieval (cmn-Hans)": 79.2,
+ "MMarcoRetrieval": 79.2,
+ "MSMARCO-PL": 33.38,
+ "MedicalRetrieval (cmn-Hans)": 51.44,
+ "MedicalRetrieval": 51.44,
+ "MintakaRetrieval (ara-Arab)": 26.5,
+ "MintakaRetrieval (deu-Latn)": 32.77,
+ "MintakaRetrieval (spa-Latn)": 34.23,
+ "MintakaRetrieval (fra-Latn)": 34.24,
+ "MintakaRetrieval (hin-Deva)": 27.45,
+ "MintakaRetrieval (ita-Latn)": 33.84,
+ "MintakaRetrieval (jpn-Hira)": 26.45,
+ "MintakaRetrieval (por-Latn)": 35.9,
+ "MintakaRetrieval (fr)": 25.2,
+ "NFCorpus": 33.95,
+ "NFCorpus-PL (pol-Latn)": 30.21,
+ "NFCorpus-PL": 30.24,
+ "NQ-PL": 52.79,
+ "PIQA": 28.82,
+ "Quail": 4.85,
+ "Quora-PL": 83.65,
+ "RARbCode": 58.92,
+ "RARbMath": 67.32,
+ "RiaNewsRetrieval (rus-Cyrl)": 80.67,
+ "RuBQRetrieval (rus-Cyrl)": 74.11,
+ "SCIDOCS": 17.45,
+ "SCIDOCS-PL (pol-Latn)": 13.82,
+ "SCIDOCS-PL": 13.81,
+ "SIQA": 5.36,
+ "SciFact": 70.42,
+ "SciFact-PL (pol-Latn)": 65.66,
+ "SciFact-PL": 65.66,
+ "SpartQA": 5.64,
+ "SyntecRetrieval (fra-Latn)": 82.39,
+ "SyntecRetrieval": 81.07,
+ "T2Retrieval (cmn-Hans)": 76.07,
+ "T2Retrieval": 76.11,
+ "TRECCOVID": 71.21,
+ "TRECCOVID-PL (pol-Latn)": 69.9,
+ "TRECCOVID-PL": 70.03,
+ "TempReasonL1": 1.14,
+ "TempReasonL2Fact": 42.97,
+ "TempReasonL2Pure": 2.05,
+ "TempReasonL3Fact": 38.22,
+ "TempReasonL3Pure": 8.31,
+ "Touche2020": 23.13,
+ "VideoRetrieval (cmn-Hans)": 58.28,
+ "VideoRetrieval": 58.25,
+ "WinoGrande": 54.99,
+ "XPQARetrieval (ara-Arab_ara-Arab)": 43.69,
+ "XPQARetrieval (eng-Latn_ara-Arab)": 30.86,
+ "XPQARetrieval (ara-Arab_eng-Latn)": 39.11,
+ "XPQARetrieval (deu-Latn_deu-Latn)": 76.83,
+ "XPQARetrieval (eng-Latn_deu-Latn)": 42.87,
+ "XPQARetrieval (deu-Latn_eng-Latn)": 68.25,
+ "XPQARetrieval (spa-Latn_spa-Latn)": 61.77,
+ "XPQARetrieval (eng-Latn_spa-Latn)": 37.55,
+ "XPQARetrieval (spa-Latn_eng-Latn)": 52.86,
+ "XPQARetrieval (fra-Latn_fra-Latn)": 61.38,
+ "XPQARetrieval (eng-Latn_fra-Latn)": 39.12,
+ "XPQARetrieval (fra-Latn_eng-Latn)": 57.93,
+ "XPQARetrieval (hin-Deva_hin-Deva)": 71.09,
+ "XPQARetrieval (eng-Latn_hin-Deva)": 32.39,
+ "XPQARetrieval (hin-Deva_eng-Latn)": 68.31,
+ "XPQARetrieval (ita-Latn_ita-Latn)": 74.32,
+ "XPQARetrieval (eng-Latn_ita-Latn)": 37.95,
+ "XPQARetrieval (ita-Latn_eng-Latn)": 64.54,
+ "XPQARetrieval (jpn-Hira_jpn-Hira)": 74.11,
+ "XPQARetrieval (eng-Latn_jpn-Hira)": 38.31,
+ "XPQARetrieval (jpn-Hira_eng-Latn)": 65.42,
+ "XPQARetrieval (kor-Hang_kor-Hang)": 35.72,
+ "XPQARetrieval (eng-Latn_kor-Hang)": 31.09,
+ "XPQARetrieval (kor-Hang_eng-Latn)": 34.06,
+ "XPQARetrieval (pol-Latn_pol-Latn)": 51.01,
+ "XPQARetrieval (eng-Latn_pol-Latn)": 30.49,
+ "XPQARetrieval (pol-Latn_eng-Latn)": 44.66,
+ "XPQARetrieval (por-Latn_por-Latn)": 41.1,
+ "XPQARetrieval (eng-Latn_por-Latn)": 22.03,
+ "XPQARetrieval (por-Latn_eng-Latn)": 35.15,
+ "XPQARetrieval (tam-Taml_tam-Taml)": 39.51,
+ "XPQARetrieval (eng-Latn_tam-Taml)": 17.33,
+ "XPQARetrieval (tam-Taml_eng-Latn)": 33.67,
+ "XPQARetrieval (cmn-Hans_cmn-Hans)": 66.27,
+ "XPQARetrieval (eng-Latn_cmn-Hans)": 26.24,
+ "XPQARetrieval (cmn-Hans_eng-Latn)": 55.15,
+ "XPQARetrieval (fr)": 66.15
}
]
},
"STS": {
"spearman": [
{
- "Model": "LaBSE-en-ru",
- "RUParaPhraserSTS (rus-Cyrl)": 65.87,
- "RuSTSBenchmarkSTS (rus-Cyrl)": 73.32,
- "STS22 (deu-Latn)": 38.9,
- "STS22 (en)": 59.47,
- "STS22 (pol-Latn_eng-Latn)": 58.73,
- "STS22 (spa-Latn)": 60.85,
- "STS22 (fra-Latn)": 74.98,
- "STS22 (deu-Latn_eng-Latn)": 47.98,
- "STS22 (deu-Latn_fra-Latn)": 59.4,
- "STS22 (deu-Latn_pol-Latn)": 39.48,
- "STS22 (pol-Latn)": 32.74,
- "STS22 (tur-Latn)": 55.04,
- "STS22 (spa-Latn_eng-Latn)": 70.8,
- "STS22 (rus-Cyrl)": 58.53,
- "STS22 (ita-Latn)": 68.58,
- "STS22 (fra-Latn_pol-Latn)": 61.98,
- "STS22 (spa-Latn_ita-Latn)": 66.83,
- "STS22 (cmn-Hans_eng-Latn)": 24.98,
- "STS22 (ara-Arab)": 31.85,
- "STS22 (cmn-Hans)": 35.1,
- "STSBenchmarkMultilingualSTS (rus-Cyrl)": 73.02
+ "Model": "multilingual-e5-large",
+ "AFQMC (cmn-Hans)": 33.01,
+ "AFQMC": 33.02,
+ "ATEC (cmn-Hans)": 39.8,
+ "ATEC": 39.81,
+ "BIOSSES": 82.49,
+ "BQ (cmn-Hans)": 46.44,
+ "BQ": 46.44,
+ "CDSC-R (pol-Latn)": 91.0,
+ "CDSC-R": 91.0,
+ "LCQMC (cmn-Hans)": 75.95,
+ "LCQMC": 75.95,
+ "PAWSX (cmn-Hans)": 14.63,
+ "PAWSX": 14.63,
+ "QBQTC": 29.77,
+ "RUParaPhraserSTS (rus-Cyrl)": 71.82,
+ "RuSTSBenchmarkSTS (rus-Cyrl)": 83.15,
+ "SICK-R": 80.23,
+ "SICK-R-PL (pol-Latn)": 75.08,
+ "SICK-R-PL": 75.08,
+ "SICKFr (fra-Latn)": 78.81,
+ "SICKFr": 78.78,
+ "STS12": 80.02,
+ "STS13": 81.55,
+ "STS14": 77.72,
+ "STS15": 89.31,
+ "STS16": 85.79,
+ "STS17 (en-en)": 88.12,
+ "STS17 (spa-Latn)": 86.71,
+ "STS17 (spa-Latn_eng-Latn)": 80.74,
+ "STS17 (eng-Latn_ara-Arab)": 75.03,
+ "STS17 (fra-Latn_eng-Latn)": 85.62,
+ "STS17 (kor-Hang)": 82.27,
+ "STS17 (ita-Latn_eng-Latn)": 84.52,
+ "STS17 (ara-Arab)": 77.83,
+ "STS17 (eng-Latn_tur-Latn)": 71.22,
+ "STS17 (eng-Latn_deu-Latn)": 86.15,
+ "STS17 (nld-Latn_eng-Latn)": 85.29,
+ "STS22 (spa-Latn)": 64.6,
+ "STS22 (spa-Latn_eng-Latn)": 72.51,
+ "STS22 (deu-Latn_eng-Latn)": 56.59,
+ "STS22 (cmn-Hans_eng-Latn)": 65.95,
+ "STS22 (deu-Latn_pol-Latn)": 49.58,
+ "STS22 (fra-Latn_pol-Latn)": 50.71,
+ "STS22 (en)": 63.66,
+ "STS22 (ara-Arab)": 56.95,
+ "STS22 (spa-Latn_ita-Latn)": 68.92,
+ "STS22 (tur-Latn)": 63.56,
+ "STS22 (deu-Latn_fra-Latn)": 67.96,
+ "STS22 (ita-Latn)": 76.99,
+ "STS22 (cmn-Hans)": 66.82,
+ "STS22 (rus-Cyrl)": 59.89,
+ "STS22 (fra-Latn)": 76.77,
+ "STS22 (pol-Latn_eng-Latn)": 65.54,
+ "STS22 (deu-Latn)": 56.58,
+ "STS22 (pol-Latn)": 34.65,
+ "STS22 (zh)": 65.64,
+ "STS22 (pl)": 34.66,
+ "STSB (cmn-Hans)": 81.08,
+ "STSB": 81.08,
+ "STSBenchmark": 87.29,
+ "STSBenchmarkMultilingualSTS (cmn-Hans)": 81.22,
+ "STSBenchmarkMultilingualSTS (en)": 87.29,
+ "STSBenchmarkMultilingualSTS (pol-Latn)": 81.06,
+ "STSBenchmarkMultilingualSTS (nld-Latn)": 81.63,
+ "STSBenchmarkMultilingualSTS (ita-Latn)": 81.75,
+ "STSBenchmarkMultilingualSTS (rus-Cyrl)": 83.05,
+ "STSBenchmarkMultilingualSTS (por-Latn)": 73.31,
+ "STSBenchmarkMultilingualSTS (spa-Latn)": 83.81,
+ "STSBenchmarkMultilingualSTS (fra-Latn)": 83.28,
+ "STSBenchmarkMultilingualSTS (deu-Latn)": 84.27,
+ "STSBenchmarkMultilingualSTS (fr)": 82.53
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "LaBSE-en-ru"
+ "Model": "multilingual-e5-large",
+ "SummEval": 29.65,
+ "SummEvalFr (fra-Latn)": 30.92,
+ "SummEvalFr": 30.92
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "LaBSE-en-ru"
+ "Model": "multilingual-e5-large"
}
]
}
},
- "text2vec-base-chinese": {
+ "komninos": {
"BitextMining": {
"f1": [
{
- "Model": "text2vec-base-chinese"
+ "Model": "komninos",
+ "BUCC (de-en)": 0.18,
+ "BUCC (fr-en)": 0.08,
+ "BUCC (ru-en)": 0.15,
+ "BUCC (zh-en)": 0.05,
+ "Tatoeba (afr-eng)": 4.82,
+ "Tatoeba (amh-eng)": 1.18,
+ "Tatoeba (ang-eng)": 8.54,
+ "Tatoeba (ara-eng)": 0.63,
+ "Tatoeba (arq-eng)": 0.4,
+ "Tatoeba (arz-eng)": 0.63,
+ "Tatoeba (ast-eng)": 11.69,
+ "Tatoeba (awa-eng)": 0.0,
+ "Tatoeba (aze-eng)": 3.22,
+ "Tatoeba (bel-eng)": 1.75,
+ "Tatoeba (ben-eng)": 0.2,
+ "Tatoeba (ber-eng)": 7.0,
+ "Tatoeba (bos-eng)": 9.31,
+ "Tatoeba (bre-eng)": 4.17,
+ "Tatoeba (bul-eng)": 1.29,
+ "Tatoeba (cat-eng)": 7.73,
+ "Tatoeba (cbk-eng)": 5.61,
+ "Tatoeba (ceb-eng)": 4.88,
+ "Tatoeba (ces-eng)": 3.55,
+ "Tatoeba (cha-eng)": 19.29,
+ "Tatoeba (cmn-eng)": 0.5,
+ "Tatoeba (cor-eng)": 4.15,
+ "Tatoeba (csb-eng)": 5.69,
+ "Tatoeba (cym-eng)": 8.4,
+ "Tatoeba (dan-eng)": 6.99,
+ "Tatoeba (deu-eng)": 3.67,
+ "Tatoeba (dsb-eng)": 5.33,
+ "Tatoeba (dtp-eng)": 4.25,
+ "Tatoeba (ell-eng)": 0.63,
+ "Tatoeba (epo-eng)": 2.45,
+ "Tatoeba (est-eng)": 2.69,
+ "Tatoeba (eus-eng)": 4.69,
+ "Tatoeba (fao-eng)": 7.61,
+ "Tatoeba (fin-eng)": 3.36,
+ "Tatoeba (fra-eng)": 7.0,
+ "Tatoeba (fry-eng)": 12.36,
+ "Tatoeba (gla-eng)": 3.07,
+ "Tatoeba (gle-eng)": 4.81,
+ "Tatoeba (glg-eng)": 8.12,
+ "Tatoeba (gsw-eng)": 18.87,
+ "Tatoeba (heb-eng)": 0.68,
+ "Tatoeba (hin-eng)": 0.1,
+ "Tatoeba (hrv-eng)": 5.41,
+ "Tatoeba (hsb-eng)": 6.32,
+ "Tatoeba (hun-eng)": 3.42,
+ "Tatoeba (hye-eng)": 0.97,
+ "Tatoeba (ido-eng)": 7.1,
+ "Tatoeba (ile-eng)": 13.61,
+ "Tatoeba (ina-eng)": 8.57,
+ "Tatoeba (ind-eng)": 7.26,
+ "Tatoeba (isl-eng)": 4.09,
+ "Tatoeba (ita-eng)": 5.54,
+ "Tatoeba (jav-eng)": 11.43,
+ "Tatoeba (jpn-eng)": 0.2,
+ "Tatoeba (kab-eng)": 2.71,
+ "Tatoeba (kat-eng)": 1.11,
+ "Tatoeba (kaz-eng)": 1.17,
+ "Tatoeba (khm-eng)": 0.55,
+ "Tatoeba (kor-eng)": 0.5,
+ "Tatoeba (kur-eng)": 8.55,
+ "Tatoeba (kzj-eng)": 4.61,
+ "Tatoeba (lat-eng)": 4.07,
+ "Tatoeba (lfn-eng)": 2.83,
+ "Tatoeba (lit-eng)": 0.95,
+ "Tatoeba (lvs-eng)": 3.25,
+ "Tatoeba (mal-eng)": 0.29,
+ "Tatoeba (mar-eng)": 0.2,
+ "Tatoeba (max-eng)": 14.53,
+ "Tatoeba (mhr-eng)": 0.2,
+ "Tatoeba (mkd-eng)": 0.2,
+ "Tatoeba (mon-eng)": 1.1,
+ "Tatoeba (nds-eng)": 10.37,
+ "Tatoeba (nld-eng)": 9.5,
+ "Tatoeba (nno-eng)": 4.49,
+ "Tatoeba (nob-eng)": 4.95,
+ "Tatoeba (nov-eng)": 14.53,
+ "Tatoeba (oci-eng)": 5.8,
+ "Tatoeba (orv-eng)": 0.24,
+ "Tatoeba (pam-eng)": 6.65,
+ "Tatoeba (pes-eng)": 0.5,
+ "Tatoeba (pms-eng)": 8.05,
+ "Tatoeba (pol-eng)": 5.13,
+ "Tatoeba (por-eng)": 5.87,
+ "Tatoeba (ron-eng)": 6.76,
+ "Tatoeba (rus-eng)": 0.2,
+ "Tatoeba (slk-eng)": 4.23,
+ "Tatoeba (slv-eng)": 6.05,
+ "Tatoeba (spa-eng)": 5.03,
+ "Tatoeba (sqi-eng)": 4.36,
+ "Tatoeba (srp-eng)": 1.77,
+ "Tatoeba (swe-eng)": 6.72,
+ "Tatoeba (swg-eng)": 8.54,
+ "Tatoeba (swh-eng)": 11.49,
+ "Tatoeba (tam-eng)": 1.3,
+ "Tatoeba (tat-eng)": 0.77,
+ "Tatoeba (tel-eng)": 0.85,
+ "Tatoeba (tgl-eng)": 2.61,
+ "Tatoeba (tha-eng)": 0.69,
+ "Tatoeba (tuk-eng)": 5.76,
+ "Tatoeba (tur-eng)": 5.24,
+ "Tatoeba (tzl-eng)": 15.51,
+ "Tatoeba (uig-eng)": 0.6,
+ "Tatoeba (ukr-eng)": 1.23,
+ "Tatoeba (urd-eng)": 0.4,
+ "Tatoeba (uzb-eng)": 4.73,
+ "Tatoeba (vie-eng)": 6.55,
+ "Tatoeba (war-eng)": 4.12,
+ "Tatoeba (wuu-eng)": 0.2,
+ "Tatoeba (xho-eng)": 4.33,
+ "Tatoeba (yid-eng)": 0.59,
+ "Tatoeba (yue-eng)": 0.5,
+ "Tatoeba (zsm-eng)": 7.27
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "text2vec-base-chinese",
- "AmazonReviewsClassification (zh)": 34.12,
- "IFlyTek": 42.05,
- "JDReview": 82.14,
- "MassiveIntentClassification (zh-CN)": 63.98,
- "MassiveScenarioClassification (zh-CN)": 70.52,
- "MultilingualSentiment": 60.98,
- "OnlineShopping": 85.69,
- "TNews": 43.01,
- "Waimai": 77.22
+ "Model": "komninos",
+ "AmazonCounterfactualClassification (en)": 60.54,
+ "AmazonPolarityClassification": 59.59,
+ "AmazonReviewsClassification (en)": 31.01,
+ "Banking77Classification": 67.05,
+ "EmotionClassification": 33.18,
+ "ImdbClassification": 63.98,
+ "MTOPDomainClassification (en)": 78.57,
+ "MTOPIntentClassification (en)": 57.07,
+ "MassiveIntentClassification (en)": 57.21,
+ "MassiveScenarioClassification (en)": 66.11,
+ "ToxicConversationsClassification": 67.76,
+ "TweetSentimentExtractionClassification": 49.68
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "text2vec-base-chinese",
- "CLSClusteringP2P": 35.27,
- "CLSClusteringS2S": 32.42,
- "ThuNewsClusteringP2P": 42.92,
- "ThuNewsClusteringS2S": 40.01
+ "Model": "komninos",
+ "ArxivClusteringP2P": 34.73,
+ "ArxivClusteringS2S": 26.01,
+ "BiorxivClusteringP2P": 29.76,
+ "BiorxivClusteringS2S": 20.71,
+ "BlurbsClusteringP2P": 11.37,
+ "BlurbsClusteringS2S": 8.01,
+ "MedrxivClusteringP2P": 26.65,
+ "MedrxivClusteringS2S": 21.5,
+ "RedditClustering": 28.84,
+ "RedditClusteringP2P": 7.37,
+ "StackExchangeClustering": 39.04,
+ "StackExchangeClusteringP2P": 30.23,
+ "TenKGnadClusteringP2P": 15.89,
+ "TenKGnadClusteringS2S": 4.84,
+ "TwentyNewsgroupsClustering": 27.42
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "text2vec-base-chinese",
- "Cmnli": 73.87,
- "Ocnli": 60.95
+ "Model": "komninos",
+ "SprintDuplicateQuestions": 85.55,
+ "TwitterSemEval2015": 53.85,
+ "TwitterURLCorpus": 79.41
}
]
},
"Reranking": {
"map": [
{
- "Model": "text2vec-base-chinese",
- "CMedQAv1": 59.26,
- "CMedQAv2": 59.82,
- "MMarcoReranking": 12.76,
- "T2Reranking": 65.95
+ "Model": "komninos",
+ "AskUbuntuDupQuestions": 50.88,
+ "MindSmallReranking": 28.92,
+ "SciDocsRR": 63.55,
+ "StackOverflowDupQuestions": 35.65
}
]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "text2vec-base-chinese",
- "CmedqaRetrieval": 15.91,
- "CovidRetrieval": 44.81,
- "DuRetrieval": 52.23,
- "EcomRetrieval": 34.6,
- "MMarcoRetrieval": 44.06,
- "MedicalRetrieval": 27.56,
- "T2Retrieval": 51.67,
- "VideoRetrieval": 39.52
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "komninos",
+ "ArguAna": 30.96,
+ "CQADupstackRetrieval": 16.79,
+ "ClimateFEVER": 14.87,
+ "DBPedia": 15.88,
+ "FEVER": 15.56,
+ "FiQA2018": 10.49,
+ "HotpotQA": 20.77,
+ "MSMARCO": 9.75,
+ "NFCorpus": 11.79,
+ "NQ": 12.75,
+ "QuoraRetrieval": 71.57,
+ "SCIDOCS": 8.47,
+ "SciFact": 29.53,
+ "TRECCOVID": 35.92,
+ "Touche2020": 13.17
}
]
},
"STS": {
"spearman": [
{
- "Model": "text2vec-base-chinese",
- "AFQMC": 26.06,
- "ATEC": 31.93,
- "BQ": 42.67,
- "LCQMC": 70.16,
- "PAWSX": 17.21,
- "QBQTC": 24.62,
- "STS22 (zh)": 55.35,
- "STSB": 79.3
+ "Model": "komninos",
+ "BIOSSES": 50.25,
+ "SICK-R": 55.49,
+ "STS12": 53.51,
+ "STS13": 70.8,
+ "STS14": 63.56,
+ "STS15": 74.08,
+ "STS16": 64.6,
+ "STS17 (ar-ar)": 13.78,
+ "STS17 (en-ar)": 9.08,
+ "STS17 (en-de)": -3.11,
+ "STS17 (en-en)": 76.91,
+ "STS17 (en-tr)": -0.45,
+ "STS17 (es-en)": -8.18,
+ "STS17 (es-es)": 48.23,
+ "STS17 (fr-en)": 5.81,
+ "STS17 (it-en)": 3.64,
+ "STS17 (ko-ko)": 2.54,
+ "STS17 (nl-en)": 0.44,
+ "STS22 (ar)": 32.42,
+ "STS22 (de)": 33.04,
+ "STS22 (de-en)": 28.65,
+ "STS22 (de-fr)": 14.77,
+ "STS22 (de-pl)": 11.21,
+ "STS22 (en)": 53.89,
+ "STS22 (es)": 48.53,
+ "STS22 (es-en)": 26.97,
+ "STS22 (es-it)": 41.1,
+ "STS22 (fr)": 49.43,
+ "STS22 (fr-pl)": 39.44,
+ "STS22 (it)": 57.77,
+ "STS22 (pl)": 12.47,
+ "STS22 (pl-en)": 45.55,
+ "STS22 (ru)": 19.44,
+ "STS22 (tr)": 47.38,
+ "STS22 (zh)": 4.78,
+ "STS22 (zh-en)": 14.05,
+ "STSBenchmark": 61.55
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "text2vec-base-chinese"
+ "Model": "komninos",
+ "SummEval": 30.49
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "text2vec-base-chinese"
+ "Model": "komninos"
}
]
}
},
- "herbert-base-retrieval-v2": {
+ "voyage-lite-01-instruct": {
"BitextMining": {
"f1": [
{
- "Model": "herbert-base-retrieval-v2"
+ "Model": "voyage-lite-01-instruct"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "herbert-base-retrieval-v2",
- "AllegroReviews": 34.11,
- "CBD": 68.35,
- "MassiveIntentClassification (pl)": 65.53,
- "MassiveScenarioClassification (pl)": 68.51,
- "PAC": 68.4,
- "PolEmo2.0-IN": 64.18,
- "PolEmo2.0-OUT": 45.73
+ "Model": "voyage-lite-01-instruct",
+ "AmazonCounterfactualClassification (en)": 71.43,
+ "AmazonPolarityClassification": 96.41,
+ "AmazonReviewsClassification (en)": 57.06,
+ "Banking77Classification": 81.64,
+ "EmotionClassification": 48.29,
+ "ImdbClassification": 95.49,
+ "MTOPDomainClassification (en)": 96.3,
+ "MTOPIntentClassification (en)": 67.93,
+ "MassiveIntentClassification (en)": 71.29,
+ "MassiveScenarioClassification (en)": 76.74,
+ "ToxicConversationsClassification": 75.45,
+ "TweetSentimentExtractionClassification": 59.44
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "herbert-base-retrieval-v2",
- "8TagsClustering": 28.15
+ "Model": "voyage-lite-01-instruct",
+ "ArxivClusteringP2P": 47.92,
+ "ArxivClusteringS2S": 42.42,
+ "BiorxivClusteringP2P": 38.72,
+ "BiorxivClusteringS2S": 36.6,
+ "MedrxivClusteringP2P": 34.04,
+ "MedrxivClusteringS2S": 32.81,
+ "RedditClustering": 61.56,
+ "RedditClusteringP2P": 65.35,
+ "StackExchangeClustering": 70.16,
+ "StackExchangeClusteringP2P": 38.23,
+ "TwentyNewsgroupsClustering": 53.56
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "herbert-base-retrieval-v2",
- "CDSC-E": 63.31,
- "PPC": 84.18,
- "PSC": 98.87,
- "SICK-E-PL": 54.93
+ "Model": "voyage-lite-01-instruct",
+ "SprintDuplicateQuestions": 96.01,
+ "TwitterSemEval2015": 76.87,
+ "TwitterURLCorpus": 86.84
}
]
},
"Reranking": {
"map": [
{
- "Model": "herbert-base-retrieval-v2"
+ "Model": "voyage-lite-01-instruct",
+ "AskUbuntuDupQuestions": 65.77,
+ "MindSmallReranking": 31.69,
+ "SciDocsRR": 87.03,
+ "StackOverflowDupQuestions": 54.49
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "herbert-base-retrieval-v2",
- "ArguAna-PL": 41.97,
- "DBPedia-PL": 24.07,
- "FiQA-PL": 24.25,
- "HotpotQA-PL": 43.41,
- "MSMARCO-PL": 51.56,
- "NFCorpus-PL": 25.95,
- "NQ-PL": 35.09,
- "Quora-PL": 78.86,
- "SCIDOCS-PL": 11.0,
- "SciFact-PL": 51.92,
- "TRECCOVID-PL": 42.64
+ "Model": "voyage-lite-01-instruct",
+ "ArguAna": 58.73,
+ "CQADupstackRetrieval": 45.11,
+ "ClimateFEVER": 37.47,
+ "DBPedia": 43.42,
+ "FEVER": 89.71,
+ "FiQA2018": 44.79,
+ "HotpotQA": 70.46,
+ "MSMARCO": 39.66,
+ "NFCorpus": 43.33,
+ "NQ": 60.65,
+ "QuoraRetrieval": 87.83,
+ "SCIDOCS": 23.19,
+ "SciFact": 73.64,
+ "TRECCOVID": 78.92,
+ "Touche2020": 36.83
}
]
},
"STS": {
"spearman": [
{
- "Model": "herbert-base-retrieval-v2",
- "CDSC-R": 86.18,
- "SICK-R-PL": 64.67,
- "STS22 (pl)": 39.73
+ "Model": "voyage-lite-01-instruct",
+ "BIOSSES": 84.85,
+ "SICK-R": 79.71,
+ "STS12": 77.09,
+ "STS13": 88.91,
+ "STS14": 82.08,
+ "STS15": 89.21,
+ "STS16": 84.74,
+ "STS17 (en-en)": 90.73,
+ "STS22 (en)": 62.1,
+ "STSBenchmark": 89.86
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "herbert-base-retrieval-v2"
+ "Model": "voyage-lite-01-instruct",
+ "SummEval": 30.97
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "herbert-base-retrieval-v2"
+ "Model": "voyage-lite-01-instruct"
}
]
}
},
- "voyage-law-2": {
+ "nomic-embed-text-v1.5-256": {
"BitextMining": {
"f1": [
{
- "Model": "voyage-law-2"
+ "Model": "nomic-embed-text-v1.5-256"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "voyage-law-2",
- "AmazonReviewsClassification (fr)": 41.98,
- "MTOPDomainClassification (fr)": 90.12,
- "MTOPIntentClassification (fr)": 62.44,
- "MasakhaNEWSClassification (fra)": 76.42,
- "MassiveIntentClassification (fr)": 66.94,
- "MassiveScenarioClassification (fr)": 72.78
+ "Model": "nomic-embed-text-v1.5-256",
+ "AmazonCounterfactualClassification (en)": 72.94,
+ "AmazonPolarityClassification": 91.35,
+ "AmazonReviewsClassification (en)": 45.73,
+ "Banking77Classification": 83.69,
+ "EmotionClassification": 45.88,
+ "ImdbClassification": 83.99,
+ "MTOPDomainClassification (en)": 91.68,
+ "MTOPIntentClassification (en)": 72.47,
+ "MassiveIntentClassification (en)": 71.76,
+ "MassiveScenarioClassification (en)": 75.67,
+ "ToxicConversationsClassification": 70.87,
+ "TweetSentimentExtractionClassification": 59.2
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "voyage-law-2",
- "AlloProfClusteringP2P": 62.5,
- "AlloProfClusteringS2S": 44.28,
- "HALClusteringS2S": 26.36,
- "MLSUMClusteringP2P (fr)": 44.03,
- "MLSUMClusteringS2S (fr)": 42.95,
- "MasakhaNEWSClusteringP2P (fra)": 50.68,
- "MasakhaNEWSClusteringS2S (fra)": 38.79
+ "Model": "nomic-embed-text-v1.5-256",
+ "ArxivClusteringP2P": 44.82,
+ "ArxivClusteringS2S": 35.32,
+ "BiorxivClusteringP2P": 38.19,
+ "BiorxivClusteringS2S": 31.83,
+ "MedrxivClusteringP2P": 34.08,
+ "MedrxivClusteringS2S": 30.98,
+ "RedditClustering": 54.92,
+ "RedditClusteringP2P": 60.23,
+ "StackExchangeClustering": 61.81,
+ "StackExchangeClusteringP2P": 34.03,
+ "TwentyNewsgroupsClustering": 48.56
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "voyage-law-2",
- "OpusparcusPC (fr)": 93.06,
- "PawsXPairClassification (fr)": 61.54
+ "Model": "nomic-embed-text-v1.5-256",
+ "SprintDuplicateQuestions": 92.31,
+ "TwitterSemEval2015": 73.61,
+ "TwitterURLCorpus": 86.34
}
]
},
"Reranking": {
"map": [
{
- "Model": "voyage-law-2",
- "AlloprofReranking": 72.92,
- "SyntecReranking": 91.2
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "voyage-law-2",
- "AILACasedocs": 44.56,
- "AILAStatutes": 45.51,
- "AlloprofRetrieval": 57.28,
- "BSARDRetrieval": 11.83,
- "GerDaLIRSmall": 44.91,
- "LEMBNarrativeQARetrieval": 55.78,
- "LEMBNeedleRetrieval": 80.5,
- "LEMBPasskeyRetrieval": 93.75,
- "LEMBQMSumRetrieval": 57.26,
- "LEMBSummScreenFDRetrieval": 98.72,
- "LEMBWikimQARetrieval": 87.08,
- "LeCaRDv2": 72.75,
- "LegalBenchConsumerContractsQA": 83.27,
- "LegalBenchCorporateLobbying": 95.66,
- "LegalQuAD": 67.47,
- "LegalSummarization": 68.96,
- "MintakaRetrieval (fr)": 34.92,
- "SyntecRetrieval": 87.33,
- "XPQARetrieval (fr)": 73.56
+ "Model": "nomic-embed-text-v1.5-256",
+ "AskUbuntuDupQuestions": 61.34,
+ "MindSmallReranking": 30.04,
+ "SciDocsRR": 79.4,
+ "StackOverflowDupQuestions": 49.95
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "nomic-embed-text-v1.5-256",
+ "ArguAna": 45.44,
+ "CQADupstackRetrieval": 37.61,
+ "ClimateFEVER": 39.63,
+ "DBPedia": 39.42,
+ "FEVER": 84.4,
+ "FiQA2018": 35.0,
+ "HotpotQA": 67.78,
+ "MSMARCO": 41.38,
+ "NFCorpus": 32.54,
+ "NQ": 57.1,
+ "QuoraRetrieval": 87.65,
+ "SCIDOCS": 16.76,
+ "SciFact": 68.24,
+ "TRECCOVID": 80.65,
+ "Touche2020": 28.49
}
]
},
"STS": {
"spearman": [
{
- "Model": "voyage-law-2",
- "SICKFr": 74.09,
- "STS22 (fr)": 83.75,
- "STSBenchmarkMultilingualSTS (fr)": 83.02
+ "Model": "nomic-embed-text-v1.5-256",
+ "BIOSSES": 81.58,
+ "SICK-R": 79.24,
+ "STS12": 78.16,
+ "STS13": 86.01,
+ "STS14": 81.25,
+ "STS15": 86.51,
+ "STS16": 84.24,
+ "STS17 (en-en)": 86.44,
+ "STS22 (en)": 65.14,
+ "STSBenchmark": 84.8
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "voyage-law-2",
- "SummEvalFr": 30.34
+ "Model": "nomic-embed-text-v1.5-256",
+ "SummEval": 30.05
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "voyage-law-2"
+ "Model": "nomic-embed-text-v1.5-256"
}
]
}
},
- "bert-base-multilingual-cased": {
+ "e5-mistral-7b-instruct-noinstruct": {
"BitextMining": {
"f1": [
{
- "Model": "bert-base-multilingual-cased"
+ "Model": "e5-mistral-7b-instruct-noinstruct"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "bert-base-multilingual-cased",
- "AmazonReviewsClassification (fr)": 29.39,
- "MTOPDomainClassification (fr)": 63.61,
- "MTOPIntentClassification (fr)": 37.84,
- "MasakhaNEWSClassification (fra)": 64.0,
- "MassiveIntentClassification (fr)": 37.3,
- "MassiveScenarioClassification (fr)": 44.47
+ "Model": "e5-mistral-7b-instruct-noinstruct"
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "bert-base-multilingual-cased",
- "AlloProfClusteringP2P": 51.5,
- "AlloProfClusteringS2S": 43.06,
- "HALClusteringS2S": 20.81,
- "MLSUMClusteringP2P": 40.9,
- "MLSUMClusteringS2S": 31.8,
- "MasakhaNEWSClusteringP2P (fra)": 24.23,
- "MasakhaNEWSClusteringS2S (fra)": 24.46
+ "Model": "e5-mistral-7b-instruct-noinstruct"
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "bert-base-multilingual-cased",
- "OpusparcusPC (fr)": 86.77,
- "PawsXPairClassification (fr)": 53.39
+ "Model": "e5-mistral-7b-instruct-noinstruct"
}
]
},
"Reranking": {
"map": [
{
- "Model": "bert-base-multilingual-cased",
- "AlloprofReranking": 36.23,
- "SyntecReranking": 53.25
+ "Model": "e5-mistral-7b-instruct-noinstruct"
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "bert-base-multilingual-cased",
- "AlloprofRetrieval": 1.63,
- "BSARDRetrieval": 0.0,
- "MintakaRetrieval (fr)": 3.55,
- "SyntecRetrieval": 18.95,
- "XPQARetrieval (fr)": 18.49
+ "Model": "e5-mistral-7b-instruct-noinstruct",
+ "ARCChallenge": 20.48,
+ "AlphaNLI": 18.88,
+ "HellaSwag": 32.25,
+ "PIQA": 32.8,
+ "Quail": 6.25,
+ "RARbCode": 79.84,
+ "RARbMath": 76.19,
+ "SIQA": 5.08,
+ "SpartQA": 10.87,
+ "TempReasonL1": 3.04,
+ "TempReasonL2Fact": 35.63,
+ "TempReasonL2Pure": 9.32,
+ "TempReasonL3Fact": 30.41,
+ "TempReasonL3Pure": 14.39,
+ "WinoGrande": 45.18
}
]
},
"STS": {
"spearman": [
{
- "Model": "bert-base-multilingual-cased",
- "SICKFr": 58.75,
- "STS22 (fr)": 39.05,
- "STSBenchmarkMultilingualSTS (fr)": 52.25
+ "Model": "e5-mistral-7b-instruct-noinstruct"
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "bert-base-multilingual-cased",
- "SummEvalFr": 28.81
+ "Model": "e5-mistral-7b-instruct-noinstruct"
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "bert-base-multilingual-cased"
+ "Model": "e5-mistral-7b-instruct-noinstruct"
}
]
}
},
- "LLM2Vec-Meta-Llama-3-unsupervised": {
+ "e5-base-4k": {
"BitextMining": {
"f1": [
{
- "Model": "LLM2Vec-Meta-Llama-3-unsupervised"
+ "Model": "e5-base-4k"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
- "AmazonCounterfactualClassification (en)": 75.7,
- "AmazonPolarityClassification": 80.68,
- "AmazonReviewsClassification (en)": 40.0,
- "Banking77Classification": 84.77,
- "EmotionClassification": 47.08,
- "ImdbClassification": 75.19,
- "MTOPDomainClassification (en)": 94.47,
- "MTOPIntentClassification (en)": 81.09,
- "MassiveIntentClassification (en)": 75.01,
- "MassiveScenarioClassification (en)": 79.16,
- "ToxicConversationsClassification": 71.85,
- "TweetSentimentExtractionClassification": 57.61
+ "Model": "e5-base-4k"
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
- "ArxivClusteringP2P": 49.22,
- "ArxivClusteringS2S": 41.71,
- "BiorxivClusteringP2P": 38.39,
- "BiorxivClusteringS2S": 31.31,
- "MedrxivClusteringP2P": 31.47,
- "MedrxivClusteringS2S": 27.87,
- "RedditClustering": 43.67,
- "RedditClusteringP2P": 61.67,
- "StackExchangeClustering": 68.2,
- "StackExchangeClusteringP2P": 36.36,
- "TwentyNewsgroupsClustering": 32.01
+ "Model": "e5-base-4k"
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
- "SprintDuplicateQuestions": 88.14,
- "TwitterSemEval2015": 66.6,
- "TwitterURLCorpus": 79.3
+ "Model": "e5-base-4k"
}
]
},
"Reranking": {
"map": [
{
- "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
- "AskUbuntuDupQuestions": 57.16,
- "MindSmallReranking": 30.1,
- "SciDocsRR": 76.28,
- "StackOverflowDupQuestions": 48.82
+ "Model": "e5-base-4k"
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
- "ArguAna": 51.73,
- "CQADupstackRetrieval": 32.4,
- "ClimateFEVER": 23.58,
- "DBPedia": 26.78,
- "FEVER": 53.42,
- "FiQA2018": 28.56,
- "HotpotQA": 52.37,
- "MSMARCO": 17.47,
- "NFCorpus": 26.28,
- "NQ": 37.65,
- "QuoraRetrieval": 84.64,
- "SCIDOCS": 10.39,
- "SciFact": 66.36,
- "TRECCOVID": 63.34,
- "Touche2020": 12.82
+ "Model": "e5-base-4k",
+ "LEMBNarrativeQARetrieval": 30.35,
+ "LEMBNeedleRetrieval": 41.5,
+ "LEMBPasskeyRetrieval": 67.25,
+ "LEMBQMSumRetrieval": 35.6,
+ "LEMBSummScreenFDRetrieval": 95.23,
+ "LEMBWikimQARetrieval": 69.19
}
]
},
"STS": {
"spearman": [
{
- "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
- "BIOSSES": 84.67,
- "SICK-R": 72.16,
- "STS12": 61.6,
- "STS13": 79.71,
- "STS14": 72.11,
- "STS15": 82.18,
- "STS16": 79.41,
- "STS17 (en-en)": 85.44,
- "STS22 (en)": 63.9,
- "STSBenchmark": 77.44
+ "Model": "e5-base-4k"
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "LLM2Vec-Meta-Llama-3-unsupervised",
- "SummEval": 31.45
+ "Model": "e5-base-4k"
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "LLM2Vec-Meta-Llama-3-unsupervised"
+ "Model": "e5-base-4k"
}
]
}
},
- "text-search-davinci-001": {
+ "sentence-t5-xl": {
"BitextMining": {
"f1": [
{
- "Model": "text-search-davinci-001"
+ "Model": "sentence-t5-xl",
+ "BUCC (de-en)": 95.04,
+ "BUCC (fr-en)": 94.96,
+ "BUCC (ru-en)": 8.33,
+ "BUCC (zh-en)": 1.3,
+ "Tatoeba (afr-eng)": 41.84,
+ "Tatoeba (amh-eng)": 0.03,
+ "Tatoeba (ang-eng)": 37.87,
+ "Tatoeba (ara-eng)": 0.61,
+ "Tatoeba (arq-eng)": 0.74,
+ "Tatoeba (arz-eng)": 0.42,
+ "Tatoeba (ast-eng)": 65.41,
+ "Tatoeba (awa-eng)": 1.46,
+ "Tatoeba (aze-eng)": 8.79,
+ "Tatoeba (bel-eng)": 5.76,
+ "Tatoeba (ben-eng)": 0.01,
+ "Tatoeba (ber-eng)": 5.92,
+ "Tatoeba (bos-eng)": 16.12,
+ "Tatoeba (bre-eng)": 6.12,
+ "Tatoeba (bul-eng)": 9.06,
+ "Tatoeba (cat-eng)": 57.4,
+ "Tatoeba (cbk-eng)": 57.68,
+ "Tatoeba (ceb-eng)": 12.56,
+ "Tatoeba (ces-eng)": 9.47,
+ "Tatoeba (cha-eng)": 27.13,
+ "Tatoeba (cmn-eng)": 1.82,
+ "Tatoeba (cor-eng)": 3.87,
+ "Tatoeba (csb-eng)": 14.41,
+ "Tatoeba (cym-eng)": 6.69,
+ "Tatoeba (dan-eng)": 54.87,
+ "Tatoeba (deu-eng)": 93.72,
+ "Tatoeba (dsb-eng)": 14.74,
+ "Tatoeba (dtp-eng)": 5.84,
+ "Tatoeba (ell-eng)": 0.6,
+ "Tatoeba (epo-eng)": 30.8,
+ "Tatoeba (est-eng)": 5.39,
+ "Tatoeba (eus-eng)": 11.9,
+ "Tatoeba (fao-eng)": 28.08,
+ "Tatoeba (fin-eng)": 6.81,
+ "Tatoeba (fra-eng)": 85.29,
+ "Tatoeba (fry-eng)": 38.68,
+ "Tatoeba (gla-eng)": 2.96,
+ "Tatoeba (gle-eng)": 3.74,
+ "Tatoeba (glg-eng)": 70.0,
+ "Tatoeba (gsw-eng)": 30.49,
+ "Tatoeba (heb-eng)": 0.87,
+ "Tatoeba (hin-eng)": 0.1,
+ "Tatoeba (hrv-eng)": 17.43,
+ "Tatoeba (hsb-eng)": 14.69,
+ "Tatoeba (hun-eng)": 7.28,
+ "Tatoeba (hye-eng)": 0.77,
+ "Tatoeba (ido-eng)": 46.65,
+ "Tatoeba (ile-eng)": 59.43,
+ "Tatoeba (ina-eng)": 82.71,
+ "Tatoeba (ind-eng)": 37.26,
+ "Tatoeba (isl-eng)": 11.21,
+ "Tatoeba (ita-eng)": 79.77,
+ "Tatoeba (jav-eng)": 7.81,
+ "Tatoeba (jpn-eng)": 0.91,
+ "Tatoeba (kab-eng)": 2.23,
+ "Tatoeba (kat-eng)": 1.48,
+ "Tatoeba (kaz-eng)": 1.77,
+ "Tatoeba (khm-eng)": 0.38,
+ "Tatoeba (kor-eng)": 1.96,
+ "Tatoeba (kur-eng)": 12.11,
+ "Tatoeba (kzj-eng)": 6.13,
+ "Tatoeba (lat-eng)": 27.84,
+ "Tatoeba (lfn-eng)": 45.89,
+ "Tatoeba (lit-eng)": 5.94,
+ "Tatoeba (lvs-eng)": 8.11,
+ "Tatoeba (mal-eng)": 0.59,
+ "Tatoeba (mar-eng)": 0.03,
+ "Tatoeba (max-eng)": 21.7,
+ "Tatoeba (mhr-eng)": 0.68,
+ "Tatoeba (mkd-eng)": 5.92,
+ "Tatoeba (mon-eng)": 2.39,
+ "Tatoeba (nds-eng)": 45.04,
+ "Tatoeba (nld-eng)": 64.75,
+ "Tatoeba (nno-eng)": 36.74,
+ "Tatoeba (nob-eng)": 54.77,
+ "Tatoeba (nov-eng)": 57.12,
+ "Tatoeba (oci-eng)": 34.39,
+ "Tatoeba (orv-eng)": 2.04,
+ "Tatoeba (pam-eng)": 8.34,
+ "Tatoeba (pes-eng)": 0.87,
+ "Tatoeba (pms-eng)": 38.06,
+ "Tatoeba (pol-eng)": 28.35,
+ "Tatoeba (por-eng)": 83.61,
+ "Tatoeba (ron-eng)": 65.27,
+ "Tatoeba (rus-eng)": 30.42,
+ "Tatoeba (slk-eng)": 13.19,
+ "Tatoeba (slv-eng)": 13.49,
+ "Tatoeba (spa-eng)": 89.18,
+ "Tatoeba (sqi-eng)": 14.66,
+ "Tatoeba (srp-eng)": 13.24,
+ "Tatoeba (swe-eng)": 60.67,
+ "Tatoeba (swg-eng)": 34.76,
+ "Tatoeba (swh-eng)": 8.07,
+ "Tatoeba (tam-eng)": 0.36,
+ "Tatoeba (tat-eng)": 1.46,
+ "Tatoeba (tel-eng)": 0.67,
+ "Tatoeba (tgl-eng)": 25.22,
+ "Tatoeba (tha-eng)": 1.58,
+ "Tatoeba (tuk-eng)": 4.99,
+ "Tatoeba (tur-eng)": 7.72,
+ "Tatoeba (tzl-eng)": 38.49,
+ "Tatoeba (uig-eng)": 0.87,
+ "Tatoeba (ukr-eng)": 9.12,
+ "Tatoeba (urd-eng)": 0.0,
+ "Tatoeba (uzb-eng)": 5.48,
+ "Tatoeba (vie-eng)": 8.45,
+ "Tatoeba (war-eng)": 13.75,
+ "Tatoeba (wuu-eng)": 1.44,
+ "Tatoeba (xho-eng)": 9.15,
+ "Tatoeba (yid-eng)": 0.28,
+ "Tatoeba (yue-eng)": 0.98,
+ "Tatoeba (zsm-eng)": 35.71
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "text-search-davinci-001"
+ "Model": "sentence-t5-xl",
+ "AmazonCounterfactualClassification (de)": 67.01,
+ "AmazonCounterfactualClassification (en)": 76.01,
+ "AmazonCounterfactualClassification (en-ext)": 77.29,
+ "AmazonCounterfactualClassification (ja)": 45.61,
+ "AmazonPolarityClassification": 93.17,
+ "AmazonReviewsClassification (de)": 44.05,
+ "AmazonReviewsClassification (en)": 48.18,
+ "AmazonReviewsClassification (es)": 45.01,
+ "AmazonReviewsClassification (fr)": 43.52,
+ "AmazonReviewsClassification (ja)": 22.23,
+ "AmazonReviewsClassification (zh)": 21.88,
+ "Banking77Classification": 80.88,
+ "EmotionClassification": 51.95,
+ "ImdbClassification": 87.54,
+ "MTOPDomainClassification (de)": 83.28,
+ "MTOPDomainClassification (en)": 90.73,
+ "MTOPDomainClassification (es)": 85.32,
+ "MTOPDomainClassification (fr)": 85.14,
+ "MTOPDomainClassification (hi)": 20.85,
+ "MTOPDomainClassification (th)": 15.62,
+ "MTOPIntentClassification (de)": 54.65,
+ "MTOPIntentClassification (en)": 68.15,
+ "MTOPIntentClassification (es)": 57.38,
+ "MTOPIntentClassification (fr)": 54.39,
+ "MTOPIntentClassification (hi)": 3.28,
+ "MTOPIntentClassification (th)": 5.08,
+ "MasakhaNEWSClassification (fra)": 80.09,
+ "MassiveIntentClassification (af)": 40.17,
+ "MassiveIntentClassification (am)": 2.18,
+ "MassiveIntentClassification (ar)": 4.18,
+ "MassiveIntentClassification (az)": 30.02,
+ "MassiveIntentClassification (bn)": 2.6,
+ "MassiveIntentClassification (cy)": 29.15,
+ "MassiveIntentClassification (da)": 47.69,
+ "MassiveIntentClassification (de)": 57.43,
+ "MassiveIntentClassification (el)": 9.96,
+ "MassiveIntentClassification (en)": 72.09,
+ "MassiveIntentClassification (es)": 57.97,
+ "MassiveIntentClassification (fa)": 3.6,
+ "MassiveIntentClassification (fi)": 34.02,
+ "MassiveIntentClassification (fr)": 60.99,
+ "MassiveIntentClassification (he)": 2.51,
+ "MassiveIntentClassification (hi)": 3.02,
+ "MassiveIntentClassification (hu)": 31.66,
+ "MassiveIntentClassification (hy)": 3.32,
+ "MassiveIntentClassification (id)": 41.53,
+ "MassiveIntentClassification (is)": 30.25,
+ "MassiveIntentClassification (it)": 56.57,
+ "MassiveIntentClassification (ja)": 3.5,
+ "MassiveIntentClassification (jv)": 31.67,
+ "MassiveIntentClassification (ka)": 2.79,
+ "MassiveIntentClassification (km)": 5.43,
+ "MassiveIntentClassification (kn)": 2.79,
+ "MassiveIntentClassification (ko)": 2.67,
+ "MassiveIntentClassification (lv)": 34.25,
+ "MassiveIntentClassification (ml)": 2.98,
+ "MassiveIntentClassification (mn)": 20.99,
+ "MassiveIntentClassification (ms)": 37.43,
+ "MassiveIntentClassification (my)": 4.02,
+ "MassiveIntentClassification (nb)": 45.91,
+ "MassiveIntentClassification (nl)": 50.51,
+ "MassiveIntentClassification (pl)": 43.95,
+ "MassiveIntentClassification (pt)": 57.95,
+ "MassiveIntentClassification (ro)": 49.37,
+ "MassiveIntentClassification (ru)": 33.46,
+ "MassiveIntentClassification (sl)": 36.33,
+ "MassiveIntentClassification (sq)": 37.65,
+ "MassiveIntentClassification (sv)": 46.35,
+ "MassiveIntentClassification (sw)": 30.6,
+ "MassiveIntentClassification (ta)": 1.79,
+ "MassiveIntentClassification (te)": 2.26,
+ "MassiveIntentClassification (th)": 4.02,
+ "MassiveIntentClassification (tl)": 38.92,
+ "MassiveIntentClassification (tr)": 32.05,
+ "MassiveIntentClassification (ur)": 2.7,
+ "MassiveIntentClassification (vi)": 21.47,
+ "MassiveIntentClassification (zh-CN)": 0.59,
+ "MassiveIntentClassification (zh-TW)": 3.24,
+ "MassiveScenarioClassification (af)": 50.81,
+ "MassiveScenarioClassification (am)": 6.95,
+ "MassiveScenarioClassification (ar)": 12.32,
+ "MassiveScenarioClassification (az)": 38.79,
+ "MassiveScenarioClassification (bn)": 8.0,
+ "MassiveScenarioClassification (cy)": 33.91,
+ "MassiveScenarioClassification (da)": 55.79,
+ "MassiveScenarioClassification (de)": 65.33,
+ "MassiveScenarioClassification (el)": 16.89,
+ "MassiveScenarioClassification (en)": 73.26,
+ "MassiveScenarioClassification (es)": 62.52,
+ "MassiveScenarioClassification (fa)": 6.08,
+ "MassiveScenarioClassification (fi)": 43.34,
+ "MassiveScenarioClassification (fr)": 66.42,
+ "MassiveScenarioClassification (he)": 7.55,
+ "MassiveScenarioClassification (hi)": 7.44,
+ "MassiveScenarioClassification (hu)": 40.85,
+ "MassiveScenarioClassification (hy)": 9.25,
+ "MassiveScenarioClassification (id)": 51.92,
+ "MassiveScenarioClassification (is)": 40.09,
+ "MassiveScenarioClassification (it)": 62.94,
+ "MassiveScenarioClassification (ja)": 7.9,
+ "MassiveScenarioClassification (jv)": 41.33,
+ "MassiveScenarioClassification (ka)": 7.76,
+ "MassiveScenarioClassification (km)": 9.19,
+ "MassiveScenarioClassification (kn)": 8.36,
+ "MassiveScenarioClassification (ko)": 6.13,
+ "MassiveScenarioClassification (lv)": 40.7,
+ "MassiveScenarioClassification (ml)": 6.98,
+ "MassiveScenarioClassification (mn)": 27.0,
+ "MassiveScenarioClassification (ms)": 46.9,
+ "MassiveScenarioClassification (my)": 9.55,
+ "MassiveScenarioClassification (nb)": 53.43,
+ "MassiveScenarioClassification (nl)": 59.65,
+ "MassiveScenarioClassification (pl)": 49.87,
+ "MassiveScenarioClassification (pt)": 62.18,
+ "MassiveScenarioClassification (ro)": 58.22,
+ "MassiveScenarioClassification (ru)": 40.73,
+ "MassiveScenarioClassification (sl)": 43.66,
+ "MassiveScenarioClassification (sq)": 49.25,
+ "MassiveScenarioClassification (sv)": 57.17,
+ "MassiveScenarioClassification (sw)": 40.55,
+ "MassiveScenarioClassification (ta)": 7.46,
+ "MassiveScenarioClassification (te)": 7.03,
+ "MassiveScenarioClassification (th)": 8.52,
+ "MassiveScenarioClassification (tl)": 51.74,
+ "MassiveScenarioClassification (tr)": 43.01,
+ "MassiveScenarioClassification (ur)": 9.61,
+ "MassiveScenarioClassification (vi)": 28.91,
+ "MassiveScenarioClassification (zh-CN)": 5.86,
+ "MassiveScenarioClassification (zh-TW)": 7.14,
+ "ToxicConversationsClassification": 70.95,
+ "TweetSentimentExtractionClassification": 61.21
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "text-search-davinci-001"
+ "Model": "sentence-t5-xl",
+ "AlloProfClusteringP2P": 60.37,
+ "AlloProfClusteringS2S": 40.76,
+ "ArxivClusteringP2P": 41.62,
+ "ArxivClusteringS2S": 31.17,
+ "BiorxivClusteringP2P": 36.43,
+ "BiorxivClusteringS2S": 26.47,
+ "HALClusteringS2S": 20.28,
+ "MLSUMClusteringP2P": 41.61,
+ "MLSUMClusteringS2S": 33.6,
+ "MasakhaNEWSClusteringP2P (fra)": 62.82,
+ "MasakhaNEWSClusteringS2S (fra)": 31.74,
+ "MedrxivClusteringP2P": 32.3,
+ "MedrxivClusteringS2S": 26.93,
+ "RedditClustering": 57.03,
+ "RedditClusteringP2P": 62.34,
+ "StackExchangeClustering": 67.13,
+ "StackExchangeClusteringP2P": 34.79,
+ "TwentyNewsgroupsClustering": 49.53
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "text-search-davinci-001"
+ "Model": "sentence-t5-xl",
+ "OpusparcusPC (fr)": 92.48,
+ "PawsXPairClassification (fr)": 62.52,
+ "SprintDuplicateQuestions": 91.44,
+ "TwitterSemEval2015": 80.89,
+ "TwitterURLCorpus": 85.86
}
]
},
"Reranking": {
"map": [
{
- "Model": "text-search-davinci-001"
+ "Model": "sentence-t5-xl",
+ "AlloprofReranking": 63.3,
+ "AskUbuntuDupQuestions": 62.86,
+ "MindSmallReranking": 29.77,
+ "SciDocsRR": 75.16,
+ "StackOverflowDupQuestions": 51.05,
+ "SyntecReranking": 83.07
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "text-search-davinci-001",
- "ArguAna": 43.5,
- "ClimateFEVER": 22.3,
- "FEVER": 77.5,
- "FiQA2018": 51.2,
- "HotpotQA": 68.8,
- "NFCorpus": 40.7,
- "QuoraRetrieval": 63.8,
- "SciFact": 75.4,
- "TRECCOVID": 64.9,
- "Touche2020": 29.1
+ "Model": "sentence-t5-xl",
+ "AlloprofRetrieval": 40.38,
+ "ArguAna": 39.4,
+ "BSARDRetrieval": 0.14,
+ "CQADupstackRetrieval": 40.78,
+ "ClimateFEVER": 10.61,
+ "DBPedia": 33.65,
+ "FEVER": 36.12,
+ "FiQA2018": 44.71,
+ "HotpotQA": 37.17,
+ "MSMARCO": 25.17,
+ "MintakaRetrieval (fr)": 31.54,
+ "NFCorpus": 33.18,
+ "NQ": 46.29,
+ "QuoraRetrieval": 85.85,
+ "SCIDOCS": 15.97,
+ "SciFact": 50.91,
+ "SyntecRetrieval": 74.24,
+ "TRECCOVID": 54.77,
+ "Touche2020": 22.51,
+ "XPQARetrieval (fr)": 52.14
}
]
},
"STS": {
"spearman": [
{
- "Model": "text-search-davinci-001"
+ "Model": "sentence-t5-xl",
+ "BIOSSES": 73.12,
+ "SICK-R": 79.98,
+ "SICKFr": 75.08,
+ "STS12": 79.02,
+ "STS13": 88.8,
+ "STS14": 84.33,
+ "STS15": 88.89,
+ "STS16": 85.31,
+ "STS17 (ar-ar)": 11.13,
+ "STS17 (en-ar)": -3.93,
+ "STS17 (en-de)": 79.04,
+ "STS17 (en-en)": 88.91,
+ "STS17 (en-tr)": 13.61,
+ "STS17 (es-en)": 71.72,
+ "STS17 (es-es)": 83.42,
+ "STS17 (fr-en)": 71.38,
+ "STS17 (it-en)": 69.5,
+ "STS17 (ko-ko)": 9.61,
+ "STS17 (nl-en)": 66.12,
+ "STS22 (ar)": 29.6,
+ "STS22 (de)": 47.72,
+ "STS22 (de-en)": 49.64,
+ "STS22 (de-fr)": 62.21,
+ "STS22 (de-pl)": 34.34,
+ "STS22 (en)": 64.32,
+ "STS22 (es)": 58.16,
+ "STS22 (es-en)": 69.15,
+ "STS22 (es-it)": 65.26,
+ "STS22 (fr)": 77.49,
+ "STS22 (fr-pl)": 50.71,
+ "STS22 (it)": 66.91,
+ "STS22 (pl)": 27.04,
+ "STS22 (pl-en)": 58.85,
+ "STS22 (ru)": 26.63,
+ "STS22 (tr)": 43.36,
+ "STS22 (zh)": 33.55,
+ "STS22 (zh-en)": 29.0,
+ "STSBenchmark": 83.93,
+ "STSBenchmarkMultilingualSTS (fr)": 79.42
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "text-search-davinci-001"
+ "Model": "sentence-t5-xl",
+ "SummEval": 29.91,
+ "SummEvalFr": 31.59
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "text-search-davinci-001"
+ "Model": "sentence-t5-xl"
}
]
}
},
- "m3e-base": {
+ "sentence-camembert-base": {
"BitextMining": {
"f1": [
{
- "Model": "m3e-base"
+ "Model": "sentence-camembert-base"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "m3e-base",
- "AmazonReviewsClassification (zh)": 43.02,
- "IFlyTek": 44.42,
- "JDReview": 85.33,
- "MassiveIntentClassification (zh-CN)": 68.4,
- "MassiveScenarioClassification (zh-CN)": 74.6,
- "MultilingualSentiment": 71.9,
- "OnlineShopping": 87.77,
- "TNews": 48.28,
- "Waimai": 83.99
+ "Model": "sentence-camembert-base",
+ "AmazonReviewsClassification (fr)": 36.03,
+ "MTOPDomainClassification (fr)": 77.1,
+ "MTOPIntentClassification (fr)": 43.44,
+ "MasakhaNEWSClassification (fra)": 70.36,
+ "MassiveIntentClassification (fr)": 51.59,
+ "MassiveScenarioClassification (fr)": 61.28
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "m3e-base",
- "CLSClusteringP2P": 39.81,
- "CLSClusteringS2S": 37.34,
- "ThuNewsClusteringP2P": 59.77,
- "ThuNewsClusteringS2S": 53.78
+ "Model": "sentence-camembert-base",
+ "AlloProfClusteringP2P": 59.09,
+ "AlloProfClusteringS2S": 38.92,
+ "HALClusteringS2S": 20.22,
+ "MLSUMClusteringP2P": 35.98,
+ "MLSUMClusteringS2S": 27.05,
+ "MasakhaNEWSClusteringP2P (fra)": 36.03,
+ "MasakhaNEWSClusteringS2S (fra)": 30.77
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "m3e-base",
- "Cmnli": 69.98,
- "Ocnli": 58.0
+ "Model": "sentence-camembert-base",
+ "OpusparcusPC (fr)": 92.05,
+ "PawsXPairClassification (fr)": 57.44
}
]
},
"Reranking": {
"map": [
{
- "Model": "m3e-base",
- "CMedQAv1": 77.05,
- "CMedQAv2": 76.76,
- "MMarcoReranking": 17.51,
- "T2Reranking": 66.03
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
- {
- "Model": "m3e-base",
- "CmedqaRetrieval": 30.33,
- "CovidRetrieval": 66.42,
- "DuRetrieval": 75.76,
- "EcomRetrieval": 50.27,
- "MMarcoRetrieval": 65.46,
- "MedicalRetrieval": 42.79,
- "T2Retrieval": 73.14,
- "VideoRetrieval": 51.11
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "m3e-base",
- "AFQMC": 35.87,
- "ATEC": 41.27,
- "BQ": 63.81,
- "LCQMC": 74.88,
- "PAWSX": 12.19,
- "QBQTC": 32.07,
- "STS22 (zh)": 66.73,
- "STSB": 76.97
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "m3e-base"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "m3e-base"
- }
- ]
- }
- },
- "multilingual-e5-large": {
- "BitextMining": {
- "f1": [
- {
- "Model": "multilingual-e5-large",
- "BornholmBitextMining (dan-Latn)": 29.61,
- "BornholmBitextMining": 44.16,
- "Tatoeba (tgl-Latn_eng-Latn)": 92.0,
- "Tatoeba (gsw-Latn_eng-Latn)": 51.65,
- "Tatoeba (tzl-Latn_eng-Latn)": 53.16,
- "Tatoeba (slv-Latn_eng-Latn)": 89.57,
- "Tatoeba (jav-Latn_eng-Latn)": 75.46,
- "Tatoeba (uig-Arab_eng-Latn)": 72.17,
- "Tatoeba (ind-Latn_eng-Latn)": 92.9,
- "Tatoeba (rus-Cyrl_eng-Latn)": 92.32,
- "Tatoeba (war-Latn_eng-Latn)": 62.02,
- "Tatoeba (mar-Deva_eng-Latn)": 88.58,
- "Tatoeba (mkd-Cyrl_eng-Latn)": 85.63,
- "Tatoeba (jpn-Jpan_eng-Latn)": 95.28,
- "Tatoeba (hun-Latn_eng-Latn)": 94.01,
- "Tatoeba (slk-Latn_eng-Latn)": 93.13,
- "Tatoeba (tha-Thai_eng-Latn)": 95.38,
- "Tatoeba (fra-Latn_eng-Latn)": 93.42,
- "Tatoeba (ukr-Cyrl_eng-Latn)": 93.32,
- "Tatoeba (kat-Geor_eng-Latn)": 84.09,
- "Tatoeba (nov-Latn_eng-Latn)": 71.62,
- "Tatoeba (kor-Hang_eng-Latn)": 90.65,
- "Tatoeba (ben-Beng_eng-Latn)": 83.02,
- "Tatoeba (cor-Latn_eng-Latn)": 6.28,
- "Tatoeba (lfn-Latn_eng-Latn)": 62.91,
- "Tatoeba (swh-Latn_eng-Latn)": 71.61,
- "Tatoeba (tur-Latn_eng-Latn)": 96.27,
- "Tatoeba (cbk-Latn_eng-Latn)": 69.26,
- "Tatoeba (kur-Latn_eng-Latn)": 66.83,
- "Tatoeba (arq-Arab_eng-Latn)": 41.56,
- "Tatoeba (ceb-Latn_eng-Latn)": 55.31,
- "Tatoeba (max-Deva_eng-Latn)": 63.41,
- "Tatoeba (ang-Latn_eng-Latn)": 40.18,
- "Tatoeba (nds-Latn_eng-Latn)": 69.28,
- "Tatoeba (epo-Latn_eng-Latn)": 96.01,
- "Tatoeba (heb-Hebr_eng-Latn)": 86.61,
- "Tatoeba (yue-Hant_eng-Latn)": 88.71,
- "Tatoeba (dan-Latn_eng-Latn)": 95.08,
- "Tatoeba (swe-Latn_eng-Latn)": 95.3,
- "Tatoeba (lvs-Latn_eng-Latn)": 90.06,
- "Tatoeba (ast-Latn_eng-Latn)": 81.76,
- "Tatoeba (dsb-Latn_eng-Latn)": 48.44,
- "Tatoeba (pes-Arab_eng-Latn)": 92.14,
- "Tatoeba (dtp-Latn_eng-Latn)": 7.03,
- "Tatoeba (tuk-Latn_eng-Latn)": 33.15,
- "Tatoeba (isl-Latn_eng-Latn)": 92.09,
- "Tatoeba (khm-Khmr_eng-Latn)": 59.96,
- "Tatoeba (pam-Latn_eng-Latn)": 9.32,
- "Tatoeba (tat-Cyrl_eng-Latn)": 73.51,
- "Tatoeba (bos-Latn_eng-Latn)": 92.86,
- "Tatoeba (spa-Latn_eng-Latn)": 97.1,
- "Tatoeba (kaz-Cyrl_eng-Latn)": 79.67,
- "Tatoeba (bel-Cyrl_eng-Latn)": 91.08,
- "Tatoeba (zsm-Latn_eng-Latn)": 94.53,
- "Tatoeba (cat-Latn_eng-Latn)": 91.03,
- "Tatoeba (urd-Arab_eng-Latn)": 89.21,
- "Tatoeba (mon-Cyrl_eng-Latn)": 87.53,
- "Tatoeba (tam-Taml_eng-Latn)": 88.23,
- "Tatoeba (fry-Latn_eng-Latn)": 63.43,
- "Tatoeba (nob-Latn_eng-Latn)": 97.2,
- "Tatoeba (tel-Telu_eng-Latn)": 91.34,
- "Tatoeba (hye-Armn_eng-Latn)": 90.92,
- "Tatoeba (awa-Deva_eng-Latn)": 72.27,
- "Tatoeba (hrv-Latn_eng-Latn)": 96.15,
- "Tatoeba (ile-Latn_eng-Latn)": 79.16,
- "Tatoeba (amh-Ethi_eng-Latn)": 80.69,
- "Tatoeba (orv-Cyrl_eng-Latn)": 39.87,
- "Tatoeba (ara-Arab_eng-Latn)": 85.48,
- "Tatoeba (ido-Latn_eng-Latn)": 83.52,
- "Tatoeba (hin-Deva_eng-Latn)": 94.48,
- "Tatoeba (por-Latn_eng-Latn)": 93.63,
- "Tatoeba (ron-Latn_eng-Latn)": 94.87,
- "Tatoeba (swg-Latn_eng-Latn)": 55.64,
- "Tatoeba (cmn-Hans_eng-Latn)": 95.28,
- "Tatoeba (pol-Latn_eng-Latn)": 96.6,
- "Tatoeba (bul-Cyrl_eng-Latn)": 92.93,
- "Tatoeba (ina-Latn_eng-Latn)": 93.47,
- "Tatoeba (bre-Latn_eng-Latn)": 11.1,
- "Tatoeba (wuu-Hans_eng-Latn)": 86.37,
- "Tatoeba (lit-Latn_eng-Latn)": 88.48,
- "Tatoeba (csb-Latn_eng-Latn)": 36.98,
- "Tatoeba (lat-Latn_eng-Latn)": 53.37,
- "Tatoeba (gle-Latn_eng-Latn)": 71.48,
- "Tatoeba (ita-Latn_eng-Latn)": 93.29,
- "Tatoeba (srp-Cyrl_eng-Latn)": 93.1,
- "Tatoeba (arz-Arab_eng-Latn)": 74.73,
- "Tatoeba (cym-Latn_eng-Latn)": 76.21,
- "Tatoeba (ber-Tfng_eng-Latn)": 38.9,
- "Tatoeba (xho-Latn_eng-Latn)": 80.87,
- "Tatoeba (uzb-Latn_eng-Latn)": 72.35,
- "Tatoeba (pms-Latn_eng-Latn)": 59.85,
- "Tatoeba (est-Latn_eng-Latn)": 85.03,
- "Tatoeba (deu-Latn_eng-Latn)": 99.07,
- "Tatoeba (yid-Hebr_eng-Latn)": 76.33,
- "Tatoeba (ell-Grek_eng-Latn)": 93.88,
- "Tatoeba (afr-Latn_eng-Latn)": 90.22,
- "Tatoeba (fao-Latn_eng-Latn)": 72.62,
- "Tatoeba (nld-Latn_eng-Latn)": 96.63,
- "Tatoeba (hsb-Latn_eng-Latn)": 58.9,
- "Tatoeba (aze-Latn_eng-Latn)": 87.61,
- "Tatoeba (kzj-Latn_eng-Latn)": 7.91,
- "Tatoeba (kab-Latn_eng-Latn)": 36.54,
- "Tatoeba (mal-Mlym_eng-Latn)": 97.7,
- "Tatoeba (mhr-Cyrl_eng-Latn)": 6.79,
- "Tatoeba (ces-Latn_eng-Latn)": 94.89,
- "Tatoeba (gla-Latn_eng-Latn)": 59.0,
- "Tatoeba (cha-Latn_eng-Latn)": 27.16,
- "Tatoeba (glg-Latn_eng-Latn)": 93.34,
- "Tatoeba (vie-Latn_eng-Latn)": 97.0,
- "Tatoeba (oci-Latn_eng-Latn)": 54.91,
- "Tatoeba (nno-Latn_eng-Latn)": 91.4,
- "Tatoeba (fin-Latn_eng-Latn)": 95.44,
- "Tatoeba (eus-Latn_eng-Latn)": 77.82,
- "Tatoeba (sqi-Latn_eng-Latn)": 94.7
+ "Model": "sentence-camembert-base",
+ "AlloprofReranking": 48.68,
+ "SyntecReranking": 79.75
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "sentence-camembert-base",
+ "AlloprofRetrieval": 21.94,
+ "BSARDRetrieval": 0.0,
+ "MintakaRetrieval (fr)": 13.36,
+ "SyntecRetrieval": 68.62,
+ "XPQARetrieval (fr)": 57.92
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "sentence-camembert-base",
+ "SICKFr": 74.18,
+ "STS22 (fr)": 77.54,
+ "STSBenchmarkMultilingualSTS (fr)": 81.64
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "sentence-camembert-base",
+ "SummEvalFr": 28.77
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "sentence-camembert-base"
+ }
+ ]
+ }
+ },
+ "Cohere-embed-multilingual-v3.0": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "Cohere-embed-multilingual-v3.0"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "multilingual-e5-large",
- "AllegroReviews (pol-Latn)": 41.04,
- "AllegroReviews": 41.14,
- "AmazonCounterfactualClassification (en-ext)": 78.73,
- "AmazonCounterfactualClassification (en)": 78.67,
- "AmazonCounterfactualClassification (deu-Latn)": 68.66,
- "AmazonCounterfactualClassification (jpn-Jpan)": 78.8,
- "AmazonPolarityClassification": 93.26,
- "AmazonReviewsClassification (en)": 49.2,
- "AmazonReviewsClassification (deu-Latn)": 46.5,
- "AmazonReviewsClassification (spa-Latn)": 44.35,
- "AmazonReviewsClassification (fra-Latn)": 42.55,
- "AmazonReviewsClassification (jpn-Jpan)": 41.71,
- "AmazonReviewsClassification (cmn-Hans)": 38.87,
- "AmazonReviewsClassification (fr)": 41.91,
- "AngryTweetsClassification (dan-Latn)": 57.69,
- "AngryTweetsClassification": 54.95,
- "Banking77Classification": 75.88,
- "CBD (pol-Latn)": 69.84,
- "CBD": 69.9,
- "DKHateClassification": 66.02,
- "DanishPoliticalCommentsClassification (dan-Latn)": 39.43,
- "DanishPoliticalCommentsClassification": 38.27,
- "EmotionClassification": 47.58,
- "GeoreviewClassification (rus-Cyrl)": 49.69,
- "HeadlineClassification (rus-Cyrl)": 77.19,
- "IFlyTek (cmn-Hans)": 41.86,
- "IFlyTek": 45.47,
- "ImdbClassification": 90.23,
- "InappropriatenessClassification (rus-Cyrl)": 61.6,
- "JDReview (cmn-Hans)": 80.54,
- "JDReview": 80.99,
- "KinopoiskClassification (rus-Cyrl)": 56.59,
- "LccSentimentClassification (dan-Latn)": 61.53,
- "LccSentimentClassification": 59.6,
- "MTOPDomainClassification (en)": 91.81,
- "MTOPDomainClassification (deu-Latn)": 90.44,
- "MTOPDomainClassification (spa-Latn)": 88.34,
- "MTOPDomainClassification (fra-Latn)": 86.23,
- "MTOPDomainClassification (hin-Deva)": 86.84,
- "MTOPDomainClassification (tha-Thai)": 86.88,
- "MTOPDomainClassification (fr)": 86.41,
- "MTOPIntentClassification (en)": 64.29,
- "MTOPIntentClassification (deu-Latn)": 65.97,
- "MTOPIntentClassification (spa-Latn)": 61.9,
- "MTOPIntentClassification (fra-Latn)": 56.25,
- "MTOPIntentClassification (hin-Deva)": 59.17,
- "MTOPIntentClassification (tha-Thai)": 62.59,
- "MTOPIntentClassification (fr)": 59.43,
- "MasakhaNEWSClassification (amh-Ethi)": 83.7,
- "MasakhaNEWSClassification (eng)": 78.26,
- "MasakhaNEWSClassification (fra-Latn)": 76.11,
- "MasakhaNEWSClassification (hau-Latn)": 76.17,
- "MasakhaNEWSClassification (ibo-Latn)": 70.05,
- "MasakhaNEWSClassification (lin-Latn)": 75.89,
- "MasakhaNEWSClassification (lug-Latn)": 73.63,
- "MasakhaNEWSClassification (orm-Ethi)": 80.31,
- "MasakhaNEWSClassification (pcm-Latn)": 89.15,
- "MasakhaNEWSClassification (run-Latn)": 76.55,
- "MasakhaNEWSClassification (sna-Latn)": 86.99,
- "MasakhaNEWSClassification (som-Latn)": 64.63,
- "MasakhaNEWSClassification (swa-Latn)": 73.42,
- "MasakhaNEWSClassification (tir-Ethi)": 72.06,
- "MasakhaNEWSClassification (xho-Latn)": 82.56,
- "MasakhaNEWSClassification (yor-Latn)": 81.09,
- "MasakhaNEWSClassification (fra)": 79.38,
- "MassiveIntentClassification (kor-Kore)": 63.92,
- "MassiveIntentClassification (lav-Latn)": 58.31,
- "MassiveIntentClassification (isl-Latn)": 53.3,
- "MassiveIntentClassification (tel-Telu)": 53.96,
- "MassiveIntentClassification (mya-Mymr)": 49.73,
- "MassiveIntentClassification (nob-Latn)": 64.54,
- "MassiveIntentClassification (en)": 68.51,
- "MassiveIntentClassification (spa-Latn)": 64.01,
- "MassiveIntentClassification (swe-Latn)": 66.52,
- "MassiveIntentClassification (cmo-Hant)": 58.78,
- "MassiveIntentClassification (pol-Latn)": 65.09,
- "MassiveIntentClassification (rus-Cyrl)": 65.76,
- "MassiveIntentClassification (aze-Latn)": 54.68,
- "MassiveIntentClassification (fin-Latn)": 64.28,
- "MassiveIntentClassification (cmo-Hans)": 66.23,
- "MassiveIntentClassification (urd-Arab)": 54.6,
- "MassiveIntentClassification (tam-Taml)": 53.41,
- "MassiveIntentClassification (hin-Deva)": 60.93,
- "MassiveIntentClassification (deu-Latn)": 63.82,
- "MassiveIntentClassification (ell-Grek)": 64.34,
- "MassiveIntentClassification (hye-Armn)": 50.89,
- "MassiveIntentClassification (por-Latn)": 65.6,
- "MassiveIntentClassification (nld-Latn)": 65.0,
- "MassiveIntentClassification (fas-Arab)": 63.74,
- "MassiveIntentClassification (ron-Latn)": 59.76,
- "MassiveIntentClassification (slv-Latn)": 59.38,
- "MassiveIntentClassification (heb-Hebr)": 62.44,
- "MassiveIntentClassification (vie-Latn)": 63.39,
- "MassiveIntentClassification (sqi-Latn)": 57.3,
- "MassiveIntentClassification (khm-Khmr)": 34.88,
- "MassiveIntentClassification (ben-Beng)": 55.6,
- "MassiveIntentClassification (tgl-Latn)": 54.77,
- "MassiveIntentClassification (jpn-Jpan)": 67.11,
- "MassiveIntentClassification (kat-Geor)": 41.45,
- "MassiveIntentClassification (afr-Latn)": 53.69,
- "MassiveIntentClassification (cym-Latn)": 44.22,
- "MassiveIntentClassification (amh-Ethi)": 45.48,
- "MassiveIntentClassification (ita-Latn)": 63.89,
- "MassiveIntentClassification (mal-Mlym)": 57.58,
- "MassiveIntentClassification (tha-Thai)": 62.75,
- "MassiveIntentClassification (ind-Latn)": 63.51,
- "MassiveIntentClassification (jav-Latn)": 48.96,
- "MassiveIntentClassification (dan-Latn)": 63.7,
- "MassiveIntentClassification (ara-Arab)": 54.1,
- "MassiveIntentClassification (kan-Knda)": 53.45,
- "MassiveIntentClassification (hun-Latn)": 64.0,
- "MassiveIntentClassification (tur-Latn)": 64.61,
- "MassiveIntentClassification (msa-Latn)": 58.49,
- "MassiveIntentClassification (mon-Cyrl)": 49.6,
- "MassiveIntentClassification (swa-Latn)": 47.69,
- "MassiveIntentClassification (fra-Latn)": 63.37,
- "MassiveIntentClassification (da)": 60.16,
- "MassiveIntentClassification (nb)": 59.83,
- "MassiveIntentClassification (sv)": 61.78,
- "MassiveIntentClassification (pl)": 65.07,
- "MassiveScenarioClassification (heb-Hebr)": 67.72,
- "MassiveScenarioClassification (vie-Latn)": 68.91,
- "MassiveScenarioClassification (cmo-Hant)": 64.35,
- "MassiveScenarioClassification (urd-Arab)": 60.89,
- "MassiveScenarioClassification (isl-Latn)": 60.74,
- "MassiveScenarioClassification (ell-Grek)": 69.74,
- "MassiveScenarioClassification (mon-Cyrl)": 55.37,
- "MassiveScenarioClassification (swa-Latn)": 56.27,
- "MassiveScenarioClassification (tam-Taml)": 58.76,
- "MassiveScenarioClassification (hye-Armn)": 55.76,
- "MassiveScenarioClassification (amh-Ethi)": 52.69,
- "MassiveScenarioClassification (ben-Beng)": 61.85,
- "MassiveScenarioClassification (tel-Telu)": 59.49,
- "MassiveScenarioClassification (dan-Latn)": 71.18,
- "MassiveScenarioClassification (slv-Latn)": 65.33,
- "MassiveScenarioClassification (en)": 73.04,
- "MassiveScenarioClassification (rus-Cyrl)": 70.85,
- "MassiveScenarioClassification (mal-Mlym)": 63.17,
- "MassiveScenarioClassification (sqi-Latn)": 63.79,
- "MassiveScenarioClassification (ita-Latn)": 69.45,
- "MassiveScenarioClassification (kor-Kore)": 70.54,
- "MassiveScenarioClassification (cmo-Hans)": 72.25,
- "MassiveScenarioClassification (cym-Latn)": 51.25,
- "MassiveScenarioClassification (pol-Latn)": 69.83,
- "MassiveScenarioClassification (ind-Latn)": 69.43,
- "MassiveScenarioClassification (tur-Latn)": 68.12,
- "MassiveScenarioClassification (tgl-Latn)": 60.71,
- "MassiveScenarioClassification (hin-Deva)": 66.85,
- "MassiveScenarioClassification (spa-Latn)": 69.07,
- "MassiveScenarioClassification (lav-Latn)": 64.28,
- "MassiveScenarioClassification (mya-Mymr)": 54.03,
- "MassiveScenarioClassification (ara-Arab)": 61.0,
- "MassiveScenarioClassification (kan-Knda)": 59.36,
- "MassiveScenarioClassification (jav-Latn)": 56.24,
- "MassiveScenarioClassification (por-Latn)": 68.33,
- "MassiveScenarioClassification (tha-Thai)": 69.06,
- "MassiveScenarioClassification (aze-Latn)": 58.49,
- "MassiveScenarioClassification (fra-Latn)": 68.74,
- "MassiveScenarioClassification (ron-Latn)": 66.06,
- "MassiveScenarioClassification (nld-Latn)": 71.11,
- "MassiveScenarioClassification (fas-Arab)": 67.55,
- "MassiveScenarioClassification (deu-Latn)": 71.25,
- "MassiveScenarioClassification (nob-Latn)": 70.44,
- "MassiveScenarioClassification (msa-Latn)": 63.55,
- "MassiveScenarioClassification (afr-Latn)": 62.35,
- "MassiveScenarioClassification (hun-Latn)": 70.53,
- "MassiveScenarioClassification (swe-Latn)": 72.77,
- "MassiveScenarioClassification (kat-Geor)": 47.82,
- "MassiveScenarioClassification (jpn-Jpan)": 73.16,
- "MassiveScenarioClassification (khm-Khmr)": 41.14,
- "MassiveScenarioClassification (fin-Latn)": 68.62,
- "MassiveScenarioClassification (da)": 67.46,
- "MassiveScenarioClassification (nb)": 66.18,
- "MassiveScenarioClassification (sv)": 69.15,
- "MassiveScenarioClassification (pl)": 69.82,
- "MultilingualSentiment (cmn-Hans)": 70.81,
- "MultilingualSentiment": 68.58,
- "NoRecClassification (nob-Latn)": 58.43,
- "NoRecClassification": 62.76,
- "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 80.15,
- "NordicLangClassification": 82.29,
- "NorwegianParliament": 60.36,
- "OnlineShopping (cmn-Hans)": 90.45,
- "OnlineShopping": 90.81,
- "PAC (pol-Latn)": 70.33,
- "PAC": 70.37,
- "PolEmo2.0-IN (pol-Latn)": 77.06,
- "PolEmo2.0-IN": 77.06,
- "PolEmo2.0-OUT (pol-Latn)": 53.48,
- "PolEmo2.0-OUT": 53.38,
- "RuReviewsClassification (rus-Cyrl)": 65.28,
- "RuSciBenchGRNTIClassification (rus-Cyrl)": 58.2,
- "RuSciBenchOECDClassification (rus-Cyrl)": 43.91,
- "ScalaDaClassification": 50.77,
- "ScalaNbClassification": 50.44,
- "TNews (cmn-Hans)": 48.8,
- "TNews": 48.38,
- "ToxicConversationsClassification": 66.01,
- "TweetSentimentExtractionClassification": 62.8,
- "Waimai (cmn-Hans)": 86.3,
- "Waimai": 85.02
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "AmazonReviewsClassification (fr)": 41.89,
+ "MTOPDomainClassification (fr)": 86.23,
+ "MTOPIntentClassification (fr)": 61.07,
+ "MasakhaNEWSClassification (fra)": 83.06,
+ "MassiveIntentClassification (fr)": 62.94,
+ "MassiveScenarioClassification (fr)": 67.29
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "multilingual-e5-large",
- "8TagsClustering": 33.88,
- "AlloProfClusteringP2P": 62.99,
- "AlloProfClusteringS2S": 32.26,
- "BiorxivClusteringP2P": 35.5,
- "BiorxivClusteringS2S": 33.3,
- "CLSClusteringP2P": 40.68,
- "CLSClusteringS2S": 38.59,
- "GeoreviewClusteringP2P (rus-Cyrl)": 60.51,
- "HALClusteringS2S": 22.44,
- "MLSUMClusteringP2P (rus-Cyrl)": 42.79,
- "MLSUMClusteringP2P": 44.04,
- "MLSUMClusteringS2S (rus-Cyrl)": 44.32,
- "MLSUMClusteringS2S": 37.65,
- "MasakhaNEWSClusteringP2P (amh-Ethi)": 67.16,
- "MasakhaNEWSClusteringP2P (eng)": 61.1,
- "MasakhaNEWSClusteringP2P (fra-Latn)": 41.66,
- "MasakhaNEWSClusteringP2P (hau-Latn)": 60.7,
- "MasakhaNEWSClusteringP2P (ibo-Latn)": 48.41,
- "MasakhaNEWSClusteringP2P (lin-Latn)": 57.69,
- "MasakhaNEWSClusteringP2P (lug-Latn)": 71.95,
- "MasakhaNEWSClusteringP2P (orm-Ethi)": 60.14,
- "MasakhaNEWSClusteringP2P (pcm-Latn)": 80.84,
- "MasakhaNEWSClusteringP2P (run-Latn)": 59.91,
- "MasakhaNEWSClusteringP2P (sna-Latn)": 53.3,
- "MasakhaNEWSClusteringP2P (som-Latn)": 34.38,
- "MasakhaNEWSClusteringP2P (swa-Latn)": 33.25,
- "MasakhaNEWSClusteringP2P (tir-Ethi)": 54.21,
- "MasakhaNEWSClusteringP2P (xho-Latn)": 41.12,
- "MasakhaNEWSClusteringP2P (yor-Latn)": 36.22,
- "MasakhaNEWSClusteringP2P (fra)": 40.94,
- "MasakhaNEWSClusteringS2S (amh-Ethi)": 47.24,
- "MasakhaNEWSClusteringS2S (eng)": 53.93,
- "MasakhaNEWSClusteringS2S (fra-Latn)": 39.84,
- "MasakhaNEWSClusteringS2S (hau-Latn)": 19.24,
- "MasakhaNEWSClusteringS2S (ibo-Latn)": 28.88,
- "MasakhaNEWSClusteringS2S (lin-Latn)": 42.22,
- "MasakhaNEWSClusteringS2S (lug-Latn)": 43.63,
- "MasakhaNEWSClusteringS2S (orm-Ethi)": 26.29,
- "MasakhaNEWSClusteringS2S (pcm-Latn)": 59.77,
- "MasakhaNEWSClusteringS2S (run-Latn)": 51.46,
- "MasakhaNEWSClusteringS2S (sna-Latn)": 48.14,
- "MasakhaNEWSClusteringS2S (som-Latn)": 25.14,
- "MasakhaNEWSClusteringS2S (swa-Latn)": 7.28,
- "MasakhaNEWSClusteringS2S (tir-Ethi)": 50.51,
- "MasakhaNEWSClusteringS2S (xho-Latn)": 30.98,
- "MasakhaNEWSClusteringS2S (yor-Latn)": 34.09,
- "MasakhaNEWSClusteringS2S (fra)": 30.56,
- "MedrxivClusteringP2P": 31.7,
- "MedrxivClusteringS2S": 29.76,
- "RedditClustering": 46.91,
- "RedditClusteringP2P": 63.0,
- "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 52.03,
- "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 45.11,
- "StackExchangeClustering": 58.37,
- "StackExchangeClusteringP2P": 32.9,
- "ThuNewsClusteringP2P": 58.05,
- "ThuNewsClusteringS2S": 55.59,
- "TwentyNewsgroupsClustering": 39.4
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "AlloProfClusteringP2P": 63.53,
+ "AlloProfClusteringS2S": 36.18,
+ "HALClusteringS2S": 19.9,
+ "MLSUMClusteringP2P": 45.08,
+ "MLSUMClusteringS2S": 34.75,
+ "MasakhaNEWSClusteringP2P (fra)": 53.18,
+ "MasakhaNEWSClusteringS2S (fra)": 32.31
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "multilingual-e5-large",
- "CDSC-E (pol-Latn)": 74.47,
- "CDSC-E": 74.47,
- "Cmnli": 78.18,
- "Ocnli": 61.6,
- "OpusparcusPC (deu-Latn)": 97.27,
- "OpusparcusPC (en)": 98.74,
- "OpusparcusPC (fin-Latn)": 94.26,
- "OpusparcusPC (fra-Latn)": 93.68,
- "OpusparcusPC (rus-Cyrl)": 89.64,
- "OpusparcusPC (swe-Latn)": 94.98,
- "OpusparcusPC (fr)": 93.89,
- "PPC": 92.18,
- "PSC (pol-Latn)": 99.4,
- "PSC": 99.39,
- "PawsXPairClassification (deu-Latn)": 56.81,
- "PawsXPairClassification (en)": 62.97,
- "PawsXPairClassification (spa-Latn)": 56.85,
- "PawsXPairClassification (fra-Latn)": 58.68,
- "PawsXPairClassification (jpn-Hira)": 50.7,
- "PawsXPairClassification (kor-Hang)": 52.08,
- "PawsXPairClassification (cmn-Hans)": 56.82,
- "PawsXPairClassification (fr)": 58.5,
- "SICK-E-PL (pol-Latn)": 75.95,
- "SICK-E-PL": 75.96,
- "SprintDuplicateQuestions": 93.14,
- "TERRa (rus-Cyrl)": 58.4,
- "TwitterSemEval2015": 75.28,
- "TwitterURLCorpus": 85.83
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "OpusparcusPC (fr)": 94.08,
+ "PawsXPairClassification (fr)": 61.26
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "AlloprofReranking": 51.01,
+ "SyntecReranking": 85.72
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "AlloprofRetrieval": 38.36,
+ "BSARDRetrieval": 0.14,
+ "MintakaRetrieval (fr)": 25.44,
+ "SyntecRetrieval": 79.27,
+ "XPQARetrieval (fr)": 58.87
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "SICKFr": 79.23,
+ "STS22 (fr)": 82.76,
+ "STSBenchmarkMultilingualSTS (fr)": 81.84
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "SummEvalFr": 31.26
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "Cohere-embed-multilingual-v3.0"
+ }
+ ]
+ }
+ },
+ "text2vec-large-chinese": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "text2vec-large-chinese"
+ }
+ ]
+ },
+ "Classification": {
+ "accuracy": [
+ {
+ "Model": "text2vec-large-chinese",
+ "AmazonReviewsClassification (zh)": 33.77,
+ "IFlyTek": 41.54,
+ "JDReview": 81.56,
+ "MassiveIntentClassification (zh-CN)": 63.23,
+ "MassiveScenarioClassification (zh-CN)": 68.45,
+ "MultilingualSentiment": 58.97,
+ "OnlineShopping": 83.51,
+ "TNews": 38.92,
+ "Waimai": 76.01
+ }
+ ]
+ },
+ "Clustering": {
+ "v_measure": [
+ {
+ "Model": "text2vec-large-chinese",
+ "CLSClusteringP2P": 30.13,
+ "CLSClusteringS2S": 28.77,
+ "ThuNewsClusteringP2P": 35.05,
+ "ThuNewsClusteringS2S": 26.14
+ }
+ ]
+ },
+ "PairClassification": {
+ "ap": [
+ {
+ "Model": "text2vec-large-chinese",
+ "Cmnli": 77.67,
+ "Ocnli": 64.04
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "text2vec-large-chinese",
+ "CMedQAv1": 58.92,
+ "CMedQAv2": 60.41,
+ "MMarcoReranking": 12.48,
+ "T2Reranking": 64.82
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "text2vec-large-chinese",
+ "CmedqaRetrieval": 15.53,
+ "CovidRetrieval": 60.48,
+ "DuRetrieval": 51.87,
+ "EcomRetrieval": 37.58,
+ "MMarcoRetrieval": 45.96,
+ "MedicalRetrieval": 30.93,
+ "T2Retrieval": 50.52,
+ "VideoRetrieval": 42.65
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "text2vec-large-chinese",
+ "AFQMC": 24.51,
+ "ATEC": 32.45,
+ "BQ": 44.22,
+ "LCQMC": 69.16,
+ "PAWSX": 14.55,
+ "QBQTC": 29.51,
+ "STS22 (zh)": 65.94,
+ "STSB": 79.45
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "text2vec-large-chinese"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "text2vec-large-chinese"
+ }
+ ]
+ }
+ },
+ "dragon-plus": {
+ "BitextMining": {
+ "f1": [
+ {
+ "Model": "dragon-plus"
}
]
},
- "Reranking": {
- "map": [
+ "Classification": {
+ "accuracy": [
{
- "Model": "multilingual-e5-large",
- "AlloprofReranking (fra-Latn)": 69.44,
- "AlloprofReranking": 57.37,
- "AskUbuntuDupQuestions": 59.24,
- "CMedQAv1": 68.25,
- "CMedQAv2": 68.56,
- "MMarcoReranking (cmn-Hans)": 29.12,
- "MMarcoReranking": 21.34,
- "MindSmallReranking": 30.24,
- "RuBQReranking (rus-Cyrl)": 75.58,
- "SciDocsRR": 84.22,
- "StackOverflowDupQuestions": 50.14,
- "SyntecReranking (fra-Latn)": 85.45,
- "SyntecReranking": 86.9,
- "T2Reranking (cmn-Hans)": 66.32,
- "T2Reranking": 65.83
+ "Model": "dragon-plus"
}
]
},
- "Retrieval": {
- "ndcg_at_10": [
+ "Clustering": {
+ "v_measure": [
{
- "Model": "multilingual-e5-large",
- "AILACasedocs": 26.43,
- "AILAStatutes": 20.84,
- "ARCChallenge": 10.83,
- "AlloprofRetrieval (fra-Latn)": 39.34,
- "AlloprofRetrieval": 38.15,
- "AlphaNLI": 13.59,
- "ArguAna": 54.36,
- "ArguAna-PL (pol-Latn)": 52.99,
- "ArguAna-PL": 53.02,
- "BSARDRetrieval (fra-Latn)": 21.28,
- "BSARDRetrieval": 0.27,
- "CmedqaRetrieval (cmn-Hans)": 28.66,
- "CmedqaRetrieval": 28.67,
- "CovidRetrieval (cmn-Hans)": 75.61,
- "CovidRetrieval": 75.51,
- "DBPedia-PL": 35.82,
- "DuRetrieval (cmn-Hans)": 85.3,
- "DuRetrieval": 85.32,
- "EcomRetrieval (cmn-Hans)": 54.67,
- "EcomRetrieval": 54.75,
- "FiQA-PL (pol-Latn)": 32.97,
- "FiQA-PL": 33.0,
- "FiQA2018": 43.81,
- "GerDaLIRSmall (deu-Latn)": 15.72,
- "HellaSwag": 27.35,
- "HotpotQA-PL": 67.41,
- "LEMBNarrativeQARetrieval": 24.22,
- "LEMBNeedleRetrieval": 28.0,
- "LEMBPasskeyRetrieval": 38.25,
- "LEMBQMSumRetrieval": 24.26,
- "LEMBSummScreenFDRetrieval": 71.12,
- "LEMBWikimQARetrieval": 56.8,
- "LeCaRDv2 (zho-Hans)": 55.83,
- "LegalBenchConsumerContractsQA": 73.3,
- "LegalBenchCorporateLobbying": 89.72,
- "LegalQuAD (deu-Latn)": 43.17,
- "LegalSummarization": 62.1,
- "MMarcoRetrieval (cmn-Hans)": 79.2,
- "MMarcoRetrieval": 79.2,
- "MSMARCO-PL": 33.38,
- "MedicalRetrieval (cmn-Hans)": 51.44,
- "MedicalRetrieval": 51.44,
- "MintakaRetrieval (ara-Arab)": 26.5,
- "MintakaRetrieval (deu-Latn)": 32.77,
- "MintakaRetrieval (spa-Latn)": 34.23,
- "MintakaRetrieval (fra-Latn)": 34.24,
- "MintakaRetrieval (hin-Deva)": 27.45,
- "MintakaRetrieval (ita-Latn)": 33.84,
- "MintakaRetrieval (jpn-Hira)": 26.45,
- "MintakaRetrieval (por-Latn)": 35.9,
- "MintakaRetrieval (fr)": 25.2,
- "NFCorpus": 33.95,
- "NFCorpus-PL (pol-Latn)": 30.21,
- "NFCorpus-PL": 30.24,
- "NQ-PL": 52.79,
- "PIQA": 28.82,
- "Quail": 4.85,
- "Quora-PL": 83.65,
- "RARbCode": 58.92,
- "RARbMath": 67.32,
- "RiaNewsRetrieval (rus-Cyrl)": 80.67,
- "RuBQRetrieval (rus-Cyrl)": 74.11,
- "SCIDOCS": 17.45,
- "SCIDOCS-PL (pol-Latn)": 13.82,
- "SCIDOCS-PL": 13.81,
- "SIQA": 5.36,
- "SciFact": 70.42,
- "SciFact-PL (pol-Latn)": 65.66,
- "SciFact-PL": 65.66,
- "SpartQA": 5.64,
- "SyntecRetrieval (fra-Latn)": 82.39,
- "SyntecRetrieval": 81.07,
- "T2Retrieval (cmn-Hans)": 76.07,
- "T2Retrieval": 76.11,
- "TRECCOVID": 71.21,
- "TRECCOVID-PL (pol-Latn)": 69.9,
- "TRECCOVID-PL": 70.03,
- "TempReasonL1": 1.14,
- "TempReasonL2Fact": 42.97,
- "TempReasonL2Pure": 2.05,
- "TempReasonL3Fact": 38.22,
- "TempReasonL3Pure": 8.31,
- "Touche2020": 23.13,
- "VideoRetrieval (cmn-Hans)": 58.28,
- "VideoRetrieval": 58.25,
- "WinoGrande": 54.99,
- "XPQARetrieval (ara-Arab_ara-Arab)": 43.69,
- "XPQARetrieval (eng-Latn_ara-Arab)": 30.86,
- "XPQARetrieval (ara-Arab_eng-Latn)": 39.11,
- "XPQARetrieval (deu-Latn_deu-Latn)": 76.83,
- "XPQARetrieval (eng-Latn_deu-Latn)": 42.87,
- "XPQARetrieval (deu-Latn_eng-Latn)": 68.25,
- "XPQARetrieval (spa-Latn_spa-Latn)": 61.77,
- "XPQARetrieval (eng-Latn_spa-Latn)": 37.55,
- "XPQARetrieval (spa-Latn_eng-Latn)": 52.86,
- "XPQARetrieval (fra-Latn_fra-Latn)": 61.38,
- "XPQARetrieval (eng-Latn_fra-Latn)": 39.12,
- "XPQARetrieval (fra-Latn_eng-Latn)": 57.93,
- "XPQARetrieval (hin-Deva_hin-Deva)": 71.09,
- "XPQARetrieval (eng-Latn_hin-Deva)": 32.39,
- "XPQARetrieval (hin-Deva_eng-Latn)": 68.31,
- "XPQARetrieval (ita-Latn_ita-Latn)": 74.32,
- "XPQARetrieval (eng-Latn_ita-Latn)": 37.95,
- "XPQARetrieval (ita-Latn_eng-Latn)": 64.54,
- "XPQARetrieval (jpn-Hira_jpn-Hira)": 74.11,
- "XPQARetrieval (eng-Latn_jpn-Hira)": 38.31,
- "XPQARetrieval (jpn-Hira_eng-Latn)": 65.42,
- "XPQARetrieval (kor-Hang_kor-Hang)": 35.72,
- "XPQARetrieval (eng-Latn_kor-Hang)": 31.09,
- "XPQARetrieval (kor-Hang_eng-Latn)": 34.06,
- "XPQARetrieval (pol-Latn_pol-Latn)": 51.01,
- "XPQARetrieval (eng-Latn_pol-Latn)": 30.49,
- "XPQARetrieval (pol-Latn_eng-Latn)": 44.66,
- "XPQARetrieval (por-Latn_por-Latn)": 41.1,
- "XPQARetrieval (eng-Latn_por-Latn)": 22.03,
- "XPQARetrieval (por-Latn_eng-Latn)": 35.15,
- "XPQARetrieval (tam-Taml_tam-Taml)": 39.51,
- "XPQARetrieval (eng-Latn_tam-Taml)": 17.33,
- "XPQARetrieval (tam-Taml_eng-Latn)": 33.67,
- "XPQARetrieval (cmn-Hans_cmn-Hans)": 66.27,
- "XPQARetrieval (eng-Latn_cmn-Hans)": 26.24,
- "XPQARetrieval (cmn-Hans_eng-Latn)": 55.15,
- "XPQARetrieval (fr)": 66.15
+ "Model": "dragon-plus"
}
]
},
- "STS": {
- "spearman": [
+ "PairClassification": {
+ "ap": [
{
- "Model": "multilingual-e5-large",
- "AFQMC (cmn-Hans)": 33.01,
- "AFQMC": 33.02,
- "ATEC (cmn-Hans)": 39.8,
- "ATEC": 39.81,
- "BIOSSES": 82.49,
- "BQ (cmn-Hans)": 46.44,
- "BQ": 46.44,
- "CDSC-R (pol-Latn)": 91.0,
- "CDSC-R": 91.0,
- "LCQMC (cmn-Hans)": 75.95,
- "LCQMC": 75.95,
- "PAWSX (cmn-Hans)": 14.63,
- "PAWSX": 14.63,
- "QBQTC": 29.77,
- "RUParaPhraserSTS (rus-Cyrl)": 71.82,
- "RuSTSBenchmarkSTS (rus-Cyrl)": 83.15,
- "SICK-R": 80.23,
- "SICK-R-PL (pol-Latn)": 75.08,
- "SICK-R-PL": 75.08,
- "SICKFr (fra-Latn)": 78.81,
- "SICKFr": 78.78,
- "STS12": 80.02,
- "STS13": 81.55,
- "STS14": 77.72,
- "STS15": 89.31,
- "STS16": 85.79,
- "STS17 (en-en)": 88.12,
- "STS17 (spa-Latn)": 86.71,
- "STS17 (spa-Latn_eng-Latn)": 80.74,
- "STS17 (eng-Latn_ara-Arab)": 75.03,
- "STS17 (fra-Latn_eng-Latn)": 85.62,
- "STS17 (kor-Hang)": 82.27,
- "STS17 (ita-Latn_eng-Latn)": 84.52,
- "STS17 (ara-Arab)": 77.83,
- "STS17 (eng-Latn_tur-Latn)": 71.22,
- "STS17 (eng-Latn_deu-Latn)": 86.15,
- "STS17 (nld-Latn_eng-Latn)": 85.29,
- "STS22 (spa-Latn)": 64.6,
- "STS22 (spa-Latn_eng-Latn)": 72.51,
- "STS22 (deu-Latn_eng-Latn)": 56.59,
- "STS22 (cmn-Hans_eng-Latn)": 65.95,
- "STS22 (deu-Latn_pol-Latn)": 49.58,
- "STS22 (fra-Latn_pol-Latn)": 50.71,
- "STS22 (en)": 63.66,
- "STS22 (ara-Arab)": 56.95,
- "STS22 (spa-Latn_ita-Latn)": 68.92,
- "STS22 (tur-Latn)": 63.56,
- "STS22 (deu-Latn_fra-Latn)": 67.96,
- "STS22 (ita-Latn)": 76.99,
- "STS22 (cmn-Hans)": 66.82,
- "STS22 (rus-Cyrl)": 59.89,
- "STS22 (fra-Latn)": 76.77,
- "STS22 (pol-Latn_eng-Latn)": 65.54,
- "STS22 (deu-Latn)": 56.58,
- "STS22 (pol-Latn)": 34.65,
- "STS22 (zh)": 65.64,
- "STS22 (pl)": 34.66,
- "STSB (cmn-Hans)": 81.08,
- "STSB": 81.08,
- "STSBenchmark": 87.29,
- "STSBenchmarkMultilingualSTS (cmn-Hans)": 81.22,
- "STSBenchmarkMultilingualSTS (en)": 87.29,
- "STSBenchmarkMultilingualSTS (pol-Latn)": 81.06,
- "STSBenchmarkMultilingualSTS (nld-Latn)": 81.63,
- "STSBenchmarkMultilingualSTS (ita-Latn)": 81.75,
- "STSBenchmarkMultilingualSTS (rus-Cyrl)": 83.05,
- "STSBenchmarkMultilingualSTS (por-Latn)": 73.31,
- "STSBenchmarkMultilingualSTS (spa-Latn)": 83.81,
- "STSBenchmarkMultilingualSTS (fra-Latn)": 83.28,
- "STSBenchmarkMultilingualSTS (deu-Latn)": 84.27,
- "STSBenchmarkMultilingualSTS (fr)": 82.53
+ "Model": "dragon-plus"
+ }
+ ]
+ },
+ "Reranking": {
+ "map": [
+ {
+ "Model": "dragon-plus"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "dragon-plus",
+ "ARCChallenge": 8.91,
+ "AlphaNLI": 32.1,
+ "HellaSwag": 27.69,
+ "PIQA": 28.01,
+ "Quail": 4.09,
+ "RARbCode": 17.58,
+ "RARbMath": 45.09,
+ "SIQA": 2.0,
+ "SpartQA": 10.34,
+ "TempReasonL1": 1.82,
+ "TempReasonL2Fact": 17.45,
+ "TempReasonL2Pure": 0.55,
+ "TempReasonL3Fact": 15.71,
+ "TempReasonL3Pure": 7.97,
+ "WinoGrande": 67.18
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "dragon-plus"
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "multilingual-e5-large",
- "SummEval": 29.65,
- "SummEvalFr (fra-Latn)": 30.92,
- "SummEvalFr": 30.92
+ "Model": "dragon-plus"
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "multilingual-e5-large"
+ "Model": "dragon-plus"
}
]
}
},
- "komninos": {
+ "bge-small-zh-v1.5": {
"BitextMining": {
"f1": [
{
- "Model": "komninos",
- "BUCC (de-en)": 0.18,
- "BUCC (fr-en)": 0.08,
- "BUCC (ru-en)": 0.15,
- "BUCC (zh-en)": 0.05,
- "Tatoeba (afr-eng)": 4.82,
- "Tatoeba (amh-eng)": 1.18,
- "Tatoeba (ang-eng)": 8.54,
- "Tatoeba (ara-eng)": 0.63,
- "Tatoeba (arq-eng)": 0.4,
- "Tatoeba (arz-eng)": 0.63,
- "Tatoeba (ast-eng)": 11.69,
- "Tatoeba (awa-eng)": 0.0,
- "Tatoeba (aze-eng)": 3.22,
- "Tatoeba (bel-eng)": 1.75,
- "Tatoeba (ben-eng)": 0.2,
- "Tatoeba (ber-eng)": 7.0,
- "Tatoeba (bos-eng)": 9.31,
- "Tatoeba (bre-eng)": 4.17,
- "Tatoeba (bul-eng)": 1.29,
- "Tatoeba (cat-eng)": 7.73,
- "Tatoeba (cbk-eng)": 5.61,
- "Tatoeba (ceb-eng)": 4.88,
- "Tatoeba (ces-eng)": 3.55,
- "Tatoeba (cha-eng)": 19.29,
- "Tatoeba (cmn-eng)": 0.5,
- "Tatoeba (cor-eng)": 4.15,
- "Tatoeba (csb-eng)": 5.69,
- "Tatoeba (cym-eng)": 8.4,
- "Tatoeba (dan-eng)": 6.99,
- "Tatoeba (deu-eng)": 3.67,
- "Tatoeba (dsb-eng)": 5.33,
- "Tatoeba (dtp-eng)": 4.25,
- "Tatoeba (ell-eng)": 0.63,
- "Tatoeba (epo-eng)": 2.45,
- "Tatoeba (est-eng)": 2.69,
- "Tatoeba (eus-eng)": 4.69,
- "Tatoeba (fao-eng)": 7.61,
- "Tatoeba (fin-eng)": 3.36,
- "Tatoeba (fra-eng)": 7.0,
- "Tatoeba (fry-eng)": 12.36,
- "Tatoeba (gla-eng)": 3.07,
- "Tatoeba (gle-eng)": 4.81,
- "Tatoeba (glg-eng)": 8.12,
- "Tatoeba (gsw-eng)": 18.87,
- "Tatoeba (heb-eng)": 0.68,
- "Tatoeba (hin-eng)": 0.1,
- "Tatoeba (hrv-eng)": 5.41,
- "Tatoeba (hsb-eng)": 6.32,
- "Tatoeba (hun-eng)": 3.42,
- "Tatoeba (hye-eng)": 0.97,
- "Tatoeba (ido-eng)": 7.1,
- "Tatoeba (ile-eng)": 13.61,
- "Tatoeba (ina-eng)": 8.57,
- "Tatoeba (ind-eng)": 7.26,
- "Tatoeba (isl-eng)": 4.09,
- "Tatoeba (ita-eng)": 5.54,
- "Tatoeba (jav-eng)": 11.43,
- "Tatoeba (jpn-eng)": 0.2,
- "Tatoeba (kab-eng)": 2.71,
- "Tatoeba (kat-eng)": 1.11,
- "Tatoeba (kaz-eng)": 1.17,
- "Tatoeba (khm-eng)": 0.55,
- "Tatoeba (kor-eng)": 0.5,
- "Tatoeba (kur-eng)": 8.55,
- "Tatoeba (kzj-eng)": 4.61,
- "Tatoeba (lat-eng)": 4.07,
- "Tatoeba (lfn-eng)": 2.83,
- "Tatoeba (lit-eng)": 0.95,
- "Tatoeba (lvs-eng)": 3.25,
- "Tatoeba (mal-eng)": 0.29,
- "Tatoeba (mar-eng)": 0.2,
- "Tatoeba (max-eng)": 14.53,
- "Tatoeba (mhr-eng)": 0.2,
- "Tatoeba (mkd-eng)": 0.2,
- "Tatoeba (mon-eng)": 1.1,
- "Tatoeba (nds-eng)": 10.37,
- "Tatoeba (nld-eng)": 9.5,
- "Tatoeba (nno-eng)": 4.49,
- "Tatoeba (nob-eng)": 4.95,
- "Tatoeba (nov-eng)": 14.53,
- "Tatoeba (oci-eng)": 5.8,
- "Tatoeba (orv-eng)": 0.24,
- "Tatoeba (pam-eng)": 6.65,
- "Tatoeba (pes-eng)": 0.5,
- "Tatoeba (pms-eng)": 8.05,
- "Tatoeba (pol-eng)": 5.13,
- "Tatoeba (por-eng)": 5.87,
- "Tatoeba (ron-eng)": 6.76,
- "Tatoeba (rus-eng)": 0.2,
- "Tatoeba (slk-eng)": 4.23,
- "Tatoeba (slv-eng)": 6.05,
- "Tatoeba (spa-eng)": 5.03,
- "Tatoeba (sqi-eng)": 4.36,
- "Tatoeba (srp-eng)": 1.77,
- "Tatoeba (swe-eng)": 6.72,
- "Tatoeba (swg-eng)": 8.54,
- "Tatoeba (swh-eng)": 11.49,
- "Tatoeba (tam-eng)": 1.3,
- "Tatoeba (tat-eng)": 0.77,
- "Tatoeba (tel-eng)": 0.85,
- "Tatoeba (tgl-eng)": 2.61,
- "Tatoeba (tha-eng)": 0.69,
- "Tatoeba (tuk-eng)": 5.76,
- "Tatoeba (tur-eng)": 5.24,
- "Tatoeba (tzl-eng)": 15.51,
- "Tatoeba (uig-eng)": 0.6,
- "Tatoeba (ukr-eng)": 1.23,
- "Tatoeba (urd-eng)": 0.4,
- "Tatoeba (uzb-eng)": 4.73,
- "Tatoeba (vie-eng)": 6.55,
- "Tatoeba (war-eng)": 4.12,
- "Tatoeba (wuu-eng)": 0.2,
- "Tatoeba (xho-eng)": 4.33,
- "Tatoeba (yid-eng)": 0.59,
- "Tatoeba (yue-eng)": 0.5,
- "Tatoeba (zsm-eng)": 7.27
+ "Model": "bge-small-zh-v1.5"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "komninos",
- "AmazonCounterfactualClassification (en)": 60.54,
- "AmazonPolarityClassification": 59.59,
- "AmazonReviewsClassification (en)": 31.01,
- "Banking77Classification": 67.05,
- "EmotionClassification": 33.18,
- "ImdbClassification": 63.98,
- "MTOPDomainClassification (en)": 78.57,
- "MTOPIntentClassification (en)": 57.07,
- "MassiveIntentClassification (en)": 57.21,
- "MassiveScenarioClassification (en)": 66.11,
- "ToxicConversationsClassification": 67.76,
- "TweetSentimentExtractionClassification": 49.68
+ "Model": "bge-small-zh-v1.5",
+ "AmazonReviewsClassification (zh)": 35.91,
+ "IFlyTek": 45.49,
+ "JDReview": 80.04,
+ "MassiveIntentClassification (zh-CN)": 63.95,
+ "MassiveScenarioClassification (zh-CN)": 70.8,
+ "MultilingualSentiment": 63.06,
+ "OnlineShopping": 85.05,
+ "TNews": 48.15,
+ "Waimai": 83.18
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "komninos",
- "ArxivClusteringP2P": 34.73,
- "ArxivClusteringS2S": 26.01,
- "BiorxivClusteringP2P": 29.76,
- "BiorxivClusteringS2S": 20.71,
- "BlurbsClusteringP2P": 11.37,
- "BlurbsClusteringS2S": 8.01,
- "MedrxivClusteringP2P": 26.65,
- "MedrxivClusteringS2S": 21.5,
- "RedditClustering": 28.84,
- "RedditClusteringP2P": 7.37,
- "StackExchangeClustering": 39.04,
- "StackExchangeClusteringP2P": 30.23,
- "TenKGnadClusteringP2P": 15.89,
- "TenKGnadClusteringS2S": 4.84,
- "TwentyNewsgroupsClustering": 27.42
+ "Model": "bge-small-zh-v1.5",
+ "CLSClusteringP2P": 38.14,
+ "CLSClusteringS2S": 35.14,
+ "ThuNewsClusteringP2P": 54.22,
+ "ThuNewsClusteringS2S": 49.22
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "komninos",
- "SprintDuplicateQuestions": 85.55,
- "TwitterSemEval2015": 53.85,
- "TwitterURLCorpus": 79.41
+ "Model": "bge-small-zh-v1.5",
+ "Cmnli": 76.24,
+ "Ocnli": 64.57
}
]
},
"Reranking": {
"map": [
{
- "Model": "komninos",
- "AskUbuntuDupQuestions": 50.88,
- "MindSmallReranking": 28.92,
- "SciDocsRR": 63.55,
- "StackOverflowDupQuestions": 35.65
+ "Model": "bge-small-zh-v1.5",
+ "CMedQAv1": 77.4,
+ "CMedQAv2": 79.86,
+ "MMarcoReranking": 20.5,
+ "T2Reranking": 65.9
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "komninos",
- "ArguAna": 30.96,
- "CQADupstackRetrieval": 16.79,
- "ClimateFEVER": 14.87,
- "DBPedia": 15.88,
- "FEVER": 15.56,
- "FiQA2018": 10.49,
- "HotpotQA": 20.77,
- "MSMARCO": 9.75,
- "NFCorpus": 11.79,
- "NQ": 12.75,
- "QuoraRetrieval": 71.57,
- "SCIDOCS": 8.47,
- "SciFact": 29.53,
- "TRECCOVID": 35.92,
- "Touche2020": 13.17
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "komninos",
- "BIOSSES": 50.25,
- "SICK-R": 55.49,
- "STS12": 53.51,
- "STS13": 70.8,
- "STS14": 63.56,
- "STS15": 74.08,
- "STS16": 64.6,
- "STS17 (ar-ar)": 13.78,
- "STS17 (en-ar)": 9.08,
- "STS17 (en-de)": -3.11,
- "STS17 (en-en)": 76.91,
- "STS17 (en-tr)": -0.45,
- "STS17 (es-en)": -8.18,
- "STS17 (es-es)": 48.23,
- "STS17 (fr-en)": 5.81,
- "STS17 (it-en)": 3.64,
- "STS17 (ko-ko)": 2.54,
- "STS17 (nl-en)": 0.44,
- "STS22 (ar)": 32.42,
- "STS22 (de)": 33.04,
- "STS22 (de-en)": 28.65,
- "STS22 (de-fr)": 14.77,
- "STS22 (de-pl)": 11.21,
- "STS22 (en)": 53.89,
- "STS22 (es)": 48.53,
- "STS22 (es-en)": 26.97,
- "STS22 (es-it)": 41.1,
- "STS22 (fr)": 49.43,
- "STS22 (fr-pl)": 39.44,
- "STS22 (it)": 57.77,
- "STS22 (pl)": 12.47,
- "STS22 (pl-en)": 45.55,
- "STS22 (ru)": 19.44,
- "STS22 (tr)": 47.38,
- "STS22 (zh)": 4.78,
- "STS22 (zh-en)": 14.05,
- "STSBenchmark": 61.55
+ "Model": "bge-small-zh-v1.5",
+ "CmedqaRetrieval": 35.11,
+ "CovidRetrieval": 70.14,
+ "DuRetrieval": 77.28,
+ "EcomRetrieval": 55.71,
+ "MMarcoRetrieval": 63.48,
+ "MedicalRetrieval": 49.8,
+ "T2Retrieval": 76.43,
+ "VideoRetrieval": 66.19
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "bge-small-zh-v1.5",
+ "AFQMC": 33.42,
+ "ATEC": 43.01,
+ "BQ": 55.22,
+ "LCQMC": 72.19,
+ "PAWSX": 9.26,
+ "QBQTC": 35.29,
+ "STS22 (zh)": 67.72,
+ "STSB": 76.73
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "komninos",
- "SummEval": 30.49
+ "Model": "bge-small-zh-v1.5"
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "komninos"
+ "Model": "bge-small-zh-v1.5"
}
]
}
},
- "voyage-lite-01-instruct": {
+ "nb-bert-large": {
"BitextMining": {
"f1": [
{
- "Model": "voyage-lite-01-instruct"
+ "Model": "nb-bert-large",
+ "BornholmBitextMining": 4.53
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "voyage-lite-01-instruct",
- "AmazonCounterfactualClassification (en)": 71.43,
- "AmazonPolarityClassification": 96.41,
- "AmazonReviewsClassification (en)": 57.06,
- "Banking77Classification": 81.64,
- "EmotionClassification": 48.29,
- "ImdbClassification": 95.49,
- "MTOPDomainClassification (en)": 96.3,
- "MTOPIntentClassification (en)": 67.93,
- "MassiveIntentClassification (en)": 71.29,
- "MassiveScenarioClassification (en)": 76.74,
- "ToxicConversationsClassification": 75.45,
- "TweetSentimentExtractionClassification": 59.44
+ "Model": "nb-bert-large",
+ "AngryTweetsClassification": 52.14,
+ "DKHateClassification": 62.13,
+ "DanishPoliticalCommentsClassification": 35.04,
+ "LccSentimentClassification": 56.27,
+ "MassiveIntentClassification (da)": 57.03,
+ "MassiveIntentClassification (nb)": 62.68,
+ "MassiveIntentClassification (sv)": 55.02,
+ "MassiveScenarioClassification (da)": 60.43,
+ "MassiveScenarioClassification (nb)": 67.44,
+ "MassiveScenarioClassification (sv)": 57.12,
+ "NoRecClassification": 55.46,
+ "NordicLangClassification": 85.27,
+ "NorwegianParliament": 62.58,
+ "ScalaDaClassification": 62.85,
+ "ScalaNbClassification": 66.97
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "voyage-lite-01-instruct",
- "ArxivClusteringP2P": 47.92,
- "ArxivClusteringS2S": 42.42,
- "BiorxivClusteringP2P": 38.72,
- "BiorxivClusteringS2S": 36.6,
- "MedrxivClusteringP2P": 34.04,
- "MedrxivClusteringS2S": 32.81,
- "RedditClustering": 61.56,
- "RedditClusteringP2P": 65.35,
- "StackExchangeClustering": 70.16,
- "StackExchangeClusteringP2P": 38.23,
- "TwentyNewsgroupsClustering": 53.56
+ "Model": "nb-bert-large"
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "voyage-lite-01-instruct",
- "SprintDuplicateQuestions": 96.01,
- "TwitterSemEval2015": 76.87,
- "TwitterURLCorpus": 86.84
+ "Model": "nb-bert-large"
}
]
},
"Reranking": {
"map": [
{
- "Model": "voyage-lite-01-instruct",
- "AskUbuntuDupQuestions": 65.77,
- "MindSmallReranking": 31.69,
- "SciDocsRR": 87.03,
- "StackOverflowDupQuestions": 54.49
+ "Model": "nb-bert-large"
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "voyage-lite-01-instruct",
- "ArguAna": 58.73,
- "CQADupstackRetrieval": 45.11,
- "ClimateFEVER": 37.47,
- "DBPedia": 43.42,
- "FEVER": 89.71,
- "FiQA2018": 44.79,
- "HotpotQA": 70.46,
- "MSMARCO": 39.66,
- "NFCorpus": 43.33,
- "NQ": 60.65,
- "QuoraRetrieval": 87.83,
- "SCIDOCS": 23.19,
- "SciFact": 73.64,
- "TRECCOVID": 78.92,
- "Touche2020": 36.83
+ "Model": "nb-bert-large"
}
]
},
"STS": {
"spearman": [
{
- "Model": "voyage-lite-01-instruct",
- "BIOSSES": 84.85,
- "SICK-R": 79.71,
- "STS12": 77.09,
- "STS13": 88.91,
- "STS14": 82.08,
- "STS15": 89.21,
- "STS16": 84.74,
- "STS17 (en-en)": 90.73,
- "STS22 (en)": 62.1,
- "STSBenchmark": 89.86
+ "Model": "nb-bert-large"
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "voyage-lite-01-instruct",
- "SummEval": 30.97
+ "Model": "nb-bert-large"
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "voyage-lite-01-instruct"
+ "Model": "nb-bert-large"
}
]
}
},
- "bm25": {
+ "e5-large-v2": {
"BitextMining": {
"f1": [
{
- "Model": "bm25"
+ "Model": "e5-large-v2"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "bm25"
+ "Model": "e5-large-v2"
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "bm25"
+ "Model": "e5-large-v2",
+ "BiorxivClusteringP2P": 36.72,
+ "BiorxivClusteringS2S": 35.47,
+ "MedrxivClusteringP2P": 31.45,
+ "MedrxivClusteringS2S": 29.91,
+ "RedditClustering": 55.5,
+ "RedditClusteringP2P": 63.71,
+ "StackExchangeClustering": 65.23,
+ "StackExchangeClusteringP2P": 33.62,
+ "TwentyNewsgroupsClustering": 48.73
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "bm25"
+ "Model": "e5-large-v2"
}
]
},
"Reranking": {
"map": [
{
- "Model": "bm25"
+ "Model": "e5-large-v2"
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "bm25",
- "BrightRetrieval (robotics)": 13.53,
- "BrightRetrieval (pony)": 7.93,
- "BrightRetrieval (leetcode)": 24.37,
- "BrightRetrieval (earth_science)": 27.06,
- "BrightRetrieval (stackoverflow)": 16.55,
- "BrightRetrieval (economics)": 14.87,
- "BrightRetrieval (theoremqa_questions)": 9.78,
- "BrightRetrieval (theoremqa_theorems)": 4.25,
- "BrightRetrieval (psychology)": 12.51,
- "BrightRetrieval (sustainable_living)": 15.22,
- "BrightRetrieval (biology)": 19.19,
- "BrightRetrieval (aops)": 6.2
+ "Model": "e5-large-v2"
}
]
},
"STS": {
"spearman": [
{
- "Model": "bm25"
+ "Model": "e5-large-v2"
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "bm25"
+ "Model": "e5-large-v2"
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "bm25",
- "Core17InstructionRetrieval": -1.06,
- "News21InstructionRetrieval": -2.15,
- "Robust04InstructionRetrieval": -3.06
+ "Model": "e5-large-v2",
+ "Core17InstructionRetrieval": 0.12,
+ "News21InstructionRetrieval": 0.87,
+ "Robust04InstructionRetrieval": -4.16
}
]
}
},
- "nomic-embed-text-v1.5-256": {
+ "universal-sentence-encoder-multilingual-3": {
"BitextMining": {
"f1": [
{
- "Model": "nomic-embed-text-v1.5-256"
+ "Model": "universal-sentence-encoder-multilingual-3"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "nomic-embed-text-v1.5-256",
- "AmazonCounterfactualClassification (en)": 72.94,
- "AmazonPolarityClassification": 91.35,
- "AmazonReviewsClassification (en)": 45.73,
- "Banking77Classification": 83.69,
- "EmotionClassification": 45.88,
- "ImdbClassification": 83.99,
- "MTOPDomainClassification (en)": 91.68,
- "MTOPIntentClassification (en)": 72.47,
- "MassiveIntentClassification (en)": 71.76,
- "MassiveScenarioClassification (en)": 75.67,
- "ToxicConversationsClassification": 70.87,
- "TweetSentimentExtractionClassification": 59.2
+ "Model": "universal-sentence-encoder-multilingual-3",
+ "AmazonReviewsClassification (fr)": 33.51,
+ "MTOPDomainClassification (fr)": 85.5,
+ "MTOPIntentClassification (fr)": 53.98,
+ "MasakhaNEWSClassification (fra)": 82.06,
+ "MassiveIntentClassification (fr)": 61.19,
+ "MassiveScenarioClassification (fr)": 70.22
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "nomic-embed-text-v1.5-256",
- "ArxivClusteringP2P": 44.82,
- "ArxivClusteringS2S": 35.32,
- "BiorxivClusteringP2P": 38.19,
- "BiorxivClusteringS2S": 31.83,
- "MedrxivClusteringP2P": 34.08,
- "MedrxivClusteringS2S": 30.98,
- "RedditClustering": 54.92,
- "RedditClusteringP2P": 60.23,
- "StackExchangeClustering": 61.81,
- "StackExchangeClusteringP2P": 34.03,
- "TwentyNewsgroupsClustering": 48.56
+ "Model": "universal-sentence-encoder-multilingual-3",
+ "AlloProfClusteringP2P": 56.9,
+ "AlloProfClusteringS2S": 37.84,
+ "HALClusteringS2S": 18.95,
+ "MLSUMClusteringP2P": 43.9,
+ "MLSUMClusteringS2S": 35.5,
+ "MasakhaNEWSClusteringP2P (fra)": 60.57,
+ "MasakhaNEWSClusteringS2S (fra)": 40.31
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "nomic-embed-text-v1.5-256",
- "SprintDuplicateQuestions": 92.31,
- "TwitterSemEval2015": 73.61,
- "TwitterURLCorpus": 86.34
+ "Model": "universal-sentence-encoder-multilingual-3",
+ "OpusparcusPC (fr)": 91.46,
+ "PawsXPairClassification (fr)": 52.39
}
]
},
"Reranking": {
"map": [
{
- "Model": "nomic-embed-text-v1.5-256",
- "AskUbuntuDupQuestions": 61.34,
- "MindSmallReranking": 30.04,
- "SciDocsRR": 79.4,
- "StackOverflowDupQuestions": 49.95
+ "Model": "universal-sentence-encoder-multilingual-3",
+ "AlloprofReranking": 56.23,
+ "SyntecReranking": 73.85
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "nomic-embed-text-v1.5-256",
- "ArguAna": 45.44,
- "CQADupstackRetrieval": 37.61,
- "ClimateFEVER": 39.63,
- "DBPedia": 39.42,
- "FEVER": 84.4,
- "FiQA2018": 35.0,
- "HotpotQA": 67.78,
- "MSMARCO": 41.38,
- "NFCorpus": 32.54,
- "NQ": 57.1,
- "QuoraRetrieval": 87.65,
- "SCIDOCS": 16.76,
- "SciFact": 68.24,
- "TRECCOVID": 80.65,
- "Touche2020": 28.49
+ "Model": "universal-sentence-encoder-multilingual-3",
+ "AlloprofRetrieval": 35.27,
+ "BSARDRetrieval": 0.0,
+ "MintakaRetrieval (fr)": 26.12,
+ "SyntecRetrieval": 69.82,
+ "XPQARetrieval (fr)": 59.59
}
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "nomic-embed-text-v1.5-256",
- "BIOSSES": 81.58,
- "SICK-R": 79.24,
- "STS12": 78.16,
- "STS13": 86.01,
- "STS14": 81.25,
- "STS15": 86.51,
- "STS16": 84.24,
- "STS17 (en-en)": 86.44,
- "STS22 (en)": 65.14,
- "STSBenchmark": 84.8
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "universal-sentence-encoder-multilingual-3",
+ "SICKFr": 71.37,
+ "STS22 (fr)": 77.91,
+ "STSBenchmarkMultilingualSTS (fr)": 75.48
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "nomic-embed-text-v1.5-256",
- "SummEval": 30.05
+ "Model": "universal-sentence-encoder-multilingual-3",
+ "SummEvalFr": 28.21
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "nomic-embed-text-v1.5-256"
+ "Model": "universal-sentence-encoder-multilingual-3"
}
]
}
},
- "e5-mistral-7b-instruct-noinstruct": {
+ "LaBSE-ru-turbo": {
"BitextMining": {
"f1": [
{
- "Model": "e5-mistral-7b-instruct-noinstruct"
+ "Model": "LaBSE-ru-turbo",
+ "Tatoeba (rus-Cyrl_eng-Latn)": 93.22
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "e5-mistral-7b-instruct-noinstruct"
+ "Model": "LaBSE-ru-turbo",
+ "GeoreviewClassification (rus-Cyrl)": 46.04,
+ "HeadlineClassification (rus-Cyrl)": 69.98,
+ "InappropriatenessClassification (rus-Cyrl)": 61.39,
+ "KinopoiskClassification (rus-Cyrl)": 53.59,
+ "MassiveIntentClassification (rus-Cyrl)": 66.08,
+ "MassiveScenarioClassification (rus-Cyrl)": 71.13,
+ "RuReviewsClassification (rus-Cyrl)": 64.58,
+ "RuSciBenchGRNTIClassification (rus-Cyrl)": 56.67,
+ "RuSciBenchOECDClassification (rus-Cyrl)": 43.58
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "e5-mistral-7b-instruct-noinstruct"
+ "Model": "LaBSE-ru-turbo",
+ "GeoreviewClusteringP2P (rus-Cyrl)": 64.55,
+ "MLSUMClusteringP2P (rus-Cyrl)": 45.7,
+ "MLSUMClusteringS2S (rus-Cyrl)": 42.93,
+ "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 50.64,
+ "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 44.48
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "e5-mistral-7b-instruct-noinstruct"
+ "Model": "LaBSE-ru-turbo",
+ "OpusparcusPC (rus-Cyrl)": 89.32,
+ "TERRa (rus-Cyrl)": 57.81
}
]
},
"Reranking": {
"map": [
{
- "Model": "e5-mistral-7b-instruct-noinstruct"
+ "Model": "LaBSE-ru-turbo",
+ "RuBQReranking (rus-Cyrl)": 68.65
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "e5-mistral-7b-instruct-noinstruct",
- "ARCChallenge": 20.48,
- "AlphaNLI": 18.88,
- "HellaSwag": 32.25,
- "PIQA": 32.8,
- "Quail": 6.25,
- "RARbCode": 79.84,
- "RARbMath": 76.19,
- "SIQA": 5.08,
- "SpartQA": 10.87,
- "TempReasonL1": 3.04,
- "TempReasonL2Fact": 35.63,
- "TempReasonL2Pure": 9.32,
- "TempReasonL3Fact": 30.41,
- "TempReasonL3Pure": 14.39,
- "WinoGrande": 45.18
+ "Model": "LaBSE-ru-turbo",
+ "RiaNewsRetrieval (rus-Cyrl)": 69.36,
+ "RuBQRetrieval (rus-Cyrl)": 65.71
}
]
},
"STS": {
"spearman": [
{
- "Model": "e5-mistral-7b-instruct-noinstruct"
+ "Model": "LaBSE-ru-turbo",
+ "RUParaPhraserSTS (rus-Cyrl)": 72.97,
+ "RuSTSBenchmarkSTS (rus-Cyrl)": 81.77,
+ "STS22 (rus-Cyrl)": 62.89,
+ "STSBenchmarkMultilingualSTS (rus-Cyrl)": 81.81
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "e5-mistral-7b-instruct-noinstruct"
+ "Model": "LaBSE-ru-turbo"
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "e5-mistral-7b-instruct-noinstruct"
+ "Model": "LaBSE-ru-turbo"
}
]
}
},
- "e5-base-4k": {
+ "bge-large-zh-noinstruct": {
"BitextMining": {
"f1": [
{
- "Model": "e5-base-4k"
+ "Model": "bge-large-zh-noinstruct"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "e5-base-4k"
+ "Model": "bge-large-zh-noinstruct",
+ "AmazonReviewsClassification (zh)": 41.94,
+ "IFlyTek": 45.32,
+ "JDReview": 85.38,
+ "MassiveIntentClassification (zh-CN)": 66.96,
+ "MassiveScenarioClassification (zh-CN)": 73.39,
+ "MultilingualSentiment": 73.7,
+ "OnlineShopping": 91.66,
+ "TNews": 52.05,
+ "Waimai": 86.83
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "e5-base-4k"
+ "Model": "bge-large-zh-noinstruct",
+ "CLSClusteringP2P": 41.23,
+ "CLSClusteringS2S": 40.04,
+ "ThuNewsClusteringP2P": 62.03,
+ "ThuNewsClusteringS2S": 56.75
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "e5-base-4k"
+ "Model": "bge-large-zh-noinstruct",
+ "Cmnli": 82.17,
+ "Ocnli": 71.37
}
]
},
"Reranking": {
"map": [
{
- "Model": "e5-base-4k"
+ "Model": "bge-large-zh-noinstruct",
+ "CMedQAv1": 81.72,
+ "CMedQAv2": 84.64,
+ "MMarcoReranking": 27.1,
+ "T2Reranking": 66.16
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "e5-base-4k",
- "LEMBNarrativeQARetrieval": 30.35,
- "LEMBNeedleRetrieval": 41.5,
- "LEMBPasskeyRetrieval": 67.25,
- "LEMBQMSumRetrieval": 35.6,
- "LEMBSummScreenFDRetrieval": 95.23,
- "LEMBWikimQARetrieval": 69.19
+ "Model": "bge-large-zh-noinstruct",
+ "CmedqaRetrieval": 41.03,
+ "CovidRetrieval": 75.07,
+ "DuRetrieval": 84.68,
+ "EcomRetrieval": 65.6,
+ "MMarcoRetrieval": 81.38,
+ "MedicalRetrieval": 58.28,
+ "T2Retrieval": 84.39,
+ "VideoRetrieval": 73.93
}
]
},
"STS": {
"spearman": [
{
- "Model": "e5-base-4k"
+ "Model": "bge-large-zh-noinstruct",
+ "AFQMC": 43.06,
+ "ATEC": 48.29,
+ "BQ": 60.53,
+ "LCQMC": 74.71,
+ "PAWSX": 16.64,
+ "QBQTC": 35.2,
+ "STS22 (zh)": 67.19,
+ "STSB": 78.41
}
]
},
"Summarization": {
"spearman": [
{
- "Model": "e5-base-4k"
+ "Model": "bge-large-zh-noinstruct"
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "e5-base-4k"
+ "Model": "bge-large-zh-noinstruct"
}
]
}
},
- "sentence-t5-xl": {
+ "bm25s": {
"BitextMining": {
- "f1": [
- {
- "Model": "sentence-t5-xl",
- "BUCC (de-en)": 95.04,
- "BUCC (fr-en)": 94.96,
- "BUCC (ru-en)": 8.33,
- "BUCC (zh-en)": 1.3,
- "Tatoeba (afr-eng)": 41.84,
- "Tatoeba (amh-eng)": 0.03,
- "Tatoeba (ang-eng)": 37.87,
- "Tatoeba (ara-eng)": 0.61,
- "Tatoeba (arq-eng)": 0.74,
- "Tatoeba (arz-eng)": 0.42,
- "Tatoeba (ast-eng)": 65.41,
- "Tatoeba (awa-eng)": 1.46,
- "Tatoeba (aze-eng)": 8.79,
- "Tatoeba (bel-eng)": 5.76,
- "Tatoeba (ben-eng)": 0.01,
- "Tatoeba (ber-eng)": 5.92,
- "Tatoeba (bos-eng)": 16.12,
- "Tatoeba (bre-eng)": 6.12,
- "Tatoeba (bul-eng)": 9.06,
- "Tatoeba (cat-eng)": 57.4,
- "Tatoeba (cbk-eng)": 57.68,
- "Tatoeba (ceb-eng)": 12.56,
- "Tatoeba (ces-eng)": 9.47,
- "Tatoeba (cha-eng)": 27.13,
- "Tatoeba (cmn-eng)": 1.82,
- "Tatoeba (cor-eng)": 3.87,
- "Tatoeba (csb-eng)": 14.41,
- "Tatoeba (cym-eng)": 6.69,
- "Tatoeba (dan-eng)": 54.87,
- "Tatoeba (deu-eng)": 93.72,
- "Tatoeba (dsb-eng)": 14.74,
- "Tatoeba (dtp-eng)": 5.84,
- "Tatoeba (ell-eng)": 0.6,
- "Tatoeba (epo-eng)": 30.8,
- "Tatoeba (est-eng)": 5.39,
- "Tatoeba (eus-eng)": 11.9,
- "Tatoeba (fao-eng)": 28.08,
- "Tatoeba (fin-eng)": 6.81,
- "Tatoeba (fra-eng)": 85.29,
- "Tatoeba (fry-eng)": 38.68,
- "Tatoeba (gla-eng)": 2.96,
- "Tatoeba (gle-eng)": 3.74,
- "Tatoeba (glg-eng)": 70.0,
- "Tatoeba (gsw-eng)": 30.49,
- "Tatoeba (heb-eng)": 0.87,
- "Tatoeba (hin-eng)": 0.1,
- "Tatoeba (hrv-eng)": 17.43,
- "Tatoeba (hsb-eng)": 14.69,
- "Tatoeba (hun-eng)": 7.28,
- "Tatoeba (hye-eng)": 0.77,
- "Tatoeba (ido-eng)": 46.65,
- "Tatoeba (ile-eng)": 59.43,
- "Tatoeba (ina-eng)": 82.71,
- "Tatoeba (ind-eng)": 37.26,
- "Tatoeba (isl-eng)": 11.21,
- "Tatoeba (ita-eng)": 79.77,
- "Tatoeba (jav-eng)": 7.81,
- "Tatoeba (jpn-eng)": 0.91,
- "Tatoeba (kab-eng)": 2.23,
- "Tatoeba (kat-eng)": 1.48,
- "Tatoeba (kaz-eng)": 1.77,
- "Tatoeba (khm-eng)": 0.38,
- "Tatoeba (kor-eng)": 1.96,
- "Tatoeba (kur-eng)": 12.11,
- "Tatoeba (kzj-eng)": 6.13,
- "Tatoeba (lat-eng)": 27.84,
- "Tatoeba (lfn-eng)": 45.89,
- "Tatoeba (lit-eng)": 5.94,
- "Tatoeba (lvs-eng)": 8.11,
- "Tatoeba (mal-eng)": 0.59,
- "Tatoeba (mar-eng)": 0.03,
- "Tatoeba (max-eng)": 21.7,
- "Tatoeba (mhr-eng)": 0.68,
- "Tatoeba (mkd-eng)": 5.92,
- "Tatoeba (mon-eng)": 2.39,
- "Tatoeba (nds-eng)": 45.04,
- "Tatoeba (nld-eng)": 64.75,
- "Tatoeba (nno-eng)": 36.74,
- "Tatoeba (nob-eng)": 54.77,
- "Tatoeba (nov-eng)": 57.12,
- "Tatoeba (oci-eng)": 34.39,
- "Tatoeba (orv-eng)": 2.04,
- "Tatoeba (pam-eng)": 8.34,
- "Tatoeba (pes-eng)": 0.87,
- "Tatoeba (pms-eng)": 38.06,
- "Tatoeba (pol-eng)": 28.35,
- "Tatoeba (por-eng)": 83.61,
- "Tatoeba (ron-eng)": 65.27,
- "Tatoeba (rus-eng)": 30.42,
- "Tatoeba (slk-eng)": 13.19,
- "Tatoeba (slv-eng)": 13.49,
- "Tatoeba (spa-eng)": 89.18,
- "Tatoeba (sqi-eng)": 14.66,
- "Tatoeba (srp-eng)": 13.24,
- "Tatoeba (swe-eng)": 60.67,
- "Tatoeba (swg-eng)": 34.76,
- "Tatoeba (swh-eng)": 8.07,
- "Tatoeba (tam-eng)": 0.36,
- "Tatoeba (tat-eng)": 1.46,
- "Tatoeba (tel-eng)": 0.67,
- "Tatoeba (tgl-eng)": 25.22,
- "Tatoeba (tha-eng)": 1.58,
- "Tatoeba (tuk-eng)": 4.99,
- "Tatoeba (tur-eng)": 7.72,
- "Tatoeba (tzl-eng)": 38.49,
- "Tatoeba (uig-eng)": 0.87,
- "Tatoeba (ukr-eng)": 9.12,
- "Tatoeba (urd-eng)": 0.0,
- "Tatoeba (uzb-eng)": 5.48,
- "Tatoeba (vie-eng)": 8.45,
- "Tatoeba (war-eng)": 13.75,
- "Tatoeba (wuu-eng)": 1.44,
- "Tatoeba (xho-eng)": 9.15,
- "Tatoeba (yid-eng)": 0.28,
- "Tatoeba (yue-eng)": 0.98,
- "Tatoeba (zsm-eng)": 35.71
+ "f1": [
+ {
+ "Model": "bm25s"
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "sentence-t5-xl",
- "AmazonCounterfactualClassification (de)": 67.01,
- "AmazonCounterfactualClassification (en)": 76.01,
- "AmazonCounterfactualClassification (en-ext)": 77.29,
- "AmazonCounterfactualClassification (ja)": 45.61,
- "AmazonPolarityClassification": 93.17,
- "AmazonReviewsClassification (de)": 44.05,
- "AmazonReviewsClassification (en)": 48.18,
- "AmazonReviewsClassification (es)": 45.01,
- "AmazonReviewsClassification (fr)": 43.52,
- "AmazonReviewsClassification (ja)": 22.23,
- "AmazonReviewsClassification (zh)": 21.88,
- "Banking77Classification": 80.88,
- "EmotionClassification": 51.95,
- "ImdbClassification": 87.54,
- "MTOPDomainClassification (de)": 83.28,
- "MTOPDomainClassification (en)": 90.73,
- "MTOPDomainClassification (es)": 85.32,
- "MTOPDomainClassification (fr)": 85.14,
- "MTOPDomainClassification (hi)": 20.85,
- "MTOPDomainClassification (th)": 15.62,
- "MTOPIntentClassification (de)": 54.65,
- "MTOPIntentClassification (en)": 68.15,
- "MTOPIntentClassification (es)": 57.38,
- "MTOPIntentClassification (fr)": 54.39,
- "MTOPIntentClassification (hi)": 3.28,
- "MTOPIntentClassification (th)": 5.08,
- "MasakhaNEWSClassification (fra)": 80.09,
- "MassiveIntentClassification (af)": 40.17,
- "MassiveIntentClassification (am)": 2.18,
- "MassiveIntentClassification (ar)": 4.18,
- "MassiveIntentClassification (az)": 30.02,
- "MassiveIntentClassification (bn)": 2.6,
- "MassiveIntentClassification (cy)": 29.15,
- "MassiveIntentClassification (da)": 47.69,
- "MassiveIntentClassification (de)": 57.43,
- "MassiveIntentClassification (el)": 9.96,
- "MassiveIntentClassification (en)": 72.09,
- "MassiveIntentClassification (es)": 57.97,
- "MassiveIntentClassification (fa)": 3.6,
- "MassiveIntentClassification (fi)": 34.02,
- "MassiveIntentClassification (fr)": 60.99,
- "MassiveIntentClassification (he)": 2.51,
- "MassiveIntentClassification (hi)": 3.02,
- "MassiveIntentClassification (hu)": 31.66,
- "MassiveIntentClassification (hy)": 3.32,
- "MassiveIntentClassification (id)": 41.53,
- "MassiveIntentClassification (is)": 30.25,
- "MassiveIntentClassification (it)": 56.57,
- "MassiveIntentClassification (ja)": 3.5,
- "MassiveIntentClassification (jv)": 31.67,
- "MassiveIntentClassification (ka)": 2.79,
- "MassiveIntentClassification (km)": 5.43,
- "MassiveIntentClassification (kn)": 2.79,
- "MassiveIntentClassification (ko)": 2.67,
- "MassiveIntentClassification (lv)": 34.25,
- "MassiveIntentClassification (ml)": 2.98,
- "MassiveIntentClassification (mn)": 20.99,
- "MassiveIntentClassification (ms)": 37.43,
- "MassiveIntentClassification (my)": 4.02,
- "MassiveIntentClassification (nb)": 45.91,
- "MassiveIntentClassification (nl)": 50.51,
- "MassiveIntentClassification (pl)": 43.95,
- "MassiveIntentClassification (pt)": 57.95,
- "MassiveIntentClassification (ro)": 49.37,
- "MassiveIntentClassification (ru)": 33.46,
- "MassiveIntentClassification (sl)": 36.33,
- "MassiveIntentClassification (sq)": 37.65,
- "MassiveIntentClassification (sv)": 46.35,
- "MassiveIntentClassification (sw)": 30.6,
- "MassiveIntentClassification (ta)": 1.79,
- "MassiveIntentClassification (te)": 2.26,
- "MassiveIntentClassification (th)": 4.02,
- "MassiveIntentClassification (tl)": 38.92,
- "MassiveIntentClassification (tr)": 32.05,
- "MassiveIntentClassification (ur)": 2.7,
- "MassiveIntentClassification (vi)": 21.47,
- "MassiveIntentClassification (zh-CN)": 0.59,
- "MassiveIntentClassification (zh-TW)": 3.24,
- "MassiveScenarioClassification (af)": 50.81,
- "MassiveScenarioClassification (am)": 6.95,
- "MassiveScenarioClassification (ar)": 12.32,
- "MassiveScenarioClassification (az)": 38.79,
- "MassiveScenarioClassification (bn)": 8.0,
- "MassiveScenarioClassification (cy)": 33.91,
- "MassiveScenarioClassification (da)": 55.79,
- "MassiveScenarioClassification (de)": 65.33,
- "MassiveScenarioClassification (el)": 16.89,
- "MassiveScenarioClassification (en)": 73.26,
- "MassiveScenarioClassification (es)": 62.52,
- "MassiveScenarioClassification (fa)": 6.08,
- "MassiveScenarioClassification (fi)": 43.34,
- "MassiveScenarioClassification (fr)": 66.42,
- "MassiveScenarioClassification (he)": 7.55,
- "MassiveScenarioClassification (hi)": 7.44,
- "MassiveScenarioClassification (hu)": 40.85,
- "MassiveScenarioClassification (hy)": 9.25,
- "MassiveScenarioClassification (id)": 51.92,
- "MassiveScenarioClassification (is)": 40.09,
- "MassiveScenarioClassification (it)": 62.94,
- "MassiveScenarioClassification (ja)": 7.9,
- "MassiveScenarioClassification (jv)": 41.33,
- "MassiveScenarioClassification (ka)": 7.76,
- "MassiveScenarioClassification (km)": 9.19,
- "MassiveScenarioClassification (kn)": 8.36,
- "MassiveScenarioClassification (ko)": 6.13,
- "MassiveScenarioClassification (lv)": 40.7,
- "MassiveScenarioClassification (ml)": 6.98,
- "MassiveScenarioClassification (mn)": 27.0,
- "MassiveScenarioClassification (ms)": 46.9,
- "MassiveScenarioClassification (my)": 9.55,
- "MassiveScenarioClassification (nb)": 53.43,
- "MassiveScenarioClassification (nl)": 59.65,
- "MassiveScenarioClassification (pl)": 49.87,
- "MassiveScenarioClassification (pt)": 62.18,
- "MassiveScenarioClassification (ro)": 58.22,
- "MassiveScenarioClassification (ru)": 40.73,
- "MassiveScenarioClassification (sl)": 43.66,
- "MassiveScenarioClassification (sq)": 49.25,
- "MassiveScenarioClassification (sv)": 57.17,
- "MassiveScenarioClassification (sw)": 40.55,
- "MassiveScenarioClassification (ta)": 7.46,
- "MassiveScenarioClassification (te)": 7.03,
- "MassiveScenarioClassification (th)": 8.52,
- "MassiveScenarioClassification (tl)": 51.74,
- "MassiveScenarioClassification (tr)": 43.01,
- "MassiveScenarioClassification (ur)": 9.61,
- "MassiveScenarioClassification (vi)": 28.91,
- "MassiveScenarioClassification (zh-CN)": 5.86,
- "MassiveScenarioClassification (zh-TW)": 7.14,
- "ToxicConversationsClassification": 70.95,
- "TweetSentimentExtractionClassification": 61.21
+ "Model": "bm25s"
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "sentence-t5-xl",
- "AlloProfClusteringP2P": 60.37,
- "AlloProfClusteringS2S": 40.76,
- "ArxivClusteringP2P": 41.62,
- "ArxivClusteringS2S": 31.17,
- "BiorxivClusteringP2P": 36.43,
- "BiorxivClusteringS2S": 26.47,
- "HALClusteringS2S": 20.28,
- "MLSUMClusteringP2P": 41.61,
- "MLSUMClusteringS2S": 33.6,
- "MasakhaNEWSClusteringP2P (fra)": 62.82,
- "MasakhaNEWSClusteringS2S (fra)": 31.74,
- "MedrxivClusteringP2P": 32.3,
- "MedrxivClusteringS2S": 26.93,
- "RedditClustering": 57.03,
- "RedditClusteringP2P": 62.34,
- "StackExchangeClustering": 67.13,
- "StackExchangeClusteringP2P": 34.79,
- "TwentyNewsgroupsClustering": 49.53
+ "Model": "bm25s"
}
]
},
"PairClassification": {
"ap": [
{
- "Model": "sentence-t5-xl",
- "OpusparcusPC (fr)": 92.48,
- "PawsXPairClassification (fr)": 62.52,
- "SprintDuplicateQuestions": 91.44,
- "TwitterSemEval2015": 80.89,
- "TwitterURLCorpus": 85.86
+ "Model": "bm25s"
}
]
},
"Reranking": {
"map": [
{
- "Model": "sentence-t5-xl",
- "AlloprofReranking": 63.3,
- "AskUbuntuDupQuestions": 62.86,
- "MindSmallReranking": 29.77,
- "SciDocsRR": 75.16,
- "StackOverflowDupQuestions": 51.05,
- "SyntecReranking": 83.07
+ "Model": "bm25s"
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "bm25s",
+ "ArguAna": 49.28,
+ "CQADupstackRetrieval": 31.86,
+ "ClimateFEVER": 13.62,
+ "DBPedia": 29.91,
+ "FEVER": 48.09,
+ "FiQA2018": 25.14,
+ "HotpotQA": 56.91,
+ "MSMARCO": 21.89,
+ "NFCorpus": 32.08,
+ "NQ": 28.5,
+ "QuoraRetrieval": 80.42,
+ "SCIDOCS": 15.78,
+ "SciFact": 68.7,
+ "TRECCOVID": 62.31,
+ "Touche2020": 33.05
+ }
+ ]
+ },
+ "STS": {
+ "spearman": [
+ {
+ "Model": "bm25s"
+ }
+ ]
+ },
+ "Summarization": {
+ "spearman": [
+ {
+ "Model": "bm25s"
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "bm25s"
+ }
+ ]
+ }
+ },
+ "GritLM-7B": {
+ "BitextMining": {
+ "f1": []
+ },
+ "Classification": {
+ "accuracy": []
+ },
+ "Clustering": {
+ "v_measure": []
+ },
+ "PairClassification": {
+ "ap": []
+ },
+ "Reranking": {
+ "map": []
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "GritLM-7B",
+ "BrightRetrieval (pony)": 21.98,
+ "BrightRetrieval (robotics)": 17.31,
+ "BrightRetrieval (economics)": 19.0,
+ "BrightRetrieval (theoremqa_questions)": 23.34,
+ "BrightRetrieval (leetcode)": 29.85,
+ "BrightRetrieval (earth_science)": 32.77,
+ "BrightRetrieval (stackoverflow)": 11.62,
+ "BrightRetrieval (sustainable_living)": 18.04,
+ "BrightRetrieval (biology)": 25.04,
+ "BrightRetrieval (psychology)": 19.92,
+ "BrightRetrieval (theoremqa_theorems)": 19.75,
+ "BrightRetrieval (aops)": 8.91
+ }
+ ],
+ "recall_at_1": [
+ {
+ "Model": "GritLM-7B",
+ "BrightRetrieval (biology)": 37.46,
+ "BrightRetrieval (robotics)": 17.82,
+ "BrightRetrieval (pony)": 0.0,
+ "BrightRetrieval (sustainable_living)": 32.36,
+ "BrightRetrieval (psychology)": 35.35,
+ "BrightRetrieval (stackoverflow)": 20.08,
+ "BrightRetrieval (earth_science)": 39.44,
+ "BrightRetrieval (economics)": 25.73
+ }
+ ]
+ },
+ "STS": {
+ "spearman": []
+ },
+ "Summarization": {
+ "spearman": []
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "GritLM-7B",
+ "Core17InstructionRetrieval": 2.62,
+ "News21InstructionRetrieval": -1.01,
+ "Robust04InstructionRetrieval": -1.68
}
]
+ }
+ },
+ "bm25": {
+ "BitextMining": {
+ "f1": []
+ },
+ "Classification": {
+ "accuracy": []
+ },
+ "Clustering": {
+ "v_measure": []
+ },
+ "PairClassification": {
+ "ap": []
+ },
+ "Reranking": {
+ "map": []
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "sentence-t5-xl",
- "AlloprofRetrieval": 40.38,
- "ArguAna": 39.4,
- "BSARDRetrieval": 0.14,
- "CQADupstackRetrieval": 40.78,
- "ClimateFEVER": 10.61,
- "DBPedia": 33.65,
- "FEVER": 36.12,
- "FiQA2018": 44.71,
- "HotpotQA": 37.17,
- "MSMARCO": 25.17,
- "MintakaRetrieval (fr)": 31.54,
- "NFCorpus": 33.18,
- "NQ": 46.29,
- "QuoraRetrieval": 85.85,
- "SCIDOCS": 15.97,
- "SciFact": 50.91,
- "SyntecRetrieval": 74.24,
- "TRECCOVID": 54.77,
- "Touche2020": 22.51,
- "XPQARetrieval (fr)": 52.14
+ "Model": "bm25",
+ "BrightRetrieval (robotics)": 13.53,
+ "BrightRetrieval (pony)": 7.93,
+ "BrightRetrieval (leetcode)": 24.37,
+ "BrightRetrieval (earth_science)": 27.06,
+ "BrightRetrieval (stackoverflow)": 16.55,
+ "BrightRetrieval (economics)": 14.87,
+ "BrightRetrieval (theoremqa_questions)": 9.78,
+ "BrightRetrieval (theoremqa_theorems)": 4.75,
+ "BrightRetrieval (psychology)": 12.51,
+ "BrightRetrieval (sustainable_living)": 15.22,
+ "BrightRetrieval (biology)": 19.19,
+ "BrightRetrieval (aops)": 6.2
}
- ]
- },
- "STS": {
- "spearman": [
+ ],
+ "recall_at_1": [
{
- "Model": "sentence-t5-xl",
- "BIOSSES": 73.12,
- "SICK-R": 79.98,
- "SICKFr": 75.08,
- "STS12": 79.02,
- "STS13": 88.8,
- "STS14": 84.33,
- "STS15": 88.89,
- "STS16": 85.31,
- "STS17 (ar-ar)": 11.13,
- "STS17 (en-ar)": -3.93,
- "STS17 (en-de)": 79.04,
- "STS17 (en-en)": 88.91,
- "STS17 (en-tr)": 13.61,
- "STS17 (es-en)": 71.72,
- "STS17 (es-es)": 83.42,
- "STS17 (fr-en)": 71.38,
- "STS17 (it-en)": 69.5,
- "STS17 (ko-ko)": 9.61,
- "STS17 (nl-en)": 66.12,
- "STS22 (ar)": 29.6,
- "STS22 (de)": 47.72,
- "STS22 (de-en)": 49.64,
- "STS22 (de-fr)": 62.21,
- "STS22 (de-pl)": 34.34,
- "STS22 (en)": 64.32,
- "STS22 (es)": 58.16,
- "STS22 (es-en)": 69.15,
- "STS22 (es-it)": 65.26,
- "STS22 (fr)": 77.49,
- "STS22 (fr-pl)": 50.71,
- "STS22 (it)": 66.91,
- "STS22 (pl)": 27.04,
- "STS22 (pl-en)": 58.85,
- "STS22 (ru)": 26.63,
- "STS22 (tr)": 43.36,
- "STS22 (zh)": 33.55,
- "STS22 (zh-en)": 29.0,
- "STSBenchmark": 83.93,
- "STSBenchmarkMultilingualSTS (fr)": 79.42
+ "Model": "bm25",
+ "BrightRetrieval (robotics)": 7.43,
+ "BrightRetrieval (pony)": 5.35,
+ "BrightRetrieval (biology)": 10.68,
+ "BrightRetrieval (stackoverflow)": 22.22,
+ "BrightRetrieval (earth_science)": 15.37,
+ "BrightRetrieval (psychology)": 8.42,
+ "BrightRetrieval (sustainable_living)": 10.68,
+ "BrightRetrieval (economics)": 10.68
}
]
},
+ "STS": {
+ "spearman": []
+ },
"Summarization": {
- "spearman": [
- {
- "Model": "sentence-t5-xl",
- "SummEval": 29.91,
- "SummEvalFr": 31.59
- }
- ]
+ "spearman": []
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "sentence-t5-xl"
+ "Model": "bm25",
+ "Core17InstructionRetrieval": -1.06,
+ "News21InstructionRetrieval": -2.15,
+ "Robust04InstructionRetrieval": -3.06
}
]
}
},
- "Cohere-embed-english-v3.0": {
+ "gte-Qwen1.5-7B-instruct": {
"BitextMining": {
- "f1": [
- {
- "Model": "Cohere-embed-english-v3.0"
- }
- ]
+ "f1": []
},
"Classification": {
"accuracy": [
{
- "Model": "Cohere-embed-english-v3.0"
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "AmazonCounterfactualClassification (en)": 83.16,
+ "AmazonPolarityClassification": 96.7,
+ "AmazonReviewsClassification (en)": 62.17,
+ "AmazonReviewsClassification (zh)": 52.95,
+ "Banking77Classification": 81.68,
+ "EmotionClassification": 54.53,
+ "IFlyTek": 53.77,
+ "ImdbClassification": 95.58,
+ "JDReview": 88.2,
+ "MTOPDomainClassification (en)": 95.75,
+ "MTOPIntentClassification (en)": 84.26,
+ "MassiveIntentClassification (zh-CN)": 76.25,
+ "MassiveIntentClassification (en)": 78.47,
+ "MassiveScenarioClassification (en)": 78.19,
+ "MassiveScenarioClassification (zh-CN)": 77.26,
+ "MultilingualSentiment": 77.42,
+ "OnlineShopping": 94.48,
+ "TNews": 51.24,
+ "ToxicConversationsClassification": 78.75,
+ "TweetSentimentExtractionClassification": 66.0,
+ "Waimai": 88.63
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "Cohere-embed-english-v3.0"
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "ArxivClusteringP2P": 56.4,
+ "ArxivClusteringS2S": 51.45,
+ "BiorxivClusteringP2P": 49.01,
+ "BiorxivClusteringS2S": 45.06,
+ "CLSClusteringP2P": 47.21,
+ "CLSClusteringS2S": 45.79,
+ "MedrxivClusteringP2P": 44.37,
+ "MedrxivClusteringS2S": 42.0,
+ "RedditClustering": 73.37,
+ "RedditClusteringP2P": 72.51,
+ "StackExchangeClustering": 79.07,
+ "StackExchangeClusteringP2P": 49.57,
+ "ThuNewsClusteringP2P": 87.43,
+ "ThuNewsClusteringS2S": 87.9,
+ "TwentyNewsgroupsClustering": 51.31
}
]
},
"PairClassification": {
- "ap": [
+ "ap": [],
+ "cos_sim_ap": [
{
- "Model": "Cohere-embed-english-v3.0"
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "Cmnli": 91.81,
+ "Ocnli": 85.22,
+ "SprintDuplicateQuestions": 95.99,
+ "TwitterSemEval2015": 79.36,
+ "TwitterURLCorpus": 86.79
}
]
},
"Reranking": {
"map": [
{
- "Model": "Cohere-embed-english-v3.0"
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "AskUbuntuDupQuestions": 66.0,
+ "CMedQAv1": 86.37,
+ "CMedQAv2": 87.41,
+ "MindSmallReranking": 32.71,
+ "SciDocsRR": 87.89,
+ "StackOverflowDupQuestions": 53.93,
+ "T2Reranking": 68.11
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "Cohere-embed-english-v3.0",
- "AILACasedocs": 31.54,
- "AILAStatutes": 27.15,
- "ARCChallenge": 9.89,
- "AlphaNLI": 15.1,
- "BrightRetrieval (psychology)": 21.82,
- "BrightRetrieval (economics)": 20.18,
- "BrightRetrieval (robotics)": 16.21,
- "BrightRetrieval (biology)": 18.98,
- "BrightRetrieval (stackoverflow)": 16.47,
- "BrightRetrieval (theoremqa_theorems)": 6.04,
- "BrightRetrieval (pony)": 1.77,
- "BrightRetrieval (sustainable_living)": 17.69,
- "BrightRetrieval (aops)": 6.46,
- "BrightRetrieval (theoremqa_questions)": 15.07,
- "BrightRetrieval (leetcode)": 26.78,
- "BrightRetrieval (earth_science)": 27.45,
- "GerDaLIRSmall": 6.05,
- "HellaSwag": 26.35,
- "LeCaRDv2": 21.02,
- "LegalBenchConsumerContractsQA": 77.12,
- "LegalBenchCorporateLobbying": 93.68,
- "LegalQuAD": 26.08,
- "LegalSummarization": 61.7,
- "PIQA": 28.49,
- "Quail": 4.1,
- "RARbCode": 57.19,
- "RARbMath": 72.26,
- "SIQA": 4.26,
- "SpartQA": 3.75,
- "TempReasonL1": 1.5,
- "TempReasonL2Fact": 35.91,
- "TempReasonL2Pure": 1.89,
- "TempReasonL3Fact": 27.51,
- "TempReasonL3Pure": 8.53,
- "WinoGrande": 58.01
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "ArguAna": 62.65,
+ "BrightRetrieval (stackoverflow)": 19.85,
+ "BrightRetrieval (earth_science)": 36.22,
+ "BrightRetrieval (leetcode)": 25.46,
+ "BrightRetrieval (theoremqa_questions)": 26.97,
+ "BrightRetrieval (economics)": 17.72,
+ "BrightRetrieval (robotics)": 13.47,
+ "BrightRetrieval (pony)": 9.79,
+ "BrightRetrieval (aops)": 14.36,
+ "BrightRetrieval (psychology)": 24.61,
+ "BrightRetrieval (theoremqa_theorems)": 30.8,
+ "BrightRetrieval (biology)": 30.92,
+ "BrightRetrieval (sustainable_living)": 14.93,
+ "CQADupstackRetrieval": 40.64,
+ "ClimateFEVER": 44.0,
+ "CmedqaRetrieval": 43.47,
+ "CovidRetrieval": 80.87,
+ "DBPedia": 48.04,
+ "DuRetrieval": 86.01,
+ "EcomRetrieval": 66.46,
+ "FEVER": 93.35,
+ "FiQA2018": 55.31,
+ "HotpotQA": 72.25,
+ "MMarcoRetrieval": 73.83,
+ "MSMARCO": 41.68,
+ "MedicalRetrieval": 61.33,
+ "NFCorpus": 38.25,
+ "NQ": 61.79,
+ "QuoraRetrieval": 89.61,
+ "SCIDOCS": 27.69,
+ "SciFact": 75.31,
+ "T2Retrieval": 83.58,
+ "TRECCOVID": 72.72,
+ "Touche2020": 20.3,
+ "VideoRetrieval": 69.41
+ }
+ ],
+ "recall_at_1": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "BrightRetrieval (economics)": 25.73,
+ "BrightRetrieval (pony)": 1.32,
+ "BrightRetrieval (robotics)": 21.29,
+ "BrightRetrieval (biology)": 39.24,
+ "BrightRetrieval (earth_science)": 36.13,
+ "BrightRetrieval (stackoverflow)": 23.5,
+ "BrightRetrieval (psychology)": 42.28,
+ "BrightRetrieval (sustainable_living)": 33.1
}
]
},
"STS": {
- "spearman": [
+ "spearman": [],
+ "cos_sim_spearman": [
{
- "Model": "Cohere-embed-english-v3.0"
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "AFQMC": 58.47,
+ "ATEC": 55.46,
+ "BIOSSES": 81.12,
+ "BQ": 77.59,
+ "LCQMC": 76.29,
+ "PAWSX": 50.22,
+ "QBQTC": 31.82,
+ "SICK-R": 79.15,
+ "STS12": 76.52,
+ "STS13": 88.63,
+ "STS14": 83.32,
+ "STS15": 87.5,
+ "STS16": 86.39,
+ "STS17 (en-en)": 87.79,
+ "STS22 (en)": 66.4,
+ "STS22 (zh)": 67.36,
+ "STSB": 81.37,
+ "STSBenchmark": 87.35
}
]
},
"Summarization": {
- "spearman": [
+ "spearman": [],
+ "cos_sim_spearman": [
{
- "Model": "Cohere-embed-english-v3.0"
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "SummEval": 31.46
}
]
},
"InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "Cohere-embed-english-v3.0",
- "Core17InstructionRetrieval": 2.8,
- "News21InstructionRetrieval": 0.2,
- "Robust04InstructionRetrieval": -3.63
- }
- ]
+ "p-MRR": []
}
},
- "sentence-camembert-base": {
+ "voyage-large-2-instruct": {
"BitextMining": {
- "f1": [
- {
- "Model": "sentence-camembert-base"
- }
- ]
+ "f1": []
},
"Classification": {
"accuracy": [
{
- "Model": "sentence-camembert-base",
- "AmazonReviewsClassification (fr)": 36.03,
- "MTOPDomainClassification (fr)": 77.1,
- "MTOPIntentClassification (fr)": 43.44,
- "MasakhaNEWSClassification (fra)": 70.36,
- "MassiveIntentClassification (fr)": 51.59,
- "MassiveScenarioClassification (fr)": 61.28
+ "Model": "voyage-large-2-instruct",
+ "AmazonCounterfactualClassification (en)": 77.6,
+ "AmazonPolarityClassification": 96.58,
+ "AmazonReviewsClassification (en)": 50.77,
+ "Banking77Classification": 86.96,
+ "EmotionClassification": 59.81,
+ "ImdbClassification": 96.13,
+ "MTOPDomainClassification (en)": 98.86,
+ "MTOPIntentClassification (en)": 86.97,
+ "MassiveIntentClassification (en)": 81.08,
+ "MassiveScenarioClassification (en)": 87.95,
+ "ToxicConversationsClassification": 83.58,
+ "TweetSentimentExtractionClassification": 71.55
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "sentence-camembert-base",
- "AlloProfClusteringP2P": 59.09,
- "AlloProfClusteringS2S": 38.92,
- "HALClusteringS2S": 20.22,
- "MLSUMClusteringP2P": 35.98,
- "MLSUMClusteringS2S": 27.05,
- "MasakhaNEWSClusteringP2P (fra)": 36.03,
- "MasakhaNEWSClusteringS2S (fra)": 30.77
+ "Model": "voyage-large-2-instruct",
+ "ArxivClusteringP2P": 51.81,
+ "ArxivClusteringS2S": 44.73,
+ "BiorxivClusteringP2P": 46.07,
+ "BiorxivClusteringS2S": 40.64,
+ "MedrxivClusteringP2P": 42.94,
+ "MedrxivClusteringS2S": 41.44,
+ "RedditClustering": 68.5,
+ "RedditClusteringP2P": 64.86,
+ "StackExchangeClustering": 74.16,
+ "StackExchangeClusteringP2P": 45.1,
+ "TwentyNewsgroupsClustering": 66.62
}
]
},
"PairClassification": {
- "ap": [
+ "ap": [],
+ "cos_sim_ap": [
{
- "Model": "sentence-camembert-base",
- "OpusparcusPC (fr)": 92.05,
- "PawsXPairClassification (fr)": 57.44
+ "Model": "voyage-large-2-instruct",
+ "SprintDuplicateQuestions": 94.5,
+ "TwitterSemEval2015": 86.32,
+ "TwitterURLCorpus": 86.9
}
]
},
"Reranking": {
"map": [
{
- "Model": "sentence-camembert-base",
- "AlloprofReranking": 48.68,
- "SyntecReranking": 79.75
+ "Model": "voyage-large-2-instruct",
+ "AskUbuntuDupQuestions": 64.92,
+ "MindSmallReranking": 30.97,
+ "SciDocsRR": 89.34,
+ "StackOverflowDupQuestions": 55.11
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "sentence-camembert-base",
- "AlloprofRetrieval": 21.94,
- "BSARDRetrieval": 0.0,
- "MintakaRetrieval (fr)": 13.36,
- "SyntecRetrieval": 68.62,
- "XPQARetrieval (fr)": 57.92
+ "Model": "voyage-large-2-instruct",
+ "ArguAna": 64.06,
+ "BrightRetrieval (theoremqa_questions)": 26.06,
+ "BrightRetrieval (earth_science)": 25.09,
+ "BrightRetrieval (leetcode)": 30.6,
+ "BrightRetrieval (economics)": 19.85,
+ "BrightRetrieval (robotics)": 11.21,
+ "BrightRetrieval (psychology)": 24.79,
+ "BrightRetrieval (aops)": 7.45,
+ "BrightRetrieval (sustainable_living)": 15.58,
+ "BrightRetrieval (pony)": 1.48,
+ "BrightRetrieval (theoremqa_theorems)": 11.1,
+ "BrightRetrieval (biology)": 23.55,
+ "BrightRetrieval (stackoverflow)": 15.03,
+ "CQADupstackRetrieval": 46.6,
+ "ClimateFEVER": 32.65,
+ "DBPedia": 46.03,
+ "FEVER": 91.47,
+ "FiQA2018": 59.76,
+ "HotpotQA": 70.86,
+ "MSMARCO": 40.6,
+ "NFCorpus": 40.32,
+ "NQ": 65.92,
+ "QuoraRetrieval": 87.4,
+ "SCIDOCS": 24.32,
+ "SciFact": 79.99,
+ "TRECCOVID": 85.07,
+ "Touche2020": 39.16
+ }
+ ],
+ "recall_at_1": [
+ {
+ "Model": "voyage-large-2-instruct",
+ "BrightRetrieval (psychology)": 41.58,
+ "BrightRetrieval (robotics)": 12.87,
+ "BrightRetrieval (biology)": 34.38,
+ "BrightRetrieval (stackoverflow)": 13.68,
+ "BrightRetrieval (pony)": 1.28,
+ "BrightRetrieval (economics)": 26.7,
+ "BrightRetrieval (sustainable_living)": 31.1,
+ "BrightRetrieval (earth_science)": 35.35
}
]
},
"STS": {
- "spearman": [
+ "spearman": [],
+ "cos_sim_spearman": [
{
- "Model": "sentence-camembert-base",
- "SICKFr": 74.18,
- "STS22 (fr)": 77.54,
- "STSBenchmarkMultilingualSTS (fr)": 81.64
+ "Model": "voyage-large-2-instruct",
+ "BIOSSES": 89.24,
+ "SICK-R": 83.16,
+ "STS12": 73.34,
+ "STS13": 88.49,
+ "STS14": 86.49,
+ "STS15": 91.13,
+ "STS16": 85.68,
+ "STS17 (en-en)": 90.06,
+ "STS22 (en)": 66.32,
+ "STSBenchmark": 89.22
}
]
},
"Summarization": {
- "spearman": [
+ "spearman": [],
+ "cos_sim_spearman": [
{
- "Model": "sentence-camembert-base",
- "SummEvalFr": 28.77
+ "Model": "voyage-large-2-instruct",
+ "SummEval": 30.84
}
]
},
"InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "sentence-camembert-base"
- }
- ]
+ "p-MRR": []
}
},
- "Cohere-embed-multilingual-v3.0": {
+ "text-embedding-3-large": {
"BitextMining": {
- "f1": [
- {
- "Model": "Cohere-embed-multilingual-v3.0"
- }
- ]
+ "f1": []
},
"Classification": {
"accuracy": [
{
- "Model": "Cohere-embed-multilingual-v3.0",
- "AmazonReviewsClassification (fr)": 41.89,
- "MTOPDomainClassification (fr)": 86.23,
- "MTOPIntentClassification (fr)": 61.07,
- "MasakhaNEWSClassification (fra)": 83.06,
- "MassiveIntentClassification (fr)": 62.94,
- "MassiveScenarioClassification (fr)": 67.29
+ "Model": "text-embedding-3-large",
+ "AmazonCounterfactualClassification (en)": 78.93,
+ "AmazonPolarityClassification": 92.85,
+ "AmazonReviewsClassification (en)": 48.7,
+ "Banking77Classification": 85.69,
+ "EmotionClassification": 51.58,
+ "ImdbClassification": 87.67,
+ "MTOPDomainClassification (en)": 95.36,
+ "MTOPIntentClassification (en)": 75.07,
+ "MassiveIntentClassification (en)": 74.64,
+ "MassiveScenarioClassification (en)": 79.79,
+ "ToxicConversationsClassification": 72.92,
+ "TweetSentimentExtractionClassification": 62.22
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "Cohere-embed-multilingual-v3.0",
- "AlloProfClusteringP2P": 63.53,
- "AlloProfClusteringS2S": 36.18,
- "HALClusteringS2S": 19.9,
- "MLSUMClusteringP2P": 45.08,
- "MLSUMClusteringS2S": 34.75,
- "MasakhaNEWSClusteringP2P (fra)": 53.18,
- "MasakhaNEWSClusteringS2S (fra)": 32.31
+ "Model": "text-embedding-3-large",
+ "ArxivClusteringP2P": 49.01,
+ "ArxivClusteringS2S": 44.45,
+ "BiorxivClusteringP2P": 38.03,
+ "BiorxivClusteringS2S": 36.53,
+ "MedrxivClusteringP2P": 32.7,
+ "MedrxivClusteringS2S": 31.27,
+ "RedditClustering": 67.84,
+ "RedditClusteringP2P": 67.96,
+ "StackExchangeClustering": 76.26,
+ "StackExchangeClusteringP2P": 36.88,
+ "TwentyNewsgroupsClustering": 58.14
}
]
},
"PairClassification": {
- "ap": [
+ "ap": [],
+ "cos_sim_ap": [
{
- "Model": "Cohere-embed-multilingual-v3.0",
- "OpusparcusPC (fr)": 94.08,
- "PawsXPairClassification (fr)": 61.26
+ "Model": "text-embedding-3-large",
+ "SprintDuplicateQuestions": 92.25,
+ "TwitterSemEval2015": 77.13,
+ "TwitterURLCorpus": 87.78
}
]
},
"Reranking": {
"map": [
{
- "Model": "Cohere-embed-multilingual-v3.0",
- "AlloprofReranking": 51.01,
- "SyntecReranking": 85.72
+ "Model": "text-embedding-3-large",
+ "AskUbuntuDupQuestions": 65.03,
+ "MindSmallReranking": 29.86,
+ "SciDocsRR": 86.66,
+ "StackOverflowDupQuestions": 55.08
+ }
+ ]
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
+ {
+ "Model": "text-embedding-3-large",
+ "AILACasedocs": 39.0,
+ "AILAStatutes": 41.31,
+ "ARCChallenge": 23.98,
+ "AlphaNLI": 37.27,
+ "ArguAna": 58.05,
+ "BrightRetrieval (theoremqa_questions)": 22.22,
+ "BrightRetrieval (leetcode)": 23.65,
+ "BrightRetrieval (earth_science)": 26.27,
+ "BrightRetrieval (psychology)": 27.52,
+ "BrightRetrieval (robotics)": 12.93,
+ "BrightRetrieval (economics)": 19.98,
+ "BrightRetrieval (stackoverflow)": 12.49,
+ "BrightRetrieval (biology)": 23.67,
+ "BrightRetrieval (theoremqa_theorems)": 10.82,
+ "BrightRetrieval (pony)": 2.45,
+ "BrightRetrieval (sustainable_living)": 20.32,
+ "BrightRetrieval (aops)": 8.45,
+ "CQADupstackRetrieval": 47.54,
+ "ClimateFEVER": 30.27,
+ "DBPedia": 44.76,
+ "FEVER": 87.94,
+ "FiQA2018": 55.0,
+ "GerDaLIRSmall": 32.77,
+ "HellaSwag": 34.12,
+ "HotpotQA": 71.58,
+ "LEMBNarrativeQARetrieval": 44.09,
+ "LEMBQMSumRetrieval": 32.49,
+ "LEMBSummScreenFDRetrieval": 84.8,
+ "LEMBWikimQARetrieval": 54.16,
+ "LeCaRDv2": 57.2,
+ "LegalBenchConsumerContractsQA": 79.39,
+ "LegalBenchCorporateLobbying": 95.09,
+ "LegalQuAD": 57.47,
+ "LegalSummarization": 71.55,
+ "MSMARCO": 40.24,
+ "NFCorpus": 42.07,
+ "NQ": 61.27,
+ "PIQA": 41.96,
+ "Quail": 10.15,
+ "QuoraRetrieval": 89.05,
+ "RARbCode": 89.64,
+ "RARbMath": 90.08,
+ "SCIDOCS": 23.11,
+ "SIQA": 3.44,
+ "SciFact": 77.77,
+ "SpartQA": 7.51,
+ "TRECCOVID": 79.56,
+ "TempReasonL1": 2.13,
+ "TempReasonL2Fact": 28.65,
+ "TempReasonL2Pure": 10.34,
+ "TempReasonL3Fact": 25.52,
+ "TempReasonL3Pure": 15.28,
+ "Touche2020": 23.35,
+ "WinoGrande": 29.11
+ },
+ {
+ "Model": "text-embedding-3-large",
+ "LEMBNeedleRetrieval": 29.25,
+ "LEMBPasskeyRetrieval": 63.0
}
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
+ ],
+ "recall_at_1": [
{
- "Model": "Cohere-embed-multilingual-v3.0",
- "AlloprofRetrieval": 38.36,
- "BSARDRetrieval": 0.14,
- "MintakaRetrieval (fr)": 25.44,
- "SyntecRetrieval": 79.27,
- "XPQARetrieval (fr)": 58.87
+ "Model": "text-embedding-3-large",
+ "BrightRetrieval (earth_science)": 32.26,
+ "BrightRetrieval (sustainable_living)": 26.34,
+ "BrightRetrieval (economics)": 24.76,
+ "BrightRetrieval (stackoverflow)": 11.54,
+ "BrightRetrieval (pony)": 0.0,
+ "BrightRetrieval (biology)": 33.09,
+ "BrightRetrieval (robotics)": 11.88,
+ "BrightRetrieval (psychology)": 35.15
}
]
},
"STS": {
- "spearman": [
+ "spearman": [],
+ "cos_sim_spearman": [
{
- "Model": "Cohere-embed-multilingual-v3.0",
- "SICKFr": 79.23,
- "STS22 (fr)": 82.76,
- "STSBenchmarkMultilingualSTS (fr)": 81.84
+ "Model": "text-embedding-3-large",
+ "BIOSSES": 84.68,
+ "SICK-R": 79.0,
+ "STS12": 72.84,
+ "STS13": 86.1,
+ "STS14": 81.15,
+ "STS15": 88.49,
+ "STS16": 85.08,
+ "STS17 (en-en)": 90.22,
+ "STS22 (en)": 66.14,
+ "STSBenchmark": 83.56
}
]
},
"Summarization": {
- "spearman": [
+ "spearman": [],
+ "cos_sim_spearman": [
{
- "Model": "Cohere-embed-multilingual-v3.0",
- "SummEvalFr": 31.26
+ "Model": "text-embedding-3-large",
+ "SummEval": 29.92
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "Cohere-embed-multilingual-v3.0"
+ "Model": "text-embedding-3-large",
+ "Core17InstructionRetrieval": -0.2,
+ "News21InstructionRetrieval": -2.03,
+ "Robust04InstructionRetrieval": -5.81
}
]
}
},
- "text2vec-large-chinese": {
+ "e5-mistral-7b-instruct": {
"BitextMining": {
- "f1": [
- {
- "Model": "text2vec-large-chinese"
- }
- ]
+ "f1": []
},
"Classification": {
"accuracy": [
{
- "Model": "text2vec-large-chinese",
- "AmazonReviewsClassification (zh)": 33.77,
- "IFlyTek": 41.54,
- "JDReview": 81.56,
- "MassiveIntentClassification (zh-CN)": 63.23,
- "MassiveScenarioClassification (zh-CN)": 68.45,
- "MultilingualSentiment": 58.97,
- "OnlineShopping": 83.51,
- "TNews": 38.92,
- "Waimai": 76.01
+ "Model": "e5-mistral-7b-instruct",
+ "AmazonReviewsClassification (fr)": 36.71,
+ "MTOPDomainClassification (fr)": 74.8,
+ "MTOPIntentClassification (fr)": 53.97,
+ "MasakhaNEWSClassification (fra)": 80.59,
+ "MassiveIntentClassification (fr)": 46.39,
+ "MassiveScenarioClassification (fr)": 53.86
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "text2vec-large-chinese",
- "CLSClusteringP2P": 30.13,
- "CLSClusteringS2S": 28.77,
- "ThuNewsClusteringP2P": 35.05,
- "ThuNewsClusteringS2S": 26.14
+ "Model": "e5-mistral-7b-instruct",
+ "AlloProfClusteringP2P": 61.06,
+ "AlloProfClusteringS2S": 28.12,
+ "HALClusteringS2S": 19.69,
+ "MLSUMClusteringP2P": 45.59,
+ "MLSUMClusteringS2S": 32.0,
+ "MasakhaNEWSClusteringP2P (fra)": 52.47,
+ "MasakhaNEWSClusteringS2S (fra)": 49.2
}
]
},
"PairClassification": {
- "ap": [
+ "ap": [],
+ "cos_sim_ap": [
{
- "Model": "text2vec-large-chinese",
- "Cmnli": 77.67,
- "Ocnli": 64.04
+ "Model": "e5-mistral-7b-instruct",
+ "OpusparcusPC (fr)": 88.5,
+ "PawsXPairClassification (fr)": 63.65
}
]
},
"Reranking": {
"map": [
{
- "Model": "text2vec-large-chinese",
- "CMedQAv1": 58.92,
- "CMedQAv2": 60.41,
- "MMarcoReranking": 12.48,
- "T2Reranking": 64.82
+ "Model": "e5-mistral-7b-instruct",
+ "AlloprofReranking": 47.36,
+ "SyntecReranking": 77.05
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "text2vec-large-chinese",
- "CmedqaRetrieval": 15.53,
- "CovidRetrieval": 60.48,
- "DuRetrieval": 51.87,
- "EcomRetrieval": 37.58,
- "MMarcoRetrieval": 45.96,
- "MedicalRetrieval": 30.93,
- "T2Retrieval": 50.52,
- "VideoRetrieval": 42.65
+ "Model": "e5-mistral-7b-instruct",
+ "AILACasedocs": 38.76,
+ "AILAStatutes": 38.07,
+ "AlloprofRetrieval": 16.46,
+ "BSARDRetrieval": 0.0,
+ "BrightRetrieval (sustainable_living)": 18.51,
+ "BrightRetrieval (economics)": 15.49,
+ "BrightRetrieval (theoremqa_theorems)": 25.09,
+ "BrightRetrieval (aops)": 7.1,
+ "BrightRetrieval (theoremqa_questions)": 23.94,
+ "BrightRetrieval (stackoverflow)": 9.83,
+ "BrightRetrieval (psychology)": 15.79,
+ "BrightRetrieval (pony)": 4.81,
+ "BrightRetrieval (leetcode)": 28.72,
+ "BrightRetrieval (biology)": 18.84,
+ "BrightRetrieval (earth_science)": 25.96,
+ "BrightRetrieval (robotics)": 16.37,
+ "GerDaLIRSmall": 37.18,
+ "LEMBNarrativeQARetrieval": 44.62,
+ "LEMBQMSumRetrieval": 43.63,
+ "LEMBSummScreenFDRetrieval": 96.82,
+ "LEMBWikimQARetrieval": 82.11,
+ "LeCaRDv2": 68.56,
+ "LegalBenchConsumerContractsQA": 75.46,
+ "LegalBenchCorporateLobbying": 94.01,
+ "LegalQuAD": 59.64,
+ "LegalSummarization": 66.51,
+ "MintakaRetrieval (fr)": 3.57,
+ "SyntecRetrieval": 55.9,
+ "XPQARetrieval (fr)": 41.29
+ },
+ {
+ "Model": "e5-mistral-7b-instruct",
+ "LEMBNeedleRetrieval": 48.25,
+ "LEMBPasskeyRetrieval": 71.0
+ }
+ ],
+ "recall_at_1": [
+ {
+ "Model": "e5-mistral-7b-instruct",
+ "BrightRetrieval (pony)": 1.14,
+ "BrightRetrieval (robotics)": 17.33,
+ "BrightRetrieval (economics)": 26.21,
+ "BrightRetrieval (biology)": 29.93,
+ "BrightRetrieval (earth_science)": 36.28,
+ "BrightRetrieval (psychology)": 46.73,
+ "BrightRetrieval (sustainable_living)": 32.21,
+ "BrightRetrieval (stackoverflow)": 14.53
}
]
},
"STS": {
- "spearman": [
+ "spearman": [],
+ "cos_sim_spearman": [
{
- "Model": "text2vec-large-chinese",
- "AFQMC": 24.51,
- "ATEC": 32.45,
- "BQ": 44.22,
- "LCQMC": 69.16,
- "PAWSX": 14.55,
- "QBQTC": 29.51,
- "STS22 (zh)": 65.94,
- "STSB": 79.45
+ "Model": "e5-mistral-7b-instruct",
+ "SICKFr": 64.39,
+ "STS22 (fr)": 69.82,
+ "STSBenchmarkMultilingualSTS (fr)": 61.87
}
]
},
"Summarization": {
- "spearman": [
+ "spearman": [],
+ "cos_sim_spearman": [
{
- "Model": "text2vec-large-chinese"
+ "Model": "e5-mistral-7b-instruct",
+ "SummEvalFr": 32.22
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "text2vec-large-chinese"
+ "Model": "e5-mistral-7b-instruct",
+ "Core17InstructionRetrieval": 0.09,
+ "News21InstructionRetrieval": -0.86,
+ "Robust04InstructionRetrieval": -9.59
}
]
}
},
- "dragon-plus": {
+ "gte-Qwen2-7B-instruct": {
"BitextMining": {
- "f1": [
- {
- "Model": "dragon-plus"
- }
- ]
+ "f1": []
},
"Classification": {
- "accuracy": [
- {
- "Model": "dragon-plus"
- }
- ]
+ "accuracy": []
},
"Clustering": {
- "v_measure": [
- {
- "Model": "dragon-plus"
- }
- ]
+ "v_measure": []
},
"PairClassification": {
- "ap": [
- {
- "Model": "dragon-plus"
- }
- ]
+ "ap": []
},
"Reranking": {
- "map": [
- {
- "Model": "dragon-plus"
- }
- ]
+ "map": []
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "dragon-plus",
- "ARCChallenge": 8.91,
- "AlphaNLI": 32.1,
- "HellaSwag": 27.69,
- "PIQA": 28.01,
- "Quail": 4.09,
- "RARbCode": 17.58,
- "RARbMath": 45.09,
- "SIQA": 2.0,
- "SpartQA": 10.34,
- "TempReasonL1": 1.82,
- "TempReasonL2Fact": 17.45,
- "TempReasonL2Pure": 0.55,
- "TempReasonL3Fact": 15.71,
- "TempReasonL3Pure": 7.97,
- "WinoGrande": 67.18
+ "Model": "gte-Qwen2-7B-instruct",
+ "BrightRetrieval (earth_science)": 40.66,
+ "BrightRetrieval (sustainable_living)": 20.82,
+ "BrightRetrieval (theoremqa_theorems)": 34.22,
+ "BrightRetrieval (aops)": 15.1,
+ "BrightRetrieval (economics)": 16.18,
+ "BrightRetrieval (pony)": 1.25,
+ "BrightRetrieval (stackoverflow)": 13.95,
+ "BrightRetrieval (leetcode)": 31.07,
+ "BrightRetrieval (biology)": 32.09,
+ "BrightRetrieval (theoremqa_questions)": 29.9,
+ "BrightRetrieval (robotics)": 12.82,
+ "BrightRetrieval (psychology)": 26.58
}
- ]
- },
- "STS": {
- "spearman": [
+ ],
+ "recall_at_1": [
{
- "Model": "dragon-plus"
+ "Model": "gte-Qwen2-7B-instruct",
+ "BrightRetrieval (psychology)": 46.73,
+ "BrightRetrieval (biology)": 34.87,
+ "BrightRetrieval (sustainable_living)": 31.28,
+ "BrightRetrieval (robotics)": 10.89,
+ "BrightRetrieval (pony)": 1.17,
+ "BrightRetrieval (earth_science)": 38.36,
+ "BrightRetrieval (stackoverflow)": 16.67,
+ "BrightRetrieval (economics)": 27.67
}
]
},
+ "STS": {
+ "spearman": []
+ },
"Summarization": {
- "spearman": [
- {
- "Model": "dragon-plus"
- }
- ]
+ "spearman": []
},
"InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "dragon-plus"
- }
- ]
+ "p-MRR": []
}
},
- "bge-small-zh-v1.5": {
+ "all-mpnet-base-v2": {
"BitextMining": {
"f1": [
{
- "Model": "bge-small-zh-v1.5"
+ "Model": "all-mpnet-base-v2",
+ "BornholmBitextMining (dan-Latn)": 27.44,
+ "Tatoeba (pol-Latn_eng-Latn)": 4.09,
+ "Tatoeba (ita-Latn_eng-Latn)": 11.1,
+ "Tatoeba (cat-Latn_eng-Latn)": 9.44,
+ "Tatoeba (aze-Latn_eng-Latn)": 1.49,
+ "Tatoeba (eus-Latn_eng-Latn)": 3.94,
+ "Tatoeba (epo-Latn_eng-Latn)": 7.15,
+ "Tatoeba (lit-Latn_eng-Latn)": 1.02,
+ "Tatoeba (ast-Latn_eng-Latn)": 9.78,
+ "Tatoeba (bul-Cyrl_eng-Latn)": 0.35,
+ "Tatoeba (ceb-Latn_eng-Latn)": 4.41,
+ "Tatoeba (mkd-Cyrl_eng-Latn)": 0.0,
+ "Tatoeba (tzl-Latn_eng-Latn)": 3.55,
+ "Tatoeba (zsm-Latn_eng-Latn)": 4.75,
+ "Tatoeba (mhr-Cyrl_eng-Latn)": 0.17,
+ "Tatoeba (pam-Latn_eng-Latn)": 4.32,
+ "Tatoeba (amh-Ethi_eng-Latn)": 0.0,
+ "Tatoeba (slv-Latn_eng-Latn)": 3.73,
+ "Tatoeba (lvs-Latn_eng-Latn)": 2.98,
+ "Tatoeba (sqi-Latn_eng-Latn)": 3.45,
+ "Tatoeba (orv-Cyrl_eng-Latn)": 0.0,
+ "Tatoeba (vie-Latn_eng-Latn)": 4.96,
+ "Tatoeba (pes-Arab_eng-Latn)": 0.2,
+ "Tatoeba (por-Latn_eng-Latn)": 10.48,
+ "Tatoeba (dtp-Latn_eng-Latn)": 3.54,
+ "Tatoeba (yid-Hebr_eng-Latn)": 0.08,
+ "Tatoeba (isl-Latn_eng-Latn)": 3.86,
+ "Tatoeba (cha-Latn_eng-Latn)": 12.2,
+ "Tatoeba (ron-Latn_eng-Latn)": 7.34,
+ "Tatoeba (hye-Armn_eng-Latn)": 0.14,
+ "Tatoeba (mar-Deva_eng-Latn)": 0.11,
+ "Tatoeba (hin-Deva_eng-Latn)": 0.02,
+ "Tatoeba (kor-Hang_eng-Latn)": 0.32,
+ "Tatoeba (srp-Cyrl_eng-Latn)": 1.89,
+ "Tatoeba (csb-Latn_eng-Latn)": 4.19,
+ "Tatoeba (jpn-Jpan_eng-Latn)": 1.71,
+ "Tatoeba (ber-Tfng_eng-Latn)": 4.56,
+ "Tatoeba (wuu-Hans_eng-Latn)": 0.91,
+ "Tatoeba (jav-Latn_eng-Latn)": 3.17,
+ "Tatoeba (nob-Latn_eng-Latn)": 4.37,
+ "Tatoeba (bre-Latn_eng-Latn)": 3.65,
+ "Tatoeba (kzj-Latn_eng-Latn)": 3.62,
+ "Tatoeba (urd-Arab_eng-Latn)": 0.0,
+ "Tatoeba (ces-Latn_eng-Latn)": 3.56,
+ "Tatoeba (cbk-Latn_eng-Latn)": 9.33,
+ "Tatoeba (gla-Latn_eng-Latn)": 2.04,
+ "Tatoeba (war-Latn_eng-Latn)": 5.14,
+ "Tatoeba (swh-Latn_eng-Latn)": 6.01,
+ "Tatoeba (swg-Latn_eng-Latn)": 7.86,
+ "Tatoeba (glg-Latn_eng-Latn)": 12.0,
+ "Tatoeba (fao-Latn_eng-Latn)": 7.08,
+ "Tatoeba (gsw-Latn_eng-Latn)": 10.67,
+ "Tatoeba (rus-Cyrl_eng-Latn)": 0.14,
+ "Tatoeba (kaz-Cyrl_eng-Latn)": 0.52,
+ "Tatoeba (gle-Latn_eng-Latn)": 2.19,
+ "Tatoeba (slk-Latn_eng-Latn)": 3.4,
+ "Tatoeba (nno-Latn_eng-Latn)": 5.75,
+ "Tatoeba (cor-Latn_eng-Latn)": 2.42,
+ "Tatoeba (nov-Latn_eng-Latn)": 16.61,
+ "Tatoeba (swe-Latn_eng-Latn)": 6.55,
+ "Tatoeba (max-Deva_eng-Latn)": 6.46,
+ "Tatoeba (oci-Latn_eng-Latn)": 8.57,
+ "Tatoeba (lfn-Latn_eng-Latn)": 6.1,
+ "Tatoeba (fra-Latn_eng-Latn)": 16.9,
+ "Tatoeba (ben-Beng_eng-Latn)": 0.0,
+ "Tatoeba (bel-Cyrl_eng-Latn)": 0.65,
+ "Tatoeba (lat-Latn_eng-Latn)": 5.78,
+ "Tatoeba (cmn-Hans_eng-Latn)": 2.22,
+ "Tatoeba (kat-Geor_eng-Latn)": 0.43,
+ "Tatoeba (bos-Latn_eng-Latn)": 4.6,
+ "Tatoeba (xho-Latn_eng-Latn)": 3.3,
+ "Tatoeba (tha-Thai_eng-Latn)": 0.0,
+ "Tatoeba (cym-Latn_eng-Latn)": 4.88,
+ "Tatoeba (deu-Latn_eng-Latn)": 11.46,
+ "Tatoeba (awa-Deva_eng-Latn)": 0.44,
+ "Tatoeba (ido-Latn_eng-Latn)": 9.84,
+ "Tatoeba (tat-Cyrl_eng-Latn)": 0.24,
+ "Tatoeba (kab-Latn_eng-Latn)": 1.31,
+ "Tatoeba (uzb-Latn_eng-Latn)": 1.98,
+ "Tatoeba (heb-Hebr_eng-Latn)": 0.28,
+ "Tatoeba (ara-Arab_eng-Latn)": 0.1,
+ "Tatoeba (fry-Latn_eng-Latn)": 12.43,
+ "Tatoeba (afr-Latn_eng-Latn)": 6.08,
+ "Tatoeba (kur-Latn_eng-Latn)": 3.65,
+ "Tatoeba (pms-Latn_eng-Latn)": 7.63,
+ "Tatoeba (ell-Grek_eng-Latn)": 0.0,
+ "Tatoeba (spa-Latn_eng-Latn)": 10.12,
+ "Tatoeba (dsb-Latn_eng-Latn)": 2.96,
+ "Tatoeba (uig-Arab_eng-Latn)": 0.33,
+ "Tatoeba (nld-Latn_eng-Latn)": 9.29,
+ "Tatoeba (tel-Telu_eng-Latn)": 0.73,
+ "Tatoeba (hrv-Latn_eng-Latn)": 3.77,
+ "Tatoeba (nds-Latn_eng-Latn)": 10.96,
+ "Tatoeba (hun-Latn_eng-Latn)": 3.23,
+ "Tatoeba (est-Latn_eng-Latn)": 2.35,
+ "Tatoeba (mal-Mlym_eng-Latn)": 0.15,
+ "Tatoeba (khm-Khmr_eng-Latn)": 0.28,
+ "Tatoeba (hsb-Latn_eng-Latn)": 3.12,
+ "Tatoeba (tgl-Latn_eng-Latn)": 4.06,
+ "Tatoeba (ang-Latn_eng-Latn)": 9.77,
+ "Tatoeba (tur-Latn_eng-Latn)": 3.16,
+ "Tatoeba (tuk-Latn_eng-Latn)": 2.23,
+ "Tatoeba (ile-Latn_eng-Latn)": 17.84,
+ "Tatoeba (mon-Cyrl_eng-Latn)": 0.81,
+ "Tatoeba (yue-Hant_eng-Latn)": 1.16,
+ "Tatoeba (ina-Latn_eng-Latn)": 22.55,
+ "Tatoeba (tam-Taml_eng-Latn)": 0.73,
+ "Tatoeba (ukr-Cyrl_eng-Latn)": 0.5,
+ "Tatoeba (dan-Latn_eng-Latn)": 10.01,
+ "Tatoeba (arq-Arab_eng-Latn)": 0.33,
+ "Tatoeba (arz-Arab_eng-Latn)": 0.0,
+ "Tatoeba (fin-Latn_eng-Latn)": 3.82,
+ "Tatoeba (ind-Latn_eng-Latn)": 4.88
}
]
},
"Classification": {
"accuracy": [
{
- "Model": "bge-small-zh-v1.5",
- "AmazonReviewsClassification (zh)": 35.91,
- "IFlyTek": 45.49,
- "JDReview": 80.04,
- "MassiveIntentClassification (zh-CN)": 63.95,
- "MassiveScenarioClassification (zh-CN)": 70.8,
- "MultilingualSentiment": 63.06,
- "OnlineShopping": 85.05,
- "TNews": 48.15,
- "Waimai": 83.18
+ "Model": "all-mpnet-base-v2",
+ "AllegroReviews (pol-Latn)": 22.99,
+ "AmazonCounterfactualClassification (en-ext)": 67.5,
+ "AmazonCounterfactualClassification (en)": 65.03,
+ "AmazonCounterfactualClassification (deu-Latn)": 55.66,
+ "AmazonCounterfactualClassification (jpn-Jpan)": 60.69,
+ "AmazonPolarityClassification": 67.14,
+ "AmazonReviewsClassification (en)": 31.44,
+ "AmazonReviewsClassification (deu-Latn)": 26.05,
+ "AmazonReviewsClassification (spa-Latn)": 27.73,
+ "AmazonReviewsClassification (fra-Latn)": 28.49,
+ "AmazonReviewsClassification (jpn-Jpan)": 23.65,
+ "AmazonReviewsClassification (cmn-Hans)": 23.62,
+ "AngryTweetsClassification (dan-Latn)": 44.13,
+ "Banking77Classification": 81.7,
+ "CBD (pol-Latn)": 50.25,
+ "DanishPoliticalCommentsClassification (dan-Latn)": 28.31,
+ "EmotionClassification": 42.22,
+ "GeoreviewClassification (rus-Cyrl)": 25.93,
+ "HeadlineClassification (rus-Cyrl)": 28.53,
+ "IFlyTek (cmn-Hans)": 17.18,
+ "ImdbClassification": 71.17,
+ "InappropriatenessClassification (rus-Cyrl)": 51.82,
+ "JDReview (cmn-Hans)": 60.19,
+ "KinopoiskClassification (rus-Cyrl)": 34.18,
+ "LccSentimentClassification (dan-Latn)": 39.27,
+ "MTOPDomainClassification (en)": 91.89,
+ "MTOPDomainClassification (deu-Latn)": 71.86,
+ "MTOPDomainClassification (spa-Latn)": 71.3,
+ "MTOPDomainClassification (fra-Latn)": 74.88,
+ "MTOPDomainClassification (hin-Deva)": 39.93,
+ "MTOPDomainClassification (tha-Thai)": 17.54,
+ "MTOPIntentClassification (en)": 68.27,
+ "MTOPIntentClassification (deu-Latn)": 44.36,
+ "MTOPIntentClassification (spa-Latn)": 39.48,
+ "MTOPIntentClassification (fra-Latn)": 37.57,
+ "MTOPIntentClassification (hin-Deva)": 18.63,
+ "MTOPIntentClassification (tha-Thai)": 5.39,
+ "MasakhaNEWSClassification (amh-Ethi)": 36.49,
+ "MasakhaNEWSClassification (eng)": 79.75,
+ "MasakhaNEWSClassification (fra-Latn)": 77.77,
+ "MasakhaNEWSClassification (hau-Latn)": 59.22,
+ "MasakhaNEWSClassification (ibo-Latn)": 61.64,
+ "MasakhaNEWSClassification (lin-Latn)": 74.0,
+ "MasakhaNEWSClassification (lug-Latn)": 58.43,
+ "MasakhaNEWSClassification (orm-Ethi)": 48.15,
+ "MasakhaNEWSClassification (pcm-Latn)": 92.2,
+ "MasakhaNEWSClassification (run-Latn)": 64.72,
+ "MasakhaNEWSClassification (sna-Latn)": 73.69,
+ "MasakhaNEWSClassification (som-Latn)": 49.97,
+ "MasakhaNEWSClassification (swa-Latn)": 55.15,
+ "MasakhaNEWSClassification (tir-Ethi)": 27.46,
+ "MasakhaNEWSClassification (xho-Latn)": 60.98,
+ "MasakhaNEWSClassification (yor-Latn)": 63.33,
+ "MassiveIntentClassification (en)": 69.76,
+ "MassiveIntentClassification (ara-Arab)": 20.42,
+ "MassiveIntentClassification (isl-Latn)": 31.46,
+ "MassiveIntentClassification (rus-Cyrl)": 23.98,
+ "MassiveIntentClassification (hun-Latn)": 34.38,
+ "MassiveIntentClassification (pol-Latn)": 34.26,
+ "MassiveIntentClassification (lav-Latn)": 35.08,
+ "MassiveIntentClassification (msa-Latn)": 30.53,
+ "MassiveIntentClassification (ind-Latn)": 36.31,
+ "MassiveIntentClassification (kan-Knda)": 3.76,
+ "MassiveIntentClassification (tam-Taml)": 9.25,
+ "MassiveIntentClassification (ron-Latn)": 38.07,
+ "MassiveIntentClassification (por-Latn)": 42.83,
+ "MassiveIntentClassification (jpn-Jpan)": 33.13,
+ "MassiveIntentClassification (tgl-Latn)": 36.33,
+ "MassiveIntentClassification (amh-Ethi)": 2.4,
+ "MassiveIntentClassification (fin-Latn)": 34.58,
+ "MassiveIntentClassification (hye-Armn)": 10.11,
+ "MassiveIntentClassification (nld-Latn)": 38.49,
+ "MassiveIntentClassification (tur-Latn)": 32.02,
+ "MassiveIntentClassification (urd-Arab)": 12.86,
+ "MassiveIntentClassification (cym-Latn)": 30.82,
+ "MassiveIntentClassification (fra-Latn)": 44.27,
+ "MassiveIntentClassification (aze-Latn)": 28.92,
+ "MassiveIntentClassification (ben-Beng)": 12.35,
+ "MassiveIntentClassification (mon-Cyrl)": 19.65,
+ "MassiveIntentClassification (ita-Latn)": 40.29,
+ "MassiveIntentClassification (tel-Telu)": 2.26,
+ "MassiveIntentClassification (kat-Geor)": 7.66,
+ "MassiveIntentClassification (hin-Deva)": 17.68,
+ "MassiveIntentClassification (fas-Arab)": 22.45,
+ "MassiveIntentClassification (swe-Latn)": 39.02,
+ "MassiveIntentClassification (heb-Hebr)": 23.6,
+ "MassiveIntentClassification (sqi-Latn)": 37.26,
+ "MassiveIntentClassification (mal-Mlym)": 2.62,
+ "MassiveIntentClassification (vie-Latn)": 31.47,
+ "MassiveIntentClassification (mya-Mymr)": 4.6,
+ "MassiveIntentClassification (jav-Latn)": 31.75,
+ "MassiveIntentClassification (cmo-Hans)": 24.36,
+ "MassiveIntentClassification (swa-Latn)": 31.82,
+ "MassiveIntentClassification (nob-Latn)": 39.3,
+ "MassiveIntentClassification (cmo-Hant)": 22.43,
+ "MassiveIntentClassification (ell-Grek)": 24.52,
+ "MassiveIntentClassification (deu-Latn)": 44.54,
+ "MassiveIntentClassification (tha-Thai)": 8.51,
+ "MassiveIntentClassification (dan-Latn)": 42.36,
+ "MassiveIntentClassification (afr-Latn)": 36.49,
+ "MassiveIntentClassification (spa-Latn)": 39.75,
+ "MassiveIntentClassification (kor-Kore)": 13.35,
+ "MassiveIntentClassification (slv-Latn)": 34.49,
+ "MassiveIntentClassification (khm-Khmr)": 4.76,
+ "MassiveScenarioClassification (en)": 75.67,
+ "MassiveScenarioClassification (kor-Kore)": 17.28,
+ "MassiveScenarioClassification (swe-Latn)": 44.53,
+ "MassiveScenarioClassification (hye-Armn)": 16.86,
+ "MassiveScenarioClassification (nob-Latn)": 45.75,
+ "MassiveScenarioClassification (pol-Latn)": 42.66,
+ "MassiveScenarioClassification (ind-Latn)": 43.05,
+ "MassiveScenarioClassification (ita-Latn)": 51.37,
+ "MassiveScenarioClassification (tgl-Latn)": 47.04,
+ "MassiveScenarioClassification (jav-Latn)": 40.0,
+ "MassiveScenarioClassification (lav-Latn)": 39.28,
+ "MassiveScenarioClassification (mya-Mymr)": 10.8,
+ "MassiveScenarioClassification (por-Latn)": 52.06,
+ "MassiveScenarioClassification (tel-Telu)": 7.81,
+ "MassiveScenarioClassification (deu-Latn)": 54.09,
+ "MassiveScenarioClassification (fas-Arab)": 27.8,
+ "MassiveScenarioClassification (hin-Deva)": 23.13,
+ "MassiveScenarioClassification (hun-Latn)": 41.01,
+ "MassiveScenarioClassification (vie-Latn)": 35.9,
+ "MassiveScenarioClassification (fra-Latn)": 54.26,
+ "MassiveScenarioClassification (jpn-Jpan)": 40.57,
+ "MassiveScenarioClassification (tha-Thai)": 17.01,
+ "MassiveScenarioClassification (swa-Latn)": 40.34,
+ "MassiveScenarioClassification (ell-Grek)": 33.85,
+ "MassiveScenarioClassification (aze-Latn)": 36.42,
+ "MassiveScenarioClassification (heb-Hebr)": 25.49,
+ "MassiveScenarioClassification (kat-Geor)": 13.45,
+ "MassiveScenarioClassification (afr-Latn)": 43.63,
+ "MassiveScenarioClassification (ben-Beng)": 17.49,
+ "MassiveScenarioClassification (cym-Latn)": 34.82,
+ "MassiveScenarioClassification (mon-Cyrl)": 25.58,
+ "MassiveScenarioClassification (tur-Latn)": 39.11,
+ "MassiveScenarioClassification (tam-Taml)": 14.55,
+ "MassiveScenarioClassification (ara-Arab)": 27.8,
+ "MassiveScenarioClassification (msa-Latn)": 37.28,
+ "MassiveScenarioClassification (cmo-Hant)": 31.7,
+ "MassiveScenarioClassification (dan-Latn)": 49.45,
+ "MassiveScenarioClassification (kan-Knda)": 8.34,
+ "MassiveScenarioClassification (urd-Arab)": 20.0,
+ "MassiveScenarioClassification (cmo-Hans)": 35.33,
+ "MassiveScenarioClassification (amh-Ethi)": 7.43,
+ "MassiveScenarioClassification (ron-Latn)": 47.86,
+ "MassiveScenarioClassification (fin-Latn)": 38.41,
+ "MassiveScenarioClassification (isl-Latn)": 39.36,
+ "MassiveScenarioClassification (sqi-Latn)": 44.67,
+ "MassiveScenarioClassification (spa-Latn)": 50.92,
+ "MassiveScenarioClassification (mal-Mlym)": 7.69,
+ "MassiveScenarioClassification (slv-Latn)": 39.88,
+ "MassiveScenarioClassification (nld-Latn)": 47.79,
+ "MassiveScenarioClassification (khm-Khmr)": 9.63,
+ "MassiveScenarioClassification (rus-Cyrl)": 28.71,
+ "MultilingualSentiment (cmn-Hans)": 41.2,
+ "NoRecClassification (nob-Latn)": 38.34,
+ "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 50.15,
+ "OnlineShopping (cmn-Hans)": 56.94,
+ "PAC (pol-Latn)": 62.1,
+ "PolEmo2.0-IN (pol-Latn)": 41.63,
+ "PolEmo2.0-OUT (pol-Latn)": 25.0,
+ "RuReviewsClassification (rus-Cyrl)": 42.33,
+ "RuSciBenchGRNTIClassification (rus-Cyrl)": 13.29,
+ "RuSciBenchOECDClassification (rus-Cyrl)": 10.62,
+ "TNews (cmn-Hans)": 21.05,
+ "ToxicConversationsClassification": 61.05,
+ "TweetSentimentExtractionClassification": 55.05,
+ "Waimai (cmn-Hans)": 63.31
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "bge-small-zh-v1.5",
- "CLSClusteringP2P": 38.14,
- "CLSClusteringS2S": 35.14,
- "ThuNewsClusteringP2P": 54.22,
- "ThuNewsClusteringS2S": 49.22
+ "Model": "all-mpnet-base-v2",
+ "ArxivClusteringP2P": 48.38,
+ "ArxivClusteringS2S": 39.72,
+ "BiorxivClusteringP2P": 39.62,
+ "BiorxivClusteringS2S": 35.02,
+ "GeoreviewClusteringP2P (rus-Cyrl)": 20.33,
+ "MasakhaNEWSClusteringP2P (amh-Ethi)": 42.49,
+ "MasakhaNEWSClusteringP2P (eng)": 67.24,
+ "MasakhaNEWSClusteringP2P (fra-Latn)": 61.99,
+ "MasakhaNEWSClusteringP2P (hau-Latn)": 37.17,
+ "MasakhaNEWSClusteringP2P (ibo-Latn)": 52.0,
+ "MasakhaNEWSClusteringP2P (lin-Latn)": 69.68,
+ "MasakhaNEWSClusteringP2P (lug-Latn)": 50.96,
+ "MasakhaNEWSClusteringP2P (orm-Ethi)": 28.42,
+ "MasakhaNEWSClusteringP2P (pcm-Latn)": 64.01,
+ "MasakhaNEWSClusteringP2P (run-Latn)": 57.6,
+ "MasakhaNEWSClusteringP2P (sna-Latn)": 54.99,
+ "MasakhaNEWSClusteringP2P (som-Latn)": 31.16,
+ "MasakhaNEWSClusteringP2P (swa-Latn)": 28.29,
+ "MasakhaNEWSClusteringP2P (tir-Ethi)": 41.85,
+ "MasakhaNEWSClusteringP2P (xho-Latn)": 35.24,
+ "MasakhaNEWSClusteringP2P (yor-Latn)": 42.15,
+ "MasakhaNEWSClusteringS2S (amh-Ethi)": 44.48,
+ "MasakhaNEWSClusteringS2S (eng)": 35.69,
+ "MasakhaNEWSClusteringS2S (fra-Latn)": 41.05,
+ "MasakhaNEWSClusteringS2S (hau-Latn)": 16.64,
+ "MasakhaNEWSClusteringS2S (ibo-Latn)": 38.63,
+ "MasakhaNEWSClusteringS2S (lin-Latn)": 70.72,
+ "MasakhaNEWSClusteringS2S (lug-Latn)": 46.97,
+ "MasakhaNEWSClusteringS2S (orm-Ethi)": 23.85,
+ "MasakhaNEWSClusteringS2S (pcm-Latn)": 68.7,
+ "MasakhaNEWSClusteringS2S (run-Latn)": 52.27,
+ "MasakhaNEWSClusteringS2S (sna-Latn)": 47.64,
+ "MasakhaNEWSClusteringS2S (som-Latn)": 30.94,
+ "MasakhaNEWSClusteringS2S (swa-Latn)": 17.12,
+ "MasakhaNEWSClusteringS2S (tir-Ethi)": 42.01,
+ "MasakhaNEWSClusteringS2S (xho-Latn)": 24.16,
+ "MasakhaNEWSClusteringS2S (yor-Latn)": 35.04,
+ "MedrxivClusteringP2P": 35.58,
+ "MedrxivClusteringS2S": 32.87,
+ "RedditClustering": 54.82,
+ "RedditClusteringP2P": 56.77,
+ "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 14.66,
+ "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 12.49,
+ "StackExchangeClustering": 53.8,
+ "StackExchangeClusteringP2P": 34.28,
+ "TwentyNewsgroupsClustering": 49.74
}
]
},
"PairClassification": {
- "ap": [
+ "ap": [],
+ "cosine_ap": [
{
- "Model": "bge-small-zh-v1.5",
- "Cmnli": 76.24,
- "Ocnli": 64.57
+ "Model": "all-mpnet-base-v2",
+ "CDSC-E (pol-Latn)": 45.37,
+ "OpusparcusPC (deu-Latn)": 89.78,
+ "OpusparcusPC (en)": 97.75,
+ "OpusparcusPC (fin-Latn)": 85.82,
+ "OpusparcusPC (fra-Latn)": 86.61,
+ "OpusparcusPC (rus-Cyrl)": 79.85,
+ "OpusparcusPC (swe-Latn)": 81.81,
+ "PSC (pol-Latn)": 83.28,
+ "PawsXPairClassification (deu-Latn)": 52.17,
+ "PawsXPairClassification (en)": 61.99,
+ "PawsXPairClassification (spa-Latn)": 55.06,
+ "PawsXPairClassification (fra-Latn)": 56.42,
+ "PawsXPairClassification (jpn-Hira)": 47.43,
+ "PawsXPairClassification (kor-Hang)": 49.75,
+ "PawsXPairClassification (cmn-Hans)": 52.47,
+ "SICK-E-PL (pol-Latn)": 46.51,
+ "SprintDuplicateQuestions": 90.15,
+ "TERRa (rus-Cyrl)": 44.52,
+ "TwitterSemEval2015": 73.85,
+ "TwitterURLCorpus": 85.11
}
]
},
"Reranking": {
"map": [
{
- "Model": "bge-small-zh-v1.5",
- "CMedQAv1": 77.4,
- "CMedQAv2": 79.86,
- "MMarcoReranking": 20.5,
- "T2Reranking": 65.9
+ "Model": "all-mpnet-base-v2",
+ "AlloprofReranking (fra-Latn)": 69.63,
+ "AskUbuntuDupQuestions": 65.85,
+ "MMarcoReranking (cmn-Hans)": 4.65,
+ "MindSmallReranking": 30.97,
+ "RuBQReranking (rus-Cyrl)": 30.96,
+ "SciDocsRR": 88.65,
+ "StackOverflowDupQuestions": 51.98,
+ "SyntecReranking (fra-Latn)": 66.12,
+ "T2Reranking (cmn-Hans)": 58.3
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "bge-small-zh-v1.5",
- "CmedqaRetrieval": 35.11,
- "CovidRetrieval": 70.14,
- "DuRetrieval": 77.28,
- "EcomRetrieval": 55.71,
- "MMarcoRetrieval": 63.48,
- "MedicalRetrieval": 49.8,
- "T2Retrieval": 76.43,
- "VideoRetrieval": 66.19
- }
- ]
- },
- "STS": {
- "spearman": [
- {
- "Model": "bge-small-zh-v1.5",
- "AFQMC": 33.42,
- "ATEC": 43.01,
- "BQ": 55.22,
- "LCQMC": 72.19,
- "PAWSX": 9.26,
- "QBQTC": 35.29,
- "STS22 (zh)": 67.72,
- "STSB": 76.73
- }
- ]
- },
- "Summarization": {
- "spearman": [
- {
- "Model": "bge-small-zh-v1.5"
- }
- ]
- },
- "InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "bge-small-zh-v1.5"
- }
- ]
- }
- },
- "text-embedding-3-large": {
- "BitextMining": {
- "f1": [
- {
- "Model": "text-embedding-3-large"
- }
- ]
- },
- "Classification": {
- "accuracy": [
- {
- "Model": "text-embedding-3-large",
- "AmazonCounterfactualClassification (en)": 78.93,
- "AmazonPolarityClassification": 92.85,
- "AmazonReviewsClassification (en)": 48.7,
- "Banking77Classification": 85.69,
- "EmotionClassification": 51.58,
- "ImdbClassification": 87.67,
- "MTOPDomainClassification (en)": 95.36,
- "MTOPIntentClassification (en)": 75.07,
- "MassiveIntentClassification (en)": 74.64,
- "MassiveScenarioClassification (en)": 79.79,
- "ToxicConversationsClassification": 72.92,
- "TweetSentimentExtractionClassification": 62.22
- }
- ]
- },
- "Clustering": {
- "v_measure": [
- {
- "Model": "text-embedding-3-large",
- "ArxivClusteringP2P": 49.01,
- "ArxivClusteringS2S": 44.45,
- "BiorxivClusteringP2P": 38.03,
- "BiorxivClusteringS2S": 36.53,
- "MedrxivClusteringP2P": 32.7,
- "MedrxivClusteringS2S": 31.27,
- "RedditClustering": 67.84,
- "RedditClusteringP2P": 67.96,
- "StackExchangeClustering": 76.26,
- "StackExchangeClusteringP2P": 36.88,
- "TwentyNewsgroupsClustering": 58.14
- }
- ]
- },
- "PairClassification": {
- "ap": [
- {
- "Model": "text-embedding-3-large",
- "SprintDuplicateQuestions": 92.25,
- "TwitterSemEval2015": 77.13,
- "TwitterURLCorpus": 87.78
- }
- ]
- },
- "Reranking": {
- "map": [
- {
- "Model": "text-embedding-3-large",
- "AskUbuntuDupQuestions": 65.03,
- "MindSmallReranking": 29.86,
- "SciDocsRR": 86.66,
- "StackOverflowDupQuestions": 55.08
- }
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
+ "Model": "all-mpnet-base-v2",
+ "AILACasedocs": 22.51,
+ "AILAStatutes": 21.27,
+ "ARCChallenge": 11.8,
+ "AlloprofRetrieval (fra-Latn)": 34.27,
+ "AlphaNLI": 22.41,
+ "AppsRetrieval (eng-Latn_python-Code)": 8.41,
+ "ArguAna": 46.52,
+ "ArguAna-PL (pol-Latn)": 14.72,
+ "BSARDRetrieval (fra-Latn)": 6.98,
+ "CQADupstackRetrieval": 44.96,
+ "ClimateFEVER": 21.97,
+ "CmedqaRetrieval (cmn-Hans)": 2.0,
+ "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 37.72,
+ "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 69.32,
+ "CodeSearchNetCCRetrieval (python-Code)": 71.83,
+ "CodeSearchNetCCRetrieval (javascript-Code)": 67.99,
+ "CodeSearchNetCCRetrieval (go-Code)": 61.44,
+ "CodeSearchNetCCRetrieval (ruby-Code)": 66.42,
+ "CodeSearchNetCCRetrieval (java-Code)": 68.88,
+ "CodeSearchNetCCRetrieval (php-Code)": 59.22,
+ "CodeSearchNetRetrieval (python-Code)": 81.01,
+ "CodeSearchNetRetrieval (javascript-Code)": 65.58,
+ "CodeSearchNetRetrieval (go-Code)": 88.25,
+ "CodeSearchNetRetrieval (ruby-Code)": 74.53,
+ "CodeSearchNetRetrieval (java-Code)": 65.11,
+ "CodeSearchNetRetrieval (php-Code)": 74.47,
+ "CodeTransOceanContest (python-Code_c++-Code)": 70.58,
+ "CodeTransOceanDL": 29.45,
+ "CosQA (eng-Latn_python-Code)": 33.71,
+ "CovidRetrieval (cmn-Hans)": 3.7,
+ "DBPedia": 32.09,
+ "DuRetrieval (cmn-Hans)": 4.92,
+ "EcomRetrieval (cmn-Hans)": 3.94,
+ "FEVER": 50.86,
+ "FiQA-PL (pol-Latn)": 3.6,
+ "FiQA2018": 49.96,
+ "GerDaLIRSmall (deu-Latn)": 3.78,
+ "HellaSwag": 26.27,
+ "HotpotQA": 39.29,
+ "LEMBNarrativeQARetrieval": 19.34,
+ "LEMBQMSumRetrieval": 21.54,
+ "LEMBSummScreenFDRetrieval": 60.43,
+ "LEMBWikimQARetrieval": 44.92,
+ "LeCaRDv2 (zho-Hans)": 18.09,
+ "LegalBenchConsumerContractsQA": 75.25,
+ "LegalBenchCorporateLobbying": 89.04,
+ "LegalQuAD (deu-Latn)": 10.67,
+ "LegalSummarization": 58.55,
+ "MMarcoRetrieval (cmn-Hans)": 7.13,
+ "MSMARCO": 39.75,
+ "MedicalRetrieval (cmn-Hans)": 1.71,
+ "MintakaRetrieval (ara-Arab)": 1.97,
+ "MintakaRetrieval (deu-Latn)": 17.21,
+ "MintakaRetrieval (spa-Latn)": 10.11,
+ "MintakaRetrieval (fra-Latn)": 12.93,
+ "MintakaRetrieval (hin-Deva)": 2.03,
+ "MintakaRetrieval (ita-Latn)": 5.63,
+ "MintakaRetrieval (jpn-Hira)": 6.77,
+ "MintakaRetrieval (por-Latn)": 8.05,
+ "NFCorpus": 33.29,
+ "NFCorpus-PL (pol-Latn)": 8.77,
+ "NQ": 50.45,
+ "PIQA": 29.03,
+ "Quail": 3.41,
+ "QuoraRetrieval": 87.46,
+ "RARbCode": 53.21,
+ "RARbMath": 71.85,
+ "RuBQRetrieval (rus-Cyrl)": 4.75,
+ "SCIDOCS": 23.76,
+ "SCIDOCS-PL (pol-Latn)": 4.02,
+ "SIQA": 2.38,
+ "SciFact": 65.57,
+ "SciFact-PL (pol-Latn)": 13.31,
+ "SpartQA": 0.22,
+ "StackOverflowQA": 90.32,
+ "SyntecRetrieval (fra-Latn)": 57.39,
+ "SyntheticText2SQL (eng-Latn_sql-Code)": 45.09,
+ "T2Retrieval (cmn-Hans)": 2.98,
+ "TRECCOVID": 51.33,
+ "TRECCOVID-PL (pol-Latn)": 12.11,
+ "TempReasonL1": 1.77,
+ "TempReasonL2Fact": 11.2,
+ "TempReasonL2Pure": 1.15,
+ "TempReasonL3Fact": 9.42,
+ "TempReasonL3Pure": 5.59,
+ "Touche2020": 19.93,
+ "VideoRetrieval (cmn-Hans)": 8.48,
+ "WinoGrande": 20.77,
+ "XPQARetrieval (ara-Arab_ara-Arab)": 9.42,
+ "XPQARetrieval (eng-Latn_ara-Arab)": 2.36,
+ "XPQARetrieval (ara-Arab_eng-Latn)": 8.98,
+ "XPQARetrieval (deu-Latn_deu-Latn)": 55.82,
+ "XPQARetrieval (eng-Latn_deu-Latn)": 11.74,
+ "XPQARetrieval (deu-Latn_eng-Latn)": 30.44,
+ "XPQARetrieval (spa-Latn_spa-Latn)": 40.01,
+ "XPQARetrieval (eng-Latn_spa-Latn)": 6.12,
+ "XPQARetrieval (spa-Latn_eng-Latn)": 29.44,
+ "XPQARetrieval (fra-Latn_fra-Latn)": 51.94,
+ "XPQARetrieval (eng-Latn_fra-Latn)": 11.48,
+ "XPQARetrieval (fra-Latn_eng-Latn)": 32.52,
+ "XPQARetrieval (hin-Deva_hin-Deva)": 37.45,
+ "XPQARetrieval (eng-Latn_hin-Deva)": 5.11,
+ "XPQARetrieval (hin-Deva_eng-Latn)": 7.37,
+ "XPQARetrieval (ita-Latn_ita-Latn)": 54.2,
+ "XPQARetrieval (eng-Latn_ita-Latn)": 6.08,
+ "XPQARetrieval (ita-Latn_eng-Latn)": 30.32,
+ "XPQARetrieval (jpn-Hira_jpn-Hira)": 37.46,
+ "XPQARetrieval (eng-Latn_jpn-Hira)": 5.79,
+ "XPQARetrieval (jpn-Hira_eng-Latn)": 14.77,
+ "XPQARetrieval (kor-Hang_kor-Hang)": 10.39,
+ "XPQARetrieval (eng-Latn_kor-Hang)": 7.09,
+ "XPQARetrieval (kor-Hang_eng-Latn)": 6.96,
+ "XPQARetrieval (pol-Latn_pol-Latn)": 23.71,
+ "XPQARetrieval (eng-Latn_pol-Latn)": 8.83,
+ "XPQARetrieval (pol-Latn_eng-Latn)": 15.94,
+ "XPQARetrieval (por-Latn_por-Latn)": 33.56,
+ "XPQARetrieval (eng-Latn_por-Latn)": 3.76,
+ "XPQARetrieval (por-Latn_eng-Latn)": 23.45,
+ "XPQARetrieval (tam-Taml_tam-Taml)": 5.5,
+ "XPQARetrieval (eng-Latn_tam-Taml)": 3.3,
+ "XPQARetrieval (tam-Taml_eng-Latn)": 4.18,
+ "XPQARetrieval (cmn-Hans_cmn-Hans)": 23.8,
+ "XPQARetrieval (eng-Latn_cmn-Hans)": 7.2,
+ "XPQARetrieval (cmn-Hans_eng-Latn)": 12.84
+ },
{
- "Model": "text-embedding-3-large",
- "AILACasedocs": 39.0,
- "AILAStatutes": 41.31,
- "ARCChallenge": 23.98,
- "AlphaNLI": 37.27,
- "ArguAna": 58.05,
- "BrightRetrieval (theoremqa_questions)": 22.22,
- "BrightRetrieval (leetcode)": 23.65,
- "BrightRetrieval (earth_science)": 26.27,
- "BrightRetrieval (psychology)": 27.52,
- "BrightRetrieval (robotics)": 12.93,
- "BrightRetrieval (economics)": 19.98,
- "BrightRetrieval (stackoverflow)": 12.49,
- "BrightRetrieval (biology)": 23.67,
- "BrightRetrieval (theoremqa_theorems)": 9.25,
- "BrightRetrieval (pony)": 2.45,
- "BrightRetrieval (sustainable_living)": 20.32,
- "BrightRetrieval (aops)": 8.45,
- "CQADupstackRetrieval": 47.54,
- "ClimateFEVER": 30.27,
- "DBPedia": 44.76,
- "FEVER": 87.94,
- "FiQA2018": 55.0,
- "GerDaLIRSmall": 32.77,
- "HellaSwag": 34.12,
- "HotpotQA": 71.58,
- "LEMBNarrativeQARetrieval": 44.09,
- "LEMBNeedleRetrieval": 29.25,
- "LEMBPasskeyRetrieval": 63.0,
- "LEMBQMSumRetrieval": 32.49,
- "LEMBSummScreenFDRetrieval": 84.8,
- "LEMBWikimQARetrieval": 54.16,
- "LeCaRDv2": 57.2,
- "LegalBenchConsumerContractsQA": 79.39,
- "LegalBenchCorporateLobbying": 95.09,
- "LegalQuAD": 57.47,
- "LegalSummarization": 71.55,
- "MSMARCO": 40.24,
- "NFCorpus": 42.07,
- "NQ": 61.27,
- "PIQA": 41.96,
- "Quail": 10.15,
- "QuoraRetrieval": 89.05,
- "RARbCode": 89.64,
- "RARbMath": 90.08,
- "SCIDOCS": 23.11,
- "SIQA": 3.44,
- "SciFact": 77.77,
- "SpartQA": 7.51,
- "TRECCOVID": 79.56,
- "TempReasonL1": 2.13,
- "TempReasonL2Fact": 28.65,
- "TempReasonL2Pure": 10.34,
- "TempReasonL3Fact": 25.52,
- "TempReasonL3Pure": 15.28,
- "Touche2020": 23.35,
- "WinoGrande": 29.11
+ "Model": "all-mpnet-base-v2",
+ "LEMBNeedleRetrieval": 16.0,
+ "LEMBPasskeyRetrieval": 24.5
}
]
},
"STS": {
- "spearman": [
+ "spearman": [],
+ "cosine_spearman": [
{
- "Model": "text-embedding-3-large",
- "BIOSSES": 84.68,
- "SICK-R": 79.0,
- "STS12": 72.84,
- "STS13": 86.1,
- "STS14": 81.15,
- "STS15": 88.49,
- "STS16": 85.08,
- "STS17 (en-en)": 90.22,
- "STS22 (en)": 66.14,
- "STSBenchmark": 83.56
+ "Model": "all-mpnet-base-v2",
+ "AFQMC (cmn-Hans)": 8.01,
+ "ATEC (cmn-Hans)": 14.03,
+ "BIOSSES": 80.43,
+ "BQ (cmn-Hans)": 21.39,
+ "CDSC-R (pol-Latn)": 77.04,
+ "LCQMC (cmn-Hans)": 22.84,
+ "PAWSX (cmn-Hans)": 6.44,
+ "RUParaPhraserSTS (rus-Cyrl)": 42.15,
+ "RuSTSBenchmarkSTS (rus-Cyrl)": 55.68,
+ "SICK-R": 80.59,
+ "SICK-R-PL (pol-Latn)": 50.2,
+ "SICKFr (fra-Latn)": 67.05,
+ "STS12": 72.63,
+ "STS13": 83.48,
+ "STS14": 78.0,
+ "STS15": 85.66,
+ "STS16": 80.03,
+ "STS17 (fra-Latn_eng-Latn)": 41.64,
+ "STS17 (nld-Latn_eng-Latn)": 32.89,
+ "STS17 (spa-Latn_eng-Latn)": 25.28,
+ "STS17 (en-en)": 90.6,
+ "STS17 (kor-Hang)": 39.11,
+ "STS17 (ara-Arab)": 55.42,
+ "STS17 (spa-Latn)": 78.4,
+ "STS17 (eng-Latn_deu-Latn)": 35.5,
+ "STS17 (eng-Latn_ara-Arab)": 6.76,
+ "STS17 (eng-Latn_tur-Latn)": -4.58,
+ "STS17 (ita-Latn_eng-Latn)": 31.8,
+ "STS22 (pol-Latn)": 24.21,
+ "STS22 (ita-Latn)": 58.02,
+ "STS22 (spa-Latn_eng-Latn)": 55.09,
+ "STS22 (fra-Latn)": 77.1,
+ "STS22 (tur-Latn)": 29.35,
+ "STS22 (cmn-Hans)": 42.24,
+ "STS22 (deu-Latn)": 27.0,
+ "STS22 (spa-Latn_ita-Latn)": 41.61,
+ "STS22 (fra-Latn_pol-Latn)": 73.25,
+ "STS22 (deu-Latn_eng-Latn)": 49.73,
+ "STS22 (cmn-Hans_eng-Latn)": 40.47,
+ "STS22 (spa-Latn)": 55.98,
+ "STS22 (ara-Arab)": 38.96,
+ "STS22 (en)": 68.39,
+ "STS22 (deu-Latn_pol-Latn)": 23.53,
+ "STS22 (rus-Cyrl)": 15.83,
+ "STS22 (pol-Latn_eng-Latn)": 51.07,
+ "STS22 (deu-Latn_fra-Latn)": 31.39,
+ "STSB (cmn-Hans)": 37.7,
+ "STSBenchmark": 83.42,
+ "STSBenchmarkMultilingualSTS (nld-Latn)": 57.01,
+ "STSBenchmarkMultilingualSTS (rus-Cyrl)": 55.54,
+ "STSBenchmarkMultilingualSTS (fra-Latn)": 65.15,
+ "STSBenchmarkMultilingualSTS (ita-Latn)": 62.72,
+ "STSBenchmarkMultilingualSTS (spa-Latn)": 65.78,
+ "STSBenchmarkMultilingualSTS (en)": 83.42,
+ "STSBenchmarkMultilingualSTS (deu-Latn)": 61.43,
+ "STSBenchmarkMultilingualSTS (por-Latn)": 62.12,
+ "STSBenchmarkMultilingualSTS (cmn-Hans)": 39.43,
+ "STSBenchmarkMultilingualSTS (pol-Latn)": 52.36
+ },
+ {
+ "Model": "all-mpnet-base-v2",
+ "STS17 (en-en)": 90.6,
+ "STS22 (en)": 67.95
}
]
},
"Summarization": {
- "spearman": [
+ "spearman": [],
+ "cosine_spearman": [
{
- "Model": "text-embedding-3-large",
- "SummEval": 29.92
+ "Model": "all-mpnet-base-v2",
+ "SummEval": 27.49,
+ "SummEvalFr (fra-Latn)": 28.11
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "text-embedding-3-large",
- "Core17InstructionRetrieval": -0.2,
- "News21InstructionRetrieval": -2.03,
- "Robust04InstructionRetrieval": -5.81
+ "Model": "all-mpnet-base-v2",
+ "Core17InstructionRetrieval": -0.74,
+ "News21InstructionRetrieval": -1.79,
+ "Robust04InstructionRetrieval": -6.71
}
]
}
},
- "nb-bert-large": {
+ "instructor-xl": {
"BitextMining": {
- "f1": [
- {
- "Model": "nb-bert-large",
- "BornholmBitextMining": 4.53
- }
- ]
+ "f1": []
},
"Classification": {
- "accuracy": [
- {
- "Model": "nb-bert-large",
- "AngryTweetsClassification": 52.14,
- "DKHateClassification": 62.13,
- "DanishPoliticalCommentsClassification": 35.04,
- "LccSentimentClassification": 56.27,
- "MassiveIntentClassification (da)": 57.03,
- "MassiveIntentClassification (nb)": 62.68,
- "MassiveIntentClassification (sv)": 55.02,
- "MassiveScenarioClassification (da)": 60.43,
- "MassiveScenarioClassification (nb)": 67.44,
- "MassiveScenarioClassification (sv)": 57.12,
- "NoRecClassification": 55.46,
- "NordicLangClassification": 85.27,
- "NorwegianParliament": 62.58,
- "ScalaDaClassification": 62.85,
- "ScalaNbClassification": 66.97
- }
- ]
+ "accuracy": []
},
"Clustering": {
- "v_measure": [
- {
- "Model": "nb-bert-large"
- }
- ]
+ "v_measure": []
},
"PairClassification": {
- "ap": [
- {
- "Model": "nb-bert-large"
- }
- ]
+ "ap": []
},
"Reranking": {
- "map": [
+ "map": []
+ },
+ "Retrieval": {
+ "ndcg_at_10": [
{
- "Model": "nb-bert-large"
+ "Model": "instructor-xl",
+ "BrightRetrieval (aops)": 8.26,
+ "BrightRetrieval (robotics)": 17.39,
+ "BrightRetrieval (economics)": 22.81,
+ "BrightRetrieval (stackoverflow)": 19.06,
+ "BrightRetrieval (leetcode)": 27.5,
+ "BrightRetrieval (theoremqa_questions)": 14.59,
+ "BrightRetrieval (psychology)": 27.43,
+ "BrightRetrieval (biology)": 21.91,
+ "BrightRetrieval (theoremqa_theorems)": 6.22,
+ "BrightRetrieval (earth_science)": 34.35,
+ "BrightRetrieval (sustainable_living)": 18.82,
+ "BrightRetrieval (pony)": 5.02
}
- ]
- },
- "Retrieval": {
- "ndcg_at_10": [
+ ],
+ "recall_at_1": [
{
- "Model": "nb-bert-large"
+ "Model": "instructor-xl",
+ "BrightRetrieval (stackoverflow)": 14.96,
+ "BrightRetrieval (biology)": 22.01,
+ "BrightRetrieval (sustainable_living)": 20.14,
+ "BrightRetrieval (pony)": 5.93,
+ "BrightRetrieval (psychology)": 20.5,
+ "BrightRetrieval (robotics)": 12.87,
+ "BrightRetrieval (economics)": 14.08,
+ "BrightRetrieval (earth_science)": 32.04
}
]
},
"STS": {
- "spearman": [
- {
- "Model": "nb-bert-large"
- }
- ]
+ "spearman": []
},
"Summarization": {
- "spearman": [
- {
- "Model": "nb-bert-large"
- }
- ]
+ "spearman": []
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "nb-bert-large"
+ "Model": "instructor-xl",
+ "Core17InstructionRetrieval": 0.69,
+ "News21InstructionRetrieval": -0.9,
+ "Robust04InstructionRetrieval": -8.08
}
]
}
},
- "e5-large-v2": {
+ "google-gecko.text-embedding-preview-0409": {
"BitextMining": {
- "f1": [
- {
- "Model": "e5-large-v2"
- }
- ]
+ "f1": []
},
"Classification": {
"accuracy": [
{
- "Model": "e5-large-v2"
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "AmazonCounterfactualClassification (en)": 75.34,
+ "AmazonPolarityClassification": 97.34,
+ "AmazonReviewsClassification (en)": 51.17,
+ "Banking77Classification": 88.62,
+ "EmotionClassification": 52.51,
+ "ImdbClassification": 95.65,
+ "MTOPDomainClassification (en)": 98.35,
+ "MTOPIntentClassification (en)": 83.43,
+ "MassiveIntentClassification (en)": 80.22,
+ "MassiveScenarioClassification (en)": 87.19,
+ "ToxicConversationsClassification": 89.67,
+ "TweetSentimentExtractionClassification": 74.52
}
]
},
"Clustering": {
"v_measure": [
{
- "Model": "e5-large-v2",
- "BiorxivClusteringP2P": 36.72,
- "BiorxivClusteringS2S": 35.47,
- "MedrxivClusteringP2P": 31.45,
- "MedrxivClusteringS2S": 29.91,
- "RedditClustering": 55.5,
- "RedditClusteringP2P": 63.71,
- "StackExchangeClustering": 65.23,
- "StackExchangeClusteringP2P": 33.62,
- "TwentyNewsgroupsClustering": 48.73
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "ArxivClusteringP2P": 46.27,
+ "ArxivClusteringS2S": 38.36,
+ "BiorxivClusteringP2P": 37.87,
+ "BiorxivClusteringS2S": 35.67,
+ "MedrxivClusteringP2P": 33.11,
+ "MedrxivClusteringS2S": 31.54,
+ "RedditClustering": 65.81,
+ "RedditClusteringP2P": 66.62,
+ "StackExchangeClustering": 74.52,
+ "StackExchangeClusteringP2P": 37.63,
+ "TwentyNewsgroupsClustering": 54.87
}
]
},
"PairClassification": {
- "ap": [
+ "ap": [],
+ "cos_sim_ap": [
{
- "Model": "e5-large-v2"
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "SprintDuplicateQuestions": 96.26,
+ "TwitterSemEval2015": 79.04,
+ "TwitterURLCorpus": 87.53
}
]
},
"Reranking": {
"map": [
{
- "Model": "e5-large-v2"
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "AskUbuntuDupQuestions": 64.4,
+ "MindSmallReranking": 33.07,
+ "SciDocsRR": 83.59,
+ "StackOverflowDupQuestions": 54.56
}
]
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "e5-large-v2"
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "ArguAna": 62.18,
+ "BrightRetrieval (earth_science)": 34.38,
+ "BrightRetrieval (leetcode)": 29.64,
+ "BrightRetrieval (theoremqa_questions)": 21.51,
+ "BrightRetrieval (aops)": 9.33,
+ "BrightRetrieval (sustainable_living)": 17.25,
+ "BrightRetrieval (pony)": 3.59,
+ "BrightRetrieval (theoremqa_theorems)": 14.31,
+ "BrightRetrieval (stackoverflow)": 17.93,
+ "BrightRetrieval (biology)": 22.98,
+ "BrightRetrieval (robotics)": 15.98,
+ "BrightRetrieval (economics)": 19.5,
+ "BrightRetrieval (psychology)": 27.86,
+ "CQADupstackRetrieval": 48.89,
+ "ClimateFEVER": 33.21,
+ "DBPedia": 47.12,
+ "FEVER": 86.96,
+ "FiQA2018": 59.24,
+ "HotpotQA": 71.33,
+ "MSMARCO": 32.58,
+ "NFCorpus": 40.33,
+ "NQ": 61.28,
+ "QuoraRetrieval": 88.18,
+ "SCIDOCS": 20.34,
+ "SciFact": 75.42,
+ "TRECCOVID": 82.62,
+ "Touche2020": 25.86
+ }
+ ],
+ "recall_at_1": [
+ {
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "BrightRetrieval (economics)": 21.84,
+ "BrightRetrieval (stackoverflow)": 19.23,
+ "BrightRetrieval (pony)": 0.29,
+ "BrightRetrieval (earth_science)": 38.0,
+ "BrightRetrieval (sustainable_living)": 25.65,
+ "BrightRetrieval (robotics)": 12.87,
+ "BrightRetrieval (psychology)": 30.69,
+ "BrightRetrieval (biology)": 30.91
}
]
},
"STS": {
- "spearman": [
+ "spearman": [],
+ "cos_sim_spearman": [
{
- "Model": "e5-large-v2"
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "BIOSSES": 89.46,
+ "SICK-R": 81.93,
+ "STS12": 77.59,
+ "STS13": 90.36,
+ "STS14": 85.25,
+ "STS15": 89.66,
+ "STS16": 87.34,
+ "STS17 (en-en)": 92.06,
+ "STS22 (en)": 68.02,
+ "STSBenchmark": 88.99
}
]
},
"Summarization": {
- "spearman": [
+ "spearman": [],
+ "cos_sim_spearman": [
{
- "Model": "e5-large-v2"
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "SummEval": 32.63
}
]
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "e5-large-v2",
- "Core17InstructionRetrieval": 0.12,
- "News21InstructionRetrieval": 0.87,
- "Robust04InstructionRetrieval": -4.16
+ "Model": "google-gecko.text-embedding-preview-0409",
+ "Core17InstructionRetrieval": 5.44,
+ "News21InstructionRetrieval": 3.94,
+ "Robust04InstructionRetrieval": -2.4
}
]
}
},
- "universal-sentence-encoder-multilingual-3": {
+ "instructor-large": {
"BitextMining": {
- "f1": [
- {
- "Model": "universal-sentence-encoder-multilingual-3"
- }
- ]
+ "f1": []
},
"Classification": {
- "accuracy": [
- {
- "Model": "universal-sentence-encoder-multilingual-3",
- "AmazonReviewsClassification (fr)": 33.51,
- "MTOPDomainClassification (fr)": 85.5,
- "MTOPIntentClassification (fr)": 53.98,
- "MasakhaNEWSClassification (fra)": 82.06,
- "MassiveIntentClassification (fr)": 61.19,
- "MassiveScenarioClassification (fr)": 70.22
- }
- ]
+ "accuracy": []
},
"Clustering": {
- "v_measure": [
- {
- "Model": "universal-sentence-encoder-multilingual-3",
- "AlloProfClusteringP2P": 56.9,
- "AlloProfClusteringS2S": 37.84,
- "HALClusteringS2S": 18.95,
- "MLSUMClusteringP2P": 43.9,
- "MLSUMClusteringS2S": 35.5,
- "MasakhaNEWSClusteringP2P (fra)": 60.57,
- "MasakhaNEWSClusteringS2S (fra)": 40.31
- }
- ]
+ "v_measure": []
},
"PairClassification": {
- "ap": [
- {
- "Model": "universal-sentence-encoder-multilingual-3",
- "OpusparcusPC (fr)": 91.46,
- "PawsXPairClassification (fr)": 52.39
- }
- ]
+ "ap": []
},
"Reranking": {
- "map": [
- {
- "Model": "universal-sentence-encoder-multilingual-3",
- "AlloprofReranking": 56.23,
- "SyntecReranking": 73.85
- }
- ]
+ "map": []
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "universal-sentence-encoder-multilingual-3",
- "AlloprofRetrieval": 35.27,
- "BSARDRetrieval": 0.0,
- "MintakaRetrieval (fr)": 26.12,
- "SyntecRetrieval": 69.82,
- "XPQARetrieval (fr)": 59.59
+ "Model": "instructor-large",
+ "BrightRetrieval (pony)": 1.32,
+ "BrightRetrieval (sustainable_living)": 13.16,
+ "BrightRetrieval (aops)": 7.94,
+ "BrightRetrieval (biology)": 15.61,
+ "BrightRetrieval (stackoverflow)": 11.21,
+ "BrightRetrieval (theoremqa_theorems)": 8.27,
+ "BrightRetrieval (psychology)": 21.94,
+ "BrightRetrieval (economics)": 15.99,
+ "BrightRetrieval (robotics)": 11.45,
+ "BrightRetrieval (leetcode)": 20.0,
+ "BrightRetrieval (earth_science)": 21.52,
+ "BrightRetrieval (theoremqa_questions)": 20.07
}
- ]
- },
- "STS": {
- "spearman": [
+ ],
+ "recall_at_1": [
{
- "Model": "universal-sentence-encoder-multilingual-3",
- "SICKFr": 71.37,
- "STS22 (fr)": 77.91,
- "STSBenchmarkMultilingualSTS (fr)": 75.48
+ "Model": "instructor-large",
+ "BrightRetrieval (stackoverflow)": 14.53,
+ "BrightRetrieval (pony)": 3.94,
+ "BrightRetrieval (economics)": 14.08,
+ "BrightRetrieval (earth_science)": 29.45,
+ "BrightRetrieval (sustainable_living)": 25.42,
+ "BrightRetrieval (psychology)": 21.29,
+ "BrightRetrieval (robotics)": 12.87,
+ "BrightRetrieval (biology)": 24.11
}
]
},
+ "STS": {
+ "spearman": []
+ },
"Summarization": {
- "spearman": [
- {
- "Model": "universal-sentence-encoder-multilingual-3",
- "SummEvalFr": 28.21
- }
- ]
+ "spearman": []
},
"InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "universal-sentence-encoder-multilingual-3"
- }
- ]
+ "p-MRR": []
}
},
- "LaBSE-ru-turbo": {
+ "bge-large-en-v1.5": {
"BitextMining": {
- "f1": [
- {
- "Model": "LaBSE-ru-turbo",
- "Tatoeba (rus-Cyrl_eng-Latn)": 93.22
- }
- ]
+ "f1": []
},
"Classification": {
- "accuracy": [
- {
- "Model": "LaBSE-ru-turbo",
- "GeoreviewClassification (rus-Cyrl)": 46.04,
- "HeadlineClassification (rus-Cyrl)": 69.98,
- "InappropriatenessClassification (rus-Cyrl)": 61.39,
- "KinopoiskClassification (rus-Cyrl)": 53.59,
- "MassiveIntentClassification (rus-Cyrl)": 66.08,
- "MassiveScenarioClassification (rus-Cyrl)": 71.13,
- "RuReviewsClassification (rus-Cyrl)": 64.58,
- "RuSciBenchGRNTIClassification (rus-Cyrl)": 56.67,
- "RuSciBenchOECDClassification (rus-Cyrl)": 43.58
- }
- ]
+ "accuracy": []
},
"Clustering": {
- "v_measure": [
- {
- "Model": "LaBSE-ru-turbo",
- "GeoreviewClusteringP2P (rus-Cyrl)": 64.55,
- "MLSUMClusteringP2P (rus-Cyrl)": 45.7,
- "MLSUMClusteringS2S (rus-Cyrl)": 42.93,
- "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 50.64,
- "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 44.48
- }
- ]
+ "v_measure": []
},
- "PairClassification": {
- "ap": [
- {
- "Model": "LaBSE-ru-turbo",
- "OpusparcusPC (rus-Cyrl)": 89.32,
- "TERRa (rus-Cyrl)": 57.81
- }
- ]
+ "PairClassification": {
+ "ap": []
},
"Reranking": {
- "map": [
- {
- "Model": "LaBSE-ru-turbo",
- "RuBQReranking (rus-Cyrl)": 68.65
- }
- ]
+ "map": []
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "LaBSE-ru-turbo",
- "RiaNewsRetrieval (rus-Cyrl)": 69.36,
- "RuBQRetrieval (rus-Cyrl)": 65.71
+ "Model": "bge-large-en-v1.5",
+ "AILACasedocs": 25.15,
+ "AILAStatutes": 20.74,
+ "ARCChallenge": 9.99,
+ "AlphaNLI": 13.13,
+ "BrightRetrieval (stackoverflow)": 9.51,
+ "BrightRetrieval (earth_science)": 24.15,
+ "BrightRetrieval (aops)": 6.08,
+ "BrightRetrieval (sustainable_living)": 13.27,
+ "BrightRetrieval (psychology)": 17.44,
+ "BrightRetrieval (robotics)": 12.21,
+ "BrightRetrieval (theoremqa_theorems)": 6.72,
+ "BrightRetrieval (pony)": 5.64,
+ "BrightRetrieval (biology)": 11.96,
+ "BrightRetrieval (theoremqa_questions)": 12.56,
+ "BrightRetrieval (leetcode)": 26.68,
+ "BrightRetrieval (economics)": 16.59,
+ "GerDaLIRSmall": 3.96,
+ "HellaSwag": 28.5,
+ "LeCaRDv2": 22.68,
+ "LegalBenchConsumerContractsQA": 73.52,
+ "LegalBenchCorporateLobbying": 91.51,
+ "LegalQuAD": 16.22,
+ "LegalSummarization": 59.99,
+ "PIQA": 27.99,
+ "Quail": 1.83,
+ "RARbCode": 48.12,
+ "RARbMath": 57.36,
+ "SIQA": 1.04,
+ "SpartQA": 2.99,
+ "TempReasonL1": 1.46,
+ "TempReasonL2Fact": 24.25,
+ "TempReasonL2Pure": 2.35,
+ "TempReasonL3Fact": 20.64,
+ "TempReasonL3Pure": 6.67,
+ "WinoGrande": 19.18
}
- ]
- },
- "STS": {
- "spearman": [
+ ],
+ "recall_at_1": [
{
- "Model": "LaBSE-ru-turbo",
- "RUParaPhraserSTS (rus-Cyrl)": 72.97,
- "RuSTSBenchmarkSTS (rus-Cyrl)": 81.77,
- "STS22 (rus-Cyrl)": 62.89,
- "STSBenchmarkMultilingualSTS (rus-Cyrl)": 81.81
+ "Model": "bge-large-en-v1.5",
+ "BrightRetrieval (pony)": 0.36,
+ "BrightRetrieval (psychology)": 11.58,
+ "BrightRetrieval (stackoverflow)": 13.25,
+ "BrightRetrieval (robotics)": 10.89,
+ "BrightRetrieval (earth_science)": 27.73,
+ "BrightRetrieval (biology)": 16.42,
+ "BrightRetrieval (economics)": 20.87,
+ "BrightRetrieval (sustainable_living)": 16.9
}
]
},
+ "STS": {
+ "spearman": []
+ },
"Summarization": {
- "spearman": [
- {
- "Model": "LaBSE-ru-turbo"
- }
- ]
+ "spearman": []
},
"InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "LaBSE-ru-turbo"
- }
- ]
+ "p-MRR": []
}
},
- "bge-large-zh-noinstruct": {
+ "SFR-Embedding-Mistral": {
"BitextMining": {
- "f1": [
- {
- "Model": "bge-large-zh-noinstruct"
- }
- ]
+ "f1": []
},
"Classification": {
- "accuracy": [
- {
- "Model": "bge-large-zh-noinstruct",
- "AmazonReviewsClassification (zh)": 41.94,
- "IFlyTek": 45.32,
- "JDReview": 85.38,
- "MassiveIntentClassification (zh-CN)": 66.96,
- "MassiveScenarioClassification (zh-CN)": 73.39,
- "MultilingualSentiment": 73.7,
- "OnlineShopping": 91.66,
- "TNews": 52.05,
- "Waimai": 86.83
- }
- ]
+ "accuracy": []
},
"Clustering": {
- "v_measure": [
- {
- "Model": "bge-large-zh-noinstruct",
- "CLSClusteringP2P": 41.23,
- "CLSClusteringS2S": 40.04,
- "ThuNewsClusteringP2P": 62.03,
- "ThuNewsClusteringS2S": 56.75
- }
- ]
+ "v_measure": []
},
"PairClassification": {
- "ap": [
- {
- "Model": "bge-large-zh-noinstruct",
- "Cmnli": 82.17,
- "Ocnli": 71.37
- }
- ]
+ "ap": []
},
"Reranking": {
- "map": [
- {
- "Model": "bge-large-zh-noinstruct",
- "CMedQAv1": 81.72,
- "CMedQAv2": 84.64,
- "MMarcoReranking": 27.1,
- "T2Reranking": 66.16
- }
- ]
+ "map": []
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "bge-large-zh-noinstruct",
- "CmedqaRetrieval": 41.03,
- "CovidRetrieval": 75.07,
- "DuRetrieval": 84.68,
- "EcomRetrieval": 65.6,
- "MMarcoRetrieval": 81.38,
- "MedicalRetrieval": 58.28,
- "T2Retrieval": 84.39,
- "VideoRetrieval": 73.93
+ "Model": "SFR-Embedding-Mistral",
+ "BrightRetrieval (sustainable_living)": 19.79,
+ "BrightRetrieval (economics)": 17.84,
+ "BrightRetrieval (theoremqa_theorems)": 24.32,
+ "BrightRetrieval (aops)": 7.43,
+ "BrightRetrieval (theoremqa_questions)": 23.05,
+ "BrightRetrieval (psychology)": 18.97,
+ "BrightRetrieval (stackoverflow)": 12.72,
+ "BrightRetrieval (pony)": 1.97,
+ "BrightRetrieval (leetcode)": 27.35,
+ "BrightRetrieval (biology)": 19.49,
+ "BrightRetrieval (earth_science)": 26.63,
+ "BrightRetrieval (robotics)": 16.7
}
- ]
- },
- "STS": {
- "spearman": [
+ ],
+ "recall_at_1": [
{
- "Model": "bge-large-zh-noinstruct",
- "AFQMC": 43.06,
- "ATEC": 48.29,
- "BQ": 60.53,
- "LCQMC": 74.71,
- "PAWSX": 16.64,
- "QBQTC": 35.2,
- "STS22 (zh)": 67.19,
- "STSB": 78.41
+ "Model": "SFR-Embedding-Mistral",
+ "BrightRetrieval (earth_science)": 37.0,
+ "BrightRetrieval (biology)": 30.26,
+ "BrightRetrieval (stackoverflow)": 14.53,
+ "BrightRetrieval (sustainable_living)": 34.99,
+ "BrightRetrieval (psychology)": 47.72,
+ "BrightRetrieval (pony)": 2.0,
+ "BrightRetrieval (economics)": 24.27,
+ "BrightRetrieval (robotics)": 17.33
}
]
},
+ "STS": {
+ "spearman": []
+ },
"Summarization": {
- "spearman": [
- {
- "Model": "bge-large-zh-noinstruct"
- }
- ]
+ "spearman": []
},
"InstructionRetrieval": {
- "p-MRR": [
- {
- "Model": "bge-large-zh-noinstruct"
- }
- ]
+ "p-MRR": []
}
},
- "bm25s": {
+ "Cohere-embed-english-v3.0": {
"BitextMining": {
- "f1": [
- {
- "Model": "bm25s"
- }
- ]
+ "f1": []
},
"Classification": {
- "accuracy": [
- {
- "Model": "bm25s"
- }
- ]
+ "accuracy": []
},
"Clustering": {
- "v_measure": [
- {
- "Model": "bm25s"
- }
- ]
+ "v_measure": []
},
"PairClassification": {
- "ap": [
- {
- "Model": "bm25s"
- }
- ]
+ "ap": []
},
"Reranking": {
- "map": [
- {
- "Model": "bm25s"
- }
- ]
+ "map": []
},
"Retrieval": {
"ndcg_at_10": [
{
- "Model": "bm25s",
- "ArguAna": 49.28,
- "CQADupstackRetrieval": 31.86,
- "ClimateFEVER": 13.62,
- "DBPedia": 29.91,
- "FEVER": 48.09,
- "FiQA2018": 25.14,
- "HotpotQA": 56.91,
- "MSMARCO": 21.89,
- "NFCorpus": 32.08,
- "NQ": 28.5,
- "QuoraRetrieval": 80.42,
- "SCIDOCS": 15.78,
- "SciFact": 68.7,
- "TRECCOVID": 62.31,
- "Touche2020": 33.05
+ "Model": "Cohere-embed-english-v3.0",
+ "AILACasedocs": 31.54,
+ "AILAStatutes": 27.15,
+ "ARCChallenge": 9.89,
+ "AlphaNLI": 15.1,
+ "BrightRetrieval (psychology)": 21.82,
+ "BrightRetrieval (economics)": 20.18,
+ "BrightRetrieval (robotics)": 16.21,
+ "BrightRetrieval (biology)": 18.98,
+ "BrightRetrieval (stackoverflow)": 16.47,
+ "BrightRetrieval (theoremqa_theorems)": 7.14,
+ "BrightRetrieval (pony)": 1.77,
+ "BrightRetrieval (sustainable_living)": 17.69,
+ "BrightRetrieval (aops)": 6.46,
+ "BrightRetrieval (theoremqa_questions)": 15.07,
+ "BrightRetrieval (leetcode)": 26.78,
+ "BrightRetrieval (earth_science)": 27.45,
+ "GerDaLIRSmall": 6.05,
+ "HellaSwag": 26.35,
+ "LeCaRDv2": 21.02,
+ "LegalBenchConsumerContractsQA": 77.12,
+ "LegalBenchCorporateLobbying": 93.68,
+ "LegalQuAD": 26.08,
+ "LegalSummarization": 61.7,
+ "PIQA": 28.49,
+ "Quail": 4.1,
+ "RARbCode": 57.19,
+ "RARbMath": 72.26,
+ "SIQA": 4.26,
+ "SpartQA": 3.75,
+ "TempReasonL1": 1.5,
+ "TempReasonL2Fact": 35.91,
+ "TempReasonL2Pure": 1.89,
+ "TempReasonL3Fact": 27.51,
+ "TempReasonL3Pure": 8.53,
+ "WinoGrande": 58.01
}
- ]
- },
- "STS": {
- "spearman": [
+ ],
+ "recall_at_1": [
{
- "Model": "bm25s"
+ "Model": "Cohere-embed-english-v3.0",
+ "BrightRetrieval (robotics)": 9.9,
+ "BrightRetrieval (psychology)": 20.5,
+ "BrightRetrieval (biology)": 31.47,
+ "BrightRetrieval (economics)": 17.96,
+ "BrightRetrieval (stackoverflow)": 15.81,
+ "BrightRetrieval (pony)": 0.84,
+ "BrightRetrieval (sustainable_living)": 15.23,
+ "BrightRetrieval (earth_science)": 35.49
}
]
},
+ "STS": {
+ "spearman": []
+ },
"Summarization": {
- "spearman": [
- {
- "Model": "bm25s"
- }
- ]
+ "spearman": []
},
"InstructionRetrieval": {
"p-MRR": [
{
- "Model": "bm25s"
+ "Model": "Cohere-embed-english-v3.0",
+ "Core17InstructionRetrieval": 2.8,
+ "News21InstructionRetrieval": 0.2,
+ "Robust04InstructionRetrieval": -3.63
}
]
}