leaderboard_demo

Running

App Files Files Community

Muennighoff committed on Feb 28

Commit

cd84165

•

1 Parent(s): 51e109c

French MTEB

Browse files

Files changed (2) hide show

EXTERNAL_MODEL_RESULTS.json +0 -0
app.py +467 -61

EXTERNAL_MODEL_RESULTS.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

app.py CHANGED Viewed

@@ -38,8 +38,6 @@ TASK_LIST_CLASSIFICATION = [
     "TweetSentimentExtractionClassification",
 ]
-TASK_LIST_CLASSIFICATION_NORM = [x.replace(" (en)", "") for x in TASK_LIST_CLASSIFICATION]
 TASK_LIST_CLASSIFICATION_DA = [
     "AngryTweetsClassification",
     "DanishPoliticalCommentsClassification",
@@ -51,6 +49,15 @@ TASK_LIST_CLASSIFICATION_DA = [
     "ScalaDaClassification",
 ]
 TASK_LIST_CLASSIFICATION_NB = [
     "NoRecClassification",
     "NordicLangClassification",
@@ -115,6 +122,16 @@ TASK_LIST_CLUSTERING_DE = [
     "TenKGnadClusteringS2S",
 ]
 TASK_LIST_CLUSTERING_PL = [
     "8TagsClustering",
 ]
@@ -132,6 +149,11 @@ TASK_LIST_PAIR_CLASSIFICATION = [
     "TwitterURLCorpus",
 ]
 TASK_LIST_PAIR_CLASSIFICATION_PL = [
     "CDSC-E",
     "PPC",
@@ -151,6 +173,11 @@ TASK_LIST_RERANKING = [
     "StackOverflowDupQuestions",
 ]
 TASK_LIST_RERANKING_ZH = [
     "CMedQAv1",
     "CMedQAv2",
@@ -176,6 +203,15 @@ TASK_LIST_RETRIEVAL = [
     "TRECCOVID",
 ]
 TASK_LIST_RETRIEVAL_PL = [
     "ArguAna-PL",
     "DBPedia-PL",
@@ -229,6 +265,12 @@ TASK_LIST_STS = [
     "STSBenchmark",
 ]
 TASK_LIST_STS_PL = [
     "CDSC-R",
     "SICK-R-PL",
@@ -247,11 +289,13 @@ TASK_LIST_STS_ZH = [
 ]
 TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",]
-TASK_LIST_STS_NORM = [x.replace(" (en)", "").replace(" (en-en)", "") for x in TASK_LIST_STS]
 TASK_LIST_SUMMARIZATION = ["SummEval",]
 TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
 TASK_LIST_PL = TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL
 TASK_LIST_ZH = TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH
@@ -276,11 +320,22 @@ def make_clickable_model(model_name, link=None):
 # Models without metadata, thus we cannot fetch their results naturally
 EXTERNAL_MODELS = [
     "all-MiniLM-L12-v2",
     "all-MiniLM-L6-v2",
     "all-mpnet-base-v2",
     "allenai-specter",
-    "Baichuan-text-embedding",
     "bert-base-swedish-cased",
     "bert-base-uncased",
     "bge-base-zh-v1.5",
@@ -292,18 +347,21 @@ EXTERNAL_MODELS = [
     "dfm-encoder-large-v1",
     "dfm-sentence-encoder-large-1",
     "distiluse-base-multilingual-cased-v2",
-    "DanskBERT",
     "e5-base",
     "e5-large",
-    "e5-small",
     "electra-small-nordic",
     "electra-small-swedish-cased-discriminator",
     "gbert-base",
     "gbert-large",
     "gelectra-base",
     "gelectra-large",
-    "gottbert-base",
     "glove.6B.300d",
     "gtr-t5-base",
     "gtr-t5-large",
     "gtr-t5-xl",
@@ -311,11 +369,11 @@ EXTERNAL_MODELS = [
     "herbert-base-retrieval-v2",
     "komninos",
     "luotuo-bert-medium",
-    "LASER2",
-    "LaBSE",
     "m3e-base",
-    "m3e-large",
     "msmarco-bert-co-condensor",
     "multilingual-e5-base",
     "multilingual-e5-large",
     "multilingual-e5-small",
@@ -330,14 +388,19 @@ EXTERNAL_MODELS = [
     "paraphrase-multilingual-MiniLM-L12-v2",
     "paraphrase-multilingual-mpnet-base-v2",
     "sentence-bert-swedish-cased",
     "sentence-t5-base",
     "sentence-t5-large",
     "sentence-t5-xl",
     "sentence-t5-xxl",
     "sup-simcse-bert-base-uncased",
     "st-polish-paraphrase-from-distilroberta",
-    "st-polish-paraphrase-from-mpnet",
     "text2vec-base-chinese",
     "text2vec-large-chinese",
     "text-embedding-3-small",
     "text-embedding-3-large",
@@ -353,38 +416,62 @@ EXTERNAL_MODELS = [
     "text-search-curie-001",
     "text-search-davinci-001",
     "titan-embed-text-v1",
     "unsup-simcse-bert-base-uncased",
     "use-cmlm-multilingual",
     "voyage-lite-01-instruct",
-    "voyage-lite-02-instruct",
     "xlm-roberta-base",
-    "xlm-roberta-large",
 ]
 EXTERNAL_MODEL_TO_LINK = {
     "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
     "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
     "all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
     "all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
     "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
     "Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding",
     "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
     "bert-base-uncased": "https://huggingface.co/bert-base-uncased",
     "bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
     "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
     "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
     "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
     "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
     "cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
     "DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
     "distiluse-base-multilingual-cased-v2": "https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2",
     "dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
     "dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
     "e5-base": "https://huggingface.co/intfloat/e5-base",
     "e5-large": "https://huggingface.co/intfloat/e5-large",
     "e5-small": "https://huggingface.co/intfloat/e5-small",
     "electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic",
     "electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator",
     "gbert-base": "https://huggingface.co/deepset/gbert-base",
     "gbert-large": "https://huggingface.co/deepset/gbert-large",
     "gelectra-base": "https://huggingface.co/deepset/gelectra-base",
@@ -402,7 +489,9 @@ EXTERNAL_MODEL_TO_LINK = {
     "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
     "m3e-base": "https://huggingface.co/moka-ai/m3e-base",
     "m3e-large": "https://huggingface.co/moka-ai/m3e-large",
     "msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor",
     "multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base",
     "multilingual-e5-large": "https://huggingface.co/intfloat/multilingual-e5-large",
     "multilingual-e5-small": "https://huggingface.co/intfloat/multilingual-e5-small",
@@ -414,13 +503,18 @@ EXTERNAL_MODEL_TO_LINK = {
     "nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
     "norbert3-base": "https://huggingface.co/ltg/norbert3-base",
     "norbert3-large": "https://huggingface.co/ltg/norbert3-large",
     "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
     "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
     "sentence-bert-swedish-cased": "https://huggingface.co/KBLab/sentence-bert-swedish-cased",
     "sentence-t5-base": "https://huggingface.co/sentence-transformers/sentence-t5-base",
     "sentence-t5-large": "https://huggingface.co/sentence-transformers/sentence-t5-large",
     "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
     "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
     "sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased",
     "st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta",
     "st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
@@ -441,8 +535,14 @@ EXTERNAL_MODEL_TO_LINK = {
     "text-search-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
     "text-search-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
     "titan-embed-text-v1": "https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html",
     "unsup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased",
     "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
     "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
     "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
     "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
@@ -450,28 +550,46 @@ EXTERNAL_MODEL_TO_LINK = {
 }
 EXTERNAL_MODEL_TO_DIM = {
     "all-MiniLM-L12-v2": 384,
     "all-MiniLM-L6-v2": 384,
     "all-mpnet-base-v2": 768,
     "allenai-specter": 768,
     "Baichuan-text-embedding": 1024,
     "bert-base-swedish-cased": 768,
     "bert-base-uncased": 768,
     "bge-base-zh-v1.5": 768,
     "bge-large-zh-v1.5": 1024,
     "bge-large-zh-noinstruct": 1024,
     "bge-small-zh-v1.5": 512,
     "contriever-base-msmarco": 768,
     "cross-en-de-roberta-sentence-transformer": 768,
     "DanskBERT": 768,
     "distiluse-base-multilingual-cased-v2": 512,
     "dfm-encoder-large-v1": 1024,
     "dfm-sentence-encoder-large-1": 1024,
     "e5-base": 768,
     "e5-small": 384,
-    "e5-large": 1024,
     "electra-small-nordic": 256,
     "electra-small-swedish-cased-discriminator": 256,
     "luotuo-bert-medium": 768,
     "LASER2": 1024,
     "LaBSE": 768,
@@ -489,7 +607,9 @@ EXTERNAL_MODEL_TO_DIM = {
     "komninos": 300,
     "m3e-base": 768,
     "m3e-large": 768,
     "msmarco-bert-co-condensor": 768,
     "multilingual-e5-base": 768,
     "multilingual-e5-small": 384,
     "multilingual-e5-large": 1024,
@@ -501,13 +621,18 @@ EXTERNAL_MODEL_TO_DIM = {
     "nomic-embed-text-v1.5-512": 512,
     "norbert3-base": 768,
     "norbert3-large": 1024,
     "paraphrase-multilingual-MiniLM-L12-v2": 384,
     "paraphrase-multilingual-mpnet-base-v2": 768,
     "sentence-bert-swedish-cased": 768,
     "sentence-t5-base": 768,
     "sentence-t5-large": 768,
     "sentence-t5-xl": 768,
     "sentence-t5-xxl": 768,
     "sup-simcse-bert-base-uncased": 768,
     "st-polish-paraphrase-from-distilroberta": 768,
     "st-polish-paraphrase-from-mpnet": 768,
@@ -528,8 +653,14 @@ EXTERNAL_MODEL_TO_DIM = {
     "text-search-curie-001": 4096,
     "text-search-davinci-001": 12288,
     "titan-embed-text-v1": 1536,
     "unsup-simcse-bert-base-uncased": 768,
     "use-cmlm-multilingual": 768,
     "voyage-lite-01-instruct": 1024,
     "voyage-lite-02-instruct": 1024,
     "xlm-roberta-base":  768,
@@ -537,28 +668,46 @@ EXTERNAL_MODEL_TO_DIM = {
 }
 EXTERNAL_MODEL_TO_SEQLEN = {
     "all-MiniLM-L12-v2": 512,
     "all-MiniLM-L6-v2": 512,
     "all-mpnet-base-v2": 514,
     "allenai-specter": 512,
     "Baichuan-text-embedding": 512,
     "bert-base-swedish-cased": 512,
     "bert-base-uncased": 512,
     "bge-base-zh-v1.5": 512,
     "bge-large-zh-v1.5": 512,
     "bge-large-zh-noinstruct": 512,
-    "bge-small-zh-v1.5": 512,
     "contriever-base-msmarco": 512,
     "cross-en-de-roberta-sentence-transformer": 514,
     "DanskBERT": 514,
     "dfm-encoder-large-v1": 512,
     "dfm-sentence-encoder-large-1": 512,
     "distiluse-base-multilingual-cased-v2": 512,
     "e5-base": 512,
     "e5-large": 512,
     "e5-small": 512,
     "electra-small-nordic": 512,
     "electra-small-swedish-cased-discriminator": 512,
     "gbert-base": 512,
     "gbert-large": 512,
     "gelectra-base": 512,
@@ -575,8 +724,10 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "LASER2": "N/A",
     "LaBSE": 512,
     "m3e-base": 512,
-    "m3e-large": 512,
     "msmarco-bert-co-condensor": 512,
     "multilingual-e5-base": 514,
     "multilingual-e5-large": 514,
     "multilingual-e5-small": 512,
@@ -588,13 +739,18 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "nomic-embed-text-v1.5-512": 8192,
     "norbert3-base": 512,
     "norbert3-large": 512,
     "paraphrase-multilingual-MiniLM-L12-v2": 512,
     "paraphrase-multilingual-mpnet-base-v2": 514,
     "sentence-bert-swedish-cased": 512,
     "sentence-t5-base": 512,
     "sentence-t5-large": 512,
     "sentence-t5-xl": 512,
     "sentence-t5-xxl": 512,
     "sup-simcse-bert-base-uncased": 512,
     "st-polish-paraphrase-from-distilroberta": 514,
     "st-polish-paraphrase-from-mpnet": 514,
@@ -615,8 +771,14 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "text-search-curie-001": 2046,
     "text-search-davinci-001": 2046,
     "titan-embed-text-v1": 8000,
     "use-cmlm-multilingual": 512,
     "unsup-simcse-bert-base-uncased": 512,
     "voyage-lite-01-instruct": 4000,
     "voyage-lite-02-instruct": 4000,
     "xlm-roberta-base": 514,
@@ -628,23 +790,39 @@ EXTERNAL_MODEL_TO_SIZE = {
     "all-MiniLM-L12-v2": 0.13,
     "all-MiniLM-L6-v2": 0.09,
     "all-mpnet-base-v2": 0.44,
     "bert-base-uncased": 0.44,
     "bert-base-swedish-cased": 0.50,
     "bge-base-zh-v1.5": 0.41,
     "bge-large-zh-v1.5": 1.30,
     "bge-large-zh-noinstruct": 1.30,
-    "bge-small-zh-v1.5": 0.10,
     "cross-en-de-roberta-sentence-transformer": 1.11,
     "contriever-base-msmarco": 0.44,
     "DanskBERT": 0.50,
     "distiluse-base-multilingual-cased-v2": 0.54,
     "dfm-encoder-large-v1": 1.42,
     "dfm-sentence-encoder-large-1": 1.63,
     "e5-base": 0.44,
-    "e5-small": 0.13,
     "e5-large": 1.34,
     "electra-small-nordic": 0.09,
     "electra-small-swedish-cased-discriminator": 0.06,
     "gbert-base": 0.44,
     "gbert-large": 1.35,
     "gelectra-base": 0.44,
@@ -663,6 +841,7 @@ EXTERNAL_MODEL_TO_SIZE = {
     "m3e-base": 0.41,
     "m3e-large": 0.41,
     "msmarco-bert-co-condensor": 0.44,
     "multilingual-e5-base": 1.11,
     "multilingual-e5-small": 0.47,
     "multilingual-e5-large": 2.24,
@@ -676,11 +855,15 @@ EXTERNAL_MODEL_TO_SIZE = {
     "norbert3-large": 1.47,
     "paraphrase-multilingual-mpnet-base-v2": 1.11,
     "paraphrase-multilingual-MiniLM-L12-v2": 0.47,
     "sentence-bert-swedish-cased": 0.50,
     "sentence-t5-base": 0.22,
     "sentence-t5-large": 0.67,
     "sentence-t5-xl": 2.48,
     "sentence-t5-xxl": 9.73,
     "sup-simcse-bert-base-uncased": 0.44,
     "st-polish-paraphrase-from-distilroberta": 0.50,
     "st-polish-paraphrase-from-mpnet": 0.50,
@@ -807,16 +990,9 @@ MODELS_TO_SKIP = {
     "atian-chapters/Chapters-SFR-Embedding-Mistral", # Copy
     "rlsChapters/Chapters-SFR-Embedding-Mistral", # Copy
     "TitanML/jina-v2-base-en-embed", # Copy
-    "MaziyarPanahi/GritLM-8x7B-GGUF", # GGUF variant
 }
-if os.path.exists("EXTERNAL_MODEL_RESULTS.json"):
-    with open("EXTERNAL_MODEL_RESULTS.json") as f:
-        EXTERNAL_MODEL_RESULTS = json.load(f)
-else:
-    EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
 def add_lang(examples):
     if not(examples["eval_language"]):
         examples["mteb_dataset_name_with_lang"] = examples["mteb_dataset_name"]
@@ -824,48 +1000,62 @@ def add_lang(examples):
         examples["mteb_dataset_name_with_lang"] = examples["mteb_dataset_name"] + f' ({examples["eval_language"]})'
     return examples
 def add_task(examples):
     # Could be added to the dataset loading script instead
-    if examples["mteb_dataset_name"] in TASK_LIST_CLASSIFICATION_NORM + TASK_LIST_CLASSIFICATION_DA + TASK_LIST_CLASSIFICATION_NB + TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLASSIFICATION_SV + TASK_LIST_CLASSIFICATION_ZH:
         examples["mteb_task"] = "Classification"
-    elif examples["mteb_dataset_name"] in TASK_LIST_CLUSTERING + TASK_LIST_CLUSTERING_DE + TASK_LIST_CLUSTERING_PL + TASK_LIST_CLUSTERING_ZH:
         examples["mteb_task"] = "Clustering"
-    elif examples["mteb_dataset_name"] in TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_PAIR_CLASSIFICATION_ZH:
         examples["mteb_task"] = "PairClassification"
-    elif examples["mteb_dataset_name"] in TASK_LIST_RERANKING + TASK_LIST_RERANKING_ZH:
         examples["mteb_task"] = "Reranking"
-    elif examples["mteb_dataset_name"] in TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH:
         examples["mteb_task"] = "Retrieval"
-    elif examples["mteb_dataset_name"] in TASK_LIST_STS_NORM + TASK_LIST_STS_PL + TASK_LIST_STS_ZH:
         examples["mteb_task"] = "STS"
-    elif examples["mteb_dataset_name"] in TASK_LIST_SUMMARIZATION:
         examples["mteb_task"] = "Summarization"
-    elif examples["mteb_dataset_name"] in [x.split(" ")[0] for x in TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_OTHER]:
         examples["mteb_task"] = "BitextMining"
     else:
         print("WARNING: Task not found for dataset", examples["mteb_dataset_name"])
         examples["mteb_task"] = "Unknown"
     return examples
-if not(os.path.exists("EXTERNAL_MODEL_RESULTS.json")):
-    pbar = tqdm(EXTERNAL_MODELS, desc="Fetching external model results")
-    for model in pbar:
-        pbar.set_description(f"Fetching external model results for {model!r}")
-        ds = load_dataset("mteb/results", model, trust_remote_code=True)
-        # For local debugging:
-        #, download_mode='force_redownload', verification_mode="no_checks")
-        ds = ds.map(add_lang)
-        ds = ds.map(add_task)
-        base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))}
-        # For now only one metric per task - Could add more metrics lateron
-        for task, metric in TASK_TO_METRIC.items():
-            ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict()
-            ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
-            EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
-    # Save & cache EXTERNAL_MODEL_RESULTS
-    with open("EXTERNAL_MODEL_RESULTS.json", "w") as f:
-        json.dump(EXTERNAL_MODEL_RESULTS, f)
 def get_dim_seq_size(model):
     filenames = [sib.rfilename for sib in model.siblings]
@@ -1136,6 +1326,68 @@ def get_mteb_average_zh():
     return DATA_OVERALL_ZH
 def get_mteb_average_pl():
     global DATA_OVERALL_PL, DATA_CLASSIFICATION_PL, DATA_CLUSTERING_PL, DATA_PAIR_CLASSIFICATION_PL, DATA_RETRIEVAL_PL, DATA_STS_PL
     DATA_OVERALL_PL = get_mteb_data(
@@ -1191,6 +1443,7 @@ def get_mteb_average_pl():
     return DATA_OVERALL_PL
 get_mteb_average()
 get_mteb_average_pl()
 get_mteb_average_zh()
 DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
@@ -1212,6 +1465,7 @@ for d in [
     DATA_BITEXT_MINING_OTHER,
     DATA_CLASSIFICATION_EN,
     DATA_CLASSIFICATION_DA,
     DATA_CLASSIFICATION_NB,
     DATA_CLASSIFICATION_PL,
     DATA_CLASSIFICATION_SV,
@@ -1219,21 +1473,27 @@ for d in [
     DATA_CLASSIFICATION_OTHER,
     DATA_CLUSTERING,
     DATA_CLUSTERING_DE,
     DATA_CLUSTERING_PL,
     DATA_CLUSTERING_ZH,
     DATA_PAIR_CLASSIFICATION,
     DATA_PAIR_CLASSIFICATION_PL,
     DATA_PAIR_CLASSIFICATION_ZH,
     DATA_RERANKING,
     DATA_RERANKING_ZH,
     DATA_RETRIEVAL,
     DATA_RETRIEVAL_PL,
     DATA_RETRIEVAL_ZH,
     DATA_STS_EN,
     DATA_STS_PL,
     DATA_STS_ZH,
     DATA_STS_OTHER,
     DATA_SUMMARIZATION,
 ]:
     # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum()
     cols_to_ignore = 3 if "Average" in d.columns else 2
@@ -1308,7 +1568,26 @@ with block:
                     )
                 with gr.Row():
                     data_run_overall_zh = gr.Button("Refresh")
-                    data_run_overall_zh.click(get_mteb_average_zh, inputs=None, outputs=data_overall_zh)
             with gr.TabItem("Polish"):
                 with gr.Row():
                     gr.Markdown("""
@@ -1433,6 +1712,27 @@ with block:
                         partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_DA),
                         outputs=data_run_classification_da,
                     )
             with gr.TabItem("Norwegian"):
                 with gr.Row():
                     gr.Markdown("""
@@ -1558,6 +1858,27 @@ with block:
                         partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_ZH),
                         outputs=data_clustering_zh,
                     )
             with gr.TabItem("German"):
                 with gr.Row():
                     gr.Markdown("""
@@ -1642,6 +1963,27 @@ with block:
                         partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_ZH),
                         outputs=data_pair_classification_zh,
                     )
             with gr.TabItem("Polish"):
                 with gr.Row():
                     gr.Markdown("""
@@ -1705,6 +2047,27 @@ with block:
                         partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_ZH),
                         outputs=data_reranking_zh,
                     )
         with gr.TabItem("Retrieval"):
             with gr.TabItem("English"):
                 with gr.Row():
@@ -1737,18 +2100,40 @@ with block:
                     - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
                     """)
                 with gr.Row():
-                    data_retrieval_zh = gr.components.Dataframe(
-                        DATA_RETRIEVAL_ZH,
                         # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
-                        datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_ZH.columns) * 2,
                         type="pandas",
                     )
                 with gr.Row():
-                    data_run_retrieval_zh = gr.Button("Refresh")
-                    data_run_retrieval_zh.click(
-                        partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_ZH),
-                        outputs=data_retrieval_zh,
                     )
             with gr.TabItem("Polish"):
                 with gr.Row():
                     gr.Markdown("""
@@ -1813,6 +2198,27 @@ with block:
                         partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_ZH),
                         outputs=data_sts_zh,
                     )
             with gr.TabItem("Polish"):
                 with gr.Row():
                     gr.Markdown("""

     "TweetSentimentExtractionClassification",
 ]
 TASK_LIST_CLASSIFICATION_DA = [
     "AngryTweetsClassification",
     "DanishPoliticalCommentsClassification",
     "ScalaDaClassification",
 ]
+TASK_LIST_CLASSIFICATION_FR = [
+    "AmazonReviewsClassification (fr)",
+    "MasakhaNEWSClassification (fra)",
+    "MassiveIntentClassification (fr)",
+    "MassiveScenarioClassification (fr)",
+    "MTOPDomainClassification (fr)",
+    "MTOPIntentClassification (fr)",
+]
 TASK_LIST_CLASSIFICATION_NB = [
     "NoRecClassification",
     "NordicLangClassification",
     "TenKGnadClusteringS2S",
 ]
+TASK_LIST_CLUSTERING_FR = [
+    "AlloProfClusteringP2P",
+    "AlloProfClusteringS2S",
+    "HALClusteringS2S",
+    "MLSUMClusteringP2P",
+    "MLSUMClusteringS2S",
+    "MasakhaNEWSClusteringP2P (fra)",
+    "MasakhaNEWSClusteringS2S (fra)",
+]
 TASK_LIST_CLUSTERING_PL = [
     "8TagsClustering",
 ]
     "TwitterURLCorpus",
 ]
+TASK_LIST_PAIR_CLASSIFICATION_FR = [
+    "OpusparcusPC (fr)",
+    "PawsX (fr)",
+]
 TASK_LIST_PAIR_CLASSIFICATION_PL = [
     "CDSC-E",
     "PPC",
     "StackOverflowDupQuestions",
 ]
+TASK_LIST_RERANKING_FR = [
+    "AlloprofReranking",
+    "SyntecReranking",
+]
 TASK_LIST_RERANKING_ZH = [
     "CMedQAv1",
     "CMedQAv2",
     "TRECCOVID",
 ]
+TASK_LIST_RETRIEVAL_FR = [
+    "AlloprofRetrieval",
+    "BSARDRetrieval",
+    "MintakaRetrieval (fr)",
+#    "MultiLongDocRetrieval",
+    "SyntecRetrieval",
+    "XPQARetrieval (fr)",
+]
 TASK_LIST_RETRIEVAL_PL = [
     "ArguAna-PL",
     "DBPedia-PL",
     "STSBenchmark",
 ]
+TASK_LIST_STS_FR = [
+    "STS22 (fr)",
+    "STSBenchmarkMultilingualSTS (fr)",
+    "SICKFr",
+]
 TASK_LIST_STS_PL = [
     "CDSC-R",
     "SICK-R-PL",
 ]
 TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",]
 TASK_LIST_SUMMARIZATION = ["SummEval",]
+TASK_LIST_SUMMARIZATION_FR = ["SummEvalFr"]
 TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
+TASK_LIST_FR = TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLUSTERING_FR + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_RERANKING_FR + TASK_LIST_RETRIEVAL_FR + TASK_LIST_STS_FR + TASK_LIST_SUMMARIZATION_FR
 TASK_LIST_PL = TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL
 TASK_LIST_ZH = TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH
 # Models without metadata, thus we cannot fetch their results naturally
 EXTERNAL_MODELS = [
+    "Baichuan-text-embedding",
+    "Cohere-embed-multilingual-v3.0",
+    "Cohere-embed-multilingual-light-v3.0",
+    "DanskBERT",
+    "LASER2",
+    "LaBSE",
+    "OpenSearch-text-hybrid",
     "all-MiniLM-L12-v2",
     "all-MiniLM-L6-v2",
     "all-mpnet-base-v2",
     "allenai-specter",
+    "bert-base-10lang-cased",
+    "bert-base-15lang-cased",
+    "bert-base-25lang-cased",
+    "bert-base-multilingual-cased",
+    "bert-base-multilingual-uncased",
     "bert-base-swedish-cased",
     "bert-base-uncased",
     "bge-base-zh-v1.5",
     "dfm-encoder-large-v1",
     "dfm-sentence-encoder-large-1",
     "distiluse-base-multilingual-cased-v2",
     "e5-base",
     "e5-large",
+    "e5-mistral-7b-instruct",
+    "e5-small",
     "electra-small-nordic",
     "electra-small-swedish-cased-discriminator",
+    "flaubert_base_cased",
+    "flaubert_base_uncased",
+    "flaubert_large_cased",
     "gbert-base",
     "gbert-large",
     "gelectra-base",
     "gelectra-large",
     "glove.6B.300d",
+    "gottbert-base",
     "gtr-t5-base",
     "gtr-t5-large",
     "gtr-t5-xl",
     "herbert-base-retrieval-v2",
     "komninos",
     "luotuo-bert-medium",
     "m3e-base",
+    "m3e-large",
+    "mistral-embed",
     "msmarco-bert-co-condensor",
+    "multi-qa-MiniLM-L6-cos-v1",
     "multilingual-e5-base",
     "multilingual-e5-large",
     "multilingual-e5-small",
     "paraphrase-multilingual-MiniLM-L12-v2",
     "paraphrase-multilingual-mpnet-base-v2",
     "sentence-bert-swedish-cased",
+    "sentence-camembert-base",
+    "sentence-camembert-large",
+    "sentence-croissant-llm-base",
     "sentence-t5-base",
     "sentence-t5-large",
     "sentence-t5-xl",
     "sentence-t5-xxl",
+    "silver-retriever-base-v1",
     "sup-simcse-bert-base-uncased",
     "st-polish-paraphrase-from-distilroberta",
+    "st-polish-paraphrase-from-mpnet",
     "text2vec-base-chinese",
+    "text2vec-base-multilingual",
     "text2vec-large-chinese",
     "text-embedding-3-small",
     "text-embedding-3-large",
     "text-search-curie-001",
     "text-search-davinci-001",
     "titan-embed-text-v1",
+    "udever-bloom-1b1",
+    "udever-bloom-560m",
+    "universal-sentence-encoder-multilingual-3",
+    "universal-sentence-encoder-multilingual-large-3",
     "unsup-simcse-bert-base-uncased",
     "use-cmlm-multilingual",
+    "voyage-2",
+    "voyage-code-2",
     "voyage-lite-01-instruct",
+    "voyage-lite-02-instruct",
     "xlm-roberta-base",
+    "xlm-roberta-large",
 ]
 EXTERNAL_MODEL_TO_LINK = {
+    "Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
+    "Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
     "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
     "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
     "all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
     "all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
     "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
     "Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding",
+    "bert-base-10lang-cased": "https://huggingface.co/Geotrend/bert-base-10lang-cased",
+    "bert-base-15lang-cased": "https://huggingface.co/Geotrend/bert-base-15lang-cased",
+    "bert-base-25lang-cased": "https://huggingface.co/Geotrend/bert-base-25lang-cased",
+    "bert-base-multilingual-cased": "https://huggingface.co/google-bert/bert-base-multilingual-cased",
+    "bert-base-multilingual-uncased": "https://huggingface.co/google-bert/bert-base-multilingual-uncased",
     "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
     "bert-base-uncased": "https://huggingface.co/bert-base-uncased",
     "bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
     "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
     "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
     "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
+    "camembert-base": "https://huggingface.co/almanach/camembert-base",
+    "camembert-large": "https://huggingface.co/almanach/camembert-large",
     "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
     "cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
     "DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
+    "distilbert-base-25lang-cased": "https://huggingface.co/Geotrend/distilbert-base-25lang-cased",
+    "distilbert-base-en-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-cased",
+    "distilbert-base-en-fr-es-pt-it-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased",
+    "distilbert-base-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-fr-cased",
+    "distilbert-base-uncased": "https://huggingface.co/distilbert-base-uncased",
     "distiluse-base-multilingual-cased-v2": "https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2",
     "dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
     "dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
     "e5-base": "https://huggingface.co/intfloat/e5-base",
     "e5-large": "https://huggingface.co/intfloat/e5-large",
+    "e5-mistral-7b-instruct": "https://huggingface.co/intfloat/e5-mistral-7b-instruct",
     "e5-small": "https://huggingface.co/intfloat/e5-small",
     "electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic",
     "electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator",
+    "flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased",
+    "flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased",
+    "flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased",
     "gbert-base": "https://huggingface.co/deepset/gbert-base",
     "gbert-large": "https://huggingface.co/deepset/gbert-large",
     "gelectra-base": "https://huggingface.co/deepset/gelectra-base",
     "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
     "m3e-base": "https://huggingface.co/moka-ai/m3e-base",
     "m3e-large": "https://huggingface.co/moka-ai/m3e-large",
+    "mistral-embed": "https://docs.mistral.ai/guides/embeddings",
     "msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor",
+    "multi-qa-MiniLM-L6-cos-v1": "https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
     "multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base",
     "multilingual-e5-large": "https://huggingface.co/intfloat/multilingual-e5-large",
     "multilingual-e5-small": "https://huggingface.co/intfloat/multilingual-e5-small",
     "nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
     "norbert3-base": "https://huggingface.co/ltg/norbert3-base",
     "norbert3-large": "https://huggingface.co/ltg/norbert3-large",
+    "OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
     "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
     "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+    "sentence-camembert-base": "https://huggingface.co/dangvantuan/sentence-camembert-base",
+    "sentence-camembert-large": "https://huggingface.co/dangvantuan/sentence-camembert-large",
+    "sentence-croissant-llm-base": "https://huggingface.co/Wissam42/sentence-croissant-llm-base",
     "sentence-bert-swedish-cased": "https://huggingface.co/KBLab/sentence-bert-swedish-cased",
     "sentence-t5-base": "https://huggingface.co/sentence-transformers/sentence-t5-base",
     "sentence-t5-large": "https://huggingface.co/sentence-transformers/sentence-t5-large",
     "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
     "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
+    "silver-retriever-base-v1": "https://huggingface.co/ipipan/silver-retriever-base-v1",
     "sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased",
     "st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta",
     "st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
     "text-search-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
     "text-search-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
     "titan-embed-text-v1": "https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html",
+    "udever-bloom-1b1": "https://huggingface.co/izhx/udever-bloom-1b1",
+    "udever-bloom-560m": "https://huggingface.co/izhx/udever-bloom-560m",
+    "universal-sentence-encoder-multilingual-3": "https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-3",
+    "universal-sentence-encoder-multilingual-large-3": "https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-large-3",
     "unsup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased",
     "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
+    "voyage-2": "https://docs.voyageai.com/embeddings/",
+    "voyage-code-2": "https://docs.voyageai.com/embeddings/",
     "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
     "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
     "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
 }
 # Embedding dimensionality reported for each external model, keyed by model name.
 # camembert-base / camembert-large use the hidden sizes of their published
 # configurations (768 / 1024).
 EXTERNAL_MODEL_TO_DIM = {
     "Cohere-embed-multilingual-v3.0": 1024,
     "Cohere-embed-multilingual-light-v3.0": 384,
     "all-MiniLM-L12-v2": 384,
     "all-MiniLM-L6-v2": 384,
     "all-mpnet-base-v2": 768,
     "allenai-specter": 768,
     "Baichuan-text-embedding": 1024,
     "bert-base-10lang-cased": 768,
     "bert-base-15lang-cased": 768,
     "bert-base-25lang-cased": 768,
     "bert-base-multilingual-cased": 768,
     "bert-base-multilingual-uncased": 768,
     "bert-base-swedish-cased": 768,
     "bert-base-uncased": 768,
     "bge-base-zh-v1.5": 768,
     "bge-large-zh-v1.5": 1024,
     "bge-large-zh-noinstruct": 1024,
     "bge-small-zh-v1.5": 512,
     "camembert-base": 768,
     "camembert-large": 1024,
     "contriever-base-msmarco": 768,
     "cross-en-de-roberta-sentence-transformer": 768,
     "DanskBERT": 768,
     "distilbert-base-25lang-cased": 768,
     "distilbert-base-en-fr-cased": 768,
     "distilbert-base-en-fr-es-pt-it-cased": 768,
     "distilbert-base-fr-cased": 768,
     "distilbert-base-uncased": 768,
     "distiluse-base-multilingual-cased-v2": 512,
     "dfm-encoder-large-v1": 1024,
     "dfm-sentence-encoder-large-1": 1024,
     "e5-base": 768,
     "e5-large": 1024,
     "e5-mistral-7b-instruct": 4096,
     "e5-small": 384,
     "electra-small-nordic": 256,
     "electra-small-swedish-cased-discriminator": 256,
     "flaubert_base_cased": 768,
     "flaubert_base_uncased": 768,
     "flaubert_large_cased": 1024,
     "luotuo-bert-medium": 768,
     "LASER2": 1024,
     "LaBSE": 768,
     "komninos": 300,
     "m3e-base": 768,
     "m3e-large": 768,
     "mistral-embed": 1024,
     "msmarco-bert-co-condensor": 768,
     "multi-qa-MiniLM-L6-cos-v1": 384,
     "multilingual-e5-base": 768,
     "multilingual-e5-small": 384,
     "multilingual-e5-large": 1024,
     "nomic-embed-text-v1.5-512": 512,
     "norbert3-base": 768,
     "norbert3-large": 1024,
     "OpenSearch-text-hybrid": 1792,
     "paraphrase-multilingual-MiniLM-L12-v2": 384,
     "paraphrase-multilingual-mpnet-base-v2": 768,
     "sentence-camembert-base": 768,
     "sentence-camembert-large": 1024,
     "sentence-croissant-llm-base": 2048,
     "sentence-bert-swedish-cased": 768,
     "sentence-t5-base": 768,
     "sentence-t5-large": 768,
     "sentence-t5-xl": 768,
     "sentence-t5-xxl": 768,
     "silver-retriever-base-v1": 768,
     "sup-simcse-bert-base-uncased": 768,
     "st-polish-paraphrase-from-distilroberta": 768,
     "st-polish-paraphrase-from-mpnet": 768,
     "text-search-curie-001": 4096,
     "text-search-davinci-001": 12288,
     "titan-embed-text-v1": 1536,
     "udever-bloom-1b1": 1536,
     "udever-bloom-560m": 1024,
     "universal-sentence-encoder-multilingual-3": 512,
     "universal-sentence-encoder-multilingual-large-3": 512,
     "unsup-simcse-bert-base-uncased": 768,
     "use-cmlm-multilingual": 768,
     "voyage-2": 1024,
     "voyage-code-2": 1536,
     "voyage-lite-01-instruct": 1024,
     "voyage-lite-02-instruct": 1024,
     "xlm-roberta-base":  768,
 }
 EXTERNAL_MODEL_TO_SEQLEN = {
+    "Cohere-embed-multilingual-v3.0": 512,
+    "Cohere-embed-multilingual-light-v3.0": 512,
     "all-MiniLM-L12-v2": 512,
     "all-MiniLM-L6-v2": 512,
     "all-mpnet-base-v2": 514,
     "allenai-specter": 512,
     "Baichuan-text-embedding": 512,
+    "bert-base-10lang-cased": 512,
+    "bert-base-15lang-cased": 512,
+    "bert-base-25lang-cased": 512,
+    "bert-base-multilingual-cased": 512,
+    "bert-base-multilingual-uncased": 512,
     "bert-base-swedish-cased": 512,
     "bert-base-uncased": 512,
     "bge-base-zh-v1.5": 512,
     "bge-large-zh-v1.5": 512,
     "bge-large-zh-noinstruct": 512,
+    "bge-small-zh-v1.5": 512,
+    "camembert-base": 512,
+    "camembert-large": 512,
     "contriever-base-msmarco": 512,
     "cross-en-de-roberta-sentence-transformer": 514,
+    "distilbert-base-25lang-cased": 512,
+    "distilbert-base-en-fr-cased": 512,
+    "distilbert-base-en-fr-es-pt-it-cased": 512,
+    "distilbert-base-fr-cased": 512,
+    "distilbert-base-uncased": 512,
     "DanskBERT": 514,
     "dfm-encoder-large-v1": 512,
     "dfm-sentence-encoder-large-1": 512,
     "distiluse-base-multilingual-cased-v2": 512,
     "e5-base": 512,
     "e5-large": 512,
+    "e5-mistral-7b-instruct": 32768,
     "e5-small": 512,
     "electra-small-nordic": 512,
     "electra-small-swedish-cased-discriminator": 512,
+    "flaubert_base_cased": 512,
+    "flaubert_base_uncased": 512,
+    "flaubert_large_cased": 512,
     "gbert-base": 512,
     "gbert-large": 512,
     "gelectra-base": 512,
     "LASER2": "N/A",
     "LaBSE": 512,
     "m3e-base": 512,
+    "m3e-large": 512,
+#    "mistral-embed": "?",
     "msmarco-bert-co-condensor": 512,
+    "multi-qa-MiniLM-L6-cos-v1": 512,
     "multilingual-e5-base": 514,
     "multilingual-e5-large": 514,
     "multilingual-e5-small": 512,
     "nomic-embed-text-v1.5-512": 8192,
     "norbert3-base": 512,
     "norbert3-large": 512,
+    "OpenSearch-text-hybrid": 512,
     "paraphrase-multilingual-MiniLM-L12-v2": 512,
     "paraphrase-multilingual-mpnet-base-v2": 514,
+    "sentence-camembert-base": 512,
+    "sentence-camembert-large": 512,
+    "sentence-croissant-llm-base": 2048,
     "sentence-bert-swedish-cased": 512,
     "sentence-t5-base": 512,
     "sentence-t5-large": 512,
     "sentence-t5-xl": 512,
     "sentence-t5-xxl": 512,
+    "silver-retriever-base-v1": 514,
     "sup-simcse-bert-base-uncased": 512,
     "st-polish-paraphrase-from-distilroberta": 514,
     "st-polish-paraphrase-from-mpnet": 514,
     "text-search-curie-001": 2046,
     "text-search-davinci-001": 2046,
     "titan-embed-text-v1": 8000,
+    "udever-bloom-1b1": 2048,
+    "udever-bloom-560m": 2048,
+    "universal-sentence-encoder-multilingual-3": 512,
+    "universal-sentence-encoder-multilingual-large-3": 512,
     "use-cmlm-multilingual": 512,
     "unsup-simcse-bert-base-uncased": 512,
+    "voyage-2": 1024,
+    "voyage-code-2": 16000,
     "voyage-lite-01-instruct": 4000,
     "voyage-lite-02-instruct": 4000,
     "xlm-roberta-base": 514,
     "all-MiniLM-L12-v2": 0.13,
     "all-MiniLM-L6-v2": 0.09,
     "all-mpnet-base-v2": 0.44,
+    "bert-base-10lang-cased": 0.61,
+    "bert-base-15lang-cased": 0.61,
+    "bert-base-25lang-cased": 0.61,
+    "bert-base-multilingual-cased": 0.71,
+    "bert-base-multilingual-uncased": 0.67,
     "bert-base-uncased": 0.44,
     "bert-base-swedish-cased": 0.50,
     "bge-base-zh-v1.5": 0.41,
     "bge-large-zh-v1.5": 1.30,
     "bge-large-zh-noinstruct": 1.30,
+    "bge-small-zh-v1.5": 0.10,
+    "camembert-base": 0.45,
+    "camembert-large": 1.35,
     "cross-en-de-roberta-sentence-transformer": 1.11,
     "contriever-base-msmarco": 0.44,
+    "distilbert-base-25lang-cased": 0.44,
+    "distilbert-base-en-fr-cased": 0.44,
+    "distilbert-base-en-fr-es-pt-it-cased": 0.44,
+    "distilbert-base-fr-cased": 0.44,
+    "distilbert-base-uncased": 0.44,
     "DanskBERT": 0.50,
     "distiluse-base-multilingual-cased-v2": 0.54,
     "dfm-encoder-large-v1": 1.42,
     "dfm-sentence-encoder-large-1": 1.63,
     "e5-base": 0.44,
     "e5-large": 1.34,
+    "e5-mistral-7b-instruct": 14.22,
+    "e5-small": 0.13,
     "electra-small-nordic": 0.09,
     "electra-small-swedish-cased-discriminator": 0.06,
+    "flaubert_base_cased": 0.55,
+    "flaubert_base_uncased": 0.55,
+    "flaubert_large_cased": 1.49,
     "gbert-base": 0.44,
     "gbert-large": 1.35,
     "gelectra-base": 0.44,
     "m3e-base": 0.41,
     "m3e-large": 0.41,
     "msmarco-bert-co-condensor": 0.44,
+    "multi-qa-MiniLM-L6-cos-v1": 0.09,
     "multilingual-e5-base": 1.11,
     "multilingual-e5-small": 0.47,
     "multilingual-e5-large": 2.24,
     "norbert3-large": 1.47,
     "paraphrase-multilingual-mpnet-base-v2": 1.11,
     "paraphrase-multilingual-MiniLM-L12-v2": 0.47,
+    "sentence-camembert-base": 0.44,
+    "sentence-camembert-large": 1.35,
+    "sentence-croissant-llm-base": 5.12,
     "sentence-bert-swedish-cased": 0.50,
     "sentence-t5-base": 0.22,
     "sentence-t5-large": 0.67,
     "sentence-t5-xl": 2.48,
     "sentence-t5-xxl": 9.73,
+    "silver-retriever-base-v1": 0.50,
     "sup-simcse-bert-base-uncased": 0.44,
     "st-polish-paraphrase-from-distilroberta": 0.50,
     "st-polish-paraphrase-from-mpnet": 0.50,
     "atian-chapters/Chapters-SFR-Embedding-Mistral", # Copy
     "rlsChapters/Chapters-SFR-Embedding-Mistral", # Copy
     "TitanML/jina-v2-base-en-embed", # Copy
+    "MaziyarPanahi/GritLM-8x7B-GGUF", # GGUF variant
 }
 def add_lang(examples):
     if not(examples["eval_language"]):
         examples["mteb_dataset_name_with_lang"] = examples["mteb_dataset_name"]
         examples["mteb_dataset_name_with_lang"] = examples["mteb_dataset_name"] + f' ({examples["eval_language"]})'
     return examples
+def norm(names): return set([name.split(" ")[0] for name in names])
 def add_task(examples):
     """Tag a result row with its task type under the "mteb_task" key.

     The row's "mteb_dataset_name" is checked against the normalised task lists
     in a fixed order and the first match wins. A dataset found in none of them
     is reported on stdout and tagged "Unknown". The mutated row is returned.
     """
     # Could be added to the dataset loading script instead
     dataset_name = examples["mteb_dataset_name"]
     # Ordered (task label, dataset names) pairs; earlier entries take precedence.
     task_groups = (
         ("Classification", TASK_LIST_CLASSIFICATION + TASK_LIST_CLASSIFICATION_DA + TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLASSIFICATION_NB + TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLASSIFICATION_SV + TASK_LIST_CLASSIFICATION_ZH),
         ("Clustering", TASK_LIST_CLUSTERING + TASK_LIST_CLUSTERING_DE + TASK_LIST_CLUSTERING_FR + TASK_LIST_CLUSTERING_PL + TASK_LIST_CLUSTERING_ZH),
         ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_PAIR_CLASSIFICATION_ZH),
         ("Reranking", TASK_LIST_RERANKING + TASK_LIST_RERANKING_FR + TASK_LIST_RERANKING_ZH),
         ("Retrieval", TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_FR + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH),
         ("STS", TASK_LIST_STS + TASK_LIST_STS_FR + TASK_LIST_STS_PL + TASK_LIST_STS_ZH),
         ("Summarization", TASK_LIST_SUMMARIZATION + TASK_LIST_SUMMARIZATION_FR),
         ("BitextMining", TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_OTHER),
     )
     for task_label, dataset_names in task_groups:
         if dataset_name in norm(dataset_names):
             examples["mteb_task"] = task_label
             break
     else:
         # No list contains this dataset.
         print("WARNING: Task not found for dataset", dataset_name)
         examples["mteb_task"] = "Unknown"
     return examples
 # Load cached external-model results when the JSON cache file exists and fetch only
 # the models it lacks; without a cache, every model in EXTERNAL_MODELS is fetched.
+if os.path.exists("EXTERNAL_MODEL_RESULTS.json"):
+    with open("EXTERNAL_MODEL_RESULTS.json") as f:
+        EXTERNAL_MODEL_RESULTS = json.load(f)
+    # Queue every model that the cache does not contain yet
+    models_to_run = []
+    for model in EXTERNAL_MODELS:
+        if model not in EXTERNAL_MODEL_RESULTS:
+            models_to_run.append(model)
             # Empty placeholder: one list per (task, metric) pair, appended to by the fetch loop below.
+            EXTERNAL_MODEL_RESULTS[model] = {k: {v: []} for k, v in TASK_TO_METRIC.items()}
+else:
+    EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
     # Same object as EXTERNAL_MODELS, not a copy.
+    models_to_run = EXTERNAL_MODELS
+pbar = tqdm(models_to_run, desc="Fetching external model results")
+for model in pbar:
+    pbar.set_description(f"Fetching external model results for {model!r}")
     # NOTE(review): trust_remote_code=True presumably lets the "mteb/results" loading script execute locally; confirm this is intended.
+    ds = load_dataset("mteb/results", model, trust_remote_code=True)
+    # For local debugging:
+    #, download_mode='force_redownload', verification_mode="no_checks")
     # Annotate each row with "mteb_dataset_name_with_lang" and "mteb_task".
+    ds = ds.map(add_lang)
+    ds = ds.map(add_task)
     # Models without an entry in EXTERNAL_MODEL_TO_LINK fall back to the leaderboard URL.
+    base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))}
+    # For now only one metric per task - could add more metrics later on
+    for task, metric in TASK_TO_METRIC.items():
         # Keep only the "test" split rows for this task and its metric.
+        ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict()
         # Map dataset name (with language suffix) -> score rounded to two decimals.
+        ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
+        EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
+# Save & cache EXTERNAL_MODEL_RESULTS
 # Runs unconditionally, so the cache file is rewritten even when nothing new was fetched.
+with open("EXTERNAL_MODEL_RESULTS.json", "w") as f:
+    json.dump(EXTERNAL_MODEL_RESULTS, f)
 def get_dim_seq_size(model):
     filenames = [sib.rfilename for sib in model.siblings]
     return DATA_OVERALL_ZH
+def get_mteb_average_fr():
+    global DATA_OVERALL_FR, DATA_CLASSIFICATION_FR, DATA_CLUSTERING_FR, DATA_PAIR_CLASSIFICATION_FR, DATA_RERANKING_FR, DATA_RETRIEVAL_FR, DATA_STS_FR, DATA_SUMMARIZATION_FR
+    DATA_OVERALL_FR = get_mteb_data(
+        tasks=[
+            "Classification",
+            "Clustering",
+            "PairClassification",
+            "Reranking",
+            "Retrieval",
+            "STS",
+            "Summarization"
+        ],
+        datasets=TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLUSTERING_FR + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_RERANKING_FR + TASK_LIST_RETRIEVAL_FR + TASK_LIST_STS_FR + TASK_LIST_SUMMARIZATION_FR,
+        fillna=False,
+        add_emb_dim=True,
+        rank=False,
+    )
+    # Debugging:
+    # DATA_OVERALL_FR.to_csv("overall.csv")
+    DATA_OVERALL_FR.insert(1, f"Average ({len(TASK_LIST_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_FR].mean(axis=1, skipna=False))
+    DATA_OVERALL_FR.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_CLASSIFICATION_FR].mean(axis=1, skipna=False))
+    DATA_OVERALL_FR.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_CLUSTERING_FR].mean(axis=1, skipna=False))
+    DATA_OVERALL_FR.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_PAIR_CLASSIFICATION_FR].mean(axis=1, skipna=False))
+    DATA_OVERALL_FR.insert(5, f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_RERANKING_FR].mean(axis=1, skipna=False))
+    DATA_OVERALL_FR.insert(6, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_RETRIEVAL_FR].mean(axis=1, skipna=False))
+    DATA_OVERALL_FR.insert(7, f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_STS_FR].mean(axis=1, skipna=False))
+    DATA_OVERALL_FR.insert(8, f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)", DATA_OVERALL_FR[TASK_LIST_SUMMARIZATION_FR].mean(axis=1, skipna=False))
+    DATA_OVERALL_FR.sort_values(f"Average ({len(TASK_LIST_FR)} datasets)", ascending=False, inplace=True)
+    # Start ranking from 1
+    DATA_OVERALL_FR.insert(0, "Rank", list(range(1, len(DATA_OVERALL_FR) + 1)))
+    DATA_OVERALL_FR = DATA_OVERALL_FR.round(2)
+    DATA_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_CLASSIFICATION_FR])
+    DATA_CLASSIFICATION_FR = DATA_CLASSIFICATION_FR[DATA_CLASSIFICATION_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLUSTERING_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_CLUSTERING_FR])
+    DATA_CLUSTERING_FR = DATA_CLUSTERING_FR[DATA_CLUSTERING_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_PAIR_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_FR])
+    DATA_PAIR_CLASSIFICATION_FR = DATA_PAIR_CLASSIFICATION_FR[DATA_PAIR_CLASSIFICATION_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RERANKING_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_RERANKING_FR])
+    DATA_RERANKING_FR = DATA_RERANKING_FR[DATA_RERANKING_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RETRIEVAL_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_RETRIEVAL_FR])
+    DATA_RETRIEVAL_FR = DATA_RETRIEVAL_FR[DATA_RETRIEVAL_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_STS_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_STS_FR])
+    DATA_STS_FR = DATA_STS_FR[DATA_STS_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_SUMMARIZATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_SUMMARIZATION_FR])
+    DATA_SUMMARIZATION_FR = DATA_SUMMARIZATION_FR[DATA_SUMMARIZATION_FR.iloc[:, 1:].ne("").any(axis=1)]
+    # Fill NaN after averaging
+    DATA_OVERALL_FR.fillna("", inplace=True)
+    DATA_OVERALL_FR = DATA_OVERALL_FR[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_FR)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)"]]
+    DATA_OVERALL_FR = DATA_OVERALL_FR[DATA_OVERALL_FR.iloc[:, 5:].ne("").any(axis=1)]
+    return DATA_OVERALL_FR
 def get_mteb_average_pl():
     global DATA_OVERALL_PL, DATA_CLASSIFICATION_PL, DATA_CLUSTERING_PL, DATA_PAIR_CLASSIFICATION_PL, DATA_RETRIEVAL_PL, DATA_STS_PL
     DATA_OVERALL_PL = get_mteb_data(
     return DATA_OVERALL_PL
 # Build the leaderboard tables once at import time; get_mteb_average_fr() stores
 # its results in module-level DATA_*_FR globals (the sibling calls presumably do
 # the same for their languages).
 get_mteb_average()
+get_mteb_average_fr()
 get_mteb_average_pl()
 get_mteb_average_zh()
 DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
     DATA_BITEXT_MINING_OTHER,
     DATA_CLASSIFICATION_EN,
     DATA_CLASSIFICATION_DA,
+    DATA_CLASSIFICATION_FR,
     DATA_CLASSIFICATION_NB,
     DATA_CLASSIFICATION_PL,
     DATA_CLASSIFICATION_SV,
     DATA_CLASSIFICATION_OTHER,
     DATA_CLUSTERING,
     DATA_CLUSTERING_DE,
+    DATA_CLUSTERING_FR,
     DATA_CLUSTERING_PL,
     DATA_CLUSTERING_ZH,
     DATA_PAIR_CLASSIFICATION,
+    DATA_PAIR_CLASSIFICATION_FR,
     DATA_PAIR_CLASSIFICATION_PL,
     DATA_PAIR_CLASSIFICATION_ZH,
     DATA_RERANKING,
+    DATA_RERANKING_FR,
     DATA_RERANKING_ZH,
     DATA_RETRIEVAL,
+    DATA_RETRIEVAL_FR,
     DATA_RETRIEVAL_PL,
     DATA_RETRIEVAL_ZH,
     DATA_STS_EN,
+    DATA_STS_FR,
     DATA_STS_PL,
     DATA_STS_ZH,
     DATA_STS_OTHER,
     DATA_SUMMARIZATION,
+    DATA_SUMMARIZATION_FR,
 ]:
     # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum()
     cols_to_ignore = 3 if "Average" in d.columns else 2
                     )
                 with gr.Row():
                     data_run_overall_zh = gr.Button("Refresh")
+                    data_run_overall_zh.click(get_mteb_average_zh, inputs=None, outputs=data_overall_zh)
+            with gr.TabItem("French"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **Overall MTEB French leaderboard (F-MTEB)** 🔮🇫🇷
+                    - **Metric:** Various, refer to task tabs
+                    - **Languages:** French
+                    - **Credits:** [Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](https://github.com/GabrielSequeira), [Imene Kerboua](https://github.com/imenelydiaker), [wissam-sib](https://github.com/wissam-sib), [Sunalwing](https://github.com/Sunalwing)
+                    """)
+                with gr.Row():
+                    data_overall_fr = gr.components.Dataframe(
+                        DATA_OVERALL_FR,
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_FR.columns),
+                        type="pandas",
+                        height=600,
+                    )
+                with gr.Row():
+                    data_overall_fr = gr.Button("Refresh")
+                    data_overall_fr.click(get_mteb_average_fr, inputs=None, outputs=data_overall_fr)
             with gr.TabItem("Polish"):
                 with gr.Row():
                     gr.Markdown("""
                         partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_DA),
                         outputs=data_run_classification_da,
                     )
+            with gr.TabItem("French"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **Classification French Leaderboard** 💙🇫🇷
+                    - **Metric:** [Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)
+                    - **Languages:** French
+                    - **Credits:**
+                    """)
+                with gr.Row():
+                    data_classification_fr = gr.components.Dataframe(
+                        DATA_CLASSIFICATION_FR,
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_CLASSIFICATION_FR.columns),
+                        type="pandas",
+                    )
+                with gr.Row():
+                    data_run_classification_fr = gr.Button("Refresh")
+                    data_run_classification_fr.click(
+                        partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_FR),
+                        outputs=data_run_classification_fr,
+                    )
             with gr.TabItem("Norwegian"):
                 with gr.Row():
                     gr.Markdown("""
                         partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_ZH),
                         outputs=data_clustering_zh,
                     )
             # French clustering tab: description, results table, and a refresh button.
+            with gr.TabItem("French"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **Clustering French Leaderboard** ✨🇫🇷
+                    - **Metric:** Validity Measure (v_measure)
+                    - **Languages:** French
+                    - **Credits:**
+                    """)
+                with gr.Row():
                     # Initial contents come from DATA_CLUSTERING_FR, which get_mteb_average_fr() computes.
+                    data_clustering_fr = gr.components.Dataframe(
+                        DATA_CLUSTERING_FR,
                         # First two columns are presumably "Rank" (number) and "Model" (markdown); the rest are numeric.
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_CLUSTERING_FR.columns),
+                        type="pandas",
+                    )
+                with gr.Row():
+                    data_run_clustering_fr = gr.Button("Refresh")
                     # Refresh calls get_mteb_data for the French clustering datasets and writes the result into the table.
+                    data_run_clustering_fr.click(
+                        partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_FR),
+                        outputs=data_clustering_fr,
+                    )
             with gr.TabItem("German"):
                 with gr.Row():
                     gr.Markdown("""
                         partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_ZH),
                         outputs=data_pair_classification_zh,
                     )
             # French pair-classification tab: description, results table, and a refresh button.
+            with gr.TabItem("French"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **Pair Classification French Leaderboard** 🎭🇫🇷
+                    - **Metric:** Average Precision based on Cosine Similarities (cos_sim_ap)
+                    - **Languages:** French
+                    - **Credits:**
+                    """)
+                with gr.Row():
                     # Initial contents come from DATA_PAIR_CLASSIFICATION_FR, which get_mteb_average_fr() computes.
+                    data_pair_classification_fr = gr.components.Dataframe(
+                        DATA_PAIR_CLASSIFICATION_FR,
                         # First two columns are presumably "Rank" (number) and "Model" (markdown); the rest are numeric.
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_PAIR_CLASSIFICATION_FR.columns),
+                        type="pandas",
+                    )
+                with gr.Row():
+                    data_run_pair_classification_fr = gr.Button("Refresh")
                     # Refresh calls get_mteb_data for the French pair-classification datasets and writes the result into the table.
+                    data_run_pair_classification_fr.click(
+                        partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_FR),
+                        outputs=data_pair_classification_fr,
+                    )
             with gr.TabItem("Polish"):
                 with gr.Row():
                     gr.Markdown("""
                         partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_ZH),
                         outputs=data_reranking_zh,
                     )
             # French reranking tab: description, results table, and a refresh button.
+            with gr.TabItem("French"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **Reranking French Leaderboard** 🥈🇫🇷
+                    - **Metric:** Mean Average Precision (MAP)
+                    - **Languages:** French
+                    - **Credits:**
+                    """)
+                with gr.Row():
                     # Initial contents come from DATA_RERANKING_FR, which get_mteb_average_fr() computes.
+                    data_reranking_fr = gr.components.Dataframe(
+                        DATA_RERANKING_FR,
                         # First two columns are presumably "Rank" (number) and "Model" (markdown); the rest are numeric.
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_RERANKING_FR.columns),
+                        type="pandas",
+                    )
+                with gr.Row():
+                    data_run_reranking_fr = gr.Button("Refresh")
                     # Refresh calls get_mteb_data for the French reranking datasets and writes the result into the table.
+                    data_run_reranking_fr.click(
+                        partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_FR),
+                        outputs=data_reranking_fr,
+                    )
         with gr.TabItem("Retrieval"):
             with gr.TabItem("English"):
                 with gr.Row():
                     - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
                     """)
                 # NOTE(review): this table sits under the FlagEmbedding-credited description,
                 # which appears to be the Chinese retrieval tab, so it uses the Chinese data
                 # and names; the French tab defines its own `data_retrieval_fr` separately.
                 with gr.Row():
                     data_retrieval_zh = gr.components.Dataframe(
                         DATA_RETRIEVAL_ZH,
                         # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
                         datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_ZH.columns) * 2,
                         type="pandas",
                     )
                 with gr.Row():
                     data_run_retrieval_zh = gr.Button("Refresh")
                     data_run_retrieval_zh.click(
                         partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_ZH),
                         outputs=data_retrieval_zh,
                     )
             # French retrieval tab: description, results table, and a refresh button.
+            with gr.TabItem("French"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **Retrieval French Leaderboard** 🔎🇫🇷
+                    - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
+                    - **Languages:** French
+                    - **Credits:**
+                    """)
+                with gr.Row():
                     # Initial contents come from DATA_RETRIEVAL_FR, which get_mteb_average_fr() computes.
+                    data_retrieval_fr = gr.components.Dataframe(
+                        DATA_RETRIEVAL_FR,
+                        # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_FR.columns) * 2,
+                        type="pandas",
+                    )
+                with gr.Row():
+                    data_run_retrieval_fr = gr.Button("Refresh")
                     # Refresh calls get_mteb_data for the French retrieval datasets and writes the result into the table.
+                    data_run_retrieval_fr.click(
+                        partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_FR),
+                        outputs=data_retrieval_fr,
+                    )
             with gr.TabItem("Polish"):
                 with gr.Row():
                     gr.Markdown("""
                         partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_ZH),
                         outputs=data_sts_zh,
                     )
             # French STS tab: description, results table, and a refresh button.
+            with gr.TabItem("French"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **STS French Leaderboard** 🤖🇫🇷
+                    - **Metric:** Spearman correlation based on cosine similarity
+                    - **Languages:** French
+                    - **Credits:**
+                    """)
+                with gr.Row():
                     # Initial contents come from DATA_STS_FR, which get_mteb_average_fr() computes.
+                    data_sts_fr = gr.components.Dataframe(
+                        DATA_STS_FR,
                         # First two columns are presumably "Rank" (number) and "Model" (markdown); the rest are numeric.
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_STS_FR.columns),
+                        type="pandas",
+                    )
+                with gr.Row():
+                    data_run_sts_fr = gr.Button("Refresh")
                     # Refresh calls get_mteb_data for the French STS datasets and writes the result into the table.
+                    data_run_sts_fr.click(
+                        partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_FR),
+                        outputs=data_sts_fr,
+                    )
             with gr.TabItem("Polish"):
                 with gr.Row():
                     gr.Markdown("""