Spaces:
Running
Running
orionweller
committed on
Commit
•
f1fa713
1
Parent(s):
83fe3f0
add_followir_tab (#102)
Browse files- add instruction following (cf7ddc6fa21dbe43cd4c5f806946100e72ba8b72)
- update (0d0563c24895d35047cbd4018d9f86afb7e8a239)
- merge in main (aeb9d6091824165ded58f9a5f3b230a4434986b5)
- minor cleanup (b5c28bdf082216a290351c60305b599a952c7a73)
- add bi-encoder button (77cc9e7a65257c5af5784bb60a3dac2073e7fe05)
- EXTERNAL_MODEL_RESULTS.json +0 -0
- app.py +39 -7
- config.yaml +25 -0
- model_meta.yaml +134 -0
EXTERNAL_MODEL_RESULTS.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
app.py
CHANGED
@@ -17,6 +17,11 @@ TASKS_CONFIG = LEADERBOARD_CONFIG["tasks"]
|
|
17 |
BOARDS_CONFIG = LEADERBOARD_CONFIG["boards"]
|
18 |
|
19 |
TASKS = list(TASKS_CONFIG.keys())
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
TASK_TO_METRIC = {k:v["metric"] for k,v in TASKS_CONFIG.items()}
|
22 |
|
@@ -34,18 +39,30 @@ EXTERNAL_MODEL_TO_DIM = {k: v["dim"] for k,v in MODEL_META["model_meta"].items()
|
|
34 |
EXTERNAL_MODEL_TO_SEQLEN = {k: v["seq_len"] for k,v in MODEL_META["model_meta"].items() if v.get("seq_len", False)}
|
35 |
EXTERNAL_MODEL_TO_SIZE = {k: v["size"] for k,v in MODEL_META["model_meta"].items() if v.get("size", False)}
|
36 |
PROPRIETARY_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_proprietary", False)}
|
|
|
|
|
37 |
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_sentence_transformers_compatible", False)}
|
38 |
MODELS_TO_SKIP = MODEL_META["models_to_skip"]
|
|
|
|
|
39 |
|
40 |
PROPRIETARY_MODELS = {
|
41 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
42 |
for model in PROPRIETARY_MODELS
|
43 |
}
|
44 |
-
|
45 |
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
|
46 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
47 |
for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
|
48 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
TASK_TO_TASK_TYPE = {task_category: [] for task_category in TASKS}
|
51 |
for board_config in BOARDS_CONFIG.values():
|
@@ -164,7 +181,13 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
164 |
# Initialize list to models that we cannot fetch metadata from
|
165 |
df_list = []
|
166 |
for model in EXTERNAL_MODEL_RESULTS:
|
167 |
-
results_list = [
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
if len(datasets) > 0:
|
169 |
res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])}
|
170 |
elif langs:
|
@@ -383,7 +406,10 @@ for task in TASKS:
|
|
383 |
data[task] = {"metric": TASKS_CONFIG[task]["metric_description"], "data": []}
|
384 |
|
385 |
for board, board_config in BOARDS_CONFIG.items():
|
386 |
-
|
|
|
|
|
|
|
387 |
acronym = board_config.get("acronym", None)
|
388 |
board_icon = board_config.get("icon", None)
|
389 |
if board_icon is None:
|
@@ -439,7 +465,7 @@ function(goalUrlObject) {
|
|
439 |
def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
|
440 |
current_task_language["task"] = event.target.id
|
441 |
# Either use the cached language for this task or the 1st language
|
442 |
-
current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[
|
443 |
return current_task_language, language_per_task
|
444 |
|
445 |
def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
|
@@ -461,6 +487,8 @@ MODEL_TYPES = [
|
|
461 |
"Open",
|
462 |
"Proprietary",
|
463 |
"Sentence Transformers",
|
|
|
|
|
464 |
]
|
465 |
|
466 |
def filter_data(search_query, model_types, model_sizes, *full_dataframes):
|
@@ -484,6 +512,10 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
|
|
484 |
masks.append(df["Model"].isin(PROPRIETARY_MODELS))
|
485 |
elif model_type == "Sentence Transformers":
|
486 |
masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
|
|
|
|
|
|
|
|
|
487 |
if masks:
|
488 |
df = df[reduce(lambda a, b: a | b, masks)]
|
489 |
else:
|
@@ -535,16 +567,16 @@ with gr.Blocks(css=css) as block:
|
|
535 |
with gr.Tabs() as outer_tabs:
|
536 |
# Store the tabs for updating them on load based on URL parameters
|
537 |
tabs.append(outer_tabs)
|
538 |
-
|
539 |
for task, task_values in data.items():
|
540 |
metric = task_values["metric"]
|
541 |
task_tab_id = task.lower().replace(" ", "-")
|
542 |
|
543 |
# Overall, Bitext Mining, Classification, etc.
|
544 |
-
|
|
|
545 |
# For updating the 'task' in the URL
|
546 |
task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params)
|
547 |
-
|
548 |
with gr.Tabs() as task_tabs:
|
549 |
# Store the task tabs for updating them on load based on URL parameters
|
550 |
tabs.append(task_tabs)
|
|
|
17 |
BOARDS_CONFIG = LEADERBOARD_CONFIG["boards"]
|
18 |
|
19 |
TASKS = list(TASKS_CONFIG.keys())
|
20 |
+
PRETTY_NAMES = {
|
21 |
+
"InstructionRetrieval": "Retrieval w/Instructions",
|
22 |
+
"PairClassification": "Pair Classification",
|
23 |
+
"BitextMining": "Bitext Mining",
|
24 |
+
}
|
25 |
|
26 |
TASK_TO_METRIC = {k:v["metric"] for k,v in TASKS_CONFIG.items()}
|
27 |
|
|
|
39 |
EXTERNAL_MODEL_TO_SEQLEN = {k: v["seq_len"] for k,v in MODEL_META["model_meta"].items() if v.get("seq_len", False)}
|
40 |
EXTERNAL_MODEL_TO_SIZE = {k: v["size"] for k,v in MODEL_META["model_meta"].items() if v.get("size", False)}
|
41 |
PROPRIETARY_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_proprietary", False)}
|
42 |
+
TASK_DESCRIPTIONS = {k: v["task_description"] for k,v in TASKS_CONFIG.items()}
|
43 |
+
TASK_DESCRIPTIONS["Overall"] = "Overall performance across MTEB tasks."
|
44 |
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_sentence_transformers_compatible", False)}
|
45 |
MODELS_TO_SKIP = MODEL_META["models_to_skip"]
|
46 |
+
CROSS_ENCODERS = MODEL_META["cross_encoders"]
|
47 |
+
BI_ENCODERS = [k for k, _ in MODEL_META["model_meta"].items() if k not in CROSS_ENCODERS + ["bm25"]]
|
48 |
|
49 |
PROPRIETARY_MODELS = {
|
50 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
51 |
for model in PROPRIETARY_MODELS
|
52 |
}
|
|
|
53 |
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
|
54 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
55 |
for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
|
56 |
}
|
57 |
+
CROSS_ENCODERS = {
|
58 |
+
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
59 |
+
for model in CROSS_ENCODERS
|
60 |
+
}
|
61 |
+
BI_ENCODERS = {
|
62 |
+
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
63 |
+
for model in BI_ENCODERS
|
64 |
+
}
|
65 |
+
|
66 |
|
67 |
TASK_TO_TASK_TYPE = {task_category: [] for task_category in TASKS}
|
68 |
for board_config in BOARDS_CONFIG.values():
|
|
|
181 |
# Initialize list to models that we cannot fetch metadata from
|
182 |
df_list = []
|
183 |
for model in EXTERNAL_MODEL_RESULTS:
|
184 |
+
results_list = []
|
185 |
+
for task in tasks:
|
186 |
+
# Not all models have results for InstructionRetrieval or other newly added tasks
|
187 |
+
if task not in EXTERNAL_MODEL_RESULTS[model]:
|
188 |
+
continue
|
189 |
+
results_list += EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]
|
190 |
+
|
191 |
if len(datasets) > 0:
|
192 |
res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])}
|
193 |
elif langs:
|
|
|
406 |
data[task] = {"metric": TASKS_CONFIG[task]["metric_description"], "data": []}
|
407 |
|
408 |
for board, board_config in BOARDS_CONFIG.items():
|
409 |
+
init_name = board_config["title"]
|
410 |
+
if init_name in PRETTY_NAMES:
|
411 |
+
init_name = PRETTY_NAMES[init_name]
|
412 |
+
board_pretty_name = f"{init_name} leaderboard"
|
413 |
acronym = board_config.get("acronym", None)
|
414 |
board_icon = board_config.get("icon", None)
|
415 |
if board_icon is None:
|
|
|
465 |
def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
|
466 |
current_task_language["task"] = event.target.id
|
467 |
# Either use the cached language for this task or the 1st language
|
468 |
+
current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[1].children[0].id)
|
469 |
return current_task_language, language_per_task
|
470 |
|
471 |
def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
|
|
|
487 |
"Open",
|
488 |
"Proprietary",
|
489 |
"Sentence Transformers",
|
490 |
+
"Cross-Encoders",
|
491 |
+
"Bi-Encoders"
|
492 |
]
|
493 |
|
494 |
def filter_data(search_query, model_types, model_sizes, *full_dataframes):
|
|
|
512 |
masks.append(df["Model"].isin(PROPRIETARY_MODELS))
|
513 |
elif model_type == "Sentence Transformers":
|
514 |
masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
|
515 |
+
elif model_type == "Cross-Encoders":
|
516 |
+
masks.append(df["Model"].isin(CROSS_ENCODERS))
|
517 |
+
elif model_type == "Bi-Encoders":
|
518 |
+
masks.append(df["Model"].isin(BI_ENCODERS))
|
519 |
if masks:
|
520 |
df = df[reduce(lambda a, b: a | b, masks)]
|
521 |
else:
|
|
|
567 |
with gr.Tabs() as outer_tabs:
|
568 |
# Store the tabs for updating them on load based on URL parameters
|
569 |
tabs.append(outer_tabs)
|
|
|
570 |
for task, task_values in data.items():
|
571 |
metric = task_values["metric"]
|
572 |
task_tab_id = task.lower().replace(" ", "-")
|
573 |
|
574 |
# Overall, Bitext Mining, Classification, etc.
|
575 |
+
pretty_task_name = task if task not in PRETTY_NAMES.keys() else PRETTY_NAMES[task]
|
576 |
+
with gr.Tab(pretty_task_name, id=task_tab_id) as task_tab:
|
577 |
# For updating the 'task' in the URL
|
578 |
task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params)
|
579 |
+
gr.Markdown(TASK_DESCRIPTIONS[task])
|
580 |
with gr.Tabs() as task_tabs:
|
581 |
# Store the task tabs for updating them on load based on URL parameters
|
582 |
tabs.append(task_tabs)
|
config.yaml
CHANGED
@@ -7,34 +7,47 @@ tasks:
|
|
7 |
icon: "🎌"
|
8 |
metric: f1
|
9 |
metric_description: "[F1](https://huggingface.co/spaces/evaluate-metric/f1)"
|
|
|
10 |
Classification:
|
11 |
icon: "❤️"
|
12 |
metric: accuracy
|
13 |
metric_description: "[Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)"
|
|
|
14 |
Clustering:
|
15 |
icon: "✨"
|
16 |
metric: v_measure
|
17 |
metric_description: "Validity Measure (v_measure)"
|
|
|
18 |
PairClassification:
|
19 |
icon: "🎭"
|
20 |
metric: cos_sim_ap
|
21 |
metric_description: "Average Precision based on Cosine Similarities (cos_sim_ap)"
|
|
|
22 |
Reranking:
|
23 |
icon: "🥈"
|
24 |
metric: map
|
25 |
metric_description: "Mean Average Precision (MAP)"
|
|
|
26 |
Retrieval:
|
27 |
icon: "🔎"
|
28 |
metric: ndcg_at_10
|
29 |
metric_description: "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)"
|
|
|
30 |
STS:
|
31 |
icon: "🤖"
|
32 |
metric: cos_sim_spearman
|
33 |
metric_description: "Spearman correlation based on cosine similarity"
|
|
|
34 |
Summarization:
|
35 |
icon: "📜"
|
36 |
metric: cos_sim_spearman
|
37 |
metric_description: "Spearman correlation based on cosine similarity"
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
boards:
|
39 |
en:
|
40 |
title: English
|
@@ -250,6 +263,18 @@ boards:
|
|
250 |
- MassiveIntentClassification (nb)
|
251 |
- MassiveScenarioClassification (nb)
|
252 |
- ScalaNbClassification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
law:
|
254 |
title: Law
|
255 |
language_long: "English, German, Chinese"
|
|
|
7 |
icon: "🎌"
|
8 |
metric: f1
|
9 |
metric_description: "[F1](https://huggingface.co/spaces/evaluate-metric/f1)"
|
10 |
+
task_description: "Bitext mining is the task of finding parallel sentences in two languages."
|
11 |
Classification:
|
12 |
icon: "❤️"
|
13 |
metric: accuracy
|
14 |
metric_description: "[Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)"
|
15 |
+
task_description: "Classification is the task of assigning a label to a text."
|
16 |
Clustering:
|
17 |
icon: "✨"
|
18 |
metric: v_measure
|
19 |
metric_description: "Validity Measure (v_measure)"
|
20 |
+
task_description: "Clustering is the task of grouping similar documents together."
|
21 |
PairClassification:
|
22 |
icon: "🎭"
|
23 |
metric: cos_sim_ap
|
24 |
metric_description: "Average Precision based on Cosine Similarities (cos_sim_ap)"
|
25 |
+
task_description: "Pair classification is the task of determining whether two texts are similar."
|
26 |
Reranking:
|
27 |
icon: "🥈"
|
28 |
metric: map
|
29 |
metric_description: "Mean Average Precision (MAP)"
|
30 |
+
task_description: "Reranking is the task of reordering a list of documents to improve relevance."
|
31 |
Retrieval:
|
32 |
icon: "🔎"
|
33 |
metric: ndcg_at_10
|
34 |
metric_description: "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)"
|
35 |
+
task_description: "Retrieval is the task of finding relevant documents for a query."
|
36 |
STS:
|
37 |
icon: "🤖"
|
38 |
metric: cos_sim_spearman
|
39 |
metric_description: "Spearman correlation based on cosine similarity"
|
40 |
+
task_description: "Semantic Textual Similarity is the task of determining how similar two texts are."
|
41 |
Summarization:
|
42 |
icon: "📜"
|
43 |
metric: cos_sim_spearman
|
44 |
metric_description: "Spearman correlation based on cosine similarity"
|
45 |
+
task_description: "Summarization is the task of generating a summary of a text."
|
46 |
+
InstructionRetrieval:
|
47 |
+
icon: "🔎📋"
|
48 |
+
metric: "p-MRR"
|
49 |
+
metric_description: "paired mean reciprocal rank"
|
50 |
+
task_description: "Retrieval w/Instructions is the task of finding relevant documents for a query that has detailed instructions."
|
51 |
boards:
|
52 |
en:
|
53 |
title: English
|
|
|
263 |
- MassiveIntentClassification (nb)
|
264 |
- MassiveScenarioClassification (nb)
|
265 |
- ScalaNbClassification
|
266 |
+
instructions:
|
267 |
+
title: English
|
268 |
+
language_long: "English"
|
269 |
+
has_overall: false
|
270 |
+
acronym: null
|
271 |
+
icon: null
|
272 |
+
credits: "[Orion Weller, FollowIR](https://arxiv.org/abs/2403.15246)"
|
273 |
+
tasks:
|
274 |
+
InstructionRetrieval:
|
275 |
+
- Robust04InstructionRetrieval
|
276 |
+
- News21InstructionRetrieval
|
277 |
+
- Core17InstructionRetrieval
|
278 |
law:
|
279 |
title: Law
|
280 |
language_long: "English, German, Chinese"
|
model_meta.yaml
CHANGED
@@ -47,6 +47,20 @@ model_meta:
|
|
47 |
is_external: true
|
48 |
is_proprietary: false
|
49 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
LASER2:
|
51 |
link: https://github.com/facebookresearch/LASER
|
52 |
seq_len: N/A
|
@@ -263,6 +277,12 @@ model_meta:
|
|
263 |
is_external: true
|
264 |
is_proprietary: false
|
265 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
camembert-base:
|
267 |
link: https://huggingface.co/almanach/camembert-base
|
268 |
seq_len: 512
|
@@ -359,6 +379,14 @@ model_meta:
|
|
359 |
is_external: true
|
360 |
is_proprietary: false
|
361 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
e5-base:
|
363 |
link: https://huggingface.co/intfloat/e5-base
|
364 |
seq_len: 512
|
@@ -367,6 +395,14 @@ model_meta:
|
|
367 |
is_external: true
|
368 |
is_proprietary: false
|
369 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
e5-large:
|
371 |
link: https://huggingface.co/intfloat/e5-large
|
372 |
seq_len: 512
|
@@ -407,6 +443,22 @@ model_meta:
|
|
407 |
is_external: true
|
408 |
is_proprietary: false
|
409 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
410 |
flaubert_base_cased:
|
411 |
link: https://huggingface.co/flaubert/flaubert_base_cased
|
412 |
seq_len: 512
|
@@ -535,6 +587,22 @@ model_meta:
|
|
535 |
is_external: true
|
536 |
is_proprietary: false
|
537 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
538 |
komninos:
|
539 |
link: https://huggingface.co/sentence-transformers/average_word_embeddings_komninos
|
540 |
seq_len: N/A
|
@@ -543,6 +611,14 @@ model_meta:
|
|
543 |
is_external: true
|
544 |
is_proprietary: false
|
545 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
546 |
luotuo-bert-medium:
|
547 |
link: https://huggingface.co/silk-road/luotuo-bert-medium
|
548 |
seq_len: 512
|
@@ -567,6 +643,14 @@ model_meta:
|
|
567 |
is_external: true
|
568 |
is_proprietary: false
|
569 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
570 |
mistral-embed:
|
571 |
link: https://docs.mistral.ai/guides/embeddings
|
572 |
seq_len: null
|
@@ -575,6 +659,30 @@ model_meta:
|
|
575 |
is_external: true
|
576 |
is_proprietary: true
|
577 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
578 |
msmarco-bert-co-condensor:
|
579 |
link: https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor
|
580 |
seq_len: 512
|
@@ -903,6 +1011,22 @@ model_meta:
|
|
903 |
is_external: true
|
904 |
is_proprietary: true
|
905 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
906 |
text2vec-base-chinese:
|
907 |
link: https://huggingface.co/shibing624/text2vec-base-chinese
|
908 |
seq_len: 512
|
@@ -1184,3 +1308,13 @@ models_to_skip:
|
|
1184 |
- michaelfeil/ct2fast-gte-large
|
1185 |
- gizmo-ai/Cohere-embed-multilingual-v3.0
|
1186 |
- McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
is_external: true
|
48 |
is_proprietary: false
|
49 |
is_sentence_transformers_compatible: true
|
50 |
+
FollowIR-7B:
|
51 |
+
link: https://huggingface.co/jhu-clsp/FollowIR-7B
|
52 |
+
seq_len: 4096
|
53 |
+
size: 7240
|
54 |
+
is_external: true
|
55 |
+
is_proprietary: false
|
56 |
+
is_sentence_transformers_compatible: false
|
57 |
+
GritLM-7B:
|
58 |
+
link: https://huggingface.co/GritLM/GritLM-7B
|
59 |
+
seq_len: 4096
|
60 |
+
size: 7240
|
61 |
+
is_external: true
|
62 |
+
is_proprietary: false
|
63 |
+
is_sentence_transformers_compatible: false
|
64 |
LASER2:
|
65 |
link: https://github.com/facebookresearch/LASER
|
66 |
seq_len: N/A
|
|
|
277 |
is_external: true
|
278 |
is_proprietary: false
|
279 |
is_sentence_transformers_compatible: true
|
280 |
+
bm25:
|
281 |
+
link: https://en.wikipedia.org/wiki/Okapi_BM25
|
282 |
+
size: 0
|
283 |
+
is_external: true
|
284 |
+
is_proprietary: false
|
285 |
+
is_sentence_transformers_compatible: false
|
286 |
camembert-base:
|
287 |
link: https://huggingface.co/almanach/camembert-base
|
288 |
seq_len: 512
|
|
|
379 |
is_external: true
|
380 |
is_proprietary: false
|
381 |
is_sentence_transformers_compatible: true
|
382 |
+
e5-base-v2:
|
383 |
+
link: https://huggingface.co/intfloat/e5-base-v2
|
384 |
+
seq_len: 512
|
385 |
+
size: 110
|
386 |
+
dim: 768
|
387 |
+
is_external: true
|
388 |
+
is_proprietary: false
|
389 |
+
is_sentence_transformers_compatible: true
|
390 |
e5-base:
|
391 |
link: https://huggingface.co/intfloat/e5-base
|
392 |
seq_len: 512
|
|
|
395 |
is_external: true
|
396 |
is_proprietary: false
|
397 |
is_sentence_transformers_compatible: true
|
398 |
+
e5-large-v2:
|
399 |
+
link: https://huggingface.co/intfloat/e5-large-v2
|
400 |
+
seq_len: 512
|
401 |
+
size: 335
|
402 |
+
dim: 1024
|
403 |
+
is_external: true
|
404 |
+
is_proprietary: false
|
405 |
+
is_sentence_transformers_compatible: true
|
406 |
e5-large:
|
407 |
link: https://huggingface.co/intfloat/e5-large
|
408 |
seq_len: 512
|
|
|
443 |
is_external: true
|
444 |
is_proprietary: false
|
445 |
is_sentence_transformers_compatible: true
|
446 |
+
flan-t5-base:
|
447 |
+
link: https://huggingface.co/google/flan-t5-base
|
448 |
+
seq_len: 512
|
449 |
+
size: 220
|
450 |
+
dim: -1
|
451 |
+
is_external: true
|
452 |
+
is_proprietary: false
|
453 |
+
is_sentence_transformers_compatible: true
|
454 |
+
flan-t5-large:
|
455 |
+
link: https://huggingface.co/google/flan-t5-large
|
456 |
+
seq_len: 512
|
457 |
+
size: 770
|
458 |
+
dim: -1
|
459 |
+
is_external: true
|
460 |
+
is_proprietary: false
|
461 |
+
is_sentence_transformers_compatible: true
|
462 |
flaubert_base_cased:
|
463 |
link: https://huggingface.co/flaubert/flaubert_base_cased
|
464 |
seq_len: 512
|
|
|
587 |
is_external: true
|
588 |
is_proprietary: false
|
589 |
is_sentence_transformers_compatible: true
|
590 |
+
instructor-base:
|
591 |
+
link: https://huggingface.co/hkunlp/instructor-base
|
592 |
+
seq_len: N/A
|
593 |
+
size: 110
|
594 |
+
dim: 768
|
595 |
+
is_external: true
|
596 |
+
is_proprietary: false
|
597 |
+
is_sentence_transformers_compatible: true
|
598 |
+
instructor-xl:
|
599 |
+
link: https://huggingface.co/hkunlp/instructor-xl
|
600 |
+
seq_len: N/A
|
601 |
+
size: 1241
|
602 |
+
dim: 768
|
603 |
+
is_external: true
|
604 |
+
is_proprietary: false
|
605 |
+
is_sentence_transformers_compatible: true
|
606 |
komninos:
|
607 |
link: https://huggingface.co/sentence-transformers/average_word_embeddings_komninos
|
608 |
seq_len: N/A
|
|
|
611 |
is_external: true
|
612 |
is_proprietary: false
|
613 |
is_sentence_transformers_compatible: true
|
614 |
+
llama-2-7b-chat:
|
615 |
+
link: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
|
616 |
+
seq_len: 4096
|
617 |
+
size: 7000
|
618 |
+
dim: -1
|
619 |
+
is_external: true
|
620 |
+
is_proprietary: false
|
621 |
+
is_sentence_transformers_compatible: false
|
622 |
luotuo-bert-medium:
|
623 |
link: https://huggingface.co/silk-road/luotuo-bert-medium
|
624 |
seq_len: 512
|
|
|
643 |
is_external: true
|
644 |
is_proprietary: false
|
645 |
is_sentence_transformers_compatible: true
|
646 |
+
mistral-7b-instruct-v0.2:
|
647 |
+
link: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
|
648 |
+
seq_len: 4096
|
649 |
+
size: 7240
|
650 |
+
dim: -1
|
651 |
+
is_external: true
|
652 |
+
is_proprietary: false
|
653 |
+
is_sentence_transformers_compatible: false
|
654 |
mistral-embed:
|
655 |
link: https://docs.mistral.ai/guides/embeddings
|
656 |
seq_len: null
|
|
|
659 |
is_external: true
|
660 |
is_proprietary: true
|
661 |
is_sentence_transformers_compatible: false
|
662 |
+
monobert-large-msmarco:
|
663 |
+
link: https://huggingface.co/castorini/monobert-large-msmarco
|
664 |
+
seq_len: 512
|
665 |
+
size: 770
|
666 |
+
dim: -1
|
667 |
+
is_external: true
|
668 |
+
is_proprietary: false
|
669 |
+
is_sentence_transformers_compatible: false
|
670 |
+
monot5-3b-msmarco-10k:
|
671 |
+
link: https://huggingface.co/castorini/monot5-3b-msmarco-10k
|
672 |
+
seq_len: 512
|
673 |
+
size: 2480
|
674 |
+
dim: -1
|
675 |
+
is_external: true
|
676 |
+
is_proprietary: false
|
677 |
+
is_sentence_transformers_compatible: false
|
678 |
+
monot5-base-msmarco-10k:
|
679 |
+
link: https://huggingface.co/castorini/monot5-base-msmarco-10k
|
680 |
+
seq_len: 512
|
681 |
+
size: 220
|
682 |
+
dim: -1
|
683 |
+
is_external: true
|
684 |
+
is_proprietary: false
|
685 |
+
is_sentence_transformers_compatible: false
|
686 |
msmarco-bert-co-condensor:
|
687 |
link: https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor
|
688 |
seq_len: 512
|
|
|
1011 |
is_external: true
|
1012 |
is_proprietary: true
|
1013 |
is_sentence_transformers_compatible: false
|
1014 |
+
tart-dual-contriever-msmarco:
|
1015 |
+
link: https://huggingface.co/orionweller/tart-dual-contriever-msmarco
|
1016 |
+
seq_len: 512
|
1017 |
+
size: 110
|
1018 |
+
dim: 768
|
1019 |
+
is_external: true
|
1020 |
+
is_proprietary: false
|
1021 |
+
is_sentence_transformers_compatible: false
|
1022 |
+
tart-full-flan-t5-xl:
|
1023 |
+
link: https://huggingface.co/facebook/tart-full-flan-t5-xl
|
1024 |
+
seq_len: 512
|
1025 |
+
size: 2480
|
1026 |
+
dim: -1
|
1027 |
+
is_external: true
|
1028 |
+
is_proprietary: false
|
1029 |
+
is_sentence_transformers_compatible: false
|
1030 |
text2vec-base-chinese:
|
1031 |
link: https://huggingface.co/shibing624/text2vec-base-chinese
|
1032 |
seq_len: 512
|
|
|
1308 |
- michaelfeil/ct2fast-gte-large
|
1309 |
- gizmo-ai/Cohere-embed-multilingual-v3.0
|
1310 |
- McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse
|
1311 |
+
cross_encoders:
|
1312 |
+
- FollowIR-7B
|
1313 |
+
- flan-t5-base
|
1314 |
+
- flan-t5-large
|
1315 |
+
- monobert-large-msmarco
|
1316 |
+
- monot5-3b-msmarco-10k
|
1317 |
+
- monot5-base-msmarco-10k
|
1318 |
+
- llama-2-7b-chat
|
1319 |
+
- mistral-7b-instruct-v0.2
|
1320 |
+
- tart-full-flan-t5-xl
|