Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
eduagarcia
committed on
Commit
•
fd4838e
1
Parent(s):
21c6649
minor fixes
Browse files
- app.py +10 -10
- config.yaml +1 -2
- envs.py +1 -0
- model_meta.yaml +230 -230
app.py
CHANGED
@@ -28,13 +28,13 @@ def make_clickable_model(model_name, link=None):
|
|
28 |
f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name.split("/")[-1]}</a>'
|
29 |
)
|
30 |
|
31 |
-
EXTERNAL_MODELS = {k for k,v in MODEL_META["
|
32 |
-
EXTERNAL_MODEL_TO_LINK = {k: v["link"] for k,v in MODEL_META["
|
33 |
-
EXTERNAL_MODEL_TO_DIM = {k: v["dim"] for k,v in MODEL_META["
|
34 |
-
EXTERNAL_MODEL_TO_SEQLEN = {k: v["seq_len"] for k,v in MODEL_META["
|
35 |
-
EXTERNAL_MODEL_TO_SIZE = {k: v["size"] for k,v in MODEL_META["
|
36 |
-
PROPRIETARY_MODELS = {k for k,v in MODEL_META["
|
37 |
-
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {k for k,v in MODEL_META["
|
38 |
MODELS_TO_SKIP = MODEL_META["models_to_skip"]
|
39 |
|
40 |
PROPRIETARY_MODELS = {
|
@@ -48,8 +48,8 @@ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
|
|
48 |
}
|
49 |
|
50 |
TASK_TO_TASK_TYPE = {task_category: [] for task_category in TASKS}
|
51 |
-
for
|
52 |
-
for task_category, task_list in
|
53 |
TASK_TO_TASK_TYPE[task_category].extend(task_list)
|
54 |
|
55 |
def add_lang(examples):
|
@@ -365,7 +365,7 @@ for board, board_config in BOARDS_CONFIG.items():
|
|
365 |
"language_long": board_config["language_long"],
|
366 |
"description": f"**Overall MTEB {overall_pretty_name}** 🔮{board_icon}",
|
367 |
"data": boards_data[board]["data_overall"],
|
368 |
-
"refresh": partial(get_mteb_average, board_config["tasks"]),
|
369 |
"credits": credits,
|
370 |
})
|
371 |
for task_category, task_category_list in board_config["tasks"].items():
|
|
|
28 |
f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name.split("/")[-1]}</a>'
|
29 |
)
|
30 |
|
31 |
+
EXTERNAL_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_external", False)}
|
32 |
+
EXTERNAL_MODEL_TO_LINK = {k: v["link"] for k,v in MODEL_META["model_meta"].items() if v.get("link", False)}
|
33 |
+
EXTERNAL_MODEL_TO_DIM = {k: v["dim"] for k,v in MODEL_META["model_meta"].items() if v.get("dim", False)}
|
34 |
+
EXTERNAL_MODEL_TO_SEQLEN = {k: v["seq_len"] for k,v in MODEL_META["model_meta"].items() if v.get("seq_len", False)}
|
35 |
+
EXTERNAL_MODEL_TO_SIZE = {k: v["size"] for k,v in MODEL_META["model_meta"].items() if v.get("size", False)}
|
36 |
+
PROPRIETARY_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_proprietary", False)}
|
37 |
+
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_sentence_transformers_compatible", False)}
|
38 |
MODELS_TO_SKIP = MODEL_META["models_to_skip"]
|
39 |
|
40 |
PROPRIETARY_MODELS = {
|
|
|
48 |
}
|
49 |
|
50 |
TASK_TO_TASK_TYPE = {task_category: [] for task_category in TASKS}
|
51 |
+
for board_config in BOARDS_CONFIG.values():
|
52 |
+
for task_category, task_list in board_config["tasks"].items():
|
53 |
TASK_TO_TASK_TYPE[task_category].extend(task_list)
|
54 |
|
55 |
def add_lang(examples):
|
|
|
365 |
"language_long": board_config["language_long"],
|
366 |
"description": f"**Overall MTEB {overall_pretty_name}** 🔮{board_icon}",
|
367 |
"data": boards_data[board]["data_overall"],
|
368 |
+
"refresh": lambda: get_mteb_average(board_config["tasks"])[0],#partial(get_mteb_average, board_config["tasks"]),
|
369 |
"credits": credits,
|
370 |
})
|
371 |
for task_category, task_category_list in board_config["tasks"].items():
|
config.yaml
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
version: 0.0.1
|
2 |
config:
|
3 |
REPO_ID: "mteb/leaderboard"
|
4 |
RESULTS_REPO: mteb/results
|
@@ -111,7 +110,7 @@ boards:
|
|
111 |
- SummEval
|
112 |
en-x:
|
113 |
title: "English-X"
|
114 |
-
language_long: "
|
115 |
has_overall: false
|
116 |
acronym: null
|
117 |
icon: null
|
|
|
|
|
1 |
config:
|
2 |
REPO_ID: "mteb/leaderboard"
|
3 |
RESULTS_REPO: mteb/results
|
|
|
110 |
- SummEval
|
111 |
en-x:
|
112 |
title: "English-X"
|
113 |
+
language_long: "117 (Pairs of: English & other language)"
|
114 |
has_overall: false
|
115 |
acronym: null
|
116 |
icon: null
|
envs.py
CHANGED
@@ -37,6 +37,7 @@ RESULTS_REPO = get_config("RESULTS_REPO", "mteb/results")
|
|
37 |
CACHE_PATH=get_config("HF_HOME", ".")
|
38 |
os.environ["HF_HOME"] = CACHE_PATH
|
39 |
|
|
|
40 |
if not os.access(CACHE_PATH, os.W_OK):
|
41 |
print(f"No write access to HF_HOME: {CACHE_PATH}. Resetting to current directory.")
|
42 |
CACHE_PATH = "."
|
|
|
37 |
CACHE_PATH=get_config("HF_HOME", ".")
|
38 |
os.environ["HF_HOME"] = CACHE_PATH
|
39 |
|
40 |
+
# Check if it is using persistent storage
|
41 |
if not os.access(CACHE_PATH, os.W_OK):
|
42 |
print(f"No write access to HF_HOME: {CACHE_PATH}. Resetting to current directory.")
|
43 |
CACHE_PATH = "."
|
model_meta.yaml
CHANGED
@@ -1,1028 +1,1028 @@
|
|
1 |
-
|
2 |
Baichuan-text-embedding:
|
3 |
-
dim: 1024
|
4 |
-
is_external: true
|
5 |
-
is_proprietary: true
|
6 |
-
is_sentence_transformers_compatible: false
|
7 |
link: https://platform.baichuan-ai.com/docs/text-Embedding
|
8 |
seq_len: 512
|
9 |
size: null
|
10 |
-
Cohere-embed-english-v3.0:
|
11 |
dim: 1024
|
12 |
is_external: true
|
13 |
is_proprietary: true
|
14 |
is_sentence_transformers_compatible: false
|
|
|
15 |
link: https://huggingface.co/Cohere/Cohere-embed-english-v3.0
|
16 |
seq_len: 512
|
17 |
size: null
|
18 |
-
|
19 |
-
dim: 384
|
20 |
is_external: true
|
21 |
is_proprietary: true
|
22 |
is_sentence_transformers_compatible: false
|
|
|
23 |
link: https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0
|
24 |
seq_len: 512
|
25 |
size: null
|
26 |
-
|
27 |
-
dim: 1024
|
28 |
is_external: true
|
29 |
is_proprietary: true
|
30 |
is_sentence_transformers_compatible: false
|
|
|
31 |
link: https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0
|
32 |
seq_len: 512
|
33 |
size: null
|
34 |
-
|
35 |
-
dim: 768
|
36 |
is_external: true
|
37 |
-
is_proprietary:
|
38 |
-
is_sentence_transformers_compatible:
|
|
|
39 |
link: https://huggingface.co/vesteinn/DanskBERT
|
40 |
seq_len: 514
|
41 |
size: 125
|
42 |
-
|
43 |
-
dim: 1024
|
44 |
is_external: true
|
45 |
is_proprietary: false
|
46 |
-
is_sentence_transformers_compatible:
|
|
|
47 |
link: https://github.com/facebookresearch/LASER
|
48 |
seq_len: N/A
|
49 |
size: 43
|
50 |
-
|
51 |
-
dim: 4096
|
52 |
is_external: true
|
53 |
is_proprietary: false
|
54 |
is_sentence_transformers_compatible: false
|
|
|
55 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised
|
56 |
seq_len: 4096
|
57 |
size: 6607
|
58 |
-
LLM2Vec-Llama-unsupervised:
|
59 |
dim: 4096
|
60 |
is_external: true
|
61 |
is_proprietary: false
|
62 |
is_sentence_transformers_compatible: false
|
|
|
63 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp
|
64 |
seq_len: 4096
|
65 |
size: 6607
|
66 |
-
LLM2Vec-Mistral-supervised:
|
67 |
dim: 4096
|
68 |
is_external: true
|
69 |
is_proprietary: false
|
70 |
is_sentence_transformers_compatible: false
|
|
|
71 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised
|
72 |
seq_len: 32768
|
73 |
size: 7111
|
74 |
-
LLM2Vec-Mistral-unsupervised:
|
75 |
dim: 4096
|
76 |
is_external: true
|
77 |
is_proprietary: false
|
78 |
is_sentence_transformers_compatible: false
|
|
|
79 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp
|
80 |
seq_len: 32768
|
81 |
size: 7111
|
82 |
-
|
83 |
-
dim: 2048
|
84 |
is_external: true
|
85 |
is_proprietary: false
|
86 |
is_sentence_transformers_compatible: false
|
|
|
87 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised
|
88 |
seq_len: 4096
|
89 |
size: 1280
|
90 |
-
LLM2Vec-Sheared-Llama-unsupervised:
|
91 |
dim: 2048
|
92 |
is_external: true
|
93 |
is_proprietary: false
|
94 |
is_sentence_transformers_compatible: false
|
|
|
95 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp
|
96 |
seq_len: 4096
|
97 |
size: 1280
|
98 |
-
|
99 |
-
dim: 768
|
100 |
is_external: true
|
101 |
is_proprietary: false
|
102 |
-
is_sentence_transformers_compatible:
|
|
|
103 |
link: https://huggingface.co/sentence-transformers/LaBSE
|
104 |
seq_len: 512
|
105 |
size: 471
|
|
|
|
|
|
|
|
|
106 |
OpenSearch-text-hybrid:
|
|
|
|
|
|
|
107 |
dim: 1792
|
108 |
is_external: true
|
109 |
is_proprietary: true
|
110 |
is_sentence_transformers_compatible: false
|
111 |
-
link: https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval
|
112 |
-
seq_len: 512
|
113 |
-
size: null
|
114 |
all-MiniLM-L12-v2:
|
115 |
-
dim: 384
|
116 |
-
is_external: true
|
117 |
-
is_proprietary: false
|
118 |
-
is_sentence_transformers_compatible: true
|
119 |
link: https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2
|
120 |
seq_len: 512
|
121 |
size: 33
|
122 |
-
all-MiniLM-L6-v2:
|
123 |
dim: 384
|
124 |
is_external: true
|
125 |
is_proprietary: false
|
126 |
is_sentence_transformers_compatible: true
|
|
|
127 |
link: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
|
128 |
seq_len: 512
|
129 |
size: 23
|
130 |
-
|
131 |
-
dim: 768
|
132 |
is_external: true
|
133 |
is_proprietary: false
|
134 |
is_sentence_transformers_compatible: true
|
|
|
135 |
link: https://huggingface.co/sentence-transformers/all-mpnet-base-v2
|
136 |
seq_len: 514
|
137 |
size: 110
|
138 |
-
allenai-specter:
|
139 |
dim: 768
|
140 |
is_external: true
|
141 |
is_proprietary: false
|
142 |
is_sentence_transformers_compatible: true
|
|
|
143 |
link: https://huggingface.co/sentence-transformers/allenai-specter
|
144 |
seq_len: 512
|
145 |
size: 110
|
146 |
-
bert-base-10lang-cased:
|
147 |
dim: 768
|
148 |
is_external: true
|
149 |
is_proprietary: false
|
150 |
is_sentence_transformers_compatible: true
|
|
|
151 |
link: https://huggingface.co/Geotrend/bert-base-10lang-cased
|
152 |
seq_len: 512
|
153 |
size: 138
|
154 |
-
bert-base-15lang-cased:
|
155 |
dim: 768
|
156 |
is_external: true
|
157 |
is_proprietary: false
|
158 |
is_sentence_transformers_compatible: true
|
|
|
159 |
link: https://huggingface.co/Geotrend/bert-base-15lang-cased
|
160 |
seq_len: 512
|
161 |
size: 138
|
162 |
-
bert-base-25lang-cased:
|
163 |
dim: 768
|
164 |
is_external: true
|
165 |
is_proprietary: false
|
166 |
is_sentence_transformers_compatible: true
|
|
|
167 |
link: https://huggingface.co/Geotrend/bert-base-25lang-cased
|
168 |
seq_len: 512
|
169 |
size: 138
|
170 |
-
bert-base-multilingual-cased:
|
171 |
dim: 768
|
172 |
is_external: true
|
173 |
is_proprietary: false
|
174 |
is_sentence_transformers_compatible: true
|
|
|
175 |
link: https://huggingface.co/google-bert/bert-base-multilingual-cased
|
176 |
seq_len: 512
|
177 |
size: 179
|
178 |
-
bert-base-multilingual-uncased:
|
179 |
dim: 768
|
180 |
is_external: true
|
181 |
is_proprietary: false
|
182 |
is_sentence_transformers_compatible: true
|
|
|
183 |
link: https://huggingface.co/google-bert/bert-base-multilingual-uncased
|
184 |
seq_len: 512
|
185 |
size: 168
|
186 |
-
bert-base-swedish-cased:
|
187 |
dim: 768
|
188 |
is_external: true
|
189 |
is_proprietary: false
|
190 |
is_sentence_transformers_compatible: true
|
|
|
191 |
link: https://huggingface.co/KB/bert-base-swedish-cased
|
192 |
seq_len: 512
|
193 |
size: 125
|
194 |
-
bert-base-uncased:
|
195 |
dim: 768
|
196 |
is_external: true
|
197 |
is_proprietary: false
|
198 |
is_sentence_transformers_compatible: true
|
|
|
199 |
link: https://huggingface.co/bert-base-uncased
|
200 |
seq_len: 512
|
201 |
size: 110
|
202 |
-
bge-base-zh-v1.5:
|
203 |
dim: 768
|
204 |
is_external: true
|
205 |
is_proprietary: false
|
206 |
is_sentence_transformers_compatible: true
|
|
|
207 |
link: https://huggingface.co/BAAI/bge-base-zh-v1.5
|
208 |
seq_len: 512
|
209 |
size: 102
|
210 |
-
|
211 |
-
dim: 1024
|
212 |
is_external: true
|
213 |
is_proprietary: false
|
214 |
-
is_sentence_transformers_compatible:
|
|
|
215 |
link: https://huggingface.co/BAAI/bge-large-en-v1.5
|
216 |
seq_len: 512
|
217 |
size: null
|
218 |
-
bge-large-zh-noinstruct:
|
219 |
dim: 1024
|
220 |
is_external: true
|
221 |
is_proprietary: false
|
222 |
-
is_sentence_transformers_compatible:
|
|
|
223 |
link: https://huggingface.co/BAAI/bge-large-zh-noinstruct
|
224 |
seq_len: 512
|
225 |
size: 326
|
226 |
-
bge-large-zh-v1.5:
|
227 |
dim: 1024
|
228 |
is_external: true
|
229 |
is_proprietary: false
|
230 |
is_sentence_transformers_compatible: true
|
|
|
231 |
link: https://huggingface.co/BAAI/bge-large-zh-v1.5
|
232 |
seq_len: 512
|
233 |
size: 326
|
234 |
-
|
235 |
-
dim: 512
|
236 |
is_external: true
|
237 |
is_proprietary: false
|
238 |
is_sentence_transformers_compatible: true
|
|
|
239 |
link: https://huggingface.co/BAAI/bge-small-zh-v1.5
|
240 |
seq_len: 512
|
241 |
size: 24
|
242 |
-
camembert-base:
|
243 |
dim: 512
|
244 |
-
is_external:
|
245 |
is_proprietary: false
|
246 |
is_sentence_transformers_compatible: true
|
|
|
247 |
link: https://huggingface.co/almanach/camembert-base
|
248 |
seq_len: 512
|
249 |
size: 111
|
250 |
-
|
251 |
-
dim: 768
|
252 |
is_external: false
|
253 |
is_proprietary: false
|
254 |
is_sentence_transformers_compatible: true
|
|
|
255 |
link: https://huggingface.co/almanach/camembert-large
|
256 |
seq_len: 512
|
257 |
size: 338
|
258 |
-
contriever-base-msmarco:
|
259 |
dim: 768
|
260 |
-
is_external:
|
261 |
is_proprietary: false
|
262 |
is_sentence_transformers_compatible: true
|
|
|
263 |
link: https://huggingface.co/nthakur/contriever-base-msmarco
|
264 |
seq_len: 512
|
265 |
size: 110
|
266 |
-
cross-en-de-roberta-sentence-transformer:
|
267 |
dim: 768
|
268 |
is_external: true
|
269 |
is_proprietary: false
|
270 |
is_sentence_transformers_compatible: true
|
|
|
271 |
link: https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer
|
272 |
seq_len: 514
|
273 |
size: 278
|
274 |
-
|
275 |
-
dim: 1024
|
276 |
is_external: true
|
277 |
is_proprietary: false
|
278 |
is_sentence_transformers_compatible: true
|
|
|
279 |
link: https://huggingface.co/chcaa/dfm-encoder-large-v1
|
280 |
seq_len: 512
|
281 |
size: 355
|
282 |
-
dfm-sentence-encoder-large-1:
|
283 |
dim: 1024
|
284 |
is_external: true
|
285 |
is_proprietary: false
|
286 |
is_sentence_transformers_compatible: true
|
|
|
287 |
link: https://huggingface.co/chcaa/dfm-encoder-large-v1
|
288 |
seq_len: 512
|
289 |
size: 355
|
290 |
-
|
291 |
-
|
292 |
-
is_external: false
|
293 |
is_proprietary: false
|
294 |
is_sentence_transformers_compatible: true
|
|
|
295 |
link: https://huggingface.co/Geotrend/distilbert-base-25lang-cased
|
296 |
seq_len: 512
|
297 |
size: 110
|
298 |
-
distilbert-base-en-fr-cased:
|
299 |
dim: 768
|
300 |
is_external: false
|
301 |
is_proprietary: false
|
302 |
is_sentence_transformers_compatible: true
|
|
|
303 |
link: https://huggingface.co/Geotrend/distilbert-base-en-fr-cased
|
304 |
seq_len: 512
|
305 |
size: 110
|
306 |
-
distilbert-base-en-fr-es-pt-it-cased:
|
307 |
dim: 768
|
308 |
is_external: false
|
309 |
is_proprietary: false
|
310 |
is_sentence_transformers_compatible: true
|
|
|
311 |
link: https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased
|
312 |
seq_len: 512
|
313 |
size: 110
|
314 |
-
distilbert-base-fr-cased:
|
315 |
dim: 768
|
316 |
is_external: false
|
317 |
is_proprietary: false
|
318 |
is_sentence_transformers_compatible: true
|
|
|
319 |
link: https://huggingface.co/Geotrend/distilbert-base-fr-cased
|
320 |
seq_len: 512
|
321 |
size: 110
|
322 |
-
distilbert-base-uncased:
|
323 |
dim: 768
|
324 |
is_external: false
|
325 |
is_proprietary: false
|
326 |
is_sentence_transformers_compatible: true
|
|
|
327 |
link: https://huggingface.co/distilbert-base-uncased
|
328 |
seq_len: 512
|
329 |
size: 110
|
330 |
-
|
331 |
-
|
332 |
-
is_external: true
|
333 |
is_proprietary: false
|
334 |
is_sentence_transformers_compatible: true
|
|
|
335 |
link: https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2
|
336 |
seq_len: 512
|
337 |
size: 135
|
338 |
-
|
339 |
-
dim: 768
|
340 |
is_external: true
|
341 |
is_proprietary: false
|
342 |
is_sentence_transformers_compatible: true
|
|
|
343 |
link: https://huggingface.co/intfloat/e5-base
|
344 |
seq_len: 512
|
345 |
size: 110
|
346 |
-
|
347 |
-
dim: 1024
|
348 |
is_external: true
|
349 |
is_proprietary: false
|
350 |
is_sentence_transformers_compatible: true
|
|
|
351 |
link: https://huggingface.co/intfloat/e5-large
|
352 |
seq_len: 512
|
353 |
size: 335
|
354 |
-
|
355 |
-
dim: 4096
|
356 |
is_external: true
|
357 |
is_proprietary: false
|
358 |
is_sentence_transformers_compatible: true
|
|
|
359 |
link: https://huggingface.co/intfloat/e5-mistral-7b-instruct
|
360 |
seq_len: 32768
|
361 |
size: 7111
|
362 |
-
|
363 |
-
dim: 384
|
364 |
is_external: true
|
365 |
is_proprietary: false
|
366 |
is_sentence_transformers_compatible: true
|
|
|
367 |
link: https://huggingface.co/intfloat/e5-small
|
368 |
seq_len: 512
|
369 |
size: 33
|
370 |
-
|
371 |
-
dim: 256
|
372 |
is_external: true
|
373 |
is_proprietary: false
|
374 |
is_sentence_transformers_compatible: true
|
|
|
375 |
link: https://huggingface.co/jonfd/electra-small-nordic
|
376 |
seq_len: 512
|
377 |
size: 23
|
378 |
-
electra-small-swedish-cased-discriminator:
|
379 |
dim: 256
|
380 |
is_external: true
|
381 |
is_proprietary: false
|
382 |
is_sentence_transformers_compatible: true
|
|
|
383 |
link: https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator
|
384 |
seq_len: 512
|
385 |
size: 16
|
386 |
-
|
387 |
-
dim: 768
|
388 |
is_external: true
|
389 |
is_proprietary: false
|
390 |
is_sentence_transformers_compatible: true
|
|
|
391 |
link: https://huggingface.co/flaubert/flaubert_base_cased
|
392 |
seq_len: 512
|
393 |
size: 138
|
394 |
-
flaubert_base_uncased:
|
395 |
dim: 768
|
396 |
is_external: true
|
397 |
is_proprietary: false
|
398 |
is_sentence_transformers_compatible: true
|
|
|
399 |
link: https://huggingface.co/flaubert/flaubert_base_uncased
|
400 |
seq_len: 512
|
401 |
size: 138
|
402 |
-
|
403 |
-
dim: 1024
|
404 |
is_external: true
|
405 |
is_proprietary: false
|
406 |
is_sentence_transformers_compatible: true
|
|
|
407 |
link: https://huggingface.co/flaubert/flaubert_large_cased
|
408 |
seq_len: 512
|
409 |
size: 372
|
410 |
-
|
411 |
-
dim: 768
|
412 |
is_external: true
|
413 |
is_proprietary: false
|
414 |
is_sentence_transformers_compatible: true
|
|
|
415 |
link: https://huggingface.co/deepset/gbert-base
|
416 |
seq_len: 512
|
417 |
size: 110
|
418 |
-
|
419 |
-
dim: 1024
|
420 |
is_external: true
|
421 |
is_proprietary: false
|
422 |
is_sentence_transformers_compatible: true
|
|
|
423 |
link: https://huggingface.co/deepset/gbert-large
|
424 |
seq_len: 512
|
425 |
size: 337
|
426 |
-
|
427 |
-
dim: 768
|
428 |
is_external: true
|
429 |
is_proprietary: false
|
430 |
is_sentence_transformers_compatible: true
|
|
|
431 |
link: https://huggingface.co/deepset/gelectra-base
|
432 |
seq_len: 512
|
433 |
size: 110
|
434 |
-
|
435 |
-
dim: 1024
|
436 |
is_external: true
|
437 |
is_proprietary: false
|
438 |
is_sentence_transformers_compatible: true
|
|
|
439 |
link: https://huggingface.co/deepset/gelectra-large
|
440 |
seq_len: 512
|
441 |
size: 335
|
442 |
-
|
443 |
-
dim: 300
|
444 |
is_external: true
|
445 |
is_proprietary: false
|
446 |
is_sentence_transformers_compatible: true
|
|
|
447 |
link: https://huggingface.co/sentence-transformers/average_word_embeddings_glove.6B.300d
|
448 |
seq_len: N/A
|
449 |
size: 120
|
|
|
|
|
|
|
|
|
450 |
google-gecko-256.text-embedding-preview-0409:
|
|
|
|
|
|
|
451 |
dim: 256
|
452 |
is_external: true
|
453 |
is_proprietary: true
|
454 |
is_sentence_transformers_compatible: false
|
|
|
455 |
link: https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models
|
456 |
seq_len: 2048
|
457 |
size: 1200
|
458 |
-
google-gecko.text-embedding-preview-0409:
|
459 |
dim: 768
|
460 |
is_external: true
|
461 |
is_proprietary: true
|
462 |
is_sentence_transformers_compatible: false
|
463 |
-
link: https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models
|
464 |
-
seq_len: 2048
|
465 |
-
size: 1200
|
466 |
gottbert-base:
|
467 |
-
dim: 768
|
468 |
-
is_external: true
|
469 |
-
is_proprietary: false
|
470 |
-
is_sentence_transformers_compatible: true
|
471 |
link: https://huggingface.co/uklfr/gottbert-base
|
472 |
seq_len: 512
|
473 |
size: 127
|
474 |
-
gtr-t5-base:
|
475 |
dim: 768
|
476 |
is_external: true
|
477 |
is_proprietary: false
|
478 |
is_sentence_transformers_compatible: true
|
|
|
479 |
link: https://huggingface.co/sentence-transformers/gtr-t5-base
|
480 |
seq_len: 512
|
481 |
size: 110
|
482 |
-
gtr-t5-large:
|
483 |
dim: 768
|
484 |
is_external: true
|
485 |
is_proprietary: false
|
486 |
is_sentence_transformers_compatible: true
|
|
|
487 |
link: https://huggingface.co/sentence-transformers/gtr-t5-large
|
488 |
seq_len: 512
|
489 |
size: 168
|
490 |
-
gtr-t5-xl:
|
491 |
dim: 768
|
492 |
is_external: true
|
493 |
is_proprietary: false
|
494 |
is_sentence_transformers_compatible: true
|
|
|
495 |
link: https://huggingface.co/sentence-transformers/gtr-t5-xl
|
496 |
seq_len: 512
|
497 |
size: 1240
|
498 |
-
gtr-t5-xxl:
|
499 |
dim: 768
|
500 |
is_external: true
|
501 |
is_proprietary: false
|
502 |
is_sentence_transformers_compatible: true
|
|
|
503 |
link: https://huggingface.co/sentence-transformers/gtr-t5-xxl
|
504 |
seq_len: 512
|
505 |
size: 4865
|
506 |
-
herbert-base-retrieval-v2:
|
507 |
dim: 768
|
508 |
is_external: true
|
509 |
is_proprietary: false
|
510 |
is_sentence_transformers_compatible: true
|
|
|
511 |
link: https://huggingface.co/ipipan/herbert-base-retrieval-v2
|
512 |
seq_len: 514
|
513 |
size: 125
|
514 |
-
|
515 |
-
dim: 300
|
516 |
is_external: true
|
517 |
is_proprietary: false
|
518 |
is_sentence_transformers_compatible: true
|
|
|
519 |
link: https://huggingface.co/sentence-transformers/average_word_embeddings_komninos
|
520 |
seq_len: N/A
|
521 |
size: 134
|
522 |
-
|
523 |
-
dim: 768
|
524 |
is_external: true
|
525 |
is_proprietary: false
|
526 |
is_sentence_transformers_compatible: true
|
|
|
527 |
link: https://huggingface.co/silk-road/luotuo-bert-medium
|
528 |
seq_len: 512
|
529 |
size: 328
|
530 |
-
m3e-base:
|
531 |
dim: 768
|
532 |
is_external: true
|
533 |
is_proprietary: false
|
534 |
is_sentence_transformers_compatible: true
|
|
|
535 |
link: https://huggingface.co/moka-ai/m3e-base
|
536 |
seq_len: 512
|
537 |
size: 102
|
538 |
-
m3e-large:
|
539 |
dim: 768
|
540 |
is_external: true
|
541 |
is_proprietary: false
|
542 |
is_sentence_transformers_compatible: true
|
|
|
543 |
link: https://huggingface.co/moka-ai/m3e-large
|
544 |
seq_len: 512
|
545 |
size: 102
|
|
|
|
|
|
|
|
|
546 |
mistral-embed:
|
|
|
|
|
|
|
547 |
dim: 1024
|
548 |
is_external: true
|
549 |
is_proprietary: true
|
550 |
is_sentence_transformers_compatible: false
|
551 |
-
link: https://docs.mistral.ai/guides/embeddings
|
552 |
-
seq_len: null
|
553 |
-
size: null
|
554 |
msmarco-bert-co-condensor:
|
555 |
-
dim: 768
|
556 |
-
is_external: true
|
557 |
-
is_proprietary: false
|
558 |
-
is_sentence_transformers_compatible: true
|
559 |
link: https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor
|
560 |
seq_len: 512
|
561 |
size: 110
|
562 |
-
|
563 |
-
dim: 384
|
564 |
is_external: true
|
565 |
is_proprietary: false
|
566 |
is_sentence_transformers_compatible: true
|
|
|
567 |
link: https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1
|
568 |
seq_len: 512
|
569 |
size: 23
|
570 |
-
|
571 |
-
dim: 768
|
572 |
is_external: true
|
573 |
is_proprietary: false
|
574 |
is_sentence_transformers_compatible: true
|
|
|
575 |
link: https://huggingface.co/intfloat/multilingual-e5-base
|
576 |
seq_len: 514
|
577 |
size: 278
|
578 |
-
|
579 |
-
dim: 1024
|
580 |
is_external: true
|
581 |
is_proprietary: false
|
582 |
is_sentence_transformers_compatible: true
|
|
|
583 |
link: https://huggingface.co/intfloat/multilingual-e5-large
|
584 |
seq_len: 514
|
585 |
size: 560
|
586 |
-
|
587 |
-
dim: 384
|
588 |
is_external: true
|
589 |
is_proprietary: false
|
590 |
is_sentence_transformers_compatible: true
|
|
|
591 |
link: https://huggingface.co/intfloat/multilingual-e5-small
|
592 |
seq_len: 512
|
593 |
size: 118
|
594 |
-
|
595 |
-
dim: 768
|
596 |
is_external: true
|
597 |
is_proprietary: false
|
598 |
is_sentence_transformers_compatible: true
|
|
|
599 |
link: https://huggingface.co/NbAiLab/nb-bert-base
|
600 |
seq_len: 512
|
601 |
size: 179
|
602 |
-
|
603 |
-
dim: 1024
|
604 |
is_external: true
|
605 |
is_proprietary: false
|
606 |
is_sentence_transformers_compatible: true
|
|
|
607 |
link: https://huggingface.co/NbAiLab/nb-bert-large
|
608 |
seq_len: 512
|
609 |
size: 355
|
610 |
-
|
611 |
-
dim: 128
|
612 |
is_external: true
|
613 |
is_proprietary: false
|
614 |
is_sentence_transformers_compatible: true
|
|
|
615 |
link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
|
616 |
seq_len: 8192
|
617 |
size: 138
|
618 |
-
|
619 |
-
dim: 256
|
620 |
is_external: true
|
621 |
is_proprietary: false
|
622 |
is_sentence_transformers_compatible: true
|
|
|
623 |
link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
|
624 |
seq_len: 8192
|
625 |
size: 138
|
626 |
-
|
627 |
-
dim: 512
|
628 |
is_external: true
|
629 |
is_proprietary: false
|
630 |
is_sentence_transformers_compatible: true
|
|
|
631 |
link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
|
632 |
seq_len: 8192
|
633 |
size: 138
|
634 |
-
|
635 |
-
dim: 64
|
636 |
is_external: true
|
637 |
is_proprietary: false
|
638 |
is_sentence_transformers_compatible: true
|
|
|
639 |
link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
|
640 |
seq_len: 8192
|
641 |
size: 138
|
642 |
-
|
643 |
-
dim: 768
|
644 |
is_external: true
|
645 |
is_proprietary: false
|
646 |
is_sentence_transformers_compatible: true
|
|
|
647 |
link: https://huggingface.co/ltg/norbert3-base
|
648 |
seq_len: 512
|
649 |
size: 131
|
650 |
-
|
651 |
-
dim: 1024
|
652 |
is_external: true
|
653 |
is_proprietary: false
|
654 |
is_sentence_transformers_compatible: true
|
|
|
655 |
link: https://huggingface.co/ltg/norbert3-large
|
656 |
seq_len: 512
|
657 |
size: 368
|
658 |
-
|
659 |
-
dim: 384
|
660 |
is_external: true
|
661 |
is_proprietary: false
|
662 |
is_sentence_transformers_compatible: true
|
|
|
663 |
link: https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
664 |
seq_len: 512
|
665 |
size: 118
|
666 |
-
|
667 |
-
dim: 768
|
668 |
is_external: true
|
669 |
is_proprietary: false
|
670 |
is_sentence_transformers_compatible: true
|
|
|
671 |
link: https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2
|
672 |
seq_len: 514
|
673 |
size: 278
|
674 |
-
sentence-bert-swedish-cased:
|
675 |
dim: 768
|
676 |
is_external: true
|
677 |
is_proprietary: false
|
678 |
is_sentence_transformers_compatible: true
|
|
|
679 |
link: https://huggingface.co/KBLab/sentence-bert-swedish-cased
|
680 |
seq_len: 512
|
681 |
size: 125
|
682 |
-
sentence-camembert-base:
|
683 |
dim: 768
|
684 |
is_external: true
|
685 |
is_proprietary: false
|
686 |
is_sentence_transformers_compatible: true
|
|
|
687 |
link: https://huggingface.co/dangvantuan/sentence-camembert-base
|
688 |
seq_len: 512
|
689 |
size: 110
|
690 |
-
|
691 |
-
dim: 1024
|
692 |
is_external: true
|
693 |
is_proprietary: false
|
694 |
is_sentence_transformers_compatible: true
|
|
|
695 |
link: https://huggingface.co/dangvantuan/sentence-camembert-large
|
696 |
seq_len: 512
|
697 |
size: 337
|
698 |
-
|
699 |
-
dim: 2048
|
700 |
is_external: true
|
701 |
is_proprietary: false
|
702 |
is_sentence_transformers_compatible: true
|
|
|
703 |
link: https://huggingface.co/Wissam42/sentence-croissant-llm-base
|
704 |
seq_len: 2048
|
705 |
size: 1280
|
706 |
-
|
707 |
-
dim: 768
|
708 |
is_external: true
|
709 |
is_proprietary: false
|
710 |
is_sentence_transformers_compatible: true
|
|
|
711 |
link: https://huggingface.co/sentence-transformers/sentence-t5-base
|
712 |
seq_len: 512
|
713 |
size: 110
|
714 |
-
sentence-t5-large:
|
715 |
dim: 768
|
716 |
is_external: true
|
717 |
is_proprietary: false
|
718 |
is_sentence_transformers_compatible: true
|
|
|
719 |
link: https://huggingface.co/sentence-transformers/sentence-t5-large
|
720 |
seq_len: 512
|
721 |
size: 168
|
722 |
-
sentence-t5-xl:
|
723 |
dim: 768
|
724 |
is_external: true
|
725 |
is_proprietary: false
|
726 |
is_sentence_transformers_compatible: true
|
|
|
727 |
link: https://huggingface.co/sentence-transformers/sentence-t5-xl
|
728 |
seq_len: 512
|
729 |
size: 1240
|
730 |
-
sentence-t5-xxl:
|
731 |
dim: 768
|
732 |
is_external: true
|
733 |
is_proprietary: false
|
734 |
is_sentence_transformers_compatible: true
|
|
|
735 |
link: https://huggingface.co/sentence-transformers/sentence-t5-xxl
|
736 |
seq_len: 512
|
737 |
size: 4865
|
738 |
-
silver-retriever-base-v1:
|
739 |
dim: 768
|
740 |
is_external: true
|
741 |
is_proprietary: false
|
742 |
is_sentence_transformers_compatible: true
|
|
|
743 |
link: https://huggingface.co/ipipan/silver-retriever-base-v1
|
744 |
seq_len: 514
|
745 |
size: 125
|
746 |
-
st-polish-paraphrase-from-distilroberta:
|
747 |
dim: 768
|
748 |
is_external: true
|
749 |
is_proprietary: false
|
750 |
is_sentence_transformers_compatible: true
|
|
|
751 |
link: https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta
|
752 |
seq_len: 514
|
753 |
size: 125
|
754 |
-
st-polish-paraphrase-from-mpnet:
|
755 |
dim: 768
|
756 |
is_external: true
|
757 |
is_proprietary: false
|
758 |
is_sentence_transformers_compatible: true
|
|
|
759 |
link: https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet
|
760 |
seq_len: 514
|
761 |
size: 125
|
762 |
-
sup-simcse-bert-base-uncased:
|
763 |
dim: 768
|
764 |
is_external: true
|
765 |
is_proprietary: false
|
766 |
is_sentence_transformers_compatible: true
|
|
|
767 |
link: https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased
|
768 |
seq_len: 512
|
769 |
size: 110
|
|
|
|
|
|
|
|
|
770 |
text-embedding-3-large:
|
|
|
|
|
|
|
771 |
dim: 3072
|
772 |
is_external: true
|
773 |
is_proprietary: true
|
774 |
is_sentence_transformers_compatible: false
|
|
|
775 |
link: https://openai.com/blog/new-embedding-models-and-api-updates
|
776 |
seq_len: 8191
|
777 |
size: null
|
778 |
-
text-embedding-3-large-256:
|
779 |
dim: 256
|
780 |
is_external: true
|
781 |
is_proprietary: true
|
782 |
is_sentence_transformers_compatible: false
|
|
|
783 |
link: https://openai.com/blog/new-embedding-models-and-api-updates
|
784 |
seq_len: 8191
|
785 |
size: null
|
786 |
-
text-embedding-3-small:
|
787 |
dim: 1536
|
788 |
is_external: true
|
789 |
is_proprietary: true
|
790 |
is_sentence_transformers_compatible: false
|
791 |
-
|
|
|
792 |
seq_len: 8191
|
793 |
size: null
|
794 |
-
text-embedding-ada-002:
|
795 |
dim: 1536
|
796 |
is_external: true
|
797 |
is_proprietary: true
|
798 |
is_sentence_transformers_compatible: false
|
799 |
-
link: https://openai.com/blog/new-and-improved-embedding-model
|
800 |
-
seq_len: 8191
|
801 |
-
size: null
|
802 |
text-search-ada-001:
|
|
|
|
|
|
|
803 |
dim: 1024
|
804 |
is_external: true
|
805 |
is_proprietary: true
|
806 |
is_sentence_transformers_compatible: false
|
|
|
807 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
808 |
seq_len: 2046
|
809 |
size: null
|
810 |
-
text-search-ada-doc-001:
|
811 |
dim: 1024
|
812 |
is_external: true
|
813 |
is_proprietary: true
|
814 |
is_sentence_transformers_compatible: false
|
|
|
815 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
816 |
seq_len: 2046
|
817 |
size: null
|
818 |
-
text-search-ada-query-001:
|
819 |
dim: 1024
|
820 |
is_external: false
|
821 |
is_proprietary: true
|
822 |
is_sentence_transformers_compatible: false
|
|
|
823 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
824 |
seq_len: 2046
|
825 |
size: null
|
826 |
-
text-search-babbage-001:
|
827 |
dim: 2048
|
828 |
is_external: true
|
829 |
is_proprietary: true
|
830 |
is_sentence_transformers_compatible: false
|
|
|
831 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
832 |
seq_len: 2046
|
833 |
size: null
|
834 |
-
text-search-curie-001:
|
835 |
dim: 4096
|
836 |
is_external: true
|
837 |
is_proprietary: true
|
838 |
is_sentence_transformers_compatible: false
|
|
|
839 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
840 |
seq_len: 2046
|
841 |
size: null
|
842 |
-
text-search-davinci-001:
|
843 |
dim: 12288
|
844 |
is_external: true
|
845 |
is_proprietary: true
|
846 |
is_sentence_transformers_compatible: false
|
|
|
847 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
848 |
seq_len: 2046
|
849 |
size: null
|
850 |
-
text-similarity-ada-001:
|
851 |
dim: 1024
|
852 |
is_external: true
|
853 |
is_proprietary: true
|
854 |
is_sentence_transformers_compatible: false
|
|
|
855 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
856 |
seq_len: 2046
|
857 |
size: null
|
858 |
-
text-similarity-babbage-001:
|
859 |
dim: 2048
|
860 |
is_external: true
|
861 |
is_proprietary: true
|
862 |
is_sentence_transformers_compatible: false
|
|
|
863 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
864 |
seq_len: 2046
|
865 |
size: null
|
866 |
-
text-similarity-curie-001:
|
867 |
dim: 4096
|
868 |
is_external: true
|
869 |
is_proprietary: true
|
870 |
is_sentence_transformers_compatible: false
|
|
|
871 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
872 |
seq_len: 2046
|
873 |
size: null
|
874 |
-
text-similarity-davinci-001:
|
875 |
dim: 12288
|
876 |
is_external: true
|
877 |
is_proprietary: true
|
878 |
is_sentence_transformers_compatible: false
|
879 |
-
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
880 |
-
seq_len: 2046
|
881 |
-
size: null
|
882 |
text2vec-base-chinese:
|
|
|
|
|
|
|
883 |
dim: 768
|
884 |
is_external: true
|
885 |
is_proprietary: false
|
886 |
is_sentence_transformers_compatible: true
|
887 |
-
link: https://huggingface.co/shibing624/text2vec-base-chinese
|
888 |
-
seq_len: 512
|
889 |
-
size: 102
|
890 |
text2vec-base-multilingual:
|
|
|
|
|
|
|
891 |
dim: null
|
892 |
is_external: true
|
893 |
is_proprietary: false
|
894 |
is_sentence_transformers_compatible: false
|
895 |
-
link: null
|
896 |
-
seq_len: null
|
897 |
-
size: null
|
898 |
text2vec-large-chinese:
|
|
|
|
|
|
|
899 |
dim: 1024
|
900 |
is_external: true
|
901 |
is_proprietary: false
|
902 |
is_sentence_transformers_compatible: true
|
903 |
-
link: https://huggingface.co/GanymedeNil/text2vec-large-chinese
|
904 |
-
seq_len: 512
|
905 |
-
size: 326
|
906 |
titan-embed-text-v1:
|
907 |
-
dim: 1536
|
908 |
-
is_external: true
|
909 |
-
is_proprietary: true
|
910 |
-
is_sentence_transformers_compatible: false
|
911 |
link: https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html
|
912 |
seq_len: 8000
|
913 |
size: null
|
914 |
-
udever-bloom-1b1:
|
915 |
dim: 1536
|
916 |
is_external: true
|
917 |
-
is_proprietary:
|
918 |
-
is_sentence_transformers_compatible:
|
|
|
919 |
link: https://huggingface.co/izhx/udever-bloom-1b1
|
920 |
seq_len: 2048
|
921 |
size: null
|
922 |
-
|
923 |
-
dim: 1024
|
924 |
is_external: true
|
925 |
is_proprietary: false
|
926 |
is_sentence_transformers_compatible: true
|
|
|
927 |
link: https://huggingface.co/izhx/udever-bloom-560m
|
928 |
seq_len: 2048
|
929 |
size: null
|
930 |
-
|
931 |
-
dim: 512
|
932 |
is_external: true
|
933 |
is_proprietary: false
|
934 |
is_sentence_transformers_compatible: true
|
|
|
935 |
link: https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-3
|
936 |
seq_len: 512
|
937 |
size: null
|
938 |
-
universal-sentence-encoder-multilingual-large-3:
|
939 |
dim: 512
|
940 |
is_external: true
|
941 |
is_proprietary: false
|
942 |
is_sentence_transformers_compatible: true
|
|
|
943 |
link: https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-large-3
|
944 |
seq_len: 512
|
945 |
size: null
|
946 |
-
|
947 |
-
dim: 768
|
948 |
is_external: true
|
949 |
is_proprietary: false
|
950 |
is_sentence_transformers_compatible: true
|
|
|
951 |
link: https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased
|
952 |
seq_len: 512
|
953 |
size: 110
|
954 |
-
use-cmlm-multilingual:
|
955 |
dim: 768
|
956 |
is_external: true
|
957 |
is_proprietary: false
|
958 |
is_sentence_transformers_compatible: true
|
|
|
959 |
link: https://huggingface.co/sentence-transformers/use-cmlm-multilingual
|
960 |
seq_len: 512
|
961 |
size: 472
|
|
|
|
|
|
|
|
|
962 |
voyage-2:
|
|
|
|
|
|
|
963 |
dim: 1024
|
964 |
is_external: true
|
965 |
is_proprietary: true
|
966 |
is_sentence_transformers_compatible: false
|
|
|
967 |
link: https://docs.voyageai.com/embeddings/
|
968 |
-
seq_len:
|
969 |
size: null
|
970 |
-
voyage-code-2:
|
971 |
dim: 1536
|
972 |
is_external: true
|
973 |
is_proprietary: true
|
974 |
is_sentence_transformers_compatible: false
|
|
|
975 |
link: https://docs.voyageai.com/embeddings/
|
976 |
seq_len: 16000
|
977 |
size: null
|
978 |
-
voyage-large-2-instruct:
|
979 |
dim: 1024
|
980 |
is_external: true
|
981 |
is_proprietary: false
|
982 |
is_sentence_transformers_compatible: false
|
|
|
983 |
link: https://docs.voyageai.com/embeddings/
|
984 |
-
seq_len:
|
985 |
size: null
|
986 |
-
voyage-law-2:
|
987 |
dim: 1024
|
988 |
is_external: true
|
989 |
is_proprietary: true
|
990 |
is_sentence_transformers_compatible: false
|
|
|
991 |
link: https://docs.voyageai.com/embeddings/
|
992 |
seq_len: 4000
|
993 |
size: null
|
994 |
-
voyage-lite-01-instruct:
|
995 |
dim: 1024
|
996 |
is_external: true
|
997 |
is_proprietary: true
|
998 |
is_sentence_transformers_compatible: false
|
|
|
999 |
link: https://docs.voyageai.com/embeddings/
|
1000 |
seq_len: 4000
|
1001 |
-
size:
|
1002 |
-
voyage-lite-02-instruct:
|
1003 |
dim: 1024
|
1004 |
is_external: true
|
1005 |
is_proprietary: true
|
1006 |
is_sentence_transformers_compatible: false
|
1007 |
-
link: https://docs.voyageai.com/embeddings/
|
1008 |
-
seq_len: 4000
|
1009 |
-
size: 1220
|
1010 |
xlm-roberta-base:
|
1011 |
-
dim: 768
|
1012 |
-
is_external: true
|
1013 |
-
is_proprietary: false
|
1014 |
-
is_sentence_transformers_compatible: true
|
1015 |
link: https://huggingface.co/xlm-roberta-base
|
1016 |
seq_len: 514
|
1017 |
size: 279
|
1018 |
-
|
1019 |
-
dim: 1024
|
1020 |
is_external: true
|
1021 |
is_proprietary: false
|
1022 |
is_sentence_transformers_compatible: true
|
|
|
1023 |
link: https://huggingface.co/xlm-roberta-large
|
1024 |
seq_len: 514
|
1025 |
size: 560
|
|
|
|
|
|
|
|
|
1026 |
models_to_skip:
|
1027 |
- michaelfeil/ct2fast-e5-large-v2
|
1028 |
- McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse
|
|
|
1 |
+
model_meta:
|
2 |
Baichuan-text-embedding:
|
|
|
|
|
|
|
|
|
3 |
link: https://platform.baichuan-ai.com/docs/text-Embedding
|
4 |
seq_len: 512
|
5 |
size: null
|
|
|
6 |
dim: 1024
|
7 |
is_external: true
|
8 |
is_proprietary: true
|
9 |
is_sentence_transformers_compatible: false
|
10 |
+
Cohere-embed-english-v3.0:
|
11 |
link: https://huggingface.co/Cohere/Cohere-embed-english-v3.0
|
12 |
seq_len: 512
|
13 |
size: null
|
14 |
+
dim: 1024
|
|
|
15 |
is_external: true
|
16 |
is_proprietary: true
|
17 |
is_sentence_transformers_compatible: false
|
18 |
+
Cohere-embed-multilingual-light-v3.0:
|
19 |
link: https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0
|
20 |
seq_len: 512
|
21 |
size: null
|
22 |
+
dim: 384
|
|
|
23 |
is_external: true
|
24 |
is_proprietary: true
|
25 |
is_sentence_transformers_compatible: false
|
26 |
+
Cohere-embed-multilingual-v3.0:
|
27 |
link: https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0
|
28 |
seq_len: 512
|
29 |
size: null
|
30 |
+
dim: 1024
|
|
|
31 |
is_external: true
|
32 |
+
is_proprietary: true
|
33 |
+
is_sentence_transformers_compatible: false
|
34 |
+
DanskBERT:
|
35 |
link: https://huggingface.co/vesteinn/DanskBERT
|
36 |
seq_len: 514
|
37 |
size: 125
|
38 |
+
dim: 768
|
|
|
39 |
is_external: true
|
40 |
is_proprietary: false
|
41 |
+
is_sentence_transformers_compatible: true
|
42 |
+
LASER2:
|
43 |
link: https://github.com/facebookresearch/LASER
|
44 |
seq_len: N/A
|
45 |
size: 43
|
46 |
+
dim: 1024
|
|
|
47 |
is_external: true
|
48 |
is_proprietary: false
|
49 |
is_sentence_transformers_compatible: false
|
50 |
+
LLM2Vec-Llama-supervised:
|
51 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised
|
52 |
seq_len: 4096
|
53 |
size: 6607
|
|
|
54 |
dim: 4096
|
55 |
is_external: true
|
56 |
is_proprietary: false
|
57 |
is_sentence_transformers_compatible: false
|
58 |
+
LLM2Vec-Llama-unsupervised:
|
59 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp
|
60 |
seq_len: 4096
|
61 |
size: 6607
|
|
|
62 |
dim: 4096
|
63 |
is_external: true
|
64 |
is_proprietary: false
|
65 |
is_sentence_transformers_compatible: false
|
66 |
+
LLM2Vec-Mistral-supervised:
|
67 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised
|
68 |
seq_len: 32768
|
69 |
size: 7111
|
|
|
70 |
dim: 4096
|
71 |
is_external: true
|
72 |
is_proprietary: false
|
73 |
is_sentence_transformers_compatible: false
|
74 |
+
LLM2Vec-Mistral-unsupervised:
|
75 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp
|
76 |
seq_len: 32768
|
77 |
size: 7111
|
78 |
+
dim: 4096
|
|
|
79 |
is_external: true
|
80 |
is_proprietary: false
|
81 |
is_sentence_transformers_compatible: false
|
82 |
+
LLM2Vec-Sheared-Llama-supervised:
|
83 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised
|
84 |
seq_len: 4096
|
85 |
size: 1280
|
|
|
86 |
dim: 2048
|
87 |
is_external: true
|
88 |
is_proprietary: false
|
89 |
is_sentence_transformers_compatible: false
|
90 |
+
LLM2Vec-Sheared-Llama-unsupervised:
|
91 |
link: https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp
|
92 |
seq_len: 4096
|
93 |
size: 1280
|
94 |
+
dim: 2048
|
|
|
95 |
is_external: true
|
96 |
is_proprietary: false
|
97 |
+
is_sentence_transformers_compatible: false
|
98 |
+
LaBSE:
|
99 |
link: https://huggingface.co/sentence-transformers/LaBSE
|
100 |
seq_len: 512
|
101 |
size: 471
|
102 |
+
dim: 768
|
103 |
+
is_external: true
|
104 |
+
is_proprietary: false
|
105 |
+
is_sentence_transformers_compatible: true
|
106 |
OpenSearch-text-hybrid:
|
107 |
+
link: https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval
|
108 |
+
seq_len: 512
|
109 |
+
size: null
|
110 |
dim: 1792
|
111 |
is_external: true
|
112 |
is_proprietary: true
|
113 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
114 |
all-MiniLM-L12-v2:
|
|
|
|
|
|
|
|
|
115 |
link: https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2
|
116 |
seq_len: 512
|
117 |
size: 33
|
|
|
118 |
dim: 384
|
119 |
is_external: true
|
120 |
is_proprietary: false
|
121 |
is_sentence_transformers_compatible: true
|
122 |
+
all-MiniLM-L6-v2:
|
123 |
link: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
|
124 |
seq_len: 512
|
125 |
size: 23
|
126 |
+
dim: 384
|
|
|
127 |
is_external: true
|
128 |
is_proprietary: false
|
129 |
is_sentence_transformers_compatible: true
|
130 |
+
all-mpnet-base-v2:
|
131 |
link: https://huggingface.co/sentence-transformers/all-mpnet-base-v2
|
132 |
seq_len: 514
|
133 |
size: 110
|
|
|
134 |
dim: 768
|
135 |
is_external: true
|
136 |
is_proprietary: false
|
137 |
is_sentence_transformers_compatible: true
|
138 |
+
allenai-specter:
|
139 |
link: https://huggingface.co/sentence-transformers/allenai-specter
|
140 |
seq_len: 512
|
141 |
size: 110
|
|
|
142 |
dim: 768
|
143 |
is_external: true
|
144 |
is_proprietary: false
|
145 |
is_sentence_transformers_compatible: true
|
146 |
+
bert-base-10lang-cased:
|
147 |
link: https://huggingface.co/Geotrend/bert-base-10lang-cased
|
148 |
seq_len: 512
|
149 |
size: 138
|
|
|
150 |
dim: 768
|
151 |
is_external: true
|
152 |
is_proprietary: false
|
153 |
is_sentence_transformers_compatible: true
|
154 |
+
bert-base-15lang-cased:
|
155 |
link: https://huggingface.co/Geotrend/bert-base-15lang-cased
|
156 |
seq_len: 512
|
157 |
size: 138
|
|
|
158 |
dim: 768
|
159 |
is_external: true
|
160 |
is_proprietary: false
|
161 |
is_sentence_transformers_compatible: true
|
162 |
+
bert-base-25lang-cased:
|
163 |
link: https://huggingface.co/Geotrend/bert-base-25lang-cased
|
164 |
seq_len: 512
|
165 |
size: 138
|
|
|
166 |
dim: 768
|
167 |
is_external: true
|
168 |
is_proprietary: false
|
169 |
is_sentence_transformers_compatible: true
|
170 |
+
bert-base-multilingual-cased:
|
171 |
link: https://huggingface.co/google-bert/bert-base-multilingual-cased
|
172 |
seq_len: 512
|
173 |
size: 179
|
|
|
174 |
dim: 768
|
175 |
is_external: true
|
176 |
is_proprietary: false
|
177 |
is_sentence_transformers_compatible: true
|
178 |
+
bert-base-multilingual-uncased:
|
179 |
link: https://huggingface.co/google-bert/bert-base-multilingual-uncased
|
180 |
seq_len: 512
|
181 |
size: 168
|
|
|
182 |
dim: 768
|
183 |
is_external: true
|
184 |
is_proprietary: false
|
185 |
is_sentence_transformers_compatible: true
|
186 |
+
bert-base-swedish-cased:
|
187 |
link: https://huggingface.co/KB/bert-base-swedish-cased
|
188 |
seq_len: 512
|
189 |
size: 125
|
|
|
190 |
dim: 768
|
191 |
is_external: true
|
192 |
is_proprietary: false
|
193 |
is_sentence_transformers_compatible: true
|
194 |
+
bert-base-uncased:
|
195 |
link: https://huggingface.co/bert-base-uncased
|
196 |
seq_len: 512
|
197 |
size: 110
|
|
|
198 |
dim: 768
|
199 |
is_external: true
|
200 |
is_proprietary: false
|
201 |
is_sentence_transformers_compatible: true
|
202 |
+
bge-base-zh-v1.5:
|
203 |
link: https://huggingface.co/BAAI/bge-base-zh-v1.5
|
204 |
seq_len: 512
|
205 |
size: 102
|
206 |
+
dim: 768
|
|
|
207 |
is_external: true
|
208 |
is_proprietary: false
|
209 |
+
is_sentence_transformers_compatible: true
|
210 |
+
bge-large-en-v1.5:
|
211 |
link: https://huggingface.co/BAAI/bge-large-en-v1.5
|
212 |
seq_len: 512
|
213 |
size: null
|
|
|
214 |
dim: 1024
|
215 |
is_external: true
|
216 |
is_proprietary: false
|
217 |
+
is_sentence_transformers_compatible: false
|
218 |
+
bge-large-zh-noinstruct:
|
219 |
link: https://huggingface.co/BAAI/bge-large-zh-noinstruct
|
220 |
seq_len: 512
|
221 |
size: 326
|
|
|
222 |
dim: 1024
|
223 |
is_external: true
|
224 |
is_proprietary: false
|
225 |
is_sentence_transformers_compatible: true
|
226 |
+
bge-large-zh-v1.5:
|
227 |
link: https://huggingface.co/BAAI/bge-large-zh-v1.5
|
228 |
seq_len: 512
|
229 |
size: 326
|
230 |
+
dim: 1024
|
|
|
231 |
is_external: true
|
232 |
is_proprietary: false
|
233 |
is_sentence_transformers_compatible: true
|
234 |
+
bge-small-zh-v1.5:
|
235 |
link: https://huggingface.co/BAAI/bge-small-zh-v1.5
|
236 |
seq_len: 512
|
237 |
size: 24
|
|
|
238 |
dim: 512
|
239 |
+
is_external: true
|
240 |
is_proprietary: false
|
241 |
is_sentence_transformers_compatible: true
|
242 |
+
camembert-base:
|
243 |
link: https://huggingface.co/almanach/camembert-base
|
244 |
seq_len: 512
|
245 |
size: 111
|
246 |
+
dim: 512
|
|
|
247 |
is_external: false
|
248 |
is_proprietary: false
|
249 |
is_sentence_transformers_compatible: true
|
250 |
+
camembert-large:
|
251 |
link: https://huggingface.co/almanach/camembert-large
|
252 |
seq_len: 512
|
253 |
size: 338
|
|
|
254 |
dim: 768
|
255 |
+
is_external: false
|
256 |
is_proprietary: false
|
257 |
is_sentence_transformers_compatible: true
|
258 |
+
contriever-base-msmarco:
|
259 |
link: https://huggingface.co/nthakur/contriever-base-msmarco
|
260 |
seq_len: 512
|
261 |
size: 110
|
|
|
262 |
dim: 768
|
263 |
is_external: true
|
264 |
is_proprietary: false
|
265 |
is_sentence_transformers_compatible: true
|
266 |
+
cross-en-de-roberta-sentence-transformer:
|
267 |
link: https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer
|
268 |
seq_len: 514
|
269 |
size: 278
|
270 |
+
dim: 768
|
|
|
271 |
is_external: true
|
272 |
is_proprietary: false
|
273 |
is_sentence_transformers_compatible: true
|
274 |
+
dfm-encoder-large-v1:
|
275 |
link: https://huggingface.co/chcaa/dfm-encoder-large-v1
|
276 |
seq_len: 512
|
277 |
size: 355
|
|
|
278 |
dim: 1024
|
279 |
is_external: true
|
280 |
is_proprietary: false
|
281 |
is_sentence_transformers_compatible: true
|
282 |
+
dfm-sentence-encoder-large-1:
|
283 |
link: https://huggingface.co/chcaa/dfm-encoder-large-v1
|
284 |
seq_len: 512
|
285 |
size: 355
|
286 |
+
dim: 1024
|
287 |
+
is_external: true
|
|
|
288 |
is_proprietary: false
|
289 |
is_sentence_transformers_compatible: true
|
290 |
+
distilbert-base-25lang-cased:
|
291 |
link: https://huggingface.co/Geotrend/distilbert-base-25lang-cased
|
292 |
seq_len: 512
|
293 |
size: 110
|
|
|
294 |
dim: 768
|
295 |
is_external: false
|
296 |
is_proprietary: false
|
297 |
is_sentence_transformers_compatible: true
|
298 |
+
distilbert-base-en-fr-cased:
|
299 |
link: https://huggingface.co/Geotrend/distilbert-base-en-fr-cased
|
300 |
seq_len: 512
|
301 |
size: 110
|
|
|
302 |
dim: 768
|
303 |
is_external: false
|
304 |
is_proprietary: false
|
305 |
is_sentence_transformers_compatible: true
|
306 |
+
distilbert-base-en-fr-es-pt-it-cased:
|
307 |
link: https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased
|
308 |
seq_len: 512
|
309 |
size: 110
|
|
|
310 |
dim: 768
|
311 |
is_external: false
|
312 |
is_proprietary: false
|
313 |
is_sentence_transformers_compatible: true
|
314 |
+
distilbert-base-fr-cased:
|
315 |
link: https://huggingface.co/Geotrend/distilbert-base-fr-cased
|
316 |
seq_len: 512
|
317 |
size: 110
|
|
|
318 |
dim: 768
|
319 |
is_external: false
|
320 |
is_proprietary: false
|
321 |
is_sentence_transformers_compatible: true
|
322 |
+
distilbert-base-uncased:
|
323 |
link: https://huggingface.co/distilbert-base-uncased
|
324 |
seq_len: 512
|
325 |
size: 110
|
326 |
+
dim: 768
|
327 |
+
is_external: false
|
|
|
328 |
is_proprietary: false
|
329 |
is_sentence_transformers_compatible: true
|
330 |
+
distiluse-base-multilingual-cased-v2:
|
331 |
link: https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2
|
332 |
seq_len: 512
|
333 |
size: 135
|
334 |
+
dim: 512
|
|
|
335 |
is_external: true
|
336 |
is_proprietary: false
|
337 |
is_sentence_transformers_compatible: true
|
338 |
+
e5-base:
|
339 |
link: https://huggingface.co/intfloat/e5-base
|
340 |
seq_len: 512
|
341 |
size: 110
|
342 |
+
dim: 768
|
|
|
343 |
is_external: true
|
344 |
is_proprietary: false
|
345 |
is_sentence_transformers_compatible: true
|
346 |
+
e5-large:
|
347 |
link: https://huggingface.co/intfloat/e5-large
|
348 |
seq_len: 512
|
349 |
size: 335
|
350 |
+
dim: 1024
|
|
|
351 |
is_external: true
|
352 |
is_proprietary: false
|
353 |
is_sentence_transformers_compatible: true
|
354 |
+
e5-mistral-7b-instruct:
|
355 |
link: https://huggingface.co/intfloat/e5-mistral-7b-instruct
|
356 |
seq_len: 32768
|
357 |
size: 7111
|
358 |
+
dim: 4096
|
|
|
359 |
is_external: true
|
360 |
is_proprietary: false
|
361 |
is_sentence_transformers_compatible: true
|
362 |
+
e5-small:
|
363 |
link: https://huggingface.co/intfloat/e5-small
|
364 |
seq_len: 512
|
365 |
size: 33
|
366 |
+
dim: 384
|
|
|
367 |
is_external: true
|
368 |
is_proprietary: false
|
369 |
is_sentence_transformers_compatible: true
|
370 |
+
electra-small-nordic:
|
371 |
link: https://huggingface.co/jonfd/electra-small-nordic
|
372 |
seq_len: 512
|
373 |
size: 23
|
|
|
374 |
dim: 256
|
375 |
is_external: true
|
376 |
is_proprietary: false
|
377 |
is_sentence_transformers_compatible: true
|
378 |
+
electra-small-swedish-cased-discriminator:
|
379 |
link: https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator
|
380 |
seq_len: 512
|
381 |
size: 16
|
382 |
+
dim: 256
|
|
|
383 |
is_external: true
|
384 |
is_proprietary: false
|
385 |
is_sentence_transformers_compatible: true
|
386 |
+
flaubert_base_cased:
|
387 |
link: https://huggingface.co/flaubert/flaubert_base_cased
|
388 |
seq_len: 512
|
389 |
size: 138
|
|
|
390 |
dim: 768
|
391 |
is_external: true
|
392 |
is_proprietary: false
|
393 |
is_sentence_transformers_compatible: true
|
394 |
+
flaubert_base_uncased:
|
395 |
link: https://huggingface.co/flaubert/flaubert_base_uncased
|
396 |
seq_len: 512
|
397 |
size: 138
|
398 |
+
dim: 768
|
|
|
399 |
is_external: true
|
400 |
is_proprietary: false
|
401 |
is_sentence_transformers_compatible: true
|
402 |
+
flaubert_large_cased:
|
403 |
link: https://huggingface.co/flaubert/flaubert_large_cased
|
404 |
seq_len: 512
|
405 |
size: 372
|
406 |
+
dim: 1024
|
|
|
407 |
is_external: true
|
408 |
is_proprietary: false
|
409 |
is_sentence_transformers_compatible: true
|
410 |
+
gbert-base:
|
411 |
link: https://huggingface.co/deepset/gbert-base
|
412 |
seq_len: 512
|
413 |
size: 110
|
414 |
+
dim: 768
|
|
|
415 |
is_external: true
|
416 |
is_proprietary: false
|
417 |
is_sentence_transformers_compatible: true
|
418 |
+
gbert-large:
|
419 |
link: https://huggingface.co/deepset/gbert-large
|
420 |
seq_len: 512
|
421 |
size: 337
|
422 |
+
dim: 1024
|
|
|
423 |
is_external: true
|
424 |
is_proprietary: false
|
425 |
is_sentence_transformers_compatible: true
|
426 |
+
gelectra-base:
|
427 |
link: https://huggingface.co/deepset/gelectra-base
|
428 |
seq_len: 512
|
429 |
size: 110
|
430 |
+
dim: 768
|
|
|
431 |
is_external: true
|
432 |
is_proprietary: false
|
433 |
is_sentence_transformers_compatible: true
|
434 |
+
gelectra-large:
|
435 |
link: https://huggingface.co/deepset/gelectra-large
|
436 |
seq_len: 512
|
437 |
size: 335
|
438 |
+
dim: 1024
|
|
|
439 |
is_external: true
|
440 |
is_proprietary: false
|
441 |
is_sentence_transformers_compatible: true
|
442 |
+
glove.6B.300d:
|
443 |
link: https://huggingface.co/sentence-transformers/average_word_embeddings_glove.6B.300d
|
444 |
seq_len: N/A
|
445 |
size: 120
|
446 |
+
dim: 300
|
447 |
+
is_external: true
|
448 |
+
is_proprietary: false
|
449 |
+
is_sentence_transformers_compatible: true
|
450 |
google-gecko-256.text-embedding-preview-0409:
|
451 |
+
link: https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models
|
452 |
+
seq_len: 2048
|
453 |
+
size: 1200
|
454 |
dim: 256
|
455 |
is_external: true
|
456 |
is_proprietary: true
|
457 |
is_sentence_transformers_compatible: false
|
458 |
+
google-gecko.text-embedding-preview-0409:
|
459 |
link: https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models
|
460 |
seq_len: 2048
|
461 |
size: 1200
|
|
|
462 |
dim: 768
|
463 |
is_external: true
|
464 |
is_proprietary: true
|
465 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
466 |
gottbert-base:
|
|
|
|
|
|
|
|
|
467 |
link: https://huggingface.co/uklfr/gottbert-base
|
468 |
seq_len: 512
|
469 |
size: 127
|
|
|
470 |
dim: 768
|
471 |
is_external: true
|
472 |
is_proprietary: false
|
473 |
is_sentence_transformers_compatible: true
|
474 |
+
gtr-t5-base:
|
475 |
link: https://huggingface.co/sentence-transformers/gtr-t5-base
|
476 |
seq_len: 512
|
477 |
size: 110
|
|
|
478 |
dim: 768
|
479 |
is_external: true
|
480 |
is_proprietary: false
|
481 |
is_sentence_transformers_compatible: true
|
482 |
+
gtr-t5-large:
|
483 |
link: https://huggingface.co/sentence-transformers/gtr-t5-large
|
484 |
seq_len: 512
|
485 |
size: 168
|
|
|
486 |
dim: 768
|
487 |
is_external: true
|
488 |
is_proprietary: false
|
489 |
is_sentence_transformers_compatible: true
|
490 |
+
gtr-t5-xl:
|
491 |
link: https://huggingface.co/sentence-transformers/gtr-t5-xl
|
492 |
seq_len: 512
|
493 |
size: 1240
|
|
|
494 |
dim: 768
|
495 |
is_external: true
|
496 |
is_proprietary: false
|
497 |
is_sentence_transformers_compatible: true
|
498 |
+
gtr-t5-xxl:
|
499 |
link: https://huggingface.co/sentence-transformers/gtr-t5-xxl
|
500 |
seq_len: 512
|
501 |
size: 4865
|
|
|
502 |
dim: 768
|
503 |
is_external: true
|
504 |
is_proprietary: false
|
505 |
is_sentence_transformers_compatible: true
|
506 |
+
herbert-base-retrieval-v2:
|
507 |
link: https://huggingface.co/ipipan/herbert-base-retrieval-v2
|
508 |
seq_len: 514
|
509 |
size: 125
|
510 |
+
dim: 768
|
|
|
511 |
is_external: true
|
512 |
is_proprietary: false
|
513 |
is_sentence_transformers_compatible: true
|
514 |
+
komninos:
|
515 |
link: https://huggingface.co/sentence-transformers/average_word_embeddings_komninos
|
516 |
seq_len: N/A
|
517 |
size: 134
|
518 |
+
dim: 300
|
|
|
519 |
is_external: true
|
520 |
is_proprietary: false
|
521 |
is_sentence_transformers_compatible: true
|
522 |
+
luotuo-bert-medium:
|
523 |
link: https://huggingface.co/silk-road/luotuo-bert-medium
|
524 |
seq_len: 512
|
525 |
size: 328
|
|
|
526 |
dim: 768
|
527 |
is_external: true
|
528 |
is_proprietary: false
|
529 |
is_sentence_transformers_compatible: true
|
530 |
+
m3e-base:
|
531 |
link: https://huggingface.co/moka-ai/m3e-base
|
532 |
seq_len: 512
|
533 |
size: 102
|
|
|
534 |
dim: 768
|
535 |
is_external: true
|
536 |
is_proprietary: false
|
537 |
is_sentence_transformers_compatible: true
|
538 |
+
m3e-large:
|
539 |
link: https://huggingface.co/moka-ai/m3e-large
|
540 |
seq_len: 512
|
541 |
size: 102
|
542 |
+
dim: 768
|
543 |
+
is_external: true
|
544 |
+
is_proprietary: false
|
545 |
+
is_sentence_transformers_compatible: true
|
546 |
mistral-embed:
|
547 |
+
link: https://docs.mistral.ai/guides/embeddings
|
548 |
+
seq_len: null
|
549 |
+
size: null
|
550 |
dim: 1024
|
551 |
is_external: true
|
552 |
is_proprietary: true
|
553 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
554 |
msmarco-bert-co-condensor:
|
|
|
|
|
|
|
|
|
555 |
link: https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor
|
556 |
seq_len: 512
|
557 |
size: 110
|
558 |
+
dim: 768
|
|
|
559 |
is_external: true
|
560 |
is_proprietary: false
|
561 |
is_sentence_transformers_compatible: true
|
562 |
+
multi-qa-MiniLM-L6-cos-v1:
|
563 |
link: https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1
|
564 |
seq_len: 512
|
565 |
size: 23
|
566 |
+
dim: 384
|
|
|
567 |
is_external: true
|
568 |
is_proprietary: false
|
569 |
is_sentence_transformers_compatible: true
|
570 |
+
multilingual-e5-base:
|
571 |
link: https://huggingface.co/intfloat/multilingual-e5-base
|
572 |
seq_len: 514
|
573 |
size: 278
|
574 |
+
dim: 768
|
|
|
575 |
is_external: true
|
576 |
is_proprietary: false
|
577 |
is_sentence_transformers_compatible: true
|
578 |
+
multilingual-e5-large:
|
579 |
link: https://huggingface.co/intfloat/multilingual-e5-large
|
580 |
seq_len: 514
|
581 |
size: 560
|
582 |
+
dim: 1024
|
|
|
583 |
is_external: true
|
584 |
is_proprietary: false
|
585 |
is_sentence_transformers_compatible: true
|
586 |
+
multilingual-e5-small:
|
587 |
link: https://huggingface.co/intfloat/multilingual-e5-small
|
588 |
seq_len: 512
|
589 |
size: 118
|
590 |
+
dim: 384
|
|
|
591 |
is_external: true
|
592 |
is_proprietary: false
|
593 |
is_sentence_transformers_compatible: true
|
594 |
+
nb-bert-base:
|
595 |
link: https://huggingface.co/NbAiLab/nb-bert-base
|
596 |
seq_len: 512
|
597 |
size: 179
|
598 |
+
dim: 768
|
|
|
599 |
is_external: true
|
600 |
is_proprietary: false
|
601 |
is_sentence_transformers_compatible: true
|
602 |
+
nb-bert-large:
|
603 |
link: https://huggingface.co/NbAiLab/nb-bert-large
|
604 |
seq_len: 512
|
605 |
size: 355
|
606 |
+
dim: 1024
|
|
|
607 |
is_external: true
|
608 |
is_proprietary: false
|
609 |
is_sentence_transformers_compatible: true
|
610 |
+
nomic-embed-text-v1.5-128:
|
611 |
link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
|
612 |
seq_len: 8192
|
613 |
size: 138
|
614 |
+
dim: 128
|
|
|
615 |
is_external: true
|
616 |
is_proprietary: false
|
617 |
is_sentence_transformers_compatible: true
|
618 |
+
nomic-embed-text-v1.5-256:
|
619 |
link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
|
620 |
seq_len: 8192
|
621 |
size: 138
|
622 |
+
dim: 256
|
|
|
623 |
is_external: true
|
624 |
is_proprietary: false
|
625 |
is_sentence_transformers_compatible: true
|
626 |
+
nomic-embed-text-v1.5-512:
|
627 |
link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
|
628 |
seq_len: 8192
|
629 |
size: 138
|
630 |
+
dim: 512
|
|
|
631 |
is_external: true
|
632 |
is_proprietary: false
|
633 |
is_sentence_transformers_compatible: true
|
634 |
+
nomic-embed-text-v1.5-64:
|
635 |
link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
|
636 |
seq_len: 8192
|
637 |
size: 138
|
638 |
+
dim: 64
|
|
|
639 |
is_external: true
|
640 |
is_proprietary: false
|
641 |
is_sentence_transformers_compatible: true
|
642 |
+
norbert3-base:
|
643 |
link: https://huggingface.co/ltg/norbert3-base
|
644 |
seq_len: 512
|
645 |
size: 131
|
646 |
+
dim: 768
|
|
|
647 |
is_external: true
|
648 |
is_proprietary: false
|
649 |
is_sentence_transformers_compatible: true
|
650 |
+
norbert3-large:
|
651 |
link: https://huggingface.co/ltg/norbert3-large
|
652 |
seq_len: 512
|
653 |
size: 368
|
654 |
+
dim: 1024
|
|
|
655 |
is_external: true
|
656 |
is_proprietary: false
|
657 |
is_sentence_transformers_compatible: true
|
658 |
+
paraphrase-multilingual-MiniLM-L12-v2:
|
659 |
link: https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
660 |
seq_len: 512
|
661 |
size: 118
|
662 |
+
dim: 384
|
|
|
663 |
is_external: true
|
664 |
is_proprietary: false
|
665 |
is_sentence_transformers_compatible: true
|
666 |
+
paraphrase-multilingual-mpnet-base-v2:
|
667 |
link: https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2
|
668 |
seq_len: 514
|
669 |
size: 278
|
|
|
670 |
dim: 768
|
671 |
is_external: true
|
672 |
is_proprietary: false
|
673 |
is_sentence_transformers_compatible: true
|
674 |
+
sentence-bert-swedish-cased:
|
675 |
link: https://huggingface.co/KBLab/sentence-bert-swedish-cased
|
676 |
seq_len: 512
|
677 |
size: 125
|
|
|
678 |
dim: 768
|
679 |
is_external: true
|
680 |
is_proprietary: false
|
681 |
is_sentence_transformers_compatible: true
|
682 |
+
sentence-camembert-base:
|
683 |
link: https://huggingface.co/dangvantuan/sentence-camembert-base
|
684 |
seq_len: 512
|
685 |
size: 110
|
686 |
+
dim: 768
|
|
|
687 |
is_external: true
|
688 |
is_proprietary: false
|
689 |
is_sentence_transformers_compatible: true
|
690 |
+
sentence-camembert-large:
|
691 |
link: https://huggingface.co/dangvantuan/sentence-camembert-large
|
692 |
seq_len: 512
|
693 |
size: 337
|
694 |
+
dim: 1024
|
|
|
695 |
is_external: true
|
696 |
is_proprietary: false
|
697 |
is_sentence_transformers_compatible: true
|
698 |
+
sentence-croissant-llm-base:
|
699 |
link: https://huggingface.co/Wissam42/sentence-croissant-llm-base
|
700 |
seq_len: 2048
|
701 |
size: 1280
|
702 |
+
dim: 2048
|
|
|
703 |
is_external: true
|
704 |
is_proprietary: false
|
705 |
is_sentence_transformers_compatible: true
|
706 |
+
sentence-t5-base:
|
707 |
link: https://huggingface.co/sentence-transformers/sentence-t5-base
|
708 |
seq_len: 512
|
709 |
size: 110
|
|
|
710 |
dim: 768
|
711 |
is_external: true
|
712 |
is_proprietary: false
|
713 |
is_sentence_transformers_compatible: true
|
714 |
+
sentence-t5-large:
|
715 |
link: https://huggingface.co/sentence-transformers/sentence-t5-large
|
716 |
seq_len: 512
|
717 |
size: 168
|
|
|
718 |
dim: 768
|
719 |
is_external: true
|
720 |
is_proprietary: false
|
721 |
is_sentence_transformers_compatible: true
|
722 |
+
sentence-t5-xl:
|
723 |
link: https://huggingface.co/sentence-transformers/sentence-t5-xl
|
724 |
seq_len: 512
|
725 |
size: 1240
|
|
|
726 |
dim: 768
|
727 |
is_external: true
|
728 |
is_proprietary: false
|
729 |
is_sentence_transformers_compatible: true
|
730 |
+
sentence-t5-xxl:
|
731 |
link: https://huggingface.co/sentence-transformers/sentence-t5-xxl
|
732 |
seq_len: 512
|
733 |
size: 4865
|
|
|
734 |
dim: 768
|
735 |
is_external: true
|
736 |
is_proprietary: false
|
737 |
is_sentence_transformers_compatible: true
|
738 |
+
silver-retriever-base-v1:
|
739 |
link: https://huggingface.co/ipipan/silver-retriever-base-v1
|
740 |
seq_len: 514
|
741 |
size: 125
|
|
|
742 |
dim: 768
|
743 |
is_external: true
|
744 |
is_proprietary: false
|
745 |
is_sentence_transformers_compatible: true
|
746 |
+
st-polish-paraphrase-from-distilroberta:
|
747 |
link: https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta
|
748 |
seq_len: 514
|
749 |
size: 125
|
|
|
750 |
dim: 768
|
751 |
is_external: true
|
752 |
is_proprietary: false
|
753 |
is_sentence_transformers_compatible: true
|
754 |
+
st-polish-paraphrase-from-mpnet:
|
755 |
link: https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet
|
756 |
seq_len: 514
|
757 |
size: 125
|
|
|
758 |
dim: 768
|
759 |
is_external: true
|
760 |
is_proprietary: false
|
761 |
is_sentence_transformers_compatible: true
|
762 |
+
sup-simcse-bert-base-uncased:
|
763 |
link: https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased
|
764 |
seq_len: 512
|
765 |
size: 110
|
766 |
+
dim: 768
|
767 |
+
is_external: true
|
768 |
+
is_proprietary: false
|
769 |
+
is_sentence_transformers_compatible: true
|
770 |
text-embedding-3-large:
|
771 |
+
link: https://openai.com/blog/new-embedding-models-and-api-updates
|
772 |
+
seq_len: 8191
|
773 |
+
size: null
|
774 |
dim: 3072
|
775 |
is_external: true
|
776 |
is_proprietary: true
|
777 |
is_sentence_transformers_compatible: false
|
778 |
+
text-embedding-3-large-256:
|
779 |
link: https://openai.com/blog/new-embedding-models-and-api-updates
|
780 |
seq_len: 8191
|
781 |
size: null
|
|
|
782 |
dim: 256
|
783 |
is_external: true
|
784 |
is_proprietary: true
|
785 |
is_sentence_transformers_compatible: false
|
786 |
+
text-embedding-3-small:
|
787 |
link: https://openai.com/blog/new-embedding-models-and-api-updates
|
788 |
seq_len: 8191
|
789 |
size: null
|
|
|
790 |
dim: 1536
|
791 |
is_external: true
|
792 |
is_proprietary: true
|
793 |
is_sentence_transformers_compatible: false
|
794 |
+
text-embedding-ada-002:
|
795 |
+
link: https://openai.com/blog/new-and-improved-embedding-model
|
796 |
seq_len: 8191
|
797 |
size: null
|
|
|
798 |
dim: 1536
|
799 |
is_external: true
|
800 |
is_proprietary: true
|
801 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
802 |
text-search-ada-001:
|
803 |
+
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
804 |
+
seq_len: 2046
|
805 |
+
size: null
|
806 |
dim: 1024
|
807 |
is_external: true
|
808 |
is_proprietary: true
|
809 |
is_sentence_transformers_compatible: false
|
810 |
+
text-search-ada-doc-001:
|
811 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
812 |
seq_len: 2046
|
813 |
size: null
|
|
|
814 |
dim: 1024
|
815 |
is_external: true
|
816 |
is_proprietary: true
|
817 |
is_sentence_transformers_compatible: false
|
818 |
+
text-search-ada-query-001:
|
819 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
820 |
seq_len: 2046
|
821 |
size: null
|
|
|
822 |
dim: 1024
|
823 |
is_external: false
|
824 |
is_proprietary: true
|
825 |
is_sentence_transformers_compatible: false
|
826 |
+
text-search-babbage-001:
|
827 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
828 |
seq_len: 2046
|
829 |
size: null
|
|
|
830 |
dim: 2048
|
831 |
is_external: true
|
832 |
is_proprietary: true
|
833 |
is_sentence_transformers_compatible: false
|
834 |
+
text-search-curie-001:
|
835 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
836 |
seq_len: 2046
|
837 |
size: null
|
|
|
838 |
dim: 4096
|
839 |
is_external: true
|
840 |
is_proprietary: true
|
841 |
is_sentence_transformers_compatible: false
|
842 |
+
text-search-davinci-001:
|
843 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
844 |
seq_len: 2046
|
845 |
size: null
|
|
|
846 |
dim: 12288
|
847 |
is_external: true
|
848 |
is_proprietary: true
|
849 |
is_sentence_transformers_compatible: false
|
850 |
+
text-similarity-ada-001:
|
851 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
852 |
seq_len: 2046
|
853 |
size: null
|
|
|
854 |
dim: 1024
|
855 |
is_external: true
|
856 |
is_proprietary: true
|
857 |
is_sentence_transformers_compatible: false
|
858 |
+
text-similarity-babbage-001:
|
859 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
860 |
seq_len: 2046
|
861 |
size: null
|
|
|
862 |
dim: 2048
|
863 |
is_external: true
|
864 |
is_proprietary: true
|
865 |
is_sentence_transformers_compatible: false
|
866 |
+
text-similarity-curie-001:
|
867 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
868 |
seq_len: 2046
|
869 |
size: null
|
|
|
870 |
dim: 4096
|
871 |
is_external: true
|
872 |
is_proprietary: true
|
873 |
is_sentence_transformers_compatible: false
|
874 |
+
text-similarity-davinci-001:
|
875 |
link: https://openai.com/blog/introducing-text-and-code-embeddings
|
876 |
seq_len: 2046
|
877 |
size: null
|
|
|
878 |
dim: 12288
|
879 |
is_external: true
|
880 |
is_proprietary: true
|
881 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
882 |
text2vec-base-chinese:
|
883 |
+
link: https://huggingface.co/shibing624/text2vec-base-chinese
|
884 |
+
seq_len: 512
|
885 |
+
size: 102
|
886 |
dim: 768
|
887 |
is_external: true
|
888 |
is_proprietary: false
|
889 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
890 |
text2vec-base-multilingual:
|
891 |
+
link: null
|
892 |
+
seq_len: null
|
893 |
+
size: null
|
894 |
dim: null
|
895 |
is_external: true
|
896 |
is_proprietary: false
|
897 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
898 |
text2vec-large-chinese:
|
899 |
+
link: https://huggingface.co/GanymedeNil/text2vec-large-chinese
|
900 |
+
seq_len: 512
|
901 |
+
size: 326
|
902 |
dim: 1024
|
903 |
is_external: true
|
904 |
is_proprietary: false
|
905 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
906 |
titan-embed-text-v1:
|
|
|
|
|
|
|
|
|
907 |
link: https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html
|
908 |
seq_len: 8000
|
909 |
size: null
|
|
|
910 |
dim: 1536
|
911 |
is_external: true
|
912 |
+
is_proprietary: true
|
913 |
+
is_sentence_transformers_compatible: false
|
914 |
+
udever-bloom-1b1:
|
915 |
link: https://huggingface.co/izhx/udever-bloom-1b1
|
916 |
seq_len: 2048
|
917 |
size: null
|
918 |
+
dim: 1536
|
|
|
919 |
is_external: true
|
920 |
is_proprietary: false
|
921 |
is_sentence_transformers_compatible: true
|
922 |
+
udever-bloom-560m:
|
923 |
link: https://huggingface.co/izhx/udever-bloom-560m
|
924 |
seq_len: 2048
|
925 |
size: null
|
926 |
+
dim: 1024
|
|
|
927 |
is_external: true
|
928 |
is_proprietary: false
|
929 |
is_sentence_transformers_compatible: true
|
930 |
+
universal-sentence-encoder-multilingual-3:
|
931 |
link: https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-3
|
932 |
seq_len: 512
|
933 |
size: null
|
|
|
934 |
dim: 512
|
935 |
is_external: true
|
936 |
is_proprietary: false
|
937 |
is_sentence_transformers_compatible: true
|
938 |
+
universal-sentence-encoder-multilingual-large-3:
|
939 |
link: https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-large-3
|
940 |
seq_len: 512
|
941 |
size: null
|
942 |
+
dim: 512
|
|
|
943 |
is_external: true
|
944 |
is_proprietary: false
|
945 |
is_sentence_transformers_compatible: true
|
946 |
+
unsup-simcse-bert-base-uncased:
|
947 |
link: https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased
|
948 |
seq_len: 512
|
949 |
size: 110
|
|
|
950 |
dim: 768
|
951 |
is_external: true
|
952 |
is_proprietary: false
|
953 |
is_sentence_transformers_compatible: true
|
954 |
+
use-cmlm-multilingual:
|
955 |
link: https://huggingface.co/sentence-transformers/use-cmlm-multilingual
|
956 |
seq_len: 512
|
957 |
size: 472
|
958 |
+
dim: 768
|
959 |
+
is_external: true
|
960 |
+
is_proprietary: false
|
961 |
+
is_sentence_transformers_compatible: true
|
962 |
voyage-2:
|
963 |
+
link: https://docs.voyageai.com/embeddings/
|
964 |
+
seq_len: 1024
|
965 |
+
size: null
|
966 |
dim: 1024
|
967 |
is_external: true
|
968 |
is_proprietary: true
|
969 |
is_sentence_transformers_compatible: false
|
970 |
+
voyage-code-2:
|
971 |
link: https://docs.voyageai.com/embeddings/
|
972 |
+
seq_len: 16000
|
973 |
size: null
|
|
|
974 |
dim: 1536
|
975 |
is_external: true
|
976 |
is_proprietary: true
|
977 |
is_sentence_transformers_compatible: false
|
978 |
+
voyage-large-2-instruct:
|
979 |
link: https://docs.voyageai.com/embeddings/
|
980 |
seq_len: 16000
|
981 |
size: null
|
|
|
982 |
dim: 1024
|
983 |
is_external: true
|
984 |
is_proprietary: false
|
985 |
is_sentence_transformers_compatible: false
|
986 |
+
voyage-law-2:
|
987 |
link: https://docs.voyageai.com/embeddings/
|
988 |
+
seq_len: 4000
|
989 |
size: null
|
|
|
990 |
dim: 1024
|
991 |
is_external: true
|
992 |
is_proprietary: true
|
993 |
is_sentence_transformers_compatible: false
|
994 |
+
voyage-lite-01-instruct:
|
995 |
link: https://docs.voyageai.com/embeddings/
|
996 |
seq_len: 4000
|
997 |
size: null
|
|
|
998 |
dim: 1024
|
999 |
is_external: true
|
1000 |
is_proprietary: true
|
1001 |
is_sentence_transformers_compatible: false
|
1002 |
+
voyage-lite-02-instruct:
|
1003 |
link: https://docs.voyageai.com/embeddings/
|
1004 |
seq_len: 4000
|
1005 |
+
size: 1220
|
|
|
1006 |
dim: 1024
|
1007 |
is_external: true
|
1008 |
is_proprietary: true
|
1009 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
1010 |
xlm-roberta-base:
|
|
|
|
|
|
|
|
|
1011 |
link: https://huggingface.co/xlm-roberta-base
|
1012 |
seq_len: 514
|
1013 |
size: 279
|
1014 |
+
dim: 768
|
|
|
1015 |
is_external: true
|
1016 |
is_proprietary: false
|
1017 |
is_sentence_transformers_compatible: true
|
1018 |
+
xlm-roberta-large:
|
1019 |
link: https://huggingface.co/xlm-roberta-large
|
1020 |
seq_len: 514
|
1021 |
size: 560
|
1022 |
+
dim: 1024
|
1023 |
+
is_external: true
|
1024 |
+
is_proprietary: false
|
1025 |
+
is_sentence_transformers_compatible: true
|
1026 |
models_to_skip:
|
1027 |
- michaelfeil/ct2fast-e5-large-v2
|
1028 |
- McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse
|