Spaces:
Running
Running
Muennighoff
committed on
Commit
•
735975d
1
Parent(s):
27a30e1
Update conf
Browse files
- config.yaml +31 -31
- model_meta.yaml +62 -9
config.yaml
CHANGED
@@ -365,6 +365,37 @@ boards:
|
|
365 |
- CDSC-R
|
366 |
- SICK-R-PL
|
367 |
- STS22 (pl)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
368 |
se:
|
369 |
title: Swedish
|
370 |
language_long: Swedish
|
@@ -452,34 +483,3 @@ boards:
|
|
452 |
- BrightRetrieval (aops)
|
453 |
- BrightRetrieval (theoremqa_theorems)
|
454 |
- BrightRetrieval (theoremqa_questions)
|
455 |
-
ru:
|
456 |
-
title: Russian
|
457 |
-
language_long: "Russian"
|
458 |
-
has_overall: true
|
459 |
-
acronym: null
|
460 |
-
icon: "🇷🇺"
|
461 |
-
special_icons: null
|
462 |
-
credits: "[Roman Solomatin](https://github.com/Samoed) and SaluteDevices: [Alena Fenogenova](https://github.com/Alenush), [Aleksandr Abramov](https://github.com/Ab1992ao), [Artem Snegirev](https://github.com/artemsnegirev), [Anna Maksimova](https://github.com/anpalmak2003), [Maria Tikhonova](https://github.com/MariyaTikhonova)"
|
463 |
-
tasks:
|
464 |
-
Classification:
|
465 |
-
- GeoreviewClassification
|
466 |
-
- HeadlineClassification
|
467 |
-
- InappropriatenessClassification
|
468 |
-
- KinopoiskClassification
|
469 |
-
- RuReviewsClassification
|
470 |
-
- RuSciBenchGRNTIClassification
|
471 |
-
- RuSciBenchOECDClassification
|
472 |
-
Clustering:
|
473 |
-
- GeoreviewClusteringP2P
|
474 |
-
- RuSciBenchGRNTIClusteringP2P
|
475 |
-
- RuSciBenchOECDClusteringP2P
|
476 |
-
PairClassification:
|
477 |
-
- TERRa
|
478 |
-
Reranking:
|
479 |
-
- RuBQReranking
|
480 |
-
Retrieval:
|
481 |
-
- RiaNewsRetrieval
|
482 |
-
- RuBQRetrieval
|
483 |
-
STS:
|
484 |
-
- RUParaPhraserSTS
|
485 |
-
- RuSTSBenchmarkSTS
|
|
|
365 |
- CDSC-R
|
366 |
- SICK-R-PL
|
367 |
- STS22 (pl)
|
368 |
+
ru:
|
369 |
+
title: Russian
|
370 |
+
language_long: "Russian"
|
371 |
+
has_overall: true
|
372 |
+
acronym: null
|
373 |
+
icon: "🇷🇺"
|
374 |
+
special_icons: null
|
375 |
+
credits: "[Roman Solomatin](https://github.com/Samoed) and SaluteDevices: [Alena Fenogenova](https://github.com/Alenush), [Aleksandr Abramov](https://github.com/Ab1992ao), [Artem Snegirev](https://github.com/artemsnegirev), [Anna Maksimova](https://github.com/anpalmak2003), [Maria Tikhonova](https://github.com/MariyaTikhonova)"
|
376 |
+
tasks:
|
377 |
+
Classification:
|
378 |
+
- GeoreviewClassification
|
379 |
+
- HeadlineClassification
|
380 |
+
- InappropriatenessClassification
|
381 |
+
- KinopoiskClassification
|
382 |
+
- RuReviewsClassification
|
383 |
+
- RuSciBenchGRNTIClassification
|
384 |
+
- RuSciBenchOECDClassification
|
385 |
+
Clustering:
|
386 |
+
- GeoreviewClusteringP2P
|
387 |
+
- RuSciBenchGRNTIClusteringP2P
|
388 |
+
- RuSciBenchOECDClusteringP2P
|
389 |
+
PairClassification:
|
390 |
+
- TERRa
|
391 |
+
Reranking:
|
392 |
+
- RuBQReranking
|
393 |
+
Retrieval:
|
394 |
+
- RiaNewsRetrieval
|
395 |
+
- RuBQRetrieval
|
396 |
+
STS:
|
397 |
+
- RUParaPhraserSTS
|
398 |
+
- RuSTSBenchmarkSTS
|
399 |
se:
|
400 |
title: Swedish
|
401 |
language_long: Swedish
|
|
|
483 |
- BrightRetrieval (aops)
|
484 |
- BrightRetrieval (theoremqa_theorems)
|
485 |
- BrightRetrieval (theoremqa_questions)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_meta.yaml
CHANGED
@@ -212,6 +212,15 @@ model_meta:
|
|
212 |
is_proprietary: false
|
213 |
is_sentence_transformers_compatible: true
|
214 |
uses_instruct: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
all-MiniLM-L12-v2:
|
216 |
link: https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2
|
217 |
seq_len: 512
|
@@ -356,6 +365,24 @@ model_meta:
|
|
356 |
is_proprietary: false
|
357 |
is_sentence_transformers_compatible: true
|
358 |
uses_instruct: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
359 |
bge-large-en-v1.5:
|
360 |
link: https://huggingface.co/BAAI/bge-large-en-v1.5
|
361 |
seq_len: 512
|
@@ -514,15 +541,6 @@ model_meta:
|
|
514 |
is_proprietary: false
|
515 |
is_sentence_transformers_compatible: true
|
516 |
uses_instruct: false
|
517 |
-
dfm-sentence-encoder-large-1:
|
518 |
-
link: https://huggingface.co/chcaa/dfm-encoder-large-v1
|
519 |
-
seq_len: 512
|
520 |
-
size: 355
|
521 |
-
dim: 1024
|
522 |
-
is_external: false # no result in results repo
|
523 |
-
is_proprietary: false
|
524 |
-
is_sentence_transformers_compatible: true
|
525 |
-
uses_instruct: false
|
526 |
distilbert-base-25lang-cased:
|
527 |
link: https://huggingface.co/Geotrend/distilbert-base-25lang-cased
|
528 |
seq_len: 512
|
@@ -820,6 +838,15 @@ model_meta:
|
|
820 |
is_proprietary: false
|
821 |
is_sentence_transformers_compatible: true
|
822 |
uses_instruct: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
823 |
gte-Qwen2-7B-instruct:
|
824 |
link: https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct
|
825 |
seq_len: 32768
|
@@ -1243,6 +1270,24 @@ model_meta:
|
|
1243 |
is_proprietary: false
|
1244 |
is_sentence_transformers_compatible: true
|
1245 |
uses_instruct: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1246 |
sup-simcse-bert-base-uncased:
|
1247 |
link: https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased
|
1248 |
seq_len: 512
|
@@ -1874,6 +1919,14 @@ models_to_skip:
|
|
1874 |
- liddlefish/privacy_embedding_bge_small_synthetic
|
1875 |
- mxs980/gte-Qwen2-1.5B-instruct-Q8_0-GGUF
|
1876 |
- leonn71/gte-Qwen2-1.5B-instruct-Q6_K-GGUF
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1877 |
cross_encoders:
|
1878 |
- FollowIR-7B
|
1879 |
- flan-t5-base
|
|
|
212 |
is_proprietary: false
|
213 |
is_sentence_transformers_compatible: true
|
214 |
uses_instruct: true
|
215 |
+
SFR-Embedding-2_R:
|
216 |
+
link: https://huggingface.co/Salesforce/SFR-Embedding-2_R
|
217 |
+
seq_len: 32768
|
218 |
+
size: 7111
|
219 |
+
dim: 4096
|
220 |
+
is_external: false
|
221 |
+
is_proprietary: false
|
222 |
+
is_sentence_transformers_compatible: true
|
223 |
+
uses_instruct: true
|
224 |
all-MiniLM-L12-v2:
|
225 |
link: https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2
|
226 |
seq_len: 512
|
|
|
365 |
is_proprietary: false
|
366 |
is_sentence_transformers_compatible: true
|
367 |
uses_instruct: false
|
368 |
+
bge-en-icl:
|
369 |
+
link: https://huggingface.co/BAAI/bge-en-icl
|
370 |
+
seq_len: 32768
|
371 |
+
size: 7110
|
372 |
+
dim: 4096
|
373 |
+
is_external: false
|
374 |
+
is_proprietary: false
|
375 |
+
is_sentence_transformers_compatible: true
|
376 |
+
uses_instruct: true
|
377 |
+
bge-multilingual-gemma2:
|
378 |
+
link: https://huggingface.co/BAAI/bge-multilingual-gemma2
|
379 |
+
seq_len: 8192
|
380 |
+
size: 9240
|
381 |
+
dim: 3584
|
382 |
+
is_external: false
|
383 |
+
is_proprietary: false
|
384 |
+
is_sentence_transformers_compatible: false
|
385 |
+
uses_instruct: true
|
386 |
bge-large-en-v1.5:
|
387 |
link: https://huggingface.co/BAAI/bge-large-en-v1.5
|
388 |
seq_len: 512
|
|
|
541 |
is_proprietary: false
|
542 |
is_sentence_transformers_compatible: true
|
543 |
uses_instruct: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
544 |
distilbert-base-25lang-cased:
|
545 |
link: https://huggingface.co/Geotrend/distilbert-base-25lang-cased
|
546 |
seq_len: 512
|
|
|
838 |
is_proprietary: false
|
839 |
is_sentence_transformers_compatible: true
|
840 |
uses_instruct: true
|
841 |
+
gte-Qwen2-1.5B-instruct:
|
842 |
+
link: https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct
|
843 |
+
seq_len: 32768
|
844 |
+
size: 1780
|
845 |
+
dim: 1536
|
846 |
+
is_external: false
|
847 |
+
is_proprietary: false
|
848 |
+
is_sentence_transformers_compatible: true
|
849 |
+
uses_instruct: true
|
850 |
gte-Qwen2-7B-instruct:
|
851 |
link: https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct
|
852 |
seq_len: 32768
|
|
|
1270 |
is_proprietary: false
|
1271 |
is_sentence_transformers_compatible: true
|
1272 |
uses_instruct: false
|
1273 |
+
stella_en_1.5B_v5:
|
1274 |
+
link: https://huggingface.co/dunzhang/stella_en_1.5B_v5
|
1275 |
+
seq_len: 512
|
1276 |
+
size: 1500
|
1277 |
+
dim: 8192
|
1278 |
+
is_external: false
|
1279 |
+
is_proprietary: false
|
1280 |
+
is_sentence_transformers_compatible: true
|
1281 |
+
uses_instruct: true
|
1282 |
+
stella_en_400M_v5:
|
1283 |
+
link: https://huggingface.co/dunzhang/stella_en_400M_v5
|
1284 |
+
seq_len: 512
|
1285 |
+
size: 400
|
1286 |
+
dim: 8192
|
1287 |
+
is_external: false
|
1288 |
+
is_proprietary: false
|
1289 |
+
is_sentence_transformers_compatible: true
|
1290 |
+
uses_instruct: true
|
1291 |
sup-simcse-bert-base-uncased:
|
1292 |
link: https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased
|
1293 |
seq_len: 512
|
|
|
1919 |
- liddlefish/privacy_embedding_bge_small_synthetic
|
1920 |
- mxs980/gte-Qwen2-1.5B-instruct-Q8_0-GGUF
|
1921 |
- leonn71/gte-Qwen2-1.5B-instruct-Q6_K-GGUF
|
1922 |
+
- niancheng/gte-Qwen2-7B-instruct-Q4_K_M-GGUF
|
1923 |
+
- cleatherbury/gte-Qwen2-7B-instruct-Q5_K_M-GGUF
|
1924 |
+
- niancheng/gte-Qwen2-1.5B-instruct-Q4_K_M-GGUF
|
1925 |
+
- mxs980/b1ade-embed-Q8_0-GGUF
|
1926 |
+
- chihlunLee/NoInstruct-small-Embedding-v0-Q4_0-GGUF
|
1927 |
+
- corto-ai/nomic-embed-text-v1
|
1928 |
+
- bcastle/snowflake-arctic-embed-l-Q8_0-GGUF
|
1929 |
+
- Intel/neural-embedding-v1
|
1930 |
cross_encoders:
|
1931 |
- FollowIR-7B
|
1932 |
- flan-t5-base
|