Commit 735975d (1 parent: 27a30e1): Update conf

Files changed:
- config.yaml (+31 -31)
- model_meta.yaml (+62 -9)
config.yaml (CHANGED)

```diff
@@ -365,6 +365,37 @@ boards:
         - CDSC-R
         - SICK-R-PL
         - STS22 (pl)
+  ru:
+    title: Russian
+    language_long: "Russian"
+    has_overall: true
+    acronym: null
+    icon: "🇷🇺"
+    special_icons: null
+    credits: "[Roman Solomatin](https://github.com/Samoed) and SaluteDevices: [Alena Fenogenova](https://github.com/Alenush), [Aleksandr Abramov](https://github.com/Ab1992ao), [Artem Snegirev](https://github.com/artemsnegirev), [Anna Maksimova](https://github.com/anpalmak2003), [Maria Tikhonova](https://github.com/MariyaTikhonova)"
+    tasks:
+      Classification:
+        - GeoreviewClassification
+        - HeadlineClassification
+        - InappropriatenessClassification
+        - KinopoiskClassification
+        - RuReviewsClassification
+        - RuSciBenchGRNTIClassification
+        - RuSciBenchOECDClassification
+      Clustering:
+        - GeoreviewClusteringP2P
+        - RuSciBenchGRNTIClusteringP2P
+        - RuSciBenchOECDClusteringP2P
+      PairClassification:
+        - TERRa
+      Reranking:
+        - RuBQReranking
+      Retrieval:
+        - RiaNewsRetrieval
+        - RuBQRetrieval
+      STS:
+        - RUParaPhraserSTS
+        - RuSTSBenchmarkSTS
   se:
     title: Swedish
     language_long: Swedish
@@ -452,34 +483,3 @@ boards:
         - BrightRetrieval (aops)
         - BrightRetrieval (theoremqa_theorems)
         - BrightRetrieval (theoremqa_questions)
-  ru:
-    title: Russian
-    language_long: "Russian"
-    has_overall: true
-    acronym: null
-    icon: "🇷🇺"
-    special_icons: null
-    credits: "[Roman Solomatin](https://github.com/Samoed) and SaluteDevices: [Alena Fenogenova](https://github.com/Alenush), [Aleksandr Abramov](https://github.com/Ab1992ao), [Artem Snegirev](https://github.com/artemsnegirev), [Anna Maksimova](https://github.com/anpalmak2003), [Maria Tikhonova](https://github.com/MariyaTikhonova)"
-    tasks:
-      Classification:
-        - GeoreviewClassification
-        - HeadlineClassification
-        - InappropriatenessClassification
-        - KinopoiskClassification
-        - RuReviewsClassification
-        - RuSciBenchGRNTIClassification
-        - RuSciBenchOECDClassification
-      Clustering:
-        - GeoreviewClusteringP2P
-        - RuSciBenchGRNTIClusteringP2P
-        - RuSciBenchOECDClusteringP2P
-      PairClassification:
-        - TERRa
-      Reranking:
-        - RuBQReranking
-      Retrieval:
-        - RiaNewsRetrieval
-        - RuBQRetrieval
-      STS:
-        - RUParaPhraserSTS
-        - RuSTSBenchmarkSTS
```
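These two hunks move the otherwise unchanged `ru` board definition so that it sits between the Polish task lists and the `se` (Swedish) board instead of after the BrightRetrieval entries. As a quick, hedged way to confirm the relocated block still parses and is complete, one could load the file with PyYAML; the `check_board` helper below is illustrative and not part of the Space's own code, and only the key names visible in the diff are assumed.

```python
import yaml

# Keys every board entry in the diff appears to carry (assumption from the diff).
REQUIRED_KEYS = {"title", "language_long", "has_overall", "tasks"}

def check_board(path: str, code: str) -> None:
    """Load the leaderboard config and sanity-check one board entry."""
    with open(path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    board = config["boards"][code]
    missing = REQUIRED_KEYS - set(board)
    if missing:
        raise ValueError(f"board '{code}' is missing keys: {sorted(missing)}")

    # Each task category should map to a non-empty list of task names.
    for category, tasks in board["tasks"].items():
        if not tasks:
            raise ValueError(f"board '{code}': category '{category}' is empty")
        print(f"{code}/{category}: {len(tasks)} task(s)")

if __name__ == "__main__":
    check_board("config.yaml", "ru")
```

Run against the updated config.yaml, this should report seven Classification tasks, three Clustering tasks, one PairClassification task, one Reranking task, two Retrieval tasks, and two STS tasks for the `ru` board.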
model_meta.yaml (CHANGED)

```diff
@@ -212,6 +212,15 @@ model_meta:
     is_proprietary: false
     is_sentence_transformers_compatible: true
     uses_instruct: true
+  SFR-Embedding-2_R:
+    link: https://huggingface.co/Salesforce/SFR-Embedding-2_R
+    seq_len: 32768
+    size: 7111
+    dim: 4096
+    is_external: false
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
+    uses_instruct: true
   all-MiniLM-L12-v2:
     link: https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2
     seq_len: 512
@@ -356,6 +365,24 @@ model_meta:
     is_proprietary: false
     is_sentence_transformers_compatible: true
     uses_instruct: false
+  bge-en-icl:
+    link: https://huggingface.co/BAAI/bge-en-icl
+    seq_len: 32768
+    size: 7110
+    dim: 4096
+    is_external: false
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
+    uses_instruct: true
+  bge-multilingual-gemma2:
+    link: https://huggingface.co/BAAI/bge-multilingual-gemma2
+    seq_len: 8192
+    size: 9240
+    dim: 3584
+    is_external: false
+    is_proprietary: false
+    is_sentence_transformers_compatible: false
+    uses_instruct: true
   bge-large-en-v1.5:
     link: https://huggingface.co/BAAI/bge-large-en-v1.5
     seq_len: 512
@@ -514,15 +541,6 @@ model_meta:
     is_proprietary: false
     is_sentence_transformers_compatible: true
     uses_instruct: false
-  dfm-sentence-encoder-large-1:
-    link: https://huggingface.co/chcaa/dfm-encoder-large-v1
-    seq_len: 512
-    size: 355
-    dim: 1024
-    is_external: false # no result in results repo
-    is_proprietary: false
-    is_sentence_transformers_compatible: true
-    uses_instruct: false
   distilbert-base-25lang-cased:
     link: https://huggingface.co/Geotrend/distilbert-base-25lang-cased
     seq_len: 512
@@ -820,6 +838,15 @@ model_meta:
     is_proprietary: false
     is_sentence_transformers_compatible: true
     uses_instruct: true
+  gte-Qwen2-1.5B-instruct:
+    link: https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct
+    seq_len: 32768
+    size: 1780
+    dim: 1536
+    is_external: false
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
+    uses_instruct: true
   gte-Qwen2-7B-instruct:
     link: https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct
     seq_len: 32768
@@ -1243,6 +1270,24 @@ model_meta:
     is_proprietary: false
     is_sentence_transformers_compatible: true
     uses_instruct: false
+  stella_en_1.5B_v5:
+    link: https://huggingface.co/dunzhang/stella_en_1.5B_v5
+    seq_len: 512
+    size: 1500
+    dim: 8192
+    is_external: false
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
+    uses_instruct: true
+  stella_en_400M_v5:
+    link: https://huggingface.co/dunzhang/stella_en_400M_v5
+    seq_len: 512
+    size: 400
+    dim: 8192
+    is_external: false
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
+    uses_instruct: true
   sup-simcse-bert-base-uncased:
     link: https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased
     seq_len: 512
```
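The five hunks above register six new models (SFR-Embedding-2_R, bge-en-icl, bge-multilingual-gemma2, gte-Qwen2-1.5B-instruct, stella_en_1.5B_v5, stella_en_400M_v5) and drop the dfm-sentence-encoder-large-1 entry; the final hunk, shown below, extends `models_to_skip`. As a rough sketch of reading the new entries back, assuming only the top-level `model_meta` key and the field names visible in the diff (the summary loop itself is hypothetical and not the leaderboard's own code):

```python
import yaml

# The six model names added in this commit, copied from the diff above.
NEW_MODELS = [
    "SFR-Embedding-2_R",
    "bge-en-icl",
    "bge-multilingual-gemma2",
    "gte-Qwen2-1.5B-instruct",
    "stella_en_1.5B_v5",
    "stella_en_400M_v5",
]

with open("model_meta.yaml", encoding="utf-8") as f:
    meta = yaml.safe_load(f)["model_meta"]

for name in NEW_MODELS:
    entry = meta[name]
    # "size" appears to be a parameter count in millions (e.g. 7111 for a ~7B
    # model); that reading is an inference from the values in the diff.
    print(
        f"{name}: seq_len={entry['seq_len']}, dim={entry['dim']}, "
        f"size={entry['size']}, uses_instruct={entry['uses_instruct']}"
    )
```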
```diff
@@ -1874,6 +1919,14 @@ models_to_skip:
   - liddlefish/privacy_embedding_bge_small_synthetic
   - mxs980/gte-Qwen2-1.5B-instruct-Q8_0-GGUF
   - leonn71/gte-Qwen2-1.5B-instruct-Q6_K-GGUF
+  - niancheng/gte-Qwen2-7B-instruct-Q4_K_M-GGUF
+  - cleatherbury/gte-Qwen2-7B-instruct-Q5_K_M-GGUF
+  - niancheng/gte-Qwen2-1.5B-instruct-Q4_K_M-GGUF
+  - mxs980/b1ade-embed-Q8_0-GGUF
+  - chihlunLee/NoInstruct-small-Embedding-v0-Q4_0-GGUF
+  - corto-ai/nomic-embed-text-v1
+  - bcastle/snowflake-arctic-embed-l-Q8_0-GGUF
+  - Intel/neural-embedding-v1
 cross_encoders:
   - FollowIR-7B
   - flan-t5-base
```
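Most of the new skip entries look like GGUF quantized re-uploads, matching the pattern of the existing list. A similarly hedged sketch of how the extended `models_to_skip` list might be applied when filtering candidate models; the `keep_models` helper and the example candidates are made up for illustration, and only the top-level `models_to_skip` list is taken from the diff:

```python
import yaml

with open("model_meta.yaml", encoding="utf-8") as f:
    data = yaml.safe_load(f)

skip = set(data.get("models_to_skip", []))

def keep_models(candidates: list[str]) -> list[str]:
    """Drop anything listed under models_to_skip."""
    return [m for m in candidates if m not in skip]

print(keep_models([
    "niancheng/gte-Qwen2-7B-instruct-Q4_K_M-GGUF",  # in the skip list above
    "Alibaba-NLP/gte-Qwen2-7B-instruct",            # not skipped
]))
```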