Spaces:
Runtime error
Runtime error
Update OpusparcusPC & LLM2Vec
Browse files- EXTERNAL_MODEL_RESULTS.json +0 -0
- app.py +28 -27
EXTERNAL_MODEL_RESULTS.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app.py
CHANGED
|
@@ -340,13 +340,13 @@ EXTERNAL_MODELS = [
|
|
| 340 |
"Cohere-embed-multilingual-light-v3.0",
|
| 341 |
"DanskBERT",
|
| 342 |
"LASER2",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
"LaBSE",
|
| 344 |
-
# "LLM2Vec-Llama-supervised",
|
| 345 |
-
# "LLM2Vec-Llama-unsupervised",
|
| 346 |
-
# "LLM2Vec-Mistral-supervised",
|
| 347 |
-
# "LLM2Vec-Mistral-unsupervised",
|
| 348 |
-
# "LLM2Vec-Sheared-Llama-supervised",
|
| 349 |
-
# "LLM2Vec-Sheared-Llama-unsupervised",
|
| 350 |
"OpenSearch-text-hybrid",
|
| 351 |
"all-MiniLM-L12-v2",
|
| 352 |
"all-MiniLM-L6-v2",
|
|
@@ -456,21 +456,25 @@ EXTERNAL_MODELS = [
|
|
| 456 |
]
|
| 457 |
|
| 458 |
EXTERNAL_MODEL_TO_LINK = {
|
|
|
|
| 459 |
"Cohere-embed-english-v3.0": "https://huggingface.co/Cohere/Cohere-embed-english-v3.0",
|
| 460 |
"Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
|
| 461 |
"Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
|
|
|
|
|
|
|
| 462 |
"LLM2Vec-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised",
|
| 463 |
"LLM2Vec-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp",
|
| 464 |
"LLM2Vec-Mistral-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised",
|
| 465 |
"LLM2Vec-Mistral-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
|
| 466 |
"LLM2Vec-Sheared-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
|
| 467 |
"LLM2Vec-Sheared-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
|
|
|
|
|
|
|
| 468 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
| 469 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
| 470 |
"all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
|
| 471 |
"all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
|
| 472 |
"all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
|
| 473 |
-
"Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding",
|
| 474 |
"bert-base-10lang-cased": "https://huggingface.co/Geotrend/bert-base-10lang-cased",
|
| 475 |
"bert-base-15lang-cased": "https://huggingface.co/Geotrend/bert-base-15lang-cased",
|
| 476 |
"bert-base-25lang-cased": "https://huggingface.co/Geotrend/bert-base-25lang-cased",
|
|
@@ -487,7 +491,6 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
| 487 |
"camembert-large": "https://huggingface.co/almanach/camembert-large",
|
| 488 |
"contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
|
| 489 |
"cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
|
| 490 |
-
"DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
|
| 491 |
"distilbert-base-25lang-cased": "https://huggingface.co/Geotrend/distilbert-base-25lang-cased",
|
| 492 |
"distilbert-base-en-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-cased",
|
| 493 |
"distilbert-base-en-fr-es-pt-it-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased",
|
|
@@ -520,8 +523,6 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
| 520 |
"herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
|
| 521 |
"komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
|
| 522 |
"luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
|
| 523 |
-
"LASER2": "https://github.com/facebookresearch/LASER",
|
| 524 |
-
"LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
|
| 525 |
"m3e-base": "https://huggingface.co/moka-ai/m3e-base",
|
| 526 |
"m3e-large": "https://huggingface.co/moka-ai/m3e-large",
|
| 527 |
"mistral-embed": "https://docs.mistral.ai/guides/embeddings",
|
|
@@ -538,7 +539,6 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
| 538 |
"nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
|
| 539 |
"norbert3-base": "https://huggingface.co/ltg/norbert3-base",
|
| 540 |
"norbert3-large": "https://huggingface.co/ltg/norbert3-large",
|
| 541 |
-
"OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
|
| 542 |
"paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
|
| 543 |
"paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
| 544 |
"sentence-camembert-base": "https://huggingface.co/dangvantuan/sentence-camembert-base",
|
|
@@ -586,20 +586,23 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
| 586 |
}
|
| 587 |
|
| 588 |
EXTERNAL_MODEL_TO_DIM = {
|
|
|
|
| 589 |
"Cohere-embed-english-v3.0": 1024,
|
| 590 |
"Cohere-embed-multilingual-v3.0": 1024,
|
| 591 |
"Cohere-embed-multilingual-light-v3.0": 384,
|
|
|
|
|
|
|
| 592 |
"LLM2Vec-Llama-supervised": 4096,
|
| 593 |
"LLM2Vec-Llama-unsupervised": 4096,
|
| 594 |
"LLM2Vec-Mistral-supervised": 4096,
|
| 595 |
"LLM2Vec-Mistral-unsupervised": 4096,
|
| 596 |
"LLM2Vec-Sheared-Llama-supervised": 2048,
|
| 597 |
"LLM2Vec-Sheared-Llama-unsupervised": 2048,
|
|
|
|
| 598 |
"all-MiniLM-L12-v2": 384,
|
| 599 |
"all-MiniLM-L6-v2": 384,
|
| 600 |
"all-mpnet-base-v2": 768,
|
| 601 |
"allenai-specter": 768,
|
| 602 |
-
"Baichuan-text-embedding": 1024,
|
| 603 |
"bert-base-10lang-cased": 768,
|
| 604 |
"bert-base-15lang-cased": 768,
|
| 605 |
"bert-base-25lang-cased": 768,
|
|
@@ -616,7 +619,6 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
| 616 |
"camembert-large": 768,
|
| 617 |
"contriever-base-msmarco": 768,
|
| 618 |
"cross-en-de-roberta-sentence-transformer": 768,
|
| 619 |
-
"DanskBERT": 768,
|
| 620 |
"distilbert-base-25lang-cased": 768,
|
| 621 |
"distilbert-base-en-fr-cased": 768,
|
| 622 |
"distilbert-base-en-fr-es-pt-it-cased": 768,
|
|
@@ -635,8 +637,6 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
| 635 |
"flaubert_base_uncased": 768,
|
| 636 |
"flaubert_large_cased": 1024,
|
| 637 |
"luotuo-bert-medium": 768,
|
| 638 |
-
"LASER2": 1024,
|
| 639 |
-
"LaBSE": 768,
|
| 640 |
"gbert-base": 768,
|
| 641 |
"gbert-large": 1024,
|
| 642 |
"gelectra-base": 768,
|
|
@@ -715,20 +715,23 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
| 715 |
}
|
| 716 |
|
| 717 |
EXTERNAL_MODEL_TO_SEQLEN = {
|
|
|
|
| 718 |
"Cohere-embed-english-v3.0": 512,
|
| 719 |
"Cohere-embed-multilingual-v3.0": 512,
|
| 720 |
-
"Cohere-embed-multilingual-light-v3.0": 512,
|
|
|
|
|
|
|
| 721 |
"LLM2Vec-Llama-supervised": 512,
|
| 722 |
"LLM2Vec-Llama-unsupervised": 512,
|
| 723 |
"LLM2Vec-Mistral-supervised": 512,
|
| 724 |
"LLM2Vec-Mistral-unsupervised": 512,
|
| 725 |
"LLM2Vec-Sheared-Llama-supervised": 512,
|
| 726 |
"LLM2Vec-Sheared-Llama-unsupervised": 512,
|
|
|
|
| 727 |
"all-MiniLM-L12-v2": 512,
|
| 728 |
"all-MiniLM-L6-v2": 512,
|
| 729 |
"all-mpnet-base-v2": 514,
|
| 730 |
"allenai-specter": 512,
|
| 731 |
-
"Baichuan-text-embedding": 512,
|
| 732 |
"bert-base-10lang-cased": 512,
|
| 733 |
"bert-base-15lang-cased": 512,
|
| 734 |
"bert-base-25lang-cased": 512,
|
|
@@ -749,8 +752,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
| 749 |
"distilbert-base-en-fr-cased": 512,
|
| 750 |
"distilbert-base-en-fr-es-pt-it-cased": 512,
|
| 751 |
"distilbert-base-fr-cased": 512,
|
| 752 |
-
"distilbert-base-uncased": 512,
|
| 753 |
-
"DanskBERT": 514,
|
| 754 |
"dfm-encoder-large-v1": 512,
|
| 755 |
"dfm-sentence-encoder-large-1": 512,
|
| 756 |
"distiluse-base-multilingual-cased-v2": 512,
|
|
@@ -778,8 +780,6 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
| 778 |
"herbert-base-retrieval-v2": 514,
|
| 779 |
"komninos": "N/A",
|
| 780 |
"luotuo-bert-medium": 512,
|
| 781 |
-
"LASER2": "N/A",
|
| 782 |
-
"LaBSE": 512,
|
| 783 |
"m3e-base": 512,
|
| 784 |
"m3e-large": 512,
|
| 785 |
# "mistral-embed": "?",
|
|
@@ -844,12 +844,15 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
| 844 |
}
|
| 845 |
|
| 846 |
EXTERNAL_MODEL_TO_SIZE = {
|
|
|
|
|
|
|
| 847 |
"LLM2Vec-Llama-supervised": 6607,
|
| 848 |
"LLM2Vec-Llama-unsupervised": 6607,
|
| 849 |
"LLM2Vec-Mistral-supervised": 7111,
|
| 850 |
"LLM2Vec-Mistral-unsupervised": 7111,
|
| 851 |
"LLM2Vec-Sheared-Llama-supervised": 1280,
|
| 852 |
"LLM2Vec-Sheared-Llama-unsupervised": 1280,
|
|
|
|
| 853 |
"allenai-specter": 110,
|
| 854 |
"all-MiniLM-L12-v2": 33,
|
| 855 |
"all-MiniLM-L6-v2": 23,
|
|
@@ -874,7 +877,6 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
| 874 |
"distilbert-base-en-fr-es-pt-it-cased": 110,
|
| 875 |
"distilbert-base-fr-cased": 110,
|
| 876 |
"distilbert-base-uncased": 110,
|
| 877 |
-
"DanskBERT": 125,
|
| 878 |
"distiluse-base-multilingual-cased-v2": 135,
|
| 879 |
"dfm-encoder-large-v1": 355,
|
| 880 |
"dfm-sentence-encoder-large-1": 355,
|
|
@@ -901,9 +903,7 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
| 901 |
"gtr-t5-xxl": 4865,
|
| 902 |
"herbert-base-retrieval-v2": 125,
|
| 903 |
"komninos": 134,
|
| 904 |
-
"luotuo-bert-medium": 328,
|
| 905 |
-
"LASER2": 43,
|
| 906 |
-
"LaBSE": 471,
|
| 907 |
"m3e-base": 102,
|
| 908 |
"m3e-large": 102,
|
| 909 |
"msmarco-bert-co-condensor": 110,
|
|
@@ -944,12 +944,12 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
| 944 |
}
|
| 945 |
|
| 946 |
PROPRIETARY_MODELS = {
|
|
|
|
| 947 |
"Cohere-embed-english-v3.0",
|
| 948 |
"Cohere-embed-multilingual-v3.0",
|
| 949 |
"Cohere-embed-multilingual-light-v3.0",
|
| 950 |
-
"Baichuan-text-embedding",
|
| 951 |
-
"mistral-embed",
|
| 952 |
"OpenSearch-text-hybrid",
|
|
|
|
| 953 |
"text-embedding-3-small",
|
| 954 |
"text-embedding-3-large",
|
| 955 |
"text-embedding-3-large-256",
|
|
@@ -973,6 +973,7 @@ PROPRIETARY_MODELS = {
|
|
| 973 |
"google-gecko.text-embedding-preview-0409",
|
| 974 |
"google-gecko-256.text-embedding-preview-0409",
|
| 975 |
}
|
|
|
|
| 976 |
PROPRIETARY_MODELS = {
|
| 977 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
|
| 978 |
for model in PROPRIETARY_MODELS
|
|
|
|
| 340 |
"Cohere-embed-multilingual-light-v3.0",
|
| 341 |
"DanskBERT",
|
| 342 |
"LASER2",
|
| 343 |
+
"LLM2Vec-Llama-supervised",
|
| 344 |
+
"LLM2Vec-Llama-unsupervised",
|
| 345 |
+
"LLM2Vec-Mistral-supervised",
|
| 346 |
+
"LLM2Vec-Mistral-unsupervised",
|
| 347 |
+
"LLM2Vec-Sheared-Llama-supervised",
|
| 348 |
+
"LLM2Vec-Sheared-Llama-unsupervised",
|
| 349 |
"LaBSE",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
"OpenSearch-text-hybrid",
|
| 351 |
"all-MiniLM-L12-v2",
|
| 352 |
"all-MiniLM-L6-v2",
|
|
|
|
| 456 |
]
|
| 457 |
|
| 458 |
EXTERNAL_MODEL_TO_LINK = {
|
| 459 |
+
"Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding",
|
| 460 |
"Cohere-embed-english-v3.0": "https://huggingface.co/Cohere/Cohere-embed-english-v3.0",
|
| 461 |
"Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
|
| 462 |
"Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
|
| 463 |
+
"DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
|
| 464 |
+
"LASER2": "https://github.com/facebookresearch/LASER",
|
| 465 |
"LLM2Vec-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised",
|
| 466 |
"LLM2Vec-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp",
|
| 467 |
"LLM2Vec-Mistral-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised",
|
| 468 |
"LLM2Vec-Mistral-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
|
| 469 |
"LLM2Vec-Sheared-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
|
| 470 |
"LLM2Vec-Sheared-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
|
| 471 |
+
"LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
|
| 472 |
+
"OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
|
| 473 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
| 474 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
| 475 |
"all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
|
| 476 |
"all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
|
| 477 |
"all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
|
|
|
|
| 478 |
"bert-base-10lang-cased": "https://huggingface.co/Geotrend/bert-base-10lang-cased",
|
| 479 |
"bert-base-15lang-cased": "https://huggingface.co/Geotrend/bert-base-15lang-cased",
|
| 480 |
"bert-base-25lang-cased": "https://huggingface.co/Geotrend/bert-base-25lang-cased",
|
|
|
|
| 491 |
"camembert-large": "https://huggingface.co/almanach/camembert-large",
|
| 492 |
"contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
|
| 493 |
"cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
|
|
|
|
| 494 |
"distilbert-base-25lang-cased": "https://huggingface.co/Geotrend/distilbert-base-25lang-cased",
|
| 495 |
"distilbert-base-en-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-cased",
|
| 496 |
"distilbert-base-en-fr-es-pt-it-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased",
|
|
|
|
| 523 |
"herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
|
| 524 |
"komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
|
| 525 |
"luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
|
|
|
|
|
|
|
| 526 |
"m3e-base": "https://huggingface.co/moka-ai/m3e-base",
|
| 527 |
"m3e-large": "https://huggingface.co/moka-ai/m3e-large",
|
| 528 |
"mistral-embed": "https://docs.mistral.ai/guides/embeddings",
|
|
|
|
| 539 |
"nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
|
| 540 |
"norbert3-base": "https://huggingface.co/ltg/norbert3-base",
|
| 541 |
"norbert3-large": "https://huggingface.co/ltg/norbert3-large",
|
|
|
|
| 542 |
"paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
|
| 543 |
"paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
| 544 |
"sentence-camembert-base": "https://huggingface.co/dangvantuan/sentence-camembert-base",
|
|
|
|
| 586 |
}
|
| 587 |
|
| 588 |
EXTERNAL_MODEL_TO_DIM = {
|
| 589 |
+
"Baichuan-text-embedding": 1024,
|
| 590 |
"Cohere-embed-english-v3.0": 1024,
|
| 591 |
"Cohere-embed-multilingual-v3.0": 1024,
|
| 592 |
"Cohere-embed-multilingual-light-v3.0": 384,
|
| 593 |
+
"DanskBERT": 768,
|
| 594 |
+
"LASER2": 1024,
|
| 595 |
"LLM2Vec-Llama-supervised": 4096,
|
| 596 |
"LLM2Vec-Llama-unsupervised": 4096,
|
| 597 |
"LLM2Vec-Mistral-supervised": 4096,
|
| 598 |
"LLM2Vec-Mistral-unsupervised": 4096,
|
| 599 |
"LLM2Vec-Sheared-Llama-supervised": 2048,
|
| 600 |
"LLM2Vec-Sheared-Llama-unsupervised": 2048,
|
| 601 |
+
"LaBSE": 768,
|
| 602 |
"all-MiniLM-L12-v2": 384,
|
| 603 |
"all-MiniLM-L6-v2": 384,
|
| 604 |
"all-mpnet-base-v2": 768,
|
| 605 |
"allenai-specter": 768,
|
|
|
|
| 606 |
"bert-base-10lang-cased": 768,
|
| 607 |
"bert-base-15lang-cased": 768,
|
| 608 |
"bert-base-25lang-cased": 768,
|
|
|
|
| 619 |
"camembert-large": 768,
|
| 620 |
"contriever-base-msmarco": 768,
|
| 621 |
"cross-en-de-roberta-sentence-transformer": 768,
|
|
|
|
| 622 |
"distilbert-base-25lang-cased": 768,
|
| 623 |
"distilbert-base-en-fr-cased": 768,
|
| 624 |
"distilbert-base-en-fr-es-pt-it-cased": 768,
|
|
|
|
| 637 |
"flaubert_base_uncased": 768,
|
| 638 |
"flaubert_large_cased": 1024,
|
| 639 |
"luotuo-bert-medium": 768,
|
|
|
|
|
|
|
| 640 |
"gbert-base": 768,
|
| 641 |
"gbert-large": 1024,
|
| 642 |
"gelectra-base": 768,
|
|
|
|
| 715 |
}
|
| 716 |
|
| 717 |
EXTERNAL_MODEL_TO_SEQLEN = {
|
| 718 |
+
"Baichuan-text-embedding": 512,
|
| 719 |
"Cohere-embed-english-v3.0": 512,
|
| 720 |
"Cohere-embed-multilingual-v3.0": 512,
|
| 721 |
+
"Cohere-embed-multilingual-light-v3.0": 512,
|
| 722 |
+
"DanskBERT": 514,
|
| 723 |
+
"LASER2": "N/A",
|
| 724 |
"LLM2Vec-Llama-supervised": 512,
|
| 725 |
"LLM2Vec-Llama-unsupervised": 512,
|
| 726 |
"LLM2Vec-Mistral-supervised": 512,
|
| 727 |
"LLM2Vec-Mistral-unsupervised": 512,
|
| 728 |
"LLM2Vec-Sheared-Llama-supervised": 512,
|
| 729 |
"LLM2Vec-Sheared-Llama-unsupervised": 512,
|
| 730 |
+
"LaBSE": 512,
|
| 731 |
"all-MiniLM-L12-v2": 512,
|
| 732 |
"all-MiniLM-L6-v2": 512,
|
| 733 |
"all-mpnet-base-v2": 514,
|
| 734 |
"allenai-specter": 512,
|
|
|
|
| 735 |
"bert-base-10lang-cased": 512,
|
| 736 |
"bert-base-15lang-cased": 512,
|
| 737 |
"bert-base-25lang-cased": 512,
|
|
|
|
| 752 |
"distilbert-base-en-fr-cased": 512,
|
| 753 |
"distilbert-base-en-fr-es-pt-it-cased": 512,
|
| 754 |
"distilbert-base-fr-cased": 512,
|
| 755 |
+
"distilbert-base-uncased": 512,
|
|
|
|
| 756 |
"dfm-encoder-large-v1": 512,
|
| 757 |
"dfm-sentence-encoder-large-1": 512,
|
| 758 |
"distiluse-base-multilingual-cased-v2": 512,
|
|
|
|
| 780 |
"herbert-base-retrieval-v2": 514,
|
| 781 |
"komninos": "N/A",
|
| 782 |
"luotuo-bert-medium": 512,
|
|
|
|
|
|
|
| 783 |
"m3e-base": 512,
|
| 784 |
"m3e-large": 512,
|
| 785 |
# "mistral-embed": "?",
|
|
|
|
| 844 |
}
|
| 845 |
|
| 846 |
EXTERNAL_MODEL_TO_SIZE = {
|
| 847 |
+
"DanskBERT": 125,
|
| 848 |
+
"LASER2": 43,
|
| 849 |
"LLM2Vec-Llama-supervised": 6607,
|
| 850 |
"LLM2Vec-Llama-unsupervised": 6607,
|
| 851 |
"LLM2Vec-Mistral-supervised": 7111,
|
| 852 |
"LLM2Vec-Mistral-unsupervised": 7111,
|
| 853 |
"LLM2Vec-Sheared-Llama-supervised": 1280,
|
| 854 |
"LLM2Vec-Sheared-Llama-unsupervised": 1280,
|
| 855 |
+
"LaBSE": 471,
|
| 856 |
"allenai-specter": 110,
|
| 857 |
"all-MiniLM-L12-v2": 33,
|
| 858 |
"all-MiniLM-L6-v2": 23,
|
|
|
|
| 877 |
"distilbert-base-en-fr-es-pt-it-cased": 110,
|
| 878 |
"distilbert-base-fr-cased": 110,
|
| 879 |
"distilbert-base-uncased": 110,
|
|
|
|
| 880 |
"distiluse-base-multilingual-cased-v2": 135,
|
| 881 |
"dfm-encoder-large-v1": 355,
|
| 882 |
"dfm-sentence-encoder-large-1": 355,
|
|
|
|
| 903 |
"gtr-t5-xxl": 4865,
|
| 904 |
"herbert-base-retrieval-v2": 125,
|
| 905 |
"komninos": 134,
|
| 906 |
+
"luotuo-bert-medium": 328,
|
|
|
|
|
|
|
| 907 |
"m3e-base": 102,
|
| 908 |
"m3e-large": 102,
|
| 909 |
"msmarco-bert-co-condensor": 110,
|
|
|
|
| 944 |
}
|
| 945 |
|
| 946 |
PROPRIETARY_MODELS = {
|
| 947 |
+
"Baichuan-text-embedding",
|
| 948 |
"Cohere-embed-english-v3.0",
|
| 949 |
"Cohere-embed-multilingual-v3.0",
|
| 950 |
"Cohere-embed-multilingual-light-v3.0",
|
|
|
|
|
|
|
| 951 |
"OpenSearch-text-hybrid",
|
| 952 |
+
"mistral-embed",
|
| 953 |
"text-embedding-3-small",
|
| 954 |
"text-embedding-3-large",
|
| 955 |
"text-embedding-3-large-256",
|
|
|
|
| 973 |
"google-gecko.text-embedding-preview-0409",
|
| 974 |
"google-gecko-256.text-embedding-preview-0409",
|
| 975 |
}
|
| 976 |
+
|
| 977 |
PROPRIETARY_MODELS = {
|
| 978 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
|
| 979 |
for model in PROPRIETARY_MODELS
|