Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
c2f342d
1
Parent(s):
405857a
Add Llama 4 and DeepSeek V3 results
Browse files
- README.md +6 -0
- external_models_results.json +70 -1
- model_list.txt +6 -0
README.md
CHANGED
|
@@ -56,6 +56,7 @@ models:
|
|
| 56 |
- AI-Sweden-Models/gpt-sw3-40b
|
| 57 |
- AI-Sweden-Models/gpt-sw3-6.7b
|
| 58 |
- AI-Sweden-Models/gpt-sw3-6.7b-v2
|
|
|
|
| 59 |
- AXCXEPT/EZO-Qwen2.5-32B-Instruct
|
| 60 |
- AdaptLLM/finance-LLM
|
| 61 |
- AdaptLLM/finance-LLM-13B
|
|
@@ -80,6 +81,8 @@ models:
|
|
| 80 |
- BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B
|
| 81 |
- BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B
|
| 82 |
- BAAI/OPI-Llama-3.1-8B-Instruct
|
|
|
|
|
|
|
| 83 |
- Bruno/Caramelinho
|
| 84 |
- Bruno/Caramelo_7B
|
| 85 |
- CausalLM/34b-beta
|
|
@@ -548,6 +551,7 @@ models:
|
|
| 548 |
- deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
|
| 549 |
- deepseek-ai/DeepSeek-V2-Lite
|
| 550 |
- deepseek-ai/DeepSeek-V2-Lite-Chat
|
|
|
|
| 551 |
- deepseek-ai/deepseek-llm-7b-base
|
| 552 |
- deepseek-ai/deepseek-moe-16b-base
|
| 553 |
- deepseek-ai/deepseek-moe-16b-chat
|
|
@@ -744,6 +748,8 @@ models:
|
|
| 744 |
- meta-llama/Llama-3.2-3B-Instruct
|
| 745 |
- meta-llama/Llama-3.2-90B-Vision-Instruct
|
| 746 |
- meta-llama/Llama-3.3-70B-Instruct
|
|
|
|
|
|
|
| 747 |
- meta-llama/Meta-Llama-3-70B
|
| 748 |
- meta-llama/Meta-Llama-3-70B-Instruct
|
| 749 |
- meta-llama/Meta-Llama-3-8B
|
|
|
|
| 56 |
- AI-Sweden-Models/gpt-sw3-40b
|
| 57 |
- AI-Sweden-Models/gpt-sw3-6.7b
|
| 58 |
- AI-Sweden-Models/gpt-sw3-6.7b-v2
|
| 59 |
+
- AIDC-AI/Marco-LLM-ES
|
| 60 |
- AXCXEPT/EZO-Qwen2.5-32B-Instruct
|
| 61 |
- AdaptLLM/finance-LLM
|
| 62 |
- AdaptLLM/finance-LLM-13B
|
|
|
|
| 81 |
- BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B
|
| 82 |
- BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B
|
| 83 |
- BAAI/OPI-Llama-3.1-8B-Instruct
|
| 84 |
+
- BSC-LT/salamandra-2b
|
| 85 |
+
- BSC-LT/salamandra-7b
|
| 86 |
- Bruno/Caramelinho
|
| 87 |
- Bruno/Caramelo_7B
|
| 88 |
- CausalLM/34b-beta
|
|
|
|
| 551 |
- deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
|
| 552 |
- deepseek-ai/DeepSeek-V2-Lite
|
| 553 |
- deepseek-ai/DeepSeek-V2-Lite-Chat
|
| 554 |
+
- deepseek-ai/DeepSeek-V3-0324
|
| 555 |
- deepseek-ai/deepseek-llm-7b-base
|
| 556 |
- deepseek-ai/deepseek-moe-16b-base
|
| 557 |
- deepseek-ai/deepseek-moe-16b-chat
|
|
|
|
| 748 |
- meta-llama/Llama-3.2-3B-Instruct
|
| 749 |
- meta-llama/Llama-3.2-90B-Vision-Instruct
|
| 750 |
- meta-llama/Llama-3.3-70B-Instruct
|
| 751 |
+
- meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
| 752 |
+
- meta-llama/Llama-4-Scout-17B-16E-Instruct
|
| 753 |
- meta-llama/Meta-Llama-3-70B
|
| 754 |
- meta-llama/Meta-Llama-3-70B-Instruct
|
| 755 |
- meta-llama/Meta-Llama-3-8B
|
external_models_results.json
CHANGED
|
@@ -443,6 +443,29 @@
|
|
| 443 |
"result_metrics_average": 0.8836610214313025,
|
| 444 |
"result_metrics_npm": 0.8134610556797854
|
| 445 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 446 |
{
|
| 447 |
"model": "qwen2-5-vl-72b-instruct",
|
| 448 |
"name": "Qwen/Qwen2.5-VL-72B-Instruct (API)",
|
|
@@ -538,7 +561,7 @@
|
|
| 538 |
"model": "claude-3-7-sonnet-20250219",
|
| 539 |
"name": "Claude 3.7 Sonnet (2025-02-19)",
|
| 540 |
"link": "https://www.anthropic.com/",
|
| 541 |
-
"date": "2025-04-
|
| 542 |
"status": "full",
|
| 543 |
"main_language": "English",
|
| 544 |
"model_type": "proprietary",
|
|
@@ -555,5 +578,51 @@
|
|
| 555 |
},
|
| 556 |
"result_metrics_average": 0.8448598450650201,
|
| 557 |
"result_metrics_npm": 0.7622301724524201
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 558 |
}
|
| 559 |
]
|
|
|
|
| 443 |
"result_metrics_average": 0.8836610214313025,
|
| 444 |
"result_metrics_npm": 0.8134610556797854
|
| 445 |
},
|
| 446 |
+
{
|
| 447 |
+
"model": "deepSeek-v3-0324",
|
| 448 |
+
"name": "deepseek-ai/DeepSeek-V3-0324 (API)",
|
| 449 |
+
"link": "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
|
| 450 |
+
"date": "2025-04-03",
|
| 451 |
+
"status": "full",
|
| 452 |
+
"main_language": "English",
|
| 453 |
+
"model_type": "chat",
|
| 454 |
+
"params": 685.0,
|
| 455 |
+
"result_metrics": {
|
| 456 |
+
"enem_challenge": 0.8901329601119664,
|
| 457 |
+
"bluex": 0.8414464534075105,
|
| 458 |
+
"oab_exams": 0.7148063781321184,
|
| 459 |
+
"assin2_sts": 0.8145997097875548,
|
| 460 |
+
"assin2_rte": 0.9421860387625551,
|
| 461 |
+
"faquad_nli": 0.796751127001399,
|
| 462 |
+
"hatebr_offensive": 0.9060129756724185,
|
| 463 |
+
"portuguese_hate_speech": 0.7262480672025753,
|
| 464 |
+
"tweetsentbr": 0.7037326638925795
|
| 465 |
+
},
|
| 466 |
+
"result_metrics_average": 0.8151018193300753,
|
| 467 |
+
"result_metrics_npm": 0.7165435243787625
|
| 468 |
+
},
|
| 469 |
{
|
| 470 |
"model": "qwen2-5-vl-72b-instruct",
|
| 471 |
"name": "Qwen/Qwen2.5-VL-72B-Instruct (API)",
|
|
|
|
| 561 |
"model": "claude-3-7-sonnet-20250219",
|
| 562 |
"name": "Claude 3.7 Sonnet (2025-02-19)",
|
| 563 |
"link": "https://www.anthropic.com/",
|
| 564 |
+
"date": "2025-04-04",
|
| 565 |
"status": "full",
|
| 566 |
"main_language": "English",
|
| 567 |
"model_type": "proprietary",
|
|
|
|
| 578 |
},
|
| 579 |
"result_metrics_average": 0.8448598450650201,
|
| 580 |
"result_metrics_npm": 0.7622301724524201
|
| 581 |
+
},
|
| 582 |
+
{
|
| 583 |
+
"model": "llama-4-scout-16e",
|
| 584 |
+
"name": "meta-llama/Llama-4-Scout-17B-16E-Instruct (Groq API)",
|
| 585 |
+
"link": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
| 586 |
+
"date": "2025-04-05",
|
| 587 |
+
"status": "full",
|
| 588 |
+
"main_language": "English",
|
| 589 |
+
"model_type": "chat",
|
| 590 |
+
"params": 109.0,
|
| 591 |
+
"result_metrics": {
|
| 592 |
+
"enem_challenge": 0.8054583624912526,
|
| 593 |
+
"bluex": 0.721835883171071,
|
| 594 |
+
"oab_exams": 0.6815489749430524,
|
| 595 |
+
"assin2_sts": 0.7741640227983941,
|
| 596 |
+
"assin2_rte": 0.9312877465954967,
|
| 597 |
+
"faquad_nli": 0.8567037452287072,
|
| 598 |
+
"hatebr_offensive": 0.8813700069483281,
|
| 599 |
+
"portuguese_hate_speech": 0.7009183720501475,
|
| 600 |
+
"tweetsentbr": 0.7277278145615887
|
| 601 |
+
},
|
| 602 |
+
"result_metrics_average": 0.7867794365320042,
|
| 603 |
+
"result_metrics_npm": 0.6811274967601382
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"model": "llama-4-maverick-128e",
|
| 607 |
+
"name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct (FireworksAI API)",
|
| 608 |
+
"link": "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct",
|
| 609 |
+
"date": "2025-04-05",
|
| 610 |
+
"status": "full",
|
| 611 |
+
"main_language": "English",
|
| 612 |
+
"model_type": "chat",
|
| 613 |
+
"params": 402.0,
|
| 614 |
+
"result_metrics": {
|
| 615 |
+
"enem_challenge": 0.8775367389783065,
|
| 616 |
+
"bluex": 0.8122392211404729,
|
| 617 |
+
"oab_exams": 0.7284738041002278,
|
| 618 |
+
"assin2_sts": 0.7333246903202654,
|
| 619 |
+
"assin2_rte": 0.9329419027588105,
|
| 620 |
+
"faquad_nli": 0.7823695413019562,
|
| 621 |
+
"hatebr_offensive": 0.9047550357833591,
|
| 622 |
+
"portuguese_hate_speech": 0.7231286908077994,
|
| 623 |
+
"tweetsentbr": 0.7165294511353842
|
| 624 |
+
},
|
| 625 |
+
"result_metrics_average": 0.8012554529251759,
|
| 626 |
+
"result_metrics_npm": 0.6997802853383734
|
| 627 |
}
|
| 628 |
]
|
model_list.txt
CHANGED
|
@@ -27,6 +27,7 @@
|
|
| 27 |
- AI-Sweden-Models/gpt-sw3-40b
|
| 28 |
- AI-Sweden-Models/gpt-sw3-6.7b
|
| 29 |
- AI-Sweden-Models/gpt-sw3-6.7b-v2
|
|
|
|
| 30 |
- AXCXEPT/EZO-Qwen2.5-32B-Instruct
|
| 31 |
- AdaptLLM/finance-LLM
|
| 32 |
- AdaptLLM/finance-LLM-13B
|
|
@@ -51,6 +52,8 @@
|
|
| 51 |
- BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B
|
| 52 |
- BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B
|
| 53 |
- BAAI/OPI-Llama-3.1-8B-Instruct
|
|
|
|
|
|
|
| 54 |
- Bruno/Caramelinho
|
| 55 |
- Bruno/Caramelo_7B
|
| 56 |
- CausalLM/34b-beta
|
|
@@ -519,6 +522,7 @@
|
|
| 519 |
- deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
|
| 520 |
- deepseek-ai/DeepSeek-V2-Lite
|
| 521 |
- deepseek-ai/DeepSeek-V2-Lite-Chat
|
|
|
|
| 522 |
- deepseek-ai/deepseek-llm-7b-base
|
| 523 |
- deepseek-ai/deepseek-moe-16b-base
|
| 524 |
- deepseek-ai/deepseek-moe-16b-chat
|
|
@@ -715,6 +719,8 @@
|
|
| 715 |
- meta-llama/Llama-3.2-3B-Instruct
|
| 716 |
- meta-llama/Llama-3.2-90B-Vision-Instruct
|
| 717 |
- meta-llama/Llama-3.3-70B-Instruct
|
|
|
|
|
|
|
| 718 |
- meta-llama/Meta-Llama-3-70B
|
| 719 |
- meta-llama/Meta-Llama-3-70B-Instruct
|
| 720 |
- meta-llama/Meta-Llama-3-8B
|
|
|
|
| 27 |
- AI-Sweden-Models/gpt-sw3-40b
|
| 28 |
- AI-Sweden-Models/gpt-sw3-6.7b
|
| 29 |
- AI-Sweden-Models/gpt-sw3-6.7b-v2
|
| 30 |
+
- AIDC-AI/Marco-LLM-ES
|
| 31 |
- AXCXEPT/EZO-Qwen2.5-32B-Instruct
|
| 32 |
- AdaptLLM/finance-LLM
|
| 33 |
- AdaptLLM/finance-LLM-13B
|
|
|
|
| 52 |
- BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B
|
| 53 |
- BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B
|
| 54 |
- BAAI/OPI-Llama-3.1-8B-Instruct
|
| 55 |
+
- BSC-LT/salamandra-2b
|
| 56 |
+
- BSC-LT/salamandra-7b
|
| 57 |
- Bruno/Caramelinho
|
| 58 |
- Bruno/Caramelo_7B
|
| 59 |
- CausalLM/34b-beta
|
|
|
|
| 522 |
- deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
|
| 523 |
- deepseek-ai/DeepSeek-V2-Lite
|
| 524 |
- deepseek-ai/DeepSeek-V2-Lite-Chat
|
| 525 |
+
- deepseek-ai/DeepSeek-V3-0324
|
| 526 |
- deepseek-ai/deepseek-llm-7b-base
|
| 527 |
- deepseek-ai/deepseek-moe-16b-base
|
| 528 |
- deepseek-ai/deepseek-moe-16b-chat
|
|
|
|
| 719 |
- meta-llama/Llama-3.2-3B-Instruct
|
| 720 |
- meta-llama/Llama-3.2-90B-Vision-Instruct
|
| 721 |
- meta-llama/Llama-3.3-70B-Instruct
|
| 722 |
+
- meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
| 723 |
+
- meta-llama/Llama-4-Scout-17B-16E-Instruct
|
| 724 |
- meta-llama/Meta-Llama-3-70B
|
| 725 |
- meta-llama/Meta-Llama-3-70B-Instruct
|
| 726 |
- meta-llama/Meta-Llama-3-8B
|