[
  {
    "model": "sabia-2-small",
    "name": "Sabiá-2 Small",
    "link": "https://www.maritaca.ai/",
    "date": "2024-04-12",
    "status": "full",
    "main_language": "Portuguese",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.7172848145556333,
      "bluex": 0.5549374130737135,
      "oab_exams": 0.6364464692482916,
      "assin2_sts": 0.7053302344881672,
      "assin2_rte": 0.9121728362223306,
      "faquad_nli": 0.7575848453041435,
      "hatebr_offensive": 0.753800795680591,
      "portuguese_hate_speech": 0.6975326368290793,
      "tweetsentbr": 0.7119699374276466
    },
    "result_metrics_average": 0.7163399980921773,
    "result_metrics_npm": 0.5744541501392351
  },
  {
    "model": "sabia-2-medium",
    "name": "Sabiá-2 Medium",
    "link": "https://www.maritaca.ai/",
    "date": "2024-04-13",
    "status": "full",
    "main_language": "Portuguese",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8180545836249126,
      "bluex": 0.717663421418637,
      "oab_exams": 0.7321184510250569,
      "assin2_sts": 0.7804108376537757,
      "assin2_rte": 0.923459363368553,
      "faquad_nli": 0.7657657657657658,
      "hatebr_offensive": 0.8349989882997386,
      "portuguese_hate_speech": 0.7379326358571694,
      "tweetsentbr": 0.7269533040381798
    },
    "result_metrics_average": 0.7819285945613098,
    "result_metrics_npm": 0.6676121786922709
  },
  {
    "model": "gpt-3.5-turbo-0125",
    "name": "GPT-3.5 Turbo (0125)",
    "link": "https://www.openai.com/",
    "date": "2024-03-08",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.7214835549335199,
      "bluex": 0.6244784422809457,
      "oab_exams": 0.5430523917995445,
      "assin2_sts": 0.7378460201077941,
      "assin2_rte": 0.8823038414050672,
      "faquad_nli": 0.746353108609074,
      "hatebr_offensive": 0.8056205941193919,
      "portuguese_hate_speech": 0.7363692688971499,
      "tweetsentbr": 0.7028981330613626
    },
    "result_metrics_average": 0.7222672616904278,
    "result_metrics_npm": 0.5841504766165372
  },
  {
    "model": "claude-3-haiku-20240307",
    "name": "Claude-3 Haiku (20240307)",
    "link": "https://www.claude.ai/",
    "date": "2024-04-13",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.7718684394681595,
      "bluex": 0.6662030598052852,
      "oab_exams": 0.626879271070615,
      "assin2_sts": 0.7892124744168747,
      "assin2_rte": 0.9184462138121732,
      "faquad_nli": 0.6340996599941455,
      "hatebr_offensive": 0.8023698759439051,
      "portuguese_hate_speech": 0.7342166269560177,
      "tweetsentbr": 0.7303315733000207
    },
    "result_metrics_average": 0.7415141327519107,
    "result_metrics_npm": 0.6037151240886439
  },
  {
    "model": "gemini-1.0-pro",
    "name": "Gemini 1.0 Pro",
    "link": "https://ai.google.dev/",
    "date": "2024-03-08",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.7130860741777467,
      "bluex": 0.5869262865090403,
      "oab_exams": 0.4988610478359909,
      "assin2_sts": 0.7058831239763663,
      "assin2_rte": 0.8945993304651698,
      "faquad_nli": 0.7070913567220611,
      "hatebr_offensive": 0.8086330094493972,
      "portuguese_hate_speech": 0.699119105113102,
      "tweetsentbr": 0.6803240476660983
    },
    "result_metrics_average": 0.6993914868794414,
    "result_metrics_npm": 0.551208000273598
  },
  {
    "model": "gemini-1.5-pro-preview-0409",
    "name": "Gemini 1.5 Pro Preview (0409)",
    "link": "https://cloud.google.com/vertex-ai",
    "date": "2024-04-15",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8509447165850245,
      "bluex": 0.7719054242002782,
      "oab_exams": 0.6888382687927107,
      "assin2_sts": 0.8159702278408203,
      "assin2_rte": 0.9328989988467518,
      "faquad_nli": 0.7290756302521009,
      "hatebr_offensive": 0.8697698647467024,
      "portuguese_hate_speech": 0.7539414414414414,
      "tweetsentbr": 0.772785080895884
    },
    "result_metrics_average": 0.7984588504001905,
    "result_metrics_npm": 0.6908188311933006
  },
  {
    "model": "deepseek-v2-chat",
    "name": "DeepSeek-V2 Chat (API)",
    "link": "https://www.deepseek.com/",
    "date": "2024-05-18",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.7844646606018194,
      "bluex": 0.6954102920723226,
      "oab_exams": 0.564009111617312,
      "assin2_sts": 0.8533174657651231,
      "assin2_rte": 0.9440170304568147,
      "faquad_nli": 0.7995469048381548,
      "hatebr_offensive": 0.8842986491071644,
      "portuguese_hate_speech": 0.7271736342651962,
      "tweetsentbr": 0.6835304759163984
    },
    "result_metrics_average": 0.7706409138489229,
    "result_metrics_npm": 0.655901521190756
  },
  {
    "model": "gemini-1.5-flash-preview-0514",
    "name": "Gemini 1.5 Flash Preview (0514)",
    "link": "https://cloud.google.com/vertex-ai",
    "date": "2024-05-18",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8264520643806857,
      "bluex": 0.7482614742698191,
      "oab_exams": 0.6419134396355353,
      "assin2_sts": 0.841655158151231,
      "assin2_rte": 0.9362097477374545,
      "faquad_nli": 0.8092185592185592,
      "hatebr_offensive": 0.9099110141445836,
      "portuguese_hate_speech": 0.6875904275305673,
      "tweetsentbr": 0.7219800292667018
    },
    "result_metrics_average": 0.7914657682594597,
    "result_metrics_npm": 0.6834036936130392
  },
  {
    "model": "gemini-1.5-flash-001",
    "name": "Gemini 1.5 Flash (001)",
    "link": "https://cloud.google.com/vertex-ai",
    "date": "2024-08-09",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8306508047585724,
      "bluex": 0.7579972183588317,
      "oab_exams": 0.6446469248291572,
      "assin2_sts": 0.838806085610371,
      "assin2_rte": 0.9366169973822607,
      "faquad_nli": 0.7963910785668922,
      "hatebr_offensive": 0.9092078461170015,
      "portuguese_hate_speech": 0.6932563987219857,
      "tweetsentbr": 0.7312948963367732
    },
    "result_metrics_average": 0.7932075834090939,
    "result_metrics_npm": 0.6855338135928848
  },
  {
    "model": "gpt-4o-mini-2024-07-18",
    "name": "GPT 4o Mini (2024-07-18)",
    "link": "https://www.openai.com/",
    "date": "2024-07-25",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.7669699090272918,
      "bluex": 0.6842837273991655,
      "oab_exams": 0.6013667425968109,
      "assin2_sts": 0.7259038954527597,
      "assin2_rte": 0.942809846745341,
      "faquad_nli": 0.819807735300693,
      "hatebr_offensive": 0.8682357029532165,
      "portuguese_hate_speech": 0.7501413502853012,
      "tweetsentbr": 0.7509303825869922
    },
    "result_metrics_average": 0.7678276991497301,
    "result_metrics_npm": 0.6595966999910003
  },
  {
    "model": "nemotron-4-340b-instruct",
    "name": "nvidia/Nemotron-4-340B-Instruct (Nvidia API)",
    "link": "https://huggingface.co/nvidia/Nemotron-4-340B-Instruct",
    "date": "2024-06-30",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 340.0,
    "result_metrics": {
      "enem_challenge": 0.6648005598320503,
      "bluex": 0.6578581363004172,
      "oab_exams": 0.7020501138952164,
      "assin2_sts": 0.7857731021403329,
      "assin2_rte": 0.9489354458928496,
      "faquad_nli": 0.8194444444444444,
      "hatebr_offensive": 0.8641580001234928,
      "portuguese_hate_speech": 0.7761835184102864,
      "tweetsentbr": 0.780880021326841
    },
    "result_metrics_average": 0.7777870380406591,
    "result_metrics_npm": 0.6740728488043128
  },
  {
    "model": "llama_405b_instruct",
    "name": "meta-llama/Llama-3.1-405B-Instruct (Vertex AI)",
    "link": "https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct",
    "date": "2024-08-20",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 406.0,
    "result_metrics": {
      "enem_challenge": 0.8523442967109867,
      "bluex": 0.8011126564673157,
      "oab_exams": 0.7640091116173121,
      "assin2_sts": 0.7888441732870783,
      "assin2_rte": 0.9476445477916471,
      "faquad_nli": 0.825063276593557,
      "hatebr_offensive": 0.9073940659389119,
      "portuguese_hate_speech": 0.7191480935512969,
      "tweetsentbr": 0.7821434639106575
    },
    "result_metrics_average": 0.8208559650965292,
    "result_metrics_npm": 0.7286932366792048
  },
  {
    "model": "sabia-3-2024-07-15",
    "name": "Sabiá-3 (2024-07-15)",
    "link": "https://www.maritaca.ai/",
    "date": "2024-08-20",
    "status": "full",
    "main_language": "Portuguese",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8789363191042687,
      "bluex": 0.7899860917941586,
      "oab_exams": 0.8391799544419134,
      "assin2_sts": 0.8253863689009022,
      "assin2_rte": 0.9477034821619312,
      "faquad_nli": 0.8243848812618203,
      "hatebr_offensive": 0.8278737774590023,
      "portuguese_hate_speech": 0.7241071428571428,
      "tweetsentbr": 0.7510613086648664
    },
    "result_metrics_average": 0.8231799251828895,
    "result_metrics_npm": 0.7241097388486535
  },
  {
    "model": "llama3_3_70b",
    "name": "meta-llama/Llama-3.3-70B-Instruct (Vertex AI)",
    "link": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 70.6,
    "result_metrics": {
      "enem_challenge": 0.8320503848845346,
      "bluex": 0.7593880389429764,
      "oab_exams": 0.6733485193621868,
      "assin2_sts": 0.7275578599896508,
      "assin2_rte": 0.9407071010860484,
      "faquad_nli": 0.8787563033858187,
      "hatebr_offensive": 0.9024358249091997,
      "portuguese_hate_speech": 0.7042216543825339,
      "tweetsentbr": 0.7076749453899551
    },
    "result_metrics_average": 0.791793403592545,
    "result_metrics_npm": 0.6924788466103498
  },
  {
    "model": "llama3_2_90b",
    "name": "meta-llama/Llama-3.2-90B-Vision-Instruct (Vertex AI)",
    "link": "https://huggingface.co/meta-llama/Llama-3.2-90B-Vision-Instruct",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 88.6,
    "result_metrics": {
      "enem_challenge": 0.821553533939818,
      "bluex": 0.7482614742698191,
      "oab_exams": 0.7061503416856492,
      "assin2_sts": 0.7368518566379951,
      "assin2_rte": 0.9216548775103446,
      "faquad_nli": 0.8632015306122449,
      "hatebr_offensive": 0.8965270877302478,
      "portuguese_hate_speech": 0.7059127552081422,
      "tweetsentbr": 0.7352076218951984
    },
    "result_metrics_average": 0.7928134532766066,
    "result_metrics_npm": 0.6915070359785283
  },
  {
    "model": "gemini-1.5-flash-002",
    "name": "Gemini 1.5 Flash (002)",
    "link": "https://cloud.google.com/vertex-ai",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8327501749475158,
      "bluex": 0.760778859527121,
      "oab_exams": 0.6369020501138952,
      "assin2_sts": 0.8380176734291938,
      "assin2_rte": 0.941176117215237,
      "faquad_nli": 0.8360786822325283,
      "hatebr_offensive": 0.9046145161133335,
      "portuguese_hate_speech": 0.7406414313684444,
      "tweetsentbr": 0.6997509880131249
    },
    "result_metrics_average": 0.7989678325511549,
    "result_metrics_npm": 0.6979777100000177
  },
  {
    "model": "gemini-1.5-flash-8b-001",
    "name": "Gemini 1.5 Flash 8B (001)",
    "link": "https://aistudio.google.com",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.7641707487753674,
      "bluex": 0.6467315716272601,
      "oab_exams": 0.5603644646924829,
      "assin2_sts": 0.7638946799836569,
      "assin2_rte": 0.9329452628161146,
      "faquad_nli": 0.7937022965448601,
      "hatebr_offensive": 0.850497640901663,
      "portuguese_hate_speech": 0.7391317606010173,
      "tweetsentbr": 0.7376684798923661
    },
    "result_metrics_average": 0.7543452117594209,
    "result_metrics_npm": 0.6359642422837162
  },
  {
    "model": "gemini-2.0-flash-001",
    "name": "Gemini 2.0 Flash (001)",
    "link": "https://cloud.google.com/vertex-ai",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8789363191042687,
      "bluex": 0.803894297635605,
      "oab_exams": 0.7767653758542141,
      "assin2_sts": 0.8440142633742483,
      "assin2_rte": 0.9305165510724053,
      "faquad_nli": 0.7533651260745065,
      "hatebr_offensive": 0.8890432813545366,
      "portuguese_hate_speech": 0.7655392938544128,
      "tweetsentbr": 0.7652542619451799
    },
    "result_metrics_average": 0.8230365300299308,
    "result_metrics_npm": 0.7253778946033657
  },
  {
    "model": "gemini-2.0-flash-lite-001",
    "name": "Gemini 2.0 Flash Lite (001)",
    "link": "https://cloud.google.com/vertex-ai",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8509447165850245,
      "bluex": 0.7872044506258693,
      "oab_exams": 0.7061503416856492,
      "assin2_sts": 0.8492479991621328,
      "assin2_rte": 0.9216548775103446,
      "faquad_nli": 0.7652777777777777,
      "hatebr_offensive": 0.8522499647780968,
      "portuguese_hate_speech": 0.7501387383201693,
      "tweetsentbr": 0.7675746509081982
    },
    "result_metrics_average": 0.8056048352614735,
    "result_metrics_npm": 0.6986042497176748
  },
  {
    "model": "gemini-2.5-pro-exp-03-25",
    "name": "Gemini 2.5 Pro Experimental [reasoning] (0325)",
    "link": "https://aistudio.google.com",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.9769069279216235,
      "bluex": 0.9499304589707928,
      "oab_exams": 0.9216400911161731,
      "assin2_sts": 0.837785744915033,
      "assin2_rte": 0.9415510158830285,
      "faquad_nli": 0.8738735797309651,
      "hatebr_offensive": 0.9248478168290788,
      "portuguese_hate_speech": 0.7336133105156697,
      "tweetsentbr": 0.7928002469993594
    },
    "result_metrics_average": 0.8836610214313025,
    "result_metrics_npm": 0.8134610556797854
  },
  {
    "model": "deepSeek-v3-0324",
    "name": "deepseek-ai/DeepSeek-V3-0324 (API)",
    "link": "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 685.0,
    "result_metrics": {
      "enem_challenge": 0.8901329601119664,
      "bluex": 0.8414464534075105,
      "oab_exams": 0.7148063781321184,
      "assin2_sts": 0.8145997097875548,
      "assin2_rte": 0.9421860387625551,
      "faquad_nli": 0.796751127001399,
      "hatebr_offensive": 0.9060129756724185,
      "portuguese_hate_speech": 0.7262480672025753,
      "tweetsentbr": 0.7037326638925795
    },
    "result_metrics_average": 0.8151018193300753,
    "result_metrics_npm": 0.7165435243787625
  },
  {
    "model": "qwen2-5-vl-72b-instruct",
    "name": "Qwen/Qwen2.5-VL-72B-Instruct (API)",
    "link": "https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 73.4,
    "result_metrics": {
      "enem_challenge": 0.8600419874037789,
      "bluex": 0.8052851182197497,
      "oab_exams": 0.6888382687927107,
      "assin2_sts": 0.7595538567467497,
      "assin2_rte": 0.9472975104201871,
      "faquad_nli": 0.8447190882122586,
      "hatebr_offensive": 0.8810695094657859,
      "portuguese_hate_speech": 0.769596419318135,
      "tweetsentbr": 0.5644757075411895
    },
    "result_metrics_average": 0.7912086073467273,
    "result_metrics_npm": 0.6888261361422966
  },
  {
    "model": "qwen2-5-72b-instruct",
    "name": "Qwen/Qwen2.5-72B-Instruct (API)",
    "link": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 72.7,
    "result_metrics": {
      "enem_challenge": 0.8432470258922323,
      "bluex": 0.780250347705146,
      "oab_exams": 0.675626423690205,
      "assin2_sts": 0.8230708844558656,
      "assin2_rte": 0.9509720145268106,
      "faquad_nli": 0.8194444444444444,
      "hatebr_offensive": 0.8810033427242816,
      "portuguese_hate_speech": 0.7601866578782712,
      "tweetsentbr": 0.7620172222071487
    },
    "result_metrics_average": 0.8106464848360451,
    "result_metrics_npm": 0.7142994872542282
  },
  {
    "model": "qwen2-5-vl-32b-instruct",
    "name": "Qwen/Qwen2.5-VL-32B-Instruct (API)",
    "link": "https://huggingface.co/Qwen/Qwen2.5-VL-32B-Instruct",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 33.5,
    "result_metrics": {
      "enem_challenge": 0.8600419874037789,
      "bluex": 0.8052851182197497,
      "oab_exams": 0.6888382687927107,
      "assin2_sts": 0.7780549055529008,
      "assin2_rte": 0.9472975104201871,
      "faquad_nli": 0.8447190882122586,
      "hatebr_offensive": 0.8810695094657859,
      "portuguese_hate_speech": 0.769596419318135,
      "tweetsentbr": 0.7027408707999051
    },
    "result_metrics_average": 0.8086270753539346,
    "result_metrics_npm": 0.7137431116807307
  },
  {
    "model": "qwen-turbo-2024-11-01",
    "name": "Qwen-Turbo (2024-11-01)",
    "link": "https://www.alibabacloud.com/en/product/modelstudio",
    "date": "2025-04-03",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.7795661301609517,
      "bluex": 0.7079276773296245,
      "oab_exams": 0.6091116173120729,
      "assin2_sts": 0.7640477700456898,
      "assin2_rte": 0.9260451969385788,
      "faquad_nli": 0.8128063725490196,
      "hatebr_offensive": 0.8567933277676292,
      "portuguese_hate_speech": 0.7239183383094245,
      "tweetsentbr": 0.7038360447972195
    },
    "result_metrics_average": 0.7648947194678011,
    "result_metrics_npm": 0.6490441260447987
  },
  {
    "model": "gpt-4o-2024-08-06",
    "name": "GPT-4o (2024-08-06)",
    "link": "https://www.openai.com/",
    "date": "2025-04-09",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8530440867739678,
      "bluex": 0.7969401947148818,
      "oab_exams": 0.8200455580865603,
      "assin2_sts": 0.8078677969518289,
      "assin2_rte": 0.9407235712144604,
      "faquad_nli": 0.8654396266184885,
      "hatebr_offensive": 0.9320137873994456,
      "portuguese_hate_speech": 0.7512552701451538,
      "tweetsentbr": 0.7761054092302796
    },
    "result_metrics_average": 0.8381594779038962,
    "result_metrics_npm": 0.7566365012704034
  },
  {
    "model": "claude-3-7-sonnet-20250219",
    "name": "Claude 3.7 Sonnet (2025-02-19)",
    "link": "https://www.anthropic.com/",
    "date": "2025-04-04",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8901329601119664,
      "bluex": 0.8456189151599444,
      "oab_exams": 0.8355353075170843,
      "assin2_sts": 0.8087979933117393,
      "assin2_rte": 0.9472965253044003,
      "faquad_nli": 0.8097848807348216,
      "hatebr_offensive": 0.9125114739050616,
      "portuguese_hate_speech": 0.7698524509742262,
      "tweetsentbr": 0.7842080985659372
    },
    "result_metrics_average": 0.8448598450650201,
    "result_metrics_npm": 0.7622301724524201
  },
  {
    "model": "llama-4-scout-16e",
    "name": "meta-llama/Llama-4-Scout-17B-16E-Instruct (Groq API)",
    "link": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "date": "2025-04-05",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 109.0,
    "result_metrics": {
      "enem_challenge": 0.8054583624912526,
      "bluex": 0.721835883171071,
      "oab_exams": 0.6815489749430524,
      "assin2_sts": 0.7741640227983941,
      "assin2_rte": 0.9312877465954967,
      "faquad_nli": 0.8567037452287072,
      "hatebr_offensive": 0.8813700069483281,
      "portuguese_hate_speech": 0.7009183720501475,
      "tweetsentbr": 0.7277278145615887
    },
    "result_metrics_average": 0.7867794365320042,
    "result_metrics_npm": 0.6811274967601382
  },
  {
    "model": "llama-4-maverick-128e",
    "name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct (FireworksAI API)",
    "link": "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct",
    "date": "2025-04-05",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 402.0,
    "result_metrics": {
      "enem_challenge": 0.8775367389783065,
      "bluex": 0.8122392211404729,
      "oab_exams": 0.7284738041002278,
      "assin2_sts": 0.7333246903202654,
      "assin2_rte": 0.9329419027588105,
      "faquad_nli": 0.7823695413019562,
      "hatebr_offensive": 0.9047550357833591,
      "portuguese_hate_speech": 0.7231286908077994,
      "tweetsentbr": 0.7165294511353842
    },
    "result_metrics_average": 0.8012554529251759,
    "result_metrics_npm": 0.6997802853383734
  },
  {
    "model": "gemma-3-27b-it",
    "name": "google/gemma-3-27b-it (GoogleAI API)",
    "link": "https://huggingface.co/google/gemma-3-27b-it",
    "date": "2025-04-08",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 27.4,
    "result_metrics": {
      "enem_challenge": 0.814555633310007,
      "bluex": 0.7385257301808067,
      "oab_exams": 0.6159453302961275,
      "assin2_sts": 0.8147646517017526,
      "assin2_rte": 0.9411147367212748,
      "faquad_nli": 0.8143210816987241,
      "hatebr_offensive": 0.8729414870796344,
      "portuguese_hate_speech": 0.7264768061421736,
      "tweetsentbr": 0.7448943824093712
    },
    "result_metrics_average": 0.7870599821710969,
    "result_metrics_npm": 0.6795192293708728
  },
  {
    "model": "deepseek-v3_1",
    "name": "deepseek-ai/DeepSeek-V3.1 (API)",
    "link": "https://huggingface.co/deepseek-ai/DeepSeek-V3.1",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 685.0,
    "result_metrics": {
      "enem_challenge": 0.8887333799860042,
      "bluex": 0.8178025034770514,
      "oab_exams": 0.7038724373576309,
      "assin2_sts": 0.8082104938836681,
      "assin2_rte": 0.949346100935343,
      "faquad_nli": 0.8406862745098038,
      "hatebr_offensive": 0.9211711711711712,
      "portuguese_hate_speech": 0.7423067698027224,
      "tweetsentbr": 0.7584190029617157
    },
    "result_metrics_average": 0.8256164593427902,
    "result_metrics_npm": 0.7370296776379883
  },
  {
    "model": "kimi-k2",
    "name": "moonshotai/Kimi-K2-Instruct (API)",
    "link": "https://huggingface.co/moonshotai/Kimi-K2-Instruct",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "English",
    "model_type": "chat",
    "params": 1000.0,
    "result_metrics": {
      "enem_challenge": 0.8789363191042687,
      "bluex": 0.827538247566064,
      "oab_exams": 0.6970387243735763,
      "assin2_sts": 0.7760142475181766,
      "assin2_rte": 0.9436236879837872,
      "faquad_nli": 0.8531466083708024,
      "hatebr_offensive": 0.8941562198649953,
      "portuguese_hate_speech": 0.7535500455551216,
      "tweetsentbr": 0.7428370464802363
    },
    "result_metrics_average": 0.8185379052018921,
    "result_metrics_npm": 0.7275664672121565
  },
  {
    "model": "sabia-3-1-2025-05-08",
    "name": "Sabiá-3.1 (2025-05-08)",
    "link": "https://www.maritaca.ai/",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "Portuguese",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8894331700489853,
      "bluex": 0.8178025034770514,
      "oab_exams": 0.9202733485193622,
      "assin2_sts": 0.8340482244079774,
      "assin2_rte": 0.9423587830430271,
      "faquad_nli": 0.7585644282172838,
      "hatebr_offensive": 0.8308611905928697,
      "portuguese_hate_speech": 0.7543648446960096,
      "tweetsentbr": 0.7398273232644036
    },
    "result_metrics_average": 0.8319482018074411,
    "result_metrics_npm": 0.7331597943893793
  },
  {
    "model": "sabia-3-2024-12-11",
    "name": "Sabiá-3 (2024-12-11)",
    "link": "https://www.maritaca.ai/",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "Portuguese",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8691392582225332,
      "bluex": 0.7872044506258693,
      "oab_exams": 0.8009111617312072,
      "assin2_sts": 0.7850131735268517,
      "assin2_rte": 0.9390382723900459,
      "faquad_nli": 0.7968815254182839,
      "hatebr_offensive": 0.8608047226969084,
      "portuguese_hate_speech": 0.7474723628059027,
      "tweetsentbr": 0.7360466511491278
    },
    "result_metrics_average": 0.8136123976185256,
    "result_metrics_npm": 0.7144701465854594
  },
  {
    "model": "sabiazinho-3",
    "name": "Sabiázinho-3 (2025-02-06)",
    "link": "https://www.maritaca.ai/",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "Portuguese",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8439468159552135,
      "bluex": 0.7343532684283728,
      "oab_exams": 0.8159453302961276,
      "assin2_sts": 0.8091208202474276,
      "assin2_rte": 0.9370511249219384,
      "faquad_nli": 0.7715445403113343,
      "hatebr_offensive": 0.8604320820258526,
      "portuguese_hate_speech": 0.7129508077161507,
      "tweetsentbr": 0.6798994954276046
    },
    "result_metrics_average": 0.7961382539255579,
    "result_metrics_npm": 0.685954609257193
  },
  {
    "model": "grok-3-mini",
    "name": "Grok 3 Mini [reasoning] (API)",
    "link": "https://x.ai/",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.9412176347095871,
      "bluex": 0.8984700973574409,
      "oab_exams": 0.7075170842824602,
      "assin2_sts": 0.7846153023166811,
      "assin2_rte": 0.9369863526592658,
      "faquad_nli": 0.8974457100080231,
      "hatebr_offensive": 0.9264201247592199,
      "portuguese_hate_speech": 0.6868265194640906,
      "tweetsentbr": 0.7496188889954271
    },
    "result_metrics_average": 0.836568634950244,
    "result_metrics_npm": 0.7505284631974409
  },
  {
    "model": "gpt-5-nano-2025-08-07",
    "name": "GPT 5 Nano [reasoning] (2025-08-07)",
    "link": "https://www.openai.com/",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.9013296011196641,
      "bluex": 0.8525730180806675,
      "oab_exams": 0.5913439635535308,
      "assin2_sts": 0.7157982790377855,
      "assin2_rte": 0.9493397775671237,
      "faquad_nli": 0.802473455931782,
      "hatebr_offensive": 0.9169693400085076,
      "portuguese_hate_speech": 0.7166590126291619,
      "tweetsentbr": 0.7385573150818597
    },
    "result_metrics_average": 0.7983381958900091,
    "result_metrics_npm": 0.699331432280926
  },
  {
    "model": "gpt-5-mini-2025-08-07",
    "name": "GPT 5 Mini [reasoning] (2025-08-07)",
    "link": "https://www.openai.com/",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.9566130160951715,
      "bluex": 0.913769123783032,
      "oab_exams": 0.7184510250569476,
      "assin2_sts": 0.8151992531421179,
      "assin2_rte": 0.9486789502727531,
      "faquad_nli": 0.7959895379250218,
      "hatebr_offensive": 0.9306148454596409,
      "portuguese_hate_speech": 0.7476857189919288,
      "tweetsentbr": 0.7208063363431595
    },
    "result_metrics_average": 0.8386453118966414,
    "result_metrics_npm": 0.7509015993727701
  },
  {
    "model": "gpt-5_reasoning_minimal-2025-08-07",
    "name": "GPT 5 [reasoning: minimal] (2025-08-07)",
    "link": "https://www.openai.com/",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8432470258922323,
      "bluex": 0.7885952712100139,
      "oab_exams": 0.8104783599088838,
      "assin2_sts": 0.7497712012355019,
      "assin2_rte": 0.9497544911228829,
      "faquad_nli": 0.9049032312001003,
      "hatebr_offensive": 0.9233018502276624,
      "portuguese_hate_speech": 0.7502183789864052,
      "tweetsentbr": 0.7877925879277
    },
    "result_metrics_average": 0.8342291553012646,
    "result_metrics_npm": 0.7560493865775754
  },
  {
    "model": "gemini-2_5_flash_lite",
    "name": "Gemini 2.5 Flash Lite",
    "link": "https://aistudio.google.com",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.8257522743177047,
      "bluex": 0.7329624478442281,
      "oab_exams": 0.6783599088838269,
      "assin2_sts": 0.8399704980607736,
      "assin2_rte": 0.9095975398498664,
      "faquad_nli": 0.8289944389172974,
      "hatebr_offensive": 0.8733247194142535,
      "portuguese_hate_speech": 0.7511757826108595,
      "tweetsentbr": 0.7696375203962748
    },
    "result_metrics_average": 0.8010861255883428,
    "result_metrics_npm": 0.6977608761930978
  },
  {
    "model": "gemini-2_5_flash_lite",
    "name": "Gemini 2.5 Flash Lite [reasoning: low]",
    "link": "https://aistudio.google.com",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.9013296011196641,
      "bluex": 0.8400556328233658,
      "oab_exams": 0.6943052391799545,
      "assin2_sts": 0.755562697236674,
      "assin2_rte": 0.9464858475885941,
      "faquad_nli": 0.8703946691365647,
      "hatebr_offensive": 0.9080576836597871,
      "portuguese_hate_speech": 0.7416269940699909,
      "tweetsentbr": 0.7520493635069894
    },
    "result_metrics_average": 0.8233186364801761,
    "result_metrics_npm": 0.7360224650390731
  },
  {
    "model": "gemini-2_5_flash",
    "name": "Gemini 2.5 Flash",
    "link": "https://aistudio.google.com",
    "date": "2025-09-01",
    "status": "full",
    "main_language": "English",
    "model_type": "proprietary",
    "result_metrics": {
      "enem_challenge": 0.9097270818754374,
      "bluex": 0.8650904033379694,
      "oab_exams": 0.8355353075170843,
      "assin2_sts": 0.8714666962450285,
      "assin2_rte": 0.9386350099968783,
      "faquad_nli": 0.8578569197125898,
      "hatebr_offensive": 0.8933375064862327,
      "portuguese_hate_speech": 0.7502527990365506,
      "tweetsentbr": 0.7801286503914011
    },
    "result_metrics_average": 0.8557811527332413,
    "result_metrics_npm": 0.7734849178213028
  }
]
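
Each entry reports nine per-task scores under result_metrics; result_metrics_average appears to be the plain arithmetic mean of those nine scores, while the derivation of result_metrics_npm is not specified here and is therefore only read back, not recomputed. The sketch below, assuming the array above is saved locally as results.json (a hypothetical filename), reloads the data, checks the stored average against a recomputed mean, and prints the top models by that average.

import json

# Assumption: the JSON array above has been saved as "results.json".
with open("results.json", encoding="utf-8") as f:
    entries = json.load(f)

for entry in entries:
    scores = entry["result_metrics"]
    # Recompute the arithmetic mean of the nine task scores and compare it
    # with the stored result_metrics_average as a consistency check.
    mean = sum(scores.values()) / len(scores)
    if abs(mean - entry["result_metrics_average"]) > 1e-6:
        print(f'warning: stored average differs for {entry["name"]}')

# Rank models by their reported average score, best first.
ranked = sorted(entries, key=lambda e: e["result_metrics_average"], reverse=True)
for entry in ranked[:5]:
    print(f'{entry["name"]}: {entry["result_metrics_average"]:.4f}')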