[
{
"model": "sabia-2-small",
"name": "Sabiá-2 Small",
"link": "https://www.maritaca.ai/",
"date": "2024-04-12",
"status": "full",
"main_language": "Portuguese",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.7172848145556333,
"bluex": 0.5549374130737135,
"oab_exams": 0.6364464692482916,
"assin2_sts": 0.7053302344881672,
"assin2_rte": 0.9121728362223306,
"faquad_nli": 0.7575848453041435,
"hatebr_offensive": 0.753800795680591,
"portuguese_hate_speech": 0.6975326368290793,
"tweetsentbr": 0.7119699374276466
},
"result_metrics_average": 0.7163399980921773,
"result_metrics_npm": 0.5744541501392351
},
{
"model": "sabia-2-medium",
"name": "Sabiá-2 Medium",
"link": "https://www.maritaca.ai/",
"date": "2024-04-13",
"status": "full",
"main_language": "Portuguese",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8180545836249126,
"bluex": 0.717663421418637,
"oab_exams": 0.7321184510250569,
"assin2_sts": 0.7804108376537757,
"assin2_rte": 0.923459363368553,
"faquad_nli": 0.7657657657657658,
"hatebr_offensive": 0.8349989882997386,
"portuguese_hate_speech": 0.7379326358571694,
"tweetsentbr": 0.7269533040381798
},
"result_metrics_average": 0.7819285945613098,
"result_metrics_npm": 0.6676121786922709
},
{
"model": "gpt-3.5-turbo-0125",
"name": "GPT-3.5 Turbo (0125)",
"link": "https://www.openai.com/",
"date": "2024-03-08",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.7214835549335199,
"bluex": 0.6244784422809457,
"oab_exams": 0.5430523917995445,
"assin2_sts": 0.7378460201077941,
"assin2_rte": 0.8823038414050672,
"faquad_nli": 0.746353108609074,
"hatebr_offensive": 0.8056205941193919,
"portuguese_hate_speech": 0.7363692688971499,
"tweetsentbr": 0.7028981330613626
},
"result_metrics_average": 0.7222672616904278,
"result_metrics_npm": 0.5841504766165372
},
{
"model": "claude-3-haiku-20240307",
"name": "Claude-3 Haiku (20240307)",
"link": "https://www.claude.ai/",
"date": "2024-04-13",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.7718684394681595,
"bluex": 0.6662030598052852,
"oab_exams": 0.626879271070615,
"assin2_sts": 0.7892124744168747,
"assin2_rte": 0.9184462138121732,
"faquad_nli": 0.6340996599941455,
"hatebr_offensive": 0.8023698759439051,
"portuguese_hate_speech": 0.7342166269560177,
"tweetsentbr": 0.7303315733000207
},
"result_metrics_average": 0.7415141327519107,
"result_metrics_npm": 0.6037151240886439
},
{
"model": "gemini-1.0-pro",
"name": "Gemini 1.0 Pro",
"link": "https://ai.google.dev/",
"date": "2024-03-08",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.7130860741777467,
"bluex": 0.5869262865090403,
"oab_exams": 0.4988610478359909,
"assin2_sts": 0.7058831239763663,
"assin2_rte": 0.8945993304651698,
"faquad_nli": 0.7070913567220611,
"hatebr_offensive": 0.8086330094493972,
"portuguese_hate_speech": 0.699119105113102,
"tweetsentbr": 0.6803240476660983
},
"result_metrics_average": 0.6993914868794414,
"result_metrics_npm": 0.551208000273598
},
{
"model": "gemini-1.5-pro-preview-0409",
"name": "Gemini 1.5 Pro Preview (0409)",
"link": "https://cloud.google.com/vertex-ai",
"date": "2024-04-15",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8509447165850245,
"bluex": 0.7719054242002782,
"oab_exams": 0.6888382687927107,
"assin2_sts": 0.8159702278408203,
"assin2_rte": 0.9328989988467518,
"faquad_nli": 0.7290756302521009,
"hatebr_offensive": 0.8697698647467024,
"portuguese_hate_speech": 0.7539414414414414,
"tweetsentbr": 0.772785080895884
},
"result_metrics_average": 0.7984588504001905,
"result_metrics_npm": 0.6908188311933006
},
{
"model": "deepseek-v2-chat",
"name": "DeepSeek-V2 Chat (API)",
"link": "https://www.deepseek.com/",
"date": "2024-05-18",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.7844646606018194,
"bluex": 0.6954102920723226,
"oab_exams": 0.564009111617312,
"assin2_sts": 0.8533174657651231,
"assin2_rte": 0.9440170304568147,
"faquad_nli": 0.7995469048381548,
"hatebr_offensive": 0.8842986491071644,
"portuguese_hate_speech": 0.7271736342651962,
"tweetsentbr": 0.6835304759163984
},
"result_metrics_average": 0.7706409138489229,
"result_metrics_npm": 0.655901521190756
},
{
"model": "gemini-1.5-flash-preview-0514",
"name": "Gemini 1.5 Flash Preview (0514)",
"link": "https://cloud.google.com/vertex-ai",
"date": "2024-05-18",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8264520643806857,
"bluex": 0.7482614742698191,
"oab_exams": 0.6419134396355353,
"assin2_sts": 0.841655158151231,
"assin2_rte": 0.9362097477374545,
"faquad_nli": 0.8092185592185592,
"hatebr_offensive": 0.9099110141445836,
"portuguese_hate_speech": 0.6875904275305673,
"tweetsentbr": 0.7219800292667018
},
"result_metrics_average": 0.7914657682594597,
"result_metrics_npm": 0.6834036936130392
},
{
"model": "gemini-1.5-flash-001",
"name": "Gemini 1.5 Flash (001)",
"link": "https://cloud.google.com/vertex-ai",
"date": "2024-08-09",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8306508047585724,
"bluex": 0.7579972183588317,
"oab_exams": 0.6446469248291572,
"assin2_sts": 0.838806085610371,
"assin2_rte": 0.9366169973822607,
"faquad_nli": 0.7963910785668922,
"hatebr_offensive": 0.9092078461170015,
"portuguese_hate_speech": 0.6932563987219857,
"tweetsentbr": 0.7312948963367732
},
"result_metrics_average": 0.7932075834090939,
"result_metrics_npm": 0.6855338135928848
},
{
"model": "gpt-4o-mini-2024-07-18",
"name": "GPT 4o Mini (2024-07-18)",
"link": "https://www.openai.com/",
"date": "2024-07-25",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.7669699090272918,
"bluex": 0.6842837273991655,
"oab_exams": 0.6013667425968109,
"assin2_sts": 0.7259038954527597,
"assin2_rte": 0.942809846745341,
"faquad_nli": 0.819807735300693,
"hatebr_offensive": 0.8682357029532165,
"portuguese_hate_speech": 0.7501413502853012,
"tweetsentbr": 0.7509303825869922
},
"result_metrics_average": 0.7678276991497301,
"result_metrics_npm": 0.6595966999910003
},
{
"model": "nemotron-4-340b-instruct",
"name": "nvidia/Nemotron-4-340B-Instruct (Nvidia API)",
"link": "https://huggingface.co/nvidia/Nemotron-4-340B-Instruct",
"date": "2024-06-30",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 340.0,
"result_metrics": {
"enem_challenge": 0.6648005598320503,
"bluex": 0.6578581363004172,
"oab_exams": 0.7020501138952164,
"assin2_sts": 0.7857731021403329,
"assin2_rte": 0.9489354458928496,
"faquad_nli": 0.8194444444444444,
"hatebr_offensive": 0.8641580001234928,
"portuguese_hate_speech": 0.7761835184102864,
"tweetsentbr": 0.780880021326841
},
"result_metrics_average": 0.7777870380406591,
"result_metrics_npm": 0.6740728488043128
},
{
"model": "llama_405b_instruct",
"name": "meta-llama/Llama-3.1-405B-Instruct (Vertex AI)",
"link": "https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct",
"date": "2024-08-20",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 406.0,
"result_metrics": {
"enem_challenge": 0.8523442967109867,
"bluex": 0.8011126564673157,
"oab_exams": 0.7640091116173121,
"assin2_sts": 0.7888441732870783,
"assin2_rte": 0.9476445477916471,
"faquad_nli": 0.825063276593557,
"hatebr_offensive": 0.9073940659389119,
"portuguese_hate_speech": 0.7191480935512969,
"tweetsentbr": 0.7821434639106575
},
"result_metrics_average": 0.8208559650965292,
"result_metrics_npm": 0.7286932366792048
},
{
"model": "sabia-3-2024-07-15",
"name": "Sabiá-3 (2024-07-15)",
"link": "https://www.maritaca.ai/",
"date": "2024-08-20",
"status": "full",
"main_language": "Portuguese",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8789363191042687,
"bluex": 0.7899860917941586,
"oab_exams": 0.8391799544419134,
"assin2_sts": 0.8253863689009022,
"assin2_rte": 0.9477034821619312,
"faquad_nli": 0.8243848812618203,
"hatebr_offensive": 0.8278737774590023,
"portuguese_hate_speech": 0.7241071428571428,
"tweetsentbr": 0.7510613086648664
},
"result_metrics_average": 0.8231799251828895,
"result_metrics_npm": 0.7241097388486535
},
{
"model": "llama3_3_70b",
"name": "meta-llama/Llama-3.3-70B-Instruct (Vertex AI)",
"link": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 70.6,
"result_metrics": {
"enem_challenge": 0.8320503848845346,
"bluex": 0.7593880389429764,
"oab_exams": 0.6733485193621868,
"assin2_sts": 0.7275578599896508,
"assin2_rte": 0.9407071010860484,
"faquad_nli": 0.8787563033858187,
"hatebr_offensive": 0.9024358249091997,
"portuguese_hate_speech": 0.7042216543825339,
"tweetsentbr": 0.7076749453899551
},
"result_metrics_average": 0.791793403592545,
"result_metrics_npm": 0.6924788466103498
},
{
"model": "llama3_2_90b",
"name": "meta-llama/Llama-3.2-90B-Vision-Instruct (Vertex AI)",
"link": "https://huggingface.co/meta-llama/Llama-3.2-90B-Vision-Instruct",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 88.6,
"result_metrics": {
"enem_challenge": 0.821553533939818,
"bluex": 0.7482614742698191,
"oab_exams": 0.7061503416856492,
"assin2_sts": 0.7368518566379951,
"assin2_rte": 0.9216548775103446,
"faquad_nli": 0.8632015306122449,
"hatebr_offensive": 0.8965270877302478,
"portuguese_hate_speech": 0.7059127552081422,
"tweetsentbr": 0.7352076218951984
},
"result_metrics_average": 0.7928134532766066,
"result_metrics_npm": 0.6915070359785283
},
{
"model": "gemini-1.5-flash-002",
"name": "Gemini 1.5 Flash (002)",
"link": "https://cloud.google.com/vertex-ai",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8327501749475158,
"bluex": 0.760778859527121,
"oab_exams": 0.6369020501138952,
"assin2_sts": 0.8380176734291938,
"assin2_rte": 0.941176117215237,
"faquad_nli": 0.8360786822325283,
"hatebr_offensive": 0.9046145161133335,
"portuguese_hate_speech": 0.7406414313684444,
"tweetsentbr": 0.6997509880131249
},
"result_metrics_average": 0.7989678325511549,
"result_metrics_npm": 0.6979777100000177
},
{
"model": "gemini-1.5-flash-8b-001",
"name": "Gemini 1.5 Flash 8B (001)",
"link": "https://aistudio.google.com",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.7641707487753674,
"bluex": 0.6467315716272601,
"oab_exams": 0.5603644646924829,
"assin2_sts": 0.7638946799836569,
"assin2_rte": 0.9329452628161146,
"faquad_nli": 0.7937022965448601,
"hatebr_offensive": 0.850497640901663,
"portuguese_hate_speech": 0.7391317606010173,
"tweetsentbr": 0.7376684798923661
},
"result_metrics_average": 0.7543452117594209,
"result_metrics_npm": 0.6359642422837162
},
{
"model": "gemini-2.0-flash-001",
"name": "Gemini 2.0 Flash (001)",
"link": "https://cloud.google.com/vertex-ai",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8789363191042687,
"bluex": 0.803894297635605,
"oab_exams": 0.7767653758542141,
"assin2_sts": 0.8440142633742483,
"assin2_rte": 0.9305165510724053,
"faquad_nli": 0.7533651260745065,
"hatebr_offensive": 0.8890432813545366,
"portuguese_hate_speech": 0.7655392938544128,
"tweetsentbr": 0.7652542619451799
},
"result_metrics_average": 0.8230365300299308,
"result_metrics_npm": 0.7253778946033657
},
{
"model": "gemini-2.0-flash-lite-001",
"name": "Gemini 2.0 Flash Lite (001)",
"link": "https://cloud.google.com/vertex-ai",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8509447165850245,
"bluex": 0.7872044506258693,
"oab_exams": 0.7061503416856492,
"assin2_sts": 0.8492479991621328,
"assin2_rte": 0.9216548775103446,
"faquad_nli": 0.7652777777777777,
"hatebr_offensive": 0.8522499647780968,
"portuguese_hate_speech": 0.7501387383201693,
"tweetsentbr": 0.7675746509081982
},
"result_metrics_average": 0.8056048352614735,
"result_metrics_npm": 0.6986042497176748
},
{
"model": "gemini-2.5-pro-exp-03-25",
"name": "Gemini 2.5 Pro Experimental [reasoning] (0325)",
"link": "https://aistudio.google.com",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.9769069279216235,
"bluex": 0.9499304589707928,
"oab_exams": 0.9216400911161731,
"assin2_sts": 0.837785744915033,
"assin2_rte": 0.9415510158830285,
"faquad_nli": 0.8738735797309651,
"hatebr_offensive": 0.9248478168290788,
"portuguese_hate_speech": 0.7336133105156697,
"tweetsentbr": 0.7928002469993594
},
"result_metrics_average": 0.8836610214313025,
"result_metrics_npm": 0.8134610556797854
},
{
"model": "deepSeek-v3-0324",
"name": "deepseek-ai/DeepSeek-V3-0324 (API)",
"link": "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 685.0,
"result_metrics": {
"enem_challenge": 0.8901329601119664,
"bluex": 0.8414464534075105,
"oab_exams": 0.7148063781321184,
"assin2_sts": 0.8145997097875548,
"assin2_rte": 0.9421860387625551,
"faquad_nli": 0.796751127001399,
"hatebr_offensive": 0.9060129756724185,
"portuguese_hate_speech": 0.7262480672025753,
"tweetsentbr": 0.7037326638925795
},
"result_metrics_average": 0.8151018193300753,
"result_metrics_npm": 0.7165435243787625
},
{
"model": "qwen2-5-vl-72b-instruct",
"name": "Qwen/Qwen2.5-VL-72B-Instruct (API)",
"link": "https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 73.4,
"result_metrics": {
"enem_challenge": 0.8600419874037789,
"bluex": 0.8052851182197497,
"oab_exams": 0.6888382687927107,
"assin2_sts": 0.7595538567467497,
"assin2_rte": 0.9472975104201871,
"faquad_nli": 0.8447190882122586,
"hatebr_offensive": 0.8810695094657859,
"portuguese_hate_speech": 0.769596419318135,
"tweetsentbr": 0.5644757075411895
},
"result_metrics_average": 0.7912086073467273,
"result_metrics_npm": 0.6888261361422966
},
{
"model": "qwen2-5-72b-instruct",
"name": "Qwen/Qwen2.5-72B-Instruct (API)",
"link": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 72.7,
"result_metrics": {
"enem_challenge": 0.8432470258922323,
"bluex": 0.780250347705146,
"oab_exams": 0.675626423690205,
"assin2_sts": 0.8230708844558656,
"assin2_rte": 0.9509720145268106,
"faquad_nli": 0.8194444444444444,
"hatebr_offensive": 0.8810033427242816,
"portuguese_hate_speech": 0.7601866578782712,
"tweetsentbr": 0.7620172222071487
},
"result_metrics_average": 0.8106464848360451,
"result_metrics_npm": 0.7142994872542282
},
{
"model": "qwen2-5-vl-32b-instruct",
"name": "Qwen/Qwen2.5-VL-32B-Instruct (API)",
"link": "https://huggingface.co/Qwen/Qwen2.5-VL-32B-Instruct",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 33.5,
"result_metrics": {
"enem_challenge": 0.8600419874037789,
"bluex": 0.8052851182197497,
"oab_exams": 0.6888382687927107,
"assin2_sts": 0.7780549055529008,
"assin2_rte": 0.9472975104201871,
"faquad_nli": 0.8447190882122586,
"hatebr_offensive": 0.8810695094657859,
"portuguese_hate_speech": 0.769596419318135,
"tweetsentbr": 0.7027408707999051
},
"result_metrics_average": 0.8086270753539346,
"result_metrics_npm": 0.7137431116807307
},
{
"model": "qwen-turbo-2024-11-01",
"name": "Qwen-Turbo (2024-11-01)",
"link": "https://www.alibabacloud.com/en/product/modelstudio",
"date": "2025-04-03",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.7795661301609517,
"bluex": 0.7079276773296245,
"oab_exams": 0.6091116173120729,
"assin2_sts": 0.7640477700456898,
"assin2_rte": 0.9260451969385788,
"faquad_nli": 0.8128063725490196,
"hatebr_offensive": 0.8567933277676292,
"portuguese_hate_speech": 0.7239183383094245,
"tweetsentbr": 0.7038360447972195
},
"result_metrics_average": 0.7648947194678011,
"result_metrics_npm": 0.6490441260447987
},
{
"model": "gpt-4o-2024-08-06",
"name": "GPT-4o (2024-08-06)",
"link": "https://www.openai.com/",
"date": "2025-04-09",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8530440867739678,
"bluex": 0.7969401947148818,
"oab_exams": 0.8200455580865603,
"assin2_sts": 0.8078677969518289,
"assin2_rte": 0.9407235712144604,
"faquad_nli": 0.8654396266184885,
"hatebr_offensive": 0.9320137873994456,
"portuguese_hate_speech": 0.7512552701451538,
"tweetsentbr": 0.7761054092302796
},
"result_metrics_average": 0.8381594779038962,
"result_metrics_npm": 0.7566365012704034
},
{
"model": "claude-3-7-sonnet-20250219",
"name": "Claude 3.7 Sonnet (2025-02-19)",
"link": "https://www.anthropic.com/",
"date": "2025-04-04",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8901329601119664,
"bluex": 0.8456189151599444,
"oab_exams": 0.8355353075170843,
"assin2_sts": 0.8087979933117393,
"assin2_rte": 0.9472965253044003,
"faquad_nli": 0.8097848807348216,
"hatebr_offensive": 0.9125114739050616,
"portuguese_hate_speech": 0.7698524509742262,
"tweetsentbr": 0.7842080985659372
},
"result_metrics_average": 0.8448598450650201,
"result_metrics_npm": 0.7622301724524201
},
{
"model": "llama-4-scout-16e",
"name": "meta-llama/Llama-4-Scout-17B-16E-Instruct (Groq API)",
"link": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct",
"date": "2025-04-05",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 109.0,
"result_metrics": {
"enem_challenge": 0.8054583624912526,
"bluex": 0.721835883171071,
"oab_exams": 0.6815489749430524,
"assin2_sts": 0.7741640227983941,
"assin2_rte": 0.9312877465954967,
"faquad_nli": 0.8567037452287072,
"hatebr_offensive": 0.8813700069483281,
"portuguese_hate_speech": 0.7009183720501475,
"tweetsentbr": 0.7277278145615887
},
"result_metrics_average": 0.7867794365320042,
"result_metrics_npm": 0.6811274967601382
},
{
"model": "llama-4-maverick-128e",
"name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct (FireworksAI API)",
"link": "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct",
"date": "2025-04-05",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 402.0,
"result_metrics": {
"enem_challenge": 0.8775367389783065,
"bluex": 0.8122392211404729,
"oab_exams": 0.7284738041002278,
"assin2_sts": 0.7333246903202654,
"assin2_rte": 0.9329419027588105,
"faquad_nli": 0.7823695413019562,
"hatebr_offensive": 0.9047550357833591,
"portuguese_hate_speech": 0.7231286908077994,
"tweetsentbr": 0.7165294511353842
},
"result_metrics_average": 0.8012554529251759,
"result_metrics_npm": 0.6997802853383734
},
{
"model": "gemma-3-27b-it",
"name": "google/gemma-3-27b-it (GoogleAI API)",
"link": "https://huggingface.co/google/gemma-3-27b-it",
"date": "2025-04-08",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 27.4,
"result_metrics": {
"enem_challenge": 0.814555633310007,
"bluex": 0.7385257301808067,
"oab_exams": 0.6159453302961275,
"assin2_sts": 0.8147646517017526,
"assin2_rte": 0.9411147367212748,
"faquad_nli": 0.8143210816987241,
"hatebr_offensive": 0.8729414870796344,
"portuguese_hate_speech": 0.7264768061421736,
"tweetsentbr": 0.7448943824093712
},
"result_metrics_average": 0.7870599821710969,
"result_metrics_npm": 0.6795192293708728
},
{
"model": "deepseek-v3_1",
"name": "deepseek-ai/DeepSeek-V3.1 (API)",
"link": "https://huggingface.co/deepseek-ai/DeepSeek-V3.1",
"date": "2025-09-01",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 685.0,
"result_metrics": {
"enem_challenge": 0.8887333799860042,
"bluex": 0.8178025034770514,
"oab_exams": 0.7038724373576309,
"assin2_sts": 0.8082104938836681,
"assin2_rte": 0.949346100935343,
"faquad_nli": 0.8406862745098038,
"hatebr_offensive": 0.9211711711711712,
"portuguese_hate_speech": 0.7423067698027224,
"tweetsentbr": 0.7584190029617157
},
"result_metrics_average": 0.8256164593427902,
"result_metrics_npm": 0.7370296776379883
},
{
"model": "kimi-k2",
"name": "moonshotai/Kimi-K2-Instruct (API)",
"link": "https://huggingface.co/moonshotai/Kimi-K2-Instruct",
"date": "2025-09-01",
"status": "full",
"main_language": "English",
"model_type": "chat",
"params": 1000.0,
"result_metrics": {
"enem_challenge": 0.8789363191042687,
"bluex": 0.827538247566064,
"oab_exams": 0.6970387243735763,
"assin2_sts": 0.7760142475181766,
"assin2_rte": 0.9436236879837872,
"faquad_nli": 0.8531466083708024,
"hatebr_offensive": 0.8941562198649953,
"portuguese_hate_speech": 0.7535500455551216,
"tweetsentbr": 0.7428370464802363
},
"result_metrics_average": 0.8185379052018921,
"result_metrics_npm": 0.7275664672121565
},
{
"model": "sabia-3-1-2025-05-08",
"name": "Sabiá-3.1 (2025-05-08)",
"link": "https://www.maritaca.ai/",
"date": "2025-09-01",
"status": "full",
"main_language": "Portuguese",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8894331700489853,
"bluex": 0.8178025034770514,
"oab_exams": 0.9202733485193622,
"assin2_sts": 0.8340482244079774,
"assin2_rte": 0.9423587830430271,
"faquad_nli": 0.7585644282172838,
"hatebr_offensive": 0.8308611905928697,
"portuguese_hate_speech": 0.7543648446960096,
"tweetsentbr": 0.7398273232644036
},
"result_metrics_average": 0.8319482018074411,
"result_metrics_npm": 0.7331597943893793
},
{
"model": "sabia-3-2024-12-11",
"name": "Sabiá-3 (2024-12-11)",
"link": "https://www.maritaca.ai/",
"date": "2025-09-01",
"status": "full",
"main_language": "Portuguese",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8691392582225332,
"bluex": 0.7872044506258693,
"oab_exams": 0.8009111617312072,
"assin2_sts": 0.7850131735268517,
"assin2_rte": 0.9390382723900459,
"faquad_nli": 0.7968815254182839,
"hatebr_offensive": 0.8608047226969084,
"portuguese_hate_speech": 0.7474723628059027,
"tweetsentbr": 0.7360466511491278
},
"result_metrics_average": 0.8136123976185256,
"result_metrics_npm": 0.7144701465854594
},
{
"model": "sabiazinho-3",
"name": "Sabiázinho-3 (2025-02-06)",
"link": "https://www.maritaca.ai/",
"date": "2025-09-01",
"status": "full",
"main_language": "Portuguese",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8439468159552135,
"bluex": 0.7343532684283728,
"oab_exams": 0.8159453302961276,
"assin2_sts": 0.8091208202474276,
"assin2_rte": 0.9370511249219384,
"faquad_nli": 0.7715445403113343,
"hatebr_offensive": 0.8604320820258526,
"portuguese_hate_speech": 0.7129508077161507,
"tweetsentbr": 0.6798994954276046
},
"result_metrics_average": 0.7961382539255579,
"result_metrics_npm": 0.685954609257193
},
{
"model": "grok-3-mini",
"name": "Grok 3 Mini [reasoning] (API)",
"link": "https://x.ai/",
"date": "2025-09-01",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.9412176347095871,
"bluex": 0.8984700973574409,
"oab_exams": 0.7075170842824602,
"assin2_sts": 0.7846153023166811,
"assin2_rte": 0.9369863526592658,
"faquad_nli": 0.8974457100080231,
"hatebr_offensive": 0.9264201247592199,
"portuguese_hate_speech": 0.6868265194640906,
"tweetsentbr": 0.7496188889954271
},
"result_metrics_average": 0.836568634950244,
"result_metrics_npm": 0.7505284631974409
},
{
"model": "gpt-5-nano-2025-08-07",
"name": "GPT 5 Nano [reasoning] (2025-08-07)",
"link": "https://www.openai.com/",
"date": "2025-09-01",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.9013296011196641,
"bluex": 0.8525730180806675,
"oab_exams": 0.5913439635535308,
"assin2_sts": 0.7157982790377855,
"assin2_rte": 0.9493397775671237,
"faquad_nli": 0.802473455931782,
"hatebr_offensive": 0.9169693400085076,
"portuguese_hate_speech": 0.7166590126291619,
"tweetsentbr": 0.7385573150818597
},
"result_metrics_average": 0.7983381958900091,
"result_metrics_npm": 0.699331432280926
},
{
"model": "gpt-5-mini-2025-08-07",
"name": "GPT 5 Mini [reasoning] (2025-08-07)",
"link": "https://www.openai.com/",
"date": "2025-09-01",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.9566130160951715,
"bluex": 0.913769123783032,
"oab_exams": 0.7184510250569476,
"assin2_sts": 0.8151992531421179,
"assin2_rte": 0.9486789502727531,
"faquad_nli": 0.7959895379250218,
"hatebr_offensive": 0.9306148454596409,
"portuguese_hate_speech": 0.7476857189919288,
"tweetsentbr": 0.7208063363431595
},
"result_metrics_average": 0.8386453118966414,
"result_metrics_npm": 0.7509015993727701
},
{
"model": "gpt-5_reasoning_minimal-2025-08-07",
"name": "GPT 5 [reasoning: minimal] (2025-08-07)",
"link": "https://www.openai.com/",
"date": "2025-09-01",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8432470258922323,
"bluex": 0.7885952712100139,
"oab_exams": 0.8104783599088838,
"assin2_sts": 0.7497712012355019,
"assin2_rte": 0.9497544911228829,
"faquad_nli": 0.9049032312001003,
"hatebr_offensive": 0.9233018502276624,
"portuguese_hate_speech": 0.7502183789864052,
"tweetsentbr": 0.7877925879277
},
"result_metrics_average": 0.8342291553012646,
"result_metrics_npm": 0.7560493865775754
},
{
"model": "gemini-2_5_flash_lite",
"name": "Gemini 2.5 Flash Lite",
"link": "https://aistudio.google.com",
"date": "2025-09-01",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.8257522743177047,
"bluex": 0.7329624478442281,
"oab_exams": 0.6783599088838269,
"assin2_sts": 0.8399704980607736,
"assin2_rte": 0.9095975398498664,
"faquad_nli": 0.8289944389172974,
"hatebr_offensive": 0.8733247194142535,
"portuguese_hate_speech": 0.7511757826108595,
"tweetsentbr": 0.7696375203962748
},
"result_metrics_average": 0.8010861255883428,
"result_metrics_npm": 0.6977608761930978
},
{
"model": "gemini-2_5_flash_lite",
"name": "Gemini 2.5 Flash Lite [reasoning: low]",
"link": "https://aistudio.google.com",
"date": "2025-09-01",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.9013296011196641,
"bluex": 0.8400556328233658,
"oab_exams": 0.6943052391799545,
"assin2_sts": 0.755562697236674,
"assin2_rte": 0.9464858475885941,
"faquad_nli": 0.8703946691365647,
"hatebr_offensive": 0.9080576836597871,
"portuguese_hate_speech": 0.7416269940699909,
"tweetsentbr": 0.7520493635069894
},
"result_metrics_average": 0.8233186364801761,
"result_metrics_npm": 0.7360224650390731
},
{
"model": "gemini-2_5_flash",
"name": "Gemini 2.5 Flash",
"link": "https://aistudio.google.com",
"date": "2025-09-01",
"status": "full",
"main_language": "English",
"model_type": "proprietary",
"result_metrics": {
"enem_challenge": 0.9097270818754374,
"bluex": 0.8650904033379694,
"oab_exams": 0.8355353075170843,
"assin2_sts": 0.8714666962450285,
"assin2_rte": 0.9386350099968783,
"faquad_nli": 0.8578569197125898,
"hatebr_offensive": 0.8933375064862327,
"portuguese_hate_speech": 0.7502527990365506,
"tweetsentbr": 0.7801286503914011
},
"result_metrics_average": 0.8557811527332413,
"result_metrics_npm": 0.7734849178213028
}
]