File size: 26,802 Bytes
5342d35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
,overall_rank,model_name,gpu_util_time,gpu_util_time_raw,full_time_from_gpu_log,parameters,parameters_raw,total_time,total_time_raw,batch_size,batch_sizes,"anli_r1(acc,none)","anli_r2(acc,none)","anli_r3(acc,none)","arc_challenge(acc_norm,none)","bbh(exact_match,get-answer)","boolq(acc,none)","drop(f1,none)","gpqa_main_zeroshot(acc_norm,none)","gsm8k(exact_match,strict-match)","hellaswag(acc_norm,none)","mmlu(acc,none)","nq_open(exact_match,remove_whitespace)","openbookqa(acc_norm,none)","piqa(acc_norm,none)","qnli(acc,none)","sciq(acc_norm,none)","triviaqa(exact_match,remove_whitespace)","truthfulqa_mc1(acc,none)","truthfulqa_mc2(acc,none)","winogrande(acc,none)","gsm8k(exact_match,strict-match)_rank","bbh(exact_match,get-answer)_rank","arc_challenge(acc_norm,none)_rank","anli_r1(acc,none)_rank","anli_r2(acc,none)_rank","anli_r3(acc,none)_rank","gpqa_main_zeroshot(acc_norm,none)_rank","hellaswag(acc_norm,none)_rank","piqa(acc_norm,none)_rank","winogrande(acc,none)_rank","boolq(acc,none)_rank","openbookqa(acc_norm,none)_rank","sciq(acc_norm,none)_rank","qnli(acc,none)_rank","mmlu(acc,none)_rank","nq_open(exact_match,remove_whitespace)_rank","drop(f1,none)_rank","truthfulqa_mc1(acc,none)_rank","truthfulqa_mc2(acc,none)_rank","triviaqa(exact_match,remove_whitespace)_rank",Reasoning & Math_mean,Reasoning & Math_rank,Commonsense & NLI_mean,Commonsense & NLI_rank,Knowledge & Reading_mean,Knowledge & Reading_rank,overall_mean
0,1,google_gemma-3-12b-it,14h 8m,50906.40000000001,15h 47m,12.2B,12187325040,15h 45m,56750.865892400994,auto,[2],0.603,0.56,0.5958333333333333,0.6109215017064846,0.8018737521118108,0.8746177370030581,0.13956585570469748,0.33705357142857145,0.8771796815769523,0.81876120294762,0.7161373023785785,0.15706371191135735,0.498,0.780739934711643,0.7457440966501923,0.954,0.2752452073116362,0.40514075887392903,0.5811825612574781,0.744277821625888,3.0,1.0,2.0,5.0,3.0,2.0,9.0,3.0,19.0,4.0,3.0,1.0,6.0,8.0,6.0,8.0,8.0,8.0,8.0,23.0,0.6265516914510217,1,0.7737343989912002,3,0.3790558995729461,10,0.6038169015266615
1,2,Qwen_Qwen3-14B (8bit),17h 29m,62956.2,29h 46m,14.8B,14768307200,29h 45m,107151.8020654649,1,[],0.646,0.57,0.5566666666666666,0.6006825938566553,0.432959606819229,0.8917431192660551,0.09041002516778524,0.39732142857142855,0.8984078847611827,0.7876916948814977,0.7694772824383991,0.09224376731301939,0.46,0.794885745375408,0.8442247849167124,0.966,0.4074899687917967,0.40636474908200737,0.5894043212858241,0.7205998421468035,1.0,29.0,4.0,4.0,2.0,3.0,1.0,12.0,14.0,11.0,1.0,8.0,1.0,2.0,2.0,15.0,18.0,7.0,6.0,13.0,0.586005454382166,3,0.780735026655211,2,0.39256501901313867,7,0.5961286740670235
2,3,openchat_openchat-3.6-8b-20240522,6h 59m,25150.800000000003,7h 52m,8.0B,8030261248,7h 51m,28278.859469966963,3,[],0.556,0.513,0.48,0.6032423208191127,0.617877438181539,0.872782874617737,0.2515687919463106,0.3325892857142857,0.7505686125852918,0.7978490340569607,0.6430707876370887,0.1706371191135734,0.462,0.8182807399347116,0.7300018304960645,0.964,0.5659273294694606,0.3525091799265606,0.4976009936991865,0.7632202052091555,16.0,10.0,3.0,9.0,8.0,11.0,11.0,6.0,3.0,1.0,4.0,7.0,2.0,9.0,14.0,5.0,3.0,15.0,19.0,3.0,0.5504682367571756,6,0.7725906691878042,5,0.4135523669653634,2,0.5871363271703519
3,4,Qwen_Qwen3-8B,13h 44m,49496.99999999999,15h 33m,8.2B,8190735360,15h 31m,55918.467860276,auto,[1],0.669,0.542,0.5558333333333333,0.5622866894197952,0.7975733374289664,0.8657492354740061,0.10987730704697922,0.35044642857142855,0.8726307808946171,0.7486556462856004,0.7289559891753311,0.07368421052631578,0.418,0.7752992383025027,0.7818048691195314,0.958,0.32060855996433346,0.3635250917992656,0.5431400402852035,0.6803472770323599,4.0,2.0,11.0,3.0,5.0,4.0,5.0,24.0,21.0,22.0,6.0,20.0,4.0,7.0,3.0,16.0,13.0,13.0,13.0,18.0,0.62139579566402,2,0.7468366094591429,8,0.35663186646623807,14,0.5858709017329785
4,5,Qwen_Qwen2.5-7B-Instruct,8h 33m,30831.600000000002,9h 38m,7.6B,7615616512,9h 36m,34616.604248491,3,[],0.685,0.549,0.5525,0.552901023890785,0.4487789894025495,0.863302752293578,0.07108850671140957,0.328125,0.7626990144048522,0.8049193387771361,0.7180601053980914,0.045706371191135735,0.486,0.8030467899891186,0.8045030203185063,0.937,0.32540124832813194,0.4773561811505508,0.6484829087237318,0.7119179163378059,12.0,27.0,12.0,2.0,4.0,5.0,12.0,5.0,10.0,14.0,7.0,2.0,12.0,4.0,4.0,23.0,27.0,2.0,2.0,17.0,0.5541434325283124,5,0.7729556882451636,4,0.3810158869171752,9,0.5787894583458693
5,6,Qwen_Qwen2.5-14B-Instruct (8bit),29h 32m,106374.6,52h 45m,14.8B,14770033664,52h 44m,189869.409403997,1,[],0.721,0.634,0.6175,0.6151877133105802,0.10689602211641837,0.8862385321100917,0.07127621644295308,0.3549107142857143,0.7922668688400303,0.8419637522405895,0.7830793334282866,0.06149584487534626,0.476,0.8171926006528836,0.8539264140582098,0.929,0.039288898796255016,0.5104039167686658,0.6830150175318825,0.7545382794001578,9.0,41.0,1.0,1.0,1.0,1.0,4.0,1.0,4.0,2.0,2.0,4.0,17.0,1.0,1.0,19.0,25.0,1.0,1.0,35.0,0.5488230455075348,7,0.7941227969231331,1,0.3580932046405649,13,0.5774590062429031
6,7,01-ai_Yi-1.5-9B,10h 26m,37569.6,11h 44m,8.8B,8829407232,11h 43m,42212.11262178002,2,[],0.532,0.48,0.43916666666666665,0.5469283276450512,0.7120258024880971,0.8581039755351682,0.44568582214765123,0.29464285714285715,0.6391205458680819,0.7789285002987453,0.6892892750320467,0.15318559556786704,0.456,0.8063112078346029,0.5086948563060589,0.952,0.5438029424877396,0.3219094247246022,0.46757177682431345,0.7261247040252565,25.0,5.0,15.0,12.0,12.0,18.0,20.0,14.0,6.0,9.0,10.0,9.0,7.0,28.0,8.0,10.0,1.0,19.0,26.0,4.0,0.5205548856872506,16,0.726594749142833,15,0.4369074727973701,1,0.5675746140297402
7,8,Qwen_Qwen2.5-7B-Instruct-1M,10h 10m,36621.00000000001,11h 18m,7.6B,7615616512,11h 17m,40632.813397081,auto,[1],0.585,0.533,0.5566666666666666,0.5853242320819113,0.27722316080479187,0.8525993883792049,0.05704697986577195,0.3392857142857143,0.7952994692949203,0.789982075283808,0.716635806865119,0.15761772853185596,0.48,0.8161044613710555,0.6781987918725975,0.95,0.4205305394560856,0.4259485924112607,0.6000723763902577,0.7277032359905288,8.0,38.0,7.0,6.0,6.0,3.0,8.0,11.0,5.0,8.0,12.0,3.0,8.0,11.0,5.0,7.0,29.0,3.0,3.0,12.0,0.5245427490191436,15,0.7563697075567422,7,0.39630867058672514,6,0.5672119609775775
8,9,meta-llama_Llama-3.1-8B-Instruct,10h 52m,39147.59999999999,12h 20m,8.0B,8030261248,12h 19m,44363.249360034,auto,[1],0.482,0.467,0.44333333333333336,0.5503412969283277,0.7155582859775764,0.8415902140672783,0.193729026845638,0.34375,0.7543593631539045,0.7920732921728739,0.6793191853012391,0.17756232686980608,0.432,0.8063112078346029,0.5013728720483251,0.962,0.518167632634864,0.3659730722154223,0.5411536633287395,0.7387529597474349,15.0,4.0,13.0,18.0,14.0,17.0,6.0,9.0,6.0,6.0,16.0,16.0,3.0,30.0,12.0,4.0,4.0,12.0,14.0,7.0,0.5366203256275918,12,0.7248715065529306,17,0.41265081786595154,3,0.5653173866229683
9,10,01-ai_Yi-1.5-9B-Chat,12h 15m,44120.4,13h 55m,8.8B,8829407232,13h 54m,50056.33134526899,2,[],0.535,0.509,0.5258333333333334,0.5870307167235495,0.6106588849639072,0.8681957186544342,0.125326132550335,0.30357142857142855,0.7081122062168309,0.7872933678550089,0.6840905853866971,0.009418282548476454,0.436,0.8035908596300326,0.7876624565257184,0.954,0.338664734730272,0.37454100367197063,0.5479336385093577,0.7466456195737964,18.0,12.0,6.0,11.0,9.0,6.0,18.0,13.0,9.0,3.0,5.0,14.0,6.0,6.0,9.0,36.0,9.0,10.0,11.0,15.0,0.5398866528298641,9,0.7690554317484272,6,0.34666239623285144,15,0.5621284484722574
10,11,mistralai_Ministral-8B-Instruct-2410,9h 27m,34053.6,10h 47m,8.0B,8019808256,10h 46m,38770.33925615801,auto,[1],0.488,0.487,0.4658333333333333,0.5622866894197952,0.6925203501766242,0.8602446483180428,0.07141254194630874,0.34151785714285715,0.7748294162244125,0.791077474606652,0.6407206950576841,0.15761772853185596,0.466,0.823177366702938,0.49496613582280796,0.956,0.5278087382969238,0.32558139534883723,0.48666999709167413,0.7379636937647988,11.0,7.0,11.0,16.0,11.0,14.0,7.0,10.0,2.0,7.0,8.0,6.0,5.0,39.0,15.0,7.0,24.0,18.0,22.0,5.0,0.5445696637567174,8,0.7327756170307486,12,0.3683018493788807,12,0.5575614030892774
11,12,meta-llama_Meta-Llama-3-8B-Instruct,5h 46m,20809.800000000003,6h 31m,8.0B,8030261248,6h 30m,23440.23442057398,3,[],0.484,0.458,0.4483333333333333,0.5639931740614335,0.6790047611733989,0.8311926605504587,0.16397651006711153,0.31026785714285715,0.756633813495072,0.7592113124875522,0.6387266771115225,0.15900277008310248,0.43,0.7872687704026116,0.5464030752333883,0.932,0.5112015158270173,0.3635250917992656,0.517141921073546,0.7166535122336227,14.0,8.0,10.0,17.0,18.0,16.0,16.0,19.0,18.0,13.0,19.0,17.0,15.0,20.0,16.0,6.0,5.0,13.0,17.0,8.0,0.5286047056008708,13,0.7146756187010904,22,0.39226241432692754,8,0.5528268378037648
12,13,Qwen_Qwen3-4B,5h 3m,18234.6,5h 52m,4.0B,4022468096,5h 51m,21077.94364616298,6,[],0.55,0.461,0.5133333333333333,0.5392491467576792,0.7522653970204269,0.8504587155963302,0.09770658557046955,0.32589285714285715,0.8567096285064443,0.6833300139414459,0.6835920809001567,0.014681440443213296,0.402,0.7513601741022851,0.8087131612667032,0.932,0.22503343736067766,0.3671970624235006,0.5475752916992002,0.6582478295185478,5.0,3.0,17.0,10.0,17.0,8.0,13.0,29.0,27.0,27.0,13.0,23.0,15.0,3.0,10.0,34.0,16.0,11.0,12.0,26.0,0.5712071946801058,4,0.7265871277750447,16,0.322630983066203,21,0.5510173077791636
13,14,NousResearch_Hermes-2-Pro-Mistral-7B,7h 28m,26916.0,8h 28m,7.2B,7241994240,8h 27m,30434.329021440935,3,[],0.531,0.496,0.5,0.5656996587030717,0.5737981876823837,0.8681957186544342,0.10975356543624108,0.2767857142857143,0.6853677028051555,0.8049193387771361,0.6051132317333713,0.04044321329639889,0.434,0.7986942328618063,0.5564708035877722,0.917,0.47113241194828354,0.41370869033047736,0.5911563612073706,0.7198105761641673,21.0,17.0,9.0,13.0,10.0,9.0,25.0,5.0,12.0,12.0,5.0,15.0,20.0,18.0,20.0,25.0,14.0,6.0,5.0,11.0,0.5183787519251893,17,0.7284415242921881,13,0.37188457899202376,11,0.5479524703736892
14,15,mistralai_Mistral-7B-Instruct-v0.3,7h 41m,27676.8,8h 39m,7.2B,7248023552,8h 38m,31084.838324364042,3,[],0.476,0.443,0.4483333333333333,0.5895904436860068,0.5625863922592536,0.8584097859327217,0.08997168624161092,0.28348214285714285,0.489764973464746,0.828918542123083,0.5971371599487253,0.15373961218836565,0.47,0.8269858541893362,0.5145524437122461,0.943,0.5683236736513598,0.42105263157894735,0.5968131853249063,0.7403314917127072,28.0,18.0,5.0,20.0,20.0,16.0,24.0,2.0,1.0,5.0,9.0,5.0,9.0,26.0,22.0,9.0,19.0,4.0,4.0,2.0,0.470393897942926,22,0.7403140168100134,9,0.40450632482231924,5,0.5450996676102245
15,16,google_gemma-3-4b-it,3h 50m,13811.399999999998,4h 52m,4.3B,4300079472,4h 51m,17460.233507480007,auto,[4],0.492,0.471,0.4683333333333333,0.5708191126279863,0.7094148364306558,0.8397553516819571,0.08928376677852287,0.28794642857142855,0.7619408642911296,0.7413861780521809,0.5755590371741917,0.10941828254847645,0.466,0.7720348204570185,0.5659893831228263,0.931,0.3148127507802051,0.3488372093023256,0.5188213608080946,0.7008681925808997,13.0,6.0,8.0,15.0,13.0,13.0,23.0,26.0,22.0,17.0,17.0,6.0,16.0,16.0,24.0,13.0,20.0,16.0,16.0,19.0,0.5373506536077904,11,0.7167191322706975,19,0.326122067898636,20,0.5367610454270617
16,17,01-ai_Yi-1.5-6B-Chat,7h 1m,25318.799999999996,8h 5m,6.1B,6061035520,8h 4m,29040.429801896913,2,[],0.477,0.453,0.46,0.5392491467576792,0.5478421133466441,0.847400611620795,0.11608116610738237,0.35714285714285715,0.6702047005307051,0.7674765982871938,0.6178607036034753,0.027146814404432132,0.436,0.7878128400435256,0.6794801391177009,0.934,0.33097414177440926,0.3769889840881273,0.5343714554244458,0.7095501183898973,22.0,21.0,17.0,19.0,19.0,15.0,3.0,16.0,17.0,15.0,15.0,14.0,14.0,10.0,18.0,29.0,12.0,9.0,15.0,16.0,0.5006341168254121,19,0.7373886153513018,10,0.33390387756704537,19,0.5334791195319635
17,18,01-ai_Yi-1.5-6B,3h 54m,14091.599999999999,4h 29m,6.1B,6061035520,4h 28m,16094.199660658,auto,[8],0.448,0.407,0.4066666666666667,0.49658703071672355,0.5754876363077869,0.8015290519877676,0.39946203859060414,0.29017857142857145,0.5223654283548143,0.7541326428998207,0.6242700470018516,0.1781163434903047,0.422,0.8014145810663765,0.598572213069742,0.941,0.4952073116362015,0.2998776009791922,0.44075044864963386,0.7205998421468035,27.0,16.0,20.0,21.0,26.0,27.0,22.0,22.0,11.0,11.0,23.0,19.0,10.0,13.0,17.0,3.0,2.0,23.0,32.0,10.0,0.449469333353509,24,0.7198926187386444,18,0.4062806317246313,4,0.5311608727496431
18,19,Qwen_Qwen2-7B-Instruct,10h 11m,36684.6,11h 31m,7.6B,7615616512,11h 30m,41431.857966535026,auto,[1],0.573,0.525,0.5225,0.5401023890784983,0.577484257410536,0.8562691131498471,0.052028104026845816,0.31473214285714285,0.6467020470053071,0.80601473809998,0.6994017946161516,0.013296398891966758,0.462,0.8057671381936888,0.5471352736591616,0.916,0.008136424431564869,0.40514075887392903,0.5734370927343657,0.6985003946329913,23.0,15.0,16.0,7.0,7.0,7.0,15.0,4.0,7.0,18.0,11.0,7.0,21.0,19.0,7.0,35.0,31.0,8.0,9.0,39.0,0.5285029766216406,14,0.7273838082479528,14,0.29190676226247064,24,0.5271324033830989
19,20,deepseek-ai_DeepSeek-R1-0528-Qwen3-8B,15h 30m,55855.200000000004,17h 59m,8.2B,8190735360,17h 57m,64675.53916321404,auto,[1],0.511,0.464,0.4766666666666667,0.5494880546075085,0.5840884656734756,0.8483180428134557,0.05327915268456377,0.37276785714285715,0.8127369219105383,0.7564230233021311,0.6829511465603191,0.018282548476454295,0.43,0.7568008705114254,0.5577521508328757,0.941,0.029480606330806954,0.3574051407588739,0.5590129767155586,0.675611681136543,7.0,14.0,14.0,14.0,16.0,12.0,2.0,20.0,25.0,24.0,14.0,17.0,10.0,17.0,11.0,33.0,30.0,14.0,10.0,36.0,0.5386782808572923,10,0.7094151097994902,23,0.28340192858776275,28,0.5218532653062027
20,21,meta-llama_Llama-3.2-3B-Instruct,5h 57m,21476.999999999996,7h 13m,3.2B,3212749824,7h 12m,25939.885958662,auto,[2],0.447,0.418,0.43083333333333335,0.4590443686006826,0.556442942712333,0.7847094801223241,0.15539429530201407,0.328125,0.6421531463229719,0.7054371639115714,0.6051844466600199,0.13905817174515236,0.358,0.7551686615886833,0.5451217279882848,0.932,0.33894337940258584,0.3268053855569155,0.49757877829085795,0.6708760852407262,24.0,20.0,25.0,22.0,23.0,21.0,12.0,27.0,26.0,25.0,26.0,28.0,15.0,21.0,19.0,12.0,7.0,17.0,20.0,14.0,0.46879982728133157,23,0.6787590169787985,30,0.3438274094929243,16,0.5047938183389229
21,22,Qwen_Qwen2.5-3B-Instruct,6h 30m,23452.2,7h 49m,3.1B,3085938688,7h 48m,28089.51656791498,auto:4,"[2, 64, 64, 64, 64]",0.562,0.466,0.49416666666666664,0.48208191126279865,0.24911687912763017,0.8012232415902141,0.0773332634228189,0.32142857142857145,0.10159211523881728,0.7490539733120892,0.6549636803874092,0.008310249307479225,0.422,0.780739934711643,0.7979132344865458,0.913,0.3009919750334374,0.41615667074663404,0.5860553667449402,0.6929755327545383,41.0,39.0,22.0,8.0,15.0,10.0,14.0,23.0,19.0,20.0,24.0,19.0,24.0,5.0,13.0,37.0,21.0,5.0,7.0,21.0,0.3823408776749263,32,0.7367008452650045,11,0.3406352009404532,17,0.4938551633111117
22,23,Qwen_Qwen2.5-Math-7B,24h 38m,88696.20000000001,27h 23m,7.6B,7615616512,27h 21m,98517.403244708,auto,[4],0.387,0.407,0.3825,0.5025597269624573,0.6724005529104592,0.745565749235474,0.043235109060402774,0.3080357142857143,0.8476118271417741,0.6528579964150567,0.5799031476997578,0.05096952908587258,0.392,0.7453754080522307,0.49807797913234486,0.929,0.2183459652251449,0.32068543451652387,0.48321897449850354,0.6479873717442778,6.0,9.0,18.0,30.0,26.0,31.0,17.0,31.0,29.0,29.0,33.0,24.0,17.0,33.0,23.0,22.0,33.0,20.0,23.0,27.0,0.501015403042915,18,0.6586949292256262,32,0.2827263600143676,29,0.4907165242982997
23,24,deepseek-ai_deepseek-llm-7b-chat,9h 8m,32906.40000000001,10h 8m,6.9B,6910365696,10h 6m,36412.969243890024,3,[],0.423,0.419,0.42083333333333334,0.49658703071672355,0.4547688527107971,0.8330275229357799,0.10304844798657667,0.2924107142857143,0.46398786959818045,0.7772356104361681,0.49878934624697335,0.06343490304709141,0.46,0.8014145810663765,0.49697968149368477,0.893,0.31119037004012484,0.3488372093023256,0.478933363826628,0.7016574585635359,29.0,26.0,20.0,26.0,22.0,23.0,21.0,15.0,11.0,16.0,18.0,8.0,29.0,35.0,32.0,18.0,15.0,16.0,24.0,20.0,0.42436968580639267,27,0.7090449792136494,24,0.30070560674162,23,0.48690681477950076
24,25,deepseek-ai_DeepSeek-R1-Distill-Llama-8B,10h 36m,38179.200000000004,11h 47m,8.0B,8030261248,11h 46m,42405.48981113202,auto:5,"[1, 64, 64, 64, 64, 64]",0.404,0.41,0.3883333333333333,0.4232081911262799,0.6037475042236216,0.8287461773700305,0.07122483221476522,0.27455357142857145,0.624715693707354,0.7429794861581358,0.5326876513317191,0.05844875346260388,0.41,0.7758433079434167,0.5147354933186894,0.899,0.19404814979937585,0.3219094247246022,0.5044600613345165,0.6779794790844514,26.0,13.0,31.0,29.0,25.0,29.0,26.0,25.0,20.0,23.0,20.0,21.0,27.0,25.0,27.0,20.0,26.0,19.0,18.0,28.0,0.44693689911702295,26,0.6927548491249605,27,0.28046314547793044,30,0.48303105552807335
25,26,meta-llama_Llama-2-13b-hf,17h 38m,63506.40000000001,19h 22m,13.0B,13015864320,19h 21m,69687.765641973,auto,[1],0.377,0.39,0.385,0.48976109215017066,0.47765320227307634,0.8064220183486238,0.030132130872483273,0.2544642857142857,0.22971948445792267,0.7938657637920733,0.5209371884346959,0.23628808864265927,0.452,0.8052230685527747,0.4953322350356947,0.935,0.6088386090057958,0.2594859241126071,0.3689920443187282,0.7221783741120757,36.0,25.0,21.0,31.0,29.0,30.0,32.0,8.0,8.0,10.0,22.0,10.0,13.0,38.0,31.0,1.0,38.0,33.0,40.0,1.0,0.3719425806564936,33,0.7157173514058917,20,0.33744566423116157,18,0.48191467549118333
26,27,meta-llama_Llama-2-13b-chat-hf,15h 37m,56271.600000000006,17h 9m,13.0B,13015864320,17h 8m,61732.05361796002,auto,[1],0.43,0.43,0.4141666666666667,0.5017064846416383,0.47796037475042236,0.8165137614678899,0.09150901845637556,0.29910714285714285,0.34723275208491283,0.7966540529774945,0.5312633527987466,0.10304709141274238,0.44,0.7932535364526659,0.5438403807431814,0.905,0.27245876058849755,0.2802937576499388,0.43962381047566673,0.7119179163378059,31.0,24.0,19.0,25.0,21.0,24.0,19.0,7.0,15.0,14.0,21.0,12.0,26.0,22.0,28.0,14.0,17.0,30.0,33.0,24.0,0.4143104887143975,28,0.7153113782827196,21,0.2863659652303279,26,0.48127744301808945
27,28,deepseek-ai_DeepSeek-R1-Distill-Qwen-7B,5h 43m,20637.000000000004,6h 29m,7.6B,7615616512,6h 28m,23311.02294069098,3,[],0.445,0.418,0.41,0.4377133105802048,0.556903701428352,0.7782874617737003,0.041197567114093975,0.33482142857142855,0.7862016679302501,0.6025692093208525,0.5263495228599915,0.03213296398891967,0.36,0.7165397170837867,0.5209591799377631,0.918,0.059239857333927774,0.28886168910648713,0.45631948777242143,0.5990528808208366,10.0,19.0,28.0,23.0,23.0,25.0,10.0,34.0,33.0,33.0,28.0,27.0,19.0,24.0,29.0,28.0,35.0,28.0,29.0,34.0,0.4840914440728908,21,0.6422012069909914,34,0.2340168480293069,35,0.4644074822811509
28,29,Qwen_Qwen2.5-1.5B-Instruct,2h 36m,9398.4,3h 21m,1.5B,1543714304,3h 20m,12036.56519530993,6,[],0.448,0.392,0.43166666666666664,0.4684300341296928,0.36922131776992784,0.7813455657492355,0.03905201342281896,0.28348214285714285,0.3191811978771797,0.6829316869149572,0.6005554764278592,0.04155124653739612,0.406,0.7584330794341676,0.5667215815485996,0.939,0.28260142666072224,0.31211750305997554,0.4657480128053983,0.6274664561957379,33.0,37.0,23.0,21.0,28.0,20.0,24.0,30.0,24.0,30.0,27.0,22.0,11.0,15.0,21.0,24.0,36.0,21.0,27.0,22.0,0.3874259084715157,31,0.680271195691814,29,0.29027094648569507,25,0.46077527040287397
29,30,Qwen_Qwen3-1.7B,3h 36m,13010.4,4h 26m,1.7B,1720574976,4h 25m,15915.268575096969,6,[],0.41,0.404,0.43416666666666665,0.43430034129692835,0.4825679619106128,0.7764525993883792,0.0752600671140939,0.29017857142857145,0.6899166034874905,0.6037641904003187,0.5537672696197123,0.0221606648199446,0.376,0.720348204570185,0.5105253523704925,0.914,0.13497547926883638,0.29498164014687883,0.4588116356646071,0.6085240726124704,20.0,23.0,29.0,28.0,27.0,19.0,22.0,33.0,32.0,31.0,30.0,26.0,23.0,27.0,25.0,30.0,23.0,25.0,28.0,32.0,0.44930430639861,25,0.6442306313345494,33,0.2566594594390122,34,0.45973506603830955
30,31,Qwen_Qwen2.5-Math-7B-Instruct,4h 57m,17861.4,5h 38m,7.6B,7615616512,5h 37m,20230.489568555,auto,[4],0.431,0.415,0.42916666666666664,0.4308873720136519,0.6140377822147136,0.6061162079510704,0.02729865771812086,0.28794642857142855,0.890068233510235,0.5881298546106354,0.5372454066372312,0.019944598337950138,0.334,0.6855277475516867,0.6774665934468241,0.858,0.007467677218011591,0.29865361077111385,0.4750352357815752,0.579321231254933,2.0,11.0,30.0,24.0,24.0,22.0,23.0,35.0,35.0,35.0,39.0,30.0,32.0,12.0,26.0,32.0,40.0,24.0,25.0,40.0,0.49972949756809937,20,0.6183659478307356,37,0.2276075310773338,36,0.4596156652127924
31,32,meta-llama_Llama-2-7b-chat-hf,6h 7m,22072.800000000003,6h 59m,6.7B,6738415616,6h 57m,25079.294749224995,auto,[4],0.417,0.41,0.4075,0.44283276450511944,0.4013208416525879,0.7978593272171254,0.11749685402684486,0.2611607142857143,0.23199393479909022,0.754829715196176,0.46360917248255235,0.0667590027700831,0.438,0.7714907508161044,0.5800842028189639,0.878,0.1903700401248328,0.3023255813953488,0.4532173856202156,0.664561957379637,35.0,33.0,27.0,27.0,25.0,26.0,31.0,21.0,23.0,26.0,25.0,13.0,31.0,14.0,33.0,17.0,11.0,22.0,30.0,29.0,0.3674011793203588,35,0.6978322790611438,25,0.2656296727366462,32,0.4525206122545198
32,33,meta-llama_Llama-2-7b-hf,4h 59m,17980.200000000004,5h 43m,6.7B,6738415616,5h 42m,20539.25803211797,auto,[4],0.364,0.372,0.37583333333333335,0.46245733788395904,0.3990170480724927,0.7773700305810397,0.0363349412751679,0.24107142857142858,0.1379833206974981,0.7600079665405298,0.4185301239139724,0.1889196675900277,0.442,0.7905331882480957,0.49917627677100496,0.91,0.5250780205082479,0.2521419828641371,0.38971625246980024,0.6898184688239937,40.0,34.0,24.0,33.0,31.0,33.0,34.0,18.0,16.0,21.0,29.0,11.0,25.0,31.0,37.0,2.0,37.0,34.0,38.0,6.0,0.3360517812226731,39,0.6955579901378091,26,0.3017868314368922,22,0.4515994694072364
33,34,deepseek-ai_deepseek-llm-7b-base,6h 26m,23180.4,7h 12m,6.9B,6910365696,7h 11m,25877.186720471946,3,[],0.34,0.363,0.3775,0.4453924914675768,0.4237444324988481,0.7235474006116208,0.042181208053691466,0.25223214285714285,0.16224412433661864,0.760605457080263,0.44281441390115367,0.15096952908587258,0.434,0.7976060935799782,0.4958813838550247,0.915,0.5003901025412394,0.23255813953488372,0.34921436699656794,0.6937647987371744,38.0,30.0,26.0,37.0,32.0,32.0,33.0,17.0,13.0,19.0,34.0,15.0,22.0,37.0,36.0,11.0,34.0,36.0,41.0,9.0,0.33773045588002665,38,0.6886293048377231,28,0.2863546266855681,27,0.4451323042568829
34,35,deepseek-ai_deepseek-math-7b-rl,7h 12m,25973.400000000005,8h 3m,6.9B,6910365696,8h 2m,28925.110782705015,3,[],0.368,0.389,0.405,0.48976109215017066,0.5246505913070189,0.7559633027522936,0.11902684563758364,0.27232142857142855,0.1425322213798332,0.6896036646086438,0.5249964392536676,0.03933518005540166,0.424,0.750272034820457,0.49899322716456157,0.928,0.17465448060633082,0.2876376988984088,0.4028843192592468,0.6511444356748224,39.0,22.0,21.0,32.0,30.0,28.0,27.0,28.0,28.0,28.0,32.0,18.0,18.0,32.0,30.0,26.0,10.0,29.0,37.0,31.0,0.370180761915493,34,0.671139523574397,31,0.2580891606184399,33,0.44188884810699347
35,36,meta-llama_Llama-3.2-1B-Instruct,2h 35m,9307.800000000001,3h 32m,1.2B,1235814400,3h 30m,12653.736081852,auto,[2],0.338,0.334,0.3725,0.38054607508532423,0.3781293196129627,0.6948012232415902,0.16348364093959766,0.27455357142857145,0.3373768006065201,0.6088428599880502,0.4589089873237431,0.056509695290858725,0.346,0.7421109902067464,0.4946000366099213,0.897,0.24994427106553724,0.2717258261933905,0.4382997975469724,0.601420678768745,32.0,36.0,32.0,38.0,37.0,34.0,26.0,32.0,30.0,32.0,35.0,29.0,28.0,40.0,34.0,21.0,6.0,31.0,34.0,25.0,0.3450151095333398,37,0.6263965412592933,36,0.2731453697266833,31,0.4219376886954266
36,37,google_gemma-3-1b-it,4h 52m,17533.8,6h 51m,999.9M,999885952,6h 50m,24641.929493917996,auto,[1],0.332,0.354,0.3566666666666667,0.38054607508532423,0.38227614805713406,0.7581039755351682,0.07615666946308705,0.265625,0.24715693707354056,0.5782712607050389,0.38591368750890187,0.03573407202216067,0.388,0.720892274211099,0.49405088779059125,0.858,0.18970129291127955,0.2460220318237454,0.38746290463027977,0.5895816890292028,34.0,35.0,32.0,39.0,34.0,36.0,30.0,36.0,31.0,34.0,31.0,25.0,32.0,41.0,39.0,27.0,22.0,35.0,39.0,30.0,0.33118154669752364,40,0.6267000124673,35,0.2201651097265757,38,0.40130807862566087
37,38,deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B,2h 52m,10353.600000000002,3h 42m,1.8B,1777088000,3h 40m,13254.913052365184,6,[],0.356,0.362,0.3625,0.3464163822525597,0.40592842881277835,0.6801223241590214,0.05068582214765113,0.27232142857142855,0.7012888551933283,0.44672376020713006,0.36063238854863977,0.006371191135734072,0.308,0.6577801958650707,0.5053999633900788,0.845,0.009028087382969237,0.2937576499388005,0.45174216968854103,0.5493291239147593,19.0,32.0,34.0,34.0,33.0,35.0,27.0,39.0,37.0,38.0,36.0,32.0,33.0,29.0,41.0,38.0,32.0,26.0,31.0,38.0,0.40092215640429923,30,0.57033648107658,39,0.19536955147372262,41,0.3985513885604245
38,39,Qwen_Qwen2.5-Math-1.5B-Instruct,2h 39m,9542.400000000001,3h 26m,1.5B,1543714304,3h 25m,12324.098490235978,auto:4,"[6, 64, 64, 64, 64]",0.342,0.341,0.35333333333333333,0.3651877133105802,0.43726002150207344,0.5694189602446483,0.023086199664429596,0.28348214285714285,0.7369219105382866,0.41655048795060745,0.3787921948440393,0.003878116343490305,0.286,0.6137105549510338,0.4973457807065715,0.718,0.004291127953633527,0.29008567931456547,0.4895014774253539,0.5256511444356748,17.0,28.0,33.0,36.0,36.0,37.0,24.0,40.0,38.0,39.0,40.0,33.0,35.0,34.0,40.0,39.0,41.0,27.0,21.0,41.0,0.4084550173630595,29,0.5180967040412194,41,0.198272465924252,40,0.38377484226877323
39,40,Qwen_Qwen3-0.6B,2h 53m,10404.6,3h 46m,596.0M,596049920,3h 45m,13547.446140576852,6,[],0.343,0.319,0.3441666666666667,0.34215017064846415,0.4148364306558132,0.6391437308868502,0.06054425335570444,0.2700892857142857,0.41243366186504926,0.47191794463254333,0.401296111665005,0.020498614958448753,0.32,0.6751904243743199,0.49606443346146806,0.833,0.019282211324119482,0.27050183598531213,0.4277423214808083,0.5516969218626677,30.0,31.0,35.0,35.0,38.0,39.0,28.0,38.0,36.0,37.0,38.0,31.0,34.0,36.0,38.0,31.0,28.0,32.0,35.0,37.0,0.34938231650718266,36,0.5695733507454069,40,0.19997755812823304,39,0.38162775097687635
40,41,Qwen_Qwen2.5-0.5B-Instruct,1h 48m,6532.8,2h 35m,494.0M,494032768,2h 34m,9253.074769329047,6,[],0.324,0.342,0.3475,0.3370307167235495,0.21379204423283674,0.6767584097859327,0.028644085570469928,0.26785714285714285,0.20773313115996966,0.5240987851025692,0.45755590371741917,0.020498614958448753,0.346,0.7040261153427638,0.5368844956983343,0.883,0.13419527418635754,0.2717258261933905,0.4183870928910321,0.5564325177584846,37.0,40.0,36.0,40.0,35.0,38.0,29.0,37.0,34.0,36.0,37.0,29.0,30.0,23.0,35.0,31.0,39.0,31.0,36.0,33.0,0.291416147853357,41,0.6038857605268693,38,0.22183446625285297,37,0.379906007808935
|