diff --git a/results/cross_lingual/few_shot/cross_logiqa.csv b/results/cross_lingual/few_shot/cross_logiqa.csv index c70b781c2b64d574dce967ddf17d8399f2a2a234..71aa547ad4422f93cf50235ccb0580bd32aa00cc 100644 --- a/results/cross_lingual/few_shot/cross_logiqa.csv +++ b/results/cross_lingual/few_shot/cross_logiqa.csv @@ -1 +1,5 @@ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino +Meta-Llama-3-70B,0.6152597402597404,0.49480519480519464,0.5484971301967684,0.7272727272727273,0.6534090909090909,0.625,0.5681818181818182,0.6136363636363636,0.5795454545454546,0.5397727272727273 +Meta-Llama-3-8B,0.44967532467532456,0.2623376623376623,0.33136129711503204,0.5227272727272727,0.4431818181818182,0.44886363636363635,0.44886363636363635,0.3693181818181818,0.4602272727272727,0.45454545454545453 +llama3-8b-cpt-sea-lionv2-base,0.43993506493506496,0.27012987012987016,0.3347288285088485,0.5170454545454546,0.4375,0.4431818181818182,0.4772727272727273,0.4090909090909091,0.4659090909090909,0.32954545454545453 +Meta-Llama-3.1-8B,0.46266233766233766,0.277435064935065,0.34686989908229837,0.5284090909090909,0.5,0.4375,0.4772727272727273,0.4318181818181818,0.4431818181818182,0.42045454545454547 diff --git a/results/cross_lingual/few_shot/cross_mmlu.csv b/results/cross_lingual/few_shot/cross_mmlu.csv index c70b781c2b64d574dce967ddf17d8399f2a2a234..d7e22ffc6e289ec10a0f1897904287c845324841 100644 --- a/results/cross_lingual/few_shot/cross_mmlu.csv +++ b/results/cross_lingual/few_shot/cross_mmlu.csv @@ -1 +1,5 @@ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino +Meta-Llama-3-70B,0.7552380952380952,0.6674285714285715,0.708623453080271,0.8066666666666666,0.7266666666666667,0.7866666666666666,0.7533333333333333,0.7733333333333333,0.72,0.72 +Meta-Llama-3-8B,0.5295238095238096,0.31923809523809527,0.3983311959862401,0.6266666666666667,0.5466666666666666,0.56,0.4866666666666667,0.5266666666666666,0.5,0.46 +llama3-8b-cpt-sea-lionv2-base,0.5228571428571429,0.32704761904761903,0.402396106759339,0.6533333333333333,0.44,0.5066666666666667,0.47333333333333333,0.58,0.5466666666666666,0.46 +Meta-Llama-3.1-8B,0.5342857142857141,0.2960000000000001,0.3809497590731823,0.6733333333333333,0.5533333333333333,0.5133333333333333,0.47333333333333333,0.5133333333333333,0.5,0.5133333333333333 diff --git a/results/cross_lingual/few_shot/cross_xquad.csv b/results/cross_lingual/few_shot/cross_xquad.csv index c70b781c2b64d574dce967ddf17d8399f2a2a234..83336b0ad91d422259c6ab2cfee934cd0208f483 100644 --- a/results/cross_lingual/few_shot/cross_xquad.csv +++ b/results/cross_lingual/few_shot/cross_xquad.csv @@ -1 +1,5 @@ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino +Meta-Llama-3-70B,0.9596638655462185,0.9359243697478992,0.9476454662047799,0.9697478991596639,0.9504201680672268,0.957983193277311,0.9605042016806723,,, +Meta-Llama-3-8B,0.8928571428571429,0.8163865546218487,0.8529112234365448,0.926890756302521,0.8823529411764706,0.888235294117647,0.8739495798319328,,, +llama3-8b-cpt-sea-lionv2-base,0.9029411764705881,0.842016806722689,0.8714154189951169,0.9218487394957983,0.8815126050420168,0.9058823529411765,0.9025210084033614,,, +Meta-Llama-3.1-8B,0.9052521008403361,0.8355042016806722,0.8689808363106925,0.9352941176470588,0.8932773109243698,0.9,0.892436974789916,,, diff --git a/results/cross_lingual/zero_shot/cross_logiqa.csv b/results/cross_lingual/zero_shot/cross_logiqa.csv index c70b781c2b64d574dce967ddf17d8399f2a2a234..5e492c2a82a38de35408f0f5ca984236a5fdd97c 100644 --- a/results/cross_lingual/zero_shot/cross_logiqa.csv +++ b/results/cross_lingual/zero_shot/cross_logiqa.csv @@ -1 +1,10 @@ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino +Qwen2-7B-Instruct,0.5673701298701299,0.477922077922078,0.5188189663543613,0.6590909090909091,0.6704545454545454,0.5340909090909091,0.5625,0.5340909090909091,0.5397727272727273,0.4715909090909091 +Meta-Llama-3.1-8B-Instruct,0.43993506493506496,0.33425324675324675,0.37988102268160845,0.5113636363636364,0.45454545454545453,0.4772727272727273,0.48295454545454547,0.3977272727272727,0.39204545454545453,0.36363636363636365 +Qwen2-72B-Instruct,0.6753246753246753,0.6814935064935067,0.6783950674333673,0.75,0.8125,0.6647727272727273,0.6136363636363636,0.6420454545454546,0.6590909090909091,0.5852272727272727 +Meta-Llama-3-8B-Instruct,0.4115259740259741,0.34042207792207796,0.3726122484532397,0.48863636363636365,0.4659090909090909,0.42613636363636365,0.4034090909090909,0.4034090909090909,0.36363636363636365,0.32954545454545453 +SeaLLMs-v3-7B-Chat,0.5633116883116883,0.5176948051948052,0.5395407640365807,0.6079545454545454,0.7045454545454546,0.5681818181818182,0.5511363636363636,0.5340909090909091,0.5170454545454546,0.4602272727272727 +gemma-2-9b-it,0.6193181818181818,0.5688311688311687,0.5930020245684557,0.6818181818181818,0.6590909090909091,0.5625,0.6193181818181818,0.5909090909090909,0.6306818181818182,0.5909090909090909 +Meta-Llama-3-70B-Instruct,0.6290584415584416,0.6181818181818182,0.6235727047409828,0.6988636363636364,0.6875,0.6420454545454546,0.6193181818181818,0.6022727272727273,0.6136363636363636,0.5397727272727273 +gemma-2-2b-it,0.48214285714285715,0.44772727272727286,0.4642981843076105,0.5625,0.5113636363636364,0.48863636363636365,0.5,0.4431818181818182,0.4659090909090909,0.4034090909090909 +llama3-8b-cpt-sea-lionv2-instruct,0.43831168831168826,0.38831168831168833,0.41179951229957745,0.4943181818181818,0.48295454545454547,0.48295454545454547,0.4318181818181818,0.4147727272727273,0.38636363636363635,0.375 diff --git a/results/cross_lingual/zero_shot/cross_mmlu.csv b/results/cross_lingual/zero_shot/cross_mmlu.csv index c70b781c2b64d574dce967ddf17d8399f2a2a234..c8a9ce32a2493a2684a540a3295aeed7bbc6a22e 100644 --- a/results/cross_lingual/zero_shot/cross_mmlu.csv +++ b/results/cross_lingual/zero_shot/cross_mmlu.csv @@ -1 +1,10 @@ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino +Qwen2-7B-Instruct,0.6495238095238095,0.529714285714286,0.5835327779462245,0.74,0.6733333333333333,0.7,0.6,0.6533333333333333,0.6333333333333333,0.5466666666666666 +Meta-Llama-3.1-8B-Instruct,0.5771428571428572,0.47047619047619055,0.5183792207297393,0.6933333333333334,0.5333333333333333,0.6266666666666667,0.54,0.54,0.54,0.5666666666666667 +Qwen2-72B-Instruct,0.7714285714285715,0.7765714285714286,0.773991456997936,0.8,0.78,0.7866666666666666,0.7333333333333333,0.76,0.78,0.76 +Meta-Llama-3-8B-Instruct,0.5276190476190475,0.3792380952380953,0.4412894449458876,0.62,0.5066666666666667,0.5066666666666667,0.5466666666666666,0.49333333333333335,0.52,0.5 +SeaLLMs-v3-7B-Chat,0.6580952380952381,0.6253333333333335,0.641296131344116,0.7466666666666667,0.6933333333333334,0.6933333333333334,0.6466666666666666,0.66,0.58,0.5866666666666667 +gemma-2-9b-it,0.7114285714285715,0.7201904761904762,0.7157827111185566,0.76,0.7333333333333333,0.7,0.66,0.7066666666666667,0.6933333333333334,0.7266666666666667 +Meta-Llama-3-70B-Instruct,0.7542857142857143,0.7228571428571428,0.7382370820168919,0.7933333333333333,0.74,0.7666666666666667,0.7466666666666667,0.7666666666666667,0.72,0.7466666666666667 +gemma-2-2b-it,0.5752380952380953,0.5333333333333332,0.5534936998355239,0.6866666666666666,0.5866666666666667,0.6066666666666667,0.5466666666666666,0.5466666666666666,0.5133333333333333,0.54 +llama3-8b-cpt-sea-lionv2-instruct,0.5466666666666667,0.4720000000000001,0.5065968585890122,0.66,0.49333333333333335,0.5466666666666666,0.5866666666666667,0.5666666666666667,0.5066666666666667,0.4666666666666667 diff --git a/results/cross_lingual/zero_shot/cross_xquad.csv b/results/cross_lingual/zero_shot/cross_xquad.csv index c70b781c2b64d574dce967ddf17d8399f2a2a234..7dc7520524ffcc3abd27ff8813dc18725848c830 100644 --- a/results/cross_lingual/zero_shot/cross_xquad.csv +++ b/results/cross_lingual/zero_shot/cross_xquad.csv @@ -1 +1,10 @@ Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino +Qwen2-7B-Instruct,0.940546218487395,0.9016806722689076,0.9207034712119446,0.9521008403361344,0.9352941176470588,0.9445378151260504,0.9302521008403362,,, +Meta-Llama-3.1-8B-Instruct,0.9340336134453782,0.8831932773109243,0.9079022683718587,0.9369747899159664,0.9302521008403362,0.946218487394958,0.9226890756302522,,, +Qwen2-72B-Instruct,0.9611344537815126,0.9506302521008403,0.9558534951942531,0.9638655462184874,0.9554621848739496,0.9613445378151261,0.9638655462184874,,, +Meta-Llama-3-8B-Instruct,0.8756302521008403,0.7699579831932772,0.8194012188828194,0.8815126050420168,0.8420168067226891,0.9092436974789916,0.8697478991596639,,, +SeaLLMs-v3-7B-Chat,0.9394957983193277,0.9172268907563025,0.9282278015934072,0.9512605042016806,0.938655462184874,0.938655462184874,0.9294117647058824,,, +gemma-2-9b-it,0.9571428571428572,0.9352941176470588,0.9460923622945893,0.9663865546218487,0.9411764705882353,0.9613445378151261,0.9596638655462185,,, +Meta-Llama-3-70B-Instruct,0.9586134453781513,0.9434873949579832,0.9509902767764395,0.9705882352941176,0.9394957983193277,0.9596638655462185,0.9647058823529412,,, +gemma-2-2b-it,0.9149159663865546,0.8632352941176471,0.888324599638689,0.9302521008403362,0.9016806722689076,0.9184873949579831,0.9092436974789916,,, +llama3-8b-cpt-sea-lionv2-instruct,0.8930672268907562,0.8262605042016806,0.8583659343003551,0.9142857142857143,0.8798319327731092,0.8890756302521008,0.8890756302521008,,, diff --git a/results/cultural_reasoning/few_shot/cn_eval.csv b/results/cultural_reasoning/few_shot/cn_eval.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..92dfeb51c7f4553bb7b9c067b99fc44445ca6b45 100644 --- a/results/cultural_reasoning/few_shot/cn_eval.csv +++ b/results/cultural_reasoning/few_shot/cn_eval.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.6 +Meta-Llama-3-8B,0.41904761904761906 +llama3-8b-cpt-sea-lionv2-base,0.4095238095238095 +Meta-Llama-3.1-8B,0.4857142857142857 diff --git a/results/cultural_reasoning/few_shot/ph_eval.csv b/results/cultural_reasoning/few_shot/ph_eval.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..5cfd2852f5f2078a8d683fe50416c8b99458a0d5 100644 --- a/results/cultural_reasoning/few_shot/ph_eval.csv +++ b/results/cultural_reasoning/few_shot/ph_eval.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.68 +Meta-Llama-3-8B,0.54 +llama3-8b-cpt-sea-lionv2-base,0.52 +Meta-Llama-3.1-8B,0.51 diff --git a/results/cultural_reasoning/few_shot/sg_eval.csv b/results/cultural_reasoning/few_shot/sg_eval.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..7afb73e2d7fa5abb9308b4d386f5a645cceff7ac 100644 --- a/results/cultural_reasoning/few_shot/sg_eval.csv +++ b/results/cultural_reasoning/few_shot/sg_eval.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.7572815533980582 +Meta-Llama-3-8B,0.6407766990291263 +llama3-8b-cpt-sea-lionv2-base,0.6310679611650486 +Meta-Llama-3.1-8B,0.6116504854368932 diff --git a/results/cultural_reasoning/few_shot/us_eval.csv b/results/cultural_reasoning/few_shot/us_eval.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..d4cc9d68b3613ad03c62b4c7c3da8a89213ee67e 100644 --- a/results/cultural_reasoning/few_shot/us_eval.csv +++ b/results/cultural_reasoning/few_shot/us_eval.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.8785046728971962 +Meta-Llama-3-8B,0.6915887850467289 +llama3-8b-cpt-sea-lionv2-base,0.719626168224299 +Meta-Llama-3.1-8B,0.6728971962616822 diff --git a/results/cultural_reasoning/zero_shot/cn_eval.csv b/results/cultural_reasoning/zero_shot/cn_eval.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..f52e9bd5d26b98217f4e0a7ca19bfd61938bdaec 100644 --- a/results/cultural_reasoning/zero_shot/cn_eval.csv +++ b/results/cultural_reasoning/zero_shot/cn_eval.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.8095238095238095 +Meta-Llama-3.1-8B-Instruct,0.42857142857142855 +Qwen2-72B-Instruct,0.8571428571428571 +Meta-Llama-3-8B-Instruct,0.37142857142857144 +SeaLLMs-v3-7B-Chat,0.8095238095238095 +gemma-2-9b-it,0.6190476190476191 +Meta-Llama-3-70B-Instruct,0.5142857142857142 +gemma-2-2b-it,0.4095238095238095 +llama3-8b-cpt-sea-lionv2-instruct,0.47619047619047616 diff --git a/results/cultural_reasoning/zero_shot/ph_eval.csv b/results/cultural_reasoning/zero_shot/ph_eval.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..f24785e4e5b37735fec0b1535e7d161aa2b03ee6 100644 --- a/results/cultural_reasoning/zero_shot/ph_eval.csv +++ b/results/cultural_reasoning/zero_shot/ph_eval.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.51 +Meta-Llama-3.1-8B-Instruct,0.56 +Qwen2-72B-Instruct,0.63 +Meta-Llama-3-8B-Instruct,0.54 +SeaLLMs-v3-7B-Chat,0.5 +gemma-2-9b-it,0.61 +Meta-Llama-3-70B-Instruct,0.63 +gemma-2-2b-it,0.39 +llama3-8b-cpt-sea-lionv2-instruct,0.53 diff --git a/results/cultural_reasoning/zero_shot/sg_eval.csv b/results/cultural_reasoning/zero_shot/sg_eval.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..6e806123c644d095cc3b9c07edd561898516d081 100644 --- a/results/cultural_reasoning/zero_shot/sg_eval.csv +++ b/results/cultural_reasoning/zero_shot/sg_eval.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.6699029126213593 +Meta-Llama-3.1-8B-Instruct,0.6019417475728155 +Qwen2-72B-Instruct,0.7378640776699029 +Meta-Llama-3-8B-Instruct,0.5922330097087378 +SeaLLMs-v3-7B-Chat,0.6310679611650486 +gemma-2-9b-it,0.6893203883495146 +Meta-Llama-3-70B-Instruct,0.7184466019417476 +gemma-2-2b-it,0.5533980582524272 +llama3-8b-cpt-sea-lionv2-instruct,0.6019417475728155 diff --git a/results/cultural_reasoning/zero_shot/us_eval.csv b/results/cultural_reasoning/zero_shot/us_eval.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..67468b19ba3472b7de20d2b8def5ea967bfc86fb 100644 --- a/results/cultural_reasoning/zero_shot/us_eval.csv +++ b/results/cultural_reasoning/zero_shot/us_eval.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.719626168224299 +Meta-Llama-3.1-8B-Instruct,0.6448598130841121 +Qwen2-72B-Instruct,0.8504672897196262 +Meta-Llama-3-8B-Instruct,0.6448598130841121 +SeaLLMs-v3-7B-Chat,0.7009345794392523 +gemma-2-9b-it,0.8317757009345794 +Meta-Llama-3-70B-Instruct,0.8691588785046729 +gemma-2-2b-it,0.7102803738317757 +llama3-8b-cpt-sea-lionv2-instruct,0.6542056074766355 diff --git a/results/dialogue/few_shot/dream.csv b/results/dialogue/few_shot/dream.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..ca8ea443b5ef324dd6489272c5826921d656490c 100644 --- a/results/dialogue/few_shot/dream.csv +++ b/results/dialogue/few_shot/dream.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.9510044096031357 +Meta-Llama-3-8B,0.8250857422831945 +llama3-8b-cpt-sea-lionv2-base,0.8515433610975012 +Meta-Llama-3.1-8B,0.8530132288094071 diff --git a/results/dialogue/zero_shot/dialogsum.csv b/results/dialogue/zero_shot/dialogsum.csv index 432ea7efdaa7a932511e6a0f0b394bfd4ea17963..35ab27303a8830ec46af028f8d7a6e623d21e6ce 100644 --- a/results/dialogue/zero_shot/dialogsum.csv +++ b/results/dialogue/zero_shot/dialogsum.csv @@ -1 +1,10 @@ Model,Average,ROUGE-1,ROUGE-2,ROUGE-L +Qwen2-7B-Instruct,0.20907406151501814,0.3054588156947843,0.09317750879187732,0.22858586005839285 +Meta-Llama-3.1-8B-Instruct,0.25775524210830225,0.361264483769506,0.1319601664036931,0.28004107615170776 +Qwen2-72B-Instruct,0.21903635116217549,0.31670807543803475,0.10250931612356096,0.23789166192493072 +Meta-Llama-3-8B-Instruct,0.23748034560689027,0.33656243928704743,0.11826169056076426,0.2576169069728591 +SeaLLMs-v3-7B-Chat,0.24723061042117522,0.3515679169380843,0.12081049484108507,0.2693134194843562 +gemma-2-9b-it,0.2587338648607764,0.3658237880022337,0.12722373001686862,0.2831540765632268 +Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.12758087337786866,0.2789546033796438 +gemma-2-2b-it,0.26123184071161726,0.3683777522574926,0.12793735218483035,0.28738041769252887 +llama3-8b-cpt-sea-lionv2-instruct,0.2531827068435159,0.35516222681696785,0.12864609875605545,0.2757397949575244 diff --git a/results/dialogue/zero_shot/dream.csv b/results/dialogue/zero_shot/dream.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..1ff97c7dc4a08538dae9881b7f40d5669a3bc578 100644 --- a/results/dialogue/zero_shot/dream.csv +++ b/results/dialogue/zero_shot/dream.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.9338559529642332 +Meta-Llama-3.1-8B-Instruct,0.8858402743753062 +Qwen2-72B-Instruct,0.9603135717785399 +Meta-Llama-3-8B-Instruct,0.5433610975012249 +SeaLLMs-v3-7B-Chat,0.9211170994610485 +gemma-2-9b-it,0.9397354238118569 +Meta-Llama-3-70B-Instruct,0.9480646741793238 +gemma-2-2b-it,0.8486036256736894 +llama3-8b-cpt-sea-lionv2-instruct,0.7555120039196472 diff --git a/results/dialogue/zero_shot/samsum.csv b/results/dialogue/zero_shot/samsum.csv index 432ea7efdaa7a932511e6a0f0b394bfd4ea17963..d6fb8398179cd4481edc04d62836406d803f2a07 100644 --- a/results/dialogue/zero_shot/samsum.csv +++ b/results/dialogue/zero_shot/samsum.csv @@ -1 +1,10 @@ Model,Average,ROUGE-1,ROUGE-2,ROUGE-L +Qwen2-7B-Instruct,0.2609036529701212,0.36802926348230236,0.1319027531874975,0.28277894224056366 +Meta-Llama-3.1-8B-Instruct,0.3002534894623792,0.41234119292969856,0.16596515741670248,0.3224541180407366 +Qwen2-72B-Instruct,0.27953180135225114,0.3883786925058577,0.15246657328712612,0.2977501382637696 +Meta-Llama-3-8B-Instruct,0.2850232460296334,0.3945214081577773,0.15619034353394273,0.3043579863971803 +SeaLLMs-v3-7B-Chat,0.2947730352305254,0.40661343212311085,0.16241730068430632,0.31528837288415906 +gemma-2-9b-it,0.30920311453647803,0.4269492679851157,0.16650133263007386,0.33415874299424464 +Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321 +gemma-2-2b-it,0.3067902178200617,0.4277497131478937,0.1609158209467132,0.3317051193655783 +llama3-8b-cpt-sea-lionv2-instruct,0.29924948830821335,0.40828658585731714,0.16733998585334992,0.32212189321397305 diff --git a/results/emotion/few_shot/ind_emotion.csv b/results/emotion/few_shot/ind_emotion.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..a516bf193e3207bcb628c3a97d08dabce2ee3c35 100644 --- a/results/emotion/few_shot/ind_emotion.csv +++ b/results/emotion/few_shot/ind_emotion.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.7159090909090909 +Meta-Llama-3-8B,0.4636363636363636 +llama3-8b-cpt-sea-lionv2-base,0.525 +Meta-Llama-3.1-8B,0.5136363636363637 diff --git a/results/emotion/few_shot/sst2.csv b/results/emotion/few_shot/sst2.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..ed2101fb6b7ac2d95f752ed9125267212006d9ba 100644 --- a/results/emotion/few_shot/sst2.csv +++ b/results/emotion/few_shot/sst2.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.9002293577981652 +Meta-Llama-3-8B,0.6697247706422018 +llama3-8b-cpt-sea-lionv2-base,0.75 +Meta-Llama-3.1-8B,0.8405963302752294 diff --git a/results/emotion/zero_shot/ind_emotion.csv b/results/emotion/zero_shot/ind_emotion.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..5a1d171fb5850c541ff3c79fb98edd1a12c1738f 100644 --- a/results/emotion/zero_shot/ind_emotion.csv +++ b/results/emotion/zero_shot/ind_emotion.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.6386363636363637 +Meta-Llama-3.1-8B-Instruct,0.6295454545454545 +Qwen2-72B-Instruct,0.675 +Meta-Llama-3-8B-Instruct,0.6522727272727272 +SeaLLMs-v3-7B-Chat,0.34545454545454546 +gemma-2-9b-it,0.7431818181818182 +Meta-Llama-3-70B-Instruct,0.6909090909090909 +gemma-2-2b-it,0.625 +llama3-8b-cpt-sea-lionv2-instruct,0.6272727272727273 diff --git a/results/emotion/zero_shot/sst2.csv b/results/emotion/zero_shot/sst2.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..3d61c8739d18c10f0f6557c4e0bd1f57077363fd 100644 --- a/results/emotion/zero_shot/sst2.csv +++ b/results/emotion/zero_shot/sst2.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.9231651376146789 +Meta-Llama-3.1-8B-Instruct,0.8784403669724771 +Qwen2-72B-Instruct,0.9369266055045872 +Meta-Llama-3-8B-Instruct,0.8669724770642202 +SeaLLMs-v3-7B-Chat,0.9346330275229358 +gemma-2-9b-it,0.9311926605504587 +Meta-Llama-3-70B-Instruct,0.9495412844036697 +gemma-2-2b-it,0.9208715596330275 +llama3-8b-cpt-sea-lionv2-instruct,0.9162844036697247 diff --git a/results/flores_translation/few_shot/ind2eng.csv b/results/flores_translation/few_shot/ind2eng.csv index c92cc58bffcf5c69f0b19b68264727e785b847ae..2829b14d37b6c9e9afdc5c23dfbb1f6b8b49f69b 100644 --- a/results/flores_translation/few_shot/ind2eng.csv +++ b/results/flores_translation/few_shot/ind2eng.csv @@ -1 +1,5 @@ Model,BLEU +Meta-Llama-3-70B,0.4224655367668861 +Meta-Llama-3-8B,0.37760317005449096 +llama3-8b-cpt-sea-lionv2-base,0.37662180389435995 +Meta-Llama-3.1-8B,0.384092499597103 diff --git a/results/flores_translation/few_shot/vie2eng.csv b/results/flores_translation/few_shot/vie2eng.csv index c92cc58bffcf5c69f0b19b68264727e785b847ae..66919bd957daa9cf255aa8b1d75121c00d5fe2f7 100644 --- a/results/flores_translation/few_shot/vie2eng.csv +++ b/results/flores_translation/few_shot/vie2eng.csv @@ -1 +1,5 @@ Model,BLEU +Meta-Llama-3-70B,0.3564689224836266 +Meta-Llama-3-8B,0.31157996445764863 +llama3-8b-cpt-sea-lionv2-base,0.30608365217733097 +Meta-Llama-3.1-8B,0.320367356810332 diff --git a/results/flores_translation/few_shot/zho2eng.csv b/results/flores_translation/few_shot/zho2eng.csv index c92cc58bffcf5c69f0b19b68264727e785b847ae..d6442442355f718994e069117909785478c8bb1f 100644 --- a/results/flores_translation/few_shot/zho2eng.csv +++ b/results/flores_translation/few_shot/zho2eng.csv @@ -1 +1,5 @@ Model,BLEU +Meta-Llama-3-70B,0.27798501796196434 +Meta-Llama-3-8B,0.23710858530408072 +llama3-8b-cpt-sea-lionv2-base,0.22831898923969038 +Meta-Llama-3.1-8B,0.23777256698409086 diff --git a/results/flores_translation/few_shot/zsm2eng.csv b/results/flores_translation/few_shot/zsm2eng.csv index c92cc58bffcf5c69f0b19b68264727e785b847ae..ce920350b7384931884df3b1255ee0716e0c5dbb 100644 --- a/results/flores_translation/few_shot/zsm2eng.csv +++ b/results/flores_translation/few_shot/zsm2eng.csv @@ -1 +1,5 @@ Model,BLEU +Meta-Llama-3-70B,0.44357168236218214 +Meta-Llama-3-8B,0.3908770132718593 +llama3-8b-cpt-sea-lionv2-base,0.37668373435658764 +Meta-Llama-3.1-8B,0.3893813156403672 diff --git a/results/flores_translation/zero_shot/ind2eng.csv b/results/flores_translation/zero_shot/ind2eng.csv index c92cc58bffcf5c69f0b19b68264727e785b847ae..8730164d110b4eeaa8f7a0312f20bb51ae712c7e 100644 --- a/results/flores_translation/zero_shot/ind2eng.csv +++ b/results/flores_translation/zero_shot/ind2eng.csv @@ -1 +1,10 @@ Model,BLEU +Qwen2-7B-Instruct,0.2968667083646938 +Meta-Llama-3.1-8B-Instruct,0.3851478947359834 +Qwen2-72B-Instruct,0.40378146176265345 +Meta-Llama-3-8B-Instruct,0.33011728860318257 +SeaLLMs-v3-7B-Chat,0.3642282499148727 +gemma-2-9b-it,0.4115273387213549 +Meta-Llama-3-70B-Instruct,0.3830092775167675 +gemma-2-2b-it,0.3496340692126605 +llama3-8b-cpt-sea-lionv2-instruct,0.39322992478935465 diff --git a/results/flores_translation/zero_shot/vie2eng.csv b/results/flores_translation/zero_shot/vie2eng.csv index c92cc58bffcf5c69f0b19b68264727e785b847ae..fe7d73c746401e06f6ea73398c4d90df46ff506d 100644 --- a/results/flores_translation/zero_shot/vie2eng.csv +++ b/results/flores_translation/zero_shot/vie2eng.csv @@ -1 +1,10 @@ Model,BLEU +Qwen2-7B-Instruct,0.23571859325121644 +Meta-Llama-3.1-8B-Instruct,0.3229889780558947 +Qwen2-72B-Instruct,0.3326034551014482 +Meta-Llama-3-8B-Instruct,0.2637063711923046 +SeaLLMs-v3-7B-Chat,0.3073965938987496 +gemma-2-9b-it,0.33638205957057027 +Meta-Llama-3-70B-Instruct,0.3230140263371192 +gemma-2-2b-it,0.2717960864611513 +llama3-8b-cpt-sea-lionv2-instruct,0.33210048239854756 diff --git a/results/flores_translation/zero_shot/zho2eng.csv b/results/flores_translation/zero_shot/zho2eng.csv index c92cc58bffcf5c69f0b19b68264727e785b847ae..a15ed0ec60b11f6bfc84c249ed68fc1db7933ef7 100644 --- a/results/flores_translation/zero_shot/zho2eng.csv +++ b/results/flores_translation/zero_shot/zho2eng.csv @@ -1 +1,10 @@ Model,BLEU +Qwen2-7B-Instruct,0.21747115262398484 +Meta-Llama-3.1-8B-Instruct,0.24469097639356438 +Qwen2-72B-Instruct,0.24317967002278634 +Meta-Llama-3-8B-Instruct,0.19960072119079214 +SeaLLMs-v3-7B-Chat,0.25023469014968713 +gemma-2-9b-it,0.26747029920541504 +Meta-Llama-3-70B-Instruct,0.24397819518058994 +gemma-2-2b-it,0.21203164253450932 +llama3-8b-cpt-sea-lionv2-instruct,0.24572934810342245 diff --git a/results/flores_translation/zero_shot/zsm2eng.csv b/results/flores_translation/zero_shot/zsm2eng.csv index c92cc58bffcf5c69f0b19b68264727e785b847ae..6899526e35c1895ba7a6d88342433ab1b623882d 100644 --- a/results/flores_translation/zero_shot/zsm2eng.csv +++ b/results/flores_translation/zero_shot/zsm2eng.csv @@ -1 +1,10 @@ Model,BLEU +Qwen2-7B-Instruct,0.27198336767927184 +Meta-Llama-3.1-8B-Instruct,0.3833985449157327 +Qwen2-72B-Instruct,0.40613262295280417 +Meta-Llama-3-8B-Instruct,0.31536374302282033 +SeaLLMs-v3-7B-Chat,0.3535493169696862 +gemma-2-9b-it,0.4248122066845582 +Meta-Llama-3-70B-Instruct,0.3957287030176054 +gemma-2-2b-it,0.33384917509056944 +llama3-8b-cpt-sea-lionv2-instruct,0.391912232406389 diff --git a/results/fundamental_nlp_tasks/few_shot/c3.csv b/results/fundamental_nlp_tasks/few_shot/c3.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..38493301cb7e68236bb3de9841ab68bae7dd4070 100644 --- a/results/fundamental_nlp_tasks/few_shot/c3.csv +++ b/results/fundamental_nlp_tasks/few_shot/c3.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.9390426327599103 +Meta-Llama-3-8B,0.7703814510097232 +llama3-8b-cpt-sea-lionv2-base,0.7913238593866866 +Meta-Llama-3.1-8B,0.8208676140613314 diff --git a/results/fundamental_nlp_tasks/few_shot/cola.csv b/results/fundamental_nlp_tasks/few_shot/cola.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..00f5e8542eb823ed227b36ee556e370be93a8403 100644 --- a/results/fundamental_nlp_tasks/few_shot/cola.csv +++ b/results/fundamental_nlp_tasks/few_shot/cola.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.7171620325982742 +Meta-Llama-3-8B,0.6596356663470757 +llama3-8b-cpt-sea-lionv2-base,0.6021093000958773 +Meta-Llama-3.1-8B,0.6222435282837967 diff --git a/results/fundamental_nlp_tasks/few_shot/mnli.csv b/results/fundamental_nlp_tasks/few_shot/mnli.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..68e73f5a56b76e3396eacac230adcae81941b1a1 100644 --- a/results/fundamental_nlp_tasks/few_shot/mnli.csv +++ b/results/fundamental_nlp_tasks/few_shot/mnli.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.7505 +Meta-Llama-3-8B,0.46174988547869905 +llama3-8b-cpt-sea-lionv2-base,0.472 +Meta-Llama-3.1-8B,0.48506133251895966 diff --git a/results/fundamental_nlp_tasks/few_shot/mrpc.csv b/results/fundamental_nlp_tasks/few_shot/mrpc.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..baaa2c4bd10480028fc45d8fb3c7f5523a71669c 100644 --- a/results/fundamental_nlp_tasks/few_shot/mrpc.csv +++ b/results/fundamental_nlp_tasks/few_shot/mrpc.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.6764705882352942 +Meta-Llama-3-8B,0.5906862745098039 +llama3-8b-cpt-sea-lionv2-base,0.6078431372549019 +Meta-Llama-3.1-8B,0.5661764705882353 diff --git a/results/fundamental_nlp_tasks/few_shot/ocnli.csv b/results/fundamental_nlp_tasks/few_shot/ocnli.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..9a93b8aefa1041268d828aad3f9b3bfc6dd48289 100644 --- a/results/fundamental_nlp_tasks/few_shot/ocnli.csv +++ b/results/fundamental_nlp_tasks/few_shot/ocnli.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.6840677966101695 +Meta-Llama-3-8B,0.3935593220338983 +llama3-8b-cpt-sea-lionv2-base,0.3840677966101695 +Meta-Llama-3.1-8B,0.411864406779661 diff --git a/results/fundamental_nlp_tasks/few_shot/qnli.csv b/results/fundamental_nlp_tasks/few_shot/qnli.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..7b083801e8a6c607e5d9b304dbdee71b5dc75e43 100644 --- a/results/fundamental_nlp_tasks/few_shot/qnli.csv +++ b/results/fundamental_nlp_tasks/few_shot/qnli.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.572 +Meta-Llama-3-8B,0.5059491122094087 +llama3-8b-cpt-sea-lionv2-base,0.49716273110012815 +Meta-Llama-3.1-8B,0.5081457074867289 diff --git a/results/fundamental_nlp_tasks/few_shot/qqp.csv b/results/fundamental_nlp_tasks/few_shot/qqp.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..39970ced8743d97ba4b0766cab39349656b86a9f 100644 --- a/results/fundamental_nlp_tasks/few_shot/qqp.csv +++ b/results/fundamental_nlp_tasks/few_shot/qqp.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.7215 +Meta-Llama-3-8B,0.551 +llama3-8b-cpt-sea-lionv2-base,0.519 +Meta-Llama-3.1-8B,0.5565 diff --git a/results/fundamental_nlp_tasks/few_shot/rte.csv b/results/fundamental_nlp_tasks/few_shot/rte.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..1ddb286762e7636c710a97efcafa927eaf29fbec 100644 --- a/results/fundamental_nlp_tasks/few_shot/rte.csv +++ b/results/fundamental_nlp_tasks/few_shot/rte.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.776173285198556 +Meta-Llama-3-8B,0.5487364620938628 +llama3-8b-cpt-sea-lionv2-base,0.6462093862815884 +Meta-Llama-3.1-8B,0.6137184115523465 diff --git a/results/fundamental_nlp_tasks/few_shot/wnli.csv b/results/fundamental_nlp_tasks/few_shot/wnli.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..f5b37890cf4e1315837daf80baa3930e0ab7912d 100644 --- a/results/fundamental_nlp_tasks/few_shot/wnli.csv +++ b/results/fundamental_nlp_tasks/few_shot/wnli.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.8169014084507042 +Meta-Llama-3-8B,0.4647887323943662 +llama3-8b-cpt-sea-lionv2-base,0.5915492957746479 +Meta-Llama-3.1-8B,0.5211267605633803 diff --git a/results/fundamental_nlp_tasks/zero_shot/c3.csv b/results/fundamental_nlp_tasks/zero_shot/c3.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..991d1687eed76b506cec18f559a28f6ff189066c 100644 --- a/results/fundamental_nlp_tasks/zero_shot/c3.csv +++ b/results/fundamental_nlp_tasks/zero_shot/c3.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.9233358264771877 +Meta-Llama-3.1-8B-Instruct,0.7984293193717278 +Qwen2-72B-Instruct,0.9599850411368736 +Meta-Llama-3-8B-Instruct,0.8515332834704562 +SeaLLMs-v3-7B-Chat,0.912490650710546 +gemma-2-9b-it,0.9210919970082274 +Meta-Llama-3-70B-Instruct,0.9521316379955124 +gemma-2-2b-it,0.7703814510097232 +llama3-8b-cpt-sea-lionv2-instruct,0.675392670157068 diff --git a/results/fundamental_nlp_tasks/zero_shot/cola.csv b/results/fundamental_nlp_tasks/zero_shot/cola.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..ca3b4f8921443a79027ebcd680ad8022277661fb 100644 --- a/results/fundamental_nlp_tasks/zero_shot/cola.csv +++ b/results/fundamental_nlp_tasks/zero_shot/cola.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.7861936720997124 +Meta-Llama-3.1-8B-Instruct,0.7046979865771812 +Qwen2-72B-Instruct,0.8360498561840843 +Meta-Llama-3-8B-Instruct,0.6481303930968361 +SeaLLMs-v3-7B-Chat,0.7890699904122723 +gemma-2-9b-it,0.7967401725790988 +Meta-Llama-3-70B-Instruct,0.835091083413231 +gemma-2-2b-it,0.6711409395973155 +llama3-8b-cpt-sea-lionv2-instruct,0.5915627996164909 diff --git a/results/fundamental_nlp_tasks/zero_shot/mnli.csv b/results/fundamental_nlp_tasks/zero_shot/mnli.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..04b52a3d2f53dc4d39c16aa41c0f36f692d289ee 100644 --- a/results/fundamental_nlp_tasks/zero_shot/mnli.csv +++ b/results/fundamental_nlp_tasks/zero_shot/mnli.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.7341578867002596 +Meta-Llama-3.1-8B-Instruct,0.4603756298671553 +Qwen2-72B-Instruct,0.7979844251030692 +Meta-Llama-3-8B-Instruct,0.5296991907161399 +SeaLLMs-v3-7B-Chat,0.638 +gemma-2-9b-it,0.707 +Meta-Llama-3-70B-Instruct,0.6709421285692472 +gemma-2-2b-it,0.612 +llama3-8b-cpt-sea-lionv2-instruct,0.5276123581208327 diff --git a/results/fundamental_nlp_tasks/zero_shot/mrpc.csv b/results/fundamental_nlp_tasks/zero_shot/mrpc.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..88020a4a32029218053c59afb6c5eeedf203f86a 100644 --- a/results/fundamental_nlp_tasks/zero_shot/mrpc.csv +++ b/results/fundamental_nlp_tasks/zero_shot/mrpc.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.7745098039215687 +Meta-Llama-3.1-8B-Instruct,0.6740196078431373 +Qwen2-72B-Instruct,0.7941176470588235 +Meta-Llama-3-8B-Instruct,0.6764705882352942 +SeaLLMs-v3-7B-Chat,0.7475490196078431 +gemma-2-9b-it,0.7450980392156863 +Meta-Llama-3-70B-Instruct,0.7598039215686274 +gemma-2-2b-it,0.7132352941176471 +llama3-8b-cpt-sea-lionv2-instruct,0.49264705882352944 diff --git a/results/fundamental_nlp_tasks/zero_shot/ocnli.csv b/results/fundamental_nlp_tasks/zero_shot/ocnli.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..ba340dca11bb458fbba860921508c29f6da8e9b1 100644 --- a/results/fundamental_nlp_tasks/zero_shot/ocnli.csv +++ b/results/fundamental_nlp_tasks/zero_shot/ocnli.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.6474576271186441 +Meta-Llama-3.1-8B-Instruct,0.42135593220338985 +Qwen2-72B-Instruct,0.7874576271186441 +Meta-Llama-3-8B-Instruct,0.4322033898305085 +SeaLLMs-v3-7B-Chat,0.5613559322033899 +gemma-2-9b-it,0.6183050847457627 +Meta-Llama-3-70B-Instruct,0.5928813559322034 +gemma-2-2b-it,0.4335593220338983 +llama3-8b-cpt-sea-lionv2-instruct,0.4135593220338983 diff --git a/results/fundamental_nlp_tasks/zero_shot/qnli.csv b/results/fundamental_nlp_tasks/zero_shot/qnli.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..46d82540e7622e63677752a7eceedfc5306712ca 100644 --- a/results/fundamental_nlp_tasks/zero_shot/qnli.csv +++ b/results/fundamental_nlp_tasks/zero_shot/qnli.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.8169503935566539 +Meta-Llama-3.1-8B-Instruct,0.6027823540179389 +Qwen2-72B-Instruct,0.8894380377082189 +Meta-Llama-3-8B-Instruct,0.5689181768259198 +SeaLLMs-v3-7B-Chat,0.7181036060772469 +gemma-2-9b-it,0.90481420464946 +Meta-Llama-3-70B-Instruct,0.876807614863628 +gemma-2-2b-it,0.779974373055098 +llama3-8b-cpt-sea-lionv2-instruct,0.5652571846970529 diff --git a/results/fundamental_nlp_tasks/zero_shot/qqp.csv b/results/fundamental_nlp_tasks/zero_shot/qqp.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..83c5398020db45960701813ee083a9a8818dd554 100644 --- a/results/fundamental_nlp_tasks/zero_shot/qqp.csv +++ b/results/fundamental_nlp_tasks/zero_shot/qqp.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.7771209497897601 +Meta-Llama-3.1-8B-Instruct,0.5058125154588177 +Qwen2-72B-Instruct,0.7992332426416028 +Meta-Llama-3-8B-Instruct,0.5512490724709375 +SeaLLMs-v3-7B-Chat,0.757 +gemma-2-9b-it,0.761 +Meta-Llama-3-70B-Instruct,0.7876082117239673 +gemma-2-2b-it,0.771 +llama3-8b-cpt-sea-lionv2-instruct,0.585 diff --git a/results/fundamental_nlp_tasks/zero_shot/rte.csv b/results/fundamental_nlp_tasks/zero_shot/rte.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..649af4ed46eacb110390901d10d670d57dc917f1 100644 --- a/results/fundamental_nlp_tasks/zero_shot/rte.csv +++ b/results/fundamental_nlp_tasks/zero_shot/rte.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.8411552346570397 +Meta-Llama-3.1-8B-Instruct,0.6895306859205776 +Qwen2-72B-Instruct,0.8592057761732852 +Meta-Llama-3-8B-Instruct,0.6028880866425993 +SeaLLMs-v3-7B-Chat,0.7870036101083032 +gemma-2-9b-it,0.7472924187725631 +Meta-Llama-3-70B-Instruct,0.8086642599277978 +gemma-2-2b-it,0.7003610108303249 +llama3-8b-cpt-sea-lionv2-instruct,0.6209386281588448 diff --git a/results/fundamental_nlp_tasks/zero_shot/wnli.csv b/results/fundamental_nlp_tasks/zero_shot/wnli.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..8e0b89e5c021198f9b133b4c87f6cb0678bcdfa9 100644 --- a/results/fundamental_nlp_tasks/zero_shot/wnli.csv +++ b/results/fundamental_nlp_tasks/zero_shot/wnli.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.647887323943662 +Meta-Llama-3.1-8B-Instruct,0.4507042253521127 +Qwen2-72B-Instruct,0.9014084507042254 +Meta-Llama-3-8B-Instruct,0.4507042253521127 +SeaLLMs-v3-7B-Chat,0.6619718309859155 +gemma-2-9b-it,0.7464788732394366 +Meta-Llama-3-70B-Instruct,0.7887323943661971 +gemma-2-2b-it,0.43661971830985913 +llama3-8b-cpt-sea-lionv2-instruct,0.4788732394366197 diff --git a/results/general_reasoning/few_shot/c_eval.csv b/results/general_reasoning/few_shot/c_eval.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..26806ff8e15bb7ce4a642ff247caac97b7a3cadf 100644 --- a/results/general_reasoning/few_shot/c_eval.csv +++ b/results/general_reasoning/few_shot/c_eval.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.6183063511830635 +Meta-Llama-3-8B,0.43773349937733497 +llama3-8b-cpt-sea-lionv2-base,0.42092154420921546 +Meta-Llama-3.1-8B,0.44458281444582815 diff --git a/results/general_reasoning/few_shot/cmmlu.csv b/results/general_reasoning/few_shot/cmmlu.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..1ae9aa2f5730321f5b477c9c05eb9a6811af4693 100644 --- a/results/general_reasoning/few_shot/cmmlu.csv +++ b/results/general_reasoning/few_shot/cmmlu.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.652650664824728 +Meta-Llama-3-8B,0.4308409601105163 +llama3-8b-cpt-sea-lionv2-base,0.4389570022448627 +Meta-Llama-3.1-8B,0.4556207908824037 diff --git a/results/general_reasoning/few_shot/indommlu.csv b/results/general_reasoning/few_shot/indommlu.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..a5fd152703c28585c0d717aff80eac1b024ec105 100644 --- a/results/general_reasoning/few_shot/indommlu.csv +++ b/results/general_reasoning/few_shot/indommlu.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.6355564456906335 +Meta-Llama-3-8B,0.4500300420588824 +llama3-8b-cpt-sea-lionv2-base,0.5077775552440082 +Meta-Llama-3.1-8B,0.4644502303224514 diff --git a/results/general_reasoning/few_shot/mmlu.csv b/results/general_reasoning/few_shot/mmlu.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..ada19c754955a7ed08e6bea28f5a2ea0bfcd9bd0 100644 --- a/results/general_reasoning/few_shot/mmlu.csv +++ b/results/general_reasoning/few_shot/mmlu.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.7509474436896675 +Meta-Llama-3-8B,0.5651054701465856 +llama3-8b-cpt-sea-lionv2-base,0.5598140865212728 +Meta-Llama-3.1-8B,0.5749731855559528 diff --git a/results/general_reasoning/few_shot/zbench.csv b/results/general_reasoning/few_shot/zbench.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..cf5c92c8c272f0be2588cfe95e9f01a6a45368a3 100644 --- a/results/general_reasoning/few_shot/zbench.csv +++ b/results/general_reasoning/few_shot/zbench.csv @@ -1 +1,5 @@ Model,Accuracy +Meta-Llama-3-70B,0.5151515151515151 +Meta-Llama-3-8B,0.2727272727272727 +llama3-8b-cpt-sea-lionv2-base,0.3333333333333333 +Meta-Llama-3.1-8B,0.3939393939393939 diff --git a/results/general_reasoning/zero_shot/c_eval.csv b/results/general_reasoning/zero_shot/c_eval.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..b55656fc88ac00a75b4d820f0e0cc242c571beb7 100644 --- a/results/general_reasoning/zero_shot/c_eval.csv +++ b/results/general_reasoning/zero_shot/c_eval.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.7546699875466999 +Meta-Llama-3.1-8B-Instruct,0.3493150684931507 +Qwen2-72B-Instruct,0.823785803237858 +Meta-Llama-3-8B-Instruct,0.4533001245330012 +SeaLLMs-v3-7B-Chat,0.7440846824408468 +gemma-2-9b-it,0.547945205479452 +Meta-Llama-3-70B-Instruct,0.6046077210460772 +gemma-2-2b-it,0.4153175591531756 +llama3-8b-cpt-sea-lionv2-instruct,0.398505603985056 diff --git a/results/general_reasoning/zero_shot/cmmlu.csv b/results/general_reasoning/zero_shot/cmmlu.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..83334baa2e2c4f60320bdddcaa0f4a1035cf57a7 100644 --- a/results/general_reasoning/zero_shot/cmmlu.csv +++ b/results/general_reasoning/zero_shot/cmmlu.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.7656708685891901 +Meta-Llama-3.1-8B-Instruct,0.38240372992574684 +Qwen2-72B-Instruct,0.8240372992574685 +Meta-Llama-3-8B-Instruct,0.4679675358314626 +SeaLLMs-v3-7B-Chat,0.7718010706268348 +gemma-2-9b-it,0.5721809704714211 +Meta-Llama-3-70B-Instruct,0.6195821101709549 +gemma-2-2b-it,0.4336902089449145 +llama3-8b-cpt-sea-lionv2-instruct,0.4105508547746503 diff --git a/results/general_reasoning/zero_shot/indommlu.csv b/results/general_reasoning/zero_shot/indommlu.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..b38fcd5ddef99da80babd5dbf3970982f79b2397 100644 --- a/results/general_reasoning/zero_shot/indommlu.csv +++ b/results/general_reasoning/zero_shot/indommlu.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.53027571934041 +Meta-Llama-3.1-8B-Instruct,0.4701916015755391 +Qwen2-72B-Instruct,0.6356232058214835 +Meta-Llama-3-8B-Instruct,0.5115161225716003 +SeaLLMs-v3-7B-Chat,0.42826623940182923 +gemma-2-9b-it,0.5599839775685961 +Meta-Llama-3-70B-Instruct,0.6323519594098405 +gemma-2-2b-it,0.43447493157086586 +llama3-8b-cpt-sea-lionv2-instruct,0.4962280526069831 diff --git a/results/general_reasoning/zero_shot/mmlu.csv b/results/general_reasoning/zero_shot/mmlu.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..318313795debe87f13ce99a158586420f8f1ce65 100644 --- a/results/general_reasoning/zero_shot/mmlu.csv +++ b/results/general_reasoning/zero_shot/mmlu.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.6654272434751519 +Meta-Llama-3.1-8B-Instruct,0.5518770110833036 +Qwen2-72B-Instruct,0.7935645334286736 +Meta-Llama-3-8B-Instruct,0.508044333214158 +SeaLLMs-v3-7B-Chat,0.6637826242402575 +gemma-2-9b-it,0.7107615302109402 +Meta-Llama-3-70B-Instruct,0.7607436539149088 +gemma-2-2b-it,0.5676081515909903 +llama3-8b-cpt-sea-lionv2-instruct,0.5619592420450482 diff --git a/results/general_reasoning/zero_shot/zbench.csv b/results/general_reasoning/zero_shot/zbench.csv index 1dd9db3171b0ad2e4edc84dcebb54f8d76c675ab..2195daac5055b173ebf1ab96e97cb8b8895c3158 100644 --- a/results/general_reasoning/zero_shot/zbench.csv +++ b/results/general_reasoning/zero_shot/zbench.csv @@ -1 +1,10 @@ Model,Accuracy +Qwen2-7B-Instruct,0.696969696969697 +Meta-Llama-3.1-8B-Instruct,0.45454545454545453 +Qwen2-72B-Instruct,0.5757575757575758 +Meta-Llama-3-8B-Instruct,0.30303030303030304 +SeaLLMs-v3-7B-Chat,0.5151515151515151 +gemma-2-9b-it,0.48484848484848486 +Meta-Llama-3-70B-Instruct,0.45454545454545453 +gemma-2-2b-it,0.21212121212121213 +llama3-8b-cpt-sea-lionv2-instruct,0.09090909090909091