davidkim205 commited on
Commit
e54658b
1 Parent(s): 1b0c644

add c4ai-command-r-plus Qwen2-57B-A14B-Instruct

Browse files
Files changed (1) hide show
  1. ko_bench.csv +8 -0
ko_bench.csv CHANGED
@@ -10,11 +10,13 @@ gpt-4o,1,google__gemma-2-27b-it,8.3,6.8,9.4,9.5,7.9,5.4,9.0,9.0,9.2
10
  gpt-4o,1,google__gemini-1.5-pro,8.2,5.5,9.7,8.7,7.5,6.5,9.1,9.4,9.2
11
  gpt-4o,1,davidkim205__ko-gemma-2-9b-it,7.8,6.6,9.0,8.4,6.7,6.2,8.1,8.9,8.7
12
  gpt-4o,1,google__gemma-2-9b-it,7.7,6.2,9.3,8.8,5.4,5.4,8.8,8.8,8.7
 
13
  gpt-4o,1,alpindale__WizardLM-2-8x22B,7.4,6.8,6.8,7.8,8.7,4.8,7.2,8.4,8.7
14
  gpt-4o,1,openai__gpt-3.5-turbo-0125,6.7,5.2,9.0,7.7,6.4,3.3,7.2,6.5,8.6
15
  gpt-4o,1,meta-llama__Meta-Llama-3.1-70B-Instruct,6.6,6.4,8.7,8.0,4.5,4.0,7.9,7.4,5.9
16
  gpt-4o,1,Qwen__Qwen2-7B-Instruct,6.5,3.9,9.0,8.0,5.6,3.6,7.0,6.6,8.2
17
  gpt-4o,1,LGAI-EXAONE__EXAONE-3.0-7.8B-Instruct,6.2,4.9,7.4,7.1,7.3,5.1,6.4,4.1,7.6
 
18
  gpt-4o,1,Qwen__Qwen1.5-32B-Chat,6.1,4.0,8.6,8.5,4.7,2.6,6.3,7.5,6.7
19
  gpt-4o,1,KISTI-KONI__KONI-Llama3-8B-Instruct-20240729,5.8,3.5,5.0,8.5,5.4,3.2,5.4,7.5,7.6
20
  gpt-4o,1,davidkim205__Ko-Llama-3-8B-Instruct,5.7,4.6,7.0,7.7,2.8,2.5,6.2,6.9,7.6
@@ -36,10 +38,12 @@ gpt-4o,2,alpindale__WizardLM-2-8x22B,6.4,6.0,8.2,7.2,6.1,4.1,7.0,6.8,5.5
36
  gpt-4o,2,google__gemma-2-9b-it,6.2,4.8,7.6,8.3,4.9,3.9,7.0,7.4,6.1
37
  gpt-4o,2,Qwen__Qwen1.5-32B-Chat,5.8,4.3,8.2,7.6,3.8,3.0,6.8,5.9,6.9
38
  gpt-4o,2,meta-llama__Meta-Llama-3.1-70B-Instruct,5.7,5.5,8.0,7.4,3.6,2.9,6.6,5.7,5.7
 
39
  gpt-4o,2,LGAI-EXAONE__EXAONE-3.0-7.8B-Instruct,5.6,5.8,6.2,5.5,7.0,4.0,5.7,4.3,6.5
40
  gpt-4o,2,openai__gpt-3.5-turbo-0125,5.4,5.8,5.7,7.2,4.4,3.0,6.6,4.4,6.4
41
  gpt-4o,2,Qwen__Qwen2-7B-Instruct,5.3,5.0,7.0,6.6,5.1,2.7,5.6,4.8,5.9
42
  gpt-4o,2,Qwen__Qwen1.5-14B-Chat,4.9,3.5,5.1,7.4,4.1,2.7,5.9,5.0,5.9
 
43
  gpt-4o,2,KISTI-KONI__KONI-Llama3-8B-Instruct-20240729,4.5,3.3,3.8,7.6,4.9,2.1,5.6,5.7,3.3
44
  gpt-4o,2,mistralai__Mistral-7B-Instruct-v0.2,4.5,3.9,4.4,6.8,2.2,2.4,6.2,5.6,4.6
45
  gpt-4o,2,davidkim205__Ko-Llama-3-8B-Instruct,4.0,3.7,4.3,6.4,2.8,2.3,4.9,4.0,4.1
@@ -56,6 +60,7 @@ keval,1,google__gemma-2-27b-it,8.1,5.9,9.3,9.4,7.4,5.7,8.9,9.0,9.0
56
  keval,1,Qwen__Qwen2-72B-Instruct,8.0,5.0,9.2,8.8,8.6,6.9,7.7,9.1,9.0
57
  keval,1,davidkim205__ko-gemma-2-9b-it,7.8,5.9,9.4,8.5,6.0,6.3,8.2,9.0,8.9
58
  keval,1,google__gemma-2-9b-it,7.6,6.7,8.8,8.5,5.2,5.5,9.0,8.6,8.5
 
59
  keval,1,meta-llama__Meta-Llama-3.1-70B-Instruct,7.3,6.8,9.0,8.3,5.9,5.1,8.4,8.0,7.1
60
  keval,1,Qwen__Qwen1.5-14B-Chat,7.2,4.7,9.7,8.8,4.5,4.8,8.1,8.9,8.4
61
  keval,1,LGAI-EXAONE__EXAONE-3.0-7.8B-Instruct,7.2,5.7,8.8,8.1,8.2,6.0,7.7,5.6,7.3
@@ -63,6 +68,7 @@ keval,1,alpindale__WizardLM-2-8x22B,7.1,6.1,5.6,7.9,8.8,5.9,6.5,8.7,7.1
63
  keval,1,Qwen__Qwen1.5-32B-Chat,7.0,3.9,9.9,8.9,5.8,3.6,7.1,8.6,7.9
64
  keval,1,openai__gpt-3.5-turbo-0125,6.9,5.6,8.9,7.7,6.4,3.2,7.4,7.5,8.6
65
  keval,1,KISTI-KONI__KONI-Llama3-8B-Instruct-20240729,6.8,3.4,8.6,8.5,5.5,4.1,6.9,8.8,8.4
 
66
  keval,1,Qwen__Qwen2-7B-Instruct,6.4,3.6,9.0,7.7,5.5,3.5,7.1,6.7,8.4
67
  keval,1,meta-llama__Meta-Llama-3.1-8B-Instruct,6.3,4.3,8.9,7.7,5.3,3.3,7.3,6.0,7.5
68
  keval,1,davidkim205__Ko-Llama-3-8B-Instruct,6.0,5.0,7.4,7.6,2.9,2.9,7.0,8.0,7.6
@@ -84,8 +90,10 @@ keval,2,LGAI-EXAONE__EXAONE-3.0-7.8B-Instruct,6.2,5.9,7.0,6.4,6.7,4.3,7.6,4.2,7.
84
  keval,2,Qwen__Qwen1.5-32B-Chat,6.2,5.2,7.7,8.0,4.1,4.0,7.7,6.7,6.5
85
  keval,2,Qwen__Qwen1.5-14B-Chat,6.0,4.7,6.9,7.9,4.8,3.8,7.2,6.3,6.7
86
  keval,2,meta-llama__Meta-Llama-3.1-70B-Instruct,6.0,6.0,7.3,7.6,5.6,2.9,7.0,6.2,5.6
 
87
  keval,2,Qwen__Qwen2-7B-Instruct,5.6,4.9,7.0,6.5,5.1,3.1,6.3,5.0,6.5
88
  keval,2,KISTI-KONI__KONI-Llama3-8B-Instruct-20240729,5.5,4.6,4.9,6.7,5.9,3.2,6.9,6.8,5.2
 
89
  keval,2,openai__gpt-3.5-turbo-0125,5.3,6.2,5.5,7.0,4.5,3.3,6.2,4.5,5.4
90
  keval,2,meta-llama__Meta-Llama-3.1-8B-Instruct,4.8,5.0,6.0,5.5,4.4,2.6,5.9,5.0,4.4
91
  keval,2,davidkim205__Ko-Llama-3-8B-Instruct,4.2,3.6,4.6,6.3,2.8,2.2,6.1,3.7,4.3
 
10
  gpt-4o,1,google__gemini-1.5-pro,8.2,5.5,9.7,8.7,7.5,6.5,9.1,9.4,9.2
11
  gpt-4o,1,davidkim205__ko-gemma-2-9b-it,7.8,6.6,9.0,8.4,6.7,6.2,8.1,8.9,8.7
12
  gpt-4o,1,google__gemma-2-9b-it,7.7,6.2,9.3,8.8,5.4,5.4,8.8,8.8,8.7
13
+ gpt-4o,1,Qwen__Qwen2-57B-A14B-Instruct,7.4,7.0,9.3,8.2,6.2,6.0,7.8,6.6,8.1
14
  gpt-4o,1,alpindale__WizardLM-2-8x22B,7.4,6.8,6.8,7.8,8.7,4.8,7.2,8.4,8.7
15
  gpt-4o,1,openai__gpt-3.5-turbo-0125,6.7,5.2,9.0,7.7,6.4,3.3,7.2,6.5,8.6
16
  gpt-4o,1,meta-llama__Meta-Llama-3.1-70B-Instruct,6.6,6.4,8.7,8.0,4.5,4.0,7.9,7.4,5.9
17
  gpt-4o,1,Qwen__Qwen2-7B-Instruct,6.5,3.9,9.0,8.0,5.6,3.6,7.0,6.6,8.2
18
  gpt-4o,1,LGAI-EXAONE__EXAONE-3.0-7.8B-Instruct,6.2,4.9,7.4,7.1,7.3,5.1,6.4,4.1,7.6
19
+ gpt-4o,1,CohereForAI__c4ai-command-r-plus,6.2,1.9,6.8,8.5,3.0,3.8,8.6,8.6,8.1
20
  gpt-4o,1,Qwen__Qwen1.5-32B-Chat,6.1,4.0,8.6,8.5,4.7,2.6,6.3,7.5,6.7
21
  gpt-4o,1,KISTI-KONI__KONI-Llama3-8B-Instruct-20240729,5.8,3.5,5.0,8.5,5.4,3.2,5.4,7.5,7.6
22
  gpt-4o,1,davidkim205__Ko-Llama-3-8B-Instruct,5.7,4.6,7.0,7.7,2.8,2.5,6.2,6.9,7.6
 
38
  gpt-4o,2,google__gemma-2-9b-it,6.2,4.8,7.6,8.3,4.9,3.9,7.0,7.4,6.1
39
  gpt-4o,2,Qwen__Qwen1.5-32B-Chat,5.8,4.3,8.2,7.6,3.8,3.0,6.8,5.9,6.9
40
  gpt-4o,2,meta-llama__Meta-Llama-3.1-70B-Instruct,5.7,5.5,8.0,7.4,3.6,2.9,6.6,5.7,5.7
41
+ gpt-4o,2,CohereForAI__c4ai-command-r-plus,5.7,4.1,6.3,8.6,3.1,3.1,7.4,6.4,6.2
42
  gpt-4o,2,LGAI-EXAONE__EXAONE-3.0-7.8B-Instruct,5.6,5.8,6.2,5.5,7.0,4.0,5.7,4.3,6.5
43
  gpt-4o,2,openai__gpt-3.5-turbo-0125,5.4,5.8,5.7,7.2,4.4,3.0,6.6,4.4,6.4
44
  gpt-4o,2,Qwen__Qwen2-7B-Instruct,5.3,5.0,7.0,6.6,5.1,2.7,5.6,4.8,5.9
45
  gpt-4o,2,Qwen__Qwen1.5-14B-Chat,4.9,3.5,5.1,7.4,4.1,2.7,5.9,5.0,5.9
46
+ gpt-4o,2,Qwen__Qwen2-57B-A14B-Instruct,4.7,4.4,3.8,6.9,3.9,2.9,5.6,4.8,5.5
47
  gpt-4o,2,KISTI-KONI__KONI-Llama3-8B-Instruct-20240729,4.5,3.3,3.8,7.6,4.9,2.1,5.6,5.7,3.3
48
  gpt-4o,2,mistralai__Mistral-7B-Instruct-v0.2,4.5,3.9,4.4,6.8,2.2,2.4,6.2,5.6,4.6
49
  gpt-4o,2,davidkim205__Ko-Llama-3-8B-Instruct,4.0,3.7,4.3,6.4,2.8,2.3,4.9,4.0,4.1
 
60
  keval,1,Qwen__Qwen2-72B-Instruct,8.0,5.0,9.2,8.8,8.6,6.9,7.7,9.1,9.0
61
  keval,1,davidkim205__ko-gemma-2-9b-it,7.8,5.9,9.4,8.5,6.0,6.3,8.2,9.0,8.9
62
  keval,1,google__gemma-2-9b-it,7.6,6.7,8.8,8.5,5.2,5.5,9.0,8.6,8.5
63
+ keval,1,Qwen__Qwen2-57B-A14B-Instruct,7.6,5.4,9.5,8.3,5.9,6.7,7.5,8.2,8.9
64
  keval,1,meta-llama__Meta-Llama-3.1-70B-Instruct,7.3,6.8,9.0,8.3,5.9,5.1,8.4,8.0,7.1
65
  keval,1,Qwen__Qwen1.5-14B-Chat,7.2,4.7,9.7,8.8,4.5,4.8,8.1,8.9,8.4
66
  keval,1,LGAI-EXAONE__EXAONE-3.0-7.8B-Instruct,7.2,5.7,8.8,8.1,8.2,6.0,7.7,5.6,7.3
 
68
  keval,1,Qwen__Qwen1.5-32B-Chat,7.0,3.9,9.9,8.9,5.8,3.6,7.1,8.6,7.9
69
  keval,1,openai__gpt-3.5-turbo-0125,6.9,5.6,8.9,7.7,6.4,3.2,7.4,7.5,8.6
70
  keval,1,KISTI-KONI__KONI-Llama3-8B-Instruct-20240729,6.8,3.4,8.6,8.5,5.5,4.1,6.9,8.8,8.4
71
+ keval,1,CohereForAI__c4ai-command-r-plus,6.4,1.9,5.9,9.3,4.4,4.6,8.4,9.0,8.0
72
  keval,1,Qwen__Qwen2-7B-Instruct,6.4,3.6,9.0,7.7,5.5,3.5,7.1,6.7,8.4
73
  keval,1,meta-llama__Meta-Llama-3.1-8B-Instruct,6.3,4.3,8.9,7.7,5.3,3.3,7.3,6.0,7.5
74
  keval,1,davidkim205__Ko-Llama-3-8B-Instruct,6.0,5.0,7.4,7.6,2.9,2.9,7.0,8.0,7.6
 
90
  keval,2,Qwen__Qwen1.5-32B-Chat,6.2,5.2,7.7,8.0,4.1,4.0,7.7,6.7,6.5
91
  keval,2,Qwen__Qwen1.5-14B-Chat,6.0,4.7,6.9,7.9,4.8,3.8,7.2,6.3,6.7
92
  keval,2,meta-llama__Meta-Llama-3.1-70B-Instruct,6.0,6.0,7.3,7.6,5.6,2.9,7.0,6.2,5.6
93
+ keval,2,CohereForAI__c4ai-command-r-plus,5.8,3.9,5.0,8.3,3.9,3.6,7.6,6.7,7.3
94
  keval,2,Qwen__Qwen2-7B-Instruct,5.6,4.9,7.0,6.5,5.1,3.1,6.3,5.0,6.5
95
  keval,2,KISTI-KONI__KONI-Llama3-8B-Instruct-20240729,5.5,4.6,4.9,6.7,5.9,3.2,6.9,6.8,5.2
96
+ keval,2,Qwen__Qwen2-57B-A14B-Instruct,5.5,4.6,5.2,6.8,4.2,4.5,6.8,5.0,7.1
97
  keval,2,openai__gpt-3.5-turbo-0125,5.3,6.2,5.5,7.0,4.5,3.3,6.2,4.5,5.4
98
  keval,2,meta-llama__Meta-Llama-3.1-8B-Instruct,4.8,5.0,6.0,5.5,4.4,2.6,5.9,5.0,4.4
99
  keval,2,davidkim205__Ko-Llama-3-8B-Instruct,4.2,3.6,4.6,6.3,2.8,2.2,6.1,3.7,4.3