eval
README.md CHANGED
@@ -37,7 +37,7 @@ model-index:
         num_few_shot: 0
     metrics:
     - type: inst_level_strict_acc and prompt_level_strict_acc
-      value:
+      value: 71.68
       name: strict accuracy
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
@@ -52,7 +52,7 @@ model-index:
         num_few_shot: 3
     metrics:
     - type: acc_norm
-      value: 27.
+      value: 27.24
       name: normalized accuracy
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
@@ -67,7 +67,7 @@ model-index:
         num_few_shot: 4
     metrics:
     - type: exact_match
-      value: 15.
+      value: 15.33
       name: exact match
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
@@ -82,7 +82,7 @@ model-index:
         num_few_shot: 0
     metrics:
     - type: acc_norm
-      value: 4.
+      value: 4.81
       name: acc_norm
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
@@ -97,7 +97,7 @@ model-index:
         num_few_shot: 0
     metrics:
     - type: acc_norm
-      value:
+      value: 4.7
       name: acc_norm
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
@@ -114,7 +114,7 @@ model-index:
         num_few_shot: 5
     metrics:
     - type: acc
-      value: 29.
+      value: 29.59
       name: accuracy
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
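The values added in this commit are the Open LLM Leaderboard scores recorded in the card's `model-index` block. As a quick way to read them back programmatically, here is a minimal sketch using `huggingface_hub` (assuming the library is installed; the repo id is taken from the `source` URLs in the diff):

```python
# Minimal sketch: read the model-index metrics back from the published card.
# Assumes `huggingface_hub` is installed and the repo id matches the source URLs above.
from huggingface_hub import ModelCard

card = ModelCard.load("ValiantLabs/Llama3.1-8B-Cobalt")

# Each EvalResult mirrors one `metrics` entry in the YAML diff,
# e.g. metric_type="acc_norm", metric_value=27.24 for the 3-shot task.
for result in card.data.eval_results or []:
    print(result.dataset_name, result.metric_type, result.metric_value)
```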