alexmarques
committed on
Commit
•
88efeae
1
Parent(s):
517dbb1
Update README.md
Browse files
README.md
CHANGED
@@ -159,6 +159,16 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
159 |
<td><strong>Recovery</strong>
|
160 |
</td>
|
161 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
<tr>
|
163 |
<td>MMLU (CoT, 0-shot)
|
164 |
</td>
|
@@ -229,6 +239,16 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
229 |
<td>101.2%
|
230 |
</td>
|
231 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
</table>
|
233 |
|
234 |
### Reproduction
|
|
|
159 |
<td><strong>Recovery</strong>
|
160 |
</td>
|
161 |
</tr>
|
162 |
+
<tr>
|
163 |
+
<td>MMLU (5-shot)
|
164 |
+
</td>
|
165 |
+
<td>87.41
|
166 |
+
</td>
|
167 |
+
<td>87.47
|
168 |
+
</td>
|
169 |
+
<td>100.1%
|
170 |
+
</td>
|
171 |
+
</tr>
|
172 |
<tr>
|
173 |
<td>MMLU (CoT, 0-shot)
|
174 |
</td>
|
|
|
239 |
<td>101.2%
|
240 |
</td>
|
241 |
</tr>
|
242 |
+
<tr>
|
243 |
+
<td><strong>Average</strong>
|
244 |
+
</td>
|
245 |
+
<td><strong>86.73</strong>
|
246 |
+
</td>
|
247 |
+
<td><strong>86.89</strong>
|
248 |
+
</td>
|
249 |
+
<td><strong>100.2%</strong>
|
250 |
+
</td>
|
251 |
+
</tr>
|
252 |
</table>
|
253 |
|
254 |
### Reproduction
|