clean tables
Browse files
- 5 lang table
- repo_id only as label
- avg. as first column
README.md
CHANGED
@@ -80,29 +80,42 @@ Currently, we are working on more suitable benchmarks for Spanish, French, Germa
|
|
80 |
<details>
|
81 |
<summary>Evaluation results</summary>
|
82 |
|
83 |
-
###
|
84 |
-
|
85 |
-
|
|
86 |
-
|
87 |
-
| Occiglot-7b-eu5
|
88 |
-
| Occiglot-7b-eu5-instruct
|
89 |
-
| Occiglot-7b-es-en
|
90 |
-
| Occiglot-7b-es-en-instruct
|
91 |
-
|
|
92 |
-
| Mistral-
|
93 |
-
| Mistral-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
### Spanish
|
96 |
|
97 |
-
|
|
98 |
-
|
99 |
-
| Occiglot-7b-eu5
|
100 |
-
| Occiglot-7b-eu5-instruct
|
101 |
-
| Occiglot-7b-es-en
|
102 |
-
| Occiglot-7b-es-en-instruct
|
103 |
-
|
|
104 |
-
| Mistral-
|
105 |
-
| Mistral-
|
106 |
|
107 |
|
108 |
|
|
|
80 |
<details>
|
81 |
<summary>Evaluation results</summary>
|
82 |
|
83 |
+
### All 5 Languages
|
84 |
+
|
85 |
+
| | avg | arc_challenge | belebele | hellaswag | mmlu | truthfulqa |
|
86 |
+
|:---------------------------|---------:|----------------:|-----------:|------------:|---------:|-------------:|
|
87 |
+
| Occiglot-7b-eu5 | 0.516895 | 0.508109 | 0.675556 | 0.718963 | 0.402064 | 0.279782 |
|
88 |
+
| Occiglot-7b-eu5-instruct | 0.537799 | 0.53632 | 0.691111 | 0.731918 | 0.405198 | 0.32445 |
|
89 |
+
| Occiglot-7b-es-en | 0.483388 | 0.482949 | 0.606889 | 0.653902 | 0.398922 | 0.274277 |
|
90 |
+
| Occiglot-7b-es-en-instruct | 0.504023 | 0.494576 | 0.65 | 0.670847 | 0.406176 | 0.298513 |
|
91 |
+
| Lince-mistral-7b-it-es | 0.543427 | 0.540222 | 0.745111 | 0.692931 | 0.426241 | 0.312629 |
|
92 |
+
| Mistral-7b-v0.1 | 0.547111 | 0.528937 | 0.768444 | 0.682516 | 0.448253 | 0.307403 |
|
93 |
+
| Mistral-7b-instruct-v0.2 | 0.56713 | 0.547228 | 0.741111 | 0.69455 | 0.422501 | 0.430262 |
|
94 |
+
|
95 |
+
|
96 |
+
### English
|
97 |
+
|
98 |
+
| | avg | arc_challenge | belebele | hellaswag | mmlu | truthfulqa |
|
99 |
+
|:---------------------------|---------:|----------------:|-----------:|------------:|---------:|-------------:|
|
100 |
+
| Occiglot-7b-eu5 | 0.59657 | 0.530717 | 0.726667 | 0.789882 | 0.531904 | 0.403678 |
|
101 |
+
| Occiglot-7b-eu5-instruct | 0.617905 | 0.558874 | 0.746667 | 0.799841 | 0.535109 | 0.449 |
|
102 |
+
| Occiglot-7b-es-en | 0.593609 | 0.543515 | 0.697778 | 0.788289 | 0.548355 | 0.390109 |
|
103 |
+
| Occiglot-7b-es-en-instruct | 0.615707 | 0.552048 | 0.736667 | 0.797451 | 0.557328 | 0.435042 |
|
104 |
+
| Leo-mistral-hessianai-7b | 0.600949 | 0.522184 | 0.736667 | 0.777833 | 0.538812 | 0.429248 |
|
105 |
+
| Mistral-7b-v0.1 | 0.668385 | 0.612628 | 0.844444 | 0.834097 | 0.624555 | 0.426201 |
|
106 |
+
| Mistral-7b-instruct-v0.2 | 0.713657 | 0.637372 | 0.824444 | 0.846345 | 0.59201 | 0.668116 |
|
107 |
|
108 |
### Spanish
|
109 |
|
110 |
+
| | avg | arc_challenge_es | belebele_es | hellaswag_es | mmlu_es | truthfulqa_es |
|
111 |
+
|:---------------------------|---------:|-------------------:|--------------:|---------------:|----------:|----------------:|
|
112 |
+
| Occiglot-7b-eu5 | 0.533194 | 0.508547 | 0.676667 | 0.725411 | 0.499325 | 0.25602 |
|
113 |
+
| Occiglot-7b-eu5-instruct | 0.548155 | 0.535043 | 0.68 | 0.737039 | 0.503525 | 0.285171 |
|
114 |
+
| Occiglot-7b-es-en | 0.527264 | 0.529915 | 0.627778 | 0.72253 | 0.512749 | 0.243346 |
|
115 |
+
| Occiglot-7b-es-en-instruct | 0.5396 | 0.545299 | 0.636667 | 0.734372 | 0.524374 | 0.257288 |
|
116 |
+
| Lince-mistral-7b-it-es | 0.547212 | 0.52906 | 0.721111 | 0.687967 | 0.512749 | 0.285171 |
|
117 |
+
| Mistral-7b-v0.1 | 0.554817 | 0.528205 | 0.747778 | 0.672712 | 0.544023 | 0.281369 |
|
118 |
+
| Mistral-7b-instruct-v0.2 | 0.568575 | 0.54188 | 0.73 | 0.685406 | 0.511699 | 0.373891 |
|
119 |
|
120 |
|
121 |
|