clean table (#3)

Commit 93a35c6506748bda473a60261a5eb5d5e7041e11 — clean table
Co-authored-by: Fabio Barth <[email protected]>

File changed: README.md
@@ -80,30 +80,42 @@ Currently, we are working on more suitable benchmarks for Spanish, French, German …

Removed (old lines 80–109): inside the `<details><summary>Evaluation results</summary>` block,
the previous "### English" and "### French" sections listed one table row per model —
Occiglot-7b-eu5, Occiglot-7b-eu5-instruct, Occiglot-7b-fr-en, Occiglot-7b-fr-en-instruct,
Claire-…, Mistral-…, Mistral-… — whose cell values are truncated in this extraction and
cannot be recovered here. They were replaced by the cleaned tables below.
Added (new lines 80–121):

<details>
<summary>Evaluation results</summary>

### All 5 Languages

|                            |      avg |   arc_challenge |   belebele |   hellaswag |     mmlu |   truthfulqa |
|:---------------------------|---------:|----------------:|-----------:|------------:|---------:|-------------:|
| Occiglot-7b-eu5            | 0.516895 |        0.508109 |   0.675556 |    0.718963 | 0.402064 |     0.279782 |
| Occiglot-7b-eu5-instruct   | 0.537799 |        0.53632  |   0.691111 |    0.731918 | 0.405198 |     0.32445  |
| Occiglot-7b-fr-en          | 0.509209 |        0.496806 |   0.691333 |    0.667475 | 0.409129 |     0.281303 |
| Occiglot-7b-fr-en-instruct | 0.52884  |        0.515613 |   0.723333 |    0.67371  | 0.413024 |     0.318521 |
| Claire-mistral-7b-0.1      | 0.514226 |        0.502773 |   0.705111 |    0.666871 | 0.412128 |     0.284245 |
| Mistral-7b-v0.1            | 0.547111 |        0.528937 |   0.768444 |    0.682516 | 0.448253 |     0.307403 |
| Mistral-7b-instruct-v0.2   | 0.56713  |        0.547228 |   0.741111 |    0.69455  | 0.422501 |     0.430262 |

### English

|                            |      avg |   arc_challenge |   belebele |   hellaswag |     mmlu |   truthfulqa |
|:---------------------------|---------:|----------------:|-----------:|------------:|---------:|-------------:|
| Occiglot-7b-eu5            | 0.59657  |        0.530717 |   0.726667 |    0.789882 | 0.531904 |     0.403678 |
| Occiglot-7b-eu5-instruct   | 0.617905 |        0.558874 |   0.746667 |    0.799841 | 0.535109 |     0.449    |
| Occiglot-7b-fr-en          | 0.621947 |        0.568259 |   0.771111 |    0.804919 | 0.570716 |     0.394726 |
| Occiglot-7b-fr-en-instruct | 0.646571 |        0.586177 |   0.794444 |    0.808305 | 0.569862 |     0.474064 |
| Claire-mistral-7b-0.1      | 0.651798 |        0.59727  |   0.817778 |    0.827126 | 0.600912 |     0.415906 |
| Mistral-7b-v0.1            | 0.668385 |        0.612628 |   0.844444 |    0.834097 | 0.624555 |     0.426201 |
| Mistral-7b-instruct-v0.2   | 0.713657 |        0.637372 |   0.824444 |    0.846345 | 0.59201  |     0.668116 |

### French

|                            |      avg |   arc_challenge_fr |   belebele_fr |   hellaswag_fr |   mmlu_fr |   truthfulqa_fr |
|:---------------------------|---------:|-------------------:|--------------:|---------------:|----------:|----------------:|
| Occiglot-7b-eu5            | 0.525017 |           0.506416 |      0.675556 |       0.712358 |  0.495684 |        0.23507  |
| Occiglot-7b-eu5-instruct   | 0.554216 |           0.541488 |      0.7      |       0.724245 |  0.499122 |        0.306226 |
| Occiglot-7b-fr-en          | 0.542903 |           0.532934 |      0.706667 |       0.718891 |  0.51333  |        0.242694 |
| Occiglot-7b-fr-en-instruct | 0.567079 |           0.542344 |      0.752222 |       0.72553  |  0.52051  |        0.29479  |
| Claire-mistral-7b-0.1      | 0.515127 |           0.486741 |      0.694444 |       0.642964 |  0.479566 |        0.271919 |
| Mistral-7b-v0.1            | 0.558129 |           0.525235 |      0.776667 |       0.66481  |  0.543121 |        0.280813 |
| Mistral-7b-instruct-v0.2   | 0.575821 |           0.551754 |      0.758889 |       0.67916  |  0.506837 |        0.382465 |

</details>