---
license: apache-2.0
pipeline_tag: text-generation
library_name: transformers
language:
- en
- am
- ar
- as
- az
- be
- bg
- bn
- br
- bs
- ca
- cs
- cy
- da
- de
- el
- eo
- es
- et
- eu
- fa
- ff
- fi
- fr
- fy
- ga
- gd
- gl
- gn
- gu
- ha
- he
- hi
- hr
- ht
- hu
- hy
- id
- ig
- is
- it
- ja
- jv
- ka
- kk
- km
- kn
- ko
- ku
- ky
- la
- lg
- li
- ln
- lo
- lt
- lv
- mg
- mk
- ml
- mn
- mr
- ms
- my
- ne
- nl
- 'no'
- ns
- om
- or
- pa
- pl
- ps
- pt
- qu
- rm
- ro
- ru
- sa
- si
- sc
- sd
- sk
- sl
- so
- sq
- sr
- ss
- su
- sv
- sw
- ta
- te
- th
- tl
- tn
- tr
- ug
- uk
- ur
- uz
- vi
- wo
- xh
- yi
- yo
- zu
datasets:
- bigcode/programming-languages-keywords
- bigcode/the-stack-smol-xs
- nampdn-ai/tiny-textbooks
- xu-song/cc100-samples
- m-a-p/CodeFeedback-Filtered-Instruction
- nampdn-ai/tiny-codes
- ajibawa-2023/Maths-College
- microsoft/orca-math-word-problems-200k
- mlabonne/FineTome-100k
- arcee-ai/agent-data
- cognitivecomputations/SystemChat-2.0
- badrex/llm-emoji-dataset
tags:
- litgpt
- litdata
---

# tangled-llama-108m-32k-base-v0.1

## lm-evaluation-harness

| Tasks | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---|---|---|
| arc_challenge | 1 | none | 0 | acc | ↑ | 0.1937 | ± | 0.0115 |
| | | none | 0 | acc_norm | ↑ | 0.2363 | ± | 0.0124 |
| gsm8k | 3 | flexible-extract | 5 | exact_match | ↑ | 0.0136 | ± | 0.0032 |
| | | strict-match | 5 | exact_match | ↑ | 0.0000 | ± | 0.0000 |
| hellaswag | 1 | none | 0 | acc | ↑ | 0.2659 | ± | 0.0044 |
| | | none | 0 | acc_norm | ↑ | 0.2709 | ± | 0.0044 |
| mmlu | 2 | none | | acc | ↑ | 0.2309 | ± | 0.0036 |
| - humanities | 2 | none | | acc | ↑ | 0.2370 | ± | 0.0062 |
| - formal_logic | 1 | none | 0 | acc | ↑ | 0.2778 | ± | 0.0401 |
| - high_school_european_history | 1 | none | 0 | acc | ↑ | 0.2303 | ± | 0.0329 |
| - high_school_us_history | 1 | none | 0 | acc | ↑ | 0.2402 | ± | 0.0300 |
| - high_school_world_history | 1 | none | 0 | acc | ↑ | 0.2405 | ± | 0.0278 |
| - international_law | 1 | none | 0 | acc | ↑ | 0.1983 | ± | 0.0364 |
| - jurisprudence | 1 | none | 0 | acc | ↑ | 0.2315 | ± | 0.0408 |
| - logical_fallacies | 1 | none | 0 | acc | ↑ | 0.1840 | ± | 0.0304 |
| - moral_disputes | 1 | none | 0 | acc | ↑ | 0.2110 | ± | 0.0220 |
| - moral_scenarios | 1 | none | 0 | acc | ↑ | 0.2380 | ± | 0.0142 |
| - philosophy | 1 | none | 0 | acc | ↑ | 0.1994 | ± | 0.0227 |
| - prehistory | 1 | none | 0 | acc | ↑ | 0.2315 | ± | 0.0235 |
| - professional_law | 1 | none | 0 | acc | ↑ | 0.2510 | ± | 0.0111 |
| - world_religions | 1 | none | 0 | acc | ↑ | 0.2865 | ± | 0.0347 |
| - other | 2 | none | | acc | ↑ | 0.2372 | ± | 0.0076 |
| - business_ethics | 1 | none | 0 | acc | ↑ | 0.2900 | ± | 0.0456 |
| - clinical_knowledge | 1 | none | 0 | acc | ↑ | 0.2113 | ± | 0.0251 |
| - college_medicine | 1 | none | 0 | acc | ↑ | 0.2023 | ± | 0.0306 |
| - global_facts | 1 | none | 0 | acc | ↑ | 0.1900 | ± | 0.0394 |
| - human_aging | 1 | none | 0 | acc | ↑ | 0.3004 | ± | 0.0308 |
| - management | 1 | none | 0 | acc | ↑ | 0.1748 | ± | 0.0376 |
| - marketing | 1 | none | 0 | acc | ↑ | 0.2863 | ± | 0.0296 |
| - medical_genetics | 1 | none | 0 | acc | ↑ | 0.2700 | ± | 0.0446 |
| - miscellaneous | 1 | none | 0 | acc | ↑ | 0.2337 | ± | 0.0151 |
| - nutrition | 1 | none | 0 | acc | ↑ | 0.2255 | ± | 0.0239 |
| - professional_accounting | 1 | none | 0 | acc | ↑ | 0.2411 | ± | 0.0255 |
| - professional_medicine | 1 | none | 0 | acc | ↑ | 0.1985 | ± | 0.0242 |
| - virology | 1 | none | 0 | acc | ↑ | 0.2711 | ± | 0.0346 |
| - social sciences | 2 | none | | acc | ↑ | 0.2278 | ± | 0.0076 |
| - econometrics | 1 | none | 0 | acc | ↑ | 0.2105 | ± | 0.0384 |
| - high_school_geography | 1 | none | 0 | acc | ↑ | 0.1768 | ± | 0.0272 |
| - high_school_government_and_politics | 1 | none | 0 | acc | ↑ | 0.2280 | ± | 0.0303 |
| - high_school_macroeconomics | 1 | none | 0 | acc | ↑ | 0.2436 | ± | 0.0218 |
| - high_school_microeconomics | 1 | none | 0 | acc | ↑ | 0.2395 | ± | 0.0277 |
| - high_school_psychology | 1 | none | 0 | acc | ↑ | 0.2037 | ± | 0.0173 |
| - human_sexuality | 1 | none | 0 | acc | ↑ | 0.2595 | ± | 0.0384 |
| - professional_psychology | 1 | none | 0 | acc | ↑ | 0.2386 | ± | 0.0172 |
| - public_relations | 1 | none | 0 | acc | ↑ | 0.2091 | ± | 0.0390 |
| - security_studies | 1 | none | 0 | acc | ↑ | 0.2490 | ± | 0.0277 |
| - sociology | 1 | none | 0 | acc | ↑ | 0.1990 | ± | 0.0282 |
| - us_foreign_policy | 1 | none | 0 | acc | ↑ | 0.3100 | ± | 0.0465 |
| - stem | 2 | none | | acc | ↑ | 0.2185 | ± | 0.0074 |
| - abstract_algebra | 1 | none | 0 | acc | ↑ | 0.2600 | ± | 0.0441 |
| - anatomy | 1 | none | 0 | acc | ↑ | 0.1630 | ± | 0.0319 |
| - astronomy | 1 | none | 0 | acc | ↑ | 0.2237 | ± | 0.0339 |
| - college_biology | 1 | none | 0 | acc | ↑ | 0.2708 | ± | 0.0372 |
| - college_chemistry | 1 | none | 0 | acc | ↑ | 0.2300 | ± | 0.0423 |
| - college_computer_science | 1 | none | 0 | acc | ↑ | 0.2100 | ± | 0.0409 |
| - college_mathematics | 1 | none | 0 | acc | ↑ | 0.2200 | ± | 0.0416 |
| - college_physics | 1 | none | 0 | acc | ↑ | 0.2647 | ± | 0.0439 |
| - computer_security | 1 | none | 0 | acc | ↑ | 0.3000 | ± | 0.0461 |
| - conceptual_physics | 1 | none | 0 | acc | ↑ | 0.2000 | ± | 0.0261 |
| - electrical_engineering | 1 | none | 0 | acc | ↑ | 0.2345 | ± | 0.0353 |
| - elementary_mathematics | 1 | none | 0 | acc | ↑ | 0.2302 | ± | 0.0217 |
| - high_school_biology | 1 | none | 0 | acc | ↑ | 0.1903 | ± | 0.0223 |
| - high_school_chemistry | 1 | none | 0 | acc | ↑ | 0.1527 | ± | 0.0253 |
| - high_school_computer_science | 1 | none | 0 | acc | ↑ | 0.2700 | ± | 0.0446 |
| - high_school_mathematics | 1 | none | 0 | acc | ↑ | 0.1926 | ± | 0.0240 |
| - high_school_physics | 1 | none | 0 | acc | ↑ | 0.2053 | ± | 0.0330 |
| - high_school_statistics | 1 | none | 0 | acc | ↑ | 0.2130 | ± | 0.0279 |
| - machine_learning | 1 | none | 0 | acc | ↑ | 0.2768 | ± | 0.0425 |
| truthfulqa_mc2 | 2 | none | 0 | acc | ↑ | 0.4683 | ± | 0.0160 |
| winogrande | 1 | none | 0 | acc | ↑ | 0.5075 | ± | 0.0141 |


| Groups | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---|---|---|
| mmlu | 2 | none | | acc | ↑ | 0.2309 | ± | 0.0036 |
| - humanities | 2 | none | | acc | ↑ | 0.2370 | ± | 0.0062 |
| - other | 2 | none | | acc | ↑ | 0.2372 | ± | 0.0076 |
| - social sciences | 2 | none | | acc | ↑ | 0.2278 | ± | 0.0076 |
| - stem | 2 | none | | acc | ↑ | 0.2185 | ± | 0.0074 |