# Web-viewer residue from the hosting page, kept as comments so the module parses:
#   hieunguyen1053's picture | add exams | 6b23c56 | raw | history blame | 2.07 kB
from dataclasses import dataclass
@dataclass
class Task:
    """Describe one evaluation task and the metric reported for it.

    The first three fields have no default and are required by the
    generated ``__init__``; the remaining three are optional.  Subclasses
    in this module override the fields as plain class attributes.

    Attributes:
        code: Task identifier string, e.g. ``"arc_vi"``.
        name: Display name of the task, e.g. ``"ARC-vi"``.
        metric: Name of the reported metric, e.g. ``"acc_norm"`` or ``"ppl"``.
        higher_is_better: Defaults to ``True``.  Presumably tells consumers
            whether a larger metric value is the better one.
        num_fewshot: Defaults to ``0``.  Presumably the number of few-shot
            examples used during evaluation.
        private_test: Defaults to ``False``.  Presumably marks tasks that
            are scored on a non-public test set -- confirm with the caller.
    """

    # Required fields (no defaults).
    code: str
    name: str
    metric: str

    # Optional fields.
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = False
class Lambada(Task):
    """Settings for the ``lambada_vi`` task, reported with the ``ppl`` metric.

    This class is not decorated with ``@dataclass``; the values below are
    class-level overrides of the ``Task`` fields and are read from the
    class itself.
    """

    code: str = "lambada_vi"
    name: str = "LAMBADA-vi"
    metric: str = "ppl"
    higher_is_better: bool = False  # the only task here flagged lower-is-better
    num_fewshot: int = 0
    private_test: bool = True
class Arc(Task):
    """Settings for the ``arc_vi`` task: ``acc_norm`` metric, 25 few-shot.

    Values are class-level overrides of the ``Task`` fields.
    """

    code: str = "arc_vi"
    name: str = "ARC-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 25
    private_test: bool = False
class HellaSwag(Task):
    """Settings for the ``hellaswag_vi`` task: ``acc_norm`` metric, 10 few-shot.

    Values are class-level overrides of the ``Task`` fields.
    """

    code: str = "hellaswag_vi"
    name: str = "HellaSwag-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 10
    private_test: bool = False
class MMLU(Task):
    """Settings for the ``mmlu_vi`` task: ``acc_norm`` metric, 5 few-shot.

    Values are class-level overrides of the ``Task`` fields.
    """

    code: str = "mmlu_vi"
    name: str = "MMLU-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 5
    private_test: bool = False
class TruthfulQA(Task):
    """Settings for the ``truthfulqa_vi`` task: ``mc2`` metric, 0 few-shot.

    Values are class-level overrides of the ``Task`` fields.
    """

    code: str = "truthfulqa_vi"
    name: str = "TruthfulQA-vi"
    metric: str = "mc2"
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = False
class Grade12Exams(Task):
    """Settings for the ``grade_12_exams_vi`` task: ``acc_norm``, 5 few-shot.

    Values are class-level overrides of the ``Task`` fields.
    """

    code: str = "grade_12_exams_vi"
    name: str = "Grade 12 Exams"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 5
    private_test: bool = False
class IWSLT2023_en_vi(Task):
    """Settings for the ``translation_vi`` task: ``bleu`` metric, 0 few-shot.

    Values are class-level overrides of the ``Task`` fields.
    """

    code: str = "translation_vi"
    name: str = "IWSLT 2023 en-vi"
    metric: str = "bleu"
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = False
class WikipediaQA(Task):
    """Settings for the ``wikipediaqa_vi`` task: ``acc_norm``, 5 few-shot.

    Values are class-level overrides of the ``Task`` fields.
    """

    code: str = "wikipediaqa_vi"
    # The display name intentionally or not differs from the class name and
    # the code ("GeneralKnowledgeQA" vs. "WikipediaQA"); kept as-is.
    name: str = "GeneralKnowledgeQA-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 5
    private_test: bool = True
class Comprehension(Task):
    """Settings for the ``comprehension_vi`` task: ``acc_norm``, 0 few-shot.

    Values are class-level overrides of the ``Task`` fields.
    """

    code: str = "comprehension_vi"
    name: str = "ComprehensionQA-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = True
class Exams(Task):
    """Settings for the ``exams_vi`` task: ``acc_norm`` metric, 5 few-shot.

    Values are class-level overrides of the ``Task`` fields.
    """

    code: str = "exams_vi"
    name: str = "Exams-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 5
    private_test: bool = True
# Task classes (not instances), in listing order: the four entries with
# private_test = False come first, followed by the four with
# private_test = True.
# NOTE(review): Grade12Exams and IWSLT2023_en_vi are defined in this module
# but are not listed here -- confirm that is intentional.
TASKS = [
    Arc,
    HellaSwag,
    MMLU,
    TruthfulQA,
    Lambada,
    WikipediaQA,
    Comprehension,
    Exams,
]