Spaces:

vlsp-2023-vllm
/

VLLMs-Leaderboard

Runtime error

App Files Files Community

VLLMs-Leaderboard / src /tasks.py

hieunguyen1053

add exams

6b23c56 12 months ago

raw

history blame

2.07 kB

	from dataclasses import dataclass

	@dataclass
	class Task:
	    """Description of a single evaluation task.

	    The concrete tasks in this file subclass ``Task`` and override these
	    fields with fixed values.
	    """

	    code: str  # short identifier, e.g. "arc_vi"
	    name: str  # human-readable name, e.g. "ARC-vi"
	    metric: str  # metric key, e.g. "acc_norm", "ppl", "bleu"
	    higher_is_better: bool = True  # set to False by Lambada, whose metric is "ppl"
	    num_fewshot: int = 0  # presumably the number of few-shot examples -- confirm with the consumer
	    private_test: bool = False  # presumably marks tasks with a non-public test set -- confirm


	@dataclass
	class Lambada(Task):
	    """LAMBADA-vi: metric ``ppl`` (lower is better), 0-shot, ``private_test`` is True."""

	    # Every override is annotated so it becomes a dataclass field default:
	    # ``Lambada()`` then needs no arguments, and the values remain readable
	    # directly on the class (``Lambada.code``).
	    code: str = "lambada_vi"
	    name: str = "LAMBADA-vi"
	    metric: str = "ppl"
	    higher_is_better: bool = False
	    num_fewshot: int = 0
	    private_test: bool = True


	@dataclass
	class Arc(Task):
	    """ARC-vi: metric ``acc_norm`` (higher is better), 25-shot, ``private_test`` is False."""

	    # Every override is annotated so it becomes a dataclass field default:
	    # ``Arc()`` then needs no arguments, and the values remain readable
	    # directly on the class (``Arc.code``).
	    code: str = "arc_vi"
	    name: str = "ARC-vi"
	    metric: str = "acc_norm"
	    higher_is_better: bool = True
	    num_fewshot: int = 25
	    private_test: bool = False


	@dataclass
	class HellaSwag(Task):
	    """HellaSwag-vi: metric ``acc_norm`` (higher is better), 10-shot, ``private_test`` is False."""

	    # Every override is annotated so it becomes a dataclass field default:
	    # ``HellaSwag()`` then needs no arguments, and the values remain readable
	    # directly on the class (``HellaSwag.code``).
	    code: str = "hellaswag_vi"
	    name: str = "HellaSwag-vi"
	    metric: str = "acc_norm"
	    higher_is_better: bool = True
	    num_fewshot: int = 10
	    private_test: bool = False


	@dataclass
	class MMLU(Task):
	    """MMLU-vi: metric ``acc_norm`` (higher is better), 5-shot, ``private_test`` is False."""

	    # Every override is annotated so it becomes a dataclass field default:
	    # ``MMLU()`` then needs no arguments, and the values remain readable
	    # directly on the class (``MMLU.code``).
	    code: str = "mmlu_vi"
	    name: str = "MMLU-vi"
	    metric: str = "acc_norm"
	    higher_is_better: bool = True
	    num_fewshot: int = 5
	    private_test: bool = False


	@dataclass
	class TruthfulQA(Task):
	    """TruthfulQA-vi: metric ``mc2`` (higher is better), 0-shot, ``private_test`` is False."""

	    # Every override is annotated so it becomes a dataclass field default:
	    # ``TruthfulQA()`` then needs no arguments, and the values remain readable
	    # directly on the class (``TruthfulQA.code``).
	    code: str = "truthfulqa_vi"
	    name: str = "TruthfulQA-vi"
	    metric: str = "mc2"
	    higher_is_better: bool = True
	    num_fewshot: int = 0
	    private_test: bool = False


	@dataclass
	class Grade12Exams(Task):
	    """Grade 12 Exams: metric ``acc_norm`` (higher is better), 5-shot, ``private_test`` is False."""

	    # Every override is annotated so it becomes a dataclass field default:
	    # ``Grade12Exams()`` then needs no arguments, and the values remain
	    # readable directly on the class (``Grade12Exams.code``).
	    code: str = "grade_12_exams_vi"
	    name: str = "Grade 12 Exams"
	    metric: str = "acc_norm"
	    higher_is_better: bool = True
	    num_fewshot: int = 5
	    private_test: bool = False


	@dataclass
	class IWSLT2023_en_vi(Task):
	    """IWSLT 2023 en-vi: metric ``bleu`` (higher is better), 0-shot, ``private_test`` is False."""

	    # Every override is annotated so it becomes a dataclass field default:
	    # ``IWSLT2023_en_vi()`` then needs no arguments, and the values remain
	    # readable directly on the class (``IWSLT2023_en_vi.code``).
	    code: str = "translation_vi"
	    name: str = "IWSLT 2023 en-vi"
	    metric: str = "bleu"
	    higher_is_better: bool = True
	    num_fewshot: int = 0
	    private_test: bool = False


	@dataclass
	class WikipediaQA(Task):
	    """GeneralKnowledgeQA-vi: metric ``acc_norm`` (higher is better), 5-shot, ``private_test`` is True.

	    The class name and ``code`` say "wikipediaqa" while ``name`` is
	    "GeneralKnowledgeQA-vi"; both strings are kept exactly as they were.
	    """

	    # Every override is annotated so it becomes a dataclass field default:
	    # ``WikipediaQA()`` then needs no arguments, and the values remain
	    # readable directly on the class (``WikipediaQA.code``).
	    code: str = "wikipediaqa_vi"
	    name: str = "GeneralKnowledgeQA-vi"
	    metric: str = "acc_norm"
	    higher_is_better: bool = True
	    num_fewshot: int = 5
	    private_test: bool = True


	@dataclass
	class Comprehension(Task):
	    """ComprehensionQA-vi: metric ``acc_norm`` (higher is better), 0-shot, ``private_test`` is True."""

	    # Every override is annotated so it becomes a dataclass field default:
	    # ``Comprehension()`` then needs no arguments, and the values remain
	    # readable directly on the class (``Comprehension.code``).
	    code: str = "comprehension_vi"
	    name: str = "ComprehensionQA-vi"
	    metric: str = "acc_norm"
	    higher_is_better: bool = True
	    num_fewshot: int = 0
	    private_test: bool = True


	@dataclass
	class Exams(Task):
	    """Exams-vi: metric ``acc_norm`` (higher is better), 5-shot, ``private_test`` is True."""

	    # Every override is annotated so it becomes a dataclass field default:
	    # ``Exams()`` then needs no arguments, and the values remain readable
	    # directly on the class (``Exams.code``).
	    code: str = "exams_vi"
	    name: str = "Exams-vi"
	    metric: str = "acc_norm"
	    higher_is_better: bool = True
	    num_fewshot: int = 5
	    private_test: bool = True


	# Task classes in list order. The first four have private_test False,
	# the last four have private_test True.
	# NOTE(review): Grade12Exams and IWSLT2023_en_vi are defined in this file
	# but are not listed here -- confirm the omission is intentional.
	TASKS = [
	    Arc,
	    HellaSwag,
	    MMLU,
	    TruthfulQA,
	    Lambada,
	    WikipediaQA,
	    Comprehension,
	    Exams,
	]