laiviet commited on
Commit
a5244e0
1 Parent(s): 95b0e17

Update app.py to be compatible with the new logs

Browse files
Files changed (1) hide show
  1. app.py +4 -14
app.py CHANGED
@@ -6,10 +6,10 @@ import gradio as gr
6
  from content import *
7
  import glob
8
 
9
- ARC = "arc_challenge"
10
  HELLASWAG = "hellaswag"
11
  MMLU = "mmlu"
12
- TRUTHFULQA = "truthfulqa-mc"
13
  BENCHMARKS = [ARC, HELLASWAG, MMLU, TRUTHFULQA]
14
 
15
  METRICS = ["acc_norm", "acc_norm", "acc_norm", "mc2"]
@@ -39,18 +39,8 @@ def collect_results():
39
  pretrained_models.add(pretrained)
40
 
41
  for lang_task, perfs in results.items():
42
- if lang_task.startswith('arc_') and lang_task.endswith('_challenge'):
43
- lang = lang_task.split('_')[1]
44
- task = ARC
45
- elif lang_task.startswith('hellaswag_'):
46
- _, lang = lang_task.split('_')
47
- task = HELLASWAG
48
- elif lang_task.startswith('mmlu_'):
49
- _, lang = lang_task.split('_')
50
- task = MMLU
51
- elif lang_task.startswith('truthfulqa_') and lang_task.endswith('_mc'):
52
- lang = lang_task.split('_')[1]
53
- task = TRUTHFULQA
54
 
55
  if lang and task:
56
  metric = METRICS[BENCHMARKS.index(task)]
 
6
  from content import *
7
  import glob
8
 
9
+ ARC = "arc"
10
  HELLASWAG = "hellaswag"
11
  MMLU = "mmlu"
12
+ TRUTHFULQA = "truthfulqa"
13
  BENCHMARKS = [ARC, HELLASWAG, MMLU, TRUTHFULQA]
14
 
15
  METRICS = ["acc_norm", "acc_norm", "acc_norm", "mc2"]
 
39
  pretrained_models.add(pretrained)
40
 
41
  for lang_task, perfs in results.items():
42
+ task, lang = lang_task.split('_')
43
+ assert task in BENCHMARKS
 
 
 
 
 
 
 
 
 
 
44
 
45
  if lang and task:
46
  metric = METRICS[BENCHMARKS.index(task)]