ShieldX committed on
Commit
92c6619
1 Parent(s): 0dedab1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +9 -9
README.md CHANGED
@@ -30,10 +30,10 @@ model-index:
30
  type: text-generation
31
  dataset:
32
  name: ai2_arc
33
- type: ai2_arc
34
  metrics:
35
- - name: AI2 Reasoning Challenge (25-Shot)
36
- type: AI2 Reasoning Challenge (25-Shot)
37
  value: 35.92
38
  source:
39
  name: Open LLM Leaderboard
@@ -44,8 +44,8 @@ model-index:
44
  name: hellaswag
45
  type: hellaswag
46
  metrics:
47
- - name: HellaSwag - Commonsense Inference (10-shot)
48
- type: HellaSwag - Commonsense Inference (10-shot)
49
  value: 60.03
50
  source:
51
  name: Open LLM Leaderboard
@@ -56,8 +56,8 @@ model-index:
56
  name: truthful_qa
57
  type: truthful_qa
58
  metrics:
59
- - name: TruthfulQA (0-shot)
60
- type: TruthfulQA (0-shot)
61
  value: 39.17
62
  source:
63
  name: Open LLM Leaderboard
@@ -68,8 +68,8 @@ model-index:
68
  name: winogrande
69
  type: winogrande
70
  metrics:
71
- - name: Winogrande (5-shot)
72
- type: Winogrande - Adversarial Winograd Schema Challenge (5-shot)
73
  value: 61.09
74
  source:
75
  name: Open LLM Leaderboard
 
30
  type: text-generation
31
  dataset:
32
  name: ai2_arc
33
+ type: arc
34
  metrics:
35
+ - name: pass@1
36
+ type: pass@1
37
  value: 35.92
38
  source:
39
  name: Open LLM Leaderboard
 
44
  name: hellaswag
45
  type: hellaswag
46
  metrics:
47
+ - name: pass@1
48
+ type: pass@1
49
  value: 60.03
50
  source:
51
  name: Open LLM Leaderboard
 
56
  name: truthful_qa
57
  type: truthful_qa
58
  metrics:
59
+ - name: pass@1
60
+ type: pass@1
61
  value: 39.17
62
  source:
63
  name: Open LLM Leaderboard
 
68
  name: winogrande
69
  type: winogrande
70
  metrics:
71
+ - name: pass@1
72
+ type: pass@1
73
  value: 61.09
74
  source:
75
  name: Open LLM Leaderboard