Oasis / results_arc.json
gagan3012's picture
Upload folder using huggingface_hub
64ae580 verified
{
"results": {
"arc_challenge": {
"acc,none": 0.5614334470989761,
"acc_stderr,none": 0.014500682618212865,
"acc_norm,none": 0.613481228668942,
"acc_norm_stderr,none": 0.014230084761910473,
"alias": "arc_challenge"
}
},
"configs": {
"arc_challenge": {
"task": "arc_challenge",
"group": [
"ai2_arc"
],
"dataset_path": "/lustre07/scratch/gagan30/arocr/meta-llama/self_rewarding_models/eval/ai2_arc",
"dataset_name": "ARC-Challenge",
"training_split": "train",
"validation_split": "validation",
"test_split": "test",
"doc_to_text": "Question: {{question}}\nAnswer:",
"doc_to_target": "{{choices.label.index(answerKey)}}",
"doc_to_choice": "{{choices.text}}",
"description": "",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"num_fewshot": 25,
"metric_list": [
{
"metric": "acc",
"aggregation": "mean",
"higher_is_better": true
},
{
"metric": "acc_norm",
"aggregation": "mean",
"higher_is_better": true
}
],
"output_type": "multiple_choice",
"repeats": 1,
"should_decontaminate": true,
"doc_to_decontamination_query": "Question: {{question}}\nAnswer:",
"metadata": {
"version": 1.0
}
}
},
"versions": {
"arc_challenge": 1.0
},
"n-shot": {
"arc_challenge": 25
},
"config": {
"model": "vllm",
"model_args": "pretrained=/lustre07/scratch/gagan30/arocr/meta-llama/self_rewarding_models/Oasis,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.9,data_parallel_size=1,max_model_len=4096",
"batch_size": "auto:128",
"batch_sizes": [],
"device": "cuda",
"use_cache": "/lustre07/scratch/gagan30/arocr/cache/",
"limit": null,
"bootstrap_iters": 100000,
"gen_kwargs": null
},
"git_hash": null
}