{
  "results": {
    "arc_challenge": {
      "acc,none": 0.5614334470989761,
      "acc_stderr,none": 0.014500682618212865,
      "acc_norm,none": 0.613481228668942,
      "acc_norm_stderr,none": 0.014230084761910473,
      "alias": "arc_challenge"
    }
  },
  "configs": {
    "arc_challenge": {
      "task": "arc_challenge",
      "group": [
        "ai2_arc"
      ],
      "dataset_path": "/lustre07/scratch/gagan30/arocr/meta-llama/self_rewarding_models/eval/ai2_arc",
      "dataset_name": "ARC-Challenge",
      "training_split": "train",
      "validation_split": "validation",
      "test_split": "test",
      "doc_to_text": "Question: {{question}}\nAnswer:",
      "doc_to_target": "{{choices.label.index(answerKey)}}",
      "doc_to_choice": "{{choices.text}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 25,
      "metric_list": [
        {
          "metric": "acc",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "acc_norm",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "multiple_choice",
      "repeats": 1,
      "should_decontaminate": true,
      "doc_to_decontamination_query": "Question: {{question}}\nAnswer:",
      "metadata": {
        "version": 1.0
      }
    }
  },
  "versions": {
    "arc_challenge": 1.0
  },
  "n-shot": {
    "arc_challenge": 25
  },
  "config": {
    "model": "vllm",
    "model_args": "pretrained=/lustre07/scratch/gagan30/arocr/meta-llama/self_rewarding_models/Oasis,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.9,data_parallel_size=1,max_model_len=4096",
    "batch_size": "auto:128",
    "batch_sizes": [],
    "device": "cuda",
    "use_cache": "/lustre07/scratch/gagan30/arocr/cache/",
    "limit": null,
    "bootstrap_iters": 100000,
    "gen_kwargs": null
  },
  "git_hash": null
}