backend

Sleeping

meg-huggingface committed on Jul 4

Commit

f1e6565

•

1 Parent(s): fe8891f

DEBUG

Files changed (3) hide show

main_backend_harness.py CHANGED Viewed

@@ -62,6 +62,8 @@ def run_auto_eval():
         local_dir=EVAL_REQUESTS_PATH_BACKEND,
     )
     run_evaluation(
         eval_request=eval_request,
         task_names=TASKS_HARNESS,

         local_dir=EVAL_REQUESTS_PATH_BACKEND,
     )
+    print("eval request is")
+    print(eval_request)
     run_evaluation(
         eval_request=eval_request,
         task_names=TASKS_HARNESS,

requirements.txt CHANGED Viewed

@@ -10,7 +10,7 @@ sentencepiece
 # Evaluation suites
 lighteval
-lm_eval
 # Log Visualizer
 BeautifulSoup4==4.12.2

 # Evaluation suites
 lighteval
+lm_eval>=0.4.2
 # Log Visualizer
 BeautifulSoup4==4.12.2

src/envs.py CHANGED Viewed

@@ -10,7 +10,7 @@ OWNER = "meg" # Change to your org - don't forget to create a results and reques
 # For harness evaluations
 DEVICE = "cuda:0" #if you add compute, for harness evaluations
-LIMIT = 1 # !!!! For testing, should be None for actual evaluations!!!
 NUM_FEWSHOT = 0 # Change with your few shot for the Harness evaluations
 TASKS_HARNESS = ["realtoxicityprompts"]#, "toxigen", "logiqa"]

 # For harness evaluations
 DEVICE = "cuda:0" #if you add compute, for harness evaluations
+LIMIT = 3 # !!!! For testing, should be None for actual evaluations!!!
 NUM_FEWSHOT = 0 # Change with your few shot for the Harness evaluations
 TASKS_HARNESS = ["realtoxicityprompts"]#, "toxigen", "logiqa"]