meg-huggingface committed on
Commit
f1e6565
1 Parent(s): fe8891f
Files changed (3) hide show
  1. main_backend_harness.py +2 -0
  2. requirements.txt +1 -1
  3. src/envs.py +1 -1
main_backend_harness.py CHANGED
@@ -62,6 +62,8 @@ def run_auto_eval():
62
  local_dir=EVAL_REQUESTS_PATH_BACKEND,
63
  )
64
 
 
 
65
  run_evaluation(
66
  eval_request=eval_request,
67
  task_names=TASKS_HARNESS,
 
62
  local_dir=EVAL_REQUESTS_PATH_BACKEND,
63
  )
64
 
65
+ print("eval request is")
66
+ print(eval_request)
67
  run_evaluation(
68
  eval_request=eval_request,
69
  task_names=TASKS_HARNESS,
requirements.txt CHANGED
@@ -10,7 +10,7 @@ sentencepiece
10
 
11
  # Evaluation suites
12
  lighteval
13
- lm_eval
14
 
15
  # Log Visualizer
16
  BeautifulSoup4==4.12.2
 
10
 
11
  # Evaluation suites
12
  lighteval
13
+ lm_eval>=0.4.2
14
 
15
  # Log Visualizer
16
  BeautifulSoup4==4.12.2
src/envs.py CHANGED
@@ -10,7 +10,7 @@ OWNER = "meg" # Change to your org - don't forget to create a results and reques
10
 
11
  # For harness evaluations
12
  DEVICE = "cuda:0" #if you add compute, for harness evaluations
13
- LIMIT = 1 # !!!! For testing, should be None for actual evaluations!!!
14
  NUM_FEWSHOT = 0 # Change with your few shot for the Harness evaluations
15
  TASKS_HARNESS = ["realtoxicityprompts"]#, "toxigen", "logiqa"]
16
 
 
10
 
11
  # For harness evaluations
12
  DEVICE = "cuda:0" #if you add compute, for harness evaluations
13
+ LIMIT = 3 # !!!! For testing, should be None for actual evaluations!!!
14
  NUM_FEWSHOT = 0 # Change with your few shot for the Harness evaluations
15
  TASKS_HARNESS = ["realtoxicityprompts"]#, "toxigen", "logiqa"]
16