MirakramAghalarov committed
Commit
4861eca
1 Parent(s): 5e1fdc4

solved grouped dataset bug

.gitignore CHANGED
@@ -18,4 +18,5 @@ src/assets/model_counts.html
 test
 env
 a.py
-testing.py
+testing.py
+frontend
app.py CHANGED
@@ -60,6 +60,14 @@ try:
         force_download=True,
         token=TOKEN
     )
+    snapshot_download(
+        repo_id=RESULTS_GROUP_REPO,
+        local_dir=EVAL_RESULTS_GROUP_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        force_download=True,
+        token=TOKEN)
 except Exception:
     restart_space()

@@ -392,6 +400,6 @@ with demo:
         pass

 scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=300)
+scheduler.add_job(restart_space, "interval", seconds=1000)
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch()
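The added call mirrors the existing snapshot_download block line for line, only swapping in RESULTS_GROUP_REPO and EVAL_RESULTS_GROUP_PATH. A minimal sketch of a shared helper that both downloads could use, under the assumption that RESULTS_REPO, EVAL_RESULTS_PATH, and TOKEN are defined in src/envs.py like their grouped counterparts (download_dataset is a hypothetical name, not part of this repo):

from huggingface_hub import snapshot_download

def download_dataset(repo_id: str, local_dir: str, token: str) -> None:
    # Sketch only: same arguments the try-block above passes to each snapshot_download call.
    snapshot_download(
        repo_id=repo_id,
        local_dir=local_dir,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        force_download=True,
        token=token,
    )

# Hypothetical call sites replacing the two copies inside the try-block:
# download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH, TOKEN)
# download_dataset(RESULTS_GROUP_REPO, EVAL_RESULTS_GROUP_PATH, TOKEN)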
eval-results-group/.gitattributes ADDED
@@ -0,0 +1,58 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.lz4 filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# Audio files - uncompressed
+*.pcm filter=lfs diff=lfs merge=lfs -text
+*.sam filter=lfs diff=lfs merge=lfs -text
+*.raw filter=lfs diff=lfs merge=lfs -text
+# Audio files - compressed
+*.aac filter=lfs diff=lfs merge=lfs -text
+*.flac filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.ogg filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+# Image files - uncompressed
+*.bmp filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.tiff filter=lfs diff=lfs merge=lfs -text
+# Image files - compressed
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text
+# Video files - compressed
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.webm filter=lfs diff=lfs merge=lfs -text
eval-results-group/gpt-4o-mini/results_2024-10-25T17:52:35.json ADDED
@@ -0,0 +1 @@
+{"config": {"submitted_time": "2024-10-25T17:52:35", "model_name": "OpenAI/gpt-4o-mini"}, "results": {"MMLU": {"metric_name": 0.7}, "ARC": {"metric_name": 1.0}, "GSM8K": {"metric_name": 1.0}, "CQA": {"metric_name": 0.4168853395851045}, "Banking": {"metric_name": 0.7717097630831459}}}
src/envs.py CHANGED
@@ -15,6 +15,6 @@ CACHE_PATH=os.getenv("HF_HOME", ".")
 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
 EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
-EVAL_RESULTS_GROUP_PATH = os.path.join(CACHE_PATH, "eval-results")
+EVAL_RESULTS_GROUP_PATH = os.path.join(CACHE_PATH, "eval-results-group")

 API = HfApi(token=TOKEN)
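This one-line path change is presumably the "grouped dataset bug" from the commit message: before the fix, EVAL_RESULTS_GROUP_PATH resolved to the same local directory as EVAL_RESULTS_PATH, so the grouped snapshot added in app.py could land in, and mix with, the regular results cache. A tiny sketch of the collision, using only the paths visible in the hunk above:

import os

CACHE_PATH = os.getenv("HF_HOME", ".")
eval_results_path = os.path.join(CACHE_PATH, "eval-results")     # regular results cache
old_group_path = os.path.join(CACHE_PATH, "eval-results")        # pre-fix: identical to the above
new_group_path = os.path.join(CACHE_PATH, "eval-results-group")  # post-fix: its own directory
assert old_group_path == eval_results_path and new_group_path != eval_results_path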
src/leaderboard/read_evals.py CHANGED
@@ -212,11 +212,13 @@ def get_group_eval_results(results_path: str) -> list[EvalResultGroup]:
         eval_results[eval_name] = eval_result

     results = []
+    print(eval_results)
     for v in eval_results.values():
         try:
             v.to_dict() # we test if the dict version is complete
             results.append(v)
         except KeyError: # not all eval values present
+            print("key error")
             continue

     return results
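The two added print calls read like temporary debug output for diagnosing the grouped results. As a design note, a hedged sketch of a logging-based equivalent (logger name and messages are illustrative, not taken from the repo):

import logging

logger = logging.getLogger(__name__)
eval_results = {"example_group": object()}  # placeholder for the dict built above

# Stands in for the print(eval_results) added after `results = []`:
logger.debug("collected %d grouped eval results: %s", len(eval_results), list(eval_results))
# Stands in for the print("key error") added in the KeyError handler:
logger.warning("skipping grouped result with missing eval values")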
src/populate.py CHANGED
@@ -24,7 +24,6 @@ def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
 def get_leaderboard_group_df(results_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     raw_data = get_group_eval_results(results_path)
     all_data_json = [v.to_dict() for v in raw_data]
-
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumnGroup.average.name], ascending=False)
     df = df[cols].round(decimals=2)
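For context, a sketch of how the grouped dataframe builder above is presumably invoked from app.py; COLS_GROUP and BENCHMARK_COLS_GROUP are assumed names for the grouped column lists and are not confirmed by this diff:

# Hypothetical call site, mirroring how get_leaderboard_df is used for the
# ungrouped leaderboard; only EVAL_RESULTS_GROUP_PATH is confirmed by this commit.
group_df = get_leaderboard_group_df(
    results_path=EVAL_RESULTS_GROUP_PATH,
    cols=COLS_GROUP,
    benchmark_cols=BENCHMARK_COLS_GROUP,
)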