Alina Lozovskaia committed
Commit 6b9cbbe
1 Parent(s): 2293858

Updated populate.py

Files changed (1)
  1. src/populate.py +55 -46
src/populate.py CHANGED
@@ -1,65 +1,74 @@
 import json
 import os
-
 import pandas as pd
-
 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
 from src.leaderboard.filter_models import filter_models_flags
 from src.leaderboard.read_evals import get_raw_eval_results


-def get_leaderboard_df(
-    results_path: str, requests_path: str, dynamic_path: str, cols: list, benchmark_cols: list
-) -> pd.DataFrame:
-    raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)
-    all_data_json = [v.to_dict() for v in raw_data]
-    all_data_json.append(baseline_row)
-    filter_models_flags(all_data_json)
+def _load_json_data(file_path):
+    """Safely load JSON data from a file."""
+    try:
+        with open(file_path, "r") as file:
+            return json.load(file)
+    except json.JSONDecodeError:
+        print(f"Error reading JSON from {file_path}")
+        return None  # Or raise an exception

-    df = pd.DataFrame.from_records(all_data_json)
-    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
-    df = df[cols].round(decimals=2)

-    # filter out if any of the benchmarks have not been produced
-    df = df[has_no_nan_values(df, benchmark_cols)]
-    return raw_data, df
+def _process_model_data(entry, model_name_key="model", revision_key="revision"):
+    """Enrich model data with clickable links and revisions."""
+    entry[EvalQueueColumn.model.name] = make_clickable_model(entry.get(model_name_key, ""))
+    entry[EvalQueueColumn.revision.name] = entry.get(revision_key, "main")
+    return entry


-def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
-    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
+def get_evaluation_queue_df(save_path, cols):
+    """Generate dataframes for pending, running, and finished evaluation entries."""
     all_evals = []
-
+    entries = os.listdir(save_path)
     for entry in entries:
-        if ".json" in entry:
-            file_path = os.path.join(save_path, entry)
-            with open(file_path) as fp:
-                data = json.load(fp)
+        if entry.startswith(".") or entry.endswith(".md"):
+            continue
+        file_path = os.path.join(save_path, entry)
+        if os.path.isfile(file_path):  # Check if it's a file
+            data = _load_json_data(file_path)
+            if data:
+                all_evals.append(_process_model_data(data))
+        else:
+            # Optionally handle directory contents if needed
+            sub_entries = os.listdir(file_path)
+            for sub_entry in sub_entries:
+                sub_file_path = os.path.join(file_path, sub_entry)
+                if os.path.isfile(sub_file_path):
+                    data = _load_json_data(sub_file_path)
+                    if data:
+                        all_evals.append(_process_model_data(data))

-            data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-            data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+    # Organizing data by status
+    status_map = {
+        "PENDING": ["PENDING", "RERUN"],
+        "RUNNING": ["RUNNING"],
+        "FINISHED": ["FINISHED", "PENDING_NEW_EVAL"],
+    }
+    status_dfs = {status: [] for status in status_map}
+    for eval_data in all_evals:
+        for status, extra_statuses in status_map.items():
+            if eval_data["status"] in extra_statuses:
+                status_dfs[status].append(eval_data)

-            all_evals.append(data)
-        elif ".md" not in entry:
-            # this is a folder
-            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if not e.startswith(".")]
-            for sub_entry in sub_entries:
-                file_path = os.path.join(save_path, entry, sub_entry)
-                with open(file_path) as fp:
-                    try:
-                        data = json.load(fp)
-                    except json.JSONDecodeError:
-                        print(f"Error reading {file_path}")
-                        continue
+    return tuple(pd.DataFrame(status_dfs[status], columns=cols) for status in ["FINISHED", "RUNNING", "PENDING"])

-                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
-                all_evals.append(data)

-    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
-    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
-    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
-    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
-    df_running = pd.DataFrame.from_records(running_list, columns=cols)
-    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
-    return df_finished[cols], df_running[cols], df_pending[cols]
+def get_leaderboard_df(results_path, requests_path, dynamic_path, cols, benchmark_cols):
+    """Retrieve and process leaderboard data."""
+    raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
+    all_data_json = [model.to_dict() for model in raw_data] + [baseline_row]
+    filter_models_flags(all_data_json)
+
+    df = pd.DataFrame.from_records(all_data_json)
+    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    df = df[cols].round(decimals=2)
+    df = df[has_no_nan_values(df, benchmark_cols)]
+    return raw_data, df
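
For context, a minimal sketch of how these two entry points are typically wired into the leaderboard app follows. The paths and column lists below are hypothetical placeholders (the real values come from the Space's own config and display modules, not from this commit); the one detail taken from the diff itself is that the refactored get_evaluation_queue_df returns its dataframes as a (finished, running, pending) tuple.

from src.populate import get_evaluation_queue_df, get_leaderboard_df

# Hypothetical placeholders: the actual Space builds these from its config
# (dataset checkout paths) and from the AutoEvalColumn / EvalQueueColumn definitions.
RESULTS_PATH = "./eval-results"
REQUESTS_PATH = "./eval-queue"
DYNAMIC_PATH = "./dynamic-info"
COLS = ["model", "average"]
BENCHMARK_COLS = ["average"]
EVAL_COLS = ["model", "revision", "status"]

raw_data, leaderboard_df = get_leaderboard_df(
    RESULTS_PATH, REQUESTS_PATH, DYNAMIC_PATH, COLS, BENCHMARK_COLS
)

# Per this commit, the queue function returns (finished, running, pending), in that order.
finished_df, running_df, pending_df = get_evaluation_queue_df(REQUESTS_PATH, EVAL_COLS)
print(f"{len(finished_df)} finished, {len(running_df)} running, {len(pending_df)} pending")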