Koshti10 committed on
Commit
f5ad77e
1 Parent(s): 0d0515c

Upload 8 files

LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2023 clembench
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
app.py ADDED
@@ -0,0 +1,215 @@
+ import gradio as gr
+
+ from src.assets.text_content import TITLE, INTRODUCTION_TEXT
+ from src.leaderboard_utils import filter_search, get_github_data
+ from src.plot_utils import split_models, compare_plots
+
+ # from src.reload_utils import ReloadData
+ from src.reload import get_primary_leaderboard, get_open_models, get_closed_models, get_plot_df, get_version_names, get_version_df, get_prev_df
+
+ # Interval (in seconds) at which components with `every` re-run their value function
+ reload_time = 5
+
+ # # For Leaderboards
+ # # Get CSV data
+ # global primary_leaderboard_df, version_dfs, version_names
+ # primary_leaderboard_df, version_dfs, version_names = get_github_data()
+
+ # global prev_df
+ # prev_df = version_dfs[0]
+ # def select_prev_df(name):
+ #     ind = version_names.index(name)
+ #     prev_df = version_dfs[ind]
+ #     return prev_df
+
+ # # For Plots
+ # global plot_df, OPEN_MODELS, CLOSED_MODELS
+ # plot_df = primary_leaderboard_df[0]
+ # MODELS = list(plot_df[list(plot_df.columns)[0]].unique())
+ # OPEN_MODELS, CLOSED_MODELS = split_models(MODELS)
+
+
+ # MAIN APPLICATION
+ main_app = gr.Blocks()
+ with main_app:
+     gr.HTML(TITLE)
+     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         with gr.TabItem("🥇 CLEM Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
+             with gr.Row():
+                 search_bar = gr.Textbox(
+                     placeholder=" 🔍 Search for models - separate multiple queries with `;` and press ENTER...",
+                     show_label=False,
+                     elem_id="search-bar",
+                 )
+
+             leaderboard_table = gr.DataFrame(
+                 value=get_primary_leaderboard,
+                 elem_id="leaderboard-table",
+                 interactive=False,
+                 visible=True,
+                 every=reload_time
+             )
+
+             # Hidden copy of the leaderboard: search queries filter from this copy,
+             # so the visible table can be updated without touching the source data
+             dummy_leaderboard_table = gr.Dataframe(
+                 value=get_primary_leaderboard,
+                 elem_id="leaderboard-table",
+                 interactive=False,
+                 visible=False,
+                 every=reload_time
+             )
+
+             search_bar.submit(
+                 filter_search,
+                 [dummy_leaderboard_table, search_bar],
+                 leaderboard_table,
+                 queue=True
+             )
+
+         with gr.TabItem("📈 Plot", id=3):
+             with gr.Row():
+                 open_models_selection = gr.CheckboxGroup(
+                     choices=get_open_models(),
+                     label="Open-weight Models 🌐",
+                     value=[],
+                     elem_id="value-select",
+                     interactive=True,
+                     every=reload_time
+                 )
+
+             with gr.Row():
+                 closed_models_selection = gr.CheckboxGroup(
+                     choices=get_closed_models(),
+                     label="Closed-weight Models 💼",
+                     value=[],
+                     elem_id="value-select-2",
+                     interactive=True,
+                     every=reload_time
+                 )
+
+             with gr.Row():
+                 with gr.Column():
+                     show_all = gr.CheckboxGroup(
+                         ["Select All Models"],
+                         label="Show plot for all models 🤖",
+                         value=[],
+                         elem_id="value-select-3",
+                         interactive=True,
+                     )
+
+                 with gr.Column():
+                     show_names = gr.CheckboxGroup(
+                         ["Show Names"],
+                         label="Show names of models on the plot 🏷️",
+                         value=[],
+                         elem_id="value-select-4",
+                         interactive=True,
+                     )
+
+                 with gr.Column():
+                     show_legend = gr.CheckboxGroup(
+                         ["Show Legend"],
+                         label="Show legend on the plot 💡",
+                         value=[],
+                         elem_id="value-select-5",
+                         interactive=True,
+                     )
+
+             with gr.Row():
+                 dummy_plot_df = gr.DataFrame(
+                     value=get_plot_df,
+                     visible=False,
+                     every=reload_time
+                 )
+
+             with gr.Row():
+                 with gr.Column():
+                     # Output block for the plot
+                     plot_output = gr.Plot()
+
+             open_models_selection.change(
+                 compare_plots,
+                 [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend],
+                 plot_output,
+                 queue=True
+             )
+
+             closed_models_selection.change(
+                 compare_plots,
+                 [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend],
+                 plot_output,
+                 queue=True
+             )
+
+             show_all.change(
+                 compare_plots,
+                 [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend],
+                 plot_output,
+                 queue=True
+             )
+
+             show_names.change(
+                 compare_plots,
+                 [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend],
+                 plot_output,
+                 queue=True
+             )
+
+             show_legend.change(
+                 compare_plots,
+                 [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend],
+                 plot_output,
+                 queue=True
+             )
+
+         with gr.TabItem("🔄 Versions and Details", elem_id="details", id=2):
+             with gr.Row():
+                 version_select = gr.Dropdown(
+                     choices=get_version_names(),
+                     label="Select Version 🕹️",
+                     value=get_version_names()[0],
+                     every=reload_time
+                 )
+             with gr.Row():
+                 search_bar_prev = gr.Textbox(
+                     placeholder=" 🔍 Search for models - separate multiple queries with `;` and press ENTER...",
+                     show_label=False,
+                     elem_id="search-bar-2",
+                 )
+
+             prev_table = gr.Dataframe(
+                 value=get_prev_df,
+                 elem_id="leaderboard-table",
+                 interactive=False,
+                 visible=True,
+                 every=reload_time
+             )
+
+             dummy_prev_table = gr.Dataframe(
+                 value=get_prev_df,
+                 elem_id="leaderboard-table",
+                 interactive=False,
+                 visible=False,
+                 every=reload_time
+             )
+
+             search_bar_prev.submit(
+                 filter_search,
+                 [dummy_prev_table, search_bar_prev],
+                 prev_table,
+                 queue=True
+             )
+
+             version_select.change(
+                 get_prev_df,
+                 [version_select],
+                 prev_table,
+                 queue=True,
+                 every=reload_time
+             )
+ main_app.load()
+
+ main_app.queue()
+
+ main_app.launch()
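
Note on the reload mechanism in app.py: when a component's `value` is a callable and `every=N` is set, Gradio re-invokes that callable every N seconds; this polling requires the queue, which the app enables via `main_app.queue()`. A minimal, self-contained sketch of the same pattern (the `load_scores` function is a hypothetical stand-in for `get_primary_leaderboard`):

import gradio as gr
import pandas as pd
from datetime import datetime

def load_scores() -> pd.DataFrame:
    # Hypothetical stand-in for get_primary_leaderboard(); re-run on every tick
    return pd.DataFrame({"Model": ["model-a"],
                         "Fetched at": [datetime.now().strftime("%H:%M:%S")]})

with gr.Blocks() as demo:
    # value is a callable and every=5, so the table refreshes every 5 seconds
    gr.DataFrame(value=load_scores, every=5)

demo.queue()   # `every` polling only works with the queue enabled
demo.launch()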
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ gradio==3.43.2
+ pandas==2.0.0
+ plotly==5.18.0
src/assets/text_content.py ADDED
@@ -0,0 +1,53 @@
+ TITLE = """<h1 align="center" id="space-title"> 🏆 CLEM Leaderboard</h1>"""
+
+ INTRODUCTION_TEXT = """
+ <h6 align="center">
+ The CLEM Leaderboard aims to track, rank and evaluate current cLLMs (chat-optimized Large Language Models), with the suggested pronunciation “clems”.
+
+ The benchmarking approach is described in [Clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents](https://arxiv.org/abs/2305.13455).
+
+ Source code for benchmarking "clems" is available here: [Clembench](https://github.com/clembench/clembench)
+
+ All generated files and results from the benchmark runs are available here: [clembench-runs](https://github.com/clembench/clembench-runs) </h6>
+ """
+
+ SHORT_NAMES = {
+     "t0.0": "",
+     "claude-v1.3": "cl-1.3",
+     "claude-2": "cl-2",
+     "claude-2.1": "cl-2.1",
+     "claude-instant-1.2": "cl-ins-1.2",
+     "gpt-3.5-turbo-0613": "3.5-0613",
+     "gpt-3.5-turbo-1106": "3.5-1106",
+     "gpt-4-0613": "4-0613",
+     "gpt-4-1106-preview": "4-1106",
+     "gpt-4-0314": "4-0314",
+     "gpt-4": "4",
+     "text-davinci-003": "3",
+     "luminous-supreme": "lm",
+     "koala-13b": "k-13b",
+     "falcon-40b": "fal-40b",
+     "falcon-7b-instruct": "fal-7b",
+     "falcon-40b-instruct": "flc-i-40b",
+     "oasst-12b": "oas-12b",
+     "oasst-sft-4-pythia-12b-epoch-3.5": "ost-12b",
+     "vicuna-13b": "vic-13b",
+     "vicuna-33b-v1.3": "vic-33b-v1.3",
+     "sheep-duck-llama-2-70b-v1.1": "sd-l2-70b-v1.1",
+     "sheep-duck-llama-2-13b": "sd-l2-13b",
+     "WizardLM-70b-v1.0": "w-70b-v1.0",
+     "CodeLlama-34b-Instruct-hf": "cl-34b",
+     "command": "com",
+     "Mistral-7B-Instruct-v0.1": "m-i-7b-v0.1",
+     "Wizard-Vicuna-13B-Uncensored-HF": "vcn-13b",
+     "llama-2-13b-chat-hf": "l2-13b",
+     "llama-2-70b-chat-hf": "l2-70b",
+     "llama-2-7b-chat-hf": "l2-7b",
+     "koala-13B-HF": "k-13b",
+     "WizardLM-13b-v1.2": "w-13b-v1.2",
+     "vicuna-7b-v1.5": "vic-7b-v1.5",
+     "vicuna-13b-v1.5": "vic-13b-v1.5",
+     "gpt4all-13b-snoozy": "g4a-13b-s",
+     "zephyr-7b-alpha": "z-7b-a",
+     "zephyr-7b-beta": "z-7b-b"
+ }
src/leaderboard_utils.py ADDED
@@ -0,0 +1,142 @@
+ import os
+ import json
+
+ import pandas as pd
+ import requests
+ from io import StringIO
+ from datetime import datetime
+
+
+ def get_github_data():
+     '''
+     Get data from the CSV files on GitHub.
+     Args:
+         None
+     Returns:
+         latest_df: single-element list containing a dataframe of the latest leaderboard version, restricted to its first 4 columns
+         all_dfs: list of dataframes for the previous versions + latest version, including columns for all games
+         all_vnames: list of names for the previous versions + latest version (for the Versions and Details tab dropdown)
+     '''
+     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+     print(f"Loading GitHub data at time = {current_time}")
+
+     uname = "kushal-10"
+     repo = "clembench-runs"
+     json_url = f"https://raw.githubusercontent.com/{uname}/{repo}/main/benchmark_runs.json"
+     resp = requests.get(json_url)
+     if resp.status_code == 200:
+         json_data = json.loads(resp.text)
+         versions = json_data['versions']
+         version_names = []
+         csv_url = f"https://raw.githubusercontent.com/{uname}/{repo}/main/"
+         for ver in versions:
+             version_names.append(ver['version'])
+             csv_path = ver['result_file'].split('/')[1:]
+             csv_path = '/'.join(csv_path)
+
+         # Sort version names so the latest comes first
+         float_content = [float(s[1:]) for s in version_names]
+         float_content.sort(reverse=True)
+         version_names = ['v' + str(s) for s in float_content]
+
+         DFS = []
+         for version in version_names:
+             result_url = csv_url + version + '/' + csv_path
+             csv_response = requests.get(result_url)
+             if csv_response.status_code == 200:
+                 df = pd.read_csv(StringIO(csv_response.text))
+                 df = process_df(df)
+                 df = df.sort_values(by=list(df.columns)[1], ascending=False)  # Sort by clemscore
+                 DFS.append(df)
+             else:
+                 print(f"Failed to read CSV file for version: {version}. Status code: {csv_response.status_code}")
+
+         # Only keep the relevant columns for the main leaderboard
+         latest_df_dummy = DFS[0]
+         all_columns = list(latest_df_dummy.columns)
+         keep_columns = all_columns[0:4]
+         latest_df_dummy = latest_df_dummy.drop(columns=[c for c in all_columns if c not in keep_columns])
+
+         latest_df = [latest_df_dummy]
+         all_dfs = []
+         all_vnames = []
+         for df, name in zip(DFS, version_names):
+             all_dfs.append(df)
+             all_vnames.append(name)
+         return latest_df, all_dfs, all_vnames
+
+     else:
+         print(f"Failed to read JSON file. Status code: {resp.status_code}")
+
+ def process_df(df: pd.DataFrame) -> pd.DataFrame:
+     '''
+     Process dataframe:
+     - Remove repetition in model names
+     - Convert column datatypes from "str" to "float" so sorting works numerically
+     - Update column names
+     Args:
+         df: Unprocessed dataframe (after using update_cols)
+     Returns:
+         df: Processed dataframe
+     '''
+
+     # Change column type from str to float
+     list_column_names = list(df.columns)
+     model_col_name = list_column_names[0]
+     for col in list_column_names:
+         if col != model_col_name:
+             df[col] = df[col].astype(float)
+
+     # Remove repetition in model names, if any
+     models_list = []
+     for i in range(len(df)):
+         model_name = df.iloc[i][model_col_name]
+         splits = model_name.split('--')
+         splits = [split.replace('-t0.0', '') for split in splits]  # Comment out to keep -t0.0
+         if splits[0] == splits[1]:
+             models_list.append(splits[0])
+         else:
+             models_list.append(splits[0] + "--" + splits[1])
+     df[model_col_name] = models_list
+
+     # Update column names
+     update = ['Model', 'Clemscore', '% Played', 'Quality Score']
+     game_metrics = list_column_names[4:]
+
+     for col in game_metrics:
+         splits = col.split(',')
+         update.append(splits[0].capitalize() + splits[1])
+
+     map_cols = {}
+     for i in range(len(update)):
+         map_cols[list_column_names[i]] = str(update[i])
+
+     df = df.rename(columns=map_cols)
+     return df
+
+ def filter_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
+     '''
+     Filter the dataframe based on the search query.
+     Args:
+         df: Unfiltered dataframe
+         query: a string of queries separated by ";"
+     Returns:
+         filtered_df: Dataframe containing only rows whose 'Model' column matches a query
+     '''
+     if query == "":
+         return df
+
+     queries = query.split(';')
+     list_cols = list(df.columns)
+     df_len = len(df)
+     filtered_models = []
+     models_list = list(df[list_cols[0]])
+     for q in queries:
+         q = q.lower().strip()
+         for i in range(df_len):
+             model_name = models_list[i]
+             if q in model_name.lower():
+                 filtered_models.append(model_name)  # Append model names containing query q
+
+     filtered_df = df[df[list_cols[0]].isin(filtered_models)]
+     return filtered_df
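
A quick usage sketch of filter_search, with a made-up two-column frame standing in for the real leaderboard data (which comes from get_github_data):

import pandas as pd
from src.leaderboard_utils import filter_search

# Hypothetical miniature leaderboard; model names must be in the first column
df = pd.DataFrame({"Model": ["gpt-4-0613", "claude-2", "vicuna-13b"],
                   "Clemscore": [50.0, 40.0, 30.0]})

# Queries are ';'-separated and matched case-insensitively as substrings
print(filter_search(df, "gpt; claude"))   # keeps gpt-4-0613 and claude-2
print(filter_search(df, ""))              # empty query returns the full frame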
src/plot_utils.py ADDED
@@ -0,0 +1,133 @@
+ import pandas as pd
+ import plotly.express as px
+
+ from src.assets.text_content import SHORT_NAMES
+
+ def plotly_plot(df: pd.DataFrame, LIST: list, ALL: list, NAMES: list, LEGEND: list):
+     '''
+     Takes in a list of models and produces a Plotly scatter plot.
+     Args:
+         df: A dummy dataframe of the latest version
+         LIST: List of models to plot
+         ALL: Either [] or ["Select All Models"] - toggle to plot all models
+         NAMES: Either [] or ["Show Names"] - toggle to show model names on the plot
+         LEGEND: Either [] or ["Show Legend"] - toggle to show the legend on the plot
+     Returns:
+         fig: Plotly figure
+     '''
+
+     # Get the list of all models and append a short-names column to df
+     list_columns = list(df.columns)
+     ALL_LIST = list(df[list_columns[0]].unique())
+     short_names = label_map(ALL_LIST)
+     list_short_names = list(short_names.values())
+     df["Short"] = list_short_names
+
+     if ALL:
+         LIST = ALL_LIST
+     # Filter dataframe based on the provided list of models
+     df = df[df[list_columns[0]].isin(LIST)]
+
+     if NAMES:
+         fig = px.scatter(df, x=list_columns[2], y=list_columns[3], color=list_columns[0], symbol=list_columns[0],
+                          color_discrete_map={"category1": "blue", "category2": "red"},
+                          hover_name=list_columns[0], template="plotly_white", text="Short")
+         fig.update_traces(textposition='top center')
+     else:
+         fig = px.scatter(df, x=list_columns[2], y=list_columns[3], color=list_columns[0], symbol=list_columns[0],
+                          color_discrete_map={"category1": "blue", "category2": "red"},
+                          hover_name=list_columns[0], template="plotly_white")
+
+     if not LEGEND:
+         fig.update_layout(showlegend=False)
+
+     fig.update_layout(
+         xaxis_title='% Played',
+         yaxis_title='Quality Score',
+         title='Overview of benchmark results',
+         height=1000
+     )
+
+     fig.update_xaxes(range=[-5, 105])
+     fig.update_yaxes(range=[-5, 105])
+
+     return fig
+
+
+ # ['Model', 'Clemscore', 'All(Played)', 'All(Quality Score)']
+ def compare_plots(df: pd.DataFrame, LIST1: list, LIST2: list, ALL: list, NAMES: list, LEGEND: list):
+     '''
+     Quality Score vs. % Played plot for the selected models.
+     Args:
+         df: A dummy dataframe of the latest version
+         LIST1: The list of open-weight models to show in the plot, updated from the frontend
+         LIST2: The list of commercial models to show in the plot, updated from the frontend
+         ALL: Either [] or ["Select All Models"] - toggle to plot all models
+         NAMES: Either [] or ["Show Names"] - toggle to show model names on the plot
+         LEGEND: Either [] or ["Show Legend"] - toggle to show the legend on the plot
+     Returns:
+         fig: The plot
+     '''
+
+     # Combine the lists of open-weight and commercial models
+     LIST = LIST1 + LIST2
+     fig = plotly_plot(df, LIST, ALL, NAMES, LEGEND)
+
+     return fig
+
+ def shorten_model_name(full_name):
+     # Split the name into parts
+     parts = full_name.split('-')
+
+     # Keep only the parts that contain digits (model sizes and versions)
+     short_name_parts = [part for part in parts if any(char.isdigit() for char in part)]
+
+     if len(parts) == 1:
+         short_name = full_name[:3]
+     else:
+         # Join the parts to form the short name
+         short_name = '-'.join(short_name_parts)
+
+     # Prefix with the first character and strip any leading or trailing hyphens
+     short_name = full_name[0] + '-' + short_name.strip('-')
+
+     return short_name
+
+ def label_map(model_list: list) -> dict:
+     '''
+     Generate a map from long names to short names, for labelling the frontend graph.
+     The short names are defined in src/assets/text_content.py.
+     Args:
+         model_list: A list of long model names
+     Returns:
+         short_names: A dict from long to short names
+     '''
+     short_names = {}
+     for model_name in model_list:
+         if model_name in SHORT_NAMES:
+             short_name = SHORT_NAMES[model_name]
+         else:
+             # Fall back to a generated short name for unknown models
+             short_name = shorten_model_name(model_name)
+
+         short_names[model_name] = short_name
+
+     return short_names
+
+ def split_models(MODEL_LIST: list):
+     '''
+     Split the models into open-weight and commercial.
+     '''
+     open_models = []
+     comm_models = []
+
+     for model in MODEL_LIST:
+         if model.startswith(('gpt-', 'claude-', 'command')):
+             comm_models.append(model)
+         else:
+             open_models.append(model)
+
+     open_models.sort(key=lambda o: o.upper())
+     comm_models.sort(key=lambda c: c.upper())
+     return open_models, comm_models
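
For illustration, how these two helpers behave on a small, made-up model list (expected outputs shown as comments, derived from the logic above):

from src.plot_utils import split_models, label_map

models = ["gpt-4-0613", "claude-2", "vicuna-13b", "zephyr-7b-beta"]

open_models, comm_models = split_models(models)
# open_models -> ['vicuna-13b', 'zephyr-7b-beta']  (no commercial prefix)
# comm_models -> ['claude-2', 'gpt-4-0613']        (gpt-/claude-/command prefixes)

labels = label_map(models)
# All four names are in SHORT_NAMES, e.g. 'claude-2' -> 'cl-2';
# names not in SHORT_NAMES fall back to shorten_model_name()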
src/reload.py ADDED
@@ -0,0 +1,78 @@
+ # Isolated functions to reload the leaderboard data and plot the results
+
+ from src.leaderboard_utils import filter_search, get_github_data
+ from src.plot_utils import split_models, compare_plots
+
+ def get_primary_leaderboard():
+     '''
+     Returns
+         primary_leaderboard_df[0]: Dataframe containing the primary leaderboard (latest version of the benchmark results)
+     '''
+     print("Initializing Reload...........")
+
+     primary_leaderboard_df, _, _ = get_github_data()
+     print(primary_leaderboard_df)
+     return primary_leaderboard_df[0]
+
+ def get_open_models():
+     '''
+     Returns
+         open_models: Checkbox group choices containing the open models
+     '''
+     primary_leaderboard_df, _, _ = get_github_data()
+     temp_df = primary_leaderboard_df[0]
+     models = list(temp_df[list(temp_df.columns)[0]].unique())
+     open_models, _ = split_models(models)
+     return open_models
+
+ def get_closed_models():
+     '''
+     Returns
+         closed_models: Checkbox group choices containing the closed models
+     '''
+     primary_leaderboard_df, _, _ = get_github_data()
+     temp_df = primary_leaderboard_df[0]
+     models = list(temp_df[list(temp_df.columns)[0]].unique())
+     _, closed_models = split_models(models)
+     return closed_models
+
+ def get_plot_df():
+     '''
+     Returns
+         plot_df: Dataframe containing the latest version's results, for plotting
+     '''
+     primary_leaderboard_df, _, _ = get_github_data()
+     plot_df = primary_leaderboard_df[0]
+     return plot_df
+
+ def get_version_names():
+     '''
+     Returns
+         version_names: List of benchmark-result versions for the dropdown selection
+     '''
+     _, _, version_names = get_github_data()
+     return version_names
+
+ def get_version_df():
+     '''
+     Returns
+         version_dfs: List of dataframes containing the benchmark results for all versions
+     '''
+     _, version_dfs, _ = get_github_data()
+     return version_dfs
+
+ def get_prev_df(name='initial'):
+     '''
+     Returns
+         prev_df: Dataframe containing the benchmark results for the given version (default = latest version)
+     '''
+     _, version_dfs, version_names = get_github_data()
+
+     if name == 'initial':
+         name = version_names[0]
+
+     ind = version_names.index(name)
+     prev_df = version_dfs[ind]
+     return prev_df
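
One design trade-off worth noting: each of these functions calls get_github_data() afresh, so every `every`-driven refresh re-downloads the JSON and CSVs rather than reading a shared snapshot (the ReloadData class below keeps a single snapshot instead). A minimal usage sketch:

from src.reload import get_version_names, get_prev_df

versions = get_version_names()      # version names, newest first
oldest = get_prev_df(versions[-1])  # leaderboard dataframe for the oldest version
latest = get_prev_df()              # the 'initial' default resolves to the newest version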
src/reload_utils.py ADDED
@@ -0,0 +1,82 @@
+ # Isolated class to reload the leaderboard data and plot the results
+
+ from src.leaderboard_utils import filter_search, get_github_data
+ from src.plot_utils import split_models, compare_plots
+
+ # COMPONENTS TO RELOAD EVERY TIME:
+ # leaderboard_table, dummy_leaderboard_table,
+ # open_models_selection, closed_models_selection, show_all, show_names, show_legend,
+ # version_select, prev_table, dummy_prev_table
+
+ class ReloadData():
+     '''
+     A class containing methods to reload the leaderboard data and plot the results.
+     The methods return individual component values directly, for use with the 'every' arg of a component.
+     '''
+
+     def __init__(self):
+         print("Initializing Reload...........")
+         self.primary_leaderboard_df, self.version_dfs, self.version_names = get_github_data()
+         self.plot_df = self.primary_leaderboard_df[0]
+         self.models = list(self.plot_df[list(self.plot_df.columns)[0]].unique())
+         print("Reload completed....... Here's a reloaded dataframe for the latest version:")
+         print(self.primary_leaderboard_df)
+
+     def get_primary_leaderboard(self):
+         '''
+         Returns
+             self.primary_leaderboard_df[0]: Dataframe containing the primary leaderboard (latest version of the benchmark results)
+         '''
+         return self.primary_leaderboard_df[0]
+
+     def get_open_models(self):
+         '''
+         Returns
+             open_models: Checkbox group choices containing the open models
+         '''
+         self.open_models, _ = split_models(self.models)
+         return self.open_models
+
+     def get_closed_models(self):
+         '''
+         Returns
+             closed_models: Checkbox group choices containing the closed models
+         '''
+         _, self.closed_models = split_models(self.models)
+         return self.closed_models
+
+     def get_plot_df(self):
+         '''
+         Returns
+             plot_df: Dataframe containing the latest version's results, for plotting
+         '''
+         return self.plot_df
+
+     def get_version_names(self):
+         '''
+         Returns
+             version_names: List of benchmark-result versions for the dropdown selection
+         '''
+         return self.version_names
+
+     def get_version_df(self):
+         '''
+         Returns
+             version_dfs: List of dataframes containing the benchmark results for all versions
+         '''
+         return self.version_dfs
+
+     def get_prev_df(self, name='initial'):
+         '''
+         Returns
+             prev_df: Dataframe containing the benchmark results for the given version (default = latest version)
+         '''
+         if name == 'initial':
+             name = self.version_names[0]
+
+         ind = self.version_names.index(name)
+         self.prev_df = self.version_dfs[ind]
+         return self.prev_df
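
ReloadData is currently unused (its import in app.py is commented out); if re-enabled, it would be used roughly like this, fetching the GitHub data once per instantiation:

from src.reload_utils import ReloadData

reloader = ReloadData()                       # downloads the data once, up front
latest = reloader.get_primary_leaderboard()   # latest-version leaderboard
open_models = reloader.get_open_models()
prev = reloader.get_prev_df()                 # defaults to the latest version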