Corey Morris
committed on
Commit
•
dc21a69
1
Parent(s):
a125eb8
Table now displays the columns that have the top differences
Browse files
app.py
CHANGED
@@ -86,6 +86,19 @@ def create_line_chart(df, model_names, metrics):
|
|
86 |
fig.update_layout(showlegend=True)
|
87 |
return fig
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
|
91 |
data_provider = ResultDataProcessor()
|
@@ -257,32 +270,20 @@ if selected_x_column != selected_y_column: # Avoid creating a plot with the s
|
|
257 |
else:
|
258 |
st.write("Please select different columns for the x and y axes.")
|
259 |
|
260 |
-
|
261 |
# Section to select a model and display radar and line charts
|
262 |
st.header("Compare selected models to models the closest 5 models on MMLU average")
|
263 |
st.write("This is to demonstrate that while the average score is useful, there is a lot of variation in performance on individual tasks.")
|
264 |
selected_model_name = st.selectbox("Select a Model:", filtered_data.index.tolist())
|
265 |
-
metrics_to_compare = ['MMLU_abstract_algebra', 'MMLU_astronomy', 'MMLU_business_ethics', 'MMLU_average', 'MMLU_moral_scenarios']
|
266 |
-
closest_models = filtered_data['MMLU_average'].sub(filtered_data.loc[selected_model_name, 'MMLU_average']).abs().nsmallest(5).index.tolist()
|
267 |
-
|
268 |
-
st.dataframe(filtered_data.loc[closest_models, metrics_to_compare])
|
269 |
|
270 |
-
#
|
271 |
-
|
272 |
-
# Calculate the absolute differences for each task between the target model and the closest models
|
273 |
-
differences = df.loc[closest_models].drop(columns=exclude_columns).sub(df.loc[target_model]).abs()
|
274 |
-
# Unstack the differences and sort by the largest absolute difference
|
275 |
-
top_differences = differences.unstack().nlargest(num_differences)
|
276 |
-
# Convert the top differences to a DataFrame for display
|
277 |
-
top_differences_table = pd.DataFrame({
|
278 |
-
'Task': [idx[0] for idx in top_differences.index],
|
279 |
-
'Difference': top_differences.values
|
280 |
-
})
|
281 |
-
return top_differences_table, top_differences_table['Task'].tolist()
|
282 |
|
283 |
# Find the top 10 tasks with the largest differences and convert to a DataFrame
|
284 |
top_differences_table, top_differences_tasks = find_top_differences_table(filtered_data, selected_model_name, closest_models)
|
285 |
|
|
|
|
|
|
|
286 |
# Display the table in the Streamlit app
|
287 |
st.markdown("## Top Differences")
|
288 |
st.dataframe(top_differences_table)
|
|
|
86 |
fig.update_layout(showlegend=True)
|
87 |
return fig
|
88 |
|
89 |
+
def find_top_differences_table(df, target_model, closest_models, num_differences=10, exclude_columns=None):
    """Find the tasks on which the closest models differ most from the target model.

    Args:
        df: DataFrame indexed by model name with one numeric column per task.
        target_model: Index label of the reference model.
        closest_models: Index labels of the models compared against the target.
        num_differences: Maximum number of (task, model) differences to keep.
        exclude_columns: Column label(s) left out of the comparison.
            Defaults to ``['Parameters']``; labels missing from ``df`` are ignored.

    Returns:
        A tuple ``(top_differences_table, unique_tasks)``.
        ``top_differences_table`` has a ``'Task'`` and a ``'Difference'`` column,
        sorted from largest to smallest absolute difference; a task appears once
        per model that made it into the top differences.
        ``unique_tasks`` lists each of those tasks once, in the same ranking order.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if exclude_columns is None:
        exclude_columns = ['Parameters']

    # Drop the excluded columns from both operands; otherwise the subtraction
    # re-introduces them as all-NaN columns through label alignment.
    comparison_rows = df.loc[closest_models].drop(columns=exclude_columns, errors='ignore')
    target_row = df.loc[target_model].drop(labels=exclude_columns, errors='ignore')

    # Calculate the absolute differences for each task between the target model and the closest models
    differences = comparison_rows.sub(target_row).abs()

    # Unstack to a Series keyed by (task, model) and keep the largest absolute differences
    top_differences = differences.unstack().nlargest(num_differences)

    # Convert the top differences to a DataFrame for display
    top_differences_table = pd.DataFrame({
        'Task': [idx[0] for idx in top_differences.index],
        'Difference': top_differences.values,
    })

    # De-duplicate while keeping the ranking order (a set would scramble it,
    # making the column order of downstream tables vary between runs).
    unique_top_differences_tasks = list(dict.fromkeys(top_differences_table['Task']))
    return top_differences_table, unique_top_differences_tasks
|
102 |
|
103 |
|
104 |
data_provider = ResultDataProcessor()
|
|
|
270 |
else:
|
271 |
st.write("Please select different columns for the x and y axes.")
|
272 |
|
|
|
273 |
# Section to select a model and compare it with the models nearest to it on MMLU average
st.header("Compare the selected model to the 5 closest models on MMLU average")
st.write("This is to demonstrate that while the average score is useful, there is a lot of variation in performance on individual tasks.")
selected_model_name = st.selectbox("Select a Model:", filtered_data.index.tolist())

# Get the closest 5 models to the selected model based on MMLU average.
# The selected model has a distance of 0 to itself, so it is normally one of the five.
closest_models = filtered_data['MMLU_average'].sub(filtered_data.loc[selected_model_name, 'MMLU_average']).abs().nsmallest(5).index.tolist()

# Find the top 10 tasks with the largest differences and convert to a DataFrame
top_differences_table, top_differences_tasks = find_top_differences_table(filtered_data, selected_model_name, closest_models)

# Display the scores of the closest models, restricted to the top-differences tasks
st.dataframe(filtered_data.loc[closest_models, top_differences_tasks])

# Display the ranked differences table in the Streamlit app
st.markdown("## Top Differences")
st.dataframe(top_differences_table)
|