Corey Morris
committed on
Commit
•
2db58a0
1
Parent(s):
9695a47
Added explanation for the plot and a dataframe of the models
Browse files
app.py
CHANGED
@@ -259,7 +259,8 @@ else:
|
|
259 |
|
260 |
|
261 |
# Section to select a model and display radar and line charts
|
262 |
-
st.header("Compare
|
|
|
263 |
selected_model_name = st.selectbox("Select a Model:", filtered_data.index.tolist())
|
264 |
metrics_to_compare = ['MMLU_abstract_algebra', 'MMLU_astronomy', 'MMLU_business_ethics', 'MMLU_average', 'MMLU_moral_scenarios']
|
265 |
closest_models = filtered_data['MMLU_average'].sub(filtered_data.loc[selected_model_name, 'MMLU_average']).abs().nsmallest(5).index.tolist()
|
@@ -270,6 +271,11 @@ fig_line = create_line_chart(filtered_data, closest_models, metrics_to_compare)
|
|
270 |
st.plotly_chart(fig_radar)
|
271 |
st.plotly_chart(fig_line)
|
272 |
|
|
|
|
|
|
|
|
|
|
|
273 |
|
274 |
|
275 |
# end of custom scatter plots
|
|
|
259 |
|
260 |
|
261 |
# Section to select a model and display radar and line charts
|
262 |
+
st.header("Compare selected models to models the closest 5 models on MMLU average")
|
263 |
+
st.write("This is to demonstrate that while the average score is useful, there is a lot of variation in performance on individual tasks.")
|
264 |
selected_model_name = st.selectbox("Select a Model:", filtered_data.index.tolist())
|
265 |
metrics_to_compare = ['MMLU_abstract_algebra', 'MMLU_astronomy', 'MMLU_business_ethics', 'MMLU_average', 'MMLU_moral_scenarios']
|
266 |
closest_models = filtered_data['MMLU_average'].sub(filtered_data.loc[selected_model_name, 'MMLU_average']).abs().nsmallest(5).index.tolist()
|
|
|
271 |
st.plotly_chart(fig_radar)
|
272 |
st.plotly_chart(fig_line)
|
273 |
|
274 |
+
# show MMLU_average at the beginning of the dataframe
|
275 |
+
|
276 |
+
st.dataframe(filtered_data.loc[closest_models, metrics_to_compare])
|
277 |
+
|
278 |
+
|
279 |
|
280 |
|
281 |
# end of custom scatter plots
|