CoreyMorris
committed on
Commit
•
3ebf7a7
1
Parent(s):
443052d
Updated with new results 11-21
Browse files
- app.py +2 -2
- processed_data_2023-11-21.csv +0 -0
- results +1 -1
app.py
CHANGED
@@ -112,7 +112,7 @@ def find_top_differences_table(df, target_model, closest_models, num_differences
|
|
112 |
|
113 |
# st.title('Model Evaluation Results including MMLU by task')
|
114 |
st.title('Interactive Portal for Analyzing Open Source Large Language Models')
|
115 |
-
st.markdown("""***Last updated November
|
116 |
st.markdown("""**Models that are suspected to have training data contaminated with evaluation data have been removed.**""")
|
117 |
st.markdown("""
|
118 |
This page provides a way to explore the results for individual tasks and compare models across tasks. Data for the benchmarks hellaswag, arc_challenge, and truthfulQA have also been included for comparison.
|
@@ -121,7 +121,7 @@ st.markdown("""
|
|
121 |
""")
|
122 |
|
123 |
# Load the data into memory
|
124 |
-
data_path = "processed_data_2023-11-
|
125 |
data_df = load_csv_data(data_path)
|
126 |
# drop the column Unnamed: 0
|
127 |
data_df.rename(columns={'Unnamed: 0': "Model Name"}, inplace=True)
|
|
|
112 |
|
113 |
# st.title('Model Evaluation Results including MMLU by task')
|
114 |
st.title('Interactive Portal for Analyzing Open Source Large Language Models')
|
115 |
+
st.markdown("""***Last updated November 21th***""")
|
116 |
st.markdown("""**Models that are suspected to have training data contaminated with evaluation data have been removed.**""")
|
117 |
st.markdown("""
|
118 |
This page provides a way to explore the results for individual tasks and compare models across tasks. Data for the benchmarks hellaswag, arc_challenge, and truthfulQA have also been included for comparison.
|
|
|
121 |
""")
|
122 |
|
123 |
# Load the data into memory
|
124 |
+
data_path = "processed_data_2023-11-21.csv"
|
125 |
data_df = load_csv_data(data_path)
|
126 |
# drop the column Unnamed: 0
|
127 |
data_df.rename(columns={'Unnamed: 0': "Model Name"}, inplace=True)
|
processed_data_2023-11-21.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
results
CHANGED
@@ -1 +1 @@
|
|
1 |
-
Subproject commit
|
|
|
1 |
+
Subproject commit ed5d75491130aac5d145db63d458f2eaf410745a
|