|
import streamlit as st |
|
from my_model.results.demo import ResultDemonstrator |
|
from my_model.config import evaluation_config as config |
|
|
|
def run_demo() -> None:
    """
    Run the interactive Streamlit demo for visualizing model evaluation results and analysis.

    The page is laid out as two columns in a 1:4 width ratio. The narrow left
    column holds the selection widgets; the wide right column renders whatever
    the current selection asks for via a ``ResultDemonstrator`` instance.

    Returns:
        None. All output is produced through Streamlit calls and the
        ``ResultDemonstrator`` display/plot methods.
    """
    # Each option label is used twice (once to build the widget, once to decide
    # what to render), so bind it to a single name to keep both sides in sync.
    results_section = "Evaluation Results & Analysis"
    samples_section = 'Evaluation Samples'
    main_results = "Main & Ablation Results"
    per_category_results = "Results per Question Category"
    token_count_impact = "Prompt Length (token count) Impact on Performance"

    # Configure the page before any widget is created.
    st.set_page_config(page_title="Model Evaluation Results and Analyses",
                       layout="wide",
                       initial_sidebar_state="expanded")
    demo = ResultDemonstrator()
    col1, col2 = st.columns([1, 4])

    # Left column: selection widgets.
    # NOTE: analysis_type, model_name, score_name and samples_button are only
    # bound in the branch that creates them; the right column reads each one
    # under the very same condition, so they are always defined when used.
    with col1:
        section_type = st.radio("Select Evaluation Aspect", [results_section, samples_section])

        if section_type == results_section:
            # index=2 preselects the token-count analysis (third entry).
            analysis_type = st.radio("Select Type",
                                     [main_results, per_category_results, token_count_impact],
                                     index=2)
            if analysis_type == token_count_impact:
                model_name = st.radio("Select Model Size", config.MODEL_NAMES)
                score_name = st.radio("Select Score Type", ["VQA Score", "Exact Match"])
        elif section_type == samples_section:
            samples_button = st.button("Generate Random Samples")

    # Right column: render the view matching the selections made on the left.
    with col2:
        if section_type == results_section:
            if analysis_type == token_count_impact:
                # One collapsible panel per model configuration.
                for conf in config.MODEL_CONFIGURATIONS:
                    with st.expander(conf):
                        demo.plot_token_count_vs_scores(conf, model_name, score_name)
            elif analysis_type == main_results:
                demo.display_main_results()
            elif analysis_type == per_category_results:
                demo.display_ablation_results_per_question_category()
        elif section_type == samples_section:
            # Samples are shown only on the rerun triggered by the button click.
            if samples_button:
                demo.show_samples(3)