KB-VQA-E / my_model /tabs /results.py
m7mdal7aj's picture
Update my_model/tabs/results.py
05beea4 verified
raw
history blame
2.25 kB
import streamlit as st
from my_model.results.demo import ResultDemonstrator
from my_model.config import evaluation_config as config
def run_demo():
    """
    Render the Streamlit page for browsing model evaluation results and analyses.

    The page is split into two columns: a narrow one holding the selection
    widgets and a wide one showing whatever the current selection asks for.
    Nothing is returned; all output goes through Streamlit and the
    ``ResultDemonstrator`` instance.
    """
    # Option labels are compared against the widgets' return values below,
    # so each one is spelled out exactly once here.
    results_section = "Evaluation Results & Analysis"
    samples_section = "Evaluation Samples"
    main_analysis = "Main & Ablation Results"
    category_analysis = "Results per Question Category"
    token_analysis = "Prompt Length (token count) Impact on Performance"

    st.set_page_config(
        page_title="Model Evaluation Results and Analyses",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    demonstrator = ResultDemonstrator()

    # Relative widths 1:4 -> controls on the left, output on the right.
    controls_col, output_col = st.columns([1, 4])

    with controls_col:
        chosen_section = st.radio(
            "Select Evaluation Aspect",
            [results_section, samples_section],
        )

        if chosen_section == results_section:
            # The analysis selector only exists for the results section;
            # index=2 makes the prompt-length analysis the initial choice.
            chosen_analysis = st.radio(
                "Select Type",
                [main_analysis, category_analysis, token_analysis],
                index=2,
            )
            if chosen_analysis == token_analysis:
                # Extra selectors that only the prompt-length plots need.
                model_size = st.radio("Select Model Size", config.MODEL_NAMES)
                score_type = st.radio("Select Score Type", ["VQA Score", "Exact Match"])
        elif chosen_section == samples_section:
            generate_clicked = st.button("Generate Random Samples")

    with output_col:
        if chosen_section == samples_section:
            # Samples are shown only in the run triggered by the button click.
            if generate_clicked:
                demonstrator.show_samples(3)  # presumably the number of samples; confirm in ResultDemonstrator
        elif chosen_section == results_section:
            if chosen_analysis == main_analysis:
                demonstrator.display_main_results()
            elif chosen_analysis == category_analysis:
                demonstrator.display_ablation_results_per_question_category()
            elif chosen_analysis == token_analysis:
                # One collapsible panel per model configuration.
                for configuration in config.MODEL_CONFIGURATIONS:
                    with st.expander(configuration):
                        demonstrator.plot_token_count_vs_scores(configuration, model_size, score_type)