File size: 2,246 Bytes
dab7e6b
d6824cb
 
fb754b1
d6824cb
fb754b1
d6824cb
fb754b1
d6824cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05beea4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import streamlit as st
from my_model.results.demo import ResultDemonstrator
from my_model.config import evaluation_config as config 

def run_demo():
    """
    Build the Streamlit page that presents model evaluation results and analyses.

    The page is configured with a wide layout and split into two columns with
    a 1:4 width ratio. The narrow left column holds the selection widgets;
    the wide right column renders whatever those selections ask for by
    delegating to a ``ResultDemonstrator`` instance.
    """
    # Option labels are compared against in both columns, so name them once.
    results_section = "Evaluation Results & Analysis"
    samples_section = 'Evaluation Samples'
    main_results = "Main & Ablation Results"
    per_category = "Results per Question Category"
    token_impact = "Prompt Length (token count) Impact on Performance"

    st.set_page_config(
        page_title="Model Evaluation Results and Analyses",
        layout="wide",
        initial_sidebar_state="expanded",
    )
    demonstrator = ResultDemonstrator()
    controls, content = st.columns([1, 4])

    with controls:
        chosen_section = st.radio("Select Evaluation Aspect", [results_section, samples_section])
        if chosen_section == results_section:
            # The analysis selector exists only for the results section;
            # index=2 pre-selects the prompt-length analysis.
            chosen_analysis = st.radio(
                "Select Type",
                [main_results, per_category, token_impact],
                index=2,
            )
            if chosen_analysis == token_impact:
                # Model and score selectors are needed by this analysis only.
                model_name = st.radio("Select Model Size", config.MODEL_NAMES)
                score_name = st.radio("Select Score Type", ["VQA Score", "Exact Match"])
        elif chosen_section == samples_section:
            wants_samples = st.button("Generate Random Samples")

    with content:
        # Names bound conditionally in the left column are only read under
        # the same conditions here, so they are always defined when used.
        if chosen_section == results_section:
            if chosen_analysis == main_results:
                demonstrator.display_main_results()
            elif chosen_analysis == per_category:
                demonstrator.display_ablation_results_per_question_category()
            elif chosen_analysis == token_impact:
                # One collapsible panel per model configuration.
                for configuration in config.MODEL_CONFIGURATIONS:
                    with st.expander(configuration):
                        demonstrator.plot_token_count_vs_scores(configuration, model_name, score_name)
        elif chosen_section == samples_section and wants_samples:
            demonstrator.show_samples(3)