KB-VQA-E

Running

App Files Files Community

KB-VQA-E / my_model /tabs /results.py

m7mdal7aj

Update my_model/tabs/results.py

05beea4 verified 6 months ago

raw

history blame

2.25 kB

	import streamlit as st
	from my_model.results.demo import ResultDemonstrator
	from my_model.config import evaluation_config as config

	def run_demo():
	    """
	    Render the Streamlit page for exploring model evaluation results and samples.

	    The page is laid out as two columns with relative widths 1 and 4: the first
	    column holds the selection widgets and the second column shows whatever the
	    selections ask for, delegating the actual rendering to a ResultDemonstrator.
	    """
	    # Option labels are used both to build the widgets and to dispatch on the
	    # user's choice, so each one is named once here.
	    results_section = "Evaluation Results & Analysis"
	    samples_section = "Evaluation Samples"
	    main_ablation = "Main & Ablation Results"
	    per_category = "Results per Question Category"
	    token_impact = "Prompt Length (token count) Impact on Performance"

	    st.set_page_config(
	        page_title="Model Evaluation Results and Analyses",
	        layout="wide",
	        initial_sidebar_state="expanded",
	    )
	    demonstrator = ResultDemonstrator()
	    controls, display = st.columns([1, 4])

	    with controls:
	        # Top-level choice: aggregated results versus sample generation.
	        chosen_section = st.radio("Select Evaluation Aspect", [results_section, samples_section])

	        if chosen_section == results_section:
	            # The analysis selector exists only for the results section;
	            # index=2 preselects the prompt-length analysis.
	            chosen_analysis = st.radio(
	                "Select Type",
	                [main_ablation, per_category, token_impact],
	                index=2,
	            )
	            if chosen_analysis == token_impact:
	                # Extra selectors needed only by the prompt-length plots.
	                model_name = st.radio("Select Model Size", config.MODEL_NAMES)
	                score_name = st.radio("Select Score Type", ["VQA Score", "Exact Match"])
	        elif chosen_section == samples_section:
	            generate_clicked = st.button("Generate Random Samples")

	    with display:
	        # Each local read below was bound in the controls column under the
	        # same condition that guards its use here.
	        if chosen_section == samples_section:
	            if generate_clicked:
	                # NOTE(review): 3 is presumably the number of samples shown - confirm
	                # against ResultDemonstrator.show_samples.
	                demonstrator.show_samples(3)
	        elif chosen_section == results_section:
	            if chosen_analysis == main_ablation:
	                demonstrator.display_main_results()
	            elif chosen_analysis == per_category:
	                demonstrator.display_ablation_results_per_question_category()
	            elif chosen_analysis == token_impact:
	                # One collapsible panel per configuration listed in the config module.
	                for configuration in config.MODEL_CONFIGURATIONS:
	                    with st.expander(configuration):
	                        demonstrator.plot_token_count_vs_scores(configuration, model_name, score_name)