Update my_model/tabs/dataset_analysis.py
Browse files
my_model/tabs/dataset_analysis.py
CHANGED
@@ -246,33 +246,37 @@ class OKVQADatasetAnalyzer:
|
|
246 |
|
247 |
|
248 |
|
249 |
-
|
250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
datasets_comparison_table = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="VQA Datasets Comparison")
|
252 |
okvqa_dataset_characteristics = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="OK-VQA Dataset Characteristics")
|
253 |
-
|
254 |
-
val_data = process_okvqa_dataset(config.DATASET_VAL_QUESTIONS_PATH, config.DATASET_VAL_ANNOTATIONS_PATH,
|
255 |
-
save_to_csv=False)
|
256 |
-
train_data = process_okvqa_dataset(config.DATASET_TRAIN_QUESTIONS_PATH, config.DATASET_TRAIN_ANNOTATIONS_PATH ,
|
257 |
-
save_to_csv=False)
|
258 |
|
|
|
|
|
|
|
259 |
|
|
|
|
|
260 |
|
261 |
-
|
262 |
-
config.DATASET_VAL_QUESTIONS_PATH, 'train_test')
|
263 |
-
|
264 |
with st.container():
|
265 |
st.markdown("## Overview of KB-VQA Datasets")
|
266 |
col1, col2 = st.columns([2, 1])
|
267 |
with col1:
|
268 |
st.write(" ")
|
269 |
with st.expander("1 - Knowledge-Based VQA (KB-VQA)"):
|
270 |
-
st.markdown(""" [Knowledge-Based VQA (KB-VQA)](https://arxiv.org/abs/1511.02570): One of the earliest
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
and the KB.\n""")
|
276 |
with st.expander("2 - Factual VQA (FVQA)"):
|
277 |
st.markdown(""" [Factual VQA (FVQA)](https://arxiv.org/abs/1606.05433): This dataset includes 2,190
|
278 |
images and 5,826 questions, accompanied by a knowledge base containing 193,449 facts.
|
@@ -296,6 +300,8 @@ def run_dataset_analyzer():
|
|
296 |
st.markdown("#### KB-VQA Datasets Comparison")
|
297 |
st.write(datasets_comparison_table, use_column_width=True)
|
298 |
st.write("-----------------------")
|
|
|
|
|
299 |
with st.container():
|
300 |
st.write("\n" * 10)
|
301 |
st.markdown("## OK-VQA Dataset")
|
@@ -307,16 +313,14 @@ def run_dataset_analyzer():
|
|
307 |
with st.expander("Questions Distribution over Knowledge Category"):
|
308 |
df = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="Question Category Dist")
|
309 |
st.markdown("#### Questions Distribution over Knowledge Category")
|
310 |
-
dataset_analyzer.plot_bar_chart(df, "Knowledge Category", "Percentage", "Questions Distribution over "
|
311 |
-
"Knowledge Category")
|
312 |
|
313 |
with st.expander("Distribution of Question Keywords"):
|
314 |
-
|
315 |
-
#with st.expander("Distribution of Question Keywords"):
|
316 |
dataset_analyzer.categorize_questions()
|
317 |
st.markdown("#### Distribution of Question Keywords")
|
318 |
dataset_analyzer.plot_question_distribution()
|
319 |
|
|
|
320 |
with st.container():
|
321 |
with st.expander("Show Dataset Samples"):
|
322 |
st.write(train_data[:10])
|
|
|
246 |
|
247 |
|
248 |
|
249 |
+
|
250 |
+
def run_dataset_analyzer() -> None:
|
251 |
+
"""
|
252 |
+
Executes the dataset analysis process and displays the results using Streamlit.
|
253 |
+
This function provides an overview of the dataset and uses the OKVQADatasetAnalyzer to visualize
|
254 |
+
the data.
|
255 |
+
"""
|
256 |
+
|
257 |
+
# Load datasets from Excel
|
258 |
datasets_comparison_table = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="VQA Datasets Comparison")
|
259 |
okvqa_dataset_characteristics = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="OK-VQA Dataset Characteristics")
|
|
|
|
|
|
|
|
|
|
|
260 |
|
261 |
+
# Process OK-VQA datasets for validation and training
|
262 |
+
val_data = process_okvqa_dataset(config.DATASET_VAL_QUESTIONS_PATH, config.DATASET_VAL_ANNOTATIONS_PATH, save_to_csv=False)
|
263 |
+
train_data = process_okvqa_dataset(config.DATASET_TRAIN_QUESTIONS_PATH, config.DATASET_TRAIN_ANNOTATIONS_PATH, save_to_csv=False)
|
264 |
|
265 |
+
# Initialize the dataset analyzer
|
266 |
+
dataset_analyzer = OKVQADatasetAnalyzer(config.DATASET_TRAIN_QUESTIONS_PATH, config.DATASET_VAL_QUESTIONS_PATH, 'train_test')
|
267 |
|
268 |
+
# Display KB-VQA datasets overview
|
|
|
|
|
269 |
with st.container():
|
270 |
st.markdown("## Overview of KB-VQA Datasets")
|
271 |
col1, col2 = st.columns([2, 1])
|
272 |
with col1:
|
273 |
st.write(" ")
|
274 |
with st.expander("1 - Knowledge-Based VQA (KB-VQA)"):
|
275 |
+
st.markdown(""" [Knowledge-Based VQA (KB-VQA)](https://arxiv.org/abs/1511.02570): One of the earliest datasets in this domain, KB-VQA
|
276 |
+
comprises 700 images and 2,402 questions, with each question associated with both an image
|
277 |
+
and a knowledge base (KB). The KB encapsulates facts about the world, including object
|
278 |
+
names, properties, and relationships, aiming to foster models capable of answering
|
279 |
+
questions through reasoning over both the image and the KB.\n""")
|
|
|
280 |
with st.expander("2 - Factual VQA (FVQA)"):
|
281 |
st.markdown(""" [Factual VQA (FVQA)](https://arxiv.org/abs/1606.05433): This dataset includes 2,190
|
282 |
images and 5,826 questions, accompanied by a knowledge base containing 193,449 facts.
|
|
|
300 |
st.markdown("#### KB-VQA Datasets Comparison")
|
301 |
st.write(datasets_comparison_table, use_column_width=True)
|
302 |
st.write("-----------------------")
|
303 |
+
|
304 |
+
# Display OK-VQA dataset details
|
305 |
with st.container():
|
306 |
st.write("\n" * 10)
|
307 |
st.markdown("## OK-VQA Dataset")
|
|
|
313 |
with st.expander("Questions Distribution over Knowledge Category"):
|
314 |
df = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="Question Category Dist")
|
315 |
st.markdown("#### Questions Distribution over Knowledge Category")
|
316 |
+
dataset_analyzer.plot_bar_chart(df, "Knowledge Category", "Percentage", "Questions Distribution over Knowledge Category")
|
|
|
317 |
|
318 |
with st.expander("Distribution of Question Keywords"):
|
|
|
|
|
319 |
dataset_analyzer.categorize_questions()
|
320 |
st.markdown("#### Distribution of Question Keywords")
|
321 |
dataset_analyzer.plot_question_distribution()
|
322 |
|
323 |
+
# Display sample data
|
324 |
with st.container():
|
325 |
with st.expander("Show Dataset Samples"):
|
326 |
st.write(train_data[:10])
|