KB-VQA-E

Running

App Files Community

m7mdal7aj committed on Feb 24

Commit

a650af8

•

1 Parent(s): 8a2cc2c

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -113

app.py CHANGED Viewed

@@ -10,151 +10,140 @@ from my_model.object_detection import detect_and_draw_objects
 from my_model.captioner.image_captioning import get_caption
 from my_model.utilities import free_gpu_resources
 from my_model.KBVQA import KBVQA, prepare_kbvqa_model
-def answer_question(image, question, caption, detected_objects_str, model):
-    answer = model.generate_answer(question, caption, detected_objects_str)
-    st.image(image)
-    st.write(caption)
-    st.write("----------------")
-    st.write(detected_objects_str)
-    return answer
-def get_caption(image):
-    return "Generated caption for the image"
-def free_gpu_resources():
-    pass
-# Sample images (assuming these are paths to your sample images)
-sample_images = ["Files/sample1.jpg", "Files/sample2.jpg", "Files/sample3.jpg",
-                 "Files/sample4.jpg", "Files/sample5.jpg", "Files/sample6.jpg",
-                 "Files/sample7.jpg"]
-def analyze_image(image, model, show_processed_image=False):
-    img = copy.deepcopy(image)
-    caption = model.get_caption(img)
-    image_with_boxes, detected_objects_str = model.detect_objects(img)
-    if show_processed_image:
-        st.image(image_with_boxes)
-    return caption, detected_objects_str
-def image_qa_app(kbvqa):
-    # Initialize session state for storing the current image and its Q&A history
-    if 'current_image' not in st.session_state:
-        st.session_state['current_image'] = None
-    if 'qa_history' not in st.session_state:
-        st.session_state['qa_history'] = []
-    if 'analysis_done' not in st.session_state:
-        st.session_state['analysis_done'] = False
-    if 'answer_in_progress' not in st.session_state:
-        st.session_state['answer_in_progress'] = False
-    # Display sample images as clickable thumbnails
-    st.write("Choose from sample images:")
-    cols = st.columns(len(sample_images))
-    for idx, sample_image_path in enumerate(sample_images):
-        with cols[idx]:
-            image = Image.open(sample_image_path)
-            st.image(image, use_column_width=True)
-            if st.button(f'Select Sample Image {idx + 1}', key=f'sample_{idx}'):
-                st.session_state['current_image'] = image
-                st.session_state['qa_history'] = []
-                st.session_state['analysis_done'] = False
-                st.session_state['answer_in_progress'] = False
-    # Image uploader
     uploaded_image = st.file_uploader("Or upload an Image", type=["png", "jpg", "jpeg"])
     if uploaded_image is not None:
-        image = Image.open(uploaded_image)
-        st.session_state['current_image'] = image
         st.session_state['qa_history'] = []
         st.session_state['analysis_done'] = False
         st.session_state['answer_in_progress'] = False
     if st.session_state.get('current_image') and not st.session_state.get('analysis_done', False):
         if st.button('Analyze Image'):
-            caption, detected_objects_str = analyze_image(st.session_state['current_image'], kbvqa)
             st.session_state['caption'] = caption
             st.session_state['detected_objects_str'] = detected_objects_str
             st.session_state['analysis_done'] = True
-    # Get Answer button
     if st.session_state.get('analysis_done', False):
         question = st.text_input("Ask a question about this image:")
         if st.button('Get Answer'):
-            answer = answer_question(st.session_state['current_image'], question, st.session_state.get('caption', ''), st.session_state.get('detected_objects_str', ''), kbvqa)
             st.session_state['qa_history'].append((question, answer))
-        # Display all Q&A
         for q, a in st.session_state.get('qa_history', []):
             st.text(f"Q: {q}\nA: {a}\n")
-    # Reset the answer_in_progress flag after displaying the answer
-    if st.session_state['answer_in_progress']:
-        st.session_state['answer_in_progress'] = False
 def run_inference():
     st.title("Run Inference")
     method = st.selectbox(
         "Choose a method:",
         ["Fine-Tuned Model", "In-Context Learning (n-shots)"],
-        index=0  # Default to the first option
     )
-    detection_model = st.selectbox(
-        "Choose a model for object detection:",
-        ["yolov5", "detic"],
-        index=0  # Default to the first option
-    )
-    # Initialize session state for the model
     if method == "Fine-Tuned Model":
-        if 'kbvqa' not in st.session_state:
-            st.session_state['kbvqa'] = None
-        # Button to load KBVQA models
-        if st.button('Load KBVQA Model'):
-            if st.session_state['kbvqa'] is not None:
-                st.write("Model already loaded.")
-            else:
-                # Call the function to load models and show progress
-                st.session_state['kbvqa'] = prepare_kbvqa_model(detection_model)
-            if st.session_state['kbvqa']:
-                st.write("Model is ready for inference.")
-                # Set default confidence based on the selected model
-                default_confidence = 0.2 if detection_model == "yolov5" else 0.4
-                # Slider for confidence level
-                confidence_level = st.slider(
-                    "Select Detection Confidence Level",
-                    min_value=0.1,
-                    max_value=0.9,
-                    value=default_confidence,
-                    step=0.1
-                )
-                st.session_state['kbvqa'].detection_confidence = confidence_level
-        if st.session_state['kbvqa']:
-            image_qa_app(st.session_state['kbvqa'])
-    else:
-        st.write(f'{method} model is not ready for inference yet')
-# Main function
 def main():
     st.sidebar.title("Navigation")
     selection = st.sidebar.radio("Go to", ["Home", "Dataset Analysis", "Evaluation Results", "Run Inference", "Dissertation Report"])
     if selection == "Home":
-        st.title("MultiModal Learning for Knowledg-Based Visual Question Answering")
         st.write("Home page content goes here...")
     elif selection == "Dissertation Report":
         st.title("Dissertation Report")
         st.write("Click the link below to view the PDF.")
@@ -166,22 +155,29 @@ def main():
             mime="application/octet-stream"
         )
     elif selection == "Evaluation Results":
         st.title("Evaluation Results")
         st.write("This is a Place Holder until the contents are uploaded.")
     elif selection == "Dataset Analysis":
         st.title("OK-VQA Dataset Analysis")
         st.write("This is a Place Holder until the contents are uploaded.")
     elif selection == "Run Inference":
         run_inference()
-    elif selection == "Object Detection":
-        run_object_detection()
 if __name__ == "__main__":
-    main()

 from my_model.captioner.image_captioning import get_caption
 from my_model.utilities import free_gpu_resources
 from my_model.KBVQA import KBVQA, prepare_kbvqa_model
+import my_model.utilities.st_config as st_config
+class ImageHandler:
+    @staticmethod
+    def analyze_image(image, model, show_processed_image=False):
+        img = copy.deepcopy(image)
+        caption = model.get_caption(img)
+        image_with_boxes, detected_objects_str = model.detect_objects(img)
+        if show_processed_image:
+            st.image(image_with_boxes)
+        return caption, detected_objects_str
+    @staticmethod
+    def free_gpu_resources():
+        # Implementation for freeing GPU resources
+        free_gpu_resources()
+class QuestionAnswering:
+    @staticmethod
+    def answer_question(image, question, caption, detected_objects_str, model):
+        answer = model.generate_answer(question, caption, detected_objects_str)
+        st.image(image)
+        st.write(caption)
+        st.write("----------------")
+        st.write(detected_objects_str)
+        return answer
+class UIComponents:
+    @staticmethod
+    def display_image_selection(sample_images):
+        cols = st.columns(len(sample_images))
+        for idx, sample_image_path in enumerate(sample_images):
+            with cols[idx]:
+                image = Image.open(sample_image_path)
+                st.image(image, use_column_width=True)
+                if st.button(f'Select Sample Image {idx + 1}', key=f'sample_{idx}'):
+                    st.session_state['current_image'] = image
+                    st.session_state['qa_history'] = []
+                    st.session_state['analysis_done'] = False
+                    st.session_state['answer_in_progress'] = False
+def load_kbvqa_model(detection_model):
+    """Load KBVQA Model based on the selected detection model."""
+    if st.session_state.get('kbvqa') is not None:
+        st.write("Model already loaded.")
+    else:
+        st.session_state['kbvqa'] = prepare_kbvqa_model(detection_model)
+        if st.session_state['kbvqa']:
+            st.write("Model is ready for inference.")
+            return True
+    return False
+def set_model_confidence(detection_model):
+    """Set the confidence level for the detection model."""
+    default_confidence = 0.2 if detection_model == "yolov5" else 0.4
+    confidence_level = st.slider(
+        "Select Detection Confidence Level",
+        min_value=0.1,
+        max_value=0.9,
+        value=default_confidence,
+        step=0.1
+    )
+    st.session_state['kbvqa'].detection_confidence = confidence_level
+def image_qa_app(kbvqa_model):
+    """Streamlit app interface for image QA."""
+    sample_images = st_config.SAMPLE_IMAGES
+    UIComponents.display_image_selection(sample_images)
     uploaded_image = st.file_uploader("Or upload an Image", type=["png", "jpg", "jpeg"])
     if uploaded_image is not None:
+        st.session_state['current_image'] = Image.open(uploaded_image)
         st.session_state['qa_history'] = []
         st.session_state['analysis_done'] = False
         st.session_state['answer_in_progress'] = False
     if st.session_state.get('current_image') and not st.session_state.get('analysis_done', False):
         if st.button('Analyze Image'):
+            caption, detected_objects_str = ImageHandler.analyze_image(st.session_state['current_image'], kbvqa_model)
             st.session_state['caption'] = caption
             st.session_state['detected_objects_str'] = detected_objects_str
             st.session_state['analysis_done'] = True
     if st.session_state.get('analysis_done', False):
         question = st.text_input("Ask a question about this image:")
         if st.button('Get Answer'):
+            answer = QuestionAnswering.answer_question(
+                st.session_state['current_image'],
+                question,
+                st.session_state.get('caption', ''),
+                st.session_state.get('detected_objects_str', ''),
+                kbvqa_model
+            )
             st.session_state['qa_history'].append((question, answer))
         for q, a in st.session_state.get('qa_history', []):
             st.text(f"Q: {q}\nA: {a}\n")
 def run_inference():
+    """Main function to run inference based on the selected method."""
     st.title("Run Inference")
     method = st.selectbox(
         "Choose a method:",
         ["Fine-Tuned Model", "In-Context Learning (n-shots)"],
+        index=0
     )
     if method == "Fine-Tuned Model":
+        detection_model = st.selectbox(
+            "Choose a model for object detection:",
+            ["yolov5", "detic"],
+            index=0
+        )
+        if 'kbvqa' not in st.session_state or st.session_state['detection_model'] != detection_model:
+            st.session_state['detection_model'] = detection_model
+            if load_kbvqa_model(detection_model):
+                set_model_confidence(detection_model)
+                image_qa_app(st.session_state['kbvqa'])
 def main():
     st.sidebar.title("Navigation")
     selection = st.sidebar.radio("Go to", ["Home", "Dataset Analysis", "Evaluation Results", "Run Inference", "Dissertation Report"])
     if selection == "Home":
+        st.title("MultiModal Learning for Knowledge-Based Visual Question Answering")
         st.write("Home page content goes here...")
     elif selection == "Dissertation Report":
         st.title("Dissertation Report")
         st.write("Click the link below to view the PDF.")
             mime="application/octet-stream"
         )
     elif selection == "Evaluation Results":
         st.title("Evaluation Results")
         st.write("This is a Place Holder until the contents are uploaded.")
     elif selection == "Dataset Analysis":
         st.title("OK-VQA Dataset Analysis")
         st.write("This is a Place Holder until the contents are uploaded.")
     elif selection == "Run Inference":
         run_inference()
 if __name__ == "__main__":
+    main()