KB-VQA-E

Running

App Files Files Community

m7mdal7aj committed on May 13

Commit

020595f

•

1 Parent(s): 589a546

Update my_model/dataset/dataset_processor.py

Browse files

Files changed (1) hide show

my_model/dataset/dataset_processor.py +8 -1

my_model/dataset/dataset_processor.py CHANGED Viewed

@@ -35,6 +35,7 @@ class OKVQADatasetProcessor:
         self.df_answers = pd.DataFrame(self.annotations)
         self.merged_df = None
     def load_data_files(self) -> Tuple[List[dict], List[dict]]:
         """
         Loads the question and annotation data from JSON files.
@@ -52,6 +53,7 @@ class OKVQADatasetProcessor:
         return questions, annotations
     @staticmethod
     def find_most_frequent(my_list: List[str]) -> Optional[str]:
         """
@@ -69,6 +71,7 @@ class OKVQADatasetProcessor:
         most_common = counter.most_common(1)
         return most_common[0][0]
     def merge_data(self) -> None:
         """
         Merges the question and answer DataFrames on a common key.
@@ -81,9 +84,10 @@ class OKVQADatasetProcessor:
         self.merged_df = pd.merge(self.df_questions, self.df_answers, on=['question_id', 'image_id'])
     def join_words_with_hyphen(self, sentence):
         """
         Collapses a sentence into one hyphen-separated token.

         Args:
             sentence: Text whose whitespace-separated words are joined.

         Returns:
             str: The words of `sentence` joined with '-'; runs of whitespace
                 produce a single hyphen, and leading/trailing whitespace is dropped.
         """
         # split() with no argument splits on any whitespace run and discards empties.
         words = sentence.split()
         hyphenated = '-'.join(words)
         return hyphenated
     def process_answers(self) -> None:
         """
         Processes answers from merged DataFrame by extracting and identifying the most frequent answers.
@@ -103,6 +107,7 @@ class OKVQADatasetProcessor:
         self.merged_df['single_word_answers'] = self.merged_df['most_frequent_processed_answer'].apply(
             self.join_words_with_hyphen)
     def get_processed_data(self) -> Optional[pd.DataFrame]:
         """
         Retrieves the processed DataFrame.
@@ -117,6 +122,7 @@ class OKVQADatasetProcessor:
             print("DataFrame is empty or not processed yet.")
             return None
     def save_to_csv(self, df: pd.DataFrame, saved_file_name: Optional[str]) -> None:
         """
         Saves the DataFrame to a CSV file.
@@ -134,6 +140,7 @@ class OKVQADatasetProcessor:
         else:
             df.to_csv("data.csv", index=None)
     def display_dataframe(self) -> None:
         """
         Displays the processed DataFrame.

         self.df_answers = pd.DataFrame(self.annotations)
         self.merged_df = None
     def load_data_files(self) -> Tuple[List[dict], List[dict]]:
         """
         Loads the question and annotation data from JSON files.
         return questions, annotations
     @staticmethod
     def find_most_frequent(my_list: List[str]) -> Optional[str]:
         """
         most_common = counter.most_common(1)
         return most_common[0][0]
     def merge_data(self) -> None:
         """
         Merges the question and answer DataFrames on a common key.
         self.merged_df = pd.merge(self.df_questions, self.df_answers, on=['question_id', 'image_id'])
     def join_words_with_hyphen(self, sentence):
         """
         Collapses a sentence into one hyphen-separated token.

         Args:
             sentence: Text whose whitespace-separated words are joined.

         Returns:
             str: The words of `sentence` joined with '-'; runs of whitespace
                 produce a single hyphen, and leading/trailing whitespace is dropped.
         """
         # split() with no argument splits on any whitespace run and discards empties.
         words = sentence.split()
         hyphenated = '-'.join(words)
         return hyphenated
     def process_answers(self) -> None:
         """
         Processes answers from merged DataFrame by extracting and identifying the most frequent answers.
         self.merged_df['single_word_answers'] = self.merged_df['most_frequent_processed_answer'].apply(
             self.join_words_with_hyphen)
     def get_processed_data(self) -> Optional[pd.DataFrame]:
         """
         Retrieves the processed DataFrame.
             print("DataFrame is empty or not processed yet.")
             return None
     def save_to_csv(self, df: pd.DataFrame, saved_file_name: Optional[str]) -> None:
         """
         Saves the DataFrame to a CSV file.
         else:
             df.to_csv("data.csv", index=None)
     def display_dataframe(self) -> None:
         """
         Displays the processed DataFrame.