Spaces:

hudsonhayes
/

Mutiple-URL-Doc-Chemical-identifier

Sleeping

App Files Files Community

Karthikeyan committed on Jul 20, 2023

Commit

e8c317c

•

1 Parent(s): 077dd97

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -128

app.py CHANGED Viewed

@@ -18,7 +18,12 @@ import tempfile
 import pandas as pd
 import re
 class ChemicalIdentifier:
     def __init__(self):
         openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -30,17 +35,19 @@ class ChemicalIdentifier:
         console_handler.setFormatter(formatter)
         self.logger.addHandler(console_handler)
-    def upload_via_url(self,url:str)->List:
         """
         Uploads a file from a given URL and returns the loaded document.
         Args:
             url (str): The URL of the file to be uploaded.
         Returns:
             Document: The loaded document.
         Raises:
             ValueError: If the URL is not valid or the file cannot be fetched.
         """
@@ -69,22 +76,19 @@ class ChemicalIdentifier:
              raise ValueError("Error occurred while uploading the file") from e
-    def find_chemicals(self,text:str)->str:
         """
         Extracts chemical names from the given text.
         Args:
             text (str): The text to extract chemical names from.
         Returns:
             str: The extracted chemical names in bullet form.
         Raises:
             ValueError: If an error occurs during the extraction process.
         """
         try:
-          prompt = f"List out only all the Chemicals Names in the give text in bullet form.{text}"
           response = openai.Completion.create(
               model="text-davinci-003",
               prompt=prompt,
@@ -104,7 +108,7 @@ class ChemicalIdentifier:
             raise ValueError("Error occurred while finding chemicals") from e
-    def get_chemicals(self,urls:str)->str:
         """
         Retrieves chemicals from the provided URLs.
@@ -121,9 +125,9 @@ class ChemicalIdentifier:
         try:
           total_chemical=[]
           for url in urls.split(','):
-            webpage_text = self.upload_via_url(url)
-            chemicals = self.find_chemicals(webpage_text)
-            total_chemical.append(chemicals)
           list_of_chemicals = "".join(total_chemical)
           return list_of_chemicals
@@ -131,12 +135,6 @@ class ChemicalIdentifier:
             self.logger.error("Error occurred while getting chemicals from URLs: %s", str(e))
             raise ValueError("Error occurred while getting chemicals from URLs") from e
-    def get_empty_state(self):
-        """ Create empty Knowledge base"""
-        return {"knowledge_base": None}
     def create_knowledge_base(self,docs):
         """Create a knowledge base from the given documents.
@@ -165,44 +163,11 @@ class ChemicalIdentifier:
         # Return the resulting knowledge base
         return knowledge_base
-    def upload_file(self,file_paths):
-        """Upload a file and create a knowledge base from its contents.
-        Args:
-            file_paths : The files to uploaded.
-        Returns:
-            tuple: A tuple containing the file name and the knowledge base.
-        """
-        file_paths = [single_file_path.name for single_file_path in file_paths]
-        loaders = [UnstructuredFileLoader(file_obj, strategy="fast") for file_obj in file_paths]
-        # Load the contents of the file using the loader
-        docs = []
-        for loader in loaders:
-            docs.extend(loader.load())
-        # Create a knowledge base from the loaded documents using the create_knowledge_base() method
-        knowledge_base = self.create_knowledge_base(docs)
-        # Return a tuple containing the file name and the knowledge base
-        return file_paths, {"knowledge_base": knowledge_base}
-    def answer_question(self,urls, state):
-        """Answer a question based on the current knowledge base.
-        Args:
-            state (dict): The current state containing the knowledge base.
-        Returns:
-            str: The answer to the question.
-        """
-        result = self.get_chemicals(urls)
-        # Retrieve the knowledge base from the state dictionary
-        knowledge_base = state["knowledge_base"]
         # Set the question for which we want to find the answer
         question = "Identify the Chemical Capabilities Only"
@@ -229,90 +194,95 @@ class ChemicalIdentifier:
         # Run the question-answering chain on the input documents and question
         response = chain.run(input_documents=docs, question=question)
-        Answer = response+"\n"+result
         # Return the response as the answer to the question
-        return Answer
-    def extract_excel_data(self,file_path):
-        # Read the Excel file
-        df = pd.read_excel(file_path)
-        # Flatten the data to a single list
-        data_list = []
-        for _, row in df.iterrows():
-            data_list.extend(row.tolist())
-        return data_list
-    def comparing_chemicals(self,urls,state):
-        chemicals = self.answer_question(urls,state)
-        excel_file_path = "Capability.xlsx"
-        chemistry_capability = self.extract_excel_data(excel_file_path)
-        response = openai.Completion.create(
-        engine="text-davinci-003",
-        prompt= f"""Analyse the following text delimited by triple backticks to return the comman chemicals.
-                  text : ```{chemicals}  {chemistry_capability}```.
-                  result should be in bullet points format.
-                 """,
-        max_tokens=300,
-        n=1,
-        stop=None,
-        temperature=0,
-        top_p=1.0,
-        frequency_penalty=0.0,
-        presence_penalty=0.0
-        )
-        result = response.choices[0].text.strip()
-        return result
     def gradio_interface(self)->None:
         """
         Starts the Gradio interface for chemical identification.
         """
-        try:
-          with gr.Blocks(css="style.css",theme='karthikeyan-adople/hudsonhayes-dark1') as demo:
-            gr.HTML("""<center><img src="https://hudsonandhayes.co.uk/wp-content/uploads/2023/01/Group-479.svg" height="110px" width="280px"></center>""")
-            state = gr.State(self.get_empty_state())
-            gr.HTML("""<center><h1 style="color:#fff">Chemical Identifier</h1></center>""")
-            with gr.Column(elem_id="col-container"):
-              with gr.Row(elem_id="row-flex"):
-                  url = gr.Textbox(label="URL")
-              with gr.Row(elem_id="row-flex"):
-                with gr.Accordion("Upload Files", open = False):
-                  with gr.Row():
-                    with gr.Column(scale=0.90, min_width=160):
-                        file_output = gr.File()
-                    with gr.Column(scale=0.10, min_width=160):
-                        upload_button = gr.UploadButton(
-                            "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx"],
-                            file_count = "multiple",variant="primary")
-                        load_pdf = gr.Button("Load PDF")
-                        status = gr.Textbox(label="Status", placeholder="", interactive=False)
-              with gr.Row():
-                with gr.Column(scale=1, min_width=0):
-                  compare_btn = gr.Button(value="Analyse",variant="primary")
-              with gr.Row():
-                with gr.Column(scale=1, min_width=0):
-                  compared_result = gr.Textbox(value="",label='Chemicals :',show_label=True, placeholder="",lines=10)
-            upload_button.upload(self.upload_file, upload_button, [file_output,state])
-            compare_btn.click(self.comparing_chemicals,[url,state],compared_result)
-          demo.launch()
-        except Exception as e:
-            self.logger.error("Error occurred while launching Gradio interface: %s", str(e))
-            raise ValueError("Error occurred while launching Gradio interface") from e
 if __name__ == "__main__":
   logging.basicConfig(level=logging.DEBUG)
   chemical_identifier = ChemicalIdentifier()
   chemical_identifier.gradio_interface()

 import pandas as pd
 import re
+# Create and declare the global variable "results"
+results = ''
 class ChemicalIdentifier:
     def __init__(self):
         openai.api_key = os.getenv("OPENAI_API_KEY")
         console_handler.setFormatter(formatter)
         self.logger.addHandler(console_handler)
+    def get_empty_state(self):
+        """ Create empty Knowledge base"""
+        return {"knowledge_base": None}
+    def get_content_from_url(self,url:str)->List:
         """
         Uploads a file from a given URL and returns the loaded document.
         Args:
             url (str): The URL of the file to be uploaded.
         Returns:
             Document: The loaded document.
         Raises:
             ValueError: If the URL is not valid or the file cannot be fetched.
         """
              raise ValueError("Error occurred while uploading the file") from e
+    def extract_chemical_names(self,text:str)->str:
         """
         Extracts chemical names from the given text.
         Args:
             text (str): The text to extract chemical names from.
         Returns:
             str: The extracted chemical names in bullet form.
         Raises:
             ValueError: If an error occurs during the extraction process.
         """
         try:
+          prompt = f"Identify the Chemical Names Only give text in bullet form {text}. Don't Generate any extra chemicals apart from given text"
           response = openai.Completion.create(
               model="text-davinci-003",
               prompt=prompt,
             raise ValueError("Error occurred while finding chemicals") from e
+    def get_chemicals_for_url(self,urls:str)->str:
         """
         Retrieves chemicals from the provided URLs.
         try:
           total_chemical=[]
           for url in urls.split(','):
+            webpage_text = self.get_content_from_url(url)
+            chemicals = self.extract_chemical_names(webpage_text)
+            total_chemical.append(str(url)+"\n"+chemicals+"\n\n")
           list_of_chemicals = "".join(total_chemical)
           return list_of_chemicals
             self.logger.error("Error occurred while getting chemicals from URLs: %s", str(e))
             raise ValueError("Error occurred while getting chemicals from URLs") from e
     def create_knowledge_base(self,docs):
         """Create a knowledge base from the given documents.
         # Return the resulting knowledge base
         return knowledge_base
+    def file_path_show(self,file_paths):
+      file_paths = [single_file_path.name for single_file_path in file_paths]
+      return file_paths
+    def get_chemicals_for_file(self,state,knowledge_base):
         # Set the question for which we want to find the answer
         question = "Identify the Chemical Capabilities Only"
         # Run the question-answering chain on the input documents and question
         response = chain.run(input_documents=docs, question=question)
         # Return the response as the answer to the question
+        return response
+    def identify_chemicals_in_files(self,file_paths,state):
+        """Upload a file and create a knowledge base from its contents.
+        Args:
+            file_paths : The files to uploaded.
+        Returns:
+            tuple: A tuple containing the file name and the knowledge base.
+        """
+        file_paths = [single_file_path.name for single_file_path in file_paths]
+        for file_obj in file_paths:
+           loader = UnstructuredFileLoader(file_obj, strategy="fast")
+           # Load the contents of the file using the loader
+           docs =loader.load()
+           # Create a knowledge base from the loaded documents using the create_knowledge_base() method
+           knowledge_base = self.create_knowledge_base(docs)
+           pdf_name = os.path.basename(file_obj)
+           global results
+           final_ans = self.get_chemicals_for_file(state,knowledge_base)
+           results += pdf_name+"\n"+final_ans+"\n\n"
+        # Return a tuple containing the file name and the knowledge base
+        return results
+    def get_final_result(self,urls,file_paths,state):
+        if urls:
+          if file_paths:
+            urls_chemicals = self.get_chemicals_for_url(urls)
+            file_chemicals = self.identify_chemicals_in_files(file_paths,state)
+            chemicals = urls_chemicals + file_chemicals
+            return chemicals
+          else:
+            urls_chemicals = self.get_chemicals_for_url(urls)
+            return urls_chemicals
+        elif file_paths:
+            file_chemicals = self.identify_chemicals_in_files(file_paths,state)
+            return file_chemicals
+        else:
+          return "No Files Uploaded"
     def gradio_interface(self)->None:
         """
         Starts the Gradio interface for chemical identification.
         """
+        # Components are created in the order they appear inside the nested
+        # context managers, so the statement order below defines the page layout.
+        with gr.Blocks(css="style.css",theme='karthikeyan-adople/hudsonhayes-dark1') as demo:
+          gr.HTML("""<center><img src="https://hudsonandhayes.co.uk/wp-content/uploads/2023/01/Group-479.svg" height="110px" width="280px"></center>""")
+          # Per-session state, seeded with the empty value from get_empty_state().
+          state = gr.State(self.get_empty_state())
+          gr.HTML("""<center><h1 style="color:#fff">Chemical Identifier</h1></center>""")
+          with gr.Column(elem_id="col-container"):
+            # Input 1: the URL text box.
+            with gr.Row(elem_id="row-flex"):
+                url = gr.Textbox(label="URL")
+            # Input 2: a collapsed accordion holding the file list and upload button.
+            with gr.Row(elem_id="row-flex"):
+              with gr.Accordion("Upload Files", open = False):
+                with gr.Row():
+                  with gr.Column(scale=0.90, min_width=160):
+                      file_output = gr.File()
+                  with gr.Column(scale=0.10, min_width=160):
+                      upload_button = gr.UploadButton(
+                          "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx"],
+                          file_count = "multiple",variant="primary")
+            with gr.Row():
+              with gr.Column(scale=1, min_width=0):
+                compare_btn = gr.Button(value="Analyse",variant="primary")
+            # Output: a multi-line text box that receives the analysis result.
+            with gr.Row():
+              with gr.Column(scale=1, min_width=0):
+                compared_result = gr.Textbox(value="",label='Chemicals :',show_label=True, placeholder="",lines=10)
+            # After an upload, pass the uploaded files to file_path_show() and
+            # put its return value into the file_output component.
+            upload_button.upload(self.file_path_show, upload_button, [file_output])
+            # "Analyse" runs get_final_result() on the URL text, the uploaded
+            # files and the session state, and writes the result to the text box.
+            compare_btn.click(self.get_final_result,[url,upload_button,state],compared_result)
+        # Launch the app once the layout and event handlers are defined.
+        demo.launch()
 if __name__ == "__main__":
   # Script entry point: configure root logging at DEBUG level first, then
   # build the identifier and start its Gradio interface.
   logging.basicConfig(level=logging.DEBUG)
   chemical_identifier = ChemicalIdentifier()
   chemical_identifier.gradio_interface()