Spaces:

jhonparra18
/

ocr-LLM-image-summarizer

Runtime error

App Files Files Community

jhonparra18 committed on Aug 26, 2023

Commit

9a299ce

•

1 Parent(s): 78d87d7

updated app logic

Browse files

Files changed (4) hide show

app.py +20 -11
app_utils.py +6 -0
image_processor.py +7 -2
text_summarizer.py +14 -14

app.py CHANGED Viewed

@@ -6,10 +6,15 @@ from app_utils import TEMP_DIR_NAME,save_uploaded_file,reset_chat
 import os
 import sys
 from text_summarizer import agent
 BOT_DEFAULT_MSG="Hello 👋 I'm a test AI assistant to help you with your questions about an input file, or feel free to ask me anything"
-st.set_page_config(page_title="Invoice|Receipt LLM Summarizer",layout='wide',page_icon=":shark:")
 IMAGE_TMP_PATH=None
 with st.sidebar:
@@ -24,16 +29,23 @@ with st.sidebar:
         IMAGE_TMP_PATH=os.path.join(TEMP_DIR_NAME,input_image.name)
         st.markdown(f"<h1 style='text-align: center;'> Image Uploaded and saved<br>",unsafe_allow_html=True)
         st.image(Image.open(IMAGE_TMP_PATH))
-    st.markdown("***")
     st.button("Reset Chat History", type="secondary", on_click=reset_chat,use_container_width=True)
-    st.markdown("[![Foo](https://img.icons8.com/material-outlined/96/000000/github.png)](https://github.com/statscol/invoice-llm-summarizer)")
 # Initialize chat history based on streamlit doc for chat applications https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
 if "messages" not in st.session_state:
     st.session_state.messages = []
 # Display chat messages from history on app rerun
@@ -41,21 +53,18 @@ for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
-# Set default message on chat
-with st.chat_message("assistant"):
-    st.write(BOT_DEFAULT_MSG)
 if prompt := st.chat_input("Write a message to the AI assistant | Escribe un mensaje para el asistente de IA"):
     st.chat_message("user").markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
-    prompt_ad=f'{prompt}, image path: {IMAGE_TMP_PATH}' if input_image is not None else prompt
     ##streamlit callback https://python.langchain.com/docs/integrations/callbacks/streamlit
     st_callback = StreamlitCallbackHandler(st.container())
     #hotfix to errors
     try:
-        response = response = agent.run(prompt_ad,callbacks=[st_callback])
     except ValueError as e:
         response = "Sorry i could't understand your last question."
     with st.chat_message("assistant"):

 import os
 import sys
 from text_summarizer import agent
+from image_processor import ImageProcessor
 BOT_DEFAULT_MSG="Hello 👋 I'm a test AI assistant to help you with your questions about an input file, or feel free to ask me anything"
+st.set_page_config(page_title="Invoice | Receipt LLM Summarizer",layout='wide',page_icon=":shark:")
+#placeholders for the temporary image path and an image processor in case we want to read img text separately
 IMAGE_TMP_PATH=None
+PROCESSOR=ImageProcessor()
+img_text=""
 with st.sidebar:
         IMAGE_TMP_PATH=os.path.join(TEMP_DIR_NAME,input_image.name)
         st.markdown(f"<h1 style='text-align: center;'> Image Uploaded and saved<br>",unsafe_allow_html=True)
         st.image(Image.open(IMAGE_TMP_PATH))
+        st.markdown("***")
+        inject_text=st.checkbox(label="Inject OCR output",value=False,help="Injects text found in the image without using the agent Action (Speeds response)")
+        if inject_text:
+            img_text=PROCESSOR.run(IMAGE_TMP_PATH)
+    st.markdown("***")
     st.button("Reset Chat History", type="secondary", on_click=reset_chat,use_container_width=True)
+    _,col_c,_=st.columns(3)
+    with col_c:
+        st.markdown("[![Foo](https://img.icons8.com/material-outlined/96/000000/github.png)](https://github.com/statscol/invoice-llm-summarizer)")
 # Initialize chat history based on streamlit doc for chat applications https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
 if "messages" not in st.session_state:
     st.session_state.messages = []
+    st.session_state.messages.append({"role": "assistant", "content": BOT_DEFAULT_MSG})
 # Display chat messages from history on app rerun
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 if prompt := st.chat_input("Write a message to the AI assistant | Escribe un mensaje para el asistente de IA"):
     st.chat_message("user").markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
+    prompt_ad=f'{prompt}, img path: {IMAGE_TMP_PATH}' if (input_image is not None and not inject_text) else f'{prompt} text: {img_text}'
     ##streamlit callback https://python.langchain.com/docs/integrations/callbacks/streamlit
     st_callback = StreamlitCallbackHandler(st.container())
     #hotfix to errors
     try:
+        response = agent.run(prompt_ad,callbacks=[st_callback])
     except ValueError as e:
         response = "Sorry i could't understand your last question."
     with st.chat_message("assistant"):

app_utils.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import streamlit as st
@@ -12,3 +13,8 @@ def save_uploaded_file(uploadedfile):
 def reset_chat():
     st.session_state.messages = []

 import os
 import streamlit as st
 def reset_chat():
+    """Clear the chat history by resetting ``st.session_state.messages`` to an empty list."""
+    # NOTE(review): app.py appends the default assistant greeting only when the
+    # "messages" key is missing from session state; after this reset the key still
+    # exists, so the greeting appears not to be restored - confirm this is intended.
     st.session_state.messages = []
+def read_txt_file(path_txt:str):
+    """Return the contents of the text file at ``path_txt`` as a single string.
+
+    The file's lines are joined with one space between them (each line keeps
+    its own trailing newline), and leading/trailing whitespace is stripped
+    from the combined result.
+    """
+    with open(path_txt,'r') as handle:
+        lines=handle.readlines()
+    return " ".join(lines).strip()

image_processor.py CHANGED Viewed

@@ -73,13 +73,18 @@ class ImageProcessor(BaseTool):
         text=pytesseract.image_to_string(img,lang=lang,config=PYTESSERACT_DEFAULT_CONFIG)
         return text
-    def _run(self,img_path):
         img=self.process_image(str(img_path))
         text=self.img_to_text(img)
         return text
     # as used in langchain documentation https://python.langchain.com/docs/modules/agents/tools/custom_tools
-    async def _arun(self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
     ) -> str:
         """Use the tool asynchronously."""
         raise NotImplementedError("custom_search does not support async")

         text=pytesseract.image_to_string(img,lang=lang,config=PYTESSERACT_DEFAULT_CONFIG)
         return text
+    def _run(self,img_path,save_to_disk=False):
+        """Extract the text of the image at ``img_path`` and return it.
+
+        The path is passed (as a string) to ``self.process_image`` and the
+        result is handed to ``self.img_to_text``. When ``save_to_disk`` is
+        true, the text is also written to ``/tmp`` under the image's file
+        name with ``.jpg`` replaced by ``.txt``.
+        """
         img=self.process_image(str(img_path))
         text=self.img_to_text(img)
+        if save_to_disk:
+            # Fixed: the file name was built from the undefined name `img_pth`, which
+            # raised NameError whenever save_to_disk was requested.
+            # NOTE(review): only a '.jpg' suffix is rewritten; any other extension keeps
+            # the image's own file name for the text dump - confirm that is acceptable.
+            with open(f"/tmp/{str(img_path).split('/')[-1].replace('.jpg','.txt')}",'w') as f:
+                f.write(text)
         return text
     # as used in langchain documentation https://python.langchain.com/docs/modules/agents/tools/custom_tools
+    async def _arun(self, img_path: str,save_to_disk=False, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
     ) -> str:
         """Use the tool asynchronously."""
+        # Asynchronous execution is not implemented: this method always raises.
+        # NOTE(review): the message names "custom_search" rather than this tool - it
+        # looks carried over from the LangChain custom-tool example; confirm before rewording.
         raise NotImplementedError("custom_search does not support async")

text_summarizer.py CHANGED Viewed

@@ -3,7 +3,9 @@ from langchain.schema import SystemMessage
 from langchain.agents import OpenAIFunctionsAgent,initialize_agent
 from langchain.agents import AgentType
 from langchain.chat_models import ChatOpenAI
-from langchain.chains.conversation.memory import ConversationBufferWindowMemory
 from dotenv import load_dotenv
 from config import OPEN_AI_MODEL_NAME,DEBUG_MODE_LLM
 from image_processor import ImageProcessor
@@ -18,6 +20,7 @@ system_message = SystemMessage(content="""You are an expert invoice, receipt sum
 #initial system prompt
 prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message)
 #define LLM to use
 llm = ChatOpenAI(temperature=0.1, model=OPEN_AI_MODEL_NAME,)
@@ -26,28 +29,25 @@ tools = [
     ImageProcessor()
 ]
-#memory placeholder
-conversational_memory = ConversationBufferWindowMemory(
-    memory_key='chat_history',
-    k=5,
-    return_messages=True
-)
 llm = ChatOpenAI(
     temperature=0,
-    model_name=OPEN_AI_MODEL_NAME,
-    max_tokens=2048
 )
 agent = initialize_agent(
-    agent=AgentType.OPENAI_FUNCTIONS, ## does not use memory
     tools=tools,
     llm=llm,
-    max_iterations=5,
     verbose=False,
     memory=conversational_memory,
-    early_stopping_method='generate',
     prompt=prompt
 )
-##TO DO, Remove agent and test sequential chain

 from langchain.agents import OpenAIFunctionsAgent,initialize_agent
 from langchain.agents import AgentType
 from langchain.chat_models import ChatOpenAI
+#from langchain.chains.conversation.memory import ConversationBufferWindowMemory
+from langchain.memory import ConversationBufferMemory
+from langchain.prompts import MessagesPlaceholder
 from dotenv import load_dotenv
 from config import OPEN_AI_MODEL_NAME,DEBUG_MODE_LLM
 from image_processor import ImageProcessor
 #initial system prompt
 prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message)
 #define LLM to use
 llm = ChatOpenAI(temperature=0.1, model=OPEN_AI_MODEL_NAME,)
     ImageProcessor()
 ]
+agent_kwargs = {
+    "extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
+}
+conversational_memory = ConversationBufferMemory(memory_key="memory", return_messages=True)
 llm = ChatOpenAI(
     temperature=0,
+    model_name=OPEN_AI_MODEL_NAME
 )
 agent = initialize_agent(
+    agent=AgentType.OPENAI_FUNCTIONS,
     tools=tools,
     llm=llm,
+    max_iterations=10,
     verbose=False,
     memory=conversational_memory,
+    agent_kwargs=agent_kwargs,
     prompt=prompt
 )
+##TO DO, Remove agent and test sequential chain