hra committed on
Commit
db1003c
1 Parent(s): 8517284

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -0
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader, LLMPredictor, Document,ServiceContext
2
+ from langchain.llms import OpenAIChat
3
+ from llama_index import download_loader
4
+ from langchain.chains import LLMChain, TransformChain, SimpleSequentialChain
5
+ from langchain.prompts import PromptTemplate
6
+ from langchain.agents import initialize_agent, Tool,load_tools
7
+ from langchain.chat_models import ChatOpenAI
8
+
9
+ import gradio as gr
10
+ import pandas as pd
11
+ import openai
12
+
13
+ import datetime
14
+ from datetime import datetime, date, time, timedelta
15
+ import os
16
+ import regex
17
+ import requests
18
+ import json
19
+ from sec_edgar_downloader._utils import get_filing_urls_to_download
20
+
21
# SEC filing form types offered in the UI dropdown; getstuff() maps each to
# how many past filings it downloads.
listofcategories=["10-K", "10-Q","8-K"]
24
def _extract_10k_text(raw_filing):
    """Return the plain text of the 10-K <DOCUMENT> section of a raw EDGAR submission.

    EDGAR full-submission files bundle several exhibits, each wrapped in
    <DOCUMENT>...</DOCUMENT> tags and labelled by a "<TYPE>..." line. This
    locates the section whose type is '10-K', strips its HTML, and returns
    the ASCII-only text. Raises KeyError if the filing has no 10-K section.
    """
    # Function-scope import: the original code called BeautifulSoup without
    # ever importing it, which raised NameError at runtime.
    from bs4 import BeautifulSoup

    # `regex` is a drop-in superset of `re`; the original called `re.compile`
    # but the file only imports `regex`, so `re` was an undefined name.
    doc_start_pattern = regex.compile(r'<DOCUMENT>')
    doc_end_pattern = regex.compile(r'</DOCUMENT>')
    # <TYPE> followed by anything up to the newline, e.g. "<TYPE>10-K".
    type_pattern = regex.compile(r'<TYPE>[^\n]+')

    # Content spans: from each <DOCUMENT> tag's end to the matching
    # </DOCUMENT> tag's start, paired positionally with the <TYPE> labels.
    doc_start_is = [m.end() for m in doc_start_pattern.finditer(raw_filing)]
    doc_end_is = [m.start() for m in doc_end_pattern.finditer(raw_filing)]
    doc_types = [s[len('<TYPE>'):] for s in type_pattern.findall(raw_filing)]

    document = {}
    for doc_type, doc_start, doc_end in zip(doc_types, doc_start_is, doc_end_is):
        if doc_type == '10-K':
            document[doc_type] = raw_filing[doc_start:doc_end]

    item_content = BeautifulSoup(document['10-K'], 'lxml')
    return str(item_content.text.encode('ascii', 'ignore'))


def getstuff(openapikey, category_selector, ticker_input):
    """Answer a fixed set of questions about a company's latest SEC filing.

    Downloads the most recent filing of the chosen type from EDGAR, indexes
    its 10-K section text with llama-index, and asks four canned questions.

    Parameters:
        openapikey: OpenAI API key entered by the user ('' triggers an error).
        category_selector: one of listofcategories ('10-K', '10-Q', '8-K').
        ticker_input: US stock ticker whose filings are fetched from EDGAR.

    Returns:
        A list of four answer strings, one per Gradio output textbox.
    """
    dateforfilesave = datetime.today().strftime("%d-%m-%Y %I:%M%p")
    print(ticker_input)
    print(dateforfilesave)

    if openapikey == '':
        # All four outputs are Textboxes, so return plain error strings.
        # (The original returned DataFrames for the first two outputs, which
        # a Textbox cannot render.)
        error = 'Error: Please provide OpenAPI key'
        return [error, error, error, error]

    os.environ['OPENAI_API_KEY'] = str(openapikey)

    # How many past filings to request per form type (default 1).
    num_filings_needed = {'10-K': 1, '8-K': 8, '10-Q': 4}.get(category_selector, 1)

    filings_temp = get_filing_urls_to_download(
        category_selector, ticker_input,
        num_filings_to_download=num_filings_needed,
        include_amends=False,
        before_date='2023-04-01', after_date='2022-01-01')
    # Only the most recent filing is actually analysed.
    files = [filings_temp[0].full_submission_url]
    print('Came here1')

    filetextcontentlist = []
    for each in files:
        # SEC requires a descriptive User-Agent on automated requests.
        headers = {
            "User-Agent": '[email protected]',
            "Accept-Encoding": "gzip, deflate",
            "Host": "www.sec.gov",
        }
        resp = requests.get(each, headers=headers)
        print('Came here2')
        filetextcontentlist.append(_extract_10k_text(resp.text))

    print('Came here3')
    # Flatten to sentences and keep only substantial ones (more than 8 words)
    # to cut indexing noise and token cost.
    temp = ". ".join(filetextcontentlist).replace('\xa024', ' ')
    temp = temp.replace('\n', ' ').strip()
    newlist = [each for each in temp.split('.') if len(each.split()) > 8]

    documents = [Document(t) for t in newlist]
    index = GPTSimpleVectorIndex.from_documents(documents)
    print('Came here4')

    querylist = ['What are the main products/ services mentioned?',
                 'What are the major risks?',
                 "What are the top investment focus areas?",
                 "What is the financial outlook of the company?"]

    # NOTE: the original also built an unused ChatOpenAI instance and an
    # unused ServiceContext; both were dead locals and have been removed.
    llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo"))

    answerlist = []
    for i, query in enumerate(querylist):
        print(i, "Query: ", query)
        response = index.query(
            query,
            llm_predictor=llm_predictor,
            response_mode="tree_summarize",
            similarity_top_k=min(int(len(documents) / 3), 20)
        )
        print(response.response)
        if 'dataframe' in query:
            # Dead branch for the current querylist; kept for queries that
            # request a JSON table: grab the first balanced {...} (recursive
            # pattern, needs the `regex` module) and parse it into a frame.
            try:
                pattern = regex.compile(r'\{(?:[^{}]|(?R))*\}')
                jsonextract = pattern.findall(response.response)[0]
                df_tmp = pd.read_json(jsonextract)
                if len(df_tmp.columns) <= 1:
                    df = pd.DataFrame(df_tmp[df_tmp.columns[0]].tolist())
                else:
                    df = df_tmp
            except (IndexError, ValueError):
                # No JSON found in the answer, or it was unparsable.
                df = pd.DataFrame()
                df['message'] = ['Data insufficient to decipher']
                df['action'] = ['try again in a few hours']
            answerlist.append(df)
        else:
            answerlist.append(response.response)

    print('Came to return statement')
    return answerlist
133
+
134
# --- Gradio UI --------------------------------------------------------------
# Layout: a header, an input row (filing-type dropdown + ticker, API key,
# button) and two output rows with four textboxes that receive the four
# answers produced by getstuff().
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>ChatGPT SEC Filings Question Answers</center></h1>")
    gr.Markdown(
        """What are the products & services? What are the risks? What is the outlook? and much more. \n\nThis is a demo & showcases ChatGPT integrated with real data. It shows how to get real-time data and marry it with ChatGPT capabilities. This demonstrates 'Chain of Thought' thinking using ChatGPT.\n\n4 snapshots are provided for illustration (trends, sector outlook, news summary email, macro trends email)\n\nNote: llama-index & gpt-3.5-turbo are used. The analysis takes roughly 120 secs & may not always be consistent. If ChatGPT API is overloaded you will get an error\n ![visitors](https://visitor-badge.glitch.me/badge?page_id=hra.chatgpt-stock-news-snapshots)"""
    )

    # Input row: filing category + ticker, the API key, and the submit button.
    with gr.Row() as row:
        with gr.Column():
            category_selector=gr.Dropdown(
                listofcategories, label="Filing Categories", info="Select the filing you want..."
            )
            input1 = gr.Textbox(placeholder='Enter ticker (USA only)', lines=1,label='Ticker')
        with gr.Column():
            textboxopenapi = gr.Textbox(placeholder="Enter OpenAPI Key...", lines=1,label='OpenAPI Key')

        with gr.Column():
            btn = gr.Button("Generate \nAnswers")

    # Output rows: four textboxes, filled in order by getstuff()'s answers.
    with gr.Row() as row:
        with gr.Column():
            output1 = gr.Textbox(placeholder='', lines=4,label='Snapshot 1')
        with gr.Column():
            output2 = gr.Textbox(placeholder='', lines=4,label='Snapshot 2')
    with gr.Row() as row:
        with gr.Column():
            output3 = gr.Textbox(placeholder='', lines=4,label='Snapshot 3')
        with gr.Column():
            output4 = gr.Textbox(placeholder='', lines=4,label='Snapshot 4')

    # Wire the button: getstuff(api_key, category, ticker) -> four answers.
    btn.click(getstuff, inputs=[textboxopenapi,category_selector,input1],outputs=[output1,output2,output3,output4])


demo.launch(debug=True)