Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datetime
import json
import os
import re
from datetime import datetime, date, time, timedelta
from html.parser import HTMLParser

import gradio as gr
import openai
import pandas as pd
import regex
import requests
from langchain.agents import initialize_agent, Tool,load_tools
from langchain.chains import LLMChain, TransformChain, SimpleSequentialChain
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAIChat
from langchain.prompts import PromptTemplate
from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader, LLMPredictor, Document,ServiceContext
from llama_index import download_loader
from sec_edgar_downloader._utils import get_filing_urls_to_download
|
20 |
+
|
21 |
+
# Filing form types offered in the "Filing Categories" dropdown of the UI.
listofcategories=["10-K", "10-Q","8-K"]
|
22 |
+
|
23 |
+
|
24 |
+
# Number of filings requested from the downloader for each supported form type.
# Any other form type falls back to a single filing.
_FILINGS_PER_CATEGORY = {"10-K": 1, "8-K": 8, "10-Q": 4}

# A full-submission text file holds several <DOCUMENT>...</DOCUMENT> sections,
# each announcing its form/exhibit type on a "<TYPE>..." line.
_DOC_START_PATTERN = re.compile(r'<DOCUMENT>')
_DOC_END_PATTERN = re.compile(r'</DOCUMENT>')
_DOC_TYPE_PATTERN = re.compile(r'<TYPE>[^\n]+')


class _TextCollector(HTMLParser):
    """Collect the character data of a markup document, discarding the tags."""

    def __init__(self):
        super().__init__(convert_charrefs=True)
        self.parts = []

    def handle_data(self, data):
        self.parts.append(data)


def _error_outputs(message):
    """Return ``message`` once per output textbox (four in total)."""
    return (message,) * 4


def _extract_filing_text(raw_submission, form_type):
    """Return the ASCII text of the ``form_type`` section of a submission.

    Returns an empty string when the submission has no section of that type.
    If several sections share the type, the last one is used.
    """
    # End offsets of the opening tags and start offsets of the closing tags
    # bracket exactly the content between each <DOCUMENT> pair.
    starts = [m.end() for m in _DOC_START_PATTERN.finditer(raw_submission)]
    ends = [m.start() for m in _DOC_END_PATTERN.finditer(raw_submission)]
    # strip() drops a trailing '\r' or spaces that would defeat the comparison.
    doc_types = [
        found[len('<TYPE>'):].strip()
        for found in _DOC_TYPE_PATTERN.findall(raw_submission)
    ]

    section = None
    for doc_type, start, end in zip(doc_types, starts, ends):
        if doc_type == form_type:
            section = raw_submission[start:end]
    if section is None:
        return ''

    collector = _TextCollector()
    collector.feed(section)
    collector.close()  # flushes any text still buffered by the parser
    text = ''.join(collector.parts)
    # Turn non-breaking spaces into plain spaces first so that dropping the
    # remaining non-ASCII characters does not glue neighbouring words together.
    return text.replace('\xa0', ' ').encode('ascii', 'ignore').decode('ascii')


def _candidate_sentences(text):
    """Split ``text`` on '.' and keep the pieces with more than eight words."""
    flattened = text.replace('\n', ' ').strip()
    return [piece for piece in flattened.split('.') if len(piece.split()) > 8]


def getstuff(openapikey, category_selector, ticker_input):
    """Answer four fixed questions about a company's SEC filing.

    Looks up filings of the selected form type for the ticker, downloads the
    first one returned, indexes its sentences with llama_index and asks
    gpt-3.5-turbo each built-in question.

    Args:
        openapikey: OpenAI API key; exported as ``OPENAI_API_KEY``.
        category_selector: Filing form type, e.g. ``'10-K'``.
        ticker_input: Company ticker symbol.

    Returns:
        Four values, one per output textbox: the answers to the built-in
        questions in order, or the same error message repeated four times
        when an input is missing or the filing cannot be retrieved.
    """
    dateforfilesave = datetime.today().strftime("%d-%m-%Y %I:%M%p")
    print(ticker_input)
    print(dateforfilesave)

    # Validate every input before touching the environment or the network.
    if not openapikey or not str(openapikey).strip():
        return _error_outputs('Error: Please provide OpenAPI key')
    if not ticker_input or not str(ticker_input).strip():
        return _error_outputs('Error: Please provide a ticker')
    if not category_selector:
        return _error_outputs('Error: Please select a filing category')

    ticker = str(ticker_input).strip()

    # NOTE(review): os.environ is process-wide, so every request served by
    # this process overwrites the key used by the others.
    os.environ['OPENAI_API_KEY'] = str(openapikey)

    num_filings_needed = _FILINGS_PER_CATEGORY.get(category_selector, 1)
    filings = get_filing_urls_to_download(
        category_selector,
        ticker,
        num_filings_to_download=num_filings_needed,
        include_amends=False,
        before_date='2023-04-01',
        after_date='2022-01-01',
    )
    if not filings:
        return _error_outputs(
            f'Error: No {category_selector} filings found for {ticker}'
        )

    # Only the first returned filing is analysed, even when more were requested.
    submission_urls = [filings[0].full_submission_url]
    print('Came here1')

    # NOTE(review): the User-Agent value looks like a redacted e-mail
    # placeholder; presumably SEC expects a real contact address - confirm.
    headers = {
        "User-Agent": '[email protected]',
        "Accept-Encoding": "gzip, deflate",
        "Host": "www.sec.gov",
    }

    filing_texts = []
    for url in submission_urls:
        try:
            resp = requests.get(url, headers=headers, timeout=30)
            resp.raise_for_status()
        except requests.RequestException as err:
            return _error_outputs(f'Error: Could not download the filing ({err})')
        print('Came here2')

        filing_text = _extract_filing_text(resp.text, category_selector)
        if filing_text:
            filing_texts.append(filing_text)

    print('Came here3')
    sentences = _candidate_sentences(". ".join(filing_texts))
    if not sentences:
        return _error_outputs(
            f'Error: No usable {category_selector} text found for {ticker}'
        )

    documents = [Document(sentence) for sentence in sentences]
    index = GPTSimpleVectorIndex.from_documents(documents)
    print('Came here4')

    querylist = [
        'What are the main products/ services mentioned?',
        'What are the major risks?',
        "What are the top investment focus areas?",
        "What is the financial outlook of the company?",
    ]

    llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo"))
    # NOTE(review): service_context is built but never passed to the index or
    # the query; confirm against the installed llama_index version whether it
    # should replace the llm_predictor keyword below.
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

    # Retrieve a third of the sentences, capped at 20 but never fewer than one.
    top_k = max(1, min(len(documents) // 3, 20))

    answerlist = []
    for i, query in enumerate(querylist):
        print(i, "Query: ", query)
        response = index.query(
            query,
            llm_predictor=llm_predictor,
            response_mode="tree_summarize",
            similarity_top_k=top_k,
        )
        print(response.response)
        answerlist.append(response.response)

    print('Came to return statement')
    return answerlist
|
133 |
+
|
134 |
+
# Gradio UI: collects the filing type, ticker and API key, then shows the four
# values returned by getstuff() in the four "Snapshot" textboxes.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>ChatGPT SEC Filings Question Answers</center></h1>")
    # NOTE(review): the snapshot list and the visitor-badge page_id in this
    # description look like they describe a different app - confirm the wording.
    gr.Markdown(
        """What are the products & services? What are the risks? What is the outlook? and much more. \n\nThis is a demo & showcases ChatGPT integrated with real data. It shows how to get real-time data and marry it with ChatGPT capabilities. This demonstrates 'Chain of Thought' thinking using ChatGPT.\n\n4 snapshots are provided for illustration (trends, sector outlook, news summary email, macro trends email)\n\nNote: llama-index & gpt-3.5-turbo are used. The analysis takes roughly 120 secs & may not always be consistent. If ChatGPT API is overloaded you will get an error\n ![visitors](https://visitor-badge.glitch.me/badge?page_id=hra.chatgpt-stock-news-snapshots)"""
    )

    # Input row: filing type and ticker, API key, submit button.
    with gr.Row() as row:
        with gr.Column():
            category_selector=gr.Dropdown(
                listofcategories, label="Filing Categories", info="Select the filing you want..."
            )
            input1 = gr.Textbox(placeholder='Enter ticker (USA only)', lines=1,label='Ticker')
        with gr.Column():
            # Masked input so the secret key is not displayed in clear text.
            textboxopenapi = gr.Textbox(placeholder="Enter OpenAPI Key...", lines=1,label='OpenAPI Key', type="password")

        with gr.Column():
            btn = gr.Button("Generate \nAnswers")

    # Output rows: one textbox per value returned by getstuff(), in order.
    with gr.Row() as row:
        with gr.Column():
            output1 = gr.Textbox(placeholder='', lines=4,label='Snapshot 1')
        with gr.Column():
            output2 = gr.Textbox(placeholder='', lines=4,label='Snapshot 2')
    with gr.Row() as row:
        with gr.Column():
            output3 = gr.Textbox(placeholder='', lines=4,label='Snapshot 3')
        with gr.Column():
            output4 = gr.Textbox(placeholder='', lines=4,label='Snapshot 4')

    btn.click(getstuff, inputs=[textboxopenapi,category_selector,input1],outputs=[output1,output2,output3,output4])


demo.launch(debug=True)
|