Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -30,6 +30,9 @@ def getstuff(openapikey,category_selector,ticker_input,user_question):
|
|
30 |
print(dateforfilesave)
|
31 |
if openapikey=='':
|
32 |
return ["Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key",]
|
|
|
|
|
|
|
33 |
|
34 |
os.environ['OPENAI_API_KEY'] = str(openapikey)
|
35 |
|
@@ -43,7 +46,6 @@ def getstuff(openapikey,category_selector,ticker_input,user_question):
|
|
43 |
num_filings_needed=1
|
44 |
filings_temp=get_filing_urls_to_download(category_selector, ticker_input,num_filings_to_download=num_filings_needed,include_amends=False,before_date='2023-04-01',after_date='2022-01-01')
|
45 |
files=[filings_temp[i].full_submission_url for i in range(len(filings_temp))]
|
46 |
-
print('Came here1')
|
47 |
filetextcontentlist=[]
|
48 |
for each in files:
|
49 |
headers = {
|
@@ -52,39 +54,26 @@ def getstuff(openapikey,category_selector,ticker_input,user_question):
|
|
52 |
"Host": "www.sec.gov",
|
53 |
}
|
54 |
resp=requests.get(each,headers=headers)
|
55 |
-
|
56 |
print('Came here2')
|
57 |
-
#
|
58 |
doc_start_pattern = re.compile(r'<DOCUMENT>')
|
59 |
doc_end_pattern = re.compile(r'</DOCUMENT>')
|
60 |
-
# Regex to find the <TYPE> tag followed by any characters, terminating at the newline
|
61 |
type_pattern = re.compile(r'<TYPE>[^\n]+')
|
|
|
|
|
|
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
### There are many <DOCUMENT> tags in this text file, each holding a specific exhibit such as 10-K, EX-10.17, etc.
|
66 |
-
### The first filter gives us the end offset of each <DOCUMENT> start tag and the start offset of each </DOCUMENT> end tag
|
67 |
-
### We will use this to later grab content in between these tags
|
68 |
-
doc_start_is = [x.end() for x in doc_start_pattern.finditer(raw_10k)]
|
69 |
-
doc_end_is = [x.start() for x in doc_end_pattern.finditer(raw_10k)]
|
70 |
-
|
71 |
-
### The type filter is interesting: it matches <TYPE> followed by a negated newline class (i.e. it terminates at the newline), with a + sign
|
72 |
-
### to match any characters afterwards up to the newline \n. This gives us <TYPE> followed by a section name like '10-K'
|
73 |
-
### Once we have this, it returns a list of strings; the line below extracts the content after <TYPE>, i.e. '10-K',
|
74 |
-
### as section names
|
75 |
-
doc_types = [x[len('<TYPE>'):] for x in type_pattern.findall(raw_10k)]
|
76 |
|
77 |
document = {}
|
78 |
-
|
79 |
-
# Create a loop to go through each section type and save only the 10-K section in the dictionary
|
80 |
for doc_type, doc_start, doc_end in zip(doc_types, doc_start_is, doc_end_is):
|
81 |
if doc_type == category_selector:
|
82 |
-
document[doc_type] =
|
83 |
item_content = BeautifulSoup(document[category_selector], 'lxml')
|
84 |
|
85 |
filetextcontentlist.append(str(item_content.text.encode('ascii','ignore')))
|
86 |
|
87 |
-
print('Came here3')
|
88 |
temp=". ".join(filetextcontentlist).replace('\xa024',' ')
|
89 |
temp=temp.replace('\n',' ').strip()
|
90 |
temp=temp.split('.')
|
@@ -94,7 +83,6 @@ def getstuff(openapikey,category_selector,ticker_input,user_question):
|
|
94 |
newlist.append(each)
|
95 |
documents=[Document(t) for t in newlist]
|
96 |
index = GPTSimpleVectorIndex.from_documents(documents)
|
97 |
-
print('Came here4')
|
98 |
querylist=['What are the main products/ services mentioned?','What are the major risks?',"What are the top investment focus areas?","What is the financial outlook of the company?","What key technologies like AI, blockchain etc are mentioned?","What other company names/ competitors are mentioned?"]
|
99 |
if user_question=='':
|
100 |
querylist.append('What is the key summary?')
|
|
|
30 |
print(dateforfilesave)
|
31 |
if openapikey=='':
|
32 |
return ["Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key",]
|
33 |
+
|
34 |
+
if ticker_input=='':
|
35 |
+
return ["Please enter Ticker","Please enter Ticker","Please enter Ticker","Please enter Ticker","Please enter Ticker","Please enter Ticker","Please enter Ticker",]
|
36 |
|
37 |
os.environ['OPENAI_API_KEY'] = str(openapikey)
|
38 |
|
|
|
46 |
num_filings_needed=1
|
47 |
filings_temp=get_filing_urls_to_download(category_selector, ticker_input,num_filings_to_download=num_filings_needed,include_amends=False,before_date='2023-04-01',after_date='2022-01-01')
|
48 |
files=[filings_temp[i].full_submission_url for i in range(len(filings_temp))]
|
|
|
49 |
filetextcontentlist=[]
|
50 |
for each in files:
|
51 |
headers = {
|
|
|
54 |
"Host": "www.sec.gov",
|
55 |
}
|
56 |
resp=requests.get(each,headers=headers)
|
57 |
+
rawfile = resp.text
|
58 |
print('Came here2')
|
59 |
+
# Find text between <DOCUMENT> tags
|
60 |
doc_start_pattern = re.compile(r'<DOCUMENT>')
|
61 |
doc_end_pattern = re.compile(r'</DOCUMENT>')
|
|
|
62 |
type_pattern = re.compile(r'<TYPE>[^\n]+')
|
63 |
+
|
64 |
+
doc_start_is = [tmp.end() for tmp in doc_start_pattern.finditer(rawfile)]
|
65 |
+
doc_end_is = [tmp.start() for tmp in doc_end_pattern.finditer(rawfile)]
|
66 |
|
67 |
+
doc_types = [tmp[len('<TYPE>'):] for tmp in type_pattern.findall(rawfile)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
document = {}
|
|
|
|
|
70 |
for doc_type, doc_start, doc_end in zip(doc_types, doc_start_is, doc_end_is):
|
71 |
if doc_type == category_selector:
|
72 |
+
document[doc_type] = rawfile[doc_start:doc_end]
|
73 |
item_content = BeautifulSoup(document[category_selector], 'lxml')
|
74 |
|
75 |
filetextcontentlist.append(str(item_content.text.encode('ascii','ignore')))
|
76 |
|
|
|
77 |
temp=". ".join(filetextcontentlist).replace('\xa024',' ')
|
78 |
temp=temp.replace('\n',' ').strip()
|
79 |
temp=temp.split('.')
|
|
|
83 |
newlist.append(each)
|
84 |
documents=[Document(t) for t in newlist]
|
85 |
index = GPTSimpleVectorIndex.from_documents(documents)
|
|
|
86 |
querylist=['What are the main products/ services mentioned?','What are the major risks?',"What are the top investment focus areas?","What is the financial outlook of the company?","What key technologies like AI, blockchain etc are mentioned?","What other company names/ competitors are mentioned?"]
|
87 |
if user_question=='':
|
88 |
querylist.append('What is the key summary?')
|