Spaces:

hra
/

ChatGPT-SEC-Filings-QA

Runtime error

App Files Files Community

hra committed on Apr 3, 2023

Commit

b37597c

•

1 Parent(s): 9cd95d7

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -22

app.py CHANGED Viewed

@@ -30,6 +30,9 @@ def getstuff(openapikey,category_selector,ticker_input,user_question):
     print(dateforfilesave)
     if openapikey=='':
         return ["Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key",]
     os.environ['OPENAI_API_KEY'] = str(openapikey)
@@ -43,7 +46,6 @@ def getstuff(openapikey,category_selector,ticker_input,user_question):
         num_filings_needed=1
     filings_temp=get_filing_urls_to_download(category_selector, ticker_input,num_filings_to_download=num_filings_needed,include_amends=False,before_date='2023-04-01',after_date='2022-01-01')
     files=[filings_temp[i].full_submission_url for i in range(len(filings_temp))]
-    print('Came here1')
     filetextcontentlist=[]
     for each in files:
       headers = {
@@ -52,39 +54,26 @@ def getstuff(openapikey,category_selector,ticker_input,user_question):
                     "Host": "www.sec.gov",
                 }
       resp=requests.get(each,headers=headers)
-      raw_10k  = resp.text
       print('Came here2')
-      # Regex to find <DOCUMENT> tags
       doc_start_pattern = re.compile(r'<DOCUMENT>')
       doc_end_pattern = re.compile(r'</DOCUMENT>')
-      # Regex to find <TYPE> tag prceeding any characters, terminating at new line
       type_pattern = re.compile(r'<TYPE>[^\n]+')
-      # Create 3 lists with the span idices for each regex
-      ### There are many <Document> Tags in this text file, each as specific exhibit like 10-K, EX-10.17 etc
-      ### First filter will give us document tag start <end> and document tag end's <start>
-      ### We will use this to later grab content in between these tags
-      doc_start_is = [x.end() for x in doc_start_pattern.finditer(raw_10k)]
-      doc_end_is = [x.start() for x in doc_end_pattern.finditer(raw_10k)]
-      ### Type filter is interesting, it looks for <TYPE> with Not flag as new line, ie terminare there, with + sign
-      ### to look for any char afterwards until new line \n. This will give us <TYPE> followed Section Name like '10-K'
-      ### Once we have have this, it returns String Array, below line will with find content after <TYPE> ie, '10-K'
-      ### as section names
-      doc_types = [x[len('<TYPE>'):] for x in type_pattern.findall(raw_10k)]
       document = {}
-      # Create a loop to go through each section type and save only the 10-K section in the dictionary
       for doc_type, doc_start, doc_end in zip(doc_types, doc_start_is, doc_end_is):
           if doc_type == category_selector:
-              document[doc_type] = raw_10k[doc_start:doc_end]
       item_content = BeautifulSoup(document[category_selector], 'lxml')
       filetextcontentlist.append(str(item_content.text.encode('ascii','ignore')))
-    print('Came here3')
     temp=". ".join(filetextcontentlist).replace('\xa024',' ')
     temp=temp.replace('\n',' ').strip()
     temp=temp.split('.')
@@ -94,7 +83,6 @@ def getstuff(openapikey,category_selector,ticker_input,user_question):
         newlist.append(each)
     documents=[Document(t) for t in newlist]
     index = GPTSimpleVectorIndex.from_documents(documents)
-    print('Came here4')
     querylist=['What are the main products/ services mentioned?','What are the major risks?',"What are the top investment focus areas?","What is the financial outlook of the company?","What key technologies like AI, blockchain etc are mentioned?","What other company names/ competitors are mentioned?"]
     if user_question=='':
         querylist.append('What is the key summary?')

     print(dateforfilesave)
     if openapikey=='':
         return ["Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key","Please provide OpenAPI Key",]
+    if ticker_input=='':
+        return ["Please enter Ticker","Please enter Ticker","Please enter Ticker","Please enter Ticker","Please enter Ticker","Please enter Ticker","Please enter Ticker",]
     os.environ['OPENAI_API_KEY'] = str(openapikey)
         num_filings_needed=1
     filings_temp=get_filing_urls_to_download(category_selector, ticker_input,num_filings_to_download=num_filings_needed,include_amends=False,before_date='2023-04-01',after_date='2022-01-01')
     files=[filings_temp[i].full_submission_url for i in range(len(filings_temp))]
     filetextcontentlist=[]
     for each in files:
       headers = {
                     "Host": "www.sec.gov",
                 }
       resp=requests.get(each,headers=headers)
+      rawfile  = resp.text
       print('Came here2')
+      # Find text between <DOCUMENT> tags
       doc_start_pattern = re.compile(r'<DOCUMENT>')
       doc_end_pattern = re.compile(r'</DOCUMENT>')
       type_pattern = re.compile(r'<TYPE>[^\n]+')
+      doc_start_is = [tmp.end() for tmp in doc_start_pattern.finditer(rawfile)]
+      doc_end_is = [tmp.start() for tmp in doc_end_pattern.finditer(rawfile)]
+      doc_types = [tmp[len('<TYPE>'):] for tmp in type_pattern.findall(rawfile)]
       document = {}
       for doc_type, doc_start, doc_end in zip(doc_types, doc_start_is, doc_end_is):
           if doc_type == category_selector:
+              document[doc_type] = rawfile[doc_start:doc_end]
       item_content = BeautifulSoup(document[category_selector], 'lxml')
       filetextcontentlist.append(str(item_content.text.encode('ascii','ignore')))
     temp=". ".join(filetextcontentlist).replace('\xa024',' ')
     temp=temp.replace('\n',' ').strip()
     temp=temp.split('.')
         newlist.append(each)
     documents=[Document(t) for t in newlist]
     index = GPTSimpleVectorIndex.from_documents(documents)
     querylist=['What are the main products/ services mentioned?','What are the major risks?',"What are the top investment focus areas?","What is the financial outlook of the company?","What key technologies like AI, blockchain etc are mentioned?","What other company names/ competitors are mentioned?"]
     if user_question=='':
         querylist.append('What is the key summary?')