from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader, LLMPredictor from langchain.llms import OpenAIChat from llama_index import download_loader import gradio as gr import pandas as pd import openai import datetime from datetime import datetime, date, time, timedelta import os import regex import requests import json HRA_TOKEN=os.getenv("HRA_TOKEN") listofcategories=["Earnings Announcements", "Overall","Automotive","Energy","Healthcare","Retail","Technology"] headers = {'Content-type': 'application/json', 'Accept': 'text/plain'} url_hraprompts='https://us-central1-createinsightsproject.cloudfunctions.net/gethrahfprompts' data={"prompt_type":'chatgpt_stock_news_snapshot_sector',"hra_token":HRA_TOKEN} try: r = requests.post(url_hraprompts, data=json.dumps(data), headers=headers) except requests.exceptions.ReadTimeout as e: print(e) #print(r.content) sector_prompt_text=str(r.content, 'UTF-8').split('UNIQUE_SEPERATOR') print(sector_prompt_text) data={"prompt_type":'chatgpt_stock_news_snapshot_earnings',"hra_token":HRA_TOKEN} try: r = requests.post(url_hraprompts, data=json.dumps(data), headers=headers) except requests.exceptions.ReadTimeout as e: print(e) #print(r.content) earnings_prompt_text=str(r.content, 'UTF-8').split('UNIQUE_SEPERATOR') print(earnings_prompt_text) def getstuff(openapikey,category_selector): dateforfilesave=datetime.today().strftime("%d-%m-%Y %I:%M%p") print(category_selector) print(dateforfilesave) if openapikey=='': return pd.DataFrame(["Please provide OpenAPI Key"],columns=['ERROR']),pd.DataFrame(["Please provide OpenAPI Key"],columns=['ERROR']),'Error: Please provide OpenAPI key','Error: Please provide OpenAPI key' os.environ['OPENAI_API_KEY'] = str(openapikey) RssReader = download_loader("RssReader") reader = RssReader() whichone=listofcategories[listofcategories.index(category_selector)] querylist=sector_prompt_text if whichone=="Overall": rssurl="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114" elif whichone=="Automotive": rssurl="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=10000101" elif whichone=="Retail": rssurl="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=10000116" elif whichone=="Technology": rssurl="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=19854910" elif whichone=="Healthcare": rssurl="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=10000108" elif whichone=="Energy": rssurl="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=19836768" elif whichone=="Media": rssurl="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=10000110" elif whichone=='Earnings Announcements': rssurl="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=15839135" querylist=earnings_prompt_text else: rssurl="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=15839135" ###should not come here but using earnings url querylist=earnings_prompt_text documents = reader.load_data([rssurl]) index = GPTSimpleVectorIndex(documents) llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo")) answerlist=[] for i in range(len(querylist)): print(i,"Query: ",querylist[i]) response = index.query( querylist[i], llm_predictor=llm_predictor, response_mode="tree_summarize", similarity_top_k=int(len(documents)/3) ) print(response.response) if 'dataframe' in querylist[i]: try: pattern = regex.compile(r'\{(?:[^{}]|(?R))*\}') jsonextract=pattern.findall(response.response)[0] #print("json extract\n",jsonextract) df_tmp=pd.read_json(jsonextract) if len(df_tmp.columns)<=1: df=pd.DataFrame(df_tmp[df_tmp.columns[0]].tolist()) else: df=df_tmp except: df=pd.DataFrame() df['message']=['Data insufficient to decipher'] df['action']=['try again in a few hours'] answerlist.append(df) else: answerlist.append(response.response) print('Came to return statement') return answerlist with gr.Blocks() as demo: gr.Markdown("