from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader, LLMPredictor
from langchain.llms import OpenAIChat
from llama_index import download_loader
import gradio as gr
import pandas as pd
import openai
import datetime
from datetime import datetime, date, time, timedelta
import os
import regex
from io import StringIO

listofcategories = [
    "Earnings Announcements",
    "Automotive",
    "Energy",
    "Healthcare",
    "Retail",
    "Technology",
]

# Common prefix of every CNBC RSS feed URL used below; the feed id is appended.
_CNBC_FEED_URL = (
    "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id="
)
_EARNINGS_FEED_ID = "15839135"
# Sector category -> CNBC feed id. "Media" is supported here even though it is
# not offered in `listofcategories`.
_SECTOR_FEED_IDS = {
    "Automotive": "10000101",
    "Retail": "10000116",
    "Technology": "19854910",
    "Healthcare": "10000108",
    "Energy": "19836768",
    "Media": "10000110",
}

# Prompts. Any prompt containing the word 'dataframe' is treated by getstuff()
# as one whose answer must be parsed into a pandas DataFrame.
_TRENDS_QUERY = (
    "What are the top trends? Give output as a json (that can be converted "
    "to pandas dataframe) with 3 columns named trend, company mentioned & reason"
)
_SECTOR_MOMENTUM_QUERY = (
    "Basis companies that are doing well name the sectors with positive "
    "momentum? Give output as a json (that can be converted to pandas "
    "dataframe) with 3 columns named sector, company names & reason"
)
# Prefixed with "Name" or "Find" depending on the category.
_TOP_BOTTOM_QUERY_TAIL = (
    " the top & bottom performing companies? Give output as a json (that can "
    "be converted to pandas dataframe) with 4 columns named sector, company "
    "names, reason & top/bottom"
)
_NEWS_EMAIL_QUERY = (
    "You are an award winning email writer. Write an email summarizing the "
    "news. Do not say I am language model and cannot do this"
)
_MACRO_EMAIL_QUERY = (
    "You are an award winning email writer. Write an email summarizing the "
    "key macro trends basis the news.Do not say I am language model and "
    "cannot do this"
)

# Recursive pattern (needs the third-party `regex` module, not `re`) matching a
# brace-balanced {...} span. Compiled once instead of once per query.
_JSON_OBJECT_PATTERN = regex.compile(r'\{(?:[^{}]|(?R))*\}')


def _select_feed(category):
    """Return ``(rss_url, prompts)`` for *category*.

    Anything that is neither "Earnings Announcements" nor a known sector falls
    back to the earnings feed with the sector-momentum prompt set.
    """
    if category == "Earnings Announcements":
        return _CNBC_FEED_URL + _EARNINGS_FEED_ID, [
            _TRENDS_QUERY,
            "Find" + _TOP_BOTTOM_QUERY_TAIL,
            _NEWS_EMAIL_QUERY,
            _MACRO_EMAIL_QUERY,
        ]

    # Guard against unhashable selections (e.g. a list) before the dict lookup.
    feed_id = _SECTOR_FEED_IDS.get(category) if isinstance(category, str) else None
    if feed_id is not None:
        return _CNBC_FEED_URL + feed_id, [
            _TRENDS_QUERY,
            "Name" + _TOP_BOTTOM_QUERY_TAIL,
            _NEWS_EMAIL_QUERY,
            _MACRO_EMAIL_QUERY,
        ]

    # Should not normally be reached, but use the earnings feed as a fallback.
    return _CNBC_FEED_URL + _EARNINGS_FEED_ID, [
        _SECTOR_MOMENTUM_QUERY,
        "Find" + _TOP_BOTTOM_QUERY_TAIL,
        _NEWS_EMAIL_QUERY,
        _MACRO_EMAIL_QUERY,
    ]


def _response_to_dataframe(text):
    """Parse the first brace-balanced JSON object in *text* into a DataFrame.

    The rows are taken from the first column of the parsed JSON. On any
    parsing failure a one-row placeholder frame with 'message' and 'action'
    columns is returned instead of raising.
    """
    try:
        json_text = _JSON_OBJECT_PATTERN.findall(text)[0]
        # Wrap in StringIO: passing a literal JSON string to read_json is
        # deprecated in recent pandas releases.
        parsed = pd.read_json(StringIO(json_text))
        return pd.DataFrame(parsed[parsed.columns[0]].tolist())
    except Exception as err:
        # Deliberate best-effort: model output is free-form text, so any
        # extraction/parsing problem degrades to a placeholder table. Unlike a
        # bare `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        print("Could not convert response to dataframe:", repr(err))
        return pd.DataFrame({
            'message': ['Data insufficient to decipher'],
            'action': ['try again in a few hours'],
        })


def getstuff(openapikey, category_selector):
    """Summarize the CNBC RSS feed for a category with an LLM.

    Loads the feed for *category_selector*, indexes it with
    ``GPTSimpleVectorIndex`` and runs four prompts against it using
    ``gpt-3.5-turbo`` at temperature 0 in ``tree_summarize`` mode.

    Args:
        openapikey: OpenAI API key. It is stringified and exported through the
            ``OPENAI_API_KEY`` environment variable.
        category_selector: Category name, normally one of ``listofcategories``
            ("Media" is also recognised). Any other value falls back to the
            earnings feed with the sector-momentum prompts; the previous
            ``list.index`` round trip raised ``ValueError`` before that
            intended fallback could ever run.

    Returns:
        A list with one entry per prompt, in prompt order: a
        ``pandas.DataFrame`` for prompts that ask for JSON (the first two) and
        the raw response text for the email prompts (the last two).
    """
    dateforfilesave = datetime.today().strftime("%d-%m-%Y %I:%M%p")
    print(category_selector)
    print(dateforfilesave)
    os.environ['OPENAI_API_KEY'] = str(openapikey)

    RssReader = download_loader("RssReader")
    reader = RssReader()

    rssurl, querylist = _select_feed(category_selector)
    documents = reader.load_data([rssurl])
    index = GPTSimpleVectorIndex(documents)
    llm_predictor = LLMPredictor(
        llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo")
    )
    # Use roughly a third of the documents as context, but never zero (which
    # is what int(len(documents) / 3) produced for feeds with < 3 documents).
    top_k = max(1, len(documents) // 3)

    answerlist = []
    for i, query in enumerate(querylist):
        print(i, "Query: ", query)
        response = index.query(
            query,
            llm_predictor=llm_predictor,
            response_mode="tree_summarize",
            similarity_top_k=top_k,
        )
        print(response.response)
        if 'dataframe' in query:
            answerlist.append(_response_to_dataframe(response.response))
        else:
            answerlist.append(response.response)

    print('Came to return statement')
    return answerlist