from llama_index  import GPTSimpleVectorIndex, SimpleDirectoryReader, LLMPredictor
from langchain.llms import OpenAIChat
from llama_index import download_loader
import gradio as gr
import pandas as pd
import openai

import datetime
from datetime import datetime, date, time, timedelta
import os
import regex

# Category names offered in the "Sector Options" dropdown and passed to
# getstuff() as `category_selector`; getstuff() picks a CNBC RSS feed per name.
# NOTE(review): getstuff() also has a feed for "Media", which is not listed
# here and therefore cannot be chosen from the UI - confirm this is intended.
listofcategories=["Earnings Announcements", "Automotive","Energy","Healthcare","Retail","Technology"]

# Common prefix of every CNBC RSS feed URL; the numeric feed id is appended.
_CNBC_FEED_URL = "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id="

# CNBC feed id for each known category name.
_CNBC_FEED_IDS = {
    "Automotive": "10000101",
    "Retail": "10000116",
    "Technology": "19854910",
    "Healthcare": "10000108",
    "Energy": "19836768",
    "Media": "10000110",
    "Earnings Announcements": "15839135",
}

# Feed used when the category is missing or unknown (the earnings feed).
_FALLBACK_FEED_ID = "15839135"

# Prompts. Any prompt containing the word 'dataframe' is rendered as a table
# by getstuff(); the others are returned as plain text.
_TRENDS_QUERY = (
    "What are the top trends? Give output as a json (that can be converted to "
    "pandas dataframe) with 3 columns named trend, company mentioned & reason"
)
_SECTOR_MOMENTUM_QUERY = (
    "Basis companies that are doing well name the sectors with positive "
    "momentum? Give output as a json (that can be converted to pandas "
    "dataframe) with 3 columns named sector, company names & reason"
)
# Shared tail of the "Name ..." / "Find ..." top & bottom performers prompt.
_TOP_BOTTOM_QUERY_TAIL = (
    " the top & bottom performing companies? Give output as a json (that can "
    "be converted to pandas dataframe) with 4 columns named sector, company "
    "names, reason & top/bottom"
)
_EMAIL_QUERIES = [
    "You are an award winning email writer. Write an email summarizing the news. Do not say I am language model and cannot do this",
    "You are an award winning email writer. Write an email summarizing the key macro trends basis the news.Do not say I am language model and cannot do this",
]


def _feed_and_queries(category):
    """Return ``(rss_url, queries)`` for *category*.

    Unknown or missing categories fall back to the earnings feed with the
    sector-momentum prompt set instead of raising.
    """
    feed_id = _CNBC_FEED_IDS.get(category)
    if feed_id is None:
        queries = [_SECTOR_MOMENTUM_QUERY, "Find" + _TOP_BOTTOM_QUERY_TAIL]
        return _CNBC_FEED_URL + _FALLBACK_FEED_ID, queries + _EMAIL_QUERIES

    # The earnings snapshot words the second prompt as "Find", the rest "Name".
    verb = "Find" if category == "Earnings Announcements" else "Name"
    queries = [_TRENDS_QUERY, verb + _TOP_BOTTOM_QUERY_TAIL]
    return _CNBC_FEED_URL + feed_id, queries + _EMAIL_QUERIES


def _answer_to_dataframe(answer_text, json_pattern):
    """Convert the first JSON object found in *answer_text* into a DataFrame.

    The rows are taken from the first column of the parsed JSON. If no JSON
    object is found or it cannot be converted, a one-row DataFrame with
    'message' and 'action' columns is returned instead (best effort).
    """
    # Local import so this block does not depend on a file-level import.
    from io import StringIO

    try:
        found = json_pattern.search(answer_text)
        if found is None:
            raise ValueError("no JSON object found in the answer")
        # Wrap in StringIO: passing literal JSON text to read_json is
        # deprecated in recent pandas releases.
        parsed = pd.read_json(StringIO(found.group(0)))
        return pd.DataFrame(parsed[parsed.columns[0]].tolist())
    except Exception as exc:  # best effort: never fail the whole snapshot
        print("Could not convert answer to a table:", repr(exc))
        return pd.DataFrame({
            'message': ['Data insufficient to decipher'],
            'action': ['try again in a few hours'],
        })


def getstuff(openapikey,category_selector):
    """Build the four news snapshots for the selected category.

    Loads the CNBC RSS feed for *category_selector*, indexes it with
    GPTSimpleVectorIndex and runs four prompts through gpt-3.5-turbo.

    Args:
        openapikey: OpenAI API key entered by the user.
        category_selector: Category name chosen in the dropdown. A missing or
            unknown value falls back to the earnings feed.

    Returns:
        A list with one entry per prompt, in prompt order: a pandas DataFrame
        for prompts that ask for a dataframe, otherwise the answer text.
    """
    dateforfilesave = datetime.today().strftime("%d-%m-%Y %I:%M%p")
    print(category_selector)
    print(dateforfilesave)
    # NOTE: os.environ is process-wide, so the most recently submitted key is
    # the one in effect for the whole process.
    # Strip whitespace that commonly sneaks in when a key is pasted.
    os.environ['OPENAI_API_KEY'] = str(openapikey).strip()

    rssurl, querylist = _feed_and_queries(category_selector)

    RssReader = download_loader("RssReader")
    documents = RssReader().load_data([rssurl])
    index = GPTSimpleVectorIndex(documents)
    llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo"))

    # Use about a third of the documents as context, but never zero.
    top_k = max(1, len(documents) // 3)
    # Recursive pattern matching a balanced {...} span; compiled once.
    json_pattern = regex.compile(r'\{(?:[^{}]|(?R))*\}')

    answerlist = []
    for i, query in enumerate(querylist):
        print(i,"Query: ",query)
        response = index.query(
            query,
            llm_predictor=llm_predictor,
            response_mode="tree_summarize",
            similarity_top_k=top_k,
        )
        print(response.response)
        if 'dataframe' in query:
            answerlist.append(_answer_to_dataframe(response.response, json_pattern))
        else:
            answerlist.append(response.response)

    print('Came to return statement')
    return answerlist

# Gradio UI: an API-key box and a category dropdown feed getstuff(), whose four
# results fill two tables and two text boxes.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>ChatGPT Stock News Snapshots</center></h1>")
    gr.Markdown(
        """What are the sectors with positive momentum? What are the macro trends? Which companies have momentum? Sector summaries and much more. \n\nThis is a demo & showcases ChatGPT integrated with real data. It shows how to get real-time data and marry it with ChatGPT capabilities. This demonstrates 'Chain of Thought' thinking using ChatGPT.\n\nNote: llama-index & gpt-3.5-turbo are used. The analysis takes roughly 120 secs & may not always be consistent. If ChatGPT API is overloaded you will get an error\n ![visitors](https://visitor-badge.glitch.me/badge?page_id=hra.chatgpt-stock-news-snapshots)"""
        )

    with gr.Row():
        with gr.Column():
            # Password-type box so the secret key is masked on screen.
            textboxopenapi = gr.Textbox(
                placeholder="Enter OpenAI API Key...",
                lines=1,
                label='OpenAI API Key',
                type="password",
            )
            # Preselect the first category so the handler never receives an
            # empty selection.
            category_selector = gr.Dropdown(
                listofcategories,
                value=listofcategories[0],
                label="Sector Options",
                info="Select the snapshot you want...",
            )
        with gr.Column():
            btn = gr.Button("Generate \nSnapshot")

    # One row per output, in the same order as the list getstuff() returns.
    with gr.Row():
        table1 = gr.Dataframe(label="Snapshot 1")
    with gr.Row():
        table2 = gr.Dataframe(label="Snapshot 2")
    with gr.Row():
        output1 = gr.Textbox(placeholder='', lines=4, label='Snapshot 3')
    with gr.Row():
        output2 = gr.Textbox(placeholder='', lines=4, label='Snapshot 4')

    btn.click(
        getstuff,
        inputs=[textboxopenapi, category_selector],
        outputs=[table1, table2, output1, output2],
    )


demo.launch(debug=True)