File size: 6,766 Bytes
bfb6182
 
 
 
 
 
 
 
 
 
a2f2d6c
bfb6182
 
 
 
 
 
 
 
 
a2f2d6c
 
bfb6182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2f2d6c
 
 
 
 
 
 
 
 
 
 
bfb6182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import os

# NOTE: keep the relative order below - `from variables import *` and the two
# `pipeline` imports rebind names, so the last import of a name wins.
import streamlit as st
from variables import *
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import pipeline, AutoTokenizer
from optimum.pipelines import pipeline
import tweepy
import pandas as pd
import numpy as np
import plotly_express as px
import plotly.graph_objects as go
from datetime import datetime as dt
from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode

# Page-level configuration: browser tab title/icon and wide layout.
st.set_page_config(
    page_title="Live FinTwitter Analysis",
    # The previous literal was the chart emoji's UTF-8 bytes decoded with the
    # wrong codec; restore the intended character.
    page_icon="📈",
    layout="wide",
)

st.sidebar.header("Sentiment Analysis Score")

# Local-time stamp (day_Month_yy_HH_MM) taken on every script run; it is used
# further down to name the text file the fetched tweets are written to.
extract_time = dt.today().strftime("%d_%B_%y_%H_%M")
    
@st.experimental_singleton(suppress_st_warning=True)
def load_models():
    '''Build the sentiment and topic classification pipelines.

    Each pipeline is created with `pipeline(task, ...)`, using the same id for
    both the model and its tokenizer. `task`, `sent_model_id` and
    `topic_model_id` are not defined in this file's visible code - presumably
    they come from the `variables` star import (confirm).

    Returns:
        A `(sentiment_pipeline, topic_pipeline)` tuple, in that order.
    '''
    # The generator is consumed left to right, so the sentiment pipeline is
    # built first and the topic pipeline second.
    sentiment_pipeline, topic_pipeline = (
        pipeline(task, model=model_id, tokenizer=model_id)
        for model_id in (sent_model_id, topic_model_id)
    )

    return sentiment_pipeline, topic_pipeline

@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def process_tweets(df: pd.DataFrame, df_users: pd.DataFrame) -> pd.DataFrame:
    '''Classify tweets and return a display-ready frame, newest first.

    Args:
        df: Tweets with at least the columns `author`, `tweet` and
            `creation_time`. `author` must be convertible to int64.
        df_users: Lookup table with the columns `author` and `username`.

    Returns:
        A DataFrame with the columns `creation_time`, `username`, `tweet`,
        `sentiment`, `topic`, `sentiment_confidence` and `topic_confidence`,
        sorted by `creation_time` descending. The two confidence columns are
        the classifier scores rounded to two decimals and multiplied by 100.
        Tweets whose author has no row in `df_users` are dropped (default
        inner merge). If nothing is left to classify, an empty frame with the
        same columns is returned.

    Notes:
        Relies on the module-level `sentiment_classifier` and
        `topic_classifier` callables. They are assumed to return one mapping
        with `label` and `score` keys per input text - inferred from the
        column renames below; confirm against the pipelines in use.
    '''
    output_columns = ['creation_time','username','tweet','sentiment','topic','sentiment_confidence','topic_confidence']
    confidence_columns = ['sentiment_confidence','topic_confidence']

    # Cast the join key on a copy so the caller's frame is left untouched.
    df_typed = df.assign(author=df['author'].astype(np.int64))

    # merge() returns a fresh 0..n-1 index, which is what lets the classifier
    # frames below line up row-for-row in the column-wise concat.
    df_merged = df_typed.merge(df_users, on='author')

    # Without rows the classifier frames would have no `score`/`label`
    # columns and the column selection further down would raise KeyError.
    if df_merged.empty:
        return pd.DataFrame(columns=output_columns)

    tweet_list = df_merged['tweet'].tolist()

    sentiment = pd.DataFrame(sentiment_classifier(tweet_list)).rename(
        columns={'score':'sentiment_confidence','label':'sentiment'})
    topic = pd.DataFrame(topic_classifier(tweet_list)).rename(
        columns={'score':'topic_confidence','label':'topic'})

    df_group = pd.concat([df_merged,sentiment,topic],axis=1)

    # Express confidences as percentages.
    df_group[confidence_columns] = df_group[confidence_columns].round(2).mul(100)

    df_tweets = df_group[output_columns].sort_values(by=['creation_time'],ascending=False)

    return df_tweets


    
# Module-level classifiers; process_tweets() looks these names up as globals.
sentiment_classifier, topic_classifier = load_models()

app_title = 'Live FinTwitter Sentiment & Topic Analysis with Tweepy and Transformers'
st.title(app_title)

# Introductory blurb rendered as markdown under the title.
app_intro = """
    This app uses Tweepy to extract tweets from twitter based on a list of popular accounts that tweet about markets/finance: 
    - The stream of tweets is processed via HuggingFace models for finance tweet sentiment and topic analysis:  
        - [Topic Classification](https://huggingface.co/nickmuchi/finbert-tone-finetuned-finance-topic-classification)  
        - [Sentiment Analysis](https://huggingface.co/nickmuchi/finbert-tone-finetuned-fintwitter-classification)  
    - The resulting sentiments and corresponding tweets are displayed, with graphs tracking the live sentiment and topics of financial market tweets in the Visualisation tab.  
    """
st.markdown(app_intro)
    
refresh_stream = st.button('Refresh Stream')

# Remember in session state that a refresh was requested, so the results
# section keeps being rendered on subsequent script runs.
if "update_but" not in st.session_state:
    st.session_state.update_but = False

if refresh_stream or st.session_state.update_but:
    st.session_state.update_but = True

    client = tweepy.Client(CONFIG['bearer_token'], wait_on_rate_limit=True)

    # Collect every page of the list timeline, asking for the author
    # expansion so each page also carries the matching user objects.
    all_tweets = list(tweepy.Paginator(client.get_list_tweets,
        id="1083517925049266176",
        user_fields=['username'],
        tweet_fields=['created_at','text'],
        expansions=['author_id'],
        max_results=100))

    with st.spinner('Generating sentiment and topic classification of tweets...'):

        # Flatten the pages, guarding against a page that has no tweets or no
        # expanded users.
        flat_tweets = [tweet for response in all_tweets for tweet in (response.data or [])]
        flat_users = [user for response in all_tweets for user in response.includes.get('users', [])]

        data = [(tweet.data['author_id'],tweet.data['text'],tweet.data['created_at']) for tweet in flat_tweets]
        df = pd.DataFrame(data,columns=['author','tweet','creation_time'])

        # Strip links from the tweet text before classification.
        df['tweet'] = df['tweet'].replace(r'https?://\S+', '', regex=True).replace(r'www\S+', '', regex=True)

        # Build the author-id -> username lookup from the users already
        # returned with the tweets. This avoids a second user-lookup request,
        # which is capped in how many ids it accepts per call.
        usernames_by_id = {user.id: user.username for user in flat_users}
        df_users = pd.DataFrame(list(usernames_by_id.items()),columns=['author','username'])

        df_tweets = process_tweets(df,df_users)

        # Get all tweets
        tweet_list = df_tweets['tweet'].tolist()

        # Dump the tweets, one per line. Create the target directory if it is
        # missing and write UTF-8 explicitly, since tweet text is not
        # guaranteed to fit the platform's default encoding.
        os.makedirs('output', exist_ok=True)
        with open(f'output/tweets_{extract_time}.txt', 'w', encoding='utf-8') as file:
            file.writelines(f'{tweet}\n' for tweet in tweet_list)

        st.session_state['tdf'] = df_tweets

    with st.container():

        st.write("Table of Influential FinTweets")

        gb = GridOptionsBuilder.from_dataframe(df_tweets)
        gb.configure_pagination(paginationPageSize=30,paginationAutoPageSize=False) #Add pagination
        gb.configure_side_bar() #Add a sidebar
        gb.configure_selection('multiple', use_checkbox=True, groupSelectsChildren="Group checkbox select children") #Enable multi-row selection
        gb.configure_column('tweet',wrapText=True,autoHeight=True) #Wrap long tweets instead of truncating them
        gridOptions = gb.build()

        AgGrid(
            df_tweets,
            gridOptions=gridOptions,
            data_return_mode='AS_INPUT',
            update_mode='MODEL_CHANGED',
            fit_columns_on_grid_load=False,
            enable_enterprise_modules=True,
            theme='streamlit', #Add theme color to the table
            height=550,
            width='100%'
        )

    ## Display sentiment score
    # Share of each sentiment label in percent of all displayed tweets.
    # Label-based counting replaces the positional `count()[0]` lookup, and
    # the empty case is handled explicitly instead of dividing by zero.
    total_tweets = len(df_tweets)
    if total_tweets:
        sentiment_labels = df_tweets['sentiment']
        pos_perc, neg_perc, neu_perc = (
            sentiment_labels.eq(label).sum()*100/total_tweets
            for label in ('Bullish', 'Bearish', 'Neutral')
        )
    else:
        pos_perc = neg_perc = neu_perc = 0.0

    sentiment_score = neu_perc+pos_perc-neg_perc

    fig_1 = go.Figure()

    fig_1.add_trace(go.Indicator(
        mode = "delta",
        value = sentiment_score,
        domain = {'row': 1, 'column': 1}))

    # The layout template supplies the indicator's title and the reference
    # value (50) that the delta is measured against.
    fig_1.update_layout(
        template = {'data' : {'indicator': [{
            'title': {'text': "Sentiment Score"},
            'mode' : "number+delta+gauge",
            'delta' : {'reference': 50}}]
                             }},
        autosize=False,
        width=250,
        height=250,
        margin=dict(
            l=5,
            r=5,
            b=5,
            pad=2
        )
    )

    with st.sidebar:

        st.plotly_chart(fig_1)

st.markdown("![visitor badge](https://visitor-badge.glitch.me/badge?page_id=nickmuchi-fintweet-sentiment-analysis)")