Spaces:
Build error
Build error
File size: 6,766 Bytes
bfb6182 a2f2d6c bfb6182 a2f2d6c bfb6182 a2f2d6c bfb6182 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
from datetime import datetime as dt
from pathlib import Path

import streamlit as st
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import pipeline, AutoTokenizer
# Must stay below the transformers import: both lines bind the name `pipeline`
# and the later binding is the one the rest of the file sees.
from optimum.pipelines import pipeline
import tweepy
import pandas as pd
import numpy as np
import plotly_express as px
import plotly.graph_objects as go
from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode

from variables import *
# Page-level settings: browser-tab title, tab icon and a full-width layout.
st.set_page_config(page_title="Live FinTwitter Analysis", page_icon="π", layout="wide")

st.sidebar.header("Sentiment Analysis Score")

# Timestamp of this script run (day_monthname_year_hour_minute); it is embedded
# in the name of the tweet dump file written further down.
extract_time = dt.today().strftime("%d_%B_%y_%H_%M")
@st.experimental_singleton(suppress_st_warning=True)
def load_models():
    """Load the sentiment and topic classification models.

    Each pipeline is built for the module-level ``task`` and uses its model id
    as the tokenizer id as well. ``task``, ``sent_model_id`` and
    ``topic_model_id`` are not defined in this file (presumably they come from
    ``from variables import *`` -- confirm).

    Returns:
        A ``(sentiment_pipeline, topic_pipeline)`` tuple, in that order.
    """
    return tuple(
        pipeline(task, model=model_id, tokenizer=model_id)
        for model_id in (sent_model_id, topic_model_id)
    )
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def process_tweets(df, df_users):
    """Classify tweets and assemble the table displayed by the app.

    The tweets are joined with their authors' usernames, run through the
    module-level ``sentiment_classifier`` and ``topic_classifier``, and the
    resulting labels and confidences are attached column-wise. The classifier
    outputs are expected to provide ``label`` and ``score`` fields.

    Args:
        df: DataFrame with ``author``, ``tweet`` and ``creation_time`` columns.
            It is left unmodified.
        df_users: DataFrame with ``author`` and ``username`` columns.

    Returns:
        DataFrame with the columns ``creation_time``, ``username``, ``tweet``,
        ``sentiment``, ``topic``, ``sentiment_confidence`` and
        ``topic_confidence``, sorted by ``creation_time`` (newest first).
        Confidences are whole-number percentages.
    """
    # Cast the author ids to int64 on a copy (``assign``) so they can be joined
    # with ``df_users`` without altering the caller's DataFrame.
    df_merged = df.assign(author=df['author'].astype(np.int64)).merge(df_users, on='author')
    tweet_list = df_merged['tweet'].tolist()

    sentiment = pd.DataFrame(sentiment_classifier(tweet_list)).rename(
        columns={'score': 'sentiment_confidence', 'label': 'sentiment'}
    )
    topic = pd.DataFrame(topic_classifier(tweet_list)).rename(
        columns={'score': 'topic_confidence', 'label': 'topic'}
    )

    # All three frames carry a fresh 0..n-1 index, so a column-wise concat
    # lines each prediction up with its tweet.
    df_group = pd.concat([df_merged, sentiment, topic], axis=1)

    confidence_cols = ['sentiment_confidence', 'topic_confidence']
    # Scale to percent first and round afterwards: rounding before the
    # multiplication re-introduces float noise (e.g. 0.29 * 100 -> 28.999...).
    df_group[confidence_cols] = df_group[confidence_cols].mul(100).round()

    display_cols = [
        'creation_time',
        'username',
        'tweet',
        'sentiment',
        'topic',
        'sentiment_confidence',
        'topic_confidence',
    ]
    return df_group[display_cols].sort_values(by=['creation_time'], ascending=False)
# Instantiate both classifiers; process_tweets reads these two names as
# module-level globals.
sentiment_classifier, topic_classifier = load_models()

st.title('Live FinTwitter Sentiment & Topic Analysis with Tweepy and Transformers')

# Introductory Markdown rendered below the title.
_INTRO_MARKDOWN = """
This app uses Tweepy to extract tweets from twitter based on a list of popular accounts that tweet about markets/finance:
- The stream of tweets is processed via HuggingFace models for finance tweet sentiment and topic analysis:
- [Topic Classification](https://huggingface.co/nickmuchi/finbert-tone-finetuned-finance-topic-classification)
- [Sentiment Analysis](https://huggingface.co/nickmuchi/finbert-tone-finetuned-fintwitter-classification)
- The resulting sentiments and corresponding tweets are displayed, with graphs tracking the live sentiment and topics of financial market tweets in the Visualisation tab.
"""
st.markdown(_INTRO_MARKDOWN)
refresh_stream = st.button('Refresh Stream')

# Once a refresh has been requested the flag stays True, so the branch below
# runs again on every later execution of the script within the same session.
if "update_but" not in st.session_state:
    st.session_state.update_but = False

if refresh_stream or st.session_state.update_but:
    st.session_state.update_but = True

    client = tweepy.Client(CONFIG['bearer_token'], wait_on_rate_limit=True)

    # Collect every page of the Twitter list, 100 tweets per request.
    all_tweets = list(
        tweepy.Paginator(
            client.get_list_tweets,
            id="1083517925049266176",
            user_fields=['username'],
            tweet_fields=['created_at', 'text'],
            expansions=['author_id'],
            max_results=100,
        )
    )

    with st.spinner('Generating sentiment and topic classification of tweets...'):
        # Flatten the pages into one list of tweets. `or []` guards against a
        # page whose `data` is None (assumed to happen for empty pages -- TODO confirm).
        flat_tweets = [tweet for response in all_tweets for tweet in (response.data or [])]

        data = [
            (tweet.data['author_id'], tweet.data['text'], tweet.data['created_at'])
            for tweet in flat_tweets
        ]
        df = pd.DataFrame(data, columns=['author', 'tweet', 'creation_time'])

        # Strip links from the tweet text before classification.
        df['tweet'] = (
            df['tweet']
            .replace(r'https?://\S+', '', regex=True)
            .replace(r'www\S+', '', regex=True)
        )

        # Resolve author ids to usernames, de-duplicated through a set.
        users = client.get_users(ids=df['author'].unique().tolist())
        df_users = pd.DataFrame(
            data=list({(user.id, user.username) for user in users.data}),
            columns=['author', 'username'],
        )

        df_tweets = process_tweets(df, df_users)

        # Dump the tweet texts, one per line. The directory is created on
        # demand and the encoding is pinned to UTF-8 so tweets with non-ASCII
        # characters are written the same way on every platform.
        output_dir = Path('output')
        output_dir.mkdir(parents=True, exist_ok=True)
        with open(output_dir / f'tweets_{extract_time}.txt', 'w', encoding='utf-8') as file:
            file.writelines(tweet + '\n' for tweet in df_tweets['tweet'].tolist())

        st.session_state['tdf'] = df_tweets

    with st.container():
        st.write("Table of Influential FinTweets")

        gb = GridOptionsBuilder.from_dataframe(df_tweets)
        gb.configure_pagination(paginationPageSize=30, paginationAutoPageSize=False)  # Add pagination
        gb.configure_side_bar()  # Add a sidebar
        gb.configure_selection(  # Enable multi-row selection
            'multiple',
            use_checkbox=True,
            groupSelectsChildren="Group checkbox select children",
        )
        gb.configure_column('tweet', wrapText=True, autoHeight=True)  # Wrap long tweets
        gridOptions = gb.build()

        AgGrid(
            df_tweets,
            gridOptions=gridOptions,
            data_return_mode='AS_INPUT',
            update_mode='MODEL_CHANGED',
            fit_columns_on_grid_load=False,
            enable_enterprise_modules=True,
            theme='streamlit',  # Add theme color to the table
            height=550,
            width='100%',
        )

    ## Display sentiment score
    # Percentage of tweets per sentiment label. Labels that never occur count
    # as 0, and an empty table yields 0.0 instead of dividing by zero.
    n_tweets = len(df_tweets)
    label_counts = df_tweets['sentiment'].value_counts()
    pos_perc, neg_perc, neu_perc = (
        label_counts.get(label, 0) * 100 / n_tweets if n_tweets else 0.0
        for label in ('Bullish', 'Bearish', 'Neutral')
    )
    sentiment_score = neu_perc + pos_perc - neg_perc

    # NOTE(review): the trace sets mode="delta" while the template below asks
    # for "number+delta+gauge" -- confirm which one is intended.
    fig_1 = go.Figure()
    fig_1.add_trace(
        go.Indicator(
            mode="delta",
            value=sentiment_score,
            domain={'row': 1, 'column': 1},
        )
    )
    fig_1.update_layout(
        template={
            'data': {
                'indicator': [
                    {
                        'title': {'text': "Sentiment Score"},
                        'mode': "number+delta+gauge",
                        'delta': {'reference': 50},
                    }
                ]
            }
        },
        autosize=False,
        width=250,
        height=250,
        margin=dict(l=5, r=5, b=5, pad=2),
    )

    with st.sidebar:
        st.plotly_chart(fig_1)

st.markdown("![visitor badge](https://visitor-badge.glitch.me/badge?page_id=nickmuchi-fintweet-sentiment-analysis)")