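# Spaces:
# Build error
# Build error
# NOTE(review): the three lines above look like page-status text captured
# together with the file rather than program code - confirm and drop.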
## Variables
import os
import pathlib

import pandas as pd
import streamlit as st
from langchain import VectorDBQA
from langchain.chat_models.openai import ChatOpenAI
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
def get_latest_file():
    '''Return the text of the most recently modified ``.txt`` file in ``output/``.

    The directory is resolved relative to the current working directory.

    Returns:
        str: The full contents of the newest file, decoded as UTF-8.

    Raises:
        IndexError: If no ``.txt`` file is found under ``output/``.
    '''
    # set the directory path
    directory_path = "output/"
    # list every text file in the directory, oldest first by modification time
    text_files = sorted(pathlib.Path(directory_path).glob("*.txt"), key=lambda f: f.stat().st_mtime)
    if not text_files:
        # Same exception type that indexing the empty list used to raise, but
        # with a message that says what is actually missing.
        raise IndexError(f"no .txt files found in {directory_path!r}")
    # the last entry is the latest modified file
    latest_file = text_files[-1]
    # Decode explicitly instead of relying on the locale's default encoding, so
    # non-ASCII text reads the same on every platform.
    # NOTE(review): assumes the files are written as UTF-8 - confirm against the writer.
    return latest_file.read_text(encoding="utf-8")
def process_tweets(file, embed_model, query):
    '''Answer ``query`` from the text of the latest tweets.

    The text is split into chunks, the chunks are embedded and indexed with
    ``FAISS.from_texts``, and a "stuff" ``VectorDBQA`` chain backed by
    ``ChatOpenAI(temperature=0)`` is run over that index.

    Relies on two names defined outside this function: ``bi_enc_dict`` (maps
    ``embed_model`` to a model id) and ``prompt`` (passed to the chain).

    Args:
        file: Text to search; passed as-is to ``split_text``.
        embed_model: Key looked up in ``bi_enc_dict`` to select the embedding model.
        query: Question handed to the chain under the ``"query"`` key.

    Returns:
        Whatever the chain call returns for ``{"query": query}`` with
        ``return_only_outputs=True``.

    Raises:
        KeyError: If ``embed_model`` is not in ``bi_enc_dict`` (presumably a dict).
        ValueError: If the selected model id is not one of the two handled below.
    '''
    # Split tweets into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_text(file)

    model = bi_enc_dict[embed_model]
    if model == "hkunlp/instructor-large":
        # Instructor models take separate instructions for queries and documents.
        emb = HuggingFaceInstructEmbeddings(
            model_name=model,
            query_instruction='Represent the Financial question for retrieving supporting documents: ',
            embed_instruction='Represent the Financial document for retrieval: ',
        )
    elif model == "sentence-transformers/all-mpnet-base-v2":
        emb = HuggingFaceEmbeddings(model_name=model)
    else:
        # Without this branch ``emb`` stayed unbound and the failure surfaced
        # later as an UnboundLocalError with no hint about the cause.
        raise ValueError(f"unsupported embedding model: {model!r}")

    docsearch = FAISS.from_texts(texts, emb)
    chain_type_kwargs = {"prompt": prompt}
    chain = VectorDBQA.from_chain_type(
        ChatOpenAI(temperature=0),
        chain_type="stuff",
        vectorstore=docsearch,
        chain_type_kwargs=chain_type_kwargs,
    )
    result = chain({"query": query}, return_only_outputs=True)
    return result
# Runtime configuration. "bearer_token" is read from the environment variable
# of the same name and is None when that variable is not set.
# NOTE(review): presumably the Twitter API bearer token - confirm.
CONFIG = {
    "bearer_token": os.environ.get("bearer_token"),
}
# Model identifiers and the task name.
# NOTE(review): presumably Hugging Face Hub ids for the sentiment and topic
# classifiers, used with a 'text-classification' pipeline - confirm at the call site.
sent_model_id = 'nickmuchi/optimum-finbert-tone-finetuned-fintwitter-classification'
topic_model_id = 'nickmuchi/optimum-finbert-tone-finetuned-finance-topic-classification'
task = 'text-classification'

# Sentiment names keyed by a numeric string.
# NOTE(review): keys look like class indices emitted by the sentiment model - confirm.
sentiments = {"0": "Bearish", "1": "Bullish", "2": "Neutral"}
# Topic names keyed by a numeric string ("0" through "19").
# The " | " inside several names is part of the label text.
# NOTE(review): keys look like class indices emitted by the topic model - confirm.
topics = {
    "0": "Analyst Update",
    "1": "Fed | Central Banks",
    "2": "Company | Product News",
    "3": "Treasuries | Corporate Debt",
    "4": "Dividend",
    "5": "Earnings",
    "6": "Energy | Oil",
    "7": "Financials",
    "8": "Currencies",
    "9": "General News | Opinion",
    "10": "Gold | Metals | Materials",
    "11": "IPO",
    "12": "Legal | Regulation",
    "13": "M&A | Investments",
    "14": "Macro",
    "15": "Markets",
    "16": "Politics",
    "17": "Personnel Change",
    "18": "Stock Commentary",
    "19": "Stock Movement",
}
# Account names to follow (62 entries; "NYSE" is listed twice).
# NOTE(review): a few entries ("Investing.com", "(((The Daily Shot)))",
# "Bloomberg Markets") look like display names rather than handles - confirm
# they are valid wherever these names are turned into a query.
user_name = [
    "Investing.com",
    "(((The Daily Shot)))",
    "Bloomberg Markets",
    "FirstSquawk",
    "MarketWatch",
    "markets",
    "FinancialTimes",
    "CNBC",
    "ReutersBiz",
    "BreakingNews",
    "LiveSquawk",
    "NYSE",
    "WSJmarkets",
    "FT",
    "TheStreet",
    "ftfinancenews",
    "BloombergTV",
    "Nasdaq",
    "NYSE",
    "federalreserve",
    "NewYorkFed",
    "sffed",
    "WSJCentralBanks",
    "RichmondFed",
    "ecb",
    "stlouisfed",
    "WorldBank",
    "MarketCurrents",
    "OpenOutcrier",
    "BullTradeFinder",
    "WallStChatter",
    "Briefingcom",
    "SeekingAlpha",
    "realDonaldTrump",
    "AswathDamodaran",
    "ukarlewitz",
    "alphatrends",
    "Investor666",
    "ACInvestorBlog",
    "ZorTrades",
    "ScottNations",
    "TradersCorner",
    "TraderGoalieOne",
    "option_snipper",
    "jasonleavitt",
    "LMT978",
    "OptionsHawk",
    "andrewbtodd",
    "Terri1618",
    "SunriseTrader",
    "traderstewie",
    "TMLTrader",
    "IncredibleTrade",
    "NYFedResearch",
    "YahooFinance",
    "business",
    "economics",
    "IMFNews",
    "Market_Screener",
    "QuickTake",
    "NewsFromBW",
    "BNCommodities",
]
# Numeric account ids, kept as strings (62 entries; "69620713" and "21323268"
# each appear twice).
# NOTE(review): this list has the same length as user_name, but nothing here
# establishes that the two are index-aligned - confirm before pairing them.
user_id = [
    "988955288",
    "423769635",
    "69620713",
    "59393368",
    "3295423333",
    "624413",
    "69620713",
    "4898091",
    "20402945",
    "15110357",
    "6017542",
    "21323268",
    "28164923",
    "18949452",
    "15281391",
    "11014272",
    "35002876",
    "18639734",
    "21323268",
    "26538229",
    "15072071",
    "117237387",
    "327484803",
    "16532451",
    "83466368",
    "71567590",
    "27860681",
    "15296897",
    "2334614718",
    "2222635612",
    "3382363841",
    "72928001",
    "23059499",
    "25073877",
    "33216611",
    "37284991",
    "15246621",
    "293458690",
    "55561590",
    "18560146",
    "244978426",
    "85523269",
    "276714687",
    "2806294664",
    "16205561",
    "1064700308",
    "61342056",
    "184126162",
    "405820375",
    "787439438964068352",
    "52166809",
    "2715646770",
    "47247213",
    "374672240",
    "19546277",
    "34713362",
    "144274618",
    "25098482",
    "102325185",
    "252751061",
    "976297820532518914",
    "804556370",
]
def convert_user_names(user_name: list):
    '''Join user names into a ``from:a OR from:b`` search query string.

    Each distinct name contributes one ``from:<name>`` clause, in order of
    first appearance. Repeated names are dropped: they add nothing to an OR
    query and only make it longer.

    Args:
        user_name: Names to include in the query.

    Returns:
        str: The clauses joined with ``" OR "``; an empty string when
        ``user_name`` is empty.
    '''
    # dict.fromkeys removes duplicates while keeping first-seen order.
    unique_users = dict.fromkeys(user_name)
    return " OR ".join(f"from:{user}" for user in unique_users)