Spaces:
No application file
No application file
kristada673
committed on
Commit
•
db82306
1
Parent(s):
4ffaa17
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os

import gradio
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.indexes import VectorstoreIndexCreator

# NOTE(review): the local module deliberately shadows langchain's
# VectorstoreIndexCreator imported above — presumably it subclasses it to add
# the from_persistent_index() used below; verify against vectorstore.py.
from vectorstore import VectorstoreIndexCreator
# Make the OpenAI API key visible to langchain. The original code assigned
# os.getenv(...) straight into os.environ, which is a no-op when the key is
# set and raises a cryptic ``TypeError: str expected, not NoneType`` when it
# is missing — fail fast with an actionable message instead.
_api_key = os.getenv("OPENAI_API_KEY")
if not _api_key:
    raise RuntimeError("OPENAI_API_KEY environment variable is not set")
os.environ['OPENAI_API_KEY'] = _api_key

# Folder containing the 10-K annual-report PDFs to index.
text_folder = '10K_Annual_Reports'
loaders = [
    UnstructuredPDFLoader(os.path.join(text_folder, fn))
    for fn in os.listdir(text_folder)
]

# Build and persist the Chroma index only on first run; subsequent runs
# reuse the embeddings already saved under VectorStoreIndex/ (saves the
# time and OpenAI cost of re-embedding the same documents).
if not os.path.isfile('VectorStoreIndex/chroma-embeddings.parquet'):
    from langchain.vectorstores import Chroma
    index = VectorstoreIndexCreator(
        vectorstore_cls=Chroma,
        vectorstore_kwargs={"persist_directory": "VectorStoreIndex/"},
    ).from_loaders(loaders)
    index.vectorstore.persist()

# Reload the persisted index. from_persistent_index comes from the local
# vectorstore module's VectorstoreIndexCreator (see imports) — TODO confirm.
index_saved = VectorstoreIndexCreator().from_persistent_index("VectorStoreIndex/")
# User-facing description rendered on the Gradio page. Fixed typos:
# "competetors" -> "competitors", stray quote after "Tesla.", and
# "Some example" -> "Some examples".
description = """This is an AI conversational agent where you provide it with the annual reports of companies, and it can study it and answer any questions
you have about it. Currently, the LLM has been trained on the following companies' 10-K reports: Amazon, Apple, Alphabet (Google), Meta (Facebook), Microsoft,
Netflix and Tesla. I plan to include more companies' 10-K reports in future.

Once the LLM is trained on a new 10-K report, it stores the vector embeddings of the document locally using ChromaDB to make the querying faster and also to
save time and money on creating the vector embeddings for the same document in future.

The LLM's universe is only the 10-K reports it has been trained on; it cannot pull information from the internet. So, you can ask it about anything that's
contained in their 10-K reports. If it cannot find an answer to your query within the 10-K reports, it will reply with "I don't know". Some examples of questions
you can ask are:

- What are the risks for Tesla?
- What was Google's earnings for the last fiscal year?
- Who are the competitors of Apple?

An example of querying about something the LLM's training did not include:

- Query: "What is Tesco?"
- Response: " Tesco is not mentioned in the context, so I don't know."
"""
def chat_response(query):
    """Run the user's *query* against the persisted 10-K index and return the answer text."""
    answer = index_saved.query(query)
    return answer
# Wire the query function into a minimal Gradio UI: one text box in,
# one text answer out.
interface = gradio.Interface(
    fn=chat_response,
    inputs="text",
    outputs="text",
    title='Annual Reports GPT',
    description=description,
)

# Default launch; pass server_name="0.0.0.0", server_port=8080, share=True
# here if the app needs to be reachable from outside the container.
interface.launch()