kristada673 committed on
Commit db82306
Parent: 4ffaa17

Create app.py

Files changed (1)
app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
+ import os, gradio
+ from langchain.document_loaders import UnstructuredPDFLoader
+ from vectorstore import VectorstoreIndexCreator  # local module; shadows langchain's VectorstoreIndexCreator and adds from_persistent_index()
+
+ # The OpenAI API key must be available in the environment (e.g., set as a Space secret)
+ assert os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY environment variable is not set"
+
+ text_folder = '10K_Annual_Reports'
+ loaders = [UnstructuredPDFLoader(os.path.join(text_folder, fn)) for fn in os.listdir(text_folder)]
+
+ # Create the index if it does not exist yet, and persist it to disk
+ if not os.path.isfile('VectorStoreIndex/chroma-embeddings.parquet'):
+     from langchain.vectorstores import Chroma
+     index = VectorstoreIndexCreator(vectorstore_cls=Chroma, vectorstore_kwargs={"persist_directory": "VectorStoreIndex/"}).from_loaders(loaders)
+     index.vectorstore.persist()
+
+ # Load the saved index (uses the local vectorstore module's from_persistent_index)
+ index_saved = VectorstoreIndexCreator().from_persistent_index("VectorStoreIndex/")
+
+ description = """This is an AI conversational agent: you provide it with companies' annual reports, and it studies them and answers any questions
+ you have about them. Currently, the LLM has been trained on the following companies' 10-K reports: Amazon, Apple, Alphabet (Google), Meta (Facebook), Microsoft,
+ Netflix and Tesla. I plan to include more companies' 10-K reports in the future.
+
+ Once the LLM is trained on a new 10-K report, it stores the document's vector embeddings locally using ChromaDB, to make querying faster and also to
+ save the time and money of creating the vector embeddings for the same document again in the future.
+
+ The LLM's universe is only the 10-K reports it has been trained on; it cannot pull information from the internet. So, you can ask it about anything that is
+ contained in those 10-K reports. If it cannot find an answer to your query within the 10-K reports, it will reply with "I don't know". Some examples of questions
+ you can ask are:
+
+ - What are the risks for Tesla?
+ - What were Google's earnings for the last fiscal year?
+ - Who are the competitors of Apple?
+
+ An example of querying about something the LLM's training did not include:
+
+ - Query: "What is Tesco?"
+ - Response: " Tesco is not mentioned in the context, so I don't know."
+ """
+
+ def chat_response(query):
+     return index_saved.query(query)
+
+ interface = gradio.Interface(fn=chat_response, inputs="text", outputs="text", title='Annual Reports GPT', description=description)
+
+ interface.launch()  # server_name="0.0.0.0", server_port=8080, share=True
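
Note: `from vectorstore import VectorstoreIndexCreator` pulls in a local vectorstore.py that is not part of this diff; it is what supplies the from_persistent_index() call used above, since langchain's own VectorstoreIndexCreator has no such method. Below is a minimal sketch of what that helper might look like, assuming it subclasses langchain's class and simply reopens the persisted Chroma store; the implementation is an assumption, not the actual file from this repo.

    # vectorstore.py -- hypothetical sketch; not part of this commit
    from langchain.indexes import VectorstoreIndexCreator as _BaseCreator
    from langchain.indexes.vectorstore import VectorStoreIndexWrapper
    from langchain.vectorstores import Chroma

    class VectorstoreIndexCreator(_BaseCreator):
        def from_persistent_index(self, path: str) -> VectorStoreIndexWrapper:
            # Reopen the Chroma store persisted earlier via index.vectorstore.persist(),
            # reusing the creator's embedding function (OpenAIEmbeddings by default)
            vectorstore = Chroma(persist_directory=path, embedding_function=self.embedding)
            return VectorStoreIndexWrapper(vectorstore=vectorstore)

With a helper along these lines, index_saved.query(query) answers questions over the reopened store without re-embedding the PDFs on every start-up.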