# chainlit run app.py -w
# You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
# OpenAI Chat completion
from dotenv import load_dotenv
load_dotenv()
import os
import sys
import getpass
# import nest_asyncio
# import pandas as pd
import faiss
import openai
import chainlit as cl  # importing chainlit for our app
# https://docs.chainlit.io/api-reference/step-class#update-a-step
# DEPRECATED: from chainlit.prompt import Prompt, PromptMessage  # importing prompt tools
import llama_index
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import set_global_handler
from llama_index.core.node_parser import MarkdownElementNodeParser
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
# from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
from llama_parse import LlamaParse
from openai import AsyncOpenAI  # importing openai for API usage
# The following line is needed to run locally. Without it, it finds the GPU cards of my PC.
# os.environ["CUDA_VISIBLE_DEVICES"] = ""
# GET KEYS
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
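# Minimal fail-fast check (a sketch, not part of the original flow): raise a clear error
# now if either key is missing, instead of an opaque auth failure later in the API calls.
for _key_name, _key_value in [("LLAMA_CLOUD_API_KEY", LLAMA_CLOUD_API_KEY),
                              ("OPENAI_API_KEY", OPENAI_API_KEY)]:
    if not _key_value:
        raise EnvironmentError(f"{_key_name} is not set; add it to your .env file or environment.")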
""" | |
os.environ["LLAMA_CLOUD_API_KEY"] = getpass.getpass("LLamaParse API Key:") | |
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:") | |
# os.environ["WANDB_API_KEY"] = getpass.getpass("WandB API Key: ") | |
""" | |
# nest_asyncio.apply() #not needed for the app | |
# PARSING the pdf file with LlamaParse
parser = LlamaParse(
    result_type="markdown",
    verbose=True,
    language="en",
    num_workers=2,
)
nvidia_docs = parser.load_data(["./nvidia_2tables.pdf"])
# Note: nvidia_docs contains only one file here (it could contain more); nvidia_docs[0] is the pdf we loaded.
# print(nvidia_docs[0].text[:1000])
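# Optional sketch: LlamaParse is a cloud API call, so re-parsing the same pdf on every
# restart is slow and costly. One assumed (untested here) approach is to cache the parsed
# documents to disk with pickle; the cache file name is an assumption for illustration.
# import pickle
# from pathlib import Path
# _parse_cache = Path("./nvidia_docs.pkl")
# if _parse_cache.exists():
#     nvidia_docs = pickle.loads(_parse_cache.read_bytes())
# else:
#     nvidia_docs = parser.load_data(["./nvidia_2tables.pdf"])
#     _parse_cache.write_bytes(pickle.dumps(nvidia_docs))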
# Getting Settings out of llama_index.core, which is a major part of their v0.10 update!
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
# Using MarkdownElementNodeParser to make sense of the Markdown objects, so we can leverage
# the potentially structured information (e.g. tables) in the parsed documents.
# Unclear if the following is needed, as I do not know whether the parsed output contains Markdown objects.
node_parser = MarkdownElementNodeParser(llm=OpenAI(model="gpt-3.5-turbo"), num_workers=8)
nodes = node_parser.get_nodes_from_documents(documents=[nvidia_docs[0]])
""" | |
# Let's see what's in the metadata of the nodes: | |
for nd in nodes: | |
print(nd.metadata) | |
for k,v in nd: | |
if k=='table_df': | |
print(nd) | |
""" | |
# Now we extract our `base_nodes` and `objects` to create the `VectorStoreIndex`.
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)
# We could use the plain VectorStoreIndex from llama_index.core,
# or we can back it with FAISS via llama-index-vector-stores-faiss.
# Here we use FAISS, setting the vector dimension to 1536 to match
# the output size of the text-embedding-3-small model configured above.
faiss_dim = 1536
faiss_index = faiss.IndexFlatL2(faiss_dim)  # a flat (exact) L2-distance index; new vectors are appended to it
# Creating the FaissVectorStore and its recursive_index_faiss
llama_faiss_vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=llama_faiss_vector_store)
recursive_index_faiss = VectorStoreIndex(nodes=base_nodes + objects, storage_context=storage_context)
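# Optional persistence sketch (assumed, not part of the original flow): save the built
# index so later runs can reload it instead of re-embedding the whole document.
# recursive_index_faiss.storage_context.persist(persist_dir="./storage")
# ...and on a later run, rebuild it from disk:
# from llama_index.core import load_index_from_storage
# vector_store = FaissVectorStore.from_persist_dir("./storage")
# storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir="./storage")
# recursive_index_faiss = load_index_from_storage(storage_context=storage_context)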
# Now we can build our Recursive Query Engine with reranking!
# We'll need to do a couple of steps:
# 1. Initialize our reranker using `FlagEmbeddingReranker` powered by `BAAI/bge-reranker-large`.
# 2. Set up our recursive query engine!
# We skip the reranker here to see whether the app stops timing out on Hugging Face.
# reranker = FlagEmbeddingReranker(
#     top_n=5,
#     model="BAAI/bge-reranker-large",
# )
recursive_query_engine = recursive_index_faiss.as_query_engine(
    similarity_top_k=5,
    # we will not post-process the answer with the reranker: it takes too long...
    # node_postprocessors=[reranker],
    verbose=True,
)
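# Optional smoke test (sketch): run one query outside Chainlit to confirm the engine
# answers before serving the app; the question mirrors the commented user_query below.
# print(recursive_query_engine.query("Who are the E-VP, Operations - and how old are they?"))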
""" | |
# Create pandas dataframe to store query+generated response+added truth | |
columns=["Query", "Response", "Truth"] | |
gen_df = pd.DataFrame(columns=columns,dtype='str') | |
""" | |
# ChatOpenAI Templates
system_template = """Use the following pieces of context to answer the user's question.
If you don't know the answer, say that you don't know, do not try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source inside the document from which you got your answer.
You are a helpful assistant who always speaks in a pleasant tone! """
user_template = """ Think through your response step by step."""
# user_query = "Who are the E-VP, Operations - and how old are they?"
""" test function | |
def retriever_resp(prompt): | |
import time | |
response = "this is my response" | |
time.sleep(5) | |
return response | |
""" | |
@cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
async def main(message: cl.Message):
    settings = cl.user_session.get("settings")
    # user_query is populated from what the user types
    user_query = message.content
    # Add instructions before and after the user query which will not show in the app.
    prompt = system_template + user_query + user_template
    response = recursive_query_engine.query(prompt)
    str_resp = "{}".format(response)
    msg = cl.Message(content=str_resp)
    await msg.send()
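# Optional streaming variant (sketch): the header links to Chainlit's streaming docs, yet
# the handler above sends the whole answer in one message. With a streaming query engine,
# tokens could be forwarded as they arrive; this body belongs inside main(), so it is
# left commented out here and is untested.
# streaming_engine = recursive_index_faiss.as_query_engine(similarity_top_k=5, streaming=True)
# streaming_response = streaming_engine.query(prompt)
# msg = cl.Message(content="")
# for token in streaming_response.response_gen:
#     await msg.stream_token(token)
# await msg.send()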