Spaces:

yashasgupta
/

rag_system

Sleeping

App Files Files Community

rag_system / app.py

yashasgupta

Update app.py

8f74b08 verified 4 months ago

raw

history blame

3.33 kB

	import streamlit as st
	from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
	from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
	import os
	import nltk
	# Download the NLTK "punkt" tokenizer data (presumably needed by the
	# NLTK-based text splitter imported further down in this file).
	nltk.download("punkt")

	st.title(':blue[Langchain:] A Rag System on “Leave No Context Behind” Paper')
	st.header("AI Chatbot :robot_face:")

	# The key is provided through the "k4" environment variable / secret and
	# re-exported as GOOGLE_API_KEY. os.getenv returns None when the variable is
	# unset, and assigning None into os.environ raises TypeError, so end the run
	# with a readable message instead of a traceback.
	google_api_key = os.getenv("k4")
	if not google_api_key:
	    st.error('Missing API key: set the "k4" environment variable / secret.')
	    st.stop()
	os.environ["GOOGLE_API_KEY"] = google_api_key
	# Prompt template: a fixed system instruction followed by a human message
	# that carries the retrieved context and the user's question.

	# System message: fixes the bot's role and tells it to answer from the context.
	system_message = SystemMessage(content="""You are a Helpful AI Bot.
	You take the context and question from user. Your answer should be based on the specific context.""")

	# Human message template with {context} and {question} placeholders.
	human_prompt = HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
	Context:
	{context}

	Question:
	{question}

	Answer: """)

	chat_template = ChatPromptTemplate.from_messages([system_message, human_prompt])

	# Chat model used to generate answers.
	from langchain_google_genai import ChatGoogleGenerativeAI

	chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")

	from langchain_core.output_parsers import StrOutputParser

	# Parser applied to the chat model's output as the last step of each chain.
	output_parser = StrOutputParser()

	# Plain pipeline without retrieval: prompt -> chat model -> output parser.
	# Runnables are composed with the bare "|" operator.
	chain = chat_template | chat_model | output_parser

	import tempfile

	from langchain_community.document_loaders import PDFMinerLoader
	from langchain_text_splitters import NLTKTextSplitter

	uploaded_file = st.file_uploader("Choose a pdf file", type="pdf")

	# file_uploader returns None until the user has picked a file. Nothing below
	# can run without a document, so end this script run here.
	if uploaded_file is None:
	    st.info("Please upload a PDF file to continue.")
	    st.stop()

	# PDFMinerLoader takes a file path rather than an in-memory upload object,
	# so write the uploaded bytes to a temporary file just for the load and
	# remove it afterwards.
	with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_pdf:
	    tmp_pdf.write(uploaded_file.getvalue())
	try:
	    pdf_loader = PDFMinerLoader(tmp_pdf.name)
	    dat_nik = pdf_loader.load()
	finally:
	    os.remove(tmp_pdf.name)

	# Split the loaded document into overlapping chunks for embedding.
	text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
	chunks = text_splitter.split_documents(dat_nik)

	# dat = PDFMinerLoader("2404.07143.pdf")
	# dat_nik =dat.load()
	# # Split the document into chunks


	# text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)

	# chunks = text_splitter.split_documents(dat_nik)
	# Embedding model used both to index the chunks and to embed queries.
	from langchain_google_genai import GoogleGenerativeAIEmbeddings

	embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

	# Store the chunks in a vector store.
	from langchain_community.vectorstores import Chroma

	# The database must be written to and read from the same directory;
	# otherwise the retriever queries a different (empty) store than the one
	# that was just populated.
	persist_directory = "./chroma_db_1"

	# Create the Chroma database from the document chunks.
	db = Chroma.from_documents(chunks, embedding_model, persist_directory=persist_directory)

	# Save the database to disk.
	# NOTE(review): newer Chroma versions appear to persist automatically, which
	# would make this call redundant - confirm against the installed version.
	db.persist()

	# Open a connection to the persisted database.
	db_connection = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)

	# Retriever over that connection, configured to return the top 5 results.
	retriever = db_connection.as_retriever(search_kwargs={"k": 5})


	from langchain_core.runnables import RunnablePassthrough #takes user's question.

	def format_docs(docs) -> str:
	    """Join the text of several documents into one context string.

	    Args:
	        docs: Iterable of objects exposing a ``page_content`` string.

	    Returns:
	        The ``page_content`` values separated by blank lines; an empty
	        string when ``docs`` is empty.
	    """
	    return "\n\n".join(doc.page_content for doc in docs)

	# Retrieval-augmented chain. The incoming question is sent to the retriever
	# and its results are merged by format_docs into the prompt's {context};
	# the same question is passed through unchanged as {question}. The filled
	# prompt then goes to the chat model and the output parser.
	rag_chain = (
	    {"context": retriever | format_docs, "question": RunnablePassthrough()}
	    | chat_template
	    | chat_model
	    | output_parser
	)

	# Question box and submit handler: run the RAG chain on the entered question
	# and show both the query and the generated answer.
	user_input = st.text_area("Ask Questions to AI")
	if st.button("Submit"):
	    if not user_input.strip():
	        # Avoid sending an empty query through retrieval and the model.
	        st.warning("Please enter a question first.")
	    else:
	        st.subheader(":green[Query:]")
	        st.subheader(user_input)
	        response = rag_chain.invoke(user_input)
	        st.subheader(":green[Response:-]")
	        st.write(response)