# rag_system / app.py
import streamlit as st
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
import os
import io
import nltk
import fitz  # PyMuPDF

nltk.download("punkt")
nltk.download("punkt_tab")  # needed on newer NLTK releases, where punkt was split into punkt_tab

st.title(':blue[LangChain:] A RAG System on the “Leave No Context Behind” Paper')
st.header("AI Chatbot :robot_face:")

# The Google API key must be available as an environment variable
# (e.g., a Hugging Face Space secret)
if not os.getenv("GOOGLE_API_KEY"):
    st.error("GOOGLE_API_KEY environment variable is not set.")
    st.stop()
# Create the prompt template
chat_template = ChatPromptTemplate.from_messages([
    # The system message establishes the bot's role and behavior
    SystemMessage(content="""You are a helpful AI bot.
You take the context and the question from the user. Your answer should be based on the specific context."""),
    # The human message carries the retrieved context and the user's question
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
Context:
{context}
Question:
{question}
Answer: """)
])
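# Illustrative only (hypothetical values): the template renders into a
# two-message prompt, e.g.
#   chat_template.format_messages(context="...retrieved chunks...",
#                                 question="What is Infini-attention?")
# returns [SystemMessage(...), HumanMessage("Answer the question based on ...")].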
# Initialize the chat model
from langchain_google_genai import ChatGoogleGenerativeAI
chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")

# Initialize the output parser (extracts the model's reply as a plain string)
from langchain_core.output_parsers import StrOutputParser
output_parser = StrOutputParser()
# Imports for text splitting, embeddings, and the vector store
from langchain_text_splitters import NLTKTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_core.documents import Document
def extract_text_from_pdf(pdf_file):
    # Open the PDF from an in-memory stream and concatenate every page's text
    document = fitz.open(stream=pdf_file, filetype="pdf")
    text = ""
    for page_num in range(len(document)):
        page = document.load_page(page_num)
        text += page.get_text()
    return text
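# Usage sketch (hypothetical file name): reading a local PDF into memory and
# extracting its text would look like
#   extract_text_from_pdf(io.BytesIO(open("paper.pdf", "rb").read()))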
# Streamlit file uploader
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Extract text from the uploaded PDF
    pdf_file = io.BytesIO(uploaded_file.read())
    text = extract_text_from_pdf(pdf_file)

    # Split the text into overlapping chunks; split_documents expects Document
    # objects, so wrap the raw string first
    text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.split_documents([Document(page_content=text)])

    # Embed the chunks and build the Chroma vector store
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db")
print(f"Current working directory: {os.getcwd()}")
# Check if the 'static' directory exists
if not os.path.exists('static'):
print("'static' directory does not exist. Creating it...")
os.makedirs('static')
    # Persist the index to disk (a no-op on Chroma >= 0.4, which persists automatically)
    db.persist()
    db_connection = Chroma(persist_directory="./chroma_db", embedding_function=embedding_model)
    retriever = db_connection.as_retriever(search_kwargs={"k": 5})

    def format_docs(docs):
        # Join the retrieved chunks into a single context block
        return "\n\n".join(doc.page_content for doc in docs)

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | chat_template
        | chat_model
        | output_parser
    )
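    # How the chain runs: the dict fans the input question out to both branches
    # in parallel. The retriever fetches the top-5 chunks and format_docs joins
    # them into {context}, while RunnablePassthrough() forwards the question
    # unchanged into {question}; the filled prompt then flows to the model and
    # the string parser.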
    user_input = st.text_area("Ask Questions to AI")
    if st.button("Submit"):
        st.subheader(":green[Query:]")
        st.subheader(user_input)
        # The chain expects the bare question string, not a dict; the parallel
        # branches above route it to the retriever and the prompt
        response = rag_chain.invoke(user_input)
        st.subheader(":green[Response:]")
        st.write(response)
else:
    st.write("Please upload a PDF file to get started.")