import os
import io

import fitz  # PyMuPDF, used to extract text from the uploaded PDF
import nltk
import streamlit as st
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_text_splitters import NLTKTextSplitter

# NLTKTextSplitter relies on the "punkt" sentence tokenizer.
nltk.download("punkt", quiet=True)

st.title(':blue[LangChain:] A RAG System on the “Leave No Context Behind” Paper')
st.header("AI Chatbot :robot_face:")

# The Google API key must already be present in the environment (e.g. as a
# Space secret); assigning os.getenv(...) back into os.environ would raise a
# TypeError whenever the key is unset.
if not os.getenv("GOOGLE_API_KEY"):
    st.error("GOOGLE_API_KEY is not set. Please configure it before running the app.")
    st.stop()
# Prompt template: the system message fixes the bot's role and behavior; the
# human message injects the retrieved context and the user's question.
chat_template = ChatPromptTemplate.from_messages([
    SystemMessage(content="""You are a helpful AI bot.
    You take the context and question from the user. Your answer should be based on the specific context."""),
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context:
    {context}
    Question:
    {question}
    Answer: """)
])
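# Quick sanity check of the prompt wiring (illustrative only; the placeholder
# strings are made up, not taken from the paper):
#
#     msgs = chat_template.format_messages(context="<some chunk>",
#                                          question="<some question>")
#     # msgs[0] is the SystemMessage, msgs[1] the formatted HumanMessage.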
# Chat model and output parser; the full RAG pipeline is assembled below once
# the retriever is available.
chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")
output_parser = StrOutputParser()
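# Smoke test for the prompt/model/parser stack on its own, without retrieval
# (illustrative; supply your own context string):
#
#     print((chat_template | chat_model | output_parser).invoke(
#         {"context": "some pasted paragraph", "question": "What does it say?"}))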
def extract_text_from_pdf(pdf_file):
    """Extract plain text from every page of a PDF file-like object."""
    document = fitz.open(stream=pdf_file, filetype="pdf")
    text = ""
    for page_num in range(len(document)):
        page = document.load_page(page_num)
        text += page.get_text()
    return text
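# fitz.open(stream=...) accepts raw bytes as well as a BytesIO object, so for a
# local file (hypothetical path) either form would work:
#
#     extract_text_from_pdf(open("paper.pdf", "rb").read())
#     extract_text_from_pdf(io.BytesIO(open("paper.pdf", "rb").read()))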
# Streamlit file uploader
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
if uploaded_file is not None:
    # Extract text from the uploaded PDF
    pdf_file = io.BytesIO(uploaded_file.read())
    text = extract_text_from_pdf(pdf_file)

    # Split the raw text into overlapping chunks. create_documents() takes raw
    # strings and returns Document objects; split_documents() expects Document
    # inputs and fails on a plain string.
    text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.create_documents([text])

    # Embed the chunks and store them in a persistent Chroma collection
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db")
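    # Optional sanity check of the fresh index (illustrative; the query string
    # is made up):
    #
    #     hits = db.similarity_search("What problem does the paper address?", k=2)
    #     for hit in hits:
    #         print(hit.page_content[:100])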
print(f"Current working directory: {os.getcwd()}")
# Check if the 'static' directory exists
if not os.path.exists('static'):
print("'static' directory does not exist. Creating it...")
os.makedirs('static')
    db.persist()

    # Reopen the persisted collection and expose it as a top-5 retriever
    db_connection = Chroma(persist_directory="./chroma_db", embedding_function=embedding_model)
    retriever = db_connection.as_retriever(search_kwargs={"k": 5})

    def format_docs(docs):
        """Join retrieved documents into a single context string."""
        return "\n\n".join(doc.page_content for doc in docs)

    # RAG chain: the question string passes straight through to {question},
    # while the retriever fetches matching chunks to fill {context}.
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | chat_template
        | chat_model
        | output_parser
    )
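    # What rag_chain.invoke(question) does, unrolled (a sketch, assuming a
    # LangChain version where retrievers are Runnables):
    #
    #     docs = retriever.invoke(question)                  # top-5 chunks
    #     prompt = chat_template.format_messages(
    #         context=format_docs(docs), question=question)
    #     answer = output_parser.invoke(chat_model.invoke(prompt))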
    user_input = st.text_area("Ask Questions to AI")
    if st.button("Submit"):
        st.subheader(":green[Query:]")
        st.subheader(user_input)
        # Invoke with the plain question string: RunnablePassthrough forwards
        # it to {question}, and the retriever uses the same string as its query
        # (a dict like {"question": ...} would be passed to the retriever whole).
        response = rag_chain.invoke(user_input)
        st.subheader(":green[Response:]")
        st.write(response)
else:
    st.write("Please upload a PDF file to get started.")