"""Streamlit app: index uploaded PDFs in a FAISS store and answer questions.

The user uploads one or more PDFs, clicks "Ready!" to split their text into
chunks and embed them into a FAISS vector store, then asks questions that are
answered with the text of the most similar chunks.
"""

import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from transformers import AutoModelForCausalLM, AutoTokenizer
from pdfminer.high_level import extract_text


def get_pdf_text(files):
    """Extract the text of every uploaded PDF and return it as one string.

    Newlines inside each document are replaced with spaces. Documents are
    concatenated in upload order and separated by a single space so the last
    word of one file does not fuse with the first word of the next.

    Args:
        files: Iterable of paths or binary file-like objects accepted by
            ``pdfminer.high_level.extract_text``.

    Returns:
        The combined text; an empty string when ``files`` is empty.
    """
    return " ".join(extract_text(file).replace("\n", " ") for file in files)


def retrieve_info(query):
    """Return the text of the three stored chunks most similar to ``query``.

    The vector store is read from ``st.session_state["db"]``, so this must
    only be called after the index has been built.

    Args:
        query: The user's question.

    Returns:
        The matching chunks' text joined with single spaces.
    """
    db = st.session_state["db"]
    similar_response = db.similarity_search(query, k=3)
    # LangChain ``Document`` exposes its text as ``page_content`` (singular).
    return " ".join(doc.page_content for doc in similar_response)


st.title("Embedding Creation for Langchain")

st.header("File Upload")
files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")

if files:
    st.header("Start Conversion")
    if st.button("Ready!"):
        with st.spinner("Creating chain..."):
            full_text = get_pdf_text(files)
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
            chunks = text_splitter.split_text(full_text)
            if not chunks:
                # Nothing to embed (e.g. scanned PDFs without a text layer);
                # drop any stale index instead of building an empty one.
                if "db" in st.session_state:
                    del st.session_state["db"]
                st.error("No text could be extracted from the uploaded files.")
            else:
                embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
                # Streamlit reruns the whole script on every widget
                # interaction and the button is only True for one run, so the
                # index must live in session state to survive until the user
                # submits a question.
                st.session_state["db"] = FAISS.from_texts(chunks, embeddings)
                # Not consumed by this script yet; kept alongside the index
                # so it persists across reruns as well.
                st.session_state["memory"] = ConversationBufferMemory(
                    memory_key='chat_history',
                    return_messages=True,
                )

    if "db" in st.session_state:
        st.header("Chatbot")
        st.subheader("Ask a question")
        question = st.text_input("Question")
        if question:
            st.subheader("Answer")
            answer = retrieve_info(question)
            st.write(answer)