import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from pdfminer.high_level import extract_text
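
# Pipeline: extract text from the uploaded PDFs, split it into chunks,
# embed the chunks, index them in FAISS, and answer questions with a
# RetrievalQA chain over that index.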

def get_pdf_text(files):
    """Extract and concatenate the text of every uploaded PDF."""
    full_text = ""
    for file in files:
        text = extract_text(file)
        text = text.replace("\n", " ")
        full_text += text  # append in upload order
    return full_text

st.title("Embedding Creation for Langchain")
st.header("File Upload")
files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")
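
# Streamlit reruns this script on every interaction, so the chain is kept
# in st.session_state to survive the rerun triggered by the "Ask" button.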
if files:
    st.header("Start Conversion")
    if st.button("Ready!"):
        with st.spinner("Creating chain..."):
            full_text = get_pdf_text(files)
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
            chunks = text_splitter.split_text(full_text)
            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
            vectorstore = FAISS.from_texts(chunks, embeddings)
            # Records Q&A turns; the default "stuff" prompt does not consume them.
            memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
            # Wrap the Hub-hosted model as a LangChain LLM; a raw transformers
            # model cannot be passed to a chain directly. Requires the
            # HUGGINGFACEHUB_API_TOKEN environment variable to be set.
            llm = HuggingFaceHub(repo_id="red1xe/Llama-2-7B-codeGPT")
            # "stuff" inserts the retrieved chunks directly into the prompt.
            st.session_state.chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=vectorstore.as_retriever(),
                memory=memory,
            )
        st.success("Done!")
st.header("Start Chat")
st.subheader("Ask a question")
question = st.text_input("Question")
if st.button("Ask"):
with st.spinner("Thinking..."):
answer = chain.query(question)
st.success(answer) |