import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from transformers import AutoModelForCausalLM, AutoTokenizer

from pdfminer.high_level import extract_text
def get_pdf_text(files):
    """Extract and concatenate plain text from a sequence of PDF files.

    Args:
        files: Iterable of file-like objects (e.g. Streamlit ``UploadedFile``)
            that pdfminer's ``extract_text`` can read.

    Returns:
        One string with all newlines collapsed to spaces, with the files'
        text concatenated in upload order.
    """
    parts = []
    for file in files:
        # extract_text accepts file-like objects directly.
        parts.append(extract_text(file).replace("\n", " "))
    # BUG FIX: the original did ``full_text = text + full_text``, which
    # prepended each file and so reversed the upload order (and built the
    # string quadratically). join preserves order and is O(n).
    return "".join(parts)

st.title("Embedding Creation for Langchain")
st.header("File Upload")
files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")

if files:
    st.header("Start Conversion")
    if st.button("Ready!"):
        with st.spinner("Creating chain..."):
            full_text = get_pdf_text(files)
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
            chunks = text_splitter.split_text(full_text)
            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
            vectorstore = FAISS.from_texts(chunks, embeddings)
            memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
            # BUG FIX: AutoModelForCausalLM.from_pretrained() returns a raw
            # transformers model, which is NOT a LangChain LLM and cannot be
            # passed to RetrievalQA. Use the HuggingFaceHub wrapper (already
            # imported at the top of the file) for the same repo instead.
            llm = HuggingFaceHub(repo_id="red1xe/Llama-2-7B-codeGPT")
            # BUG FIX: "retrieval-qa" is not a valid chain_type; "stuff" is
            # the standard document-combination strategy for RetrievalQA.
            chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=vectorstore.as_retriever(),
                memory=memory,
            )
            # BUG FIX: Streamlit reruns the whole script on every widget
            # interaction, so a chain held in a local variable is lost the
            # moment the user clicks another button. Persist it in
            # session_state so the chat section below can use it.
            st.session_state["chain"] = chain
        st.success("Done!")

# Chat UI lives outside the "Ready!" branch: a nested st.button inside
# another button's if-block can never fire (the outer button's state
# resets on the rerun triggered by the inner click).
if "chain" in st.session_state:
    st.header("Start Chat")
    st.subheader("Ask a question")
    question = st.text_input("Question")
    if st.button("Ask"):
        with st.spinner("Thinking..."):
            # BUG FIX: RetrievalQA has no .query() method; .run() is the
            # single-input convenience call.
            answer = st.session_state["chain"].run(question)
            st.success(answer)