red1xe committed on
Commit
e9eaff4
1 Parent(s): 5beeb23

some changes

Browse files
Files changed (1) hide show
  1. app.py +38 -7
app.py CHANGED
@@ -1,16 +1,47 @@
1
  import streamlit as st
2
  from langchain.embeddings import HuggingFaceEmbeddings
3
  from langchain.vectorstores import FAISS
 
 
 
 
 
 
4
  from pdfminer.high_level import extract_text
 
 
 
 
 
 
 
5
 
6
  st.title("Embedding Creation for Langchain")
7
  st.header("File Upload")
8
  files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")
9
-
10
  if files:
11
- st.header("PDFs to Text")
12
- if st.button("Convert"):
13
- for file in files:
14
- text = extract_text(file)
15
- full_text = text.replace("\n", " ")
16
- st.write(full_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from langchain.embeddings import HuggingFaceEmbeddings
3
  from langchain.vectorstores import FAISS
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.memory import ConversationBufferMemory
6
+ from langchain.llms import HuggingFaceHub
7
+ from langchain.chains import RetrievalQA
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer
9
+
10
  from pdfminer.high_level import extract_text
11
def get_pdf_text(files):
    """Extract and concatenate the plain text of a list of uploaded PDF files.

    Args:
        files: iterable of file-like objects that pdfminer's ``extract_text``
            can read (e.g. Streamlit ``UploadedFile`` objects).

    Returns:
        A single string containing every file's text with hard newlines
        collapsed to spaces, concatenated in upload order.
    """
    full_text = ""
    for file in files:
        # pdfminer returns the whole document's text as one string.
        text = extract_text(file)
        # Collapse line breaks so downstream chunking sees continuous prose.
        text = text.replace("\n", " ")
        # Bug fix: the original did `full_text = text + full_text`, which
        # PREPENDS each file and reverses the upload order; append instead.
        full_text += text
    return full_text
18
 
19
# --- Streamlit app: upload PDFs, build a FAISS-backed RetrievalQA chain, chat.
st.title("Embedding Creation for Langchain")
st.header("File Upload")
files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")

if files:
    st.header("Start Conversion")
    if st.button("Ready!"):
        with st.spinner("Creating chain..."):
            full_text = get_pdf_text(files)
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
            chunks = text_splitter.split_text(full_text)
            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
            vectorstore = FAISS.from_texts(chunks, embeddings)
            memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
            # Fix: RetrievalQA requires a LangChain LLM wrapper, not a raw
            # transformers AutoModelForCausalLM; HuggingFaceHub is already
            # imported at the top of the file for exactly this purpose.
            llm = HuggingFaceHub(repo_id="red1xe/Llama-2-7B-codeGPT")
            # Fix: "retrieval-qa" is not a valid chain_type (valid values are
            # "stuff", "map_reduce", "refine", "map_rerank"); "stuff" is the
            # standard choice for small retrieved contexts.
            # Fix: store the chain in session_state — Streamlit reruns the
            # whole script on every widget interaction, so a local variable
            # created under the "Ready!" branch would be gone by the time
            # the user clicks "Ask".
            st.session_state.chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=vectorstore.as_retriever(),
                memory=memory,
            )
        st.success("Done!")

    # Show the chat UI whenever a chain has been built in this session,
    # not only on the rerun where "Ready!" was just clicked.
    if "chain" in st.session_state:
        st.header("Start Chat")
        st.subheader("Ask a question")
        question = st.text_input("Question")
        if st.button("Ask"):
            with st.spinner("Thinking..."):
                # Fix: RetrievalQA has no .query() method; .run() is the
                # single-input convenience entry point.
                answer = st.session_state.chain.run(question)
            st.success(answer)