red1xe committed on
Commit
5949a92
1 Parent(s): 4862b9f

some changes have been made

Browse files
Files changed (1) hide show
  1. app.py +28 -24
app.py CHANGED
@@ -1,40 +1,36 @@
1
  import os
2
  import time
3
  import streamlit as st
4
- from dotenv import load_dotenv
5
  from htmlTemplates import css, bot_template, user_template
6
  from langchain.embeddings import HuggingFaceEmbeddings
7
  from langchain.vectorstores import Chroma
8
  from langchain.memory import ConversationBufferMemory
9
- from langchain.prompts import PromptTemplate
10
  from langchain.chains import RetrievalQA
11
- from langchain.llms import HuggingFaceHub
12
- from langchain import PromptTemplate
13
  from pdfminer.high_level import extract_text
14
  from langchain.text_splitter import RecursiveCharacterTextSplitter
15
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
16
 
17
 
18
  # Updated Prompt Template
19
- template = """You are an expert on TeamCenter. Use the following pieces of context to answer the question at the end.
20
- If you don't know the answer, it's okay to say that you don't know. Please don't try to make up an answer.
21
- Use two sentences minimum and keep the answer as concise as possible (maximum 200 characters each).
22
- Always use proper grammar and punctuation. End of the answer always say "End of answer" (without quotes).
23
 
24
- Context:
25
- {context}
26
 
27
- Question: {question}
28
- Helpful Answer (Two sentences minimum, maximum 200 characters each):"""
29
 
30
- tokenizer = AutoTokenizer.from_pretrained("red1xe/falcon-7b-codeGPT-3K")
31
- model = AutoModelForSeq2SeqLM.from_pretrained("red1xe/falcon-7b-codeGPT-3K")
32
- ## QA_CHAIN_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
33
 
34
- load_dotenv()
35
- persist_directory = os.environ.get('PERSIST_DIRECTORY')
36
- embeddings_model_name = os.environ.get("EMBEDDINGS_MODEL_NAME")
37
- model_path = os.environ.get('MODEL_PATH')
 
 
 
 
 
38
 
39
  def get_vector_store(target_source_chunks):
40
  embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
@@ -96,10 +92,18 @@ def main():
96
 
97
  if st.button('Start Chain'):
98
  with st.spinner('Working in progress ...'):
99
- vector_store = get_vector_store(target_source_chunks)
100
- st.session_state.conversation = get_conversation_chain(
101
- retriever=vector_store,
102
- )
 
 
 
 
 
 
 
 
103
 
104
  if user_question:
105
  handle_userinput(user_question)
 
1
  import os
2
  import time
3
  import streamlit as st
 
4
  from htmlTemplates import css, bot_template, user_template
5
  from langchain.embeddings import HuggingFaceEmbeddings
6
  from langchain.vectorstores import Chroma
7
  from langchain.memory import ConversationBufferMemory
 
8
  from langchain.chains import RetrievalQA
 
 
9
  from pdfminer.high_level import extract_text
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
12
 
13
 
14
  # Updated Prompt Template
 
 
 
 
15
 
16
+ tokenizer = AutoTokenizer.from_pretrained("red1xe/Llama-2-7B-codeGPT")
17
+ model = AutoModelForCausalLM.from_pretrained("red1xe/Llama-2-7B-codeGPT")
18
 
19
+ persist_directory = 'db'
20
+ embeddings_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
21
 
22
def get_pdf_text(pdf_path):
    """Extract the plain text of a PDF.

    Thin wrapper around ``pdfminer.high_level.extract_text``.

    Args:
        pdf_path: The PDF to read; forwarded unchanged to ``extract_text``.

    Returns:
        Whatever ``extract_text`` returns for ``pdf_path``.
    """
    pdf_text = extract_text(pdf_path)
    return pdf_text
 
24
 
25
def get_pdf_text_chunks(pdf_text, chunk_size=1000, chunk_overlap=100):
    """Split extracted PDF text into overlapping chunks.

    Args:
        pdf_text: The full text to split.
        chunk_size: Maximum size of each chunk, passed to the splitter.
        chunk_overlap: Amount of overlap between consecutive chunks,
            passed to the splitter.

    Returns:
        A list of text chunks; empty when ``pdf_text`` is empty or ``None``.
    """
    if not pdf_text:
        return []

    # Chunk sizing is configured on the splitter itself; ``split_text`` only
    # takes the text. The previous call passed ``max_chunk_length`` /
    # ``min_chunk_length`` / ``overlap_length`` to ``split_text``, which it
    # does not accept. There is no minimum-length option, so that setting
    # is dropped.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(pdf_text)
28
+
29
def create_vector_store(target_source_chunks):
    """Build a Chroma vector store and index the given text chunks.

    The store is opened on the module-level ``persist_directory`` and
    embeds text with the HuggingFace model named by the module-level
    ``embeddings_model_name``.

    Args:
        target_source_chunks: Iterable of text chunks (strings) to index.

    Returns:
        The ``Chroma`` instance with the chunks added.
    """
    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
    # The LangChain Chroma wrapper inserts raw strings via ``add_texts``;
    # it has no ``add`` method. Skip the call when there is nothing to index.
    if target_source_chunks:
        db.add_texts(target_source_chunks)
    # NOTE(review): some LangChain versions need an explicit ``db.persist()``
    # to flush to ``persist_directory`` — confirm against the pinned version.
    return db
34
 
35
  def get_vector_store(target_source_chunks):
36
  embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
 
92
 
93
  if st.button('Start Chain'):
94
  with st.spinner('Working in progress ...'):
95
+ pdf_file = st.file_uploader("Upload PDF", type=['pdf'])
96
+ if pdf_file is not None:
97
+ pdf_text = get_pdf_text(pdf_file)
98
+ pdf_text_chunks = get_pdf_text_chunks(pdf_text)
99
+ st.session_state.vector_store = create_vector_store(pdf_text_chunks)
100
+ st.session_state.conversation = get_conversation_chain(
101
+ retriever=st.session_state.vector_store,
102
+ )
103
+ st.success('Vectorstore created successfully! You can start chatting now!')
104
+ else:
105
+ st.warning('Please upload a PDF file first!')
106
+
107
 
108
  if user_question:
109
  handle_userinput(user_question)