cookiemonster69 committed on
Commit
1038fbb
1 Parent(s): 45b0863

Upload 3 files

Browse files
Files changed (1) hide show
  1. utils.py +0 -49
utils.py CHANGED
@@ -1,49 +0,0 @@
1
- from langchain.text_splitter import CharacterTextSplitter
2
- from langchain.embeddings import HuggingFaceEmbeddings
3
- from langchain import FAISS
4
- from langchain.chains.question_answering import load_qa_chain
5
- from langchain.llms import openai
6
- from langchain.chat_models import ChatOpenAI
7
- from langchain.callbacks import get_openai_callback
8
- from PyPDF2 import PdfReader
9
-
10
def process_text(text):
    """Split *text* into overlapping chunks and index them in a FAISS store.

    The text is cut on newline boundaries into pieces of up to 1000
    characters (measured with ``len``) with a 200-character overlap, each
    piece is embedded with the ``all-MiniLM-L6-v2`` sentence-transformers
    model, and the resulting vectors are loaded into a FAISS vector store.

    Args:
        text: The raw document text to index.

    Returns:
        The FAISS vector store built from the text chunks.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    pieces = splitter.split_text(text)

    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2'
    )
    return FAISS.from_texts(pieces, embedder)
25
-
26
def summarizer(pdf):
    """Summarize an uploaded PDF in 10-15 sentences with an OpenAI chat model.

    The text of every page is concatenated, indexed via ``process_text``,
    and the chunks most similar to the summary prompt are passed to a
    "stuff" question-answering chain backed by ``gpt-3.5-turbo-16k``.
    The OpenAI callback's usage/cost record is printed to stdout.

    Args:
        pdf: A path or file-like object accepted by ``PdfReader``, or
            ``None`` when nothing has been uploaded.

    Returns:
        The summary produced by the chain, or ``None`` when *pdf* is ``None``.

    Raises:
        ValueError: If no text could be extracted from the PDF (for example
            a scanned, image-only document).
    """
    if pdf is None:
        return None

    pdf_reader = PdfReader(pdf)
    # The original guarded against a falsy extract_text() result, so keep
    # substituting "" for pages that yield no text.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # With nothing to split there are no chunks to embed; building the FAISS
    # index from an empty list is expected to fail with an opaque error, so
    # stop early with a clear message before doing any embedding or LLM work.
    if not text.strip():
        raise ValueError(
            "No extractable text found in the PDF; cannot summarize."
        )

    knowledge_base = process_text(text)

    query = "Summarize the content of the uploaded PDF file in 10-15 sentences."
    docs = knowledge_base.similarity_search(query)

    llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0.7)
    chain = load_qa_chain(llm, chain_type='stuff')

    # The callback context records token usage and cost for the chain call.
    with get_openai_callback() as cost:
        response = chain.run(input_documents=docs, question=query)
        print(cost)
    return response