Spaces:
Sleeping
Sleeping
yashasgupta
committed on
Commit
•
bd8a766
1
Parent(s):
47eafa2
Update app.py
Browse files
app.py
CHANGED
@@ -40,15 +40,24 @@ output_parser = StrOutputParser()
|
|
40 |
chain = chat_template | chat_model | output_parser
|
41 |
|
42 |
from langchain_community.document_loaders import PDFMinerLoader
|
43 |
-
dat = PDFMinerLoader("2404.07143.pdf")
|
44 |
-
dat_nik =dat.load()
|
45 |
-
# Split the document into chunks
|
46 |
-
|
47 |
from langchain_text_splitters import NLTKTextSplitter
|
48 |
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
-
chunks = text_splitter.split_documents(dat_nik)
|
52 |
# Creating Chunks Embedding
|
53 |
# We are just loading GoogleGenerativeAIEmbeddings
|
54 |
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
|
|
40 |
chain = chat_template | chat_model | output_parser
|
41 |
|
42 |
from langchain_community.document_loaders import PDFMinerLoader
|
|
|
|
|
|
|
|
|
43 |
from langchain_text_splitters import NLTKTextSplitter
|
44 |
|
45 |
+
uploaded_file = st.file_uploader("Choose a pdf file",type = "pdf")
|
46 |
+
|
47 |
+
if uploaded_file is not None:
|
48 |
+
pdf_loader = PDFMinerLoader(uploaded_file)
|
49 |
+
dat_nik = pdf_loader.load()
|
50 |
+
text_splitter = NLTKTextSplitter(chunk_size = 500,chunk_overlap = 100)
|
51 |
+
# Split the loaded PDF documents into chunks with the NLTK splitter
# configured just above (was `test_splitter`, an undefined name -> NameError).
chunks = text_splitter.split_documents(dat_nik)
|
52 |
+
|
53 |
+
# dat = PDFMinerLoader("2404.07143.pdf")
|
54 |
+
# dat_nik =dat.load()
|
55 |
+
# # Split the document into chunks
|
56 |
+
|
57 |
+
|
58 |
+
# text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
|
59 |
|
60 |
+
# chunks = text_splitter.split_documents(dat_nik)
|
61 |
# Creating Chunks Embedding
|
62 |
# We are just loading GoogleGenerativeAIEmbeddings
|
63 |
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|