red1xe committed on
Commit
f4781ea
1 Parent(s): 88b53a8

PDF to text

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -1,7 +1,14 @@
1
  import streamlit as st
2
  from langchain.embeddings import HuggingFaceEmbeddings
3
  from langchain.vectorstores import FAISS
 
4
 
5
  st.title("Embedding Creation for Langchain")
6
- st.header("This is a header")
7
  files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from langchain.embeddings import HuggingFaceEmbeddings
3
  from langchain.vectorstores import FAISS
4
+ from pdfminer.high_level import extract_text
5
 
6
  st.title("Embedding Creation for Langchain")
7
+ st.header("File Upload")
8
  files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")
9
+
10
+ if files:
11
+ st.header("PDFs to Text")
12
+ for file in files:
13
+ text = extract_text(file)
14
+ st.write(text)