abadesalex's picture
Update to Qdrant db
47b5f0c
raw
history blame
396 Bytes
import io
import os
from fastapi import UploadFile
import pdfplumber
class ExtractTextFeature:
    """Namespace for extracting text from uploaded PDF files."""

    @staticmethod
    async def extract_text_from_pdf(file: UploadFile) -> str:
        """Return the text of every page of an uploaded PDF, concatenated.

        The upload is read fully into memory and parsed with pdfplumber.
        Page texts are joined in page order with no separator between them.

        Args:
            file: The uploaded PDF; its entire content is consumed via
                ``await file.read()``.

        Returns:
            The concatenated page text. Pages that yield no text contribute
            an empty string, so a PDF without any extractable text gives "".
        """
        content = await file.read()
        with pdfplumber.open(io.BytesIO(content)) as pdf:
            # Depending on the pdfplumber version, extract_text() can return
            # None for a page with no extractable text (e.g. a blank or
            # image-only page); coerce that to "" so one such page does not
            # raise TypeError and abort the whole extraction.
            return "".join(page.extract_text() or "" for page in pdf.pages)