"""FastAPI service exposing OCR over uploaded PDFs (docTR) and images (PaddleOCR)."""

import asyncio
import io
import os
from concurrent.futures import ThreadPoolExecutor

import numpy as np
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from paddleocr import PaddleOCR
from PIL import Image

app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(GZipMiddleware, minimum_size=1000)

# Initialize models once at startup
ocr_model = ocr_predictor(pretrained=True)
paddle_ocr = PaddleOCR(lang='en', use_angle_cls=True, use_gpu=True)

# Get the number of available CPUs
num_cpus = os.cpu_count()

# Initialize ThreadPoolExecutor with dynamic number of workers
executor = ThreadPoolExecutor(max_workers=num_cpus)


def ocr_with_doctr(file) -> str:
    """Run the docTR predictor on a PDF and return its text.

    Args:
        file: PDF source handed straight to ``DocumentFile.from_pdf``
            (the endpoint below passes an ``io.BytesIO``).

    Returns:
        One output line per recognised text line: the line's words joined
        by single spaces, each line terminated by ``"\\n"``.
    """
    doc = DocumentFile.from_pdf(file)
    result = ocr_model(doc)
    # Build the text with a single join instead of repeated string +=.
    return "".join(
        " ".join(word.value for word in line.words) + "\n"
        for page in result.pages
        for block in page.blocks
        for line in block.lines
    )


def ocr_with_paddle(img) -> str:
    """Run PaddleOCR on an image and return the recognised text.

    Args:
        img: Image passed directly to ``paddle_ocr.ocr`` (the endpoint
            below passes a NumPy array).

    Returns:
        Every recognised text fragment prefixed with a single space and
        concatenated (so a non-empty result starts with a space), or
        ``''`` when nothing was recognised.
    """
    result = paddle_ocr.ocr(img)
    # PaddleOCR may report "no text found" as an empty result or as a
    # ``None`` first entry; indexing into that would raise TypeError.
    if not result or not result[0]:
        return ''
    # Each entry's [1][0] element is the recognised text.
    return ''.join(' ' + entry[1][0] for entry in result[0])


def generate_text_from_image(img) -> str:
    """Return the OCR text for an image by delegating to ``ocr_with_paddle``."""
    return ocr_with_paddle(img)


def _decode_image(data: bytes) -> np.ndarray:
    """Decode raw image bytes into an RGB NumPy array.

    Converting to RGB gives the OCR step a three-channel array even when
    the upload is a palette, grayscale or RGBA image.
    """
    with Image.open(io.BytesIO(data)) as image:
        return np.array(image.convert("RGB"))


async def run_blocking_func(func, *args):
    """Run ``func(*args)`` on the shared thread pool and await its result.

    Keeps blocking work (image decoding, OCR inference) off the event loop.
    """
    # get_running_loop() is the recommended call from inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(executor, func, *args)


@app.post("/ocr/")
async def perform_ocr(file: UploadFile = File(...)):
    """Extract text from an uploaded PDF or image.

    Uploads whose filename ends with ``.pdf`` (case-insensitive) are
    processed with docTR; everything else is decoded as an image and
    processed with PaddleOCR.

    Returns:
        ``{"ocr_text": <text>}``.

    Raises:
        HTTPException: 400 when the upload is empty or cannot be decoded
            as an image.
    """
    file_bytes = await file.read()
    if not file_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # filename may be missing on the upload; treat that as "not a PDF".
    filename = (file.filename or "").lower()
    if filename.endswith('.pdf'):
        text_output = await run_blocking_func(
            ocr_with_doctr, io.BytesIO(file_bytes)
        )
    else:
        try:
            # Decode in the executor so large images don't block the loop.
            img = await run_blocking_func(_decode_image, file_bytes)
        except OSError as exc:
            # PIL raises OSError (incl. UnidentifiedImageError) on bad data.
            raise HTTPException(
                status_code=400,
                detail="Uploaded file is not a readable image.",
            ) from exc
        text_output = await run_blocking_func(generate_text_from_image, img)
    return {"ocr_text": text_output}


@app.get("/test/")
async def test_call():
    """Liveness probe: return a fixed greeting."""
    return {"message": "Hi. I'm running"}