Spaces:
Running
Running
File size: 3,989 Bytes
52f1bcb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
from typing import List
import cv2
import numpy as np
import math
import pypdfium2
from PIL import Image, ImageOps, ImageDraw
import torch
from surya.settings import settings
def convert_if_not_rgb(images: List[Image.Image]) -> List[Image.Image]:
    """Return a new list in which every image has mode ``"RGB"``.

    Images whose ``mode`` is already ``"RGB"`` are placed in the result as the
    same object (no copy is made); any other image is replaced by the result
    of ``image.convert("RGB")``. The input list itself is not modified.
    """
    return [
        image if image.mode == "RGB" else image.convert("RGB")
        for image in images
    ]
def get_total_splits(image_size, processor):
    """Return how many vertical chunks an image of ``image_size`` is cut into.

    Args:
        image_size: Sequence whose second element is the image height.
        processor: Object exposing ``size["height"]``, the height of one chunk.

    Returns:
        ``1`` when the height does not exceed
        ``settings.DETECTOR_IMAGE_CHUNK_HEIGHT``; otherwise the height divided
        by the processor height, rounded up.

    Note that the decision to split uses the settings threshold, while the
    number of chunks is derived from the processor height.
    """
    height = list(image_size)[1]
    split_threshold = settings.DETECTOR_IMAGE_CHUNK_HEIGHT
    chunk_height = processor.size["height"]

    if height <= split_threshold:
        return 1
    return math.ceil(height / chunk_height)
def split_image(img, processor):
    """Cut a tall image into vertical chunks of the processor's height.

    The original image is never returned: the result contains either crops of
    it or a copy of it.

    Args:
        img: Image exposing ``size``, ``crop`` and ``copy``.
        processor: Object exposing ``size["height"]``, the height of one chunk.

    Returns:
        A ``(splits, split_heights)`` pair. ``splits`` is the list of chunk
        images and ``split_heights`` the height of real image content in each
        chunk (before any padding). When the image height does not exceed
        ``settings.DETECTOR_IMAGE_CHUNK_HEIGHT`` the pair is
        ``([img.copy()], [img_height])``.
    """
    img_width = img.size[0]
    img_height = list(img.size)[1]
    split_threshold = settings.DETECTOR_IMAGE_CHUNK_HEIGHT
    chunk_height = processor.size["height"]

    # Short images are passed through as a single (copied) chunk.
    if img_height <= split_threshold:
        return [img.copy()], [img_height]

    splits = []
    split_heights = []
    for index in range(math.ceil(img_height / chunk_height)):
        top = index * chunk_height
        # The last chunk stops at the bottom edge of the image.
        bottom = min(top + chunk_height, img_height)
        content_height = bottom - top

        chunk = img.crop((0, top, img_width, bottom))
        if content_height < chunk_height:
            # Extend a short final chunk to the full chunk height, keeping the
            # image content anchored at the top-left.
            # NOTE(review): `color=255` is an integer fill; on a multi-band
            # (e.g. RGB) image Pillow may not render this as white - confirm
            # the intended pad color.
            chunk = ImageOps.pad(chunk, (img_width, chunk_height), color=255, centering=(0, 0))

        splits.append(chunk)
        split_heights.append(content_height)
    return splits, split_heights
def prepare_image_detection(img, processor):
    """Resize an image to the processor's input size and convert it to a tensor.

    Args:
        img: Image exposing ``thumbnail`` and ``resize``. ``thumbnail`` is
            called on this object directly, so the caller's image is shrunk
            in place; pass a copy if the original must be preserved.
        processor: Callable exposing ``size["width"]`` / ``size["height"]``
            and returning a mapping with a ``"pixel_values"`` entry.

    Returns:
        ``torch.from_numpy`` applied to the first element of the processor's
        ``"pixel_values"`` output.
    """
    target_size = (processor.size["width"], processor.size["height"])

    # This double resize is actually necessary for downstream accuracy:
    # first shrink to fit inside the target, then stretch the smaller
    # dimension so the image fills the target size exactly.
    img.thumbnail(target_size, Image.Resampling.LANCZOS)
    stretched = img.resize(target_size, Image.Resampling.LANCZOS)

    pixels = np.asarray(stretched, dtype=np.uint8)
    pixel_values = processor(pixels)["pixel_values"][0]
    return torch.from_numpy(pixel_values)
def open_pdf(pdf_filepath):
    """Open ``pdf_filepath`` and return it as a ``pypdfium2.PdfDocument``."""
    document = pypdfium2.PdfDocument(pdf_filepath)
    return document
def get_page_images(doc, indices: List, dpi=settings.IMAGE_DPI):
    """Render the selected pages of ``doc`` to RGB PIL images.

    Args:
        doc: Document object exposing ``render`` (as returned by ``open_pdf``).
        indices: Page indices to render.
        dpi: Rendering resolution; passed to the renderer as the scale factor
            ``dpi / 72``. Defaults to ``settings.IMAGE_DPI`` (read once, when
            the function is defined).

    Returns:
        A list with one image per rendered page, each converted to ``"RGB"``.
    """
    rendered_pages = doc.render(
        pypdfium2.PdfBitmap.to_pil,
        page_indices=indices,
        scale=dpi / 72,
    )
    return [page.convert("RGB") for page in rendered_pages]
def slice_bboxes_from_image(image: Image.Image, bboxes):
    """Crop one sub-image out of ``image`` for every bounding box.

    Args:
        image: Source image.
        bboxes: Iterable of boxes indexed as ``(left, top, right, bottom)``.

    Returns:
        A list of crops, one per box and in the same order. A crop of zero
        width is still included; a warning is printed for it. Only the width
        is checked.
    """
    lines = []
    for bbox in bboxes:
        left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
        crop = image.crop((left, top, right, bottom))
        crop_width = crop.size[0]
        if crop_width == 0:
            print(f"Warning: found an empty line with bbox {bbox}")
        lines.append(crop)
    return lines
def slice_polys_from_image(image: Image.Image, polys):
    """Cut one padded crop out of ``image`` for every polygon.

    The image is converted to a ``uint8`` array once, and each polygon is
    handed to ``slice_and_pad_poly`` together with that array.

    Returns:
        A list with the result of ``slice_and_pad_poly`` for each polygon, in
        input order.
    """
    pixels = np.array(image, dtype=np.uint8)
    return [slice_and_pad_poly(pixels, poly) for poly in polys]
def slice_and_pad_poly(image_array: np.array, coordinates):
    """Crop a polygon's bounding box and blank out everything outside the polygon.

    Args:
        image_array: Image pixels as an array whose first two axes are
            (row, column). Any number of trailing channels is accepted
            (including none), since the mask is applied per pixel.
        coordinates: Polygon corners; element 0 of each corner is used as x
            and element 1 as y. Values must be usable as slice indices.

    Returns:
        A PIL image of the polygon's axis-aligned bounding box in which every
        pixel outside the polygon is set to ``settings.RECOGNITION_PAD_VALUE``.

    Raises:
        ValueError: If ``coordinates`` is empty (no bounding box exists).
    """
    corners = [(corner[0], corner[1]) for corner in coordinates]
    xs = [x for x, _ in corners]
    ys = [y for _, y in corners]

    # Clamp the box at 0: a negative slice index would wrap around to the
    # far side of the array and select the wrong region. Values past the
    # far edge are already truncated by slicing.
    left, top = max(min(xs), 0), max(min(ys), 0)
    right, bottom = max(max(xs), 0), max(max(ys), 0)

    # Copy so that masking below never writes into the caller's array.
    cropped_polygon = image_array[top:bottom, left:right].copy()

    # Express the polygon relative to the crop's top-left corner.
    local_corners = [(x - left, y - top) for x, y in corners]

    # Draw the polygon onto a per-pixel mask: 1 inside, 0 outside.
    mask = np.zeros(cropped_polygon.shape[:2], dtype=np.uint8)
    cv2.fillPoly(mask, [np.int32(local_corners)], 1)

    # A 2-D boolean mask selects whole pixels, so the pad value is written to
    # every channel without building a channel-stacked copy of the mask.
    cropped_polygon[mask == 0] = settings.RECOGNITION_PAD_VALUE
    return Image.fromarray(cropped_polygon)
|