Spaces:

Jiangxz01
/

SuryaOCR

Running

App Files Files

SuryaOCR / surya /postprocessing /heatmap.py

Jiangxz01

Upload 56 files

52f1bcb verified 24 days ago

raw

history blame

7.72 kB

	from typing import List, Tuple

	import numpy as np
	import cv2
	import math
	from PIL import ImageDraw, ImageFont

	from surya.postprocessing.fonts import get_font_path
	from surya.postprocessing.util import rescale_bbox
	from surya.schema import PolygonBox
	from surya.settings import settings
	from surya.postprocessing.text import get_text_size


	def keep_largest_boxes(boxes: List[PolygonBox]) -> List[PolygonBox]:
	    """Drop boxes that are mostly covered by a larger box.

	    A box is discarded when some other box in ``boxes`` has a different
	    polygon and a different bbox, ``box.intersection_pct(other)`` is above
	    0.9, and the other box's bbox area is strictly larger.

	    Args:
	        boxes: Candidate boxes; each must expose ``bbox``, ``polygon`` and
	            ``intersection_pct``.

	    Returns:
	        The surviving boxes, in their original order.
	    """
	    def _bbox_area(rect):
	        return (rect[2] - rect[0]) * (rect[3] - rect[1])

	    survivors = []
	    for candidate in boxes:
	        cand_rect = candidate.bbox
	        cand_area = _bbox_area(cand_rect)
	        for rival in boxes:
	            # Skip the candidate itself (and exact polygon duplicates).
	            if rival.polygon == candidate.polygon:
	                continue

	            rival_rect = rival.bbox
	            rival_area = _bbox_area(rival_rect)
	            if cand_rect == rival_rect:
	                continue

	            # Overlap is measured from the candidate's point of view.
	            coverage = candidate.intersection_pct(rival)
	            if coverage > .9 and cand_area < rival_area:
	                break
	        else:
	            # No larger box swallowed this one.
	            survivors.append(candidate)
	    return survivors


	def clean_contained_boxes(boxes: List[PolygonBox]) -> List[PolygonBox]:
	    """Remove boxes whose bbox lies entirely inside another box's bbox.

	    Boxes with an identical polygon or an identical bbox are never
	    considered to contain each other, so exact duplicates are all kept.

	    Args:
	        boxes: Candidate boxes; each must expose ``bbox`` and ``polygon``.

	    Returns:
	        The boxes that are not contained in any other box, in input order.
	    """
	    survivors = []
	    for candidate in boxes:
	        inner = candidate.bbox
	        for rival in boxes:
	            if rival.polygon == candidate.polygon:
	                continue

	            outer = rival.bbox
	            if inner == outer:
	                continue

	            # (x1, y1, x2, y2) containment: every edge of `inner` is on or
	            # inside the matching edge of `outer`.
	            fits_inside = (
	                inner[0] >= outer[0]
	                and inner[1] >= outer[1]
	                and inner[2] <= outer[2]
	                and inner[3] <= outer[3]
	            )
	            if fits_inside:
	                break
	        else:
	            survivors.append(candidate)
	    return survivors


	def get_dynamic_thresholds(linemap, text_threshold, low_text, typical_top10_avg=0.7):
	    """Scale the detection thresholds by how bright the heatmap's peaks are.

	    The mean of the highest ~10% of values in ``linemap`` is compared with
	    ``typical_top10_avg``; the square root of that ratio (clipped to
	    [0, 1]) scales both thresholds, so they can only be lowered, never raised.

	    Args:
	        linemap: Array of heatmap intensities (any shape; it is flattened).
	        text_threshold: Base threshold, clipped to [0.15, 0.8] after scaling.
	        low_text: Base threshold, clipped to [0.1, 0.6] after scaling.
	        typical_top10_avg: Reference value for the top-10% mean.

	    Returns:
	        Tuple ``(text_threshold, low_text)`` of the adjusted thresholds.
	    """
	    pixels = linemap.ravel()
	    # Index separating the lowest 90% from the highest 10% after partitioning.
	    cutoff = int(len(pixels) * 0.9)
	    brightest = np.partition(pixels, cutoff)[cutoff:]
	    peak_ratio = np.mean(brightest) / typical_top10_avg
	    scale = np.clip(peak_ratio, 0, 1) ** 0.5

	    adjusted_text = np.clip(text_threshold * scale, 0.15, 0.8)
	    adjusted_low = np.clip(low_text * scale, 0.1, 0.6)
	    return adjusted_text, adjusted_low


	def detect_boxes(linemap, text_threshold, low_text):
	    """Extract quadrilateral boxes and confidences from a 2-D heatmap.

	    The map is binarised at the (dynamically adjusted) ``low_text`` value
	    and split into 4-connected components. Components smaller than 10
	    pixels, or whose peak value is below the adjusted ``text_threshold``,
	    are dropped. Each remaining component is dilated and wrapped in a
	    minimum-area rectangle.

	    Args:
	        linemap: 2-D array of heatmap intensities.
	        text_threshold: Base threshold on a component's peak value.
	        low_text: Base threshold used to binarise the map.

	    Returns:
	        Tuple ``(boxes, confidences)``: a list of 4x2 corner arrays and a
	        parallel list of peak values divided by the largest peak found.
	    """
	    # From CRAFT - https://github.com/clovaai/CRAFT-pytorch
	    # Modified to return boxes and for speed, accuracy
	    map_h, map_w = linemap.shape

	    text_threshold, low_text = get_dynamic_thresholds(linemap, text_threshold, low_text)

	    binary_map = (linemap > low_text).astype(np.uint8)
	    n_labels, label_map, stats, _centroids = cv2.connectedComponentsWithStats(binary_map, connectivity=4)

	    polygons = []
	    peaks = []
	    best_peak = 0
	    pad = 1

	    # Label 0 is the background, so start at 1.
	    for label in range(1, n_labels):
	        # size filtering
	        if stats[label, cv2.CC_STAT_AREA] < 10:
	            continue

	        left, top, comp_w, comp_h = stats[
	            label, [cv2.CC_STAT_LEFT, cv2.CC_STAT_TOP, cv2.CC_STAT_WIDTH, cv2.CC_STAT_HEIGHT]
	        ]

	        try:
	            niter = int(np.sqrt(min(comp_w, comp_h)))
	        except ValueError:
	            niter = 0

	        # Crop window: the component's bounds grown by the dilation radius,
	        # clamped to the map.
	        margin = niter + pad
	        x0 = max(0, left - margin)
	        y0 = max(0, top - margin)
	        x1 = min(map_w, left + comp_w + margin)
	        y1 = min(map_h, top + comp_h + margin)

	        component = label_map[y0:y1, x0:x1] == label
	        peak = np.max(linemap[y0:y1, x0:x1][component])

	        # thresholding
	        if peak < text_threshold:
	            continue

	        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (margin, margin))
	        dilated = cv2.dilate(component.astype(np.uint8), kernel)

	        # make box: collect (x, y) points of the dilated mask in map coordinates
	        rows, cols = np.nonzero(dilated)
	        points = np.column_stack((cols + x0, rows + y0))
	        box = cv2.boxPoints(cv2.minAreaRect(points))

	        # align diamond-shape: near-square boxes fall back to the
	        # axis-aligned extent of the points
	        side_a = np.linalg.norm(box[0] - box[1])
	        side_b = np.linalg.norm(box[1] - box[2])
	        aspect = max(side_a, side_b) / (min(side_a, side_b) + 1e-5)
	        if abs(1 - aspect) <= 0.1:
	            xs, ys = points[:, 0], points[:, 1]
	            x_lo, x_hi = xs.min(), xs.max()
	            y_lo, y_hi = ys.min(), ys.max()
	            box = np.array(
	                [[x_lo, y_lo], [x_hi, y_lo], [x_hi, y_hi], [x_lo, y_hi]],
	                dtype=np.float32,
	            )

	        # make clock-wise order: rotate so the corner with the smallest
	        # x + y comes first
	        first = box.sum(axis=1).argmin()
	        box = np.array(np.roll(box, 4 - first, 0))

	        best_peak = max(best_peak, peak)
	        peaks.append(peak)
	        polygons.append(box)

	    # Normalise confidences relative to the strongest detection.
	    if best_peak > 0:
	        peaks = [value / best_peak for value in peaks]
	    return polygons, peaks


	def get_detected_boxes(textmap, text_threshold=None, low_text=None) -> List[PolygonBox]:
	    """Run box detection on a heatmap and wrap the results as PolygonBox.

	    Args:
	        textmap: Heatmap array; it is copied and cast to float32, so the
	            caller's array is not modified.
	        text_threshold: Defaults to ``settings.DETECTOR_TEXT_THRESHOLD``
	            when ``None``.
	        low_text: Defaults to ``settings.DETECTOR_BLANK_THRESHOLD`` when
	            ``None``.

	    Returns:
	        One ``PolygonBox`` per detected polygon, carrying its confidence.
	    """
	    text_threshold = settings.DETECTOR_TEXT_THRESHOLD if text_threshold is None else text_threshold
	    low_text = settings.DETECTOR_BLANK_THRESHOLD if low_text is None else low_text

	    heatmap = textmap.copy().astype(np.float32)
	    polygons, scores = detect_boxes(heatmap, text_threshold, low_text)

	    # From point form to box form
	    return [
	        PolygonBox(polygon=corners, confidence=score)
	        for corners, score in zip(polygons, scores)
	    ]


	def get_and_clean_boxes(textmap, processor_size, image_size, text_threshold=None, low_text=None) -> List[PolygonBox]:
	    """Detect boxes, rescale them to the image, and drop contained boxes.

	    Each detected box has ``rescale(processor_size, image_size)`` and then
	    ``fit_to_bounds([0, 0, image_size[0], image_size[1]])`` called on it
	    before boxes fully inside another box are removed.

	    Args:
	        textmap: Heatmap passed to ``get_detected_boxes``.
	        processor_size: Size passed as the first argument to ``rescale``.
	        image_size: Target size; indexes 0 and 1 are used as the bounds.
	        text_threshold: Forwarded to ``get_detected_boxes``.
	        low_text: Forwarded to ``get_detected_boxes``.

	    Returns:
	        The rescaled boxes that are not contained in another box.
	    """
	    detected = get_detected_boxes(textmap, text_threshold, low_text)
	    for detection in detected:
	        detection.rescale(processor_size, image_size)
	        image_bounds = [0, 0, image_size[0], image_size[1]]
	        detection.fit_to_bounds(image_bounds)

	    return clean_contained_boxes(detected)



	def draw_bboxes_on_image(bboxes, image, labels=None, label_font_size=10, color: str | list='red'):
	    """Draw axis-aligned boxes on an image.

	    Each ``(x1, y1, x2, y2)`` box is expanded to its four corners and the
	    drawing is delegated to ``draw_polys_on_image``.

	    Args:
	        bboxes: Iterable of boxes indexable as ``[x1, y1, x2, y2]``.
	        image: Image to draw on; passed through to ``draw_polys_on_image``.
	        labels: Optional per-box labels, forwarded unchanged.
	        label_font_size: Font size for the labels.
	        color: A single colour, or a list with one colour per box.

	    Returns:
	        Whatever ``draw_polys_on_image`` returns for the same image.
	    """
	    # Clockwise polygon: top-left, top-right, bottom-right, bottom-left.
	    polys = [
	        [
	            [bb[0], bb[1]],
	            [bb[2], bb[1]],
	            [bb[2], bb[3]],
	            [bb[0], bb[3]],
	        ]
	        for bb in bboxes
	    ]

	    return draw_polys_on_image(polys, image, labels, label_font_size=label_font_size, color=color)


	def draw_polys_on_image(corners, image, labels=None, box_padding=-1, label_offset=1, label_font_size=10, color: str | list='red'):
	    """Outline polygons on an image, optionally with a text label each.

	    The image is drawn on in place and also returned.

	    Args:
	        corners: Sequence of polygons, each a sequence of ``(x, y)`` points;
	            coordinates are truncated to ints before drawing.
	        image: Image accepted by ``ImageDraw.Draw``.
	        labels: Optional sequence of label strings, indexed in step with
	            ``corners``.
	        box_padding: Padding added around the label's white background.
	        label_offset: Offset of the label from the polygon's top-left extent.
	        label_font_size: Font size used for the labels.
	        color: A single colour, or a list with one colour per polygon.

	    Returns:
	        The same ``image`` object, after drawing.
	    """
	    draw = ImageDraw.Draw(image)
	    font_path = get_font_path()
	    label_font = ImageFont.truetype(font_path, label_font_size)

	    for i, raw_poly in enumerate(corners):
	        poly = [(int(p[0]), int(p[1])) for p in raw_poly]
	        # A list supplies one colour per polygon; anything else is shared.
	        item_color = color[i] if isinstance(color, list) else color
	        draw.polygon(poly, outline=item_color, width=1)

	        if labels is None:
	            continue

	        label = labels[i]
	        # Anchor the label at the top-left corner of the polygon's extent.
	        text_position = (
	            min(p[0] for p in poly) + label_offset,
	            min(p[1] for p in poly) + label_offset,
	        )
	        text_size = get_text_size(label, label_font)
	        # White backing rectangle so the label stays readable.
	        box_position = (
	            text_position[0] - box_padding + label_offset,
	            text_position[1] - box_padding + label_offset,
	            text_position[0] + text_size[0] + box_padding + label_offset,
	            text_position[1] + text_size[1] + box_padding + label_offset,
	        )
	        draw.rectangle(box_position, fill="white")
	        draw.text(
	            text_position,
	            label,
	            fill=item_color,
	            font=label_font,
	        )

	    return image