Spaces:
Running
Running
File size: 5,470 Bytes
52f1bcb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
import copy
from typing import List, Tuple, Any, Optional
from pydantic import BaseModel, field_validator, computed_field
from surya.postprocessing.util import rescale_bbox
class PolygonBox(BaseModel):
polygon: List[List[float]]
confidence: Optional[float] = None
@field_validator('polygon')
@classmethod
def check_elements(cls, v: List[List[float]]) -> List[List[float]]:
if len(v) != 4:
raise ValueError('corner must have 4 elements')
for corner in v:
if len(corner) != 2:
raise ValueError('corner must have 2 elements')
return v
@property
def height(self):
return self.bbox[3] - self.bbox[1]
@property
def width(self):
return self.bbox[2] - self.bbox[0]
@property
def area(self):
return self.width * self.height
@computed_field
@property
def bbox(self) -> List[float]:
box = [self.polygon[0][0], self.polygon[0][1], self.polygon[1][0], self.polygon[2][1]]
if box[0] > box[2]:
box[0], box[2] = box[2], box[0]
if box[1] > box[3]:
box[1], box[3] = box[3], box[1]
return box
def rescale(self, processor_size, image_size):
# Point is in x, y format
page_width, page_height = processor_size
img_width, img_height = image_size
width_scaler = img_width / page_width
height_scaler = img_height / page_height
new_corners = copy.deepcopy(self.polygon)
for corner in new_corners:
corner[0] = int(corner[0] * width_scaler)
corner[1] = int(corner[1] * height_scaler)
self.polygon = new_corners
def fit_to_bounds(self, bounds):
new_corners = copy.deepcopy(self.polygon)
for corner in new_corners:
corner[0] = max(min(corner[0], bounds[2]), bounds[0])
corner[1] = max(min(corner[1], bounds[3]), bounds[1])
self.polygon = new_corners
def merge(self, other):
x1 = min(self.bbox[0], other.bbox[0])
y1 = min(self.bbox[1], other.bbox[1])
x2 = max(self.bbox[2], other.bbox[2])
y2 = max(self.bbox[3], other.bbox[3])
self.polygon = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
def intersection_area(self, other, x_margin=0, y_margin=0):
x_overlap = max(0, min(self.bbox[2] + x_margin, other.bbox[2] + x_margin) - max(self.bbox[0] - x_margin, other.bbox[0] - x_margin))
y_overlap = max(0, min(self.bbox[3] + y_margin, other.bbox[3] + y_margin) - max(self.bbox[1] - y_margin, other.bbox[1] - y_margin))
return x_overlap * y_overlap
def intersection_pct(self, other, x_margin=0, y_margin=0):
assert 0 <= x_margin <= 1
assert 0 <= y_margin <= 1
if self.area == 0:
return 0
if x_margin:
x_margin = int(min(self.width, other.width) * x_margin)
if y_margin:
y_margin = int(min(self.height, other.height) * y_margin)
intersection = self.intersection_area(other, x_margin, y_margin)
return intersection / self.area
class Bbox(BaseModel):
bbox: List[float]
@field_validator('bbox')
@classmethod
def check_4_elements(cls, v: List[float]) -> List[float]:
if len(v) != 4:
raise ValueError('bbox must have 4 elements')
return v
def rescale_bbox(self, orig_size, new_size):
self.bbox = rescale_bbox(self.bbox, orig_size, new_size)
def round_bbox(self, divisor):
self.bbox = [x // divisor * divisor for x in self.bbox]
@property
def height(self):
return self.bbox[3] - self.bbox[1]
@property
def width(self):
return self.bbox[2] - self.bbox[0]
@property
def area(self):
return self.width * self.height
@property
def polygon(self):
return [[self.bbox[0], self.bbox[1]], [self.bbox[2], self.bbox[1]], [self.bbox[2], self.bbox[3]], [self.bbox[0], self.bbox[3]]]
@property
def center(self):
return [(self.bbox[0] + self.bbox[2]) / 2, (self.bbox[1] + self.bbox[3]) / 2]
def intersection_pct(self, other):
if self.area == 0:
return 0
x_overlap = max(0, min(self.bbox[2], other.bbox[2]) - max(self.bbox[0], other.bbox[0]))
y_overlap = max(0, min(self.bbox[3], other.bbox[3]) - max(self.bbox[1], other.bbox[1]))
intersection = x_overlap * y_overlap
return intersection / self.area
class LayoutBox(PolygonBox):
label: str
class OrderBox(Bbox):
position: int
class ColumnLine(Bbox):
vertical: bool
horizontal: bool
class TextLine(PolygonBox):
text: str
confidence: Optional[float] = None
class OCRResult(BaseModel):
text_lines: List[TextLine]
languages: List[str] | None = None
image_bbox: List[float]
class TextDetectionResult(BaseModel):
bboxes: List[PolygonBox]
vertical_lines: List[ColumnLine]
heatmap: Any
affinity_map: Any
image_bbox: List[float]
class LayoutResult(BaseModel):
bboxes: List[LayoutBox]
segmentation_map: Any
image_bbox: List[float]
class OrderResult(BaseModel):
bboxes: List[OrderBox]
image_bbox: List[float]
class TableCell(Bbox):
row_id: int | None = None
col_id: int | None = None
text: str | None = None
class TableResult(BaseModel):
cells: List[TableCell]
rows: List[TableCell]
cols: List[TableCell]
image_bbox: List[float]
|