gaunernst committed on
Commit
90b9e1a
1 Parent(s): 2b10202

initial commit

Browse files
Files changed (3) hide show
  1. app.py +72 -0
  2. packages.txt +1 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import gradio as gr
3
+ import numpy as np
4
+ from paddleocr import PaddleOCR
5
+ from PIL import Image
6
+ from transformers import pipeline
7
+ from transformers.pipelines.document_question_answering import apply_tesseract
8
+
9
# Document question-answering pipeline. Built once at import time so every
# request served by the app reuses the same loaded model.
PIPE = pipeline("document-question-answering", "impira/layoutlm-document-qa")

# PaddleOCR engine, likewise built once at import time.
OCR = PaddleOCR(
    use_angle_cls=True,
    lang="en",
    det_limit_side_len=10_000,
    det_db_score_mode="slow",
    # PaddleOCR spells this option "enable_mkldnn"; the earlier
    # "enable_mlkdnn" spelling did not match it, so the setting was never
    # applied.
    enable_mkldnn=True,
)


# Labels offered in the UI radio button. predict() dispatches on these exact
# strings, so they must stay in sync with the choices passed to gr.Radio.
PADDLE_OCR_LABEL = "PaddleOCR (en)"
TESSERACT_LABEL = "Tesseract (HF default)"
21
+
22
+
23
def predict(image: Image.Image, question: str, ocr_engine: str):
    """Answer ``question`` about a document image using the chosen OCR engine.

    The image is OCR'ed with either PaddleOCR or Tesseract, the detected
    word boxes are drawn onto a copy of the image, and the words plus their
    boxes (scaled to the 0-1000 range) are handed to the document-QA pipeline.

    Args:
        image: Input document as a PIL image.
        question: Natural-language question about the document.
        ocr_engine: Either ``PADDLE_OCR_LABEL`` or ``TESSERACT_LABEL``.

    Returns:
        A tuple ``(answer, score, annotated_image)`` where ``annotated_image``
        is a NumPy array with the OCR boxes drawn on it. When the OCR engine
        finds no text, ``("", 0.0, annotated_image)`` is returned without
        running the QA model.

    Raises:
        ValueError: If ``ocr_engine`` is not one of the supported labels.
    """
    # np.array (unlike np.asarray) always copies, which guarantees a writable
    # buffer for the OpenCV drawing calls below and leaves ``image`` untouched.
    image_np = np.array(image)

    if ocr_engine == PADDLE_OCR_LABEL:
        # The first element is the result for our single image. It may be
        # None or empty when nothing is detected, so normalise it to a list.
        ocr_result = OCR.ocr(image_np)[0] or []
        words = [entry[1][0] for entry in ocr_result]
        # (n_boxes, 4, 2) quadrilaterals; the reshape keeps that rank even
        # when there are zero detections.
        quads = np.asarray([entry[0] for entry in ocr_result], dtype=float).reshape(-1, 4, 2)

        for quad in quads:
            cv2.polylines(image_np, [quad.reshape(-1, 1, 2).astype(np.int32)], True, (0, 255, 255), 3)

        # Axis-aligned bounds of each quadrilateral, rescaled to 0-1000.
        x1 = quads[:, :, 0].min(axis=1) * 1000 / image.width
        y1 = quads[:, :, 1].min(axis=1) * 1000 / image.height
        x2 = quads[:, :, 0].max(axis=1) * 1000 / image.width
        y2 = quads[:, :, 1].max(axis=1) * 1000 / image.height

        # (n_boxes, 4) in xyxy format. Clamp in case a detection pokes slightly
        # outside the image, and convert to plain Python ints for the pipeline.
        boxes = np.clip(np.stack([x1, y1, x2, y2], axis=1), 0, 1000).astype(int).tolist()

    elif ocr_engine == TESSERACT_LABEL:
        words, boxes = apply_tesseract(image, None, "")

        # The boxes are scaled back from the 0-1000 range to pixel
        # coordinates before drawing them on the preview image.
        for bx1, by1, bx2, by2 in boxes:
            top_left = (int(bx1 * image.width / 1000), int(by1 * image.height / 1000))
            bottom_right = (int(bx2 * image.width / 1000), int(by2 * image.height / 1000))
            cv2.rectangle(image_np, top_left, bottom_right, (0, 255, 255), 3)

    else:
        raise ValueError(f"Unsupported ocr_engine={ocr_engine}")

    if not words:
        # No recognised text: there is nothing for the QA model to ground an
        # answer on, so report an empty answer instead of failing downstream.
        return "", 0.0, image_np

    word_boxes = list(zip(words, boxes))
    result = PIPE(image, question, word_boxes)[0]
    return result["answer"], result["score"], image_np
58
+
59
+
60
# Inputs, in the order predict() expects them: the document image (as a PIL
# image), the question text, and the OCR engine selector.
demo_inputs = [
    gr.Image(type="pil"),
    "text",
    gr.Radio([PADDLE_OCR_LABEL, TESSERACT_LABEL]),
]

# Outputs, in the order predict() returns them: answer, score, and the image
# annotated with the OCR boxes.
demo_outputs = [
    gr.Textbox(label="Answer"),
    gr.Number(label="Score"),
    gr.Image(label="OCR results"),
]

# Build the Gradio app around predict() and start serving it.
demo = gr.Interface(fn=predict, inputs=demo_inputs, outputs=demo_outputs)
demo.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ tesseract-ocr5
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://mirror.baidu.com/pypi/simple
2
+ numpy
3
+ torch
4
+ transformers
5
+ paddlepaddle==2.5.1
6
+ paddleocr
7
+ opencv-python-headless
8
+ pytesseract