gaunernst committed on
Commit
71a9e68
1 Parent(s): 64e942f

switch to Docker space

Browse files
Files changed (5) hide show
  1. Dockerfile +13 -0
  2. README.md +2 -5
  3. app.py +1 -19
  4. packages.txt +0 -2
  5. requirements.txt +0 -9
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+
3
+ ARG PIP_NO_CACHE_DIR=1
4
+
5
+ RUN apt install tesseract-ocr
6
+ RUN wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb \
7
+ && dpkg -i libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb
8
+
9
+ RUN pip install torch==2.1.1 -i https://download.pytorch.org/whl/cpu
10
+ RUN pip install paddlepaddle==2.5.1 -i https://mirror.baidu.com/pypi/simple
11
+ RUN pip install transformers pytesseract paddleocr gradio Pillow
12
+
13
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,9 @@
1
  ---
2
  title: Layoutlm Docvqa Paddleocr
3
- emoji: 🏃
4
  colorFrom: indigo
5
  colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 4.8.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Layoutlm Docvqa Paddleocr
3
+ emoji: 📄
4
  colorFrom: indigo
5
  colorTo: yellow
6
+ sdk: docker
 
 
 
7
  ---
8
 
9
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,19 +1,3 @@
1
- import sys
2
-
3
- if sys.platform == "linux":
4
- try:
5
- import paddle
6
-
7
- except ImportError:
8
- import os
9
-
10
- # install libssl1.1 on HF spaces
11
- os.system(
12
- "wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb"
13
- )
14
- os.system("dpkg -x libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb .")
15
- os.environ["LD_LIBRARY_PATH"] = os.environ.get("LD_LIBRARY_PATH", "") + ":./usr/lib/x86_64-linux-gnu"
16
-
17
  import cv2
18
  import gradio as gr
19
  import numpy as np
@@ -45,9 +29,7 @@ def predict(image: Image.Image, question: str, ocr_engine: str):
45
  boxes = np.asarray([x[0] for x in ocr_result]) # (n_boxes, 4, 2)
46
 
47
  for box in boxes:
48
- cv2.polylines(
49
- image_np, [box.reshape(-1, 1, 2).astype(int)], True, (0, 255, 255), 3
50
- )
51
 
52
  x1 = boxes[:, :, 0].min(1) * 1000 / image.width
53
  y1 = boxes[:, :, 1].min(1) * 1000 / image.height
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import cv2
2
  import gradio as gr
3
  import numpy as np
 
29
  boxes = np.asarray([x[0] for x in ocr_result]) # (n_boxes, 4, 2)
30
 
31
  for box in boxes:
32
+ cv2.polylines(image_np, [box.reshape(-1, 1, 2).astype(int)], True, (0, 255, 255), 3)
 
 
33
 
34
  x1 = boxes[:, :, 0].min(1) * 1000 / image.width
35
  y1 = boxes[:, :, 1].min(1) * 1000 / image.height
packages.txt DELETED
@@ -1,2 +0,0 @@
1
- sudo
2
- tesseract-ocr
 
 
 
requirements.txt DELETED
@@ -1,9 +0,0 @@
1
- --index-url https://download.pytorch.org/whl/cpu
2
- --extra-index-url https://mirror.baidu.com/pypi/simple
3
- numpy
4
- torch
5
- transformers
6
- paddlepaddle==2.5.1
7
- paddleocr
8
- opencv-python-headless
9
- pytesseract