kneelesh48 committed on
Commit
bbd8f5d
1 Parent(s): 8064bb2

Add application file

Browse files
app_blocks.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pytesseract
3
+ from PIL import Image
4
+
5
def tesseract_ocr(filepath, languages):
    """Run Tesseract OCR on an image file and return the recognized text.

    Args:
        filepath: Path of the image to read, as produced by the
            ``gr.Image(type="filepath")`` input. Falsy when no image was given.
        languages: Tesseract language codes selected in the UI, e.g.
            ``["jpn", "eng"]``. When empty or ``None``, ``lang=None`` is
            passed so pytesseract applies its own default.

    Returns:
        The text returned by ``pytesseract.image_to_string``, or an empty
        string when no image was provided.
    """
    if not filepath:
        # Nothing was uploaded; return no text instead of failing in Image.open.
        return ""
    # Tesseract's -l option takes multiple languages joined with "+"
    # ("jpn+eng"); a ", "-separated string is read as one unknown language.
    lang = "+".join(languages) if languages else None
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang)
8
+
9
# Static page content: the title is used both as the browser tab title and as
# the centered heading; the article is the HTML footer with reference links.
title = "Tesseract OCR"
description = "Gradio demo for Tesseract. Tesseract is an open source text recognition (OCR) Engine."
article = "<p style='text-align: center'><a href='https://tesseract-ocr.github.io/' target='_blank'>Tesseract documentation</a> | <a href='https://github.com/tesseract-ocr/tesseract' target='_blank'>Github Repo</a></p>"
# Each example row is [image path, language codes], matching the
# (image, languages) inputs wired to gr.Examples below.
examples = [
    ["examples/eurotext.png", ["eng"]],
    ["examples/tesseract_sample.png", ["jpn", "eng"]],
    ["examples/chi.jpg", ["HanS", "HanT"]],
]

with gr.Blocks(title=title) as demo:
    gr.Markdown(f'<h1 style="text-align: center; margin-bottom: 1rem;">{title}</h1>')
    gr.Markdown(description)
    with gr.Row():
        # Left column: image input, language selection and action buttons.
        with gr.Column():
            image = gr.Image(type="filepath", label="Input")
            # Languages reported by the local Tesseract installation.
            choices = pytesseract.get_languages()
            with gr.Accordion("Languages", open=False):
                # Offer each installed language exactly once; repeating the
                # list would render duplicate checkboxes for every language.
                languages = gr.CheckboxGroup(choices, type="value", value=["eng"], label='language')
            with gr.Row():
                btn_clear = gr.ClearButton([image, languages])
                btn_submit = gr.Button(value="Submit", variant="primary")
        # Right column: recognized text.
        with gr.Column():
            text = gr.Textbox(label="Output")

    btn_submit.click(tesseract_ocr, inputs=[image, languages], outputs=text)
    # The output box is created after the clear button, so register it here.
    btn_clear.add(text)

    gr.Examples(
        examples=examples,
        inputs=[image, languages],
    )

    gr.Markdown(article)

if __name__ == '__main__':
    demo.launch()
app_interface.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytesseract
2
+ from PIL import Image
3
+
4
+ import gradio as gr
5
+
6
def tesseract_ocr(filepath, languages):
    """Run Tesseract OCR on an image file and return the recognized text.

    Args:
        filepath: Path of the image to read, as produced by the
            ``gr.Image(type="filepath")`` input. Falsy when no image was given.
        languages: Tesseract language codes selected in the UI, e.g.
            ``['jpn', 'eng']``. When empty or ``None``, ``lang=None`` is
            passed so pytesseract applies its own default.

    Returns:
        The text returned by ``pytesseract.image_to_string``, or an empty
        string when no image was provided.
    """
    if not filepath:
        # Nothing was uploaded; return no text instead of failing in Image.open.
        return ""
    # Tesseract's -l option takes multiple languages joined with "+"
    # ("jpn+eng"); a ", "-separated string is read as one unknown language,
    # and an empty selection must not be sent as an empty language name.
    lang = "+".join(languages) if languages else None
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang)
9
+
10
# Static page content passed to gr.Interface: heading, short description and
# an HTML footer with reference links.
title = "Tesseract OCR"
description = "Gradio demo for Tesseract. Tesseract is an open source text recognition (OCR) Engine."
article = "<p style='text-align: center'><a href='https://tesseract-ocr.github.io/' target='_blank'>Tesseract documentation</a> | <a href='https://github.com/tesseract-ocr/tesseract' target='_blank'>Github Repo</a></p>"
# Each example row is [image path, language codes], matching the two inputs
# of the interface below.
examples = [
    ['examples/eurotext.png', ['eng']],
    ['examples/tesseract_sample.png', ['jpn', 'eng']],
    ['examples/chi.jpg', ['HanS', 'HanT']]
]

# Languages reported by the local Tesseract installation.
choices = pytesseract.get_languages()

demo = gr.Interface(
    fn=tesseract_ocr,
    inputs=[
        gr.Image(type="filepath", label="Input"),
        # Offer each installed language exactly once; repeating the list
        # would render duplicate checkboxes for every language.
        gr.CheckboxGroup(choices, type="value", value=['eng'], label='language')
    ],
    outputs='text',
    title=title,
    description=description,
    article=article,
    examples=examples,
)

if __name__ == '__main__':
    # Enable queuing through queue() rather than launch(enable_queue=True):
    # that launch() parameter was removed in Gradio 4, while queue() is
    # available in both 3.x and 4.x.
    demo.queue().launch(server_port=7861)
examples/F4BB08E1-08B9-448A-ADDD-B017EE3CF617.jpg ADDED
examples/Screenshot_6.png ADDED
examples/chi.jpg ADDED
examples/eurotext.png ADDED
examples/image0.jfif ADDED
Binary file (360 kB). View file
 
examples/tesseract_sample.png ADDED
examples/unknown (1).png ADDED
examples/weird_unicode_math_symbols.png ADDED
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ tesseract-ocr-all
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ pytesseract