invoice_document_headers_extraction_with_donut

Running

App Files Files Community

to-be committed on Feb 11, 2023

Commit

b7221a3

•

1 Parent(s): df954f0

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -5

app.py CHANGED Viewed

@@ -58,7 +58,7 @@ def process_document(image):
     sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
     sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()  # remove first task start token
-    return processor.token2json(sequence)
 description = '<p>Using Donut model finetuned on Invoices for retrieval of following information:</p><ul><li><span style="color:black">DocType</span></span></li><li><span style="color:black">Currency</span></span></li><li><span style="color:black">DocumentDate</span></span></li><li><span style="color:black">GrossAmount</span></span></li><li><span style="color:black">InvoiceNumber</span></span></li><li><span style="color:black">NetAmount</span></span></li><li><span style="color:black">TaxAmount</span></span></li><li><span style="color:black">OrderNumber</span></span></li><li><span style="color:black">CreditorCountry</span></span></li></ul><p>To use it, simply upload your image and click &#39;submit&#39;, or click one of the examples to load them. Read more at the links below.</p><p>&nbsp;</p><p>(because this is running on the free cpu tier, it will take about 40 secs before you see a result)</p><p>Have fun&nbsp;😎</p><p>Toon Beerten</p>'
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
@@ -79,14 +79,20 @@ with gr.Blocks(css=css) as demo:
     gr.Markdown(title)
     gr.Markdown(description)
-    inp = gr.Image(label='Upload invoice here:',elem_id="inp")   #.style(height=400)
     with gr.Row().style():
         with gr.Column(scale=1):
-            gr.Examples([["example.jpg"], ["example_2.jpg"], ["example_3.jpg"]], inputs=[inp])
         with gr.Column():
-            out = gr.JSON(label='Extracted information:')
             btn = gr.Button("Extract")
-    btn.click(fn=process_document, inputs=inp, outputs=out)
 demo.launch()

     sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
     sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()  # remove first task start token
+    return processor.token2json(sequence), image
 description = '<p>Using Donut model finetuned on Invoices for retrieval of following information:</p><ul><li><span style="color:black">DocType</span></span></li><li><span style="color:black">Currency</span></span></li><li><span style="color:black">DocumentDate</span></span></li><li><span style="color:black">GrossAmount</span></span></li><li><span style="color:black">InvoiceNumber</span></span></li><li><span style="color:black">NetAmount</span></span></li><li><span style="color:black">TaxAmount</span></span></li><li><span style="color:black">OrderNumber</span></span></li><li><span style="color:black">CreditorCountry</span></span></li></ul><p>To use it, simply upload your image and click &#39;submit&#39;, or click one of the examples to load them. Read more at the links below.</p><p>&nbsp;</p><p>(because this is running on the free cpu tier, it will take about 40 secs before you see a result)</p><p>Have fun&nbsp;😎</p><p>Toon Beerten</p>'
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
     gr.Markdown(title)
     gr.Markdown(description)
     with gr.Row().style():
         with gr.Column(scale=1):
+            inp = gr.Image(label='Upload invoice here:')   #.style(height=400)
         with gr.Column():
+             gr.Examples([["example.jpg"], ["example_2.jpg"], ["example_3.jpg"]], inputs=[inp])
+    with gr.Row().style():
             btn = gr.Button("Extract")
+    with gr.Row().style():
+        with gr.Column(scale=1):
+            imgout = gr.Image(label='Uploaded document:',elem_id="inp")
+        with gr.Column(scale=1):
+            jsonout = gr.JSON(label='Extracted information:')
+    btn.click(fn=process_document, inputs=inp, outputs=[jsonout,imgout])
 demo.launch()