"""Gradio demo: run the ``nougat`` OCR command-line tool on an uploaded PDF.

The uploaded file is passed to the ``nougat`` CLI, which is asked to write
its multimarkdown (``.mmd``) result into the ``output`` folder; that file is
then read back and rendered in a Markdown component.
"""

import subprocess
from pathlib import Path

import gradio as gr

# Folder handed to the CLI via ``--out`` and read back from in ``predict``.
OUTPUT_DIR = Path("output")


def nougat_ocr(file_name):
    """Invoke the ``nougat`` CLI on *file_name*.

    The command's stdout/stderr are captured rather than streamed. A
    non-zero exit status is reported on stdout (together with the captured
    stderr) so a failed conversion is visible in the logs; no exception is
    raised for it, matching the previous best-effort behaviour.

    Args:
        file_name: Path of the PDF to convert.

    Returns:
        None. The result is the ``.mmd`` file the CLI is asked to write
        into ``output``.
    """
    print('******* inside nougat_ocr *******')

    # CLI command to run (argument list, so no shell is involved).
    cli_command = [
        'nougat',
        '--out', str(OUTPUT_DIR),
        'pdf', f'{file_name}',
        '--checkpoint', 'nougat',
    ]

    # Run the command; the .mmd file is expected in the output folder.
    completed = subprocess.run(
        cli_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if completed.returncode != 0:
        # Previously the result was discarded, hiding the reason for a
        # later "file not found" when reading the .mmd output.
        print(f"nougat exited with status {completed.returncode}")
        print(completed.stderr)
    return


def predict(pdf_file):
    """Convert an uploaded PDF and return the generated multimarkdown text.

    Args:
        pdf_file: The value of the ``gr.File`` input; its ``.name``
            attribute is used as the path of the uploaded file. ``None``
            when nothing has been uploaded.

    Returns:
        The contents of ``output/<pdf stem>.mmd`` as a string.

    Raises:
        gr.Error: If no file was uploaded.
        FileNotFoundError: If the expected ``.mmd`` file does not exist
            after the CLI run.
    """
    print('******* inside predict *******')
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file first.")

    print(f"temporary file - {pdf_file.name}")
    # Strip only the final extension: names such as "2308.13418.pdf" must
    # map to "2308.13418", not "2308".
    pdf_name = Path(pdf_file.name).stem
    print(f"pdf file name - {pdf_name}")

    # Get prediction for a PDF using nougat
    nougat_ocr(pdf_file.name)
    print("BAACCKKK")

    # Open the multimarkdown (.mmd) file for reading
    mmd_path = OUTPUT_DIR / f"{pdf_name}.mmd"
    with open(mmd_path, 'r', encoding='utf-8') as file:
        content = file.read()

    return content


with gr.Blocks() as demo:
    # NOTE(review): these header strings contain no HTML tags or links;
    # presumably markup was lost at some point - confirm the intended HTML.
    gr.HTML("\n\nNougat: Neural Optical Understanding for Academic Documents\n\n")
    gr.HTML("\n\nLukas Blecher et al. Paper, Project\n\n")

    with gr.Row():
        pdf_file = gr.File(label='Upload a PDF', scale=1)
        mkd = gr.Markdown('\n\nOR\n\n', scale=1)
        # This textbox is displayed but not passed to any event handler
        # below, so a link entered here is currently ignored.
        pdf_link = gr.Textbox(
            placeholder='Enter an arxiv link here',
            label='Provide a link',
            scale=1,
        )

    btn = gr.Button()
    parsed_output = gr.Markdown()

    btn.click(predict, pdf_file, parsed_output)

demo.queue()
demo.launch(debug=True)