Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import warnings | |
| import os | |
| import pix2struct, layoutlm, donut | |
# Suppress all warnings for the lifetime of the process so library noise
# does not clutter the demo's logs.
warnings.filterwarnings("ignore")

# Markdown description passed to the interface; one entry per line of text.
desc = "\n".join((
    "Step into the DocVQA Sanctum, where three formidable models stand ready to tackle your document queries head-on! Discover the prowess of LayoutLM, Pix2Struct, and Donut as they decode your document images and provide insightful answers to your questions.",
    "From LayoutLM's adept layout analysis to Pix2Struct's prowess in structural understanding and Donut's skill in content comprehension, this demo offers a captivating showcase of cutting-edge document visual question answering (DocVQA) technologies.",
    "**Please Note:** Kindly allow a few moments for result generation, as the models are currently being inferred on CPU.",
    "For a brief overview of what document visual question answering is, check out my latest blog post [here](https://medium.com/@krishnapal2308/understanding-docvqa-document-visual-question-answering-9e3db222bfed).",
))
def process_image_and_generate_output(image, model_selection, question):
    """Answer ``question`` about ``image`` with the selected DocVQA model.

    Args:
        image: The uploaded image value, or ``None`` when nothing was
            provided. It is forwarded unchanged to the model backend.
        model_selection: Name of the backend to use: ``"LayoutLM"``,
            ``"Pix2Struct"`` or ``"Donut"``.
        question: The question forwarded unchanged to the model backend.

    Returns:
        The value returned by the chosen backend's ``get_result``; the
        message ``"Please select an image"`` when ``image`` is ``None``;
        or an empty string when ``model_selection`` matches no backend.
    """
    if image is None:
        # Return a single value like every other path: this function feeds
        # one text output, so a (message, None) tuple is the wrong shape.
        return "Please select an image"

    if model_selection == "LayoutLM":
        return layoutlm.get_result(image, question)
    if model_selection == "Pix2Struct":
        return pix2struct.get_result(image, question)
    if model_selection == "Donut":
        return donut.get_result(image, question)

    # Missing or unrecognised selection: keep the original empty result.
    return ""
# Example rows offered in the UI: [image path, model name, question].
# All three examples use the same bundled image, resolved relative to this
# file's directory.
_sample_image_path = os.path.join(os.path.dirname(__file__), "images/1.png")
sample_images = [
    [_sample_image_path, model_name, sample_question]
    for model_name, sample_question in (
        ("LayoutLM", "What is the NIC Code?"),
        ("Pix2Struct", "What is the Age Group?"),
        ("Donut", "What is the Industry Group?"),
    )
]
# Image upload component; the callback receives the value as a file path.
image_input = gr.Image(type="filepath", label="Upload Image")

# Radio group for picking which model answers the question.
model_selection_input = gr.Radio(
    choices=["LayoutLM", "Pix2Struct", "Donut"],
    label="Choose Model",
)

# Free-text field for the question to ask about the document.
question_input = gr.Text(label="Question")

# Wire the three inputs to the callback and show its result in a text box.
iface = gr.Interface(
    fn=process_image_and_generate_output,
    inputs=[image_input, model_selection_input, question_input],
    outputs=gr.Text(label="Result"),
    examples=sample_images,
    title="DocVQA Sanctum",
    description=desc,
    allow_flagging="never",
)

# Start the app when this module is executed.
iface.launch()