# Hugging Face Space: image question answering (VQA) demo.
"""Earlier full FastAPI + Gradio version, kept for reference:

from fastapi import FastAPI
from fastapi.responses import RedirectResponse
import gradio as gr
from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, AutoTokenizer, AutoModelForCausalLM
from PIL import Image
import torch
import fitz  # PyMuPDF for PDF

app = FastAPI()

# ========== Image QA Setup ==========
vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")

def answer_question_from_image(image, question):
    if image is None or not question.strip():
        return "Please upload an image and ask a question."
    inputs = vqa_processor(image, question, return_tensors="pt")
    with torch.no_grad():
        outputs = vqa_model(**inputs)
    predicted_id = outputs.logits.argmax(-1).item()
    return vqa_model.config.id2label[predicted_id]

# ========== Gradio Interfaces ==========
img_interface = gr.Interface(
    fn=answer_question_from_image,
    inputs=[gr.Image(label="Upload Image"), gr.Textbox(label="Ask a Question")],
    outputs="text",
    title="Image Question Answering"
)

# ========== Combine and Mount ==========
demo = gr.TabbedInterface( img_interface , "Image QA")
app = gr.mount_gradio_app(app, demo, path="/")

@app.get("/")
def root():
    return RedirectResponse(url="/")
"""
from transformers import ViltProcessor, ViltForQuestionAnswering
import torch

# Name of the pretrained ViLT checkpoint fine-tuned for visual QA.
_VQA_CHECKPOINT = "dandelin/vilt-b32-finetuned-vqa"

# Load the processor and model once at import time so every call to
# answer_question_from_image() reuses the same instances.
vqa_processor = ViltProcessor.from_pretrained(_VQA_CHECKPOINT)
vqa_model = ViltForQuestionAnswering.from_pretrained(_VQA_CHECKPOINT)
def answer_question_from_image(image, question):
    """Answer a natural-language question about an image with the ViLT VQA model.

    Args:
        image: A PIL image (or None when nothing was uploaded).
        question: The question text; blank/whitespace counts as missing.

    Returns:
        The model's highest-scoring answer label as a string, or a prompt
        message when either input is missing.
    """
    # Guard clause: both inputs are required before running the model.
    if image is None or not question.strip():
        return "Please upload an image and ask a question."
    # ViLT's processor expects 3-channel RGB pixel data; uploads are often
    # RGBA/grayscale/palette images, which would fail inside the processor.
    if hasattr(image, "convert"):
        image = image.convert("RGB")
    inputs = vqa_processor(image, question, return_tensors="pt")
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        outputs = vqa_model(**inputs)
    # argmax over the answer vocabulary, then map the id to its label.
    predicted_id = outputs.logits.argmax(-1).item()
    return vqa_model.config.id2label[predicted_id]