| import gradio as gr | |
| from transformers import AutoProcessor, AutoModelForVisualQuestionAnswering, AutoModelForCausalLM, AutoTokenizer | |
| from PIL import Image | |
| import torch | |
# Checkpoint / dataset configuration.
# GIT (GenerativeImage2Text) base model fine-tuned on VQAv2 for visual question answering.
model_path = "microsoft/git-base-vqav2"
dataset_name = "Multimodal-Fatima/OK-VQA_train"

# Load the model and tokenizer once at import time, both from the same
# checkpoint constant (the original loaded the model from a duplicated
# hard-coded string before model_path was even defined).
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
def main():
    """Launch a Gradio demo that answers a free-form question about an image.

    Fixes over the original:
      * ``fn=main`` passed the launcher itself as the callback (recursive
        relaunch on every submit) — replaced with a real predict function.
      * ``questions`` was referenced but never defined (NameError) — the UI
        now takes an image upload and a typed question instead of a slider.
      * ``AutoProcessor`` was imported but never used; GIT needs it to turn
        the image into pixel values.
    """
    # The processor converts PIL images into the pixel_values tensor GIT expects.
    processor = AutoProcessor.from_pretrained(model_path)

    def predict(image, question):
        """Return the model's answer string for (image, question)."""
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        input_ids = tokenizer(question, return_tensors="pt").input_ids
        with torch.no_grad():  # inference only — no gradients needed
            generated_ids = model.generate(
                pixel_values=pixel_values,
                input_ids=input_ids,
                max_length=50,
            )
        # NOTE(review): GIT's generated sequence typically echoes the question
        # prefix before the answer — confirm whether callers want it stripped.
        return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    demo = gr.Interface(
        fn=predict,
        inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
        outputs=gr.Textbox(label="Answer"),
    )
    demo.launch(share=True)


if __name__ == "__main__":
    # The original never called main(), so the script did nothing when run.
    main()