Spaces:
Runtime error
Runtime error
| # app.py | |
| import gradio as gr | |
| from src.model_loader import load_model | |
| from src.video_utils import process_video_for_internvl3 | |
| from src.ar_prompts import generate_conversation_questions | |
# Load the tokenizer/model pair a single time at import, so every call to the
# evaluation function below reuses the same objects instead of reloading them.
tokenizer, model = load_model()
def evaluate_ar_multi_turn(video):
    """Run the scripted multi-turn AR assessment on an uploaded video.

    The video is preprocessed once, then each question returned by
    ``generate_conversation_questions`` is sent to the model in order, with
    the chat history carried from one turn to the next. Answers to the first
    two turns are produced (they feed the history) but are not shown; the
    answers from the third turn onward are returned.

    Args:
        video: The value supplied by the Gradio video input. It is passed
            unchanged to ``process_video_for_internvl3``. Gradio supplies
            ``None`` when the form is submitted without an upload.

    Returns:
        str: The visible answers joined by blank lines, or a short message
        asking for an upload when ``video`` is ``None``.
    """
    # Without this guard a submit with no upload would forward None into the
    # video preprocessing and surface as an opaque error in the UI.
    if video is None:
        return "Please upload a video before running the evaluation."

    # Number of leading turns whose answers are hidden; only the evaluation
    # and extension answers (turn 3 onward) are shown to the user.
    hidden_turns = 2

    pixel_values, num_patches_list, image_prefix = process_video_for_internvl3(video)
    conversation = generate_conversation_questions(include_descriptions=True)

    history = None
    visible_outputs = []
    for turn, question in enumerate(conversation):
        # The image prefix is attached to the first question only; later
        # turns send the bare question and rely on the carried history.
        prompt = image_prefix + question if turn == 0 else question
        output, history = model.chat(
            tokenizer,
            pixel_values,
            prompt,
            # Built fresh on every call, in case the callee modifies the
            # dict it is given.
            generation_config={"max_new_tokens": 1024},
            num_patches_list=num_patches_list,
            history=history,
            return_history=True,
        )
        if turn >= hidden_turns:
            visible_outputs.append(output)

    # Concatenate the visible answers into one text blob for display.
    return "\n\n".join(visible_outputs)
# Wrap the evaluation function in a one-input (video) / one-output (text)
# Gradio interface, then launch it.
demo = gr.Interface(
    title="InternVL3 AR Evaluation (Multi-turn)",
    description="Upload a short AR video clip. The model will sample frames and conduct a multi-turn dialogue to assess occlusion/rendering/placement/lighting.",
    fn=evaluate_ar_multi_turn,
    inputs=gr.Video(label="Upload your AR video"),
    outputs="text",
)
demo.launch()