Spaces:

cd2412
/

VQA_Memory__RL_Controller

Running

App Files Community

cd2412 committed on Aug 31

Commit

cb277bc

verified ·

1 Parent(s): 72a1f41

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -93

app.py CHANGED Viewed

@@ -1,131 +1,63 @@
-# app.py
-"""
-VQA — Memory + RL Controller (Gradio app)
-- Drag-and-drop an image, ask a question, and see the model's answer + chosen strategy.
-- Tries to import `answer_with_controller` from controller.py. Falls back to a stub if missing.
-- Works on Hugging Face Spaces, Render, Docker, or local run.
-"""
-import os
-import sys
-import time
-import traceback
-import subprocess
 from typing import Tuple, Optional
-# Ensure gradio is available when running locally; Spaces installs from requirements.txt
 try:
     import gradio as gr
-except ImportError:  # pragma: no cover
     subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "gradio"])
     import gradio as gr
-from PIL import Image
-# -----------------------------
-# Attempt to import real handler
-# -----------------------------
 def _make_fallback():
-    def _fallback_answer_with_controller(
-        image: Image.Image,
-        question: str,
-        source: str = "auto",
-        distilled_model: str = "auto",
-    ) -> Tuple[str, str, int]:
-        # Replace with real inference to remove this placeholder.
         return "Placeholder answer (wire your models in controller.py).", "baseline", 0
     return _fallback_answer_with_controller
 try:
-    # Expect controller.py to define: answer_with_controller(image, question, source, distilled_model)
-    from controller import answer_with_controller  # type: ignore
 except Exception as e:
     print(f"[WARN] Using fallback controller because import failed: {e}", flush=True)
     answer_with_controller = _make_fallback()
-# -----------------------------
-# UI Constants
-# -----------------------------
 TITLE = "VQA — Memory + RL Controller"
-DESCRIPTION = (
-    "Upload an image, enter a question, and the controller will choose the best decoding strategy."
-)
 CONTROLLER_SOURCES = ["auto", "distilled", "ppo", "baseline"]
 DISTILLED_CHOICES = ["auto", "logreg", "mlp32"]
-# -----------------------------
-# Inference wrapper with guards
-# -----------------------------
-def vqa_demo_fn(
-    image: Optional[Image.Image],
-    question: str,
-    source: str,
-    distilled_model: str,
-) -> Tuple[str, str, float]:
-    """Safely run inference and return (answer, strategy_label, latency_ms)."""
-    # Input validation
     if image is None:
         return "Please upload an image.", "", 0.0
     question = (question or "").strip()
     if not question:
         return "Please enter a question.", "", 0.0
-    # Convert & measure latency
     t0 = time.perf_counter()
     try:
-        # Convert to RGB to avoid issues with PNG/L mode
         image_rgb = image.convert("RGB")
         pred, strategy_name, action_id = answer_with_controller(
-            image_rgb,
-            question,
-            source=source,
-            distilled_model=distilled_model,
         )
         latency_ms = (time.perf_counter() - t0) * 1000.0
-        # Friendly formatting
-        strategy_out = f"{action_id} → {strategy_name}"
-        return str(pred), strategy_out, round(latency_ms, 1)
     except Exception as err:
-        # Never crash the app — show a concise error to the user and log details to server
         latency_ms = (time.perf_counter() - t0) * 1000.0
         print("[ERROR] Inference failed:\n" + "".join(traceback.format_exc()), flush=True)
         return f"Error: {err}", "error", round(latency_ms, 1)
-# -----------------------------
-# Build Gradio Interface
-# -----------------------------
 with gr.Blocks(title=TITLE, analytics_enabled=False) as demo:
     gr.Markdown(f"### {TITLE}\n{DESCRIPTION}")
     with gr.Row():
         with gr.Column():
             img_in = gr.Image(
                 type="pil",
                 label="Image",
                 height=320,
-                sources=["upload", "drag-and-drop", "clipboard", "webcam"],
-                image_mode="RGB",
-            )
-            q_in = gr.Textbox(
-                label="Question",
-                placeholder="e.g., What colour is the bus?",
-                lines=2,
-                max_lines=4,
-            )
-            source_in = gr.Radio(
-                CONTROLLER_SOURCES,
-                value="auto",
-                label="Controller Source",
-            )
-            dist_in = gr.Radio(
-                DISTILLED_CHOICES,
-                value="auto",
-                label="Distilled Gate (if used)",
             )
             run_btn = gr.Button("Predict", variant="primary")
         with gr.Column():
             ans_out = gr.Textbox(label="Answer", interactive=False, lines=3, max_lines=6)
@@ -139,17 +71,7 @@ with gr.Blocks(title=TITLE, analytics_enabled=False) as demo:
         api_name="predict",
     )
-# -----------------------------
-# Launch
-# -----------------------------
 if __name__ == "__main__":
-    # Respect $PORT for Spaces/Render/Docker; default to 7860 locally
     port = int(os.getenv("PORT", "7860"))
-    # Queue improves robustness under load
     demo.queue(concurrency_count=2)
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=port,
-        share=False,          # set True only for local quick sharing
-        show_error=True,
-    )

+# app.py (fixed)
+import os, sys, time, traceback, subprocess
 from typing import Tuple, Optional
+from PIL import Image
 try:
     import gradio as gr
+except ImportError:
     subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "gradio"])
     import gradio as gr
 def _make_fallback():
+    def _fallback_answer_with_controller(image, question, source="auto", distilled_model="auto"):
         return "Placeholder answer (wire your models in controller.py).", "baseline", 0
     return _fallback_answer_with_controller
 try:
+    from controller import answer_with_controller
 except Exception as e:
     print(f"[WARN] Using fallback controller because import failed: {e}", flush=True)
     answer_with_controller = _make_fallback()
 TITLE = "VQA — Memory + RL Controller"
+DESCRIPTION = "Upload an image, enter a question, and the controller will choose the best decoding strategy."
 CONTROLLER_SOURCES = ["auto", "distilled", "ppo", "baseline"]
 DISTILLED_CHOICES = ["auto", "logreg", "mlp32"]
+def vqa_demo_fn(image: Optional[Image.Image], question: str, source: str, distilled_model: str) -> Tuple[str, str, float]:
     if image is None:
         return "Please upload an image.", "", 0.0
     question = (question or "").strip()
     if not question:
         return "Please enter a question.", "", 0.0
     t0 = time.perf_counter()
     try:
         image_rgb = image.convert("RGB")
         pred, strategy_name, action_id = answer_with_controller(
+            image_rgb, question, source=source, distilled_model=distilled_model
         )
         latency_ms = (time.perf_counter() - t0) * 1000.0
+        return str(pred), f"{action_id} → {strategy_name}", round(latency_ms, 1)
     except Exception as err:
         latency_ms = (time.perf_counter() - t0) * 1000.0
         print("[ERROR] Inference failed:\n" + "".join(traceback.format_exc()), flush=True)
         return f"Error: {err}", "error", round(latency_ms, 1)
 with gr.Blocks(title=TITLE, analytics_enabled=False) as demo:
     gr.Markdown(f"### {TITLE}\n{DESCRIPTION}")
     with gr.Row():
         with gr.Column():
             img_in = gr.Image(
                 type="pil",
                 label="Image",
                 height=320,
+                sources=["upload", "webcam", "clipboard"],  # ✅ fixed
             )
+            q_in = gr.Textbox(label="Question", placeholder="e.g., What colour is the bus?", lines=2, max_lines=4)
+            source_in = gr.Radio(CONTROLLER_SOURCES, value="auto", label="Controller Source")
+            dist_in = gr.Radio(DISTILLED_CHOICES, value="auto", label="Distilled Gate (if used)")
             run_btn = gr.Button("Predict", variant="primary")
         with gr.Column():
             ans_out = gr.Textbox(label="Answer", interactive=False, lines=3, max_lines=6)
         api_name="predict",
     )
 if __name__ == "__main__":
     port = int(os.getenv("PORT", "7860"))
     demo.queue(concurrency_count=2)
+    demo.launch(server_name="0.0.0.0", server_port=port, share=False, show_error=True)