Spaces:

riteshkokam
/

MedAI

Runtime error

App Files Community

riteshkokam committed on Jun 17

Commit

bf7dabd

verified ·

1 Parent(s): db2b3cb

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -23

app.py CHANGED Viewed

@@ -1,17 +1,18 @@
 # app.py
 import gradio as gr
 import torch
-from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, pipeline
 from gtts import gTTS
 import tempfile
 class AIDoctor:
-    def __init__(self, model_name="Qwen/Qwen2.5-VL-7B-Instruct"):
         self.device = "cpu"
-        print(f"💻 Running on {self.device}")
-        self.proc = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-            model_name,
             torch_dtype=torch.float32,
             trust_remote_code=True
         ).to(self.device)
@@ -22,7 +23,7 @@ class AIDoctor:
             return "Please upload a medical image."
         prompt = question or "Please analyze this medical image for any abnormalities."
         inputs = self.proc(images=image, text=prompt, return_tensors="pt").to(self.device)
-        outputs = self.model.generate(**inputs, max_new_tokens=200, temperature=0.7)
         return self.proc.decode(outputs[0], skip_special_tokens=True).strip()
     def tts(self, text):
@@ -32,27 +33,28 @@ class AIDoctor:
         return file
     def respond(self, image, audio, text):
-        question = text.strip()
         if audio:
-            res = self.stt(audio)
-            q = res.get("text", "").strip() if isinstance(res, dict) else str(res).strip()
-            if q:
-                question = q
-        resp = self.analyze(image, question)
         voice = self.tts(resp)
-        return resp, voice, question
 doctor = AIDoctor()
-with gr.Blocks(title="🏥 AI Doctor (Qwen 2.5‑VL‑7B)") as demo:
-    gr.Markdown("## AI Doctor with **Qwen 2.5‑VL‑7B‑Instruct** (Vision + Voice)")
     with gr.Row():
-        img = gr.Image(label="Upload medical image", type="pil")
-        aud_in = gr.Audio(label="Ask by voice", type="filepath")
-    txt = gr.Textbox(label="Ask by text", lines=2)
-    out_txt = gr.Textbox(label="AI Response", lines=10)
-    out_aud = gr.Audio(label="AI Speaks", type="filepath")
     q_out = gr.Textbox(label="Processed Question")
     btn = gr.Button("Ask Doctor")
-    btn.click(fn=doctor.respond, inputs=[img, aud_in, txt], outputs=[out_txt, out_aud, q_out])
-demo.launch()

 # app.py
 import gradio as gr
 import torch
+from transformers import AutoProcessor, AutoModelForVision2Seq, pipeline
 from gtts import gTTS
 import tempfile
+from PIL import Image
 class AIDoctor:
+    def __init__(self, vision_model="meta-llama/Llama-3.2-11B-Vision"):
         self.device = "cpu"
+        print(f"🖥️ Using device: {self.device}")
+        self.proc = AutoProcessor.from_pretrained(vision_model, trust_remote_code=True)
+        self.model = AutoModelForVision2Seq.from_pretrained(
+            vision_model,
             torch_dtype=torch.float32,
             trust_remote_code=True
         ).to(self.device)
             return "Please upload a medical image."
         prompt = question or "Please analyze this medical image for any abnormalities."
         inputs = self.proc(images=image, text=prompt, return_tensors="pt").to(self.device)
+        outputs = self.model.generate(**inputs, max_new_tokens=256, temperature=0.7)
         return self.proc.decode(outputs[0], skip_special_tokens=True).strip()
     def tts(self, text):
         return file
     def respond(self, image, audio, text):
+        q = text.strip()
         if audio:
+            result = self.stt(audio)
+            trans = result.get("text", "").strip() if isinstance(result, dict) else str(result)
+            if trans:
+                q = trans
+        resp = self.analyze(image, q)
         voice = self.tts(resp)
+        return resp, voice, q
 doctor = AIDoctor()
+with gr.Blocks(title="🏥 AI Doctor with Llama 3.2 Vision") as demo:
+    gr.Markdown("## AI Doctor — Vision + Voice using Llama‑3.2‑11B‑Vision")
     with gr.Row():
+        img = gr.Image(label="Medical Image", type="pil")
+        aud_input = gr.Audio(label="Ask by voice", type="filepath")
+    txt_input = gr.Textbox(label="Ask by text", lines=2)
+    resp_out = gr.Textbox(label="AI Response", lines=10)
+    aud_out = gr.Audio(label="AI Speaks", type="filepath")
     q_out = gr.Textbox(label="Processed Question")
     btn = gr.Button("Ask Doctor")
+    btn.click(fn=doctor.respond, inputs=[img, aud_input, txt_input],
+              outputs=[resp_out, aud_out, q_out])
+demo.launch()