riteshkokam committed
Commit 383638b · verified · 1 Parent(s): 160ecdd

Update app.py

Files changed (1)
  1. app.py +33 -26
app.py CHANGED
@@ -1,53 +1,60 @@
  # app.py
  import gradio as gr
  import torch
- from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
  from gtts import gTTS
  import tempfile

  class AIDoctor:
-     def __init__(self, model_name="lintw/HealthGPT-M3"):
          self.device = "cpu"
-         print(f"⚙️ Using device: {self.device}")
-         self.proc = AutoProcessor.from_pretrained(model_name, local_files_only=False, trust_remote_code=True)
-         self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32, local_files_only=False, trust_remote_code=True).to(self.device)
          self.stt = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)

      def analyze(self, image, question):
          if image is None:
              return "Please upload a medical image."
-         prompt = question or "What do you observe in this medical image?"
-         inputs = self.proc(text=prompt, images=image, return_tensors="pt").to(self.device)
-         outputs = self.model.generate(**inputs, max_new_tokens=200, temperature=0.7)
          return self.proc.decode(outputs[0], skip_special_tokens=True).strip()

      def tts(self, text):
          tts = gTTS(text=text, lang="en")
-         path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
-         tts.save(path)
-         return path
-
      def respond(self, image, audio, text):
-         question = text.strip()
          if audio:
-             trans_res = self.stt(audio)
-             q_trans = trans_res.get("text", "").strip() if isinstance(trans_res, dict) else str(trans_res)
-             if q_trans:
-                 question = q_trans
-         resp = self.analyze(image, question)
          voice = self.tts(resp)
-         return resp, voice, question

  doctor = AIDoctor()

- with gr.Blocks() as demo:
-     gr.Markdown("## 🏥 AI Doctor with HealthGPTM3 (CPU-optimized)")
-     img_in = gr.Image(label="Medical Image", type="pil")
-     aud_in = gr.Audio(label="Ask by voice", type="filepath")
-     txt_in = gr.Textbox(label="Ask by text")
      resp_out = gr.Textbox(label="AI Response", lines=10)
      aud_out = gr.Audio(label="AI Speaks", type="filepath")
      q_out = gr.Textbox(label="Processed Question")
      btn = gr.Button("Ask Doctor")
-     btn.click(fn=doctor.respond, inputs=[img_in, aud_in, txt_in], outputs=[resp_out, aud_out, q_out])
-     demo.launch()

  # app.py
  import gradio as gr
  import torch
+ from transformers import AutoProcessor, AutoModelForVision2Seq, pipeline
  from gtts import gTTS
  import tempfile
+ from PIL import Image

  class AIDoctor:
+     def __init__(self, vision_model="meta-llama/Llama-3.2-11B-Vision-Instruct"):
          self.device = "cpu"
+         print(f"🖥️ Using device: {self.device}")
+         self.proc = AutoProcessor.from_pretrained(vision_model, trust_remote_code=True)
+         self.model = AutoModelForVision2Seq.from_pretrained(
+             vision_model,
+             torch_dtype=torch.float32,
+             trust_remote_code=True
+         ).to(self.device)
          self.stt = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)

      def analyze(self, image, question):
          if image is None:
              return "Please upload a medical image."
+         prompt = question or "Please analyze this medical image for any abnormalities."
+         inputs = self.proc(images=image, text=prompt, return_tensors="pt").to(self.device)
+         outputs = self.model.generate(**inputs, max_new_tokens=256, temperature=0.7)
          return self.proc.decode(outputs[0], skip_special_tokens=True).strip()

      def tts(self, text):
          tts = gTTS(text=text, lang="en")
+         file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
+         tts.save(file)
+         return file
+
      def respond(self, image, audio, text):
+         q = text.strip()
          if audio:
+             result = self.stt(audio)
+             trans = result.get("text", "").strip() if isinstance(result, dict) else str(result)
+             if trans:
+                 q = trans
+         resp = self.analyze(image, q)
          voice = self.tts(resp)
+         return resp, voice, q

  doctor = AIDoctor()

+ with gr.Blocks(title="🏥 AI Doctor with Llama 3.2 Vision") as demo:
+     gr.Markdown("## AI Doctor Vision + Voice using Llama3.2-11B-Vision")
+     with gr.Row():
+         img = gr.Image(label="Medical Image", type="pil")
+         aud_input = gr.Audio(label="Ask by voice", type="filepath")
+         txt_input = gr.Textbox(label="Ask by text", lines=2)
      resp_out = gr.Textbox(label="AI Response", lines=10)
      aud_out = gr.Audio(label="AI Speaks", type="filepath")
      q_out = gr.Textbox(label="Processed Question")
      btn = gr.Button("Ask Doctor")
+     btn.click(fn=doctor.respond, inputs=[img, aud_input, txt_input],
+               outputs=[resp_out, aud_out, q_out])
+     demo.launch()
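
For reference: the Llama 3.2 Vision checkpoints are normally prompted through the processor's chat template, which inserts the <|image|> token the model expects; handing the raw question string straight to the processor, as the committed analyze() does, may error or leave the image out of the prompt. A minimal sketch of the chat-template variant, following the pattern in the Hugging Face model card (the method name analyze_with_template is illustrative, not part of this commit):

    def analyze_with_template(self, image, question):
        # Sketch only; assumes self.proc and self.model are loaded as in __init__ above.
        if image is None:
            return "Please upload a medical image."
        messages = [{
            "role": "user",
            "content": [
                {"type": "image"},  # placeholder; the actual pixels go to the processor below
                {"type": "text", "text": question or "Please analyze this medical image for any abnormalities."},
            ],
        }]
        # Render the conversation into the model's prompt format (adds the <|image|> token).
        prompt = self.proc.apply_chat_template(messages, add_generation_prompt=True)
        # add_special_tokens=False because the chat template already adds the BOS token.
        inputs = self.proc(images=image, text=prompt, add_special_tokens=False, return_tensors="pt").to(self.device)
        # do_sample=True makes temperature take effect; without it generation is greedy
        # and transformers warns that temperature is being ignored.
        outputs = self.model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
        return self.proc.decode(outputs[0], skip_special_tokens=True).strip()

Two practical caveats for a CPU Space: meta-llama/Llama-3.2-11B-Vision-Instruct is a gated repository (the Space needs an accepted license and an HF token), and 11B parameters in float32 come to roughly 44 GB of weights, more RAM than a free CPU tier provides, so a smaller or quantized vision model may be needed in practice. A quick local smoke test against the class as committed, with chest_xray.png standing in for any test image:

    from PIL import Image
    img = Image.open("chest_xray.png")  # hypothetical local test file
    answer, speech_path, question = doctor.respond(img, None, "Is there anything abnormal?")
    print(question, "->", answer, "| audio at", speech_path)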