riteshkokam committed · Commit bf7dabd · verified · 1 Parent(s): db2b3cb

Update app.py

Files changed (1)
  1. app.py +25 -23
app.py CHANGED
@@ -1,17 +1,18 @@
 # app.py
 import gradio as gr
 import torch
-from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, pipeline
+from transformers import AutoProcessor, AutoModelForVision2Seq, pipeline
 from gtts import gTTS
 import tempfile
+from PIL import Image
 
 class AIDoctor:
-    def __init__(self, model_name="Qwen/Qwen2.5-VL-7B-Instruct"):
+    def __init__(self, vision_model="meta-llama/Llama-3.2-11B-Vision"):
         self.device = "cpu"
-        print(f"💻 Running on {self.device}")
-        self.proc = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-            model_name,
+        print(f"🖥️ Using device: {self.device}")
+        self.proc = AutoProcessor.from_pretrained(vision_model, trust_remote_code=True)
+        self.model = AutoModelForVision2Seq.from_pretrained(
+            vision_model,
             torch_dtype=torch.float32,
             trust_remote_code=True
         ).to(self.device)
@@ -22,7 +23,7 @@ class AIDoctor:
             return "Please upload a medical image."
         prompt = question or "Please analyze this medical image for any abnormalities."
         inputs = self.proc(images=image, text=prompt, return_tensors="pt").to(self.device)
-        outputs = self.model.generate(**inputs, max_new_tokens=200, temperature=0.7)
+        outputs = self.model.generate(**inputs, max_new_tokens=256, temperature=0.7)
         return self.proc.decode(outputs[0], skip_special_tokens=True).strip()
 
     def tts(self, text):
@@ -32,27 +33,28 @@ class AIDoctor:
         return file
 
     def respond(self, image, audio, text):
-        question = text.strip()
+        q = text.strip()
         if audio:
-            res = self.stt(audio)
-            q = res.get("text", "").strip() if isinstance(res, dict) else str(res).strip()
-            if q:
-                question = q
-        resp = self.analyze(image, question)
+            result = self.stt(audio)
+            trans = result.get("text", "").strip() if isinstance(result, dict) else str(result)
+            if trans:
+                q = trans
+        resp = self.analyze(image, q)
         voice = self.tts(resp)
-        return resp, voice, question
+        return resp, voice, q
 
 doctor = AIDoctor()
 
-with gr.Blocks(title="🏥 AI Doctor (Qwen 2.5‑VL‑7B)") as demo:
-    gr.Markdown("## AI Doctor with **Qwen 2.5‑VL‑7B‑Instruct** (Vision + Voice)")
+with gr.Blocks(title="🏥 AI Doctor with Llama 3.2 Vision") as demo:
+    gr.Markdown("## AI Doctor Vision + Voice using Llama‑3.2‑11B‑Vision")
     with gr.Row():
-        img = gr.Image(label="Upload medical image", type="pil")
-        aud_in = gr.Audio(label="Ask by voice", type="filepath")
-        txt = gr.Textbox(label="Ask by text", lines=2)
-    out_txt = gr.Textbox(label="AI Response", lines=10)
-    out_aud = gr.Audio(label="AI Speaks", type="filepath")
+        img = gr.Image(label="Medical Image", type="pil")
+        aud_input = gr.Audio(label="Ask by voice", type="filepath")
+        txt_input = gr.Textbox(label="Ask by text", lines=2)
+    resp_out = gr.Textbox(label="AI Response", lines=10)
+    aud_out = gr.Audio(label="AI Speaks", type="filepath")
     q_out = gr.Textbox(label="Processed Question")
     btn = gr.Button("Ask Doctor")
-    btn.click(fn=doctor.respond, inputs=[img, aud_in, txt], outputs=[out_txt, out_aud, q_out])
-demo.launch()
+    btn.click(fn=doctor.respond, inputs=[img, aud_input, txt_input],
+              outputs=[resp_out, aud_out, q_out])
+demo.launch()
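
Two hedged sketches follow for readers of this diff; neither is part of the commit itself.

First, the hunks skip old lines 18-21, where self.stt is presumably wired up, yet respond() calls it and pipeline stays in the import list. A minimal sketch of what that unchanged block plausibly looks like; the Whisper checkpoint is an assumption, not visible anywhere in this commit:

    from transformers import pipeline

    # Sketch of the ASR helper that respond() uses via self.stt(audio).
    # The checkpoint is a hypothetical stand-in; the real one sits in lines
    # the hunks do not show.
    stt = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base",  # assumed checkpoint
        device=-1,                    # -1 = CPU, matching self.device = "cpu"
    )
    result = stt("question.wav")      # hypothetical audio file
    print(result["text"])             # ASR pipelines return {"text": "..."}

Second, a smoke test of the swapped-in backbone, loaded the same way the new __init__ does. Two assumptions worth flagging: meta-llama/Llama-3.2-11B-Vision is a gated repo (an accepted license and HF token are needed), and Mllama-style processors generally expect an <|image|> token in the prompt when an image is passed, which the unchanged analyze() prompt does not include; the token placement and test image below are therefore things to verify against the model card, not facts taken from the commit:

    import torch
    from PIL import Image
    from transformers import AutoProcessor, AutoModelForVision2Seq

    model_id = "meta-llama/Llama-3.2-11B-Vision"  # gated: needs accepted license + HF token
    proc = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForVision2Seq.from_pretrained(
        model_id,
        torch_dtype=torch.float32,  # CPU-friendly dtype, as in the commit
        trust_remote_code=True,
    ).to("cpu")

    image = Image.open("sample_scan.png")  # hypothetical test image
    # Assumption: the processor wants an <|image|> token anchoring the image
    # in the text; analyze() passes a plain prompt, which may error without it.
    prompt = "<|image|>Please analyze this medical image for any abnormalities."
    inputs = proc(images=image, text=prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,   # temperature only takes effect when sampling is on
            temperature=0.7,
        )
    print(proc.decode(out[0], skip_special_tokens=True).strip())

One practical note on the CPU-only setup the commit keeps: an 11B-parameter model in float32 works out to roughly 44 GB of weights alone (11B x 4 bytes), so a smaller or quantized checkpoint is worth considering before running this as-is.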