riteshkokam committed
Commit db2b3cb · verified · 1 Parent(s): 383638b

Update app.py

Files changed (1):
app.py +22 -24
app.py CHANGED

@@ -1,18 +1,17 @@
 # app.py
 import gradio as gr
 import torch
-from transformers import AutoProcessor, AutoModelForVision2Seq, pipeline
+from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, pipeline
 from gtts import gTTS
 import tempfile
-from PIL import Image
 
 class AIDoctor:
-    def __init__(self, vision_model="meta-llama/Llama-3.2-11B-Vision-Instruct"):
+    def __init__(self, model_name="Qwen/Qwen2.5-VL-7B-Instruct"):
         self.device = "cpu"
-        print(f"🖥️ Using device: {self.device}")
-        self.proc = AutoProcessor.from_pretrained(vision_model, trust_remote_code=True)
-        self.model = AutoModelForVision2Seq.from_pretrained(
-            vision_model,
+        print(f"💻 Running on {self.device}")
+        self.proc = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            model_name,
             torch_dtype=torch.float32,
             trust_remote_code=True
         ).to(self.device)
@@ -23,7 +22,7 @@ class AIDoctor:
             return "Please upload a medical image."
         prompt = question or "Please analyze this medical image for any abnormalities."
         inputs = self.proc(images=image, text=prompt, return_tensors="pt").to(self.device)
-        outputs = self.model.generate(**inputs, max_new_tokens=256, temperature=0.7)
+        outputs = self.model.generate(**inputs, max_new_tokens=200, temperature=0.7)
         return self.proc.decode(outputs[0], skip_special_tokens=True).strip()
 
     def tts(self, text):
@@ -33,28 +32,27 @@ class AIDoctor:
         return file
 
     def respond(self, image, audio, text):
-        q = text.strip()
+        question = text.strip()
         if audio:
-            result = self.stt(audio)
-            trans = result.get("text", "").strip() if isinstance(result, dict) else str(result)
-            if trans:
-                q = trans
-        resp = self.analyze(image, q)
+            res = self.stt(audio)
+            q = res.get("text", "").strip() if isinstance(res, dict) else str(res).strip()
+            if q:
+                question = q
+        resp = self.analyze(image, question)
         voice = self.tts(resp)
-        return resp, voice, q
+        return resp, voice, question
 
 doctor = AIDoctor()
 
-with gr.Blocks(title="🏥 AI Doctor with Llama 3.2 Vision") as demo:
-    gr.Markdown("## AI Doctor Vision + Voice using Llama‑3.2‑11B‑Vision")
+with gr.Blocks(title="🏥 AI Doctor (Qwen 2.5‑VL‑7B)") as demo:
+    gr.Markdown("## AI Doctor with **Qwen 2.5‑VL‑7B‑Instruct** (Vision + Voice)")
     with gr.Row():
-        img = gr.Image(label="Medical Image", type="pil")
-        aud_input = gr.Audio(label="Ask by voice", type="filepath")
-        txt_input = gr.Textbox(label="Ask by text", lines=2)
-        resp_out = gr.Textbox(label="AI Response", lines=10)
-        aud_out = gr.Audio(label="AI Speaks", type="filepath")
+        img = gr.Image(label="Upload medical image", type="pil")
+        aud_in = gr.Audio(label="Ask by voice", type="filepath")
+        txt = gr.Textbox(label="Ask by text", lines=2)
+        out_txt = gr.Textbox(label="AI Response", lines=10)
+        out_aud = gr.Audio(label="AI Speaks", type="filepath")
         q_out = gr.Textbox(label="Processed Question")
         btn = gr.Button("Ask Doctor")
-        btn.click(fn=doctor.respond, inputs=[img, aud_input, txt_input],
-                  outputs=[resp_out, aud_out, q_out])
+        btn.click(fn=doctor.respond, inputs=[img, aud_in, txt], outputs=[out_txt, out_aud, q_out])
 demo.launch()
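
One caveat on the new analyze path: Qwen2.5-VL's processor normally expects a chat-formatted prompt built with apply_chat_template, which inserts the image placeholder tokens; passing raw text as this commit does may not bind the image to the prompt, decoding outputs[0] wholesale echoes the prompt back, and temperature=0.7 has no effect unless do_sample=True is also passed to generate. A minimal sketch of the usual model-card-style recipe (the function name and variable names are illustrative, not from this commit):

# Sketch: chat-template input preparation for Qwen2.5-VL.
# `proc` and `model` are the objects built in __init__;
# `image` is a PIL image, `prompt` a plain string.
def analyze_with_chat_template(proc, model, image, prompt, device="cpu"):
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},                 # placeholder for the attached image
            {"type": "text", "text": prompt},  # the user's question
        ],
    }]
    # Render the conversation into the model's expected prompt string,
    # including image placeholder tokens and the assistant header.
    text = proc.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = proc(text=[text], images=[image], return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=200)
    # Drop the echoed prompt tokens so only the newly generated answer is decoded.
    trimmed = outputs[:, inputs["input_ids"].shape[1]:]
    return proc.batch_decode(trimmed, skip_special_tokens=True)[0].strip()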
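
Also worth noting: the hunks shown never define self.stt, even though respond() calls it and pipeline stays imported, so it is presumably built elsewhere in __init__. A hedged sketch of what such a helper typically looks like, assuming a Whisper ASR pipeline (the checkpoint name is an assumption, not from this diff):

from transformers import pipeline

# Hypothetical wiring for the speech-to-text helper used by respond();
# this commit's hunks never show it. The model choice is an assumption.
stt = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",  # assumed checkpoint, not from the diff
    device=-1,                     # CPU, matching self.device = "cpu"
)

result = stt("question.wav")       # returns {"text": "..."} for a filepath
print(result["text"])

A dict with a "text" key is exactly what respond() probes for with res.get("text", ""), so an ASR pipeline of this shape is consistent with the code above.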