broadfield-dev committed
Commit 3356d92 · verified · 1 parent: 44d72e6

Update app.py

Files changed (1): app.py (+57, -55)
app.py CHANGED
@@ -1,86 +1,87 @@
 import gradio as gr
 import torch
 from PIL import Image
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from transformers.generation import GenerationConfig
 import requests
 from io import BytesIO
-import os
+from transformers import Qwen3VLForConditionalGeneration, AutoProcessor

 # --- Configuration ---
-# Using a CPU-compatible model from the Qwen family
 MODEL_PATH = "Qwen/Qwen3-VL-2B-Instruct"
-CPU_DEVICE = "cpu"  # Explicitly use CPU
+CPU_DEVICE = "cpu"

-# --- Model and Tokenizer Loading ---
-# This will be done once when the Space starts. It will be slow on a CPU.
-print("Loading model and tokenizer... This may take a while on a CPU.")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
-
-# For CPU, we load the model in bfloat16 if supported, otherwise float32.
-# Note: This will consume a significant amount of RAM.
-try:
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_PATH,
-        device_map=CPU_DEVICE,
-        trust_remote_code=True,
-        bf16=torch.cuda.is_bf16_supported(),  # bf16 on CPU can be slow, but uses less memory
-    ).eval()
-except RuntimeError:
-    # Fallback to float32 if bf16 is not supported or causes issues
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_PATH,
-        device_map=CPU_DEVICE,
-        trust_remote_code=True
-    ).eval()
-
-# Specify generation configuration
-model.generation_config = GenerationConfig.from_pretrained(MODEL_PATH, trust_remote_code=True)
-print("Model and tokenizer loaded successfully.")
+# --- Model and Processor Loading ---
+# This will be done once when the Space starts.
+# 'device_map="auto"' will correctly assign the model to the CPU in this environment.
+print("Loading model and processor... This will take a few minutes on a CPU.")
+processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
+model = Qwen3VLForConditionalGeneration.from_pretrained(
+    MODEL_PATH,
+    trust_remote_code=True,
+    dtype="auto",      # Use 'auto' for dtype for better compatibility
+    device_map="auto"  # This is the key for CPU (and GPU) compatibility
+)
+print("Model and processor loaded successfully.")

 # --- Inference Function ---
 def process_and_generate(image_input, text_prompt):
     """
-    Processes the image and text prompt, and generates a response from the model on the CPU.
+    Processes the image and text prompt, and generates a response from the model.
     """
-    if image_input is None or text_prompt.strip() == "":
+    if image_input is None or not text_prompt.strip():
         return "Please provide both an image and a text prompt."

     # Convert Gradio's numpy array to a PIL Image
     pil_image = Image.fromarray(image_input)
-    # Create a temporary path to save the image
-    temp_image_path = "temp_image.png"
-    pil_image.save(temp_image_path)

-    # The model's tokenizer can directly handle an image path.
-    # We construct the query according to the model's required format.
-    query = tokenizer.from_list_format([
-        {'image': temp_image_path},
-        {'text': text_prompt},
-    ])
+    # Prepare the messages payload for the model
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": pil_image},
+                {"type": "text", "text": text_prompt},
+            ],
+        }
+    ]

-    print("Generating response... This will be slow.")
+    print("Processing inputs and generating response... This will be slow.")
     try:
-        # Generate the response
-        response, history = model.chat(tokenizer, query=query, history=None)
+        # Preparation for inference
+        inputs = processor.apply_chat_template(
+            messages,
+            tokenize=True,
+            add_generation_prompt=True,
+            return_dict=True,
+            return_tensors="pt"
+        )
+        inputs = inputs.to(model.device)
+
+        # Inference: Generation of the output
+        generated_ids = model.generate(**inputs, max_new_tokens=1024)
+
+        # To get only the new tokens, we trim the input IDs from the generated IDs
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+
+        # Decode the trimmed IDs to text
+        output_text = processor.batch_decode(
+            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )

-        # Clean up the temporary image file
-        os.remove(temp_image_path)
+        # batch_decode returns a list, we return the first element
+        return output_text[0]

-        return response
     except Exception as e:
-        # Clean up even if there's an error
-        if os.path.exists(temp_image_path):
-            os.remove(temp_image_path)
         return f"An error occurred during generation: {str(e)}"

 # --- Gradio Interface ---
 with gr.Blocks() as demo:
     gr.Markdown(
         """
-        # Qwen-VL-Chat CPU Demo
-        This Space demonstrates the `Qwen/Qwen-VL-Chat` model, a CPU-compatible alternative to Qwen3-VL.
-        **Warning:** Running this vision-language model on a CPU is very slow. Please be patient after clicking generate.
+        # Qwen3-VL-2B-Instruct CPU Demo
+        This Space runs the `Qwen/Qwen3-VL-2B-Instruct` model using the standard `transformers` library.
+        **Warning:** Running this on a free CPU Space is **very slow**. Please be patient after clicking the generate button.
         """
     )

@@ -100,8 +101,9 @@ with gr.Blocks() as demo:

     gr.Examples(
         examples=[
-            ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/QWEN-VL/assets/demo.jpeg", "这是什么?"],
-            ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/QWEN-VL/assets/demo.jpeg", "框出图中礼服和帽子"],
+            ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg", "Describe this image."],
+            ["https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3-VL/receipt.png", "Read the text from this receipt."],
+            ["https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3-VL/what_is_in_the_box.jpg", "What is inside the red box?"],
         ],
         inputs=[image_input, text_prompt]
     )
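
Note that the new app.py keeps the `requests` and `BytesIO` imports even though neither is used in the hunks above; presumably they remain for loading images from URLs such as the `gr.Examples` entries. As a quick sanity check of the new inference path outside the Gradio UI, a minimal sketch follows. It assumes the updated file is importable as `app` and that the first example URL is reachable; both are assumptions on top of the commit, not part of it.

# Local smoke test for the new inference path (illustrative sketch; not part of the commit).
# Assumes the updated app.py is on the import path as `app` and that importing it is
# acceptable (the model is loaded at import time, which is slow on a CPU).
import numpy as np
import requests
from io import BytesIO
from PIL import Image

import app  # hypothetical module name for the rewritten Space file

URL = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"

# Fetch one of the gr.Examples images and convert it to the numpy array that
# Gradio's Image component would normally hand to process_and_generate().
image = Image.open(BytesIO(requests.get(URL, timeout=30).content)).convert("RGB")
image_array = np.asarray(image)

print(app.process_and_generate(image_array, "Describe this image."))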
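
The commit only touches app.py, so the Space's dependency list is not shown. For orientation, the package set below is inferred from the imports and from `device_map="auto"` (which requires `accelerate`); it is a guess, and no versions are pinned because none appear in the commit.

# requirements.txt (hypothetical, inferred from app.py; not part of the commit)
gradio
torch
transformers  # must be recent enough to provide Qwen3VLForConditionalGeneration
accelerate    # needed for device_map="auto"
pillow
requests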