innoai committed on
Commit
fa6b263
·
verified ·
1 Parent(s): d53478b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -106
app.py CHANGED
@@ -1,40 +1,84 @@
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
- Ovis-U1-3B 多模态 DEMO
5
- 兼容 Hugging Face CPU Space（无 GPU 驱动）
6
- ไพ่ต–๏ผšPython 3.10+ใ€gradio 4.*, torch 2.*ใ€transformers 4.41.*
7
  """
8
 
 
 
 
9
  import os
 
 
10
  import subprocess
11
  import random
12
  import numpy as np
13
  import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  from PIL import Image
15
  import gradio as gr
16
  import spaces
17
  from transformers import AutoModelForCausalLM
18
 
19
- # -------------------------------------------------------------------------
20
- # ๅฏ้€‰๏ผšGPU ็Žฏๅขƒๆ‰่ƒฝ็œŸๆญฃ็”จๅˆฐ flash-attn๏ผ›CPU Space ๅฏๅฟฝ็•ฅๅฎ‰่ฃ…ๅผ‚ๅธธ
21
- # -------------------------------------------------------------------------
22
- try:
23
- subprocess.run(
24
- "pip install flash-attn==2.6.3 --no-build-isolation",
25
- env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
26
- shell=True,
27
- check=True,
28
- )
29
- except subprocess.CalledProcessError:
30
- print("[INFO] flash-attn ๅฎ‰่ฃ…ๅคฑ่ดฅ๏ผˆCPU ็Žฏๅขƒๅฏๅฟฝ็•ฅ๏ผ‰")
31
 
32
- # -------------------------------------------------------------------------
33
- # ๅธธ้‡ไธŽๅทฅๅ…ทๅ‡ฝๆ•ฐ
34
- # -------------------------------------------------------------------------
35
  MAX_SEED = 10_000
36
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
37
- DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32 # CPU ้ป˜่ฎค็”จ fp32
38
 
39
  def set_global_seed(seed: int = 42) -> None:
40
  """็ปŸไธ€่ฎพ็ฝฎ้šๆœบ็งๅญ๏ผˆCPU / CUDA ่‡ช้€‚ๅบ”๏ผ‰"""
@@ -45,37 +89,29 @@ def set_global_seed(seed: int = 42) -> None:
45
  torch.cuda.manual_seed_all(seed)
46
 
47
  def randomize_seed_fn(seed: int, randomize: bool) -> int:
48
- """UI ไพง seed ้šๆœบๅŒ–"""
49
  return random.randint(0, MAX_SEED) if randomize else seed
50
 
51
- # -------------------------------------------------------------------------
52
- # ๅŠ ่ฝฝๆจกๅž‹
53
- # -------------------------------------------------------------------------
54
- HF_TOKEN = os.getenv("HF_TOKEN") # ๅฆ‚ๆžœ็งๆœ‰ๆจกๅž‹้œ€ token
55
- HUB_MODEL_ID = "AIDC-AI/Ovis-U1-3B"
56
 
57
- print(f"[INFO] Loading {HUB_MODEL_ID} on {DEVICE} ...")
58
  model = AutoModelForCausalLM.from_pretrained(
59
- HUB_MODEL_ID,
60
  torch_dtype=DTYPE,
61
- low_cpu_mem_usage=True, # ๆ˜พ่‘—้™ไฝŽ CPU ๅ ็”จ
62
- device_map="auto", # cuda ่‡ชๅŠจๆ”พ GPU๏ผŒCPU ็Žฏๅขƒๅ…จ้ƒจๆ”พ CPU
63
  token=HF_TOKEN,
64
  trust_remote_code=True
65
- ).eval() # ่ฏ„ไผฐๆจกๅผ
66
-
67
  print("[INFO] Model ready!")
68
 
69
- # -------------------------------------------------------------------------
70
- # ๅผ•ๅ…ฅ่‡ชๅฎšไน‰็ฎก็บฟๅ‡ฝๆ•ฐ โ€”โ€” ไฟๆŒไธŽๅŽŸไปฃ็ ไธ€่‡ด
71
- # -------------------------------------------------------------------------
72
- from test_img_edit import pipe_img_edit
73
- from test_img_to_txt import pipe_txt_gen
74
- from test_txt_to_img import pipe_t2i
75
-
76
- # -------------------------------------------------------------------------
77
- # ๆŽจ็†ๅฐ่ฃ…๏ผˆๅ‡่ฟ่กŒๅœจ DEVICE๏ผ‰
78
- # -------------------------------------------------------------------------
79
  def process_txt_to_img(prompt: str, height: int, width: int, steps: int,
80
  final_seed: int, guidance_scale: float,
81
  progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
@@ -94,23 +130,24 @@ def process_img_txt_to_img(prompt: str, img: Image.Image, steps: int,
94
  return pipe_img_edit(model, img, prompt, steps,
95
  txt_cfg, img_cfg, seed=final_seed)
96
 
97
- # -------------------------------------------------------------------------
98
- # Gradio UI
99
- # -------------------------------------------------------------------------
100
- with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
101
- gr.Markdown("# Ovis-U1-3B\nโœจ ๅคšๆจกๆ€ๆ–‡ๆœฌ-ๅ›พๅƒ DEMO๏ผˆCPU ็‰ˆ๏ผ‰")
102
 
103
  with gr.Row():
 
104
  with gr.Column():
105
  with gr.Tabs():
106
- # ---------------------- Tab 1 ๅ›พๅƒ + ๆ–‡ๆœฌ โ†’ ๅ›พๅƒ ----------------------
107
  with gr.TabItem("Image + Text โ†’ Image"):
108
  edit_image_input = gr.Image(label="Input Image", type="pil")
109
  with gr.Row():
110
  edit_prompt_input = gr.Textbox(
111
  label="Prompt",
112
  show_label=False,
113
- placeholder="Describe the editing instruction...",
114
  container=False,
115
  lines=1
116
  )
@@ -130,8 +167,7 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
130
  label="Steps", minimum=40, maximum=100, value=50, step=1
131
  )
132
  edit_seed_slider = gr.Slider(
133
- label="Seed", minimum=0, maximum=MAX_SEED,
134
- step=1, value=42
135
  )
136
  edit_randomize_checkbox = gr.Checkbox(
137
  label="Randomize seed", value=False
@@ -148,15 +184,13 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
148
  label="Image Editing Examples"
149
  )
150
 
151
- # ---------------------- Tab 2 ๆ–‡ๆœฌ โ†’ ๅ›พๅƒ ----------------------
152
  with gr.TabItem("Text โ†’ Image"):
153
  with gr.Row():
154
  prompt_gen_input = gr.Textbox(
155
- label="Prompt",
156
- show_label=False,
157
- placeholder="Describe the image you want...",
158
- container=False,
159
- lines=1
160
  )
161
  run_image_gen_btn = gr.Button("Run", scale=0)
162
 
@@ -171,15 +205,14 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
171
  value=1024, step=32
172
  )
173
  guidance_slider = gr.Slider(
174
- label="Guidance Scale",
175
- minimum=1.0, maximum=30.0, step=0.5, value=5.0
176
  )
177
  num_steps_slider = gr.Slider(
178
  label="Steps", minimum=40, maximum=100, value=50, step=1
179
  )
180
  seed_slider = gr.Slider(
181
- label="Seed", minimum=0, maximum=MAX_SEED,
182
- step=1, value=42
183
  )
184
  randomize_checkbox = gr.Checkbox(
185
  label="Randomize seed", value=False
@@ -196,13 +229,13 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
196
  label="Image Generation Examples"
197
  )
198
 
199
- # ---------------------- Tab 3 ๅ›พๅƒ โ†’ ๆ–‡ๆœฌ ----------------------
200
  with gr.TabItem("Image โ†’ Text"):
201
  image_understand_input = gr.Image(label="Input Image", type="pil")
202
  with gr.Row():
203
  prompt_understand_input = gr.Textbox(
204
  label="Prompt", show_label=False,
205
- placeholder="Describe the question about image...",
206
  container=False, lines=1
207
  )
208
  run_image_understand_btn = gr.Button("Run", scale=0)
@@ -220,55 +253,38 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
220
 
221
  clean_btn = gr.Button("Clear All Inputs / Outputs")
222
 
223
- # ---------------------- ่พ“ๅ‡บๅŒบ ----------------------
224
  with gr.Column():
225
- output_gallery = gr.Gallery(label="Generated Images",
226
- columns=2, visible=True)
227
- output_text = gr.Textbox(label="Generated Text",
228
- visible=False, lines=5,
229
- interactive=False)
230
-
231
- # ---------------------------------------------------------------------
232
- # ไบ‹ไปถ็ป‘ๅฎš
233
- # ---------------------------------------------------------------------
234
  def run_img_txt_to_img_tab(prompt, img, steps, seed, txt_cfg, img_cfg,
235
  progress=gr.Progress(track_tqdm=True)):
236
  if img is None:
237
- return (
238
- gr.update(value=[], visible=False),
239
- gr.update(value="Please upload an image for editing.", visible=True)
240
- )
241
- imgs = process_img_txt_to_img(prompt, img, steps, seed,
242
- txt_cfg, img_cfg, progress)
243
- return (
244
- gr.update(value=imgs, visible=True),
245
- gr.update(value="", visible=False)
246
- )
247
 
248
  def run_txt_to_img_tab(prompt, height, width, steps, seed, guidance,
249
  progress=gr.Progress(track_tqdm=True)):
250
- imgs = process_txt_to_img(prompt, height, width, steps, seed,
251
- guidance, progress)
252
- return (
253
- gr.update(value=imgs, visible=True),
254
- gr.update(value="", visible=False)
255
- )
256
 
257
  def run_img_to_txt_tab(img, prompt,
258
  progress=gr.Progress(track_tqdm=True)):
259
  if img is None:
260
- return (
261
- gr.update(value=[], visible=False),
262
- gr.update(value="Please upload an image for understanding.", visible=True)
263
- )
264
  txt = process_img_to_txt(prompt, img, progress)
265
- return (
266
- gr.update(value=[], visible=False),
267
- gr.update(value=txt, visible=True)
268
- )
269
 
270
  def clean_all_fn():
271
- """้‡็ฝฎๅ…จ้ƒจ่พ“ๅ…ฅ / ่พ“ๅ‡บ"""
272
  return (
273
  # Tab 1
274
  gr.update(value=None), gr.update(value=""),
@@ -287,7 +303,7 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
287
  gr.update(value="", visible=False)
288
  )
289
 
290
- # ---------- Tab 1 ----------
291
  edit_inputs = [
292
  edit_prompt_input, edit_image_input,
293
  edit_num_steps_slider, edit_seed_slider,
@@ -306,7 +322,7 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
306
  [output_gallery, output_text]
307
  )
308
 
309
- # ---------- Tab 2 ----------
310
  gen_inputs = [
311
  prompt_gen_input, height_slider, width_slider,
312
  num_steps_slider, seed_slider, guidance_slider
@@ -324,7 +340,7 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
324
  [output_gallery, output_text]
325
  )
326
 
327
- # ---------- Tab 3 ----------
328
  understand_inputs = [image_understand_input, prompt_understand_input]
329
  run_image_understand_btn.click(run_img_to_txt_tab,
330
  understand_inputs,
@@ -333,7 +349,7 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
333
  understand_inputs,
334
  [output_gallery, output_text])
335
 
336
- # ---------- ๆธ…็ฉบ ----------
337
  clean_btn.click(clean_all_fn, [], [
338
  edit_image_input, edit_prompt_input, edit_img_guidance_slider,
339
  edit_txt_guidance_slider, edit_num_steps_slider, edit_seed_slider,
@@ -343,8 +359,9 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
343
  output_gallery, output_text
344
  ])
345
 
346
- # -------------------------------------------------------------------------
347
- # ๅฏๅŠจ
348
- # -------------------------------------------------------------------------
349
  if __name__ == "__main__":
350
- demo.launch(share=True) # HF Spaces ่‡ชๅŠจ็›‘ๅฌ 7860 ็ซฏๅฃ
 
 
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
+ Ovis-U1-3B 多模态 DEMO（CPU / GPU 自适应版本）
5
+ ไพ่ต–๏ผšPython 3.10+ใ€torch 2.*ใ€transformers 4.41.*ใ€gradio 4.*
 
6
  """
7
 
8
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
9
+ # ① 在任何 transformers / flash_attn 导入之前完成环境处理
10
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
11
  import os
12
+ import sys
13
+ import types
14
  import subprocess
15
  import random
16
  import numpy as np
17
  import torch
18
+
19
+ # 判断是否有 CUDA
20
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
21
+ DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32 # CPU โ†’ float32
22
+
23
+ # ---------- CPU 环境：屏蔽 flash-attn ----------
24
+ if DEVICE == "cpu":
25
+ # 尝试卸载已存在的 flash-attn（若基础镜像预装）
26
+ subprocess.run("pip uninstall -y flash-attn", shell=True,
27
+ stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
28
+
29
+ # 构造“空壳” flash_attn 模块，提供最简位置旋转实现
30
+ fake_flash_attn = types.ModuleType("flash_attn")
31
+ fake_layers = types.ModuleType("flash_attn.layers")
32
+ fake_rotary = types.ModuleType("flash_attn.layers.rotary")
33
+
34
+ def _cpu_apply_rotary_emb(x, cos, sin):
35
+ """็บฏ CPU ็š„ๆ—‹่ฝฌไฝ็ฝฎ็ผ–็ ๏ผˆๆ…ข๏ผŒไฝ†ๅฏ็”จ๏ผ‰"""
36
+ x1, x2 = x[..., ::2], x[..., 1::2]
37
+ rot_x1 = x1 * cos - x2 * sin
38
+ rot_x2 = x1 * sin + x2 * cos
39
+ out = torch.empty_like(x)
40
+ out[..., ::2] = rot_x1
41
+ out[..., 1::2] = rot_x2
42
+ return out
43
+
44
+ fake_rotary.apply_rotary_emb = _cpu_apply_rotary_emb
45
+ fake_layers.rotary = fake_rotary
46
+ fake_flash_attn.layers = fake_layers
47
+
48
+ sys.modules.update({
49
+ "flash_attn": fake_flash_attn,
50
+ "flash_attn.layers": fake_layers,
51
+ "flash_attn.layers.rotary": fake_rotary,
52
+ })
53
+ else:
54
+ # GPU 环境：如有需要安装 flash-attn（安装失败不致命）
55
+ try:
56
+ subprocess.run(
57
+ "pip install flash-attn==2.6.3 --no-build-isolation",
58
+ env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
59
+ shell=True,
60
+ check=True,
61
+ )
62
+ except subprocess.CalledProcessError:
63
+ print("[WARN] flash-attn ๅฎ‰่ฃ…ๅคฑ่ดฅ๏ผŒๆŽจ็†ๆ€ง่ƒฝๅฏ่ƒฝๅ—ๅฝฑๅ“ใ€‚")
64
+
65
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
66
+ # โ‘ก ๅธธ่ง„ไพ่ต–
67
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
68
  from PIL import Image
69
  import gradio as gr
70
  import spaces
71
  from transformers import AutoModelForCausalLM
72
 
73
+ # 用户自定义管线脚本（保持原有路径）
74
+ from test_img_edit import pipe_img_edit
75
+ from test_img_to_txt import pipe_txt_gen
76
+ from test_txt_to_img import pipe_t2i
 
 
 
 
 
 
 
 
77
 
78
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
79
+ # โ‘ข ๅทฅๅ…ทๅ‡ฝๆ•ฐ & ๅธธ้‡
80
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
81
  MAX_SEED = 10_000
 
 
82
 
83
  def set_global_seed(seed: int = 42) -> None:
84
  """็ปŸไธ€่ฎพ็ฝฎ้šๆœบ็งๅญ๏ผˆCPU / CUDA ่‡ช้€‚ๅบ”๏ผ‰"""
 
89
  torch.cuda.manual_seed_all(seed)
90
 
91
  def randomize_seed_fn(seed: int, randomize: bool) -> int:
92
+ """ๆ นๆฎๅค้€‰ๆก†ๅ†ณๅฎšๆ˜ฏๅฆ้šๆœบ็งๅญ"""
93
  return random.randint(0, MAX_SEED) if randomize else seed
94
 
95
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
96
+ # โ‘ฃ ๅŠ ่ฝฝๆจกๅž‹
97
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
98
+ HF_TOKEN = os.getenv("HF_TOKEN") # ็งๆœ‰ไป“ๅบ“่ฏทๅœจ Space Secret ่ฎพ็ฝฎ
99
+ MODEL_ID = "AIDC-AI/Ovis-U1-3B"
100
 
101
+ print(f"[INFO] Loading {MODEL_ID} on {DEVICE} โ€ฆ")
102
  model = AutoModelForCausalLM.from_pretrained(
103
+ MODEL_ID,
104
  torch_dtype=DTYPE,
105
+ low_cpu_mem_usage=True, # ๅ‡ไฝŽ RSS
106
+ device_map="auto", # CPU ็Žฏๅขƒๅ…จ้ƒจๆ”พ CPU
107
  token=HF_TOKEN,
108
  trust_remote_code=True
109
+ ).eval()
 
110
  print("[INFO] Model ready!")
111
 
112
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
113
+ # โ‘ค ๆŽจ็†ๅฐ่ฃ…
114
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
 
 
 
 
 
 
115
  def process_txt_to_img(prompt: str, height: int, width: int, steps: int,
116
  final_seed: int, guidance_scale: float,
117
  progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
 
130
  return pipe_img_edit(model, img, prompt, steps,
131
  txt_cfg, img_cfg, seed=final_seed)
132
 
133
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
134
+ # โ‘ฅ Gradio UI
135
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
136
+ with gr.Blocks(title="Ovis-U1-3B (CPU/GPU adaptive)") as demo:
137
+ gr.Markdown("# Ovis-U1-3B\nๅคšๆจกๆ€ๆ–‡ๆœฌ-ๅ›พๅƒ DEMO๏ผˆCPU/GPU ่‡ช้€‚ๅบ”็‰ˆ๏ผ‰")
138
 
139
  with gr.Row():
140
+ # -------- ๅทฆไพง๏ผš่พ“ๅ…ฅๅŒบ --------
141
  with gr.Column():
142
  with gr.Tabs():
143
+ # โ”€โ”€ Tab 1: Image + Text โ†’ Image โ”€โ”€
144
  with gr.TabItem("Image + Text โ†’ Image"):
145
  edit_image_input = gr.Image(label="Input Image", type="pil")
146
  with gr.Row():
147
  edit_prompt_input = gr.Textbox(
148
  label="Prompt",
149
  show_label=False,
150
+ placeholder="Describe the editing instructionโ€ฆ",
151
  container=False,
152
  lines=1
153
  )
 
167
  label="Steps", minimum=40, maximum=100, value=50, step=1
168
  )
169
  edit_seed_slider = gr.Slider(
170
+ label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42
 
171
  )
172
  edit_randomize_checkbox = gr.Checkbox(
173
  label="Randomize seed", value=False
 
184
  label="Image Editing Examples"
185
  )
186
 
187
+ # โ”€โ”€ Tab 2: Text โ†’ Image โ”€โ”€
188
  with gr.TabItem("Text โ†’ Image"):
189
  with gr.Row():
190
  prompt_gen_input = gr.Textbox(
191
+ label="Prompt", show_label=False,
192
+ placeholder="Describe the image you wantโ€ฆ",
193
+ container=False, lines=1
 
 
194
  )
195
  run_image_gen_btn = gr.Button("Run", scale=0)
196
 
 
205
  value=1024, step=32
206
  )
207
  guidance_slider = gr.Slider(
208
+ label="Guidance Scale", minimum=1.0,
209
+ maximum=30.0, step=0.5, value=5.0
210
  )
211
  num_steps_slider = gr.Slider(
212
  label="Steps", minimum=40, maximum=100, value=50, step=1
213
  )
214
  seed_slider = gr.Slider(
215
+ label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42
 
216
  )
217
  randomize_checkbox = gr.Checkbox(
218
  label="Randomize seed", value=False
 
229
  label="Image Generation Examples"
230
  )
231
 
232
+ # โ”€โ”€ Tab 3: Image โ†’ Text โ”€โ”€
233
  with gr.TabItem("Image โ†’ Text"):
234
  image_understand_input = gr.Image(label="Input Image", type="pil")
235
  with gr.Row():
236
  prompt_understand_input = gr.Textbox(
237
  label="Prompt", show_label=False,
238
+ placeholder="Describe the question about imageโ€ฆ",
239
  container=False, lines=1
240
  )
241
  run_image_understand_btn = gr.Button("Run", scale=0)
 
253
 
254
  clean_btn = gr.Button("Clear All Inputs / Outputs")
255
 
256
+ # -------- ๅณไพง๏ผš่พ“ๅ‡บๅŒบ --------
257
  with gr.Column():
258
+ output_gallery = gr.Gallery(label="Generated Images", columns=2, visible=True)
259
+ output_text = gr.Textbox(label="Generated Text", visible=False, lines=5, interactive=False)
260
+
261
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ไบ‹ไปถ็ป‘ๅฎš โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
 
 
 
 
262
  def run_img_txt_to_img_tab(prompt, img, steps, seed, txt_cfg, img_cfg,
263
  progress=gr.Progress(track_tqdm=True)):
264
  if img is None:
265
+ return (gr.update(value=[], visible=False),
266
+ gr.update(value="Please upload an image for editing.", visible=True))
267
+ imgs = process_img_txt_to_img(prompt, img, steps, seed, txt_cfg, img_cfg, progress)
268
+ return (gr.update(value=imgs, visible=True),
269
+ gr.update(value="", visible=False))
 
 
 
 
 
270
 
271
  def run_txt_to_img_tab(prompt, height, width, steps, seed, guidance,
272
  progress=gr.Progress(track_tqdm=True)):
273
+ imgs = process_txt_to_img(prompt, height, width, steps, seed, guidance, progress)
274
+ return (gr.update(value=imgs, visible=True),
275
+ gr.update(value="", visible=False))
 
 
 
276
 
277
  def run_img_to_txt_tab(img, prompt,
278
  progress=gr.Progress(track_tqdm=True)):
279
  if img is None:
280
+ return (gr.update(value=[], visible=False),
281
+ gr.update(value="Please upload an image for understanding.", visible=True))
 
 
282
  txt = process_img_to_txt(prompt, img, progress)
283
+ return (gr.update(value=[], visible=False),
284
+ gr.update(value=txt, visible=True))
 
 
285
 
286
  def clean_all_fn():
287
+ """้‡็ฝฎๅ…จ้ƒจ่พ“ๅ…ฅ/่พ“ๅ‡บๆŽงไปถ"""
288
  return (
289
  # Tab 1
290
  gr.update(value=None), gr.update(value=""),
 
303
  gr.update(value="", visible=False)
304
  )
305
 
306
+ # ------ Tab 1 ็ป‘ๅฎš ------
307
  edit_inputs = [
308
  edit_prompt_input, edit_image_input,
309
  edit_num_steps_slider, edit_seed_slider,
 
322
  [output_gallery, output_text]
323
  )
324
 
325
+ # ------ Tab 2 ็ป‘ๅฎš ------
326
  gen_inputs = [
327
  prompt_gen_input, height_slider, width_slider,
328
  num_steps_slider, seed_slider, guidance_slider
 
340
  [output_gallery, output_text]
341
  )
342
 
343
+ # ------ Tab 3 ็ป‘ๅฎš ------
344
  understand_inputs = [image_understand_input, prompt_understand_input]
345
  run_image_understand_btn.click(run_img_to_txt_tab,
346
  understand_inputs,
 
349
  understand_inputs,
350
  [output_gallery, output_text])
351
 
352
+ # ๆธ…็ฉบ
353
  clean_btn.click(clean_all_fn, [], [
354
  edit_image_input, edit_prompt_input, edit_img_guidance_slider,
355
  edit_txt_guidance_slider, edit_num_steps_slider, edit_seed_slider,
 
359
  output_gallery, output_text
360
  ])
361
 
362
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
363
+ # โ‘ฆ ๅฏๅŠจ Space
364
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
365
  if __name__ == "__main__":
366
+ # HF Spaces 默认监听 0.0.0.0:7860，无需 share=True
367
+ demo.launch()