innoai committed on
Commit
ed79f3e
·
verified ·
1 Parent(s): fa6b263

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -259
app.py CHANGED
@@ -5,34 +5,28 @@ Ovis-U1-3B 多模态 DEMO（CPU / GPU 自适应版本）
5
  ไพ่ต–๏ผšPython 3.10+ใ€torch 2.*ใ€transformers 4.41.*ใ€gradio 4.*
6
  """
7
 
8
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
9
- # โ‘  ๅœจไปปไฝ• transformers / flash_attn ๅฏผๅ…ฅไน‹ๅ‰ๅฎŒๆˆ็Žฏๅขƒๅค„็†
10
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
11
- import os
12
- import sys
13
- import types
14
- import subprocess
15
- import random
16
- import numpy as np
17
- import torch
18
 
19
- # ๅˆคๆ–ญๆ˜ฏๅฆๆœ‰ CUDA
20
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
21
- DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32 # CPU โ†’ float32
22
 
23
- # ---------- CPU ็Žฏๅขƒ๏ผšๅฑ่”ฝ flash-attn ----------
24
  if DEVICE == "cpu":
25
- # ๅฐ่ฏ•ๅธ่ฝฝๅทฒๅญ˜ๅœจ็š„ flash-attn๏ผˆ่‹ฅๅŸบ็ก€้•œๅƒ้ข„่ฃ…๏ผ‰
26
- subprocess.run("pip uninstall -y flash-attn", shell=True,
27
- stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
28
 
29
- # ๆž„้€ โ€œ็ฉบๅฃณโ€ flash_attn ๆจกๅ—๏ผŒๆไพ›ๆœ€็ฎ€ไฝ็ฝฎๆ—‹่ฝฌๅฎž็Žฐ
30
  fake_flash_attn = types.ModuleType("flash_attn")
31
  fake_layers = types.ModuleType("flash_attn.layers")
32
  fake_rotary = types.ModuleType("flash_attn.layers.rotary")
33
 
34
  def _cpu_apply_rotary_emb(x, cos, sin):
35
- """็บฏ CPU ็š„ๆ—‹่ฝฌไฝ็ฝฎ็ผ–็ ๏ผˆๆ…ข๏ผŒไฝ†ๅฏ็”จ๏ผ‰"""
36
  x1, x2 = x[..., ::2], x[..., 1::2]
37
  rot_x1 = x1 * cos - x2 * sin
38
  rot_x2 = x1 * sin + x2 * cos
@@ -45,323 +39,189 @@ if DEVICE == "cpu":
45
  fake_layers.rotary = fake_rotary
46
  fake_flash_attn.layers = fake_layers
47
 
 
 
 
48
  sys.modules.update({
49
  "flash_attn": fake_flash_attn,
50
  "flash_attn.layers": fake_layers,
51
  "flash_attn.layers.rotary": fake_rotary,
52
  })
53
  else:
54
- # GPU ็Žฏๅขƒ๏ผšๅฆ‚ๆœ‰้œ€่ฆๅฎ‰่ฃ… flash-attn๏ผˆๅฎ‰่ฃ…ๅคฑ่ดฅไธ่‡ดๅ‘ฝ๏ผ‰
55
  try:
56
  subprocess.run(
57
  "pip install flash-attn==2.6.3 --no-build-isolation",
58
  env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
59
- shell=True,
60
- check=True,
61
- )
62
  except subprocess.CalledProcessError:
63
- print("[WARN] flash-attn ๅฎ‰่ฃ…ๅคฑ่ดฅ๏ผŒๆŽจ็†ๆ€ง่ƒฝๅฏ่ƒฝๅ—ๅฝฑๅ“ใ€‚")
64
 
65
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
66
  # โ‘ก ๅธธ่ง„ไพ่ต–
67
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
68
  from PIL import Image
69
  import gradio as gr
70
  import spaces
71
  from transformers import AutoModelForCausalLM
72
 
73
- # ็”จๆˆท่‡ชๅฎšไน‰็ฎก็บฟ่„šๆœฌ๏ผˆไฟๆŒๅŽŸๆœ‰่ทฏๅพ„๏ผ‰
74
  from test_img_edit import pipe_img_edit
75
  from test_img_to_txt import pipe_txt_gen
76
  from test_txt_to_img import pipe_t2i
77
 
78
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
79
  # โ‘ข ๅทฅๅ…ทๅ‡ฝๆ•ฐ & ๅธธ้‡
80
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€๏ฟฝ๏ฟฝ๏ฟฝโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
81
  MAX_SEED = 10_000
82
 
83
- def set_global_seed(seed: int = 42) -> None:
84
- """็ปŸไธ€่ฎพ็ฝฎ้šๆœบ็งๅญ๏ผˆCPU / CUDA ่‡ช้€‚ๅบ”๏ผ‰"""
85
- random.seed(seed)
86
- np.random.seed(seed)
87
- torch.manual_seed(seed)
88
  if torch.cuda.is_available():
89
  torch.cuda.manual_seed_all(seed)
90
 
91
  def randomize_seed_fn(seed: int, randomize: bool) -> int:
92
- """ๆ นๆฎๅค้€‰ๆก†ๅ†ณๅฎšๆ˜ฏๅฆ้šๆœบ็งๅญ"""
93
  return random.randint(0, MAX_SEED) if randomize else seed
94
 
95
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
96
  # โ‘ฃ ๅŠ ่ฝฝๆจกๅž‹
97
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
98
- HF_TOKEN = os.getenv("HF_TOKEN") # ็งๆœ‰ไป“ๅบ“่ฏทๅœจ Space Secret ่ฎพ็ฝฎ
99
- MODEL_ID = "AIDC-AI/Ovis-U1-3B"
100
 
101
  print(f"[INFO] Loading {MODEL_ID} on {DEVICE} โ€ฆ")
102
  model = AutoModelForCausalLM.from_pretrained(
103
  MODEL_ID,
104
  torch_dtype=DTYPE,
105
- low_cpu_mem_usage=True, # ๅ‡ไฝŽ RSS
106
- device_map="auto", # CPU ็Žฏๅขƒๅ…จ้ƒจๆ”พ CPU
107
  token=HF_TOKEN,
108
  trust_remote_code=True
109
  ).eval()
110
  print("[INFO] Model ready!")
111
 
112
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
113
  # โ‘ค ๆŽจ็†ๅฐ่ฃ…
114
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
115
- def process_txt_to_img(prompt: str, height: int, width: int, steps: int,
116
- final_seed: int, guidance_scale: float,
117
- progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
118
- set_global_seed(final_seed)
119
- return pipe_t2i(model, prompt, height, width, steps,
120
- cfg=guidance_scale, seed=final_seed)
121
 
122
- def process_img_to_txt(prompt: str, img: Image.Image,
123
- progress: gr.Progress = gr.Progress(track_tqdm=True)) -> str:
124
  return pipe_txt_gen(model, img, prompt)
125
 
126
- def process_img_txt_to_img(prompt: str, img: Image.Image, steps: int,
127
- final_seed: int, txt_cfg: float, img_cfg: float,
128
- progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
129
- set_global_seed(final_seed)
130
- return pipe_img_edit(model, img, prompt, steps,
131
- txt_cfg, img_cfg, seed=final_seed)
132
 
133
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
134
- # โ‘ฅ Gradio UI
135
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
136
  with gr.Blocks(title="Ovis-U1-3B (CPU/GPU adaptive)") as demo:
137
  gr.Markdown("# Ovis-U1-3B\nๅคšๆจกๆ€ๆ–‡ๆœฌ-๏ฟฝ๏ฟฝๅƒ DEMO๏ผˆCPU/GPU ่‡ช้€‚ๅบ”็‰ˆ๏ผ‰")
138
 
139
  with gr.Row():
140
- # -------- ๅทฆไพง๏ผš่พ“ๅ…ฅๅŒบ --------
141
  with gr.Column():
142
  with gr.Tabs():
143
- # โ”€โ”€ Tab 1: Image + Text โ†’ Image โ”€โ”€
144
  with gr.TabItem("Image + Text โ†’ Image"):
145
  edit_image_input = gr.Image(label="Input Image", type="pil")
146
  with gr.Row():
147
- edit_prompt_input = gr.Textbox(
148
- label="Prompt",
149
- show_label=False,
150
- placeholder="Describe the editing instructionโ€ฆ",
151
- container=False,
152
- lines=1
153
- )
154
  run_edit_image_btn = gr.Button("Run", scale=0)
155
-
156
  with gr.Accordion("Advanced Settings", open=False):
157
  with gr.Row():
158
- edit_img_guidance_slider = gr.Slider(
159
- label="Image Guidance Scale",
160
- minimum=1.0, maximum=10.0, step=0.1, value=1.5
161
- )
162
- edit_txt_guidance_slider = gr.Slider(
163
- label="Text Guidance Scale",
164
- minimum=1.0, maximum=30.0, step=0.5, value=6.0
165
- )
166
- edit_num_steps_slider = gr.Slider(
167
- label="Steps", minimum=40, maximum=100, value=50, step=1
168
- )
169
- edit_seed_slider = gr.Slider(
170
- label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42
171
- )
172
- edit_randomize_checkbox = gr.Checkbox(
173
- label="Randomize seed", value=False
174
- )
175
-
176
- gr.Examples(
177
- examples=[
178
- ["imgs/train.png", "Modify this image in a Ghibli style."],
179
- ["imgs/chair.png", "Transfer the image into a faceted low-poly 3-D render style."],
180
- ["imgs/car.png", "Replace the tiny house on wheels in the image with a vintage car."],
181
- ],
182
- inputs=[edit_image_input, edit_prompt_input],
183
- cache_examples=False,
184
- label="Image Editing Examples"
185
- )
186
-
187
- # โ”€โ”€ Tab 2: Text โ†’ Image โ”€โ”€
188
  with gr.TabItem("Text โ†’ Image"):
189
- with gr.Row():
190
- prompt_gen_input = gr.Textbox(
191
- label="Prompt", show_label=False,
192
- placeholder="Describe the image you wantโ€ฆ",
193
- container=False, lines=1
194
- )
195
- run_image_gen_btn = gr.Button("Run", scale=0)
196
-
197
  with gr.Accordion("Advanced Settings", open=False):
198
  with gr.Row():
199
- height_slider = gr.Slider(
200
- label="height", minimum=256, maximum=1536,
201
- value=1024, step=32
202
- )
203
- width_slider = gr.Slider(
204
- label="width", minimum=256, maximum=1536,
205
- value=1024, step=32
206
- )
207
- guidance_slider = gr.Slider(
208
- label="Guidance Scale", minimum=1.0,
209
- maximum=30.0, step=0.5, value=5.0
210
- )
211
- num_steps_slider = gr.Slider(
212
- label="Steps", minimum=40, maximum=100, value=50, step=1
213
- )
214
- seed_slider = gr.Slider(
215
- label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42
216
- )
217
- randomize_checkbox = gr.Checkbox(
218
- label="Randomize seed", value=False
219
- )
220
-
221
- gr.Examples(
222
- examples=[
223
- ["A breathtaking fairy with teal wings sits gracefully on a lotus flower in a serene pond, exuding elegance."],
224
- ["A winter mountain landscape at deep night with snowy terrain and colorful flowers, portrayed as an anime background illustration."],
225
- ["A photo of a pug wearing a cowboy hat and bandana, sitting on a hay bale."]
226
- ],
227
- inputs=[prompt_gen_input],
228
- cache_examples=False,
229
- label="Image Generation Examples"
230
- )
231
-
232
- # โ”€โ”€ Tab 3: Image โ†’ Text โ”€โ”€
233
  with gr.TabItem("Image โ†’ Text"):
234
- image_understand_input = gr.Image(label="Input Image", type="pil")
235
- with gr.Row():
236
- prompt_understand_input = gr.Textbox(
237
- label="Prompt", show_label=False,
238
- placeholder="Describe the question about imageโ€ฆ",
239
- container=False, lines=1
240
- )
241
- run_image_understand_btn = gr.Button("Run", scale=0)
242
 
243
- gr.Examples(
244
- examples=[
245
- ["imgs/table.webp", "In what scenario does this picture take place?"],
246
- ["imgs/count.png", "How many broccoli are there in the picture?"],
247
- ["imgs/foot.webp", "Where is this picture located?"],
248
- ],
249
- inputs=[image_understand_input, prompt_understand_input],
250
- cache_examples=False,
251
- label="Image Understanding Examples"
252
- )
253
-
254
- clean_btn = gr.Button("Clear All Inputs / Outputs")
255
-
256
- # -------- ๅณไพง๏ผš่พ“ๅ‡บๅŒบ --------
257
  with gr.Column():
258
- output_gallery = gr.Gallery(label="Generated Images", columns=2, visible=True)
259
- output_text = gr.Textbox(label="Generated Text", visible=False, lines=5, interactive=False)
260
 
261
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ไบ‹ไปถ็ป‘ๅฎš โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
262
- def run_img_txt_to_img_tab(prompt, img, steps, seed, txt_cfg, img_cfg,
263
- progress=gr.Progress(track_tqdm=True)):
264
  if img is None:
265
- return (gr.update(value=[], visible=False),
266
- gr.update(value="Please upload an image for editing.", visible=True))
267
  imgs = process_img_txt_to_img(prompt, img, steps, seed, txt_cfg, img_cfg, progress)
268
- return (gr.update(value=imgs, visible=True),
269
- gr.update(value="", visible=False))
270
 
271
- def run_txt_to_img_tab(prompt, height, width, steps, seed, guidance,
272
- progress=gr.Progress(track_tqdm=True)):
273
- imgs = process_txt_to_img(prompt, height, width, steps, seed, guidance, progress)
274
- return (gr.update(value=imgs, visible=True),
275
- gr.update(value="", visible=False))
276
 
277
- def run_img_to_txt_tab(img, prompt,
278
- progress=gr.Progress(track_tqdm=True)):
279
  if img is None:
280
- return (gr.update(value=[], visible=False),
281
- gr.update(value="Please upload an image for understanding.", visible=True))
282
- txt = process_img_to_txt(prompt, img, progress)
283
- return (gr.update(value=[], visible=False),
284
- gr.update(value=txt, visible=True))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
 
286
- def clean_all_fn():
287
- """้‡็ฝฎๅ…จ้ƒจ่พ“ๅ…ฅ/่พ“ๅ‡บๆŽงไปถ"""
288
  return (
289
- # Tab 1
290
- gr.update(value=None), gr.update(value=""),
291
- gr.update(value=1.5), gr.update(value=6.0),
292
- gr.update(value=50), gr.update(value=42),
293
- gr.update(value=False),
294
- # Tab 2
295
- gr.update(value=""), gr.update(value=1024),
296
- gr.update(value=1024), gr.update(value=5.0),
297
- gr.update(value=50), gr.update(value=42),
298
- gr.update(value=False),
299
- # Tab 3
300
  gr.update(value=None), gr.update(value=""),
301
- # Outputs
302
- gr.update(value=[], visible=True),
303
- gr.update(value="", visible=False)
304
  )
305
-
306
- # ------ Tab 1 ็ป‘ๅฎš ------
307
- edit_inputs = [
308
- edit_prompt_input, edit_image_input,
309
- edit_num_steps_slider, edit_seed_slider,
310
- edit_txt_guidance_slider, edit_img_guidance_slider
311
- ]
312
- run_edit_image_btn.click(randomize_seed_fn,
313
- [edit_seed_slider, edit_randomize_checkbox],
314
- [edit_seed_slider]).then(
315
- run_img_txt_to_img_tab, edit_inputs,
316
- [output_gallery, output_text]
317
- )
318
- edit_prompt_input.submit(randomize_seed_fn,
319
- [edit_seed_slider, edit_randomize_checkbox],
320
- [edit_seed_slider]).then(
321
- run_img_txt_to_img_tab, edit_inputs,
322
- [output_gallery, output_text]
323
- )
324
-
325
- # ------ Tab 2 ็ป‘ๅฎš ------
326
- gen_inputs = [
327
- prompt_gen_input, height_slider, width_slider,
328
- num_steps_slider, seed_slider, guidance_slider
329
- ]
330
- run_image_gen_btn.click(randomize_seed_fn,
331
- [seed_slider, randomize_checkbox],
332
- [seed_slider]).then(
333
- run_txt_to_img_tab, gen_inputs,
334
- [output_gallery, output_text]
335
- )
336
- prompt_gen_input.submit(randomize_seed_fn,
337
- [seed_slider, randomize_checkbox],
338
- [seed_slider]).then(
339
- run_txt_to_img_tab, gen_inputs,
340
- [output_gallery, output_text]
341
- )
342
-
343
- # ------ Tab 3 ็ป‘ๅฎš ------
344
- understand_inputs = [image_understand_input, prompt_understand_input]
345
- run_image_understand_btn.click(run_img_to_txt_tab,
346
- understand_inputs,
347
- [output_gallery, output_text])
348
- prompt_understand_input.submit(run_img_to_txt_tab,
349
- understand_inputs,
350
- [output_gallery, output_text])
351
-
352
- # ๆธ…็ฉบ
353
- clean_btn.click(clean_all_fn, [], [
354
- edit_image_input, edit_prompt_input, edit_img_guidance_slider,
355
- edit_txt_guidance_slider, edit_num_steps_slider, edit_seed_slider,
356
- edit_randomize_checkbox, prompt_gen_input, height_slider,
357
- width_slider, guidance_slider, num_steps_slider, seed_slider,
358
- randomize_checkbox, image_understand_input, prompt_understand_input,
359
- output_gallery, output_text
360
  ])
361
 
362
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
363
- # โ‘ฆ ๅฏๅŠจ Space
364
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
365
  if __name__ == "__main__":
366
- # HF Spaces ้ป˜่ฎค็›‘ๅฌ 0.0.0.0:7860๏ผŒๆ— ้œ€ share=True
367
  demo.launch()
 
5
  ไพ่ต–๏ผšPython 3.10+ใ€torch 2.*ใ€transformers 4.41.*ใ€gradio 4.*
6
  """
7
 
8
+ # ───────────────────────────────────────────────
9
+ # ① 在任何 transformers / flash_attn 导入之前处理环境
10
+ # ───────────────────────────────────────────────
11
+ import os, sys, types, subprocess, random, numpy as np, torch
12
+ import importlib.util # โ˜… ๆ–ฐๅขž๏ผš็”จไบŽ็”Ÿๆˆ ModuleSpec
 
 
 
 
 
13
 
 
14
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
+ DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32
16
 
17
+ # -------- CPU 环境：屏蔽 flash-attn --------
18
  if DEVICE == "cpu":
19
+ # ๅธ่ฝฝๆฝœๅœจ็š„ flash-attn
20
+ subprocess.run("pip uninstall -y flash-attn",
21
+ shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
22
 
23
+ # 构造空壳模块
24
  fake_flash_attn = types.ModuleType("flash_attn")
25
  fake_layers = types.ModuleType("flash_attn.layers")
26
  fake_rotary = types.ModuleType("flash_attn.layers.rotary")
27
 
28
  def _cpu_apply_rotary_emb(x, cos, sin):
29
+ """็บฏ CPU ็š„ๆ—‹่ฝฌไฝ็ฝฎ็ผ–็ ๏ผˆ็ฎ€ๆ˜“ๅฎž็Žฐ๏ผ‰"""
30
  x1, x2 = x[..., ::2], x[..., 1::2]
31
  rot_x1 = x1 * cos - x2 * sin
32
  rot_x2 = x1 * sin + x2 * cos
 
39
  fake_layers.rotary = fake_rotary
40
  fake_flash_attn.layers = fake_layers
41
 
42
+ # ★ 新增：为空壳模块补充合法的 __spec__
43
+ fake_flash_attn.__spec__ = importlib.util.spec_from_loader("flash_attn", loader=None)
44
+
45
  sys.modules.update({
46
  "flash_attn": fake_flash_attn,
47
  "flash_attn.layers": fake_layers,
48
  "flash_attn.layers.rotary": fake_rotary,
49
  })
50
  else:
51
+ # GPU 环境：尝试安装 flash-attn
try:
    # GPU environment: best-effort installation of flash-attn at start-up.
    # The child process must inherit the parent's environment (PATH, HOME,
    # proxy and CUDA settings): passing only the override as ``env`` would
    # replace the whole environment and can make pip itself unlocatable.
    # Invoking pip through the running interpreter also guarantees that the
    # package lands in the environment this script is executing in.
    subprocess.run(
        [
            sys.executable, "-m", "pip", "install",
            "flash-attn==2.6.3", "--no-build-isolation",
        ],
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        check=True,
    )
except (subprocess.CalledProcessError, OSError):
    # A failed install is deliberately non-fatal: warn and carry on.
    print("[WARN] flash-attn 安装失败，GPU 加速功能受限。")
59
 
60
+ # ───────────────────────────────────────────────
61
  # ② 常规依赖
62
+ # ───────────────────────────────────────────────
63
  from PIL import Image
64
  import gradio as gr
65
  import spaces
66
  from transformers import AutoModelForCausalLM
67
 
 
68
  from test_img_edit import pipe_img_edit
69
  from test_img_to_txt import pipe_txt_gen
70
  from test_txt_to_img import pipe_t2i
71
 
72
+ # ───────────────────────────────────────────────
73
  # ③ 工具函数 & 常量
74
+ # ───────────────────────────────────────────────
75
  MAX_SEED = 10_000
76
 
def set_global_seed(seed: int = 42) -> None:
    """Seed every random number generator used by the demo.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    default generator; when CUDA is available, all CUDA device generators are
    seeded as well.

    Args:
        seed: Value handed to each generator. Integral floats (for example a
            value coming from a UI slider) are accepted and coerced to ``int``.
    """
    # NumPy and PyTorch reject float seeds, so normalise once up front.
    seed = int(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
81
 
def randomize_seed_fn(seed: int, randomize: bool) -> int:
    """Choose the seed for the next run.

    Args:
        seed: Seed currently selected by the caller.
        randomize: When true, ignore ``seed`` and draw a fresh one.

    Returns:
        ``seed`` unchanged when ``randomize`` is false, otherwise a random
        integer in ``[0, MAX_SEED]`` (both ends inclusive).
    """
    if not randomize:
        return seed
    # Draw from the OS entropy source instead of the module-level generator:
    # set_global_seed() reseeds the latter before each generation, which would
    # turn every "random" seed into a deterministic function of the last one.
    return random.SystemRandom().randint(0, MAX_SEED)
84
 
85
+ # ───────────────────────────────────────────────
86
  # ④ 加载模型
87
+ # ───────────────────────────────────────────────
HF_TOKEN = os.getenv("HF_TOKEN")  # None when the variable is not set
MODEL_ID = "AIDC-AI/Ovis-U1-3B"

# The checkpoint is loaded once at import time; the inference wrappers in this
# file all read the resulting module-level ``model``.
print(f"[INFO] Loading {MODEL_ID} on {DEVICE} …")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=DTYPE,
    low_cpu_mem_usage=True,
    device_map="auto",
    token=HF_TOKEN,
    trust_remote_code=True,
).eval()  # switch to inference mode
print("[INFO] Model ready!")
101
 
102
+ # ───────────────────────────────────────────────
103
  # ⑤ 推理封装
104
+ # ───────────────────────────────────────────────
def process_txt_to_img(prompt, height, width, steps, seed, cfg,
                       progress=gr.Progress(track_tqdm=True)):
    """Run text-to-image generation for one prompt.

    Seeds the global random number generators with ``seed`` and then
    delegates to ``pipe_t2i`` with the module-level ``model``.

    Args:
        prompt: Text prompt forwarded to ``pipe_t2i``.
        height: Forwarded to ``pipe_t2i`` unchanged.
        width: Forwarded to ``pipe_t2i`` unchanged.
        steps: Forwarded to ``pipe_t2i`` unchanged.
        seed: Used for ``set_global_seed`` and passed on as ``seed=``.
        cfg: Passed on as ``cfg=``.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        Whatever ``pipe_t2i`` returns (presumably a list of images; confirm
        against ``test_txt_to_img``).
    """
    set_global_seed(seed)
    return pipe_t2i(model, prompt, height, width, steps, cfg=cfg, seed=seed)
 
 
109
 
def process_img_to_txt(prompt, img, progress=gr.Progress(track_tqdm=True)):
    """Run image understanding for one image/prompt pair.

    Args:
        prompt: Text prompt forwarded to ``pipe_txt_gen``.
        img: Image forwarded to ``pipe_txt_gen``.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        Whatever ``pipe_txt_gen`` returns (presumably the generated text;
        confirm against ``test_img_to_txt``).
    """
    # Note the argument order: pipe_txt_gen takes the image before the prompt.
    return pipe_txt_gen(model, img, prompt)
112
 
def process_img_txt_to_img(prompt, img, steps, seed, txt_cfg, img_cfg,
                           progress=gr.Progress(track_tqdm=True)):
    """Run instruction-guided image editing.

    Seeds the global random number generators with ``seed`` and then
    delegates to ``pipe_img_edit`` with the module-level ``model``.

    Args:
        prompt: Text prompt forwarded to ``pipe_img_edit``.
        img: Input image forwarded to ``pipe_img_edit``.
        steps: Forwarded to ``pipe_img_edit`` unchanged.
        seed: Used for ``set_global_seed`` and passed on as ``seed=``.
        txt_cfg: Forwarded positionally, before ``img_cfg``.
        img_cfg: Forwarded positionally, after ``txt_cfg``.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        Whatever ``pipe_img_edit`` returns (presumably a list of images;
        confirm against ``test_img_edit``).
    """
    set_global_seed(seed)
    # Note the argument order: pipe_img_edit takes the image before the prompt.
    return pipe_img_edit(model, img, prompt, steps, txt_cfg, img_cfg, seed=seed)
 
 
117
 
118
+ # ───────────────────────────────────────────────
119
+ # ⑥ Gradio UI（与前版一致，此处省略修改标记）
120
+ # ───────────────────────────────────────────────
121
  with gr.Blocks(title="Ovis-U1-3B (CPU/GPU adaptive)") as demo:
122
  gr.Markdown("# Ovis-U1-3B\nๅคšๆจกๆ€ๆ–‡ๆœฌ-๏ฟฝ๏ฟฝๅƒ DEMO๏ผˆCPU/GPU ่‡ช้€‚ๅบ”็‰ˆ๏ผ‰")
123
 
124
  with gr.Row():
 
125
  with gr.Column():
126
  with gr.Tabs():
127
+ # Tab 1: Image + Text โ†’ Image
128
  with gr.TabItem("Image + Text โ†’ Image"):
129
  edit_image_input = gr.Image(label="Input Image", type="pil")
130
  with gr.Row():
131
+ edit_prompt_input = gr.Textbox(show_label=False, placeholder="Describe the editing instructionโ€ฆ")
 
 
 
 
 
 
132
  run_edit_image_btn = gr.Button("Run", scale=0)
 
133
  with gr.Accordion("Advanced Settings", open=False):
134
  with gr.Row():
135
+ edit_img_guidance = gr.Slider(label="Image Guidance", minimum=1, maximum=10, value=1.5, step=0.1)
136
+ edit_txt_guidance = gr.Slider(label="Text Guidance", minimum=1, maximum=30, value=6.0, step=0.5)
137
+ edit_steps = gr.Slider(label="Steps", minimum=40, maximum=100, value=50, step=1)
138
+ edit_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, value=42, step=1)
139
+ edit_random = gr.Checkbox(label="Randomize seed", value=False)
140
+ # Tab 2: Text โ†’ Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  with gr.TabItem("Text โ†’ Image"):
142
+ prompt_gen = gr.Textbox(show_label=False, placeholder="Describe the image you wantโ€ฆ")
143
+ run_gen_btn = gr.Button("Run", scale=0)
 
 
 
 
 
 
144
  with gr.Accordion("Advanced Settings", open=False):
145
  with gr.Row():
146
+ height_slider = gr.Slider(label="height", minimum=256, maximum=1536, value=1024, step=32)
147
+ width_slider = gr.Slider(label="width", minimum=256, maximum=1536, value=1024, step=32)
148
+ guidance_slider = gr.Slider(label="Guidance Scale", minimum=1, maximum=30, value=5, step=0.5)
149
+ steps_slider = gr.Slider(label="Steps", minimum=40, maximum=100, value=50, step=1)
150
+ seed_slider = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, value=42, step=1)
151
+ random_check = gr.Checkbox(label="Randomize seed", value=False)
152
+ # Tab 3: Image โ†’ Text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  with gr.TabItem("Image โ†’ Text"):
154
+ understand_img = gr.Image(label="Input Image", type="pil")
155
+ understand_prompt = gr.Textbox(show_label=False, placeholder="Describe the question about imageโ€ฆ")
156
+ run_understand = gr.Button("Run", scale=0)
157
+ clear_btn = gr.Button("Clear All")
 
 
 
 
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  with gr.Column():
160
+ gallery = gr.Gallery(label="Generated Images", columns=2, visible=True)
161
+ txt_out = gr.Textbox(label="Generated Text", visible=False, lines=5, interactive=False)
162
 
163
+ # 事件绑定（与上一版相同，省略重复注释）
def run_tab1(prompt, img, steps, seed, txt_cfg, img_cfg,
             progress=gr.Progress(track_tqdm=True)):
    """Handle the "Image + Text -> Image" tab.

    Returns:
        A pair of ``gr.update`` objects: the first targets the output gallery,
        the second the output text box. Without an input image the gallery is
        hidden and the text box shows a hint; otherwise the edited images are
        shown and the text box is hidden.
    """
    if img is None:
        return (
            gr.update(value=[], visible=False),
            gr.update(value="Please upload an image.", visible=True),
        )
    imgs = process_img_txt_to_img(prompt, img, steps, seed, txt_cfg, img_cfg, progress)
    return (
        gr.update(value=imgs, visible=True),
        gr.update(value="", visible=False),
    )
 
169
 
def run_tab2(prompt, h, w, steps, seed, guidance,
             progress=gr.Progress(track_tqdm=True)):
    """Handle the "Text -> Image" tab.

    Returns:
        A pair of ``gr.update`` objects: the gallery is made visible with the
        generated images, and the text box is cleared and hidden.
    """
    imgs = process_txt_to_img(prompt, h, w, steps, seed, guidance, progress)
    return (
        gr.update(value=imgs, visible=True),
        gr.update(value="", visible=False),
    )
 
 
173
 
def run_tab3(img, prompt, progress=gr.Progress(track_tqdm=True)):
    """Handle the "Image -> Text" tab.

    Note that the image comes first here, unlike ``run_tab1``.

    Returns:
        A pair of ``gr.update`` objects: the gallery is always emptied and
        hidden; the text box is shown with either a hint (no image supplied)
        or the generated answer.
    """
    if img is None:
        return (
            gr.update(value=[], visible=False),
            gr.update(value="Please upload an image.", visible=True),
        )
    text = process_img_to_txt(prompt, img, progress)
    return (
        gr.update(value=[], visible=False),
        gr.update(value=text, visible=True),
    )
179
+
180
+ # Tab1 绑定
181
+ run_edit_image_btn.click(randomize_seed_fn, [edit_seed, edit_random], [edit_seed]).then(
182
+ run_tab1,
183
+ [edit_prompt_input, edit_image_input, edit_steps, edit_seed, edit_txt_guidance, edit_img_guidance],
184
+ [gallery, txt_out])
185
+
186
+ edit_prompt_input.submit(randomize_seed_fn, [edit_seed, edit_random], [edit_seed]).then(
187
+ run_tab1,
188
+ [edit_prompt_input, edit_image_input, edit_steps, edit_seed, edit_txt_guidance, edit_img_guidance],
189
+ [gallery, txt_out])
190
+
191
+ # Tab2 绑定
192
+ run_gen_btn.click(randomize_seed_fn, [seed_slider, random_check], [seed_slider]).then(
193
+ run_tab2,
194
+ [prompt_gen, height_slider, width_slider, steps_slider, seed_slider, guidance_slider],
195
+ [gallery, txt_out])
196
+
197
+ prompt_gen.submit(randomize_seed_fn, [seed_slider, random_check], [seed_slider]).then(
198
+ run_tab2,
199
+ [prompt_gen, height_slider, width_slider, steps_slider, seed_slider, guidance_slider],
200
+ [gallery, txt_out])
201
+
202
+ # Tab3 绑定
203
+ run_understand.click(run_tab3, [understand_img, understand_prompt], [gallery, txt_out])
204
+ understand_prompt.submit(run_tab3, [understand_img, understand_prompt], [gallery, txt_out])
205
 
206
+ # 清空
def clear_all():
    """Build the updates that reset every input and output to its default.

    Returns:
        A tuple of 18 ``gr.update`` objects. Their order must match the
        output component list passed to ``clear_btn.click``.
    """
    return (
        # Tab 1: image, prompt, image guidance, text guidance, steps, seed,
        # randomize flag
        gr.update(value=None), gr.update(value=""),
        gr.update(value=1.5), gr.update(value=6.0),
        gr.update(value=50), gr.update(value=42), gr.update(value=False),
        # Tab 2: prompt, height, width, guidance, steps, seed, randomize flag
        gr.update(value=""), gr.update(value=1024), gr.update(value=1024),
        gr.update(value=5), gr.update(value=50), gr.update(value=42),
        gr.update(value=False),
        # Tab 3: image, prompt
        gr.update(value=None), gr.update(value=""),
        # Outputs: empty visible gallery, empty hidden text box
        gr.update(value=[], visible=True), gr.update(value="", visible=False),
    )
216
+ clear_btn.click(clear_all, [], [
217
+ edit_image_input, edit_prompt_input, edit_img_guidance, edit_txt_guidance,
218
+ edit_steps, edit_seed, edit_random, prompt_gen, height_slider, width_slider,
219
+ guidance_slider, steps_slider, seed_slider, random_check, understand_img,
220
+ understand_prompt, gallery, txt_out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  ])
222
 
223
+ # ───────────────────────────────────────────────
224
+ # ⑦ 启动
225
+ # ───────────────────────────────────────────────
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script.
    demo.launch()