innoai committed on
Commit
d53478b
·
verified ·
1 Parent(s): ff3266f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -194
app.py CHANGED
@@ -1,70 +1,109 @@
 
 
 
 
 
 
 
 
1
  import os
2
  import subprocess
3
- subprocess.run('pip install flash-attn==2.6.3 --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
4
  import random
5
- import spaces
6
  import numpy as np
7
  import torch
8
  from PIL import Image
9
  import gradio as gr
 
10
  from transformers import AutoModelForCausalLM
11
- from test_img_edit import pipe_img_edit
12
- from test_img_to_txt import pipe_txt_gen
13
- from test_txt_to_img import pipe_t2i
14
-
15
-
16
- # Constants
17
- MAX_SEED = 10000
18
 
19
- hf_token = os.getenv("HF_TOKEN")
20
-
21
- HUB_MODEL_ID = "AIDC-AI/Ovis-U1-3B"
22
- model, loading_info = AutoModelForCausalLM.from_pretrained(
23
- HUB_MODEL_ID,
24
- torch_dtype=torch.bfloat16,
25
- output_loading_info=True,
26
- token=hf_token,
27
- trust_remote_code=True
28
  )
29
- print(f'Loading info of Ovis-U1:\n{loading_info}')
30
-
31
- model = model.eval().to("cuda")
32
- model = model.to(torch.bfloat16)
33
-
34
- def set_global_seed(seed: int = 42):
 
 
 
 
 
 
35
  random.seed(seed)
36
  np.random.seed(seed)
37
  torch.manual_seed(seed)
38
- torch.cuda.manual_seed_all(seed)
 
39
 
40
  def randomize_seed_fn(seed: int, randomize: bool) -> int:
 
41
  return random.randint(0, MAX_SEED) if randomize else seed
42
 
43
- @spaces.GPU
44
- def process_txt_to_img(prompt: str, height: int, width: int, steps: int, final_seed: int, guidance_scale: float, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  set_global_seed(final_seed)
46
- images = pipe_t2i(model, prompt, height, width, steps, cfg=guidance_scale, seed=final_seed)
47
- return images
48
 
49
- @spaces.GPU
50
- def process_img_to_txt(prompt: str, img: Image.Image, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> str:
51
- output_text = pipe_txt_gen(model, img, prompt)
52
- return output_text
53
 
54
- @spaces.GPU
55
- def process_img_txt_to_img(prompt: str, img: Image.Image, steps: int, final_seed: int, txt_cfg: float, img_cfg: float, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
 
56
  set_global_seed(final_seed)
57
- images = pipe_img_edit(model, img, prompt, steps, txt_cfg, img_cfg, seed=final_seed)
58
- return images
59
 
 
60
  # Gradio UI
61
- with gr.Blocks(title="Ovis-U1-3B") as demo:
62
- gr.Markdown('''# Ovis-U1-3B
63
- ''')
64
 
65
  with gr.Row():
66
  with gr.Column():
67
  with gr.Tabs():
 
68
  with gr.TabItem("Image + Text → Image"):
69
  edit_image_input = gr.Image(label="Input Image", type="pil")
70
  with gr.Row():
@@ -78,47 +117,38 @@ with gr.Blocks(title="Ovis-U1-3B") as demo:
78
  run_edit_image_btn = gr.Button("Run", scale=0)
79
 
80
  with gr.Accordion("Advanced Settings", open=False):
81
-
82
  with gr.Row():
83
-
84
  edit_img_guidance_slider = gr.Slider(
85
  label="Image Guidance Scale",
86
- minimum=1.0, maximum=10.0,
87
- step=0.1, value=1.5
88
  )
89
-
90
  edit_txt_guidance_slider = gr.Slider(
91
  label="Text Guidance Scale",
92
- minimum=1.0, maximum=30.0,
93
- step=0.5, value=6.0
94
  )
95
-
96
  edit_num_steps_slider = gr.Slider(
97
- label='Steps',
98
- minimum=40, maximum=100,
99
- value=50, step=1
100
  )
101
  edit_seed_slider = gr.Slider(
102
- label="Seed",
103
- minimum=0, maximum=int(MAX_SEED),
104
  step=1, value=42
105
  )
106
  edit_randomize_checkbox = gr.Checkbox(
107
  label="Randomize seed", value=False
108
  )
109
 
110
- img_edit_examples_data = [
111
- ["imgs/train.png", "Modify this image in a Ghibli style. "],
112
- ["imgs/chair.png", "Transfer the image into a faceted low-poly 3-D render style."],
113
- ["imgs/car.png", "Replace the tiny house on wheels in the image with a vintage car."],
114
- ]
115
  gr.Examples(
116
- examples=img_edit_examples_data,
 
 
 
 
117
  inputs=[edit_image_input, edit_prompt_input],
118
- cache_examples=False,
119
  label="Image Editing Examples"
120
  )
121
 
 
122
  with gr.TabItem("Text → Image"):
123
  with gr.Row():
124
  prompt_gen_input = gr.Textbox(
@@ -133,212 +163,188 @@ with gr.Blocks(title="Ovis-U1-3B") as demo:
133
  with gr.Accordion("Advanced Settings", open=False):
134
  with gr.Row():
135
  height_slider = gr.Slider(
136
- label='height',
137
- minimum=256, maximum=1536,
138
  value=1024, step=32
139
  )
140
  width_slider = gr.Slider(
141
- label='width',
142
- minimum=256, maximum=1536,
143
  value=1024, step=32
144
  )
145
-
146
  guidance_slider = gr.Slider(
147
  label="Guidance Scale",
148
- minimum=1.0, maximum=30.0,
149
- step=0.5, value=5.0
150
  )
151
-
152
  num_steps_slider = gr.Slider(
153
- label='Steps',
154
- minimum=40, maximum=100,
155
- value=50, step=1
156
  )
157
  seed_slider = gr.Slider(
158
- label="Seed",
159
- minimum=0, maximum=int(MAX_SEED),
160
  step=1, value=42
161
  )
162
  randomize_checkbox = gr.Checkbox(
163
  label="Randomize seed", value=False
164
  )
165
 
166
- text_gen_examples_data = [
167
- ["A breathtaking fairy with teal wings sits gracefully on a lotus flower in a serene pond, exuding elegance."],
168
- ["A winter mountain landscape at deep night with snowy terrain and colorful flowers, under beautiful clouds and no people, portrayed as an anime background illustration with intricate detail and sharp focus."],
169
- ["A photo of a pug wearing a cowboy hat and bandana, sitting on a hay bale."]
170
- ]
171
  gr.Examples(
172
- examples=text_gen_examples_data,
 
 
 
 
173
  inputs=[prompt_gen_input],
174
- cache_examples=False,
175
  label="Image Generation Examples"
176
  )
177
 
 
178
  with gr.TabItem("Image → Text"):
179
  image_understand_input = gr.Image(label="Input Image", type="pil")
180
  with gr.Row():
181
  prompt_understand_input = gr.Textbox(
182
- label="Prompt",
183
- show_label=False,
184
  placeholder="Describe the question about image...",
185
- container=False,
186
- lines=1
187
  )
188
  run_image_understand_btn = gr.Button("Run", scale=0)
189
 
190
- image_understanding_examples_data = [
191
- ["imgs/table.webp", "In what scenario does this picture take place?"],
192
- ["imgs/count.png", "How many broccoli are there in the picture?"],
193
- ["imgs/foot.webp", "Where is this picture located?"],
194
- ]
195
  gr.Examples(
196
- examples=image_understanding_examples_data,
 
 
 
 
197
  inputs=[image_understand_input, prompt_understand_input],
198
- cache_examples=False,
199
  label="Image Understanding Examples"
200
  )
201
-
202
- clean_btn = gr.Button("Clear All Inputs/Outputs")
203
 
204
- with gr.Column():
205
- output_gallery = gr.Gallery(label="Generated Images", columns=2, visible=True) # Default to visible, content will control
206
- output_text = gr.Textbox(label="Generated Text", visible=False, lines=5, interactive=False)
207
 
208
- @spaces.GPU
209
- def run_img_txt_to_img_tab(prompt, img, steps, seed, txt_cfg, img_cfg, progress=gr.Progress(track_tqdm=True)):
 
 
 
 
 
 
 
 
 
 
 
210
  if img is None:
211
  return (
212
  gr.update(value=[], visible=False),
213
  gr.update(value="Please upload an image for editing.", visible=True)
214
  )
215
- # Seed is already finalized by the randomize_seed_fn in the click chain
216
- imgs = process_img_txt_to_img(prompt, img, steps, seed, txt_cfg, img_cfg, progress=progress)
217
  return (
218
  gr.update(value=imgs, visible=True),
219
  gr.update(value="", visible=False)
220
  )
221
 
222
- @spaces.GPU
223
- def run_txt_to_img_tab(prompt, height, width, steps, seed, guidance, progress=gr.Progress(track_tqdm=True)):
224
- # Seed is already finalized by the randomize_seed_fn in the click chain
225
- imgs = process_txt_to_img(prompt, height, width, steps, seed, guidance, progress=progress)
226
  return (
227
  gr.update(value=imgs, visible=True),
228
  gr.update(value="", visible=False)
229
  )
230
 
231
- @spaces.GPU
232
- def run_img_to_txt_tab(img, prompt, progress=gr.Progress(track_tqdm=True)):
233
  if img is None:
234
  return (
235
  gr.update(value=[], visible=False),
236
  gr.update(value="Please upload an image for understanding.", visible=True)
237
  )
238
- txt = process_img_to_txt(prompt, img, progress=progress)
239
  return (
240
  gr.update(value=[], visible=False),
241
  gr.update(value=txt, visible=True)
242
  )
243
 
244
  def clean_all_fn():
 
245
  return (
246
- # Tab 1 inputs
247
- gr.update(value=None),
248
- gr.update(value=""),
249
- gr.update(value=1.5),
250
- gr.update(value=6.0),
251
- gr.update(value=50),
252
- gr.update(value=42),
253
  gr.update(value=False),
254
- # Tab 2 inputs
255
- gr.update(value=""), # prompt_gen_input
256
- gr.update(value=1024),
257
- gr.update(value=1024),
258
- gr.update(value=5.0),
259
- gr.update(value=50),
260
- gr.update(value=42), # seed_slider
261
- gr.update(value=False), # randomize_checkbox
262
- # Tab 3 inputs
263
- gr.update(value=None), # image_understand_input
264
- gr.update(value=""), # prompt_understand_input
265
  # Outputs
266
- gr.update(value=[], visible=True), # output_gallery (reset and keep visible for next gen)
267
- gr.update(value="", visible=False) # output_text (reset and hide)
268
  )
269
 
270
- # Event listeners for Image + Text -> Image
271
- edit_inputs = [edit_prompt_input, edit_image_input, edit_num_steps_slider, edit_seed_slider, edit_txt_guidance_slider, edit_img_guidance_slider]
272
-
273
- run_edit_image_btn.click(
274
- fn=randomize_seed_fn,
275
- inputs=[edit_seed_slider, edit_randomize_checkbox],
276
- outputs=[edit_seed_slider]
277
- ).then(
278
- fn=run_img_txt_to_img_tab,
279
- inputs=edit_inputs,
280
- outputs=[output_gallery, output_text]
281
  )
282
-
283
- edit_prompt_input.submit(
284
- fn=randomize_seed_fn,
285
- inputs=[edit_seed_slider, edit_randomize_checkbox],
286
- outputs=[edit_seed_slider]
287
- ).then(
288
- fn=run_img_txt_to_img_tab,
289
- inputs=edit_inputs,
290
- outputs=[output_gallery, output_text]
291
  )
292
 
293
- # Event listeners for Text -> Image
294
- gen_inputs = [prompt_gen_input, height_slider, width_slider, num_steps_slider, seed_slider, guidance_slider]
295
-
296
- run_image_gen_btn.click(
297
- fn=randomize_seed_fn,
298
- inputs=[seed_slider, randomize_checkbox],
299
- outputs=[seed_slider]
300
- ).then(
301
- fn=run_txt_to_img_tab,
302
- inputs=gen_inputs,
303
- outputs=[output_gallery, output_text]
304
  )
305
-
306
- prompt_gen_input.submit(
307
- fn=randomize_seed_fn,
308
- inputs=[seed_slider, randomize_checkbox],
309
- outputs=[seed_slider]
310
- ).then(
311
- fn=run_txt_to_img_tab,
312
- inputs=gen_inputs,
313
- outputs=[output_gallery, output_text]
314
  )
315
 
316
- # Event listeners for Image -> Text
317
  understand_inputs = [image_understand_input, prompt_understand_input]
318
-
319
- run_image_understand_btn.click(
320
- fn=run_img_to_txt_tab,
321
- inputs=understand_inputs,
322
- outputs=[output_gallery, output_text]
323
- )
324
-
325
- prompt_understand_input.submit(
326
- fn=run_img_to_txt_tab,
327
- inputs=understand_inputs,
328
- outputs=[output_gallery, output_text]
329
- )
330
-
331
- clean_btn.click(
332
- fn=clean_all_fn,
333
- inputs=[],
334
- outputs=[
335
- edit_image_input, edit_prompt_input, edit_img_guidance_slider, edit_txt_guidance_slider,
336
- edit_num_steps_slider, edit_seed_slider, edit_randomize_checkbox,
337
- prompt_gen_input, height_slider, width_slider, guidance_slider, num_steps_slider, seed_slider, randomize_checkbox,
338
- image_understand_input, prompt_understand_input,
339
- output_gallery, output_text
340
- ]
341
- )
342
-
343
  if __name__ == "__main__":
344
- demo.launch(share=True)
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Ovis-U1-3B multimodal demo.
5
+ Compatible with Hugging Face CPU Spaces (no GPU driver).
6
+ Requirements: Python 3.10+, gradio 4.*, torch 2.*, transformers 4.41.*
7
+ """
8
+
9
  import os
10
  import subprocess
 
11
  import random
 
12
  import numpy as np
13
  import torch
14
  from PIL import Image
15
  import gradio as gr
16
+ import spaces
17
  from transformers import AutoModelForCausalLM
 
 
 
 
 
 
 
18
 
19
# -------------------------------------------------------------------------
# Optional: flash-attn is only really used in a GPU environment; on a CPU
# Space a failed install is harmless and is merely reported.
# -------------------------------------------------------------------------
try:
    subprocess.run(
        "pip install flash-attn==2.6.3 --no-build-isolation",
        # `env` REPLACES the child's entire environment. Extend os.environ
        # instead of passing a one-key dict, otherwise PATH, HOME, proxy
        # settings, etc. are dropped and pip may not be found or may misbehave.
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        shell=True,
        check=True,  # a non-zero pip exit status raises CalledProcessError
    )
except subprocess.CalledProcessError:
    # Best-effort install: report the failure and keep starting the app.
    print("[INFO] flash-attn 安装失败(CPU 环境可忽略)")
31
+
32
# -------------------------------------------------------------------------
# Constants and utility functions
# -------------------------------------------------------------------------
MAX_SEED = 10_000  # inclusive upper bound used when drawing a random seed
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32  # fp32 on CPU


def set_global_seed(seed: int = 42) -> None:
    """Seed the Python, NumPy and PyTorch random number generators.

    The seed is coerced to ``int`` first so that a float such as ``42.0``
    seeds every generator exactly like ``42`` instead of being rejected or
    producing a different random stream.

    Args:
        seed: Seed value applied to ``random``, ``numpy.random`` and
            ``torch`` (plus all CUDA devices when CUDA is available).
    """
    seed = int(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Only touch the CUDA generators when a CUDA device is actually present.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def randomize_seed_fn(seed: int, randomize: bool) -> int:
    """Pick the seed to use for a run (UI-side seed randomisation).

    Args:
        seed: Seed currently selected by the user.
        randomize: When true, ignore ``seed`` and draw a new one.

    Returns:
        A random integer in ``[0, MAX_SEED]`` when ``randomize`` is true,
        otherwise ``seed`` unchanged.
    """
    return random.randint(0, MAX_SEED) if randomize else seed
50
 
51
# -------------------------------------------------------------------------
# Model loading
# -------------------------------------------------------------------------
HF_TOKEN = os.getenv("HF_TOKEN")  # token is needed if the model repo is private
HUB_MODEL_ID = "AIDC-AI/Ovis-U1-3B"

print(f"[INFO] Loading {HUB_MODEL_ID} on {DEVICE} ...")
model = AutoModelForCausalLM.from_pretrained(
    HUB_MODEL_ID,
    token=HF_TOKEN,
    trust_remote_code=True,
    torch_dtype=DTYPE,
    # Per the original note: placed on the GPU when CUDA is available,
    # otherwise everything stays on the CPU.
    device_map="auto",
    # Per the original note: significantly lowers CPU usage while loading.
    low_cpu_mem_usage=True,
)
model = model.eval()  # evaluation mode

print("[INFO] Model ready!")
68
+
69
+ # -------------------------------------------------------------------------
70
+ # Import the custom pipeline functions (kept identical to the original code)
71
+ # -------------------------------------------------------------------------
72
+ from test_img_edit import pipe_img_edit
73
+ from test_img_to_txt import pipe_txt_gen
74
+ from test_txt_to_img import pipe_t2i
75
+
76
+ # -------------------------------------------------------------------------
77
+ # Inference wrappers (all run on DEVICE)
78
+ # -------------------------------------------------------------------------
79
def process_txt_to_img(
    prompt: str,
    height: int,
    width: int,
    steps: int,
    final_seed: int,
    guidance_scale: float,
    progress: gr.Progress = gr.Progress(track_tqdm=True),
) -> list[Image.Image]:
    """Run the text-to-image pipeline with a fixed seed.

    Seeds the global RNGs with ``final_seed`` and then forwards every
    argument to ``pipe_t2i`` together with the module-level ``model``.

    Args:
        prompt: Text prompt handed to the pipeline.
        height: Forwarded to ``pipe_t2i`` as the image height.
        width: Forwarded to ``pipe_t2i`` as the image width.
        steps: Forwarded to ``pipe_t2i`` as the step count.
        final_seed: Seed used for the global RNGs and passed as ``seed``.
        guidance_scale: Passed to the pipeline as ``cfg``.
        progress: Gradio progress tracker; not referenced in the body
            (presumably picked up by Gradio for tqdm tracking).

    Returns:
        Whatever ``pipe_t2i`` returns (annotated as a list of PIL images).
    """
    set_global_seed(final_seed)
    generated = pipe_t2i(
        model, prompt, height, width, steps,
        cfg=guidance_scale,
        seed=final_seed,
    )
    return generated
85
 
86
def process_img_to_txt(
    prompt: str,
    img: Image.Image,
    progress: gr.Progress = gr.Progress(track_tqdm=True),
) -> str:
    """Run the image-understanding pipeline on one image.

    Args:
        prompt: Question or instruction about the image.
        img: Input image.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        The result of ``pipe_txt_gen(model, img, prompt)`` (annotated as text).
    """
    answer = pipe_txt_gen(model, img, prompt)
    return answer
 
89
 
90
def process_img_txt_to_img(
    prompt: str,
    img: Image.Image,
    steps: int,
    final_seed: int,
    txt_cfg: float,
    img_cfg: float,
    progress: gr.Progress = gr.Progress(track_tqdm=True),
) -> list[Image.Image]:
    """Run the image-editing pipeline with a fixed seed.

    Seeds the global RNGs with ``final_seed`` and then calls
    ``pipe_img_edit`` with the module-level ``model``.

    Args:
        prompt: Editing instruction.
        img: Image to edit.
        steps: Forwarded to ``pipe_img_edit`` as the step count.
        final_seed: Seed used for the global RNGs and passed as ``seed``.
        txt_cfg: Text guidance value forwarded positionally.
        img_cfg: Image guidance value forwarded positionally.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        Whatever ``pipe_img_edit`` returns (annotated as a list of PIL images).
    """
    set_global_seed(final_seed)
    edited = pipe_img_edit(
        model, img, prompt, steps, txt_cfg, img_cfg,
        seed=final_seed,
    )
    return edited
96
 
97
+ # -------------------------------------------------------------------------
98
  # Gradio UI
99
+ # -------------------------------------------------------------------------
100
+ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
101
+ gr.Markdown("# Ovis-U1-3B\n✨ 多模态文本-图像 DEMO(CPU 版)")
102
 
103
  with gr.Row():
104
  with gr.Column():
105
  with gr.Tabs():
106
+ # ---------------------- Tab 1: Image + Text → Image ----------------------
107
  with gr.TabItem("Image + Text → Image"):
108
  edit_image_input = gr.Image(label="Input Image", type="pil")
109
  with gr.Row():
 
117
  run_edit_image_btn = gr.Button("Run", scale=0)
118
 
119
  with gr.Accordion("Advanced Settings", open=False):
 
120
  with gr.Row():
 
121
  edit_img_guidance_slider = gr.Slider(
122
  label="Image Guidance Scale",
123
+ minimum=1.0, maximum=10.0, step=0.1, value=1.5
 
124
  )
 
125
  edit_txt_guidance_slider = gr.Slider(
126
  label="Text Guidance Scale",
127
+ minimum=1.0, maximum=30.0, step=0.5, value=6.0
 
128
  )
 
129
  edit_num_steps_slider = gr.Slider(
130
+ label="Steps", minimum=40, maximum=100, value=50, step=1
 
 
131
  )
132
  edit_seed_slider = gr.Slider(
133
+ label="Seed", minimum=0, maximum=MAX_SEED,
 
134
  step=1, value=42
135
  )
136
  edit_randomize_checkbox = gr.Checkbox(
137
  label="Randomize seed", value=False
138
  )
139
 
 
 
 
 
 
140
  gr.Examples(
141
+ examples=[
142
+ ["imgs/train.png", "Modify this image in a Ghibli style."],
143
+ ["imgs/chair.png", "Transfer the image into a faceted low-poly 3-D render style."],
144
+ ["imgs/car.png", "Replace the tiny house on wheels in the image with a vintage car."],
145
+ ],
146
  inputs=[edit_image_input, edit_prompt_input],
147
+ cache_examples=False,
148
  label="Image Editing Examples"
149
  )
150
 
151
+ # ---------------------- Tab 2: Text → Image ----------------------
152
  with gr.TabItem("Text → Image"):
153
  with gr.Row():
154
  prompt_gen_input = gr.Textbox(
 
163
  with gr.Accordion("Advanced Settings", open=False):
164
  with gr.Row():
165
  height_slider = gr.Slider(
166
+ label="height", minimum=256, maximum=1536,
 
167
  value=1024, step=32
168
  )
169
  width_slider = gr.Slider(
170
+ label="width", minimum=256, maximum=1536,
 
171
  value=1024, step=32
172
  )
 
173
  guidance_slider = gr.Slider(
174
  label="Guidance Scale",
175
+ minimum=1.0, maximum=30.0, step=0.5, value=5.0
 
176
  )
 
177
  num_steps_slider = gr.Slider(
178
+ label="Steps", minimum=40, maximum=100, value=50, step=1
 
 
179
  )
180
  seed_slider = gr.Slider(
181
+ label="Seed", minimum=0, maximum=MAX_SEED,
 
182
  step=1, value=42
183
  )
184
  randomize_checkbox = gr.Checkbox(
185
  label="Randomize seed", value=False
186
  )
187
 
 
 
 
 
 
188
  gr.Examples(
189
+ examples=[
190
+ ["A breathtaking fairy with teal wings sits gracefully on a lotus flower in a serene pond, exuding elegance."],
191
+ ["A winter mountain landscape at deep night with snowy terrain and colorful flowers, portrayed as an anime background illustration."],
192
+ ["A photo of a pug wearing a cowboy hat and bandana, sitting on a hay bale."]
193
+ ],
194
  inputs=[prompt_gen_input],
195
+ cache_examples=False,
196
  label="Image Generation Examples"
197
  )
198
 
199
+ # ---------------------- Tab 3: Image → Text ----------------------
200
  with gr.TabItem("Image → Text"):
201
  image_understand_input = gr.Image(label="Input Image", type="pil")
202
  with gr.Row():
203
  prompt_understand_input = gr.Textbox(
204
+ label="Prompt", show_label=False,
 
205
  placeholder="Describe the question about image...",
206
+ container=False, lines=1
 
207
  )
208
  run_image_understand_btn = gr.Button("Run", scale=0)
209
 
 
 
 
 
 
210
  gr.Examples(
211
+ examples=[
212
+ ["imgs/table.webp", "In what scenario does this picture take place?"],
213
+ ["imgs/count.png", "How many broccoli are there in the picture?"],
214
+ ["imgs/foot.webp", "Where is this picture located?"],
215
+ ],
216
  inputs=[image_understand_input, prompt_understand_input],
217
+ cache_examples=False,
218
  label="Image Understanding Examples"
219
  )
 
 
220
 
221
+ clean_btn = gr.Button("Clear All Inputs / Outputs")
 
 
222
 
223
+ # ---------------------- Output area ----------------------
224
+ with gr.Column():
225
+ output_gallery = gr.Gallery(label="Generated Images",
226
+ columns=2, visible=True)
227
+ output_text = gr.Textbox(label="Generated Text",
228
+ visible=False, lines=5,
229
+ interactive=False)
230
+
231
+ # ---------------------------------------------------------------------
232
+ # Event bindings
233
+ # ---------------------------------------------------------------------
234
def run_img_txt_to_img_tab(prompt, img, steps, seed, txt_cfg, img_cfg,
                           progress=gr.Progress(track_tqdm=True)):
    """Handle a run of the "Image + Text → Image" tab.

    Returns a pair of Gradio updates ``(gallery, text)``. Without an input
    image the gallery is hidden and a hint is shown in the text box;
    otherwise the edited images are shown and the text box is hidden.
    """
    if img is not None:
        edited = process_img_txt_to_img(prompt, img, steps, seed,
                                        txt_cfg, img_cfg, progress)
        gallery_update = gr.update(value=edited, visible=True)
        text_update = gr.update(value="", visible=False)
    else:
        # Nothing to edit: surface the message instead of an empty gallery.
        gallery_update = gr.update(value=[], visible=False)
        text_update = gr.update(value="Please upload an image for editing.", visible=True)
    return (gallery_update, text_update)
247
 
248
def run_txt_to_img_tab(prompt, height, width, steps, seed, guidance,
                       progress=gr.Progress(track_tqdm=True)):
    """Handle a run of the "Text → Image" tab.

    Generates images via ``process_txt_to_img`` and returns a pair of Gradio
    updates ``(gallery, text)``: the gallery shows the images and the text
    box is cleared and hidden.
    """
    generated = process_txt_to_img(prompt, height, width, steps, seed,
                                   guidance, progress)
    gallery_update = gr.update(value=generated, visible=True)
    text_update = gr.update(value="", visible=False)
    return (gallery_update, text_update)
256
 
257
def run_img_to_txt_tab(img, prompt,
                       progress=gr.Progress(track_tqdm=True)):
    """Handle a run of the "Image → Text" tab.

    Returns a pair of Gradio updates ``(gallery, text)``. The gallery is
    always emptied and hidden; the text box shows either the generated
    answer or a hint asking for an image when none was supplied.
    """
    if img is not None:
        answer = process_img_to_txt(prompt, img, progress)
        gallery_update = gr.update(value=[], visible=False)
        text_update = gr.update(value=answer, visible=True)
    else:
        gallery_update = gr.update(value=[], visible=False)
        text_update = gr.update(value="Please upload an image for understanding.", visible=True)
    return (gallery_update, text_update)
269
 
270
def clean_all_fn():
    """Reset every input and output component to its initial state.

    Returns a tuple of 18 Gradio updates whose order must match the
    ``outputs`` list wired to the clear button: 7 values for tab 1, 7 for
    tab 2, 2 for tab 3, then the gallery and the text output.
    """
    tab1_defaults = (None, "", 1.5, 6.0, 50, 42, False)
    tab2_defaults = ("", 1024, 1024, 5.0, 50, 42, False)
    tab3_defaults = (None, "")

    input_resets = tuple(
        gr.update(value=default)
        for default in (*tab1_defaults, *tab2_defaults, *tab3_defaults)
    )
    # Outputs: empty but visible gallery, cleared and hidden text box.
    output_resets = (
        gr.update(value=[], visible=True),
        gr.update(value="", visible=False),
    )
    return input_resets + output_resets
289
 
290
# ---------- Tab 1 ----------
# NOTE(review): the scraped diff lost indentation; these bindings presumably
# belong inside the `with gr.Blocks(...) as demo:` context — confirm.
edit_inputs = [
    edit_prompt_input,
    edit_image_input,
    edit_num_steps_slider,
    edit_seed_slider,
    edit_txt_guidance_slider,
    edit_img_guidance_slider,
]

# Clicking "Run": first settle the seed, then run the edit with the final seed.
run_edit_image_btn.click(
    fn=randomize_seed_fn,
    inputs=[edit_seed_slider, edit_randomize_checkbox],
    outputs=[edit_seed_slider],
).then(
    fn=run_img_txt_to_img_tab,
    inputs=edit_inputs,
    outputs=[output_gallery, output_text],
)

# Submitting the prompt box triggers the same two-step chain.
edit_prompt_input.submit(
    fn=randomize_seed_fn,
    inputs=[edit_seed_slider, edit_randomize_checkbox],
    outputs=[edit_seed_slider],
).then(
    fn=run_img_txt_to_img_tab,
    inputs=edit_inputs,
    outputs=[output_gallery, output_text],
)
308
 
309
# ---------- Tab 2 ----------
# NOTE(review): the scraped diff lost indentation; these bindings presumably
# belong inside the `with gr.Blocks(...) as demo:` context — confirm.
gen_inputs = [
    prompt_gen_input,
    height_slider,
    width_slider,
    num_steps_slider,
    seed_slider,
    guidance_slider,
]

# Clicking "Run": first settle the seed, then generate with the final seed.
run_image_gen_btn.click(
    fn=randomize_seed_fn,
    inputs=[seed_slider, randomize_checkbox],
    outputs=[seed_slider],
).then(
    fn=run_txt_to_img_tab,
    inputs=gen_inputs,
    outputs=[output_gallery, output_text],
)

# Submitting the prompt box triggers the same two-step chain.
prompt_gen_input.submit(
    fn=randomize_seed_fn,
    inputs=[seed_slider, randomize_checkbox],
    outputs=[seed_slider],
).then(
    fn=run_txt_to_img_tab,
    inputs=gen_inputs,
    outputs=[output_gallery, output_text],
)
326
 
327
# ---------- Tab 3 ----------
# NOTE(review): the scraped diff lost indentation; these bindings presumably
# belong inside the `with gr.Blocks(...) as demo:` context — confirm.
understand_inputs = [image_understand_input, prompt_understand_input]

# Button click and prompt submission both run the image-understanding handler.
run_image_understand_btn.click(
    fn=run_img_to_txt_tab,
    inputs=understand_inputs,
    outputs=[output_gallery, output_text],
)
prompt_understand_input.submit(
    fn=run_img_to_txt_tab,
    inputs=understand_inputs,
    outputs=[output_gallery, output_text],
)
335
+
336
# ---------- Clear ----------
# NOTE(review): the scraped diff lost indentation; this binding presumably
# belongs inside the `with gr.Blocks(...) as demo:` context — confirm.
# The order of `outputs` must match the tuple returned by clean_all_fn.
clean_btn.click(
    fn=clean_all_fn,
    inputs=[],
    outputs=[
        # Tab 1
        edit_image_input,
        edit_prompt_input,
        edit_img_guidance_slider,
        edit_txt_guidance_slider,
        edit_num_steps_slider,
        edit_seed_slider,
        edit_randomize_checkbox,
        # Tab 2
        prompt_gen_input,
        height_slider,
        width_slider,
        guidance_slider,
        num_steps_slider,
        seed_slider,
        randomize_checkbox,
        # Tab 3
        image_understand_input,
        prompt_understand_input,
        # Outputs
        output_gallery,
        output_text,
    ],
)
345
+
346
+ # -------------------------------------------------------------------------
347
+ # Launch
348
+ # -------------------------------------------------------------------------
 
 
 
 
 
349
 if __name__ == "__main__":
350
+ demo.launch(share=True) # per the original note: HF Spaces automatically listens on port 7860