tori29umai commited on
Commit
6eb3715
·
verified ·
1 Parent(s): a03b5b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -152
app.py CHANGED
@@ -34,89 +34,68 @@ scheduler_config = {
34
  "use_karras_sigmas": False,
35
  }
36
 
37
- # Initialize scheduler with Lightning config
38
  scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
39
 
40
- # Load the model pipeline
41
- pipe = QwenImageEditPlusPipeline.from_pretrained("Qwen/Qwen-Image-Edit-2509",
42
- scheduler=scheduler,
43
- torch_dtype=dtype).to(device)
 
 
 
44
  pipe.load_lora_weights(
45
- "2vXpSwA7/iroiro-lora",
46
- weight_name="qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors"
47
- )
48
  pipe.fuse_lora(lora_scale=0.8)
49
-
50
- # Apply the same optimizations from the first version
51
  pipe.transformer.__class__ = QwenImageTransformer2DModel
52
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
53
-
54
- # --- Ahead-of-time compilation ---
55
  optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
56
 
57
- # --- UI Constants and Helpers ---
58
  MAX_SEED = np.iinfo(np.int32).max
59
-
60
- # 固定プロンプト定義
61
  PROMPTS = {
62
- "front": "Move the camera to a front-facing position so the full body of the character is visible. The character stands with both arms extended slightly downward and close to the thighs, keeping the body evenly balanced on both sides. Background is plain white.",
63
- "back": "Move the camera to a back-facing position so the full body of the character is visible. The character stands with both arms extended slightly downward and close to the thighs, keeping the body evenly balanced on both sides. Background is plain white.",
64
- "left": "Move the camera to a side view (profile) from the left so the full body of the character is visible. The character stands with both arms extended slightly downward and close to the thighs, keeping the body evenly balanced. Background is plain white.",
65
- "right": "Move the camera to a side view (profile) from the right so the full body of the character is visible. The character stands with both arms extended slightly downward and close to the thighs, keeping the body evenly balanced. Background is plain white."
66
  }
67
 
68
  def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
69
- """単一の視点の画像を生成"""
70
- negative_prompt = " "
71
  generator = torch.Generator(device=device).manual_seed(seed)
72
-
73
- print(f"Generating with prompt: '{prompt}'")
74
- print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}")
75
-
76
- # Generate the image
77
  result = pipe(
78
  image=input_images if input_images else None,
79
  prompt=prompt,
80
- height=None,
81
- width=None,
82
- negative_prompt=negative_prompt,
83
  num_inference_steps=num_inference_steps,
84
  generator=generator,
85
  true_cfg_scale=true_guidance_scale,
86
  num_images_per_prompt=1,
87
  ).images
88
-
89
  return result[0]
90
 
91
- # --- NEW: 横連結ユーティリティ ---
92
  def concat_images_horizontally(images, bg_color=(255, 255, 255)):
93
- """
94
- 複数のPIL画像を横に連結して1枚のPIL画像を返す。
95
- すべて同じ高さにスケールしてアスペクト比は維持。
96
- """
97
  images = [img.convert("RGB") for img in images if img is not None]
98
  if not images:
99
  return None
100
-
101
- # 連結の基準高さ(最大の高さ)に合わせて横幅を等比リサイズ
102
- target_h = max(img.height for img in images)
103
  resized = []
104
  for img in images:
105
- if img.height != target_h:
106
- new_w = int(img.width * (target_h / img.height))
107
- img = img.resize((new_w, target_h), Image.LANCZOS)
108
  resized.append(img)
109
-
110
- total_w = sum(img.width for img in resized)
111
- canvas = Image.new("RGB", (total_w, target_h), bg_color)
112
-
113
  x = 0
114
  for img in resized:
115
  canvas.paste(img, (x, 0))
116
  x += img.width
117
  return canvas
118
 
119
- # --- Main Inference Function ---
120
  @spaces.GPU(duration=300)
121
  def generate_turnaround(
122
  image,
@@ -126,139 +105,68 @@ def generate_turnaround(
126
  num_inference_steps=4,
127
  progress=gr.Progress(track_tqdm=True),
128
  ):
129
- """
130
- 入力画像から4つの視点(正面、背面、左側面、右側面)の立ち絵を生成
131
- さらに「正面 → 右向き → 背面 → 左向き」を横並びで連結した画像も返す
132
- """
133
  if randomize_seed:
134
  seed = random.randint(0, MAX_SEED)
135
-
136
- # 入力画像の確認
137
  if image is None:
138
  return None, None, None, None, None, seed, "エラー: 入力画像をアップロードしてください"
139
-
140
- # PIL画像として処理
141
  if isinstance(image, Image.Image):
142
  input_image = image.convert("RGB")
143
  else:
144
- try:
145
- input_image = Image.open(image).convert("RGB")
146
- except:
147
- return None, None, None, None, None, seed, "エラー: 画像の読み込みに失敗しました"
148
-
149
  pil_images = [input_image]
150
-
151
- # 1. 正面立ち絵を生成
152
- progress(0.25, desc="正面立ち絵を生成中...")
153
- front_image = generate_single_view(pil_images, PROMPTS["front"], seed, num_inference_steps, true_guidance_scale)
154
-
155
- # 2. 正面立ち絵を入力として背面を生成
156
- progress(0.50, desc="背面立ち絵を生成中...")
157
- back_image = generate_single_view([front_image], PROMPTS["back"], seed+1, num_inference_steps, true_guidance_scale)
158
-
159
- # 3. 正面立ち絵を入力として左側面を生成
160
- progress(0.75, desc="左側面立ち絵を生成中...")
161
- left_image = generate_single_view([front_image], PROMPTS["left"], seed+2, num_inference_steps, true_guidance_scale)
162
-
163
- # 4. 正面立ち絵を入力として右側面を生成
164
- progress(1.0, desc="右側面立ち絵を生成中...")
165
- right_image = generate_single_view([front_image], PROMPTS["right"], seed+3, num_inference_steps, true_guidance_scale)
166
 
167
- # --- NEW: 指定順で横連結(正面 → 右向き → 背面 → 左向き) ---
168
- concatenated = concat_images_horizontally([front_image, right_image, back_image, left_image])
 
 
 
169
 
170
- return front_image, back_image, left_image, right_image, concatenated, seed, "✅ 4視点の立ち絵生成と横連結画像の出力が完了しました"
 
171
 
172
- # --- UI Layout ---
 
 
 
 
 
 
173
  css = """
174
- #col-container {
175
- margin: 0 auto;
176
- max-width: 1400px;
177
- }
178
- .view-label {
179
- text-align: center;
180
- font-weight: bold;
181
- margin-top: 10px;
182
- }
183
- /* 画像のアスペクト比を保持 */
184
- .image-container img {
185
- object-fit: contain !important;
186
- max-width: 100%;
187
- max-height: 100%;
188
- }
189
  """
190
 
191
  with gr.Blocks(css=css) as demo:
192
- gr.Markdown("# キャラクター4視点立ち絵自動生成")
193
- gr.Markdown("キャラクター画像をアップロードすると、正面・背面・左側面・右側面の4つの立ち絵を自動生成します")
194
 
195
  with gr.Column(elem_id="col-container"):
196
- with gr.Row():
197
- input_image = gr.Image(
198
- label="入力画像(キャラクター画像をアップロード)",
199
- show_label=True,
200
- type="pil",
201
- height=500,
202
- sources=["upload", "clipboard"]
203
- )
204
-
205
- run_button = gr.Button("🎨 4視点立ち絵を生成", variant="primary", size="lg")
206
-
207
  status_text = gr.Textbox(label="ステータス", interactive=False)
208
-
209
  with gr.Row():
210
- with gr.Column():
211
- result_front = gr.Image(label="正面", type="pil", height=500, show_download_button=True, container=True, image_mode="RGB")
212
- with gr.Column():
213
- result_back = gr.Image(label="背面", type="pil", height=500, show_download_button=True, container=True, image_mode="RGB")
214
-
215
  with gr.Row():
216
- with gr.Column():
217
- result_left = gr.Image(label="左側面", type="pil", height=500, show_download_button=True, container=True, image_mode="RGB")
218
- with gr.Column():
219
- result_right = gr.Image(label="右側面", type="pil", height=500, show_download_button=True, container=True, image_mode="RGB")
220
 
221
- # --- NEW: 横連結画像の出力欄 ---
222
- result_concat = gr.Image(label="連結(正面 → 右向き → 背面 → 左向き)", type="pil", height=500, show_download_button=True, container=True, image_mode="RGB")
223
 
224
  with gr.Accordion("⚙️ 詳細設定", open=False):
225
- seed = gr.Slider(
226
- label="Seed",
227
- minimum=0,
228
- maximum=MAX_SEED,
229
- step=1,
230
- value=0,
231
- )
232
-
233
  randomize_seed = gr.Checkbox(label="ランダムシード", value=True)
234
-
235
- with gr.Row():
236
- true_guidance_scale = gr.Slider(
237
- label="True guidance scale",
238
- minimum=1.0,
239
- maximum=10.0,
240
- step=0.1,
241
- value=1.0
242
- )
243
-
244
- num_inference_steps = gr.Slider(
245
- label="生成ステップ数",
246
- minimum=1,
247
- maximum=40,
248
- step=1,
249
- value=4,
250
- )
251
 
252
  run_button.click(
253
  fn=generate_turnaround,
254
- inputs=[
255
- input_image,
256
- seed,
257
- randomize_seed,
258
- true_guidance_scale,
259
- num_inference_steps,
260
- ],
261
- # --- NEW: 5つ目の出力として連結画像を追加 ---
262
  outputs=[result_front, result_back, result_left, result_right, result_concat, seed, status_text],
263
  )
264
 
 
34
  "use_karras_sigmas": False,
35
  }
36
 
37
# --- Pipeline setup ---
# Scheduler is built from the (partially off-screen) scheduler_config dict above.
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

# Base edit model; dtype/device are defined earlier in the file.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype
).to(device)

# Lightning LoRA: 4-step distilled weights, fused at 0.8 strength so the
# pipeline can run with very few inference steps.
pipe.load_lora_weights(
    "2vXpSwA7/iroiro-lora",
    weight_name="qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors"
)
pipe.fuse_lora(lora_scale=0.8)

# Swap in the project transformer class and its FA3 attention processor.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# Ahead-of-time warm-up/compile with dummy 1024x1024 inputs.
optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
55
 
56
# --- Constants ---
MAX_SEED = np.iinfo(np.int32).max

# The four view prompts share one sentence frame; only the camera clause differs.
_CAMERA_CLAUSES = {
    "front": "a front-facing position",
    "back": "a back-facing position",
    "left": "a side view (profile) from the left",
    "right": "a side view (profile) from the right",
}
PROMPTS = {
    view: (
        f"Move the camera to {clause} so the full body of the character is visible. "
        "Background is plain white."
    )
    for view, clause in _CAMERA_CLAUSES.items()
}
64
 
65
def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
    """Run the global edit pipeline once and return a single generated image.

    The generator is seeded explicitly, so the same (inputs, prompt, seed)
    triple reproduces the same output. `pipe` and `device` are module globals
    initialized at import time.
    """
    gen = torch.Generator(device=device).manual_seed(seed)
    output = pipe(
        image=input_images if input_images else None,
        prompt=prompt,
        negative_prompt=" ",  # single-space negative prompt, kept from the original
        num_inference_steps=num_inference_steps,
        generator=gen,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    )
    return output.images[0]
78
 
 
79
def concat_images_horizontally(images, bg_color=(255, 255, 255)):
    """Concatenate PIL images left-to-right into one RGB image.

    `None` entries are skipped. All images are scaled (aspect ratio kept) to
    the tallest image's height before pasting onto a solid `bg_color` canvas.
    Returns the combined image, or None when no usable image was given.
    """
    images = [img.convert("RGB") for img in images if img is not None]
    if not images:
        return None
    target_h = max(img.height for img in images)
    resized = []
    for img in images:
        if img.height != target_h:
            # Clamp to >= 1 px: for extreme aspect ratios the scaled width
            # could truncate to 0 and produce a degenerate zero-width resize.
            new_w = max(1, int(img.width * (target_h / img.height)))
            img = img.resize((new_w, target_h), Image.LANCZOS)
        resized.append(img)
    total_w = sum(img.width for img in resized)
    canvas = Image.new("RGB", (total_w, target_h), bg_color)
    x = 0
    for img in resized:
        canvas.paste(img, (x, 0))
        x += img.width
    return canvas
98
 
 
99
@spaces.GPU(duration=300)
def generate_turnaround(
    # NOTE(review): the middle parameter names are reconstructed from the body
    # and the run_button.click inputs order — confirm against the full file.
    image,
    seed,
    randomize_seed,
    true_guidance_scale,
    num_inference_steps=4,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate front/back/left/right standing views plus a horizontal strip.

    Returns a 7-tuple (front, back, left, right, concatenated, seed, status);
    on input errors the five image slots are None and status carries the
    Japanese error message shown in the UI.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    if image is None:
        return None, None, None, None, None, seed, "エラー: 入力画像をアップロードしてください"

    if isinstance(image, Image.Image):
        input_image = image.convert("RGB")
    else:
        # Fail gracefully like the missing-image branch instead of letting an
        # unreadable path/file crash the request.
        try:
            input_image = Image.open(image).convert("RGB")
        except (OSError, ValueError):
            return None, None, None, None, None, seed, "エラー: 画像の読み込みに失敗しました"

    # 1) Front view from the uploaded image.
    progress(0.25, desc="正面生成中...")
    front = generate_single_view([input_image], PROMPTS["front"], seed, num_inference_steps, true_guidance_scale)

    # 2–4) Remaining views are conditioned on the generated front view; each
    # call gets a distinct derived seed so the samples differ deterministically.
    progress(0.5, desc="背面生成中...")
    back = generate_single_view([front], PROMPTS["back"], seed + 1, num_inference_steps, true_guidance_scale)

    progress(0.75, desc="左側面生成中...")
    left = generate_single_view([front], PROMPTS["left"], seed + 2, num_inference_steps, true_guidance_scale)

    progress(1.0, desc="右側面生成中...")
    right = generate_single_view([front], PROMPTS["right"], seed + 3, num_inference_steps, true_guidance_scale)

    # Strip order: front -> right -> back -> left.
    concat = concat_images_horizontally([front, right, back, left])
    return front, back, left, right, concat, seed, "✅ PNG形式で4視点+連結画像を生成しました"
135
+
136
# --- UI ---
css = """
#col-container {margin: 0 auto; max-width: 1400px;}
.image-container img {object-fit: contain !important; max-width: 100%; max-height: 100%;}
"""


def _view_image(label):
    """One PNG-downloadable result slot; shared config for all five outputs."""
    return gr.Image(label=label, type="pil", format="png", height=400, show_download_button=True)


with gr.Blocks(css=css) as demo:
    gr.Markdown("# キャラクター4視点立ち絵自動生成(PNG出力対応)")
    gr.Markdown("アップロードしたキャラクター画像から正面・背面・左右側面、さらに4枚連結のPNG画像を出力します。")

    with gr.Column(elem_id="col-container"):
        input_image = gr.Image(label="入力画像", type="pil", height=500)
        run_button = gr.Button("🎨 生成開始", variant="primary")
        status_text = gr.Textbox(label="ステータス", interactive=False)

        # Four single-view outputs in a 2x2 grid.
        with gr.Row():
            result_front = _view_image("正面")
            result_back = _view_image("背面")

        with gr.Row():
            result_left = _view_image("左側面")
            result_right = _view_image("右側面")

        # Concatenated strip (front -> right -> back -> left).
        result_concat = _view_image("連結画像(正面→右→背面→左)")

        with gr.Accordion("⚙️ 詳細設定", open=False):
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="ランダムシード", value=True)
            true_guidance_scale = gr.Slider(label="True guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
            num_inference_steps = gr.Slider(label="生成ステップ数", minimum=1, maximum=40, step=1, value=4)

    run_button.click(
        fn=generate_turnaround,
        inputs=[input_image, seed, randomize_seed, true_guidance_scale, num_inference_steps],
        outputs=[result_front, result_back, result_left, result_right, result_concat, seed, status_text],
    )
172