tori29umai commited on
Commit
6eb3715
·
verified ·
1 Parent(s): a03b5b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -152
app.py CHANGED
@@ -34,89 +34,68 @@ scheduler_config = {
34
  "use_karras_sigmas": False,
35
  }
36
 
37
- # Initialize scheduler with Lightning config
38
  scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
39
 
40
- # Load the model pipeline
41
- pipe = QwenImageEditPlusPipeline.from_pretrained("Qwen/Qwen-Image-Edit-2509",
42
- scheduler=scheduler,
43
- torch_dtype=dtype).to(device)
 
 
 
44
  pipe.load_lora_weights(
45
- "2vXpSwA7/iroiro-lora",
46
- weight_name="qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors"
47
- )
48
  pipe.fuse_lora(lora_scale=0.8)
49
-
50
- # Apply the same optimizations from the first version
51
  pipe.transformer.__class__ = QwenImageTransformer2DModel
52
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
53
-
54
- # --- Ahead-of-time compilation ---
55
  optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
56
 
57
- # --- UI Constants and Helpers ---
58
  MAX_SEED = np.iinfo(np.int32).max
59
-
60
- # 固定プロンプト定義
61
  PROMPTS = {
62
- "front": "Move the camera to a front-facing position so the full body of the character is visible. The character stands with both arms extended slightly downward and close to the thighs, keeping the body evenly balanced on both sides. Background is plain white.",
63
- "back": "Move the camera to a back-facing position so the full body of the character is visible. The character stands with both arms extended slightly downward and close to the thighs, keeping the body evenly balanced on both sides. Background is plain white.",
64
- "left": "Move the camera to a side view (profile) from the left so the full body of the character is visible. The character stands with both arms extended slightly downward and close to the thighs, keeping the body evenly balanced. Background is plain white.",
65
- "right": "Move the camera to a side view (profile) from the right so the full body of the character is visible. The character stands with both arms extended slightly downward and close to the thighs, keeping the body evenly balanced. Background is plain white."
66
  }
67
 
68
  def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
69
- """単一の視点の画像を生成"""
70
- negative_prompt = " "
71
  generator = torch.Generator(device=device).manual_seed(seed)
72
-
73
- print(f"Generating with prompt: '{prompt}'")
74
- print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}")
75
-
76
- # Generate the image
77
  result = pipe(
78
  image=input_images if input_images else None,
79
  prompt=prompt,
80
- height=None,
81
- width=None,
82
- negative_prompt=negative_prompt,
83
  num_inference_steps=num_inference_steps,
84
  generator=generator,
85
  true_cfg_scale=true_guidance_scale,
86
  num_images_per_prompt=1,
87
  ).images
88
-
89
  return result[0]
90
 
91
- # --- NEW: 横連結ユーティリティ ---
92
  def concat_images_horizontally(images, bg_color=(255, 255, 255)):
93
- """
94
- 複数のPIL画像を横に連結して1枚のPIL画像を返す。
95
- すべて同じ高さにスケールしてアスペクト比は維持。
96
- """
97
  images = [img.convert("RGB") for img in images if img is not None]
98
  if not images:
99
  return None
100
-
101
- # 連結の基準高さ(最大の高さ)に合わせて横幅を等比リサイズ
102
- target_h = max(img.height for img in images)
103
  resized = []
104
  for img in images:
105
- if img.height != target_h:
106
- new_w = int(img.width * (target_h / img.height))
107
- img = img.resize((new_w, target_h), Image.LANCZOS)
108
  resized.append(img)
109
-
110
- total_w = sum(img.width for img in resized)
111
- canvas = Image.new("RGB", (total_w, target_h), bg_color)
112
-
113
  x = 0
114
  for img in resized:
115
  canvas.paste(img, (x, 0))
116
  x += img.width
117
  return canvas
118
 
119
- # --- Main Inference Function ---
120
  @spaces.GPU(duration=300)
121
  def generate_turnaround(
122
  image,
@@ -126,139 +105,68 @@ def generate_turnaround(
126
  num_inference_steps=4,
127
  progress=gr.Progress(track_tqdm=True),
128
  ):
129
- """
130
- 入力画像から4つの視点(正面、背面、左側面、右側面)の立ち絵を生成
131
- さらに「正面 → 右向き → 背面 → 左向き」を横並びで連結した画像も返す
132
- """
133
  if randomize_seed:
134
  seed = random.randint(0, MAX_SEED)
135
-
136
- # 入力画像の確認
137
  if image is None:
138
  return None, None, None, None, None, seed, "エラー: 入力画像をアップロードしてください"
139
-
140
- # PIL画像として処理
141
  if isinstance(image, Image.Image):
142
  input_image = image.convert("RGB")
143
  else:
144
- try:
145
- input_image = Image.open(image).convert("RGB")
146
- except:
147
- return None, None, None, None, None, seed, "エラー: 画像の読み込みに失敗しました"
148
-
149
  pil_images = [input_image]
150
-
151
- # 1. 正面立ち絵を生成
152
- progress(0.25, desc="正面立ち絵を生成中...")
153
- front_image = generate_single_view(pil_images, PROMPTS["front"], seed, num_inference_steps, true_guidance_scale)
154
-
155
- # 2. 正面立ち絵を入力として背面を生成
156
- progress(0.50, desc="背面立ち絵を生成中...")
157
- back_image = generate_single_view([front_image], PROMPTS["back"], seed+1, num_inference_steps, true_guidance_scale)
158
-
159
- # 3. 正面立ち絵を入力として左側面を生成
160
- progress(0.75, desc="左側面立ち絵を生成中...")
161
- left_image = generate_single_view([front_image], PROMPTS["left"], seed+2, num_inference_steps, true_guidance_scale)
162
-
163
- # 4. 正面立ち絵を入力として右側面を生成
164
- progress(1.0, desc="右側面立ち絵を生成中...")
165
- right_image = generate_single_view([front_image], PROMPTS["right"], seed+3, num_inference_steps, true_guidance_scale)
166
 
167
- # --- NEW: 指定順で横連結(正面 → 右向き → 背面 → 左向き) ---
168
- concatenated = concat_images_horizontally([front_image, right_image, back_image, left_image])
 
 
 
169
 
170
- return front_image, back_image, left_image, right_image, concatenated, seed, "✅ 4視点の立ち絵生成と横連結画像の出力が完了しました"
 
171
 
172
- # --- UI Layout ---
 
 
 
 
 
 
173
  css = """
174
- #col-container {
175
- margin: 0 auto;
176
- max-width: 1400px;
177
- }
178
- .view-label {
179
- text-align: center;
180
- font-weight: bold;
181
- margin-top: 10px;
182
- }
183
- /* 画像のアスペクト比を保持 */
184
- .image-container img {
185
- object-fit: contain !important;
186
- max-width: 100%;
187
- max-height: 100%;
188
- }
189
  """
190
 
191
  with gr.Blocks(css=css) as demo:
192
- gr.Markdown("# キャラクター4視点立ち絵自動生成")
193
- gr.Markdown("キャラクター画像をアップロードすると、正面・背面・左側面・右側面の4つの立ち絵を自動生成します")
194
 
195
  with gr.Column(elem_id="col-container"):
196
- with gr.Row():
197
- input_image = gr.Image(
198
- label="入力画像(キャラクター画像をアップロード)",
199
- show_label=True,
200
- type="pil",
201
- height=500,
202
- sources=["upload", "clipboard"]
203
- )
204
-
205
- run_button = gr.Button("🎨 4視点立ち絵を生成", variant="primary", size="lg")
206
-
207
  status_text = gr.Textbox(label="ステータス", interactive=False)
208
-
209
  with gr.Row():
210
- with gr.Column():
211
- result_front = gr.Image(label="正面", type="pil", height=500, show_download_button=True, container=True, image_mode="RGB")
212
- with gr.Column():
213
- result_back = gr.Image(label="背面", type="pil", height=500, show_download_button=True, container=True, image_mode="RGB")
214
-
215
  with gr.Row():
216
- with gr.Column():
217
- result_left = gr.Image(label="左側面", type="pil", height=500, show_download_button=True, container=True, image_mode="RGB")
218
- with gr.Column():
219
- result_right = gr.Image(label="右側面", type="pil", height=500, show_download_button=True, container=True, image_mode="RGB")
220
 
221
- # --- NEW: 横連結画像の出力欄 ---
222
- result_concat = gr.Image(label="連結(正面 → 右向き → 背面 → 左向き)", type="pil", height=500, show_download_button=True, container=True, image_mode="RGB")
223
 
224
  with gr.Accordion("⚙️ 詳細設定", open=False):
225
- seed = gr.Slider(
226
- label="Seed",
227
- minimum=0,
228
- maximum=MAX_SEED,
229
- step=1,
230
- value=0,
231
- )
232
-
233
  randomize_seed = gr.Checkbox(label="ランダムシード", value=True)
234
-
235
- with gr.Row():
236
- true_guidance_scale = gr.Slider(
237
- label="True guidance scale",
238
- minimum=1.0,
239
- maximum=10.0,
240
- step=0.1,
241
- value=1.0
242
- )
243
-
244
- num_inference_steps = gr.Slider(
245
- label="生成ステップ数",
246
- minimum=1,
247
- maximum=40,
248
- step=1,
249
- value=4,
250
- )
251
 
252
  run_button.click(
253
  fn=generate_turnaround,
254
- inputs=[
255
- input_image,
256
- seed,
257
- randomize_seed,
258
- true_guidance_scale,
259
- num_inference_steps,
260
- ],
261
- # --- NEW: 5つ目の出力として連結画像を追加 ---
262
  outputs=[result_front, result_back, result_left, result_right, result_concat, seed, status_text],
263
  )
264
 
 
34
  "use_karras_sigmas": False,
35
  }
36
 
37
# --- Pipeline setup ---
# Scheduler is built from the (partially off-screen) scheduler_config dict above.
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

# Base edit model; dtype/device are defined earlier in the file.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype
).to(device)

# Lightning LoRA: 4-step distilled weights, fused at 0.8 strength so the
# pipeline can run with very few inference steps.
pipe.load_lora_weights(
    "2vXpSwA7/iroiro-lora",
    weight_name="qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors"
)
pipe.fuse_lora(lora_scale=0.8)

# Swap in the project transformer class and its FA3 attention processor.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# Ahead-of-time warm-up/compile with dummy 1024x1024 inputs.
optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
55
 
56
# --- Constants ---
MAX_SEED = np.iinfo(np.int32).max

# The four view prompts share one sentence frame; only the camera clause differs.
_CAMERA_CLAUSES = {
    "front": "a front-facing position",
    "back": "a back-facing position",
    "left": "a side view (profile) from the left",
    "right": "a side view (profile) from the right",
}
PROMPTS = {
    view: (
        f"Move the camera to {clause} so the full body of the character is visible. "
        "Background is plain white."
    )
    for view, clause in _CAMERA_CLAUSES.items()
}
64
 
65
def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
    """Run the global edit pipeline once and return a single generated image.

    The generator is seeded explicitly, so the same (inputs, prompt, seed)
    triple reproduces the same output. `pipe` and `device` are module globals
    initialized at import time.
    """
    gen = torch.Generator(device=device).manual_seed(seed)
    output = pipe(
        image=input_images if input_images else None,
        prompt=prompt,
        negative_prompt=" ",  # single-space negative prompt, kept from the original
        num_inference_steps=num_inference_steps,
        generator=gen,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    )
    return output.images[0]
78
 
 
79
def concat_images_horizontally(images, bg_color=(255, 255, 255)):
    """Concatenate PIL images left-to-right into one RGB image.

    `None` entries are skipped. All images are scaled (aspect ratio kept) to
    the tallest image's height before pasting onto a solid `bg_color` canvas.
    Returns the combined image, or None when no usable image was given.
    """
    images = [img.convert("RGB") for img in images if img is not None]
    if not images:
        return None
    target_h = max(img.height for img in images)
    resized = []
    for img in images:
        if img.height != target_h:
            # Clamp to >= 1 px: for extreme aspect ratios the scaled width
            # could truncate to 0 and produce a degenerate zero-width resize.
            new_w = max(1, int(img.width * (target_h / img.height)))
            img = img.resize((new_w, target_h), Image.LANCZOS)
        resized.append(img)
    total_w = sum(img.width for img in resized)
    canvas = Image.new("RGB", (total_w, target_h), bg_color)
    x = 0
    for img in resized:
        canvas.paste(img, (x, 0))
        x += img.width
    return canvas
98
 
 
99
@spaces.GPU(duration=300)
def generate_turnaround(
    # NOTE(review): the middle parameter names are reconstructed from the body
    # and the run_button.click inputs order — confirm against the full file.
    image,
    seed,
    randomize_seed,
    true_guidance_scale,
    num_inference_steps=4,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate front/back/left/right standing views plus a horizontal strip.

    Returns a 7-tuple (front, back, left, right, concatenated, seed, status);
    on input errors the five image slots are None and status carries the
    Japanese error message shown in the UI.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    if image is None:
        return None, None, None, None, None, seed, "エラー: 入力画像をアップロードしてください"

    if isinstance(image, Image.Image):
        input_image = image.convert("RGB")
    else:
        # Fail gracefully like the missing-image branch instead of letting an
        # unreadable path/file crash the request.
        try:
            input_image = Image.open(image).convert("RGB")
        except (OSError, ValueError):
            return None, None, None, None, None, seed, "エラー: 画像の読み込みに失敗しました"

    # 1) Front view from the uploaded image.
    progress(0.25, desc="正面生成中...")
    front = generate_single_view([input_image], PROMPTS["front"], seed, num_inference_steps, true_guidance_scale)

    # 2–4) Remaining views are conditioned on the generated front view; each
    # call gets a distinct derived seed so the samples differ deterministically.
    progress(0.5, desc="背面生成中...")
    back = generate_single_view([front], PROMPTS["back"], seed + 1, num_inference_steps, true_guidance_scale)

    progress(0.75, desc="左側面生成中...")
    left = generate_single_view([front], PROMPTS["left"], seed + 2, num_inference_steps, true_guidance_scale)

    progress(1.0, desc="右側面生成中...")
    right = generate_single_view([front], PROMPTS["right"], seed + 3, num_inference_steps, true_guidance_scale)

    # Strip order: front -> right -> back -> left.
    concat = concat_images_horizontally([front, right, back, left])
    return front, back, left, right, concat, seed, "✅ PNG形式で4視点+連結画像を生成しました"
135
+
136
# --- UI ---
css = """
#col-container {margin: 0 auto; max-width: 1400px;}
.image-container img {object-fit: contain !important; max-width: 100%; max-height: 100%;}
"""


def _view_image(label):
    """One PNG-downloadable result slot; shared config for all five outputs."""
    return gr.Image(label=label, type="pil", format="png", height=400, show_download_button=True)


with gr.Blocks(css=css) as demo:
    gr.Markdown("# キャラクター4視点立ち絵自動生成(PNG出力対応)")
    gr.Markdown("アップロードしたキャラクター画像から正面・背面・左右側面、さらに4枚連結のPNG画像を出力します。")

    with gr.Column(elem_id="col-container"):
        input_image = gr.Image(label="入力画像", type="pil", height=500)
        run_button = gr.Button("🎨 生成開始", variant="primary")
        status_text = gr.Textbox(label="ステータス", interactive=False)

        # Four single-view outputs in a 2x2 grid.
        with gr.Row():
            result_front = _view_image("正面")
            result_back = _view_image("背面")

        with gr.Row():
            result_left = _view_image("左側面")
            result_right = _view_image("右側面")

        # Concatenated strip (front -> right -> back -> left).
        result_concat = _view_image("連結画像(正面→右→背面→左)")

        with gr.Accordion("⚙️ 詳細設定", open=False):
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="ランダムシード", value=True)
            true_guidance_scale = gr.Slider(label="True guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
            num_inference_steps = gr.Slider(label="生成ステップ数", minimum=1, maximum=40, step=1, value=4)

    run_button.click(
        fn=generate_turnaround,
        inputs=[input_image, seed, randomize_seed, true_guidance_scale, num_inference_steps],
        outputs=[result_front, result_back, result_left, result_right, result_concat, seed, status_text],
    )
172