Ovis-U1-3B-cpu

Runtime error

App Files Files Community

innoai committed on Jun 30

Commit

fa6b263

verified ·

1 Parent(s): d53478b

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -106

app.py CHANGED Viewed

@@ -1,40 +1,84 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-Ovis-U1-3B  多模态 DEMO
-兼容 Hugging Face CPU Space（无 GPU 驱动）
-依赖：Python 3.10+、gradio 4.*, torch 2.*、transformers 4.41.*
 """
 import os
 import subprocess
 import random
 import numpy as np
 import torch
 from PIL import Image
 import gradio as gr
 import spaces
 from transformers import AutoModelForCausalLM
-# -------------------------------------------------------------------------
-# 可选：GPU 环境才能真正用到 flash-attn；CPU Space 可忽略安装异常
-# -------------------------------------------------------------------------
-try:
-    subprocess.run(
-        "pip install flash-attn==2.6.3 --no-build-isolation",
-        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-        shell=True,
-        check=True,
-    )
-except subprocess.CalledProcessError:
-    print("[INFO] flash-attn 安装失败（CPU 环境可忽略）")
-# -------------------------------------------------------------------------
-# 常量与工具函数
-# -------------------------------------------------------------------------
 MAX_SEED = 10_000
-DEVICE   = "cuda" if torch.cuda.is_available() else "cpu"
-DTYPE    = torch.bfloat16 if DEVICE == "cuda" else torch.float32   # CPU 默认用 fp32
 def set_global_seed(seed: int = 42) -> None:
     """统一设置随机种子（CPU / CUDA 自适应）"""
@@ -45,37 +89,29 @@ def set_global_seed(seed: int = 42) -> None:
         torch.cuda.manual_seed_all(seed)
 def randomize_seed_fn(seed: int, randomize: bool) -> int:
-    """UI 侧 seed 随机化"""
     return random.randint(0, MAX_SEED) if randomize else seed
-# -------------------------------------------------------------------------
-# 加载模型
-# -------------------------------------------------------------------------
-HF_TOKEN      = os.getenv("HF_TOKEN")          # 如果私有模型需 token
-HUB_MODEL_ID  = "AIDC-AI/Ovis-U1-3B"
-print(f"[INFO] Loading {HUB_MODEL_ID} on {DEVICE} ...")
 model = AutoModelForCausalLM.from_pretrained(
-    HUB_MODEL_ID,
     torch_dtype=DTYPE,
-    low_cpu_mem_usage=True,    # 显著降低 CPU 占用
-    device_map="auto",         # cuda 自动放 GPU，CPU 环境全部放 CPU
     token=HF_TOKEN,
     trust_remote_code=True
-).eval()                       # 评估模式
 print("[INFO] Model ready!")
-# -------------------------------------------------------------------------
-# 引入自定义管线函数 —— 保持与原代码一致
-# -------------------------------------------------------------------------
-from test_img_edit import pipe_img_edit
-from test_img_to_txt import pipe_txt_gen
-from test_txt_to_img import pipe_t2i
-# -------------------------------------------------------------------------
-# 推理封装（均运行在 DEVICE）
-# -------------------------------------------------------------------------
 def process_txt_to_img(prompt: str, height: int, width: int, steps: int,
                        final_seed: int, guidance_scale: float,
                        progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
@@ -94,23 +130,24 @@ def process_img_txt_to_img(prompt: str, img: Image.Image, steps: int,
     return pipe_img_edit(model, img, prompt, steps,
                          txt_cfg, img_cfg, seed=final_seed)
-# -------------------------------------------------------------------------
-# Gradio UI
-# -------------------------------------------------------------------------
-with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
-    gr.Markdown("# Ovis-U1-3B\n✨ 多模态文本-图像 DEMO（CPU 版）")
     with gr.Row():
         with gr.Column():
             with gr.Tabs():
-                # ---------------------- Tab 1  图像 + 文本 → 图像 ----------------------
                 with gr.TabItem("Image + Text → Image"):
                     edit_image_input = gr.Image(label="Input Image", type="pil")
                     with gr.Row():
                         edit_prompt_input = gr.Textbox(
                             label="Prompt",
                             show_label=False,
-                            placeholder="Describe the editing instruction...",
                             container=False,
                             lines=1
                         )
@@ -130,8 +167,7 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
                             label="Steps", minimum=40, maximum=100, value=50, step=1
                         )
                         edit_seed_slider = gr.Slider(
-                            label="Seed", minimum=0, maximum=MAX_SEED,
-                            step=1, value=42
                         )
                         edit_randomize_checkbox = gr.Checkbox(
                             label="Randomize seed", value=False
@@ -148,15 +184,13 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
                         label="Image Editing Examples"
                     )
-                # ---------------------- Tab 2  文本 → 图像 ----------------------
                 with gr.TabItem("Text → Image"):
                     with gr.Row():
                         prompt_gen_input = gr.Textbox(
-                            label="Prompt",
-                            show_label=False,
-                            placeholder="Describe the image you want...",
-                            container=False,
-                            lines=1
                         )
                         run_image_gen_btn = gr.Button("Run", scale=0)
@@ -171,15 +205,14 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
                                 value=1024, step=32
                             )
                         guidance_slider = gr.Slider(
-                            label="Guidance Scale",
-                            minimum=1.0, maximum=30.0, step=0.5, value=5.0
                         )
                         num_steps_slider = gr.Slider(
                             label="Steps", minimum=40, maximum=100, value=50, step=1
                         )
                         seed_slider = gr.Slider(
-                            label="Seed", minimum=0, maximum=MAX_SEED,
-                            step=1, value=42
                         )
                         randomize_checkbox = gr.Checkbox(
                             label="Randomize seed", value=False
@@ -196,13 +229,13 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
                         label="Image Generation Examples"
                     )
-                # ---------------------- Tab 3  图像 → 文本 ----------------------
                 with gr.TabItem("Image → Text"):
                     image_understand_input = gr.Image(label="Input Image", type="pil")
                     with gr.Row():
                         prompt_understand_input = gr.Textbox(
                             label="Prompt", show_label=False,
-                            placeholder="Describe the question about image...",
                             container=False, lines=1
                         )
                         run_image_understand_btn = gr.Button("Run", scale=0)
@@ -220,55 +253,38 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
             clean_btn = gr.Button("Clear All Inputs / Outputs")
-        # ---------------------- 输出区 ----------------------
         with gr.Column():
-            output_gallery = gr.Gallery(label="Generated Images",
-                                        columns=2, visible=True)
-            output_text = gr.Textbox(label="Generated Text",
-                                     visible=False, lines=5,
-                                     interactive=False)
-    # ---------------------------------------------------------------------
-    # 事件绑定
-    # ---------------------------------------------------------------------
     def run_img_txt_to_img_tab(prompt, img, steps, seed, txt_cfg, img_cfg,
                                progress=gr.Progress(track_tqdm=True)):
         if img is None:
-            return (
-                gr.update(value=[], visible=False),
-                gr.update(value="Please upload an image for editing.", visible=True)
-            )
-        imgs = process_img_txt_to_img(prompt, img, steps, seed,
-                                      txt_cfg, img_cfg, progress)
-        return (
-            gr.update(value=imgs, visible=True),
-            gr.update(value="", visible=False)
-        )
     def run_txt_to_img_tab(prompt, height, width, steps, seed, guidance,
                            progress=gr.Progress(track_tqdm=True)):
-        imgs = process_txt_to_img(prompt, height, width, steps, seed,
-                                  guidance, progress)
-        return (
-            gr.update(value=imgs, visible=True),
-            gr.update(value="", visible=False)
-        )
     def run_img_to_txt_tab(img, prompt,
                            progress=gr.Progress(track_tqdm=True)):
         if img is None:
-            return (
-                gr.update(value=[], visible=False),
-                gr.update(value="Please upload an image for understanding.", visible=True)
-            )
         txt = process_img_to_txt(prompt, img, progress)
-        return (
-            gr.update(value=[], visible=False),
-            gr.update(value=txt, visible=True)
-        )
     def clean_all_fn():
-        """重置全部输入 / 输出"""
         return (
             # Tab 1
             gr.update(value=None), gr.update(value=""),
@@ -287,7 +303,7 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
             gr.update(value="",  visible=False)
         )
-    # ---------- Tab 1 ----------
     edit_inputs = [
         edit_prompt_input, edit_image_input,
         edit_num_steps_slider, edit_seed_slider,
@@ -306,7 +322,7 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
         [output_gallery, output_text]
     )
-    # ---------- Tab 2 ----------
     gen_inputs = [
         prompt_gen_input, height_slider, width_slider,
         num_steps_slider, seed_slider, guidance_slider
@@ -324,7 +340,7 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
         [output_gallery, output_text]
     )
-    # ---------- Tab 3 ----------
     understand_inputs = [image_understand_input, prompt_understand_input]
     run_image_understand_btn.click(run_img_to_txt_tab,
                                    understand_inputs,
@@ -333,7 +349,7 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
                                    understand_inputs,
                                    [output_gallery, output_text])
-    # ---------- 清空 ----------
     clean_btn.click(clean_all_fn, [], [
         edit_image_input, edit_prompt_input, edit_img_guidance_slider,
         edit_txt_guidance_slider, edit_num_steps_slider, edit_seed_slider,
@@ -343,8 +359,9 @@ with gr.Blocks(title="Ovis-U1-3B (CPU)") as demo:
         output_gallery, output_text
     ])
-# -------------------------------------------------------------------------
-# 启动
-# -------------------------------------------------------------------------
 if __name__ == "__main__":
-    demo.launch(share=True)     # HF Spaces 自动监听 7860 端口

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
+Ovis-U1-3B 多模态 DEMO（CPU / GPU 自适应版本）
+依赖：Python 3.10+、torch 2.*、transformers 4.41.*、gradio 4.*
 """
+# ───────────────────────────────────────────────────────────────
+# ① 在任何 transformers / flash_attn 导入之前完成环境处理
+# ───────────────────────────────────────────────────────────────
 import os
+import sys
+import types
 import subprocess
 import random
 import numpy as np
 import torch
+# 判断是否有 CUDA
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DTYPE  = torch.bfloat16 if DEVICE == "cuda" else torch.float32   # CPU → float32
+# ---------- CPU environment: shadow flash-attn with a stub ----------
+# When DEVICE == "cpu", any installed flash-attn is removed and stub modules
+# are registered in sys.modules, so that a later
+# `import flash_attn.layers.rotary` resolves to the stub defined here.
+# Otherwise flash-attn is installed with pip on a best-effort basis.
+if DEVICE == "cpu":
+    # Try to uninstall an existing flash-attn (in case the base image ships it).
+    # The exit status is not checked and pip's output is discarded, so a
+    # failure here is silent.
+    subprocess.run("pip uninstall -y flash-attn", shell=True,
+                   stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    # Build empty "shell" flash_attn modules that expose a minimal
+    # rotary-embedding implementation.
+    fake_flash_attn   = types.ModuleType("flash_attn")
+    fake_layers       = types.ModuleType("flash_attn.layers")
+    fake_rotary       = types.ModuleType("flash_attn.layers.rotary")
+    def _cpu_apply_rotary_emb(x, cos, sin):
+        """Rotary position embedding in plain tensor ops (slow, but usable).
+
+        Treats the last axis of ``x`` as interleaved (even, odd) pairs and
+        rotates each pair using ``cos`` / ``sin``. Returns a new tensor
+        allocated with ``torch.empty_like(x)``; ``x`` itself is not modified.
+
+        ``cos`` and ``sin`` are combined with the even/odd slices by plain
+        broadcasting; no reshaping is done here.
+
+        NOTE(review): this stands in for flash_attn's ``apply_rotary_emb``.
+        Presumably the real function accepts further optional arguments
+        (e.g. ``interleaved``) and may default to a different pairing layout
+        and cos/sin shape convention -- verify against the flash-attn source
+        and the model's remote code before trusting the outputs.
+        """
+        # Even-indexed and odd-indexed elements of the last axis form the pairs.
+        x1, x2 = x[..., ::2], x[..., 1::2]
+        rot_x1 = x1 * cos - x2 * sin
+        rot_x2 = x1 * sin + x2 * cos
+        out = torch.empty_like(x)
+        # Write the rotated halves back into their original interleaved slots.
+        out[..., ::2] = rot_x1
+        out[..., 1::2] = rot_x2
+        return out
+    # Wire the stub function and submodules together, then register all three
+    # module names in sys.modules.
+    fake_rotary.apply_rotary_emb = _cpu_apply_rotary_emb
+    fake_layers.rotary           = fake_rotary
+    fake_flash_attn.layers       = fake_layers
+    sys.modules.update({
+        "flash_attn":               fake_flash_attn,
+        "flash_attn.layers":        fake_layers,
+        "flash_attn.layers.rotary": fake_rotary,
+    })
+else:
+    # GPU environment: install flash-attn if needed (a failed install is not fatal).
+    # NOTE(review): `env=` replaces the child's environment rather than
+    # extending it, so the pip process sees only FLASH_ATTENTION_SKIP_CUDA_BUILD
+    # -- confirm pip still resolves without the inherited PATH.
+    try:
+        subprocess.run(
+            "pip install flash-attn==2.6.3 --no-build-isolation",
+            env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+            shell=True,
+            check=True,
+        )
+    except subprocess.CalledProcessError:
+        # check=True turns a non-zero pip exit status into this exception;
+        # it is downgraded to a printed warning so startup continues.
+        print("[WARN] flash-attn 安装失败，推理性能可能受影响。")
+# ───────────────────────────────────────────────────────────────
+# ② 常规依赖
+# ───────────────────────────────────────────────────────────────
 from PIL import Image
 import gradio as gr
 import spaces
 from transformers import AutoModelForCausalLM
+# 用户自定义管线脚本（保持原有路径）
+from test_img_edit   import pipe_img_edit
+from test_img_to_txt import pipe_txt_gen
+from test_txt_to_img import pipe_t2i
+# ───────────────────────────────────────────────────────────────
+# ③ 工具函数 & 常量
+# ───────────────────────────────────────────────────────────────
 MAX_SEED = 10_000
 def set_global_seed(seed: int = 42) -> None:
     """统一设置随机种子（CPU / CUDA 自适应）"""
         torch.cuda.manual_seed_all(seed)
 def randomize_seed_fn(seed: int, randomize: bool) -> int:
+    """Choose the seed to use, depending on the "randomize" checkbox.
+
+    Returns a random integer in ``[0, MAX_SEED]`` (both ends included) when
+    ``randomize`` is truthy; otherwise returns ``seed`` unchanged.
+    """
     return random.randint(0, MAX_SEED) if randomize else seed
+# ───────────────────────────────────────────────────────────────
+# ④ 加载模型
+# ───────────────────────────────────────────────────────────────
+HF_TOKEN  = os.getenv("HF_TOKEN")            # for a private repo, set this as a Space secret
+MODEL_ID  = "AIDC-AI/Ovis-U1-3B"
+print(f"[INFO] Loading {MODEL_ID} on {DEVICE} …")
+# Load the checkpoint once at import time and switch it to eval mode.
+# trust_remote_code=True lets the repository's own model code run.
 model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
     torch_dtype=DTYPE,
+    low_cpu_mem_usage=True,   # lower resident memory (RSS)
+    device_map="auto",        # on a CPU-only host everything is placed on the CPU
     token=HF_TOKEN,
     trust_remote_code=True
+).eval()
 print("[INFO] Model ready!")
+# ───────────────────────────────────────────────────────────────
+# ⑤ 推理封装
+# ───────────────────────────────────────────────────────────────
 def process_txt_to_img(prompt: str, height: int, width: int, steps: int,
                        final_seed: int, guidance_scale: float,
                        progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
     return pipe_img_edit(model, img, prompt, steps,
                          txt_cfg, img_cfg, seed=final_seed)
+# ───────────────────────────────────────────────────────────────
+# ⑥ Gradio UI
+# ───────────────────────────────────────────────────────────────
+with gr.Blocks(title="Ovis-U1-3B (CPU/GPU adaptive)") as demo:
+    gr.Markdown("# Ovis-U1-3B\n多模态文本-图像 DEMO（CPU/GPU 自适应版）")
     with gr.Row():
+        # -------- 左侧：输入区 --------
         with gr.Column():
             with gr.Tabs():
+                # ── Tab 1: Image + Text → Image ──
                 with gr.TabItem("Image + Text → Image"):
                     edit_image_input = gr.Image(label="Input Image", type="pil")
                     with gr.Row():
                         edit_prompt_input = gr.Textbox(
                             label="Prompt",
                             show_label=False,
+                            placeholder="Describe the editing instruction…",
                             container=False,
                             lines=1
                         )
                             label="Steps", minimum=40, maximum=100, value=50, step=1
                         )
                         edit_seed_slider = gr.Slider(
+                            label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42
                         )
                         edit_randomize_checkbox = gr.Checkbox(
                             label="Randomize seed", value=False
                         label="Image Editing Examples"
                     )
+                # ── Tab 2: Text → Image ──
                 with gr.TabItem("Text → Image"):
                     with gr.Row():
                         prompt_gen_input = gr.Textbox(
+                            label="Prompt", show_label=False,
+                            placeholder="Describe the image you want…",
+                            container=False, lines=1
                         )
                         run_image_gen_btn = gr.Button("Run", scale=0)
                                 value=1024, step=32
                             )
                         guidance_slider = gr.Slider(
+                            label="Guidance Scale", minimum=1.0,
+                            maximum=30.0, step=0.5, value=5.0
                         )
                         num_steps_slider = gr.Slider(
                             label="Steps", minimum=40, maximum=100, value=50, step=1
                         )
                         seed_slider = gr.Slider(
+                            label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42
                         )
                         randomize_checkbox = gr.Checkbox(
                             label="Randomize seed", value=False
                         label="Image Generation Examples"
                     )
+                # ── Tab 3: Image → Text ──
                 with gr.TabItem("Image → Text"):
                     image_understand_input = gr.Image(label="Input Image", type="pil")
                     with gr.Row():
                         prompt_understand_input = gr.Textbox(
                             label="Prompt", show_label=False,
+                            placeholder="Describe the question about image…",
                             container=False, lines=1
                         )
                         run_image_understand_btn = gr.Button("Run", scale=0)
             clean_btn = gr.Button("Clear All Inputs / Outputs")
+        # -------- 右侧：输出区 --------
         with gr.Column():
+            output_gallery = gr.Gallery(label="Generated Images", columns=2, visible=True)
+            output_text    = gr.Textbox(label="Generated Text", visible=False, lines=5, interactive=False)
+    # ───────────────────────── 事件绑定 ─────────────────────────
     def run_img_txt_to_img_tab(prompt, img, steps, seed, txt_cfg, img_cfg,
                                progress=gr.Progress(track_tqdm=True)):
+        """Callback for the image-editing flow (image + prompt in, images out).
+
+        Returns a pair of ``gr.update`` objects: the first carries an image
+        list and its visibility, the second a text value and its visibility
+        (presumably bound to ``[output_gallery, output_text]`` -- the event
+        wiring is not fully visible here).
+
+        With no input image, the first update is emptied and hidden and the
+        second shows an upload reminder. Otherwise the result of
+        ``process_img_txt_to_img`` is shown and the text is cleared and hidden.
+        """
         if img is None:
+            # Nothing to edit: hide the images and surface the message instead.
+            return (gr.update(value=[], visible=False),
+                    gr.update(value="Please upload an image for editing.", visible=True))
+        imgs = process_img_txt_to_img(prompt, img, steps, seed, txt_cfg, img_cfg, progress)
+        return (gr.update(value=imgs, visible=True),
+                gr.update(value="", visible=False))
     def run_txt_to_img_tab(prompt, height, width, steps, seed, guidance,
                            progress=gr.Progress(track_tqdm=True)):
+        """Callback for the text-to-image flow.
+
+        Forwards all arguments to ``process_txt_to_img`` and returns a pair of
+        ``gr.update`` objects: the first shows the returned images, the second
+        clears and hides the text value (presumably bound to
+        ``[output_gallery, output_text]`` -- confirm against the event wiring).
+        """
+        imgs = process_txt_to_img(prompt, height, width, steps, seed, guidance, progress)
+        return (gr.update(value=imgs, visible=True),
+                gr.update(value="", visible=False))
     def run_img_to_txt_tab(img, prompt,
                            progress=gr.Progress(track_tqdm=True)):
+        """Callback for the image-understanding flow (image + prompt in, text out).
+
+        Returns a pair of ``gr.update`` objects: the first always empties and
+        hides the image list, the second shows either an upload reminder (when
+        ``img`` is None) or the text returned by ``process_img_to_txt``.
+        """
         if img is None:
+            return (gr.update(value=[], visible=False),
+                    gr.update(value="Please upload an image for understanding.", visible=True))
+        # Note the argument order: this handler receives (img, prompt) but the
+        # helper is called with (prompt, img).
         txt = process_img_to_txt(prompt, img, progress)
+        return (gr.update(value=[], visible=False),
+                gr.update(value=txt, visible=True))
     def clean_all_fn():
+        """重置全部输入/输出控件"""
         return (
             # Tab 1
             gr.update(value=None), gr.update(value=""),
             gr.update(value="",  visible=False)
         )
+    # ------ Tab 1 绑定 ------
     edit_inputs = [
         edit_prompt_input, edit_image_input,
         edit_num_steps_slider, edit_seed_slider,
         [output_gallery, output_text]
     )
+    # ------ Tab 2 绑定 ------
     gen_inputs = [
         prompt_gen_input, height_slider, width_slider,
         num_steps_slider, seed_slider, guidance_slider
         [output_gallery, output_text]
     )
+    # ------ Tab 3 绑定 ------
     understand_inputs = [image_understand_input, prompt_understand_input]
     run_image_understand_btn.click(run_img_to_txt_tab,
                                    understand_inputs,
                                    understand_inputs,
                                    [output_gallery, output_text])
+    # 清空
     clean_btn.click(clean_all_fn, [], [
         edit_image_input, edit_prompt_input, edit_img_guidance_slider,
         edit_txt_guidance_slider, edit_num_steps_slider, edit_seed_slider,
         output_gallery, output_text
     ])
+# ───────────────────────────────────────────────────────────────
+# ⑦ 启动 Space
+# ───────────────────────────────────────────────────────────────
 if __name__ == "__main__":
+    # HF Spaces listens on 0.0.0.0:7860 by default, so share=True is not needed
+    # (original author's note; not verifiable from this file).
+    demo.launch()