i3d

Paused

App Files Community

rgndgn committed on Feb 14

Commit

efccc85

verified ·

1 Parent(s): 96e3b91

Update gradio_app.py

Browse files

Files changed (1) hide show

gradio_app.py +247 -247

gradio_app.py CHANGED Viewed

@@ -1,248 +1,248 @@
-import spaces
-import os
-import tempfile
-from typing import Any
-import torch
-import numpy as np
-from PIL import Image
-import gradio as gr
-import trimesh
-from transparent_background import Remover
-import subprocess
-def install_cuda_toolkit():
-    # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
-    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
-    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
-    subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
-    subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
-    subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
-    os.environ["CUDA_HOME"] = "/usr/local/cuda"
-    os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
-    os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
-        os.environ["CUDA_HOME"],
-        "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
-    )
-    # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
-    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
-install_cuda_toolkit()
-# Import and setup SPAR3D
-os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
-import spar3d.utils as spar3d_utils
-from spar3d.system import SPAR3D
-# Constants
-COND_WIDTH = 512
-COND_HEIGHT = 512
-COND_DISTANCE = 2.2
-COND_FOVY = 0.591627
-BACKGROUND_COLOR = [0.5, 0.5, 0.5]
-# Initialize models
-device = spar3d_utils.get_device()
-bg_remover = Remover()
-spar3d_model = SPAR3D.from_pretrained(
-    "stabilityai/stable-point-aware-3d",
-    config_name="config.yaml",
-    weight_name="model.safetensors"
-).eval().to(device)
-# Initialize camera parameters
-c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
-intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
-    COND_FOVY, COND_HEIGHT, COND_WIDTH
-)
-def create_rgba_image(rgb_image: Image.Image, mask: np.ndarray = None) -> Image.Image:
-    """Create an RGBA image from RGB image and optional mask."""
-    rgba_image = rgb_image.convert('RGBA')
-    if mask is not None:
-        # Ensure mask is 2D before converting to alpha
-        if len(mask.shape) > 2:
-            mask = mask.squeeze()
-        alpha = Image.fromarray((mask * 255).astype(np.uint8))
-        rgba_image.putalpha(alpha)
-    return rgba_image
-def create_batch(input_image: Image.Image) -> dict[str, Any]:
-    """Prepare image batch for model input."""
-    # Resize and convert input image to numpy array
-    resized_image = input_image.resize((COND_WIDTH, COND_HEIGHT))
-    img_array = np.array(resized_image).astype(np.float32) / 255.0
-    # Extract RGB and alpha channels
-    if img_array.shape[-1] == 4:  # RGBA
-        rgb = img_array[..., :3]
-        mask = img_array[..., 3:4]
-    else:  # RGB
-        rgb = img_array
-        mask = np.ones((*img_array.shape[:2], 1), dtype=np.float32)
-    # Convert to tensors while keeping channel-last format
-    rgb = torch.from_numpy(rgb).float()  # [H, W, 3]
-    mask = torch.from_numpy(mask).float()  # [H, W, 1]
-    # Create background blend (match channel-last format)
-    bg_tensor = torch.tensor(BACKGROUND_COLOR).view(1, 1, 3)  # [1, 1, 3]
-    # Blend RGB with background using mask (all in channel-last format)
-    rgb_cond = torch.lerp(bg_tensor, rgb, mask)  # [H, W, 3]
-    # Move channels to correct dimension and add batch dimension
-    # Important: For SPAR3D image tokenizer, we need [B, H, W, C] format
-    rgb_cond = rgb_cond.unsqueeze(0)  # [1, H, W, 3]
-    mask = mask.unsqueeze(0)  # [1, H, W, 1]
-    # Create the batch dictionary
-    batch = {
-        "rgb_cond": rgb_cond,  # [1, H, W, 3]
-        "mask_cond": mask,  # [1, H, W, 1]
-        "c2w_cond": c2w_cond.unsqueeze(0),  # [1, 4, 4]
-        "intrinsic_cond": intrinsic.unsqueeze(0),  # [1, 3, 3]
-        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),  # [1, 3, 3]
-    }
-    for k, v in batch.items():
-        print(f"[debug] {k} final shape:", v.shape)
-    return batch
-def forward_model(batch, system, guidance_scale=3.0, seed=0, device="cuda"):
-    """Process batch through model and generate point cloud."""
-    batch_size = batch["rgb_cond"].shape[0]
-    assert batch_size == 1, f"Expected batch size 1, got {batch_size}"
-    # Generate point cloud tokens
-    try:
-        cond_tokens = system.forward_pdiff_cond(batch)
-    except Exception as e:
-        print("\n[ERROR] Failed in forward_pdiff_cond:")
-        print(e)
-        print("\nInput tensor properties:")
-        print("rgb_cond dtype:", batch["rgb_cond"].dtype)
-        print("rgb_cond device:", batch["rgb_cond"].device)
-        print("rgb_cond requires_grad:", batch["rgb_cond"].requires_grad)
-        raise
-    # Sample points
-    sample_iter = system.sampler.sample_batch_progressive(
-        batch_size,
-        cond_tokens,
-        guidance_scale=guidance_scale,
-        device=device
-    )
-    # Get final samples
-    for x in sample_iter:
-        samples = x["xstart"]
-    pc_cond = samples.permute(0, 2, 1).float()
-    # Normalize point cloud
-    pc_cond = spar3d_utils.normalize_pc_bbox(pc_cond)
-    # Subsample to 512 points
-    pc_cond = pc_cond[:, torch.randperm(pc_cond.shape[1])[:512]]
-    return pc_cond
-@spaces.GPU
-@torch.inference_mode()
-def generate_and_process_3d(image: Image.Image) -> tuple[str | None, Image.Image | None]:
-    """Generate image from prompt and convert to 3D model."""
-    # Generate random seed
-    seed = np.random.randint(0, np.iinfo(np.int32).max)
-    try:
-        rgb_image = image.convert('RGB')
-        # bg_remover returns a PIL Image already, no need to convert
-        no_bg_image = bg_remover.process(rgb_image)
-        print(f"[debug] no_bg_image type: {type(no_bg_image)}, mode: {no_bg_image.mode}")
-        # Convert to RGBA if not already
-        rgba_image = no_bg_image.convert('RGBA')
-        print(f"[debug] rgba_image mode: {rgba_image.mode}")
-        processed_image = spar3d_utils.foreground_crop(
-            rgba_image,
-            crop_ratio=1.3,
-            newsize=(COND_WIDTH, COND_HEIGHT),
-            no_crop=False
-        )
-        # Show the processed image alpha channel for debugging
-        alpha = np.array(processed_image)[:, :, 3]
-        print(f"[debug] Alpha channel stats - min: {alpha.min()}, max: {alpha.max()}, unique: {np.unique(alpha)}")
-        # Prepare batch for processing
-        batch = create_batch(processed_image)
-        batch = {k: v.to(device) for k, v in batch.items()}
-        # Generate point cloud
-        pc_cond = forward_model(
-            batch,
-            spar3d_model,
-            guidance_scale=3.0,
-            seed=seed,
-            device=device
-        )
-        batch["pc_cond"] = pc_cond
-        # Generate mesh
-        with torch.no_grad():
-            with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu', dtype=torch.bfloat16):
-                trimesh_mesh, _ = spar3d_model.generate_mesh(
-                    batch,
-                    1024,  # texture_resolution
-                    remesh="none",
-                    vertex_count=-1,
-                    estimate_illumination=True
-                )
-                trimesh_mesh = trimesh_mesh[0]
-        # Export to GLB
-        temp_dir = tempfile.mkdtemp()
-        output_path = os.path.join(temp_dir, 'output.glb')
-        trimesh_mesh.export(output_path, file_type="glb", include_normals=True)
-        return output_path
-    except Exception as e:
-        print(f"Error during generation: {str(e)}")
-        import traceback
-        traceback.print_exc()
-        return None
-# Create Gradio app using Blocks
-with gr.Blocks() as demo:
-    gr.Markdown("This space is based on [Stable Point-Aware 3D](https://huggingface.co/spaces/stabilityai/stable-point-aware-3d) by Stability AI, [Text to 3D](https://huggingface.co/spaces/jbilcke-hf/text-to-3d) by jbilcke-hf.")
-    with gr.Row():
-        input_img = gr.Image(
-            type="pil", label="Input Image", sources="upload", image_mode="RGBA"
-        )
-    with gr.Row():
-        model_output = gr.Model3D(
-            label="Generated .GLB model",
-            clear_color=[0.0, 0.0, 0.0, 0.0],
-        )
-    # Event handler
-    input_img.upload(
-        fn=generate_and_process_3d,
-        inputs=[input_img],
-        outputs=[model_output],
-        api_name="generate"
-    )
-if __name__ == "__main__":
     demo.queue().launch()

+import spaces
+import os
+import tempfile
+from typing import Any
+import torch
+import numpy as np
+from PIL import Image
+import gradio as gr
+import trimesh
+from transparent_background import Remover
+import subprocess
+def install_cuda_toolkit():
+    # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
+    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
+    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
+    subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
+    subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
+    subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
+    os.environ["CUDA_HOME"] = "/usr/local/cuda"
+    os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
+    os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
+        os.environ["CUDA_HOME"],
+        "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
+    )
+    # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
+    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
+install_cuda_toolkit()
+# Import and setup SPAR3D
+os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
+import spar3d.utils as spar3d_utils
+from spar3d.system import SPAR3D
+# Constants
+COND_WIDTH = 512
+COND_HEIGHT = 512
+COND_DISTANCE = 2.2
+COND_FOVY = 0.591627
+BACKGROUND_COLOR = [0.5, 0.5, 0.5]
+# Initialize models
+device = spar3d_utils.get_device()
+bg_remover = Remover()
+spar3d_model = SPAR3D.from_pretrained(
+    "stabilityai/stable-point-aware-3d",
+    config_name="config.yaml",
+    weight_name="model.safetensors"
+).eval().to(device)
+# Initialize camera parameters
+c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
+intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
+    COND_FOVY, COND_HEIGHT, COND_WIDTH
+)
+def create_rgba_image(rgb_image: Image.Image, mask: np.ndarray = None) -> Image.Image:
+    """Create an RGBA image from RGB image and optional mask."""
+    rgba_image = rgb_image.convert('RGBA')
+    if mask is not None:
+        # Ensure mask is 2D before converting to alpha
+        if len(mask.shape) > 2:
+            mask = mask.squeeze()
+        alpha = Image.fromarray((mask * 255).astype(np.uint8))
+        rgba_image.putalpha(alpha)
+    return rgba_image
+def create_batch(input_image: Image.Image) -> dict[str, Any]:
+    """Prepare image batch for model input."""
+    # Resize and convert input image to numpy array
+    resized_image = input_image.resize((COND_WIDTH, COND_HEIGHT))
+    img_array = np.array(resized_image).astype(np.float32) / 255.0
+    # Extract RGB and alpha channels
+    if img_array.shape[-1] == 4:  # RGBA
+        rgb = img_array[..., :3]
+        mask = img_array[..., 3:4]
+    else:  # RGB
+        rgb = img_array
+        mask = np.ones((*img_array.shape[:2], 1), dtype=np.float32)
+    # Convert to tensors while keeping channel-last format
+    rgb = torch.from_numpy(rgb).float()  # [H, W, 3]
+    mask = torch.from_numpy(mask).float()  # [H, W, 1]
+    # Create background blend (match channel-last format)
+    bg_tensor = torch.tensor(BACKGROUND_COLOR).view(1, 1, 3)  # [1, 1, 3]
+    # Blend RGB with background using mask (all in channel-last format)
+    rgb_cond = torch.lerp(bg_tensor, rgb, mask)  # [H, W, 3]
+    # Move channels to correct dimension and add batch dimension
+    # Important: For SPAR3D image tokenizer, we need [B, H, W, C] format
+    rgb_cond = rgb_cond.unsqueeze(0)  # [1, H, W, 3]
+    mask = mask.unsqueeze(0)  # [1, H, W, 1]
+    # Create the batch dictionary
+    batch = {
+        "rgb_cond": rgb_cond,  # [1, H, W, 3]
+        "mask_cond": mask,  # [1, H, W, 1]
+        "c2w_cond": c2w_cond.unsqueeze(0),  # [1, 4, 4]
+        "intrinsic_cond": intrinsic.unsqueeze(0),  # [1, 3, 3]
+        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),  # [1, 3, 3]
+    }
+    for k, v in batch.items():
+        print(f"[debug] {k} final shape:", v.shape)
+    return batch
+def forward_model(batch, system, guidance_scale=3.0, seed=0, device="cuda"):
+    """Process batch through model and generate point cloud."""
+    batch_size = batch["rgb_cond"].shape[0]
+    assert batch_size == 1, f"Expected batch size 1, got {batch_size}"
+    # Generate point cloud tokens
+    try:
+        cond_tokens = system.forward_pdiff_cond(batch)
+    except Exception as e:
+        print("\n[ERROR] Failed in forward_pdiff_cond:")
+        print(e)
+        print("\nInput tensor properties:")
+        print("rgb_cond dtype:", batch["rgb_cond"].dtype)
+        print("rgb_cond device:", batch["rgb_cond"].device)
+        print("rgb_cond requires_grad:", batch["rgb_cond"].requires_grad)
+        raise
+    # Sample points
+    sample_iter = system.sampler.sample_batch_progressive(
+        batch_size,
+        cond_tokens,
+        guidance_scale=guidance_scale,
+        device=device
+    )
+    # Get final samples
+    for x in sample_iter:
+        samples = x["xstart"]
+    pc_cond = samples.permute(0, 2, 1).float()
+    # Normalize point cloud
+    pc_cond = spar3d_utils.normalize_pc_bbox(pc_cond)
+    # Subsample to 512 points
+    pc_cond = pc_cond[:, torch.randperm(pc_cond.shape[1])[:512]]
+    return pc_cond
+@spaces.GPU
+@torch.inference_mode()
+def generate_and_process_3d(image: Image.Image) -> tuple[str | None, Image.Image | None]:
+    """Generate image from prompt and convert to 3D model."""
+    # Generate random seed
+    seed = np.random.randint(0, np.iinfo(np.int32).max)
+    try:
+        rgb_image = image.convert('RGB')
+        # bg_remover returns a PIL Image already, no need to convert
+        no_bg_image = bg_remover.process(rgb_image)
+        print(f"[debug] no_bg_image type: {type(no_bg_image)}, mode: {no_bg_image.mode}")
+        # Convert to RGBA if not already
+        rgba_image = no_bg_image.convert('RGBA')
+        print(f"[debug] rgba_image mode: {rgba_image.mode}")
+        processed_image = spar3d_utils.foreground_crop(
+            rgba_image,
+            crop_ratio=1.3,
+            newsize=(COND_WIDTH, COND_HEIGHT),
+            no_crop=False
+        )
+        # Show the processed image alpha channel for debugging
+        alpha = np.array(processed_image)[:, :, 3]
+        print(f"[debug] Alpha channel stats - min: {alpha.min()}, max: {alpha.max()}, unique: {np.unique(alpha)}")
+        # Prepare batch for processing
+        batch = create_batch(processed_image)
+        batch = {k: v.to(device) for k, v in batch.items()}
+        # Generate point cloud
+        pc_cond = forward_model(
+            batch,
+            spar3d_model,
+            guidance_scale=3.0,
+            seed=seed,
+            device=device
+        )
+        batch["pc_cond"] = pc_cond
+        # Generate mesh
+        with torch.no_grad():
+            with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu', dtype=torch.bfloat16):
+                trimesh_mesh, _ = spar3d_model.generate_mesh(
+                    batch,
+                    1024,  # texture_resolution
+                    remesh="none",
+                    vertex_count=-1,
+                    estimate_illumination=True
+                )
+                trimesh_mesh = trimesh_mesh[0]
+        # Export to GLB
+        temp_dir = tempfile.mkdtemp()
+        output_path = os.path.join(temp_dir, 'mesh.glb')
+        trimesh_mesh.export(output_path, file_type="glb", include_normals=True)
+        return output_path
+    except Exception as e:
+        print(f"Error during generation: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return None
+# Create Gradio app using Blocks
+with gr.Blocks() as demo:
+    gr.Markdown("This space is based on [Stable Point-Aware 3D](https://huggingface.co/spaces/stabilityai/stable-point-aware-3d) by Stability AI, [Text to 3D](https://huggingface.co/spaces/jbilcke-hf/text-to-3d) by jbilcke-hf.")
+    with gr.Row():
+        input_img = gr.Image(
+            type="pil", label="Input Image", sources="upload", image_mode="RGBA"
+        )
+    with gr.Row():
+        model_output = gr.Model3D(
+            label="Generated .GLB model",
+            clear_color=[0.0, 0.0, 0.0, 0.0],
+        )
+    # Event handler
+    input_img.upload(
+        fn=generate_and_process_3d,
+        inputs=[input_img],
+        outputs=[model_output],
+        api_name="generate"
+    )
+if __name__ == "__main__":
     demo.queue().launch()