Spaces:

HorizonRobotics
/

EmbodiedGen-Text-to-3D

Running on Zero

App Files Community

xinjie.wang committed 24 days ago

Commit

32ccac0

1 Parent(s): 3b7b0b9

update

Browse files

Files changed (14) hide show

app.py +1 -3
app_style.py +27 -0
common.py +3 -30
embodied_gen/data/asset_converter.py +204 -38
embodied_gen/data/convex_decomposer.py +19 -7
embodied_gen/data/mesh_operator.py +1 -0
embodied_gen/envs/pick_embodiedgen.py +4 -2
embodied_gen/models/layout.py +4 -3
embodied_gen/scripts/compose_layout.py +9 -0
embodied_gen/scripts/gen_layout.py +13 -2
embodied_gen/scripts/simulate_sapien.py +2 -2
embodied_gen/utils/geometry.py +72 -17
embodied_gen/validators/quality_checkers.py +7 -6
embodied_gen/validators/urdf_convertor.py +2 -6

app.py CHANGED Viewed

@@ -21,20 +21,18 @@ os.environ["GRADIO_APP"] = "textto3d"
 import gradio as gr
 from common import (
     MAX_SEED,
     VERSION,
     active_btn_by_text_content,
-    custom_theme,
     end_session,
     extract_3d_representations_v2,
     extract_urdf,
     get_cached_image,
     get_seed,
     get_selected_image,
-    image_css,
     image_to_3d,
-    lighting_css,
     start_session,
     text2image_fn,
 )

 import gradio as gr
+from app_style import custom_theme, image_css, lighting_css
 from common import (
     MAX_SEED,
     VERSION,
     active_btn_by_text_content,
     end_session,
     extract_3d_representations_v2,
     extract_urdf,
     get_cached_image,
     get_seed,
     get_selected_image,
     image_to_3d,
     start_session,
     text2image_fn,
 )

app_style.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from gradio.themes import Soft
+from gradio.themes.utils.colors import gray, neutral, slate, stone, teal, zinc
+lighting_css = """
+<style>
+#lighter_mesh canvas {
+    filter: brightness(1.9) !important;
+}
+</style>
+"""
+image_css = """
+<style>
+.image_fit .image-frame {
+object-fit: contain !important;
+height: 100% !important;
+}
+</style>
+"""
+custom_theme = Soft(
+    primary_hue=stone,
+    secondary_hue=gray,
+    radius_size="md",
+    text_size="sm",
+    spacing_size="sm",
+)

common.py CHANGED Viewed

@@ -30,8 +30,6 @@ import torch
 import torch.nn.functional as F
 import trimesh
 from easydict import EasyDict as edict
-from gradio.themes import Soft
-from gradio.themes.utils.colors import gray, neutral, slate, stone, teal, zinc
 from PIL import Image
 from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
 from embodied_gen.data.differentiable_render import entrypoint as render_api
@@ -151,6 +149,7 @@ if os.getenv("GRADIO_APP") == "imageto3d":
     TMP_DIR = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "sessions/imageto3d"
     )
 elif os.getenv("GRADIO_APP") == "textto3d":
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
@@ -168,6 +167,7 @@ elif os.getenv("GRADIO_APP") == "textto3d":
     TMP_DIR = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "sessions/textto3d"
     )
 elif os.getenv("GRADIO_APP") == "texture_edit":
     PIPELINE_IP = build_texture_gen_pipe(
         base_ckpt_dir="./weights",
@@ -182,34 +182,7 @@ elif os.getenv("GRADIO_APP") == "texture_edit":
     TMP_DIR = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "sessions/texture_edit"
     )
-os.makedirs(TMP_DIR, exist_ok=True)
-lighting_css = """
-<style>
-#lighter_mesh canvas {
-    filter: brightness(1.9) !important;
-}
-</style>
-"""
-image_css = """
-<style>
-.image_fit .image-frame {
-object-fit: contain !important;
-height: 100% !important;
-}
-</style>
-"""
-custom_theme = Soft(
-    primary_hue=stone,
-    secondary_hue=gray,
-    radius_size="md",
-    text_size="sm",
-    spacing_size="sm",
-)
 def start_session(req: gr.Request) -> None:

 import torch.nn.functional as F
 import trimesh
 from easydict import EasyDict as edict
 from PIL import Image
 from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
 from embodied_gen.data.differentiable_render import entrypoint as render_api
     TMP_DIR = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "sessions/imageto3d"
     )
+    os.makedirs(TMP_DIR, exist_ok=True)
 elif os.getenv("GRADIO_APP") == "textto3d":
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
     TMP_DIR = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "sessions/textto3d"
     )
+    os.makedirs(TMP_DIR, exist_ok=True)
 elif os.getenv("GRADIO_APP") == "texture_edit":
     PIPELINE_IP = build_texture_gen_pipe(
         base_ckpt_dir="./weights",
     TMP_DIR = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "sessions/texture_edit"
     )
+    os.makedirs(TMP_DIR, exist_ok=True)
 def start_session(req: gr.Request) -> None:

embodied_gen/data/asset_converter.py CHANGED Viewed

@@ -5,6 +5,7 @@ import os
 import xml.etree.ElementTree as ET
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from shutil import copy
 import trimesh
@@ -145,18 +146,20 @@ class MeshtoMJCFConverter(AssetConverterBase):
             texture=f"texture_{name}",
             reflectance=str(reflectance),
         )
-        ET.SubElement(
-            mujoco_element,
-            "texture",
-            name=f"texture_{name}",
-            type="2d",
-            file=f"{dirname}/material_0.png",
-        )
-        self._copy_asset_file(
-            f"{input_dir}/{dirname}/material_0.png",
-            f"{output_dir}/{dirname}/material_0.png",
-        )
         return material
@@ -213,6 +216,163 @@ class MeshtoMJCFConverter(AssetConverterBase):
         logger.info(f"Successfully converted {urdf_path} → {mjcf_path}")
 class MeshtoUSDConverter(AssetConverterBase):
     """Convert Mesh file from URDF into USD format."""
@@ -455,34 +615,34 @@ class AssetConverterFactory:
 if __name__ == "__main__":
-    # target_asset_type = AssetType.MJCF
-    target_asset_type = AssetType.USD
-    urdf_paths = [
-        "outputs/embodiedgen_assets/demo_assets/remote_control/result/remote_control.urdf",
-    ]
-    if target_asset_type == AssetType.MJCF:
-        output_files = [
-            "outputs/embodiedgen_assets/demo_assets/remote_control/mjcf/remote_control.mjcf",
-        ]
-        asset_converter = AssetConverterFactory.create(
-            target_type=AssetType.MJCF,
-            source_type=AssetType.URDF,
-        )
-    elif target_asset_type == AssetType.USD:
-        output_files = [
-            "outputs/embodiedgen_assets/demo_assets/remote_control/usd/remote_control.usd",
-        ]
-        asset_converter = AssetConverterFactory.create(
-            target_type=AssetType.USD,
-            source_type=AssetType.MESH,
-        )
-    with asset_converter:
-        for urdf_path, output_file in zip(urdf_paths, output_files):
-            asset_converter.convert(urdf_path, output_file)
     # urdf_path = "outputs/embodiedgen_assets/demo_assets/remote_control/result/remote_control.urdf"
     # output_file = "outputs/embodiedgen_assets/demo_assets/remote_control/usd/remote_control.usd"
@@ -495,3 +655,9 @@ if __name__ == "__main__":
     # with asset_converter:
     #     asset_converter.convert(urdf_path, output_file)

 import xml.etree.ElementTree as ET
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
+from glob import glob
 from shutil import copy
 import trimesh
             texture=f"texture_{name}",
             reflectance=str(reflectance),
         )
+        for path in glob(f"{input_dir}/{dirname}/*.png"):
+            file_name = os.path.basename(path)
+            self._copy_asset_file(
+                path,
+                f"{output_dir}/{dirname}/{file_name}",
+            )
+            ET.SubElement(
+                mujoco_element,
+                "texture",
+                name=f"texture_{name}_{os.path.splitext(file_name)[0]}",
+                type="2d",
+                file=f"{dirname}/{file_name}",
+            )
         return material
         logger.info(f"Successfully converted {urdf_path} → {mjcf_path}")
+class URDFtoMJCFConverter(MeshtoMJCFConverter):
+    """Convert URDF files with joints to MJCF format, handling transformations from joints."""
+    def add_materials(
+        self,
+        mujoco_element: ET.Element,
+        link: ET.Element,
+        tag: str,
+        input_dir: str,
+        output_dir: str,
+        name: str,
+        reflectance: float = 0.2,
+    ) -> ET.Element:
+        """Add materials to the MJCF asset from the URDF link."""
+        element = link.find(tag)
+        geometry = element.find("geometry")
+        mesh = geometry.find("mesh")
+        filename = mesh.get("filename")
+        dirname = os.path.dirname(filename)
+        diffuse_texture = None
+        for path in glob(f"{input_dir}/{dirname}/*.png"):
+            file_name = os.path.basename(path)
+            self._copy_asset_file(
+                path,
+                f"{output_dir}/{dirname}/{file_name}",
+            )
+            texture_name = f"texture_{name}_{os.path.splitext(file_name)[0]}"
+            ET.SubElement(
+                mujoco_element,
+                "texture",
+                name=texture_name,
+                type="2d",
+                file=f"{dirname}/{file_name}",
+            )
+            if "diffuse" in file_name.lower():
+                diffuse_texture = texture_name
+        if diffuse_texture is None:
+            return None
+        material = ET.SubElement(
+            mujoco_element,
+            "material",
+            name=f"material_{name}",
+            texture=diffuse_texture,
+            reflectance=str(reflectance),
+        )
+        return material
+    def convert(self, urdf_path: str, mjcf_path: str, **kwargs) -> str:
+        """Convert a URDF file with joints to MJCF format."""
+        tree = ET.parse(urdf_path)
+        root = tree.getroot()
+        mujoco_struct = ET.Element("mujoco")
+        mujoco_struct.set("model", root.get("name"))
+        mujoco_asset = ET.SubElement(mujoco_struct, "asset")
+        mujoco_worldbody = ET.SubElement(mujoco_struct, "worldbody")
+        input_dir = os.path.dirname(urdf_path)
+        output_dir = os.path.dirname(mjcf_path)
+        os.makedirs(output_dir, exist_ok=True)
+        # Create a dictionary to store body elements for each link
+        body_dict = {}
+        # Process all links first
+        for idx, link in enumerate(root.findall("link")):
+            link_name = link.get("name", f"unnamed_link_{idx}")
+            body = ET.SubElement(mujoco_worldbody, "body", name=link_name)
+            body_dict[link_name] = body
+            # Add materials and geometry
+            visual_element = link.find("visual")
+            if visual_element is not None:
+                material = self.add_materials(
+                    mujoco_asset,
+                    link,
+                    "visual",
+                    input_dir,
+                    output_dir,
+                    name=str(idx),
+                )
+                self.add_geometry(
+                    mujoco_asset,
+                    link,
+                    body,
+                    "visual",
+                    input_dir,
+                    output_dir,
+                    f"visual_mesh_{idx}",
+                    material,
+                )
+            collision_element = link.find("collision")
+            if collision_element is not None:
+                self.add_geometry(
+                    mujoco_asset,
+                    link,
+                    body,
+                    "collision",
+                    input_dir,
+                    output_dir,
+                    f"collision_mesh_{idx}",
+                    is_collision=True,
+                )
+        # Process joints to set transformations and hierarchy
+        for joint in root.findall("joint"):
+            joint_type = joint.get("type")
+            if joint_type != "fixed":
+                logger.warning(
+                    f"Skipping non-fixed joint: {joint.get('name')}"
+                )
+                continue
+            parent_link = joint.find("parent").get("link")
+            child_link = joint.find("child").get("link")
+            origin = joint.find("origin")
+            if parent_link not in body_dict or child_link not in body_dict:
+                logger.warning(
+                    f"Parent or child link not found for joint: {joint.get('name')}"
+                )
+                continue
+            # Move child body under parent body in MJCF hierarchy
+            child_body = body_dict[child_link]
+            mujoco_worldbody.remove(child_body)
+            parent_body = body_dict[parent_link]
+            parent_body.append(child_body)
+            # Apply joint origin transformation to child body
+            if origin is not None:
+                xyz = origin.get("xyz", "0 0 0")
+                rpy = origin.get("rpy", "0 0 0")
+                child_body.set("pos", xyz)
+                # Convert rpy to MJCF euler format (degrees)
+                rpy_floats = list(map(float, rpy.split()))
+                rotation = Rotation.from_euler(
+                    "xyz", rpy_floats, degrees=False
+                )
+                euler_deg = rotation.as_euler("xyz", degrees=True)
+                child_body.set(
+                    "euler", f"{euler_deg[0]} {euler_deg[1]} {euler_deg[2]}"
+                )
+        tree = ET.ElementTree(mujoco_struct)
+        ET.indent(tree, space="  ", level=0)
+        tree.write(mjcf_path, encoding="utf-8", xml_declaration=True)
+        logger.info(f"Successfully converted {urdf_path} → {mjcf_path}")
+        return mjcf_path
 class MeshtoUSDConverter(AssetConverterBase):
     """Convert Mesh file from URDF into USD format."""
 if __name__ == "__main__":
+    # # target_asset_type = AssetType.MJCF
+    # target_asset_type = AssetType.USD
+    # urdf_paths = [
+    #     "outputs/embodiedgen_assets/demo_assets/remote_control/result/remote_control.urdf",
+    # ]
+    # if target_asset_type == AssetType.MJCF:
+    #     output_files = [
+    #         "outputs/embodiedgen_assets/demo_assets/remote_control/mjcf/remote_control.mjcf",
+    #     ]
+    #     asset_converter = AssetConverterFactory.create(
+    #         target_type=AssetType.MJCF,
+    #         source_type=AssetType.URDF,
+    #     )
+    # elif target_asset_type == AssetType.USD:
+    #     output_files = [
+    #         "outputs/embodiedgen_assets/demo_assets/remote_control/usd/remote_control.usd",
+    #     ]
+    #     asset_converter = AssetConverterFactory.create(
+    #         target_type=AssetType.USD,
+    #         source_type=AssetType.MESH,
+    #     )
+    # with asset_converter:
+    #     for urdf_path, output_file in zip(urdf_paths, output_files):
+    #         asset_converter.convert(urdf_path, output_file)
     # urdf_path = "outputs/embodiedgen_assets/demo_assets/remote_control/result/remote_control.urdf"
     # output_file = "outputs/embodiedgen_assets/demo_assets/remote_control/usd/remote_control.usd"
     # with asset_converter:
     #     asset_converter.convert(urdf_path, output_file)
+    urdf_path = "/home/users/xinjie.wang/xinjie/infinigen/outputs/exports/kitchen_simple_solve_nos_i_urdf/export_scene/scene.urdf"
+    output_file = "/home/users/xinjie.wang/xinjie/infinigen/outputs/exports/kitchen_simple_solve_nos_i_urdf/mjcf/scene.urdf"
+    asset_converter = URDFtoMJCFConverter()
+    with asset_converter:
+        asset_converter.convert(urdf_path, output_file)

embodied_gen/data/convex_decomposer.py CHANGED Viewed

@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__)
 __all__ = [
     "decompose_convex_coacd",
     "decompose_convex_mesh",
-    "decompose_convex_process",
 ]
@@ -37,6 +37,7 @@ def decompose_convex_coacd(
     params: dict,
     verbose: bool = False,
     auto_scale: bool = True,
 ) -> None:
     coacd.set_log_level("info" if verbose else "warn")
@@ -44,14 +45,22 @@ def decompose_convex_coacd(
     mesh = coacd.Mesh(mesh.vertices, mesh.faces)
     result = coacd.run_coacd(mesh, **params)
-    combined = sum([trimesh.Trimesh(*m) for m in result])
     # Compute collision_scale because convex decomposition usually makes the mesh larger.
     if auto_scale:
-        convex_mesh_shape = np.ptp(combined.vertices, axis=0)
         visual_mesh_shape = np.ptp(mesh.vertices, axis=0)
-        rescale = visual_mesh_shape / convex_mesh_shape
-        combined.vertices *= rescale
     combined.export(outfile)
@@ -71,6 +80,7 @@ def decompose_convex_mesh(
     merge: bool = True,
     seed: int = 0,
     auto_scale: bool = True,
     verbose: bool = False,
 ) -> str:
     """Decompose a mesh into convex parts using the CoACD algorithm."""
@@ -95,7 +105,9 @@ def decompose_convex_mesh(
     )
     try:
-        decompose_convex_coacd(filename, outfile, params, verbose, auto_scale)
         if os.path.exists(outfile):
             return outfile
     except Exception as e:
@@ -106,7 +118,7 @@ def decompose_convex_mesh(
         try:
             params["preprocess_mode"] = "on"
             decompose_convex_coacd(
-                filename, outfile, params, verbose, auto_scale
             )
             if os.path.exists(outfile):
                 return outfile

 __all__ = [
     "decompose_convex_coacd",
     "decompose_convex_mesh",
+    "decompose_convex_mp",
 ]
     params: dict,
     verbose: bool = False,
     auto_scale: bool = True,
+    scale_factor: float = 1.0,
 ) -> None:
     coacd.set_log_level("info" if verbose else "warn")
     mesh = coacd.Mesh(mesh.vertices, mesh.faces)
     result = coacd.run_coacd(mesh, **params)
+    meshes = []
+    for v, f in result:
+        meshes.append(trimesh.Trimesh(v, f))
     # Compute collision_scale because convex decomposition usually makes the mesh larger.
     if auto_scale:
+        all_mesh = sum([trimesh.Trimesh(*m) for m in result])
+        convex_mesh_shape = np.ptp(all_mesh.vertices, axis=0)
         visual_mesh_shape = np.ptp(mesh.vertices, axis=0)
+        scale_factor *= visual_mesh_shape / convex_mesh_shape
+    combined = trimesh.Scene()
+    for mesh_part in meshes:
+        mesh_part.vertices *= scale_factor
+        combined.add_geometry(mesh_part)
     combined.export(outfile)
     merge: bool = True,
     seed: int = 0,
     auto_scale: bool = True,
+    scale_factor: float = 1.005,
     verbose: bool = False,
 ) -> str:
     """Decompose a mesh into convex parts using the CoACD algorithm."""
     )
     try:
+        decompose_convex_coacd(
+            filename, outfile, params, verbose, auto_scale, scale_factor
+        )
         if os.path.exists(outfile):
             return outfile
     except Exception as e:
         try:
             params["preprocess_mode"] = "on"
             decompose_convex_coacd(
+                filename, outfile, params, verbose, auto_scale, scale_factor
             )
             if os.path.exists(outfile):
                 return outfile

embodied_gen/data/mesh_operator.py CHANGED Viewed

@@ -403,6 +403,7 @@ class MeshFixer(object):
         )
         mesh.clean(inplace=True)
         mesh.clear_data()
         mesh = mesh.decimate(ratio, progress_bar=True)
         # Update vertices and faces

         )
         mesh.clean(inplace=True)
         mesh.clear_data()
+        mesh = mesh.triangulate()
         mesh = mesh.decimate(ratio, progress_bar=True)
         # Update vertices and faces

embodied_gen/envs/pick_embodiedgen.py CHANGED Viewed

@@ -74,7 +74,9 @@ class PickEmbodiedGen(BaseEnv):
         layout_file = kwargs.pop("layout_file", None)
         replace_objs = kwargs.pop("replace_objs", True)
         self.enable_grasp = kwargs.pop("enable_grasp", False)
-        self.init_quat = kwargs.pop("init_quat", [0.7071, 0, 0, 0.7071])
         # Add small offset in z-axis to avoid collision.
         self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
         self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
@@ -107,7 +109,7 @@ class PickEmbodiedGen(BaseEnv):
         self.bg_images = dict()
         if self.render_mode == "hybrid":
             self.bg_images = self.render_gs3d_images(
-                self.layouts, num_envs, self.init_quat
             )
     @staticmethod

         layout_file = kwargs.pop("layout_file", None)
         replace_objs = kwargs.pop("replace_objs", True)
         self.enable_grasp = kwargs.pop("enable_grasp", False)
+        self.init_3dgs_quat = kwargs.pop(
+            "init_3dgs_quat", [0.7071, 0, 0, 0.7071]
+        )
         # Add small offset in z-axis to avoid collision.
         self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
         self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
         self.bg_images = dict()
         if self.render_mode == "hybrid":
             self.bg_images = self.render_gs3d_images(
+                self.layouts, num_envs, self.init_3dgs_quat
             )
     @staticmethod

embodied_gen/models/layout.py CHANGED Viewed

@@ -77,10 +77,11 @@ LAYOUT_DISASSEMBLE_PROMPT = f"""
     - {Scene3DItemEnum.MANIPULATED_OBJS} and {Scene3DItemEnum.DISTRACTOR_OBJS} must be common
         household or office items or furniture, not abstract concepts, not too small like needle.
     - If the input includes a plural or grouped object (e.g., "pens", "bottles", "plates", "fruit"),
-        you must decompose it into multiple individual instances (e.g., ["pen", "pen"], ["apple", "pear"]).
     - Containers that hold objects (e.g., "bowl of apples", "box of tools") must
-        be separated into individual items (e.g., ["bowl", "apple", "apple"]).
     - Do not include transparent objects such as "glass", "plastic", etc.
     - The output must be in compact JSON format and use Markdown syntax, just like the output in the example below.
     Examples:
@@ -170,7 +171,7 @@ LAYOUT_DISASSEMBLE_PROMPT = f"""
         "robot": "franka",
         "background": "office",
         "context": "table",
-        "manipulated_objs": ["pen", "pen", "grey bowl"],
         "distractor_objs": ["notepad", "cup"]
     }}
     ```

     - {Scene3DItemEnum.MANIPULATED_OBJS} and {Scene3DItemEnum.DISTRACTOR_OBJS} must be common
         household or office items or furniture, not abstract concepts, not too small like needle.
     - If the input includes a plural or grouped object (e.g., "pens", "bottles", "plates", "fruit"),
+        you must decompose it into multiple individual instances (e.g., ["pen1", "pen2"], ["apple", "pear"]).
     - Containers that hold objects (e.g., "bowl of apples", "box of tools") must
+        be separated into individual items (e.g., ["bowl", "apple1", "apple2"]).
     - Do not include transparent objects such as "glass", "plastic", etc.
+    - All {Scene3DItemEnum.MANIPULATED_OBJS} and {Scene3DItemEnum.DISTRACTOR_OBJS} must be child node of {Scene3DItemEnum.CONTEXT}.
     - The output must be in compact JSON format and use Markdown syntax, just like the output in the example below.
     Examples:
         "robot": "franka",
         "background": "office",
         "context": "table",
+        "manipulated_objs": ["pen1", "pen2", "grey bowl"],
         "distractor_objs": ["notepad", "cup"]
     }}
     ```

embodied_gen/scripts/compose_layout.py CHANGED Viewed

@@ -16,6 +16,7 @@
 import json
 import os
 from dataclasses import dataclass
 import tyro
@@ -51,6 +52,14 @@ def entrypoint(**kwargs):
     out_layout_path = f"{output_dir}/layout.json"
     layout_info = bfs_placement(args.layout_path, seed=args.seed)
     with open(out_layout_path, "w") as f:
         json.dump(layout_info.to_dict(), f, indent=4)

 import json
 import os
+import shutil
 from dataclasses import dataclass
 import tyro
     out_layout_path = f"{output_dir}/layout.json"
     layout_info = bfs_placement(args.layout_path, seed=args.seed)
+    origin_dir = os.path.dirname(args.layout_path)
+    for key in layout_info.assets:
+        src = f"{origin_dir}/{layout_info.assets[key]}"
+        dst = f"{output_dir}/{layout_info.assets[key]}"
+        if src == dst:
+            continue
+        shutil.copytree(src, dst, dirs_exist_ok=True)
     with open(out_layout_path, "w") as f:
         json.dump(layout_info.to_dict(), f, indent=4)

embodied_gen/scripts/gen_layout.py CHANGED Viewed

@@ -115,7 +115,19 @@ def entrypoint() -> None:
         # Background GEN (for efficiency, temp use retrieval instead)
         bg_node = layout_info.relation[Scene3DItemEnum.BACKGROUND.value]
         text = layout_info.objs_desc[bg_node]
-        match_key = SCENE_MATCHER.query(text, str(scene_dict))
         match_scene_path = f"{os.path.dirname(args.bg_list)}/{match_key}"
         bg_save_dir = os.path.join(output_root, "background")
         copytree(match_scene_path, bg_save_dir, dirs_exist_ok=True)
@@ -128,7 +140,6 @@ def entrypoint() -> None:
         layout_info = bfs_placement(
             layout_path,
-            limit_reach_range=True if args.insert_robot else False,
             seed=args.seed_layout,
         )
         layout_path = f"{output_root}/layout.json"

         # Background GEN (for efficiency, temp use retrieval instead)
         bg_node = layout_info.relation[Scene3DItemEnum.BACKGROUND.value]
         text = layout_info.objs_desc[bg_node]
+        match_key = SCENE_MATCHER.query(
+            text, str(scene_dict), params=gpt_params
+        )
+        n_max_attempt = 10
+        while match_key not in scene_dict and n_max_attempt > 0:
+            logger.error(
+                f"Cannot find matched scene {match_key}, retrying left {n_max_attempt}..."
+            )
+            match_key = SCENE_MATCHER.query(
+                text, str(scene_dict), params=gpt_params
+            )
+            n_max_attempt -= 1
         match_scene_path = f"{os.path.dirname(args.bg_list)}/{match_key}"
         bg_save_dir = os.path.join(output_root, "background")
         copytree(match_scene_path, bg_save_dir, dirs_exist_ok=True)
         layout_info = bfs_placement(
             layout_path,
             seed=args.seed_layout,
         )
         layout_path = f"{output_root}/layout.json"

embodied_gen/scripts/simulate_sapien.py CHANGED Viewed

@@ -49,7 +49,7 @@ class SapienSimConfig:
     sim_freq: int = 200
     sim_step: int = 400
     z_offset: float = 0.004
-    init_quat: list[float] = field(
         default_factory=lambda: [0.7071, 0, 0, 0.7071]
     )  # xyzw
     device: str = "cuda"
@@ -137,7 +137,7 @@ def entrypoint(**kwargs):
     gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply"
     gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
     x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
-    qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat)
     init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])
     gs_model = gs_model.get_gaussians(instance_pose=init_pose)

     sim_freq: int = 200
     sim_step: int = 400
     z_offset: float = 0.004
+    init_3dgs_quat: list[float] = field(
         default_factory=lambda: [0.7071, 0, 0, 0.7071]
     )  # xyzw
     device: str = "cuda"
     gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply"
     gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
     x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
+    qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_3dgs_quat)
     init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])
     gs_model = gs_model.get_gaussians(instance_pose=init_pose)

embodied_gen/utils/geometry.py CHANGED Viewed

@@ -80,7 +80,7 @@ def pose_to_matrix(pose: list[float]) -> np.ndarray:
 def compute_xy_bbox(
-    vertices: np.ndarray, col_x: int = 0, col_y: int = 2
 ) -> list[float]:
     x_vals = vertices[:, col_x]
     y_vals = vertices[:, col_y]
@@ -137,13 +137,16 @@ def with_seed(seed_attr_name: str = "seed"):
 def compute_convex_hull_path(
     vertices: np.ndarray,
     z_threshold: float = 0.05,
-    interp_per_edge: int = 3,
     margin: float = -0.02,
 ) -> Path:
     top_vertices = vertices[
-        vertices[:, 1] > vertices[:, 1].max() - z_threshold
     ]
-    top_xy = top_vertices[:, [0, 2]]
     if len(top_xy) < 3:
         raise ValueError("Not enough points to form a convex hull")
@@ -184,11 +187,11 @@ def all_corners_inside(hull: Path, box: list, threshold: int = 3) -> bool:
 def compute_axis_rotation_quat(
     axis: Literal["x", "y", "z"], angle_rad: float
 ) -> list[float]:
-    if axis.lower() == 'x':
         q = Quaternion(axis=[1, 0, 0], angle=angle_rad)
-    elif axis.lower() == 'y':
         q = Quaternion(axis=[0, 1, 0], angle=angle_rad)
-    elif axis.lower() == 'z':
         q = Quaternion(axis=[0, 0, 1], angle=angle_rad)
     else:
         raise ValueError(f"Unsupported axis '{axis}', must be one of x, y, z")
@@ -226,12 +229,36 @@ def bfs_placement(
     floor_margin: float = 0,
     beside_margin: float = 0.1,
     max_attempts: int = 3000,
     rotate_objs: bool = True,
     rotate_bg: bool = True,
-    limit_reach_range: bool = True,
     robot_dim: float = 0.12,
     seed: int = None,
 ) -> LayoutInfo:
     layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
     asset_dir = os.path.dirname(layout_file)
     object_mapping = layout_info.objs_mapping
@@ -259,13 +286,23 @@ def bfs_placement(
         mesh_path = os.path.join(asset_dir, mesh_path)
         mesh_info[node]["path"] = mesh_path
         mesh = trimesh.load(mesh_path)
-        vertices = mesh.vertices
-        z1 = np.percentile(vertices[:, 1], 1)
-        z2 = np.percentile(vertices[:, 1], 99)
         if object_mapping[node] == Scene3DItemEnum.CONTEXT.value:
             object_quat = [0, 0, 0, 1]
             mesh_info[node]["surface"] = compute_convex_hull_path(vertices)
             # Put robot in the CONTEXT edge.
             x, y = random.choice(mesh_info[node]["surface"].vertices)
             theta = np.arctan2(y, x)
@@ -288,9 +325,7 @@ def bfs_placement(
                 axis="z", angle_rad=angle_rad
             )
             rotation = R.from_quat(object_quat).as_matrix()
-            vertices = np.dot(mesh.vertices, rotation.T)
-            z1 = np.percentile(vertices[:, 1], 1)
-            z2 = np.percentile(vertices[:, 1], 99)
         x1, x2, y1, y2 = compute_xy_bbox(vertices)
         mesh_info[node]["pose"] = [x1, x2, y1, y2, z1, z2, *object_quat]
@@ -343,20 +378,40 @@ def bfs_placement(
                         continue
                     # Make sure the manipulated object is reachable by robot.
                     if (
-                        limit_reach_range
                         and object_mapping[node]
                         == Scene3DItemEnum.MANIPULATED_OBJS.value
                     ):
                         cx = parent_pos[0] + node_box[0] + obj_dx / 2
                         cy = parent_pos[1] + node_box[2] + obj_dy / 2
                         cz = parent_pos[2] + p_z2 - z1
-                        robot_pose = position[robot_node][:3]
                         if not check_reachable(
-                            base_xyz=np.array(robot_pose),
                             reach_xyz=np.array([cx, cy, cz]),
                         ):
                             continue
                     if not has_iou_conflict(
                         node_box, placed_boxes_map[parent_node]
                     ):

 def compute_xy_bbox(
+    vertices: np.ndarray, col_x: int = 0, col_y: int = 1
 ) -> list[float]:
     x_vals = vertices[:, col_x]
     y_vals = vertices[:, col_y]
 def compute_convex_hull_path(
     vertices: np.ndarray,
     z_threshold: float = 0.05,
+    interp_per_edge: int = 10,
     margin: float = -0.02,
+    x_axis: int = 0,
+    y_axis: int = 1,
+    z_axis: int = 2,
 ) -> Path:
     top_vertices = vertices[
+        vertices[:, z_axis] > vertices[:, z_axis].max() - z_threshold
     ]
+    top_xy = top_vertices[:, [x_axis, y_axis]]
     if len(top_xy) < 3:
         raise ValueError("Not enough points to form a convex hull")
 def compute_axis_rotation_quat(
     axis: Literal["x", "y", "z"], angle_rad: float
 ) -> list[float]:
+    if axis.lower() == "x":
         q = Quaternion(axis=[1, 0, 0], angle=angle_rad)
+    elif axis.lower() == "y":
         q = Quaternion(axis=[0, 1, 0], angle=angle_rad)
+    elif axis.lower() == "z":
         q = Quaternion(axis=[0, 0, 1], angle=angle_rad)
     else:
         raise ValueError(f"Unsupported axis '{axis}', must be one of x, y, z")
     floor_margin: float = 0,
     beside_margin: float = 0.1,
     max_attempts: int = 3000,
+    init_rpy: tuple = (1.5708, 0.0, 0.0),
     rotate_objs: bool = True,
     rotate_bg: bool = True,
+    rotate_context: bool = True,
+    limit_reach_range: tuple[float, float] | None = (0.20, 0.85),
+    max_orient_diff: float | None = 60,
     robot_dim: float = 0.12,
     seed: int = None,
 ) -> LayoutInfo:
+    """Place objects in the layout using BFS traversal.
+    Args:
+        layout_file: Path to the JSON file defining the layout structure and assets.
+        floor_margin: Z-offset for the background object, typically for objects placed on the floor.
+        beside_margin: Minimum margin for objects placed 'beside' their parent, used when 'on' placement fails.
+        max_attempts: Maximum number of attempts to find a non-overlapping position for an object.
+        init_rpy: Initial roll-pitch-yaw rotation, in radians, applied to all object meshes to align the mesh's
+            coordinate system with the world's (e.g., Z-up).
+        rotate_objs: If True, apply a random rotation around the Z-axis for manipulated and distractor objects.
+        rotate_bg: If True, apply a random rotation around the Y-axis for the background object.
+        rotate_context: If True, apply a random rotation around the Z-axis for the context object.
+        limit_reach_range: If set, a (min_reach, max_reach) pair in meters; manipulated objects must be within this reach range of the robot.
+        max_orient_diff: If set, the maximum allowed angle, in degrees, between the robot-to-origin direction and the robot-to-object direction for manipulated objects.
+        robot_dim: The approximate dimension (e.g., diameter) of the robot for box representation.
+        seed: Random seed for reproducible placement.
+    Returns:
+        A :class:`LayoutInfo` object containing the objects and their final computed 7D poses
+        ([x, y, z, qx, qy, qz, qw]).
+    """
     layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
     asset_dir = os.path.dirname(layout_file)
     object_mapping = layout_info.objs_mapping
         mesh_path = os.path.join(asset_dir, mesh_path)
         mesh_info[node]["path"] = mesh_path
         mesh = trimesh.load(mesh_path)
+        rotation = R.from_euler("xyz", init_rpy, degrees=False)
+        vertices = mesh.vertices @ rotation.as_matrix().T
+        z1 = np.percentile(vertices[:, 2], 1)
+        z2 = np.percentile(vertices[:, 2], 99)
         if object_mapping[node] == Scene3DItemEnum.CONTEXT.value:
             object_quat = [0, 0, 0, 1]
+            if rotate_context:
+                angle_rad = np.random.uniform(0, 2 * np.pi)
+                object_quat = compute_axis_rotation_quat(
+                    axis="z", angle_rad=angle_rad
+                )
+                rotation = R.from_quat(object_quat).as_matrix()
+                vertices = vertices @ rotation.T
             mesh_info[node]["surface"] = compute_convex_hull_path(vertices)
             # Put robot in the CONTEXT edge.
             x, y = random.choice(mesh_info[node]["surface"].vertices)
             theta = np.arctan2(y, x)
                 axis="z", angle_rad=angle_rad
             )
             rotation = R.from_quat(object_quat).as_matrix()
+            vertices = vertices @ rotation.T
         x1, x2, y1, y2 = compute_xy_bbox(vertices)
         mesh_info[node]["pose"] = [x1, x2, y1, y2, z1, z2, *object_quat]
                         continue
                     # Make sure the manipulated object is reachable by robot.
                     if (
+                        limit_reach_range is not None
                         and object_mapping[node]
                         == Scene3DItemEnum.MANIPULATED_OBJS.value
                     ):
                         cx = parent_pos[0] + node_box[0] + obj_dx / 2
                         cy = parent_pos[1] + node_box[2] + obj_dy / 2
                         cz = parent_pos[2] + p_z2 - z1
+                        robot_pos = position[robot_node][:3]
                         if not check_reachable(
+                            base_xyz=np.array(robot_pos),
                             reach_xyz=np.array([cx, cy, cz]),
+                            min_reach=limit_reach_range[0],
+                            max_reach=limit_reach_range[1],
                         ):
                             continue
+                    # Make sure the manipulated object lies within the robot's allowed orientation range.
+                    if (
+                        max_orient_diff is not None
+                        and object_mapping[node]
+                        == Scene3DItemEnum.MANIPULATED_OBJS.value
+                    ):
+                        cx = parent_pos[0] + node_box[0] + obj_dx / 2
+                        cy = parent_pos[1] + node_box[2] + obj_dy / 2
+                        cx2, cy2 = position[robot_node][:2]
+                        v1 = np.array([-cx2, -cy2])
+                        v2 = np.array([cx - cx2, cy - cy2])
+                        dot = np.dot(v1, v2)
+                        norms = np.linalg.norm(v1) * np.linalg.norm(v2)
+                        theta = np.arccos(np.clip(dot / norms, -1.0, 1.0))
+                        theta = np.rad2deg(theta)
+                        if theta > max_orient_diff:
+                            continue
                     if not has_iou_conflict(
                         node_box, placed_boxes_map[parent_node]
                     ):

embodied_gen/validators/quality_checkers.py CHANGED Viewed

@@ -513,21 +513,23 @@ class SemanticMatcher(BaseChecker):
             - If there are fewer than <return_num> distinct relevant matches, repeat the closest ones to make a list of <return_num>.
             - Only output the list of <return_num> scene IDs, sorted from most to less similar.
             - Do NOT use markdown, JSON code blocks, or any formatting syntax, only return a plain list like ["id1", ...].
             Input example:
             Dictionary:
             "{{
-            "t_scene_008": "A study room with full bookshelves and a lamp in the corner.",
             "t_scene_019": "A child's bedroom with pink walls and a small desk.",
             "t_scene_020": "A living room with a wooden floor.",
             "t_scene_021": "A living room with toys scattered on the floor.",
             ...
-            "t_scene_office_001": "A very spacious, modern open-plan office with wide desks and no people, panoramic view."
             }}"
             Text:
             "A traditional indoor room"
             Output:
-            '["t_scene_office_001", ...]'
             Input:
             Dictionary:
@@ -552,9 +554,8 @@ class SemanticMatcher(BaseChecker):
 def test_semantic_matcher(
-    bg_file: str = "outputs/bg_scenes/bg_scene_list.txt",
 ):
-    bg_file = "outputs/bg_scenes/bg_scene_list.txt"
     scene_dict = {}
     with open(bg_file, "r") as f:
         for line in f:
@@ -575,7 +576,7 @@ def test_semantic_matcher(
     #     "presence_penalty": 0.3,
     # }
     gpt_params = None
-    match_key = SCENE_MATCHER.query(text, str(scene_dict))
     print(match_key, ",", scene_dict[match_key])

             - If there are fewer than <return_num> distinct relevant matches, repeat the closest ones to make a list of <return_num>.
             - Only output the list of <return_num> scene IDs, sorted from most to less similar.
             - Do NOT use markdown, JSON code blocks, or any formatting syntax, only return a plain list like ["id1", ...].
+            - The returned scene ID must exist in the dictionary and be in exactly the same format. For example,
+                if the key in the dictionary is "scene_0040", return "scene_0040"; if it is "scene_040", return "scene_040".
             Input example:
             Dictionary:
             "{{
+            "t_scene_0008": "A study room with full bookshelves and a lamp in the corner.",
             "t_scene_019": "A child's bedroom with pink walls and a small desk.",
             "t_scene_020": "A living room with a wooden floor.",
             "t_scene_021": "A living room with toys scattered on the floor.",
             ...
+            "t_scene_office_0001": "A very spacious, modern open-plan office with wide desks and no people, panoramic view."
             }}"
             Text:
             "A traditional indoor room"
             Output:
+            '["t_scene_office_0001", ...]'
             Input:
             Dictionary:
 def test_semantic_matcher(
+    bg_file: str = "outputs/bg_scenes/scene_list.txt",
 ):
     scene_dict = {}
     with open(bg_file, "r") as f:
         for line in f:
     #     "presence_penalty": 0.3,
     # }
     gpt_params = None
+    match_key = SCENE_MATCHER.query(text, str(scene_dict), params=gpt_params)
     print(match_key, ",", scene_dict[match_key])

embodied_gen/validators/urdf_convertor.py CHANGED Viewed

@@ -282,16 +282,12 @@ class URDFGenerator(object):
                     d_params = dict(
                         threshold=0.05, max_convex_hull=100, verbose=False
                     )
-                    filename = f"{os.path.splitext(obj_name)[0]}_collision.ply"
                     output_path = os.path.join(mesh_folder, filename)
                     decompose_convex_mesh(
                         mesh_output_path, output_path, **d_params
                     )
-                    obj_filename = filename.replace(".ply", ".obj")
-                    trimesh.load(output_path).export(
-                        f"{mesh_folder}/{obj_filename}"
-                    )
-                    collision_mesh = f"{self.output_mesh_dir}/{obj_filename}"
                 except Exception as e:
                     logger.warning(
                         f"Convex decomposition failed for {output_path}, {e}."

                     d_params = dict(
                         threshold=0.05, max_convex_hull=100, verbose=False
                     )
+                    filename = f"{os.path.splitext(obj_name)[0]}_collision.obj"
                     output_path = os.path.join(mesh_folder, filename)
                     decompose_convex_mesh(
                         mesh_output_path, output_path, **d_params
                     )
+                    collision_mesh = f"{self.output_mesh_dir}/{filename}"
                 except Exception as e:
                     logger.warning(
                         f"Convex decomposition failed for {output_path}, {e}."