Spaces:
Running
Running
| import os | |
| from copy import deepcopy | |
| import imageio | |
| import open3d as o3d | |
| import numpy as np | |
| from PIL import Image, ImageChops | |
# RGB triple written into the point cloud colors for every point of the moving part.
POINT_COLOR = [1, 0, 0] # red for demonstration
# NOTE(review): ARROW_COLOR is not referenced anywhere in the visible part of this file -
# presumably intended for painting the axis arrow; confirm against callers.
ARROW_COLOR = [0, 1, 0] # green
# File suffixes accepted by batch_trim; matched with str.endswith, so the check is case-sensitive.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")
def generate_rotation_visualization(
    pcd: o3d.geometry.PointCloud,
    axis_arrow: o3d.geometry.TriangleMesh,
    mask: np.ndarray,
    axis_vector: np.ndarray,
    origin: np.ndarray,
    range_min: float,
    range_max: float,
    num_samples: int,
    output_dir: str,
) -> None:
    """
    Generate visualization files for a rotation motion of a part.

    One off-screen frame is rendered per sampled angle and written to ``{output_dir}/{idx}.png``, where ``idx`` runs
    from 0 to ``num_samples - 1``. The inputs ``pcd`` and ``axis_arrow`` are deep-copied for every frame and are not
    modified. ``output_dir`` is not created by this function.

    :param pcd: point cloud object representing 2D image input (RGBD) as a point cloud
    :param axis_arrow: mesh object representing axis arrow of rotation to be rendered in visualization
    :param mask: mask np.array of dimensions (height, width) representing the part to be rotated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of rotation
    :param origin: np.array of dimensions (3, ) representing the origin point of the axis of rotation
    :param range_min: float representing the minimum range of motion in radians
    :param range_max: float representing the maximum range of motion in radians
    :param num_samples: number of sample states to visualize in between range_min and range_max of motion
    :param output_dir: string path to directory in which to save visualization output
    """
    # Fixed tilt about the x-axis applied to every frame (1.1 * pi rad = 198 degrees). It does not depend on the
    # sampled angle, so it is computed once.
    tilt_angle = np.pi * 5.5 / 5
    tilt_matrix = o3d.geometry.get_rotation_matrix_from_axis_angle(np.asarray([1, 0, 0]) * tilt_angle)

    # The range is already given in radians, so the samples are passed to rotate_part directly.
    for idx, angle_rad in enumerate(np.linspace(range_min, range_max, num_samples)):
        # Work on copies so the caller's geometries stay untouched between frames.
        rotated_pcd = rotate_part(deepcopy(pcd), mask, axis_vector, origin, angle_rad)
        rotated_arrow = deepcopy(axis_arrow)

        # A fresh hidden window per frame; always destroyed, even if rendering fails.
        vis = o3d.visualization.Visualizer()
        vis.create_window(visible=False)
        try:
            vis.add_geometry(rotated_pcd)
            vis.add_geometry(rotated_arrow)

            # NOTE(review): the tilt is applied after the geometries were added and without calling
            # update_geometry; this ordering is kept as-is because it determines the rendered view - confirm.
            # Both geometries pivot about the point cloud's center so they stay aligned with each other.
            pivot = rotated_pcd.get_center()
            rotated_pcd.rotate(tilt_matrix, center=pivot)
            rotated_arrow.rotate(tilt_matrix, center=pivot)

            output_filename = f"{output_dir}/{idx}.png"
            vis.capture_screen_image(output_filename, do_render=True)
        finally:
            vis.destroy_window()
def generate_translation_visualization(
    pcd: o3d.geometry.PointCloud,
    axis_arrow: o3d.geometry.TriangleMesh,
    mask: np.ndarray,
    end: np.ndarray,
    range_min: float,
    range_max: float,
    num_samples: int,
    output_dir: str,
) -> None:
    """
    Generate visualization files for a translation motion of a part.

    One off-screen frame is rendered per sampled distance and written to ``{output_dir}/{idx}.png``, where ``idx``
    runs from 0 to ``num_samples - 1``. The inputs ``pcd`` and ``axis_arrow`` are deep-copied for every frame and are
    not modified. ``output_dir`` is not created by this function.

    :param pcd: point cloud object representing 2D image input (RGBD) as a point cloud
    :param axis_arrow: mesh object representing axis arrow of translation to be rendered in visualization
    :param mask: mask np.array of dimensions (height, width) representing the part to be translated in the image
    :param end: np.array of dimensions (3, ) handed to translate_part as the axis vector of the translation
    :param range_min: float representing the minimum range of motion
    :param range_max: float representing the maximum range of motion
    :param num_samples: number of sample states to visualize in between range_min and range_max of motion
    :param output_dir: string path to directory in which to save visualization output
    """
    # Fixed tilt about the x-axis applied to every frame (1.1 * pi rad = 198 degrees); loop-invariant.
    # TODO: not sure why we need this rotation for the translation, and when it would be desired
    tilt_angle = np.pi * 5.5 / 5
    tilt_matrix = o3d.geometry.get_rotation_matrix_from_axis_angle(np.asarray([1, 0, 0]) * tilt_angle)

    for idx, translate_distance in enumerate(np.linspace(range_min, range_max, num_samples)):
        # Work on copies so the caller's geometries stay untouched between frames.
        # .item() converts the numpy scalar into a plain Python float.
        translated_pcd = translate_part(deepcopy(pcd), mask, end, translate_distance.item())
        translated_arrow = deepcopy(axis_arrow)

        # A fresh hidden window per frame; always destroyed, even if rendering fails.
        vis = o3d.visualization.Visualizer()
        vis.create_window(visible=False)
        try:
            vis.add_geometry(translated_pcd)
            vis.add_geometry(translated_arrow)

            # NOTE(review): the tilt is applied after the geometries were added and without calling
            # update_geometry; this ordering is kept as-is because it determines the rendered view - confirm.
            # Both geometries pivot about the point cloud's center so they stay aligned with each other.
            pivot = translated_pcd.get_center()
            translated_pcd.rotate(tilt_matrix, center=pivot)
            translated_arrow.rotate(tilt_matrix, center=pivot)

            output_filename = f"{output_dir}/{idx}.png"
            vis.capture_screen_image(output_filename, do_render=True)
        finally:
            vis.destroy_window()
def get_rotation_matrix_from_vectors(vec1: np.ndarray, vec2: np.ndarray) -> np.ndarray:
    """
    Find the rotation matrix that aligns vec1 to vec2.

    Both vectors are normalized internally, so only their directions matter. Parallel and antiparallel inputs are
    handled explicitly, because the general formula divides by the squared norm of the cross product, which is zero
    in those cases. Zero-length inputs are not supported (normalization produces NaNs).

    :param vec1: A 3d "source" vector
    :param vec2: A 3d "destination" vector
    :return: A transform matrix (3x3) which when applied to vec1, aligns it with vec2.
    """
    a = np.asarray(vec1, dtype=float).reshape(3)
    b = np.asarray(vec2, dtype=float).reshape(3)
    a = a / np.linalg.norm(a)
    b = b / np.linalg.norm(b)

    v = np.cross(a, b)  # rotation axis scaled by sin(theta)
    c = np.dot(a, b)  # cos(theta)
    s = np.linalg.norm(v)  # sin(theta)

    if s < 1e-12:
        if c > 0:
            # Same direction: nothing to rotate.
            return np.eye(3)
        # Opposite direction: rotate by pi about any axis perpendicular to a. The coordinate axis along which a
        # has its smallest component is never parallel to a, so the cross product below is well-defined.
        helper = np.zeros(3)
        helper[np.argmin(np.abs(a))] = 1.0
        u = np.cross(a, helper)
        u = u / np.linalg.norm(u)
        # Rotation by pi about unit axis u: R = 2 * u u^T - I.
        return 2.0 * np.outer(u, u) - np.eye(3)

    # Skew-symmetric cross-product matrix of v, then the Rodrigues formula expressed via sin/cos.
    kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
    rotation_matrix = np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s**2))
    return rotation_matrix
def draw_line(start_point: np.ndarray, end_point: np.ndarray) -> o3d.geometry.TriangleMesh:
    """
    Generate 3D arrow mesh anchored at start_point and pointing towards end_point.

    Only the direction from start_point to end_point is used: the shaft and head have fixed sizes, so the arrow does
    not stretch to reach end_point. start_point and end_point must differ, otherwise the direction cannot be
    normalized.

    :param start_point: np.ndarray of dimensions (3, ) representing the start point of the axis
    :param end_point: np.ndarray of dimensions (3, ) representing the end point of the axis
    :return: mesh object (cylinder shaft merged with cone head) representing the axis direction
    """
    # Fixed arrow dimensions, in the same units as the point cloud coordinates.
    cylinder_length = 0.9  # shaft length
    cylinder_radius = 0.01  # shaft thickness
    cone_height = 0.1  # arrowhead length
    cone_radius = 0.03  # arrowhead width
    cone_offset = 0.4  # distance along the direction from start_point at which the head is placed

    # Unit direction of the axis.
    direction_vector = end_point - start_point
    normalized_vector = direction_vector / np.linalg.norm(direction_vector)

    # Rotation that turns the +Z axis (the axis the primitives are rotated from) onto the desired direction.
    target_vector = np.array([0, 0, 1])
    rot_mat = get_rotation_matrix_from_vectors(target_vector, normalized_vector)

    # Shaft: rotate about the world origin, then move to start_point.
    # NOTE(review): no offset is applied before rotating, so where the shaft sits relative to start_point depends on
    # where create_cylinder places the primitive relative to the origin - confirm against the Open3D docs.
    cylinder = o3d.geometry.TriangleMesh.create_cylinder(radius=cylinder_radius, height=cylinder_length)
    cylinder.rotate(rot_mat, center=[0, 0, 0])
    cylinder.translate(start_point)

    # Head: rotate about the world origin, then move along the axis direction.
    cone = o3d.geometry.TriangleMesh.create_cone(radius=cone_radius, height=cone_height)
    cone.rotate(rot_mat, center=[0, 0, 0])
    cone.translate(start_point + normalized_vector * cone_offset)

    return cylinder + cone
def rotate_part(
    pcd: o3d.geometry.PointCloud, mask: np.ndarray, axis_vector: np.ndarray, origin: np.ndarray, angle_rad: float
) -> o3d.geometry.PointCloud:
    """
    Generate rotated point cloud of mask based on provided angle around axis.

    The point cloud is modified in place and also returned. Points are matched to mask pixels in row-major order
    (pixel (i, j) corresponds to point index ``i * width + j``). Every selected point is rotated about the axis
    through ``origin`` and painted with POINT_COLOR.

    :param pcd: point cloud object representing points of image
    :param mask: mask np.array of dimensions (height, width) representing the part to be rotated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of rotation; it is normalized
        here because Rodrigues' formula is only a rotation for a unit axis
    :param origin: np.array of dimensions (3, ) representing the origin point of the axis of rotation
    :param angle_rad: angle in radians to rotate mask part
    :return: point cloud object after rotation of masked part
    """
    # Coordinates and colors of the point cloud as numpy arrays.
    points_np = np.asarray(pcd.points)
    colors_np = np.asarray(pcd.colors)

    # Rodrigues' formula requires a unit axis. A zero vector is left untouched, which yields the identity matrix.
    axis = np.asarray(axis_vector, dtype=float).reshape(3)
    axis_norm = np.linalg.norm(axis)
    if axis_norm > 0:
        axis = axis / axis_norm

    # Skew-symmetric cross-product matrix of the rotation axis.
    K = np.array(
        [
            [0, -axis[2], axis[1]],
            [axis[2], 0, -axis[0]],
            [-axis[1], axis[0], 0],
        ]
    )
    # Rotation matrix from Rodrigues' formula.
    R = np.eye(3) + np.sin(angle_rad) * K + (1 - np.cos(angle_rad)) * np.dot(K, K)

    # Flat (row-major) indices of all pixels that belong to the part.
    part_indices = np.flatnonzero(np.asarray(mask).reshape(-1) > 0)

    # Shift so the axis origin is at the world origin, rotate, shift back. Points are stored as rows, so
    # multiplying by R.T on the right applies R to each point.
    pivot = np.asarray(origin, dtype=float).reshape(3)
    points_np[part_indices] = (points_np[part_indices] - pivot) @ R.T + pivot
    colors_np[part_indices] = POINT_COLOR

    # Write coordinates and colors back to the point cloud.
    pcd.points = o3d.utility.Vector3dVector(points_np)
    pcd.colors = o3d.utility.Vector3dVector(colors_np)
    return pcd
def translate_part(
    pcd: o3d.geometry.PointCloud, mask: np.ndarray, axis_vector: np.ndarray, distance: float
) -> o3d.geometry.PointCloud:
    """
    Generate translated point cloud of mask based on provided distance along axis.

    The point cloud is modified in place and also returned. Points are matched to mask pixels in row-major order
    (pixel (i, j) corresponds to point index ``i * width + j``). Every selected point is moved by ``distance`` along
    the normalized ``axis_vector`` and painted with POINT_COLOR.

    :param pcd: point cloud object representing points of image
    :param mask: mask np.array of dimensions (height, width) representing the part to be translated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of translation; only its
        direction is used, and it must be non-zero
    :param distance: distance within coordinate system to translate mask part
    :return: point cloud object after translation of masked part
    """
    # Offset applied to every point of the part.
    normalized_vector = axis_vector / np.linalg.norm(axis_vector)
    translation_vector = normalized_vector * distance

    # Colors and coordinates of the point cloud as numpy arrays.
    colors_np = np.asarray(pcd.colors)
    points_np = np.asarray(pcd.points)

    # Flat (row-major) indices of all pixels that belong to the part.
    part_indices = np.flatnonzero(np.asarray(mask).reshape(-1) > 0)
    colors_np[part_indices] = POINT_COLOR
    points_np[part_indices] += translation_vector

    # Write colors and coordinates back to the point cloud.
    pcd.colors = o3d.utility.Vector3dVector(colors_np)
    pcd.points = o3d.utility.Vector3dVector(points_np)
    return pcd
def batch_trim(images_path: str, save_path: str, identical: bool = False) -> None:
    """
    Trim uniform borders from all images in the given path and save new images to folder.

    The border color is taken from each image's top-left pixel, so "whitespace" means any border matching that pixel.

    :param images_path: local path to folder containing all images. Images must have the extension ".png", ".jpg", or
        ".jpeg".
    :param save_path: local path to folder in which to save trimmed images
    :param identical: if True, will apply same crop to all images and save them as "{idx}.png" in frame order, else
        each image will have its whitespace trimmed independently and keep its file name. Note that in the latter
        case, each image may have a slightly different size.
    """

    def get_trim(im):
        """Return the bounding box of the non-border content of an image, or None if nothing stands out."""
        # Solid image filled with the top-left pixel, used as the background reference.
        bg = Image.new(im.mode, im.size, im.getpixel((0, 0)))
        diff = ImageChops.difference(im, bg)
        # Amplify the difference and subtract an offset so that faint deviations are ignored.
        diff = ImageChops.add(diff, diff, 2.0, -100)
        return diff.getbbox()

    def frame_order(file_name):
        """Sort key placing numerically named frames in numeric order (2.png before 10.png)."""
        stem = os.path.splitext(file_name)[0]
        if stem.isdecimal():
            return (0, int(stem), file_name)
        return (1, 0, file_name)

    image_files = [name for name in os.listdir(images_path) if name.endswith(IMAGE_EXTENSIONS)]

    if not identical:  # trim each image separately
        for image_file in image_files:
            with Image.open(os.path.join(images_path, image_file)) as im:
                bbox = get_trim(im)
                trimmed = im.crop(bbox) if bbox else im
                trimmed.save(os.path.join(save_path, image_file))
        return

    images = []
    try:
        # Load all images in frame order; a plain lexicographic sort would put "10.png" before "2.png" and shuffle
        # the frames when they are re-indexed below.
        for image_file in sorted(image_files, key=frame_order):
            images.append(Image.open(os.path.join(images_path, image_file)))

        # The shared crop box is the union of the individual bounding boxes.
        optimal_box = None
        for im in images:
            bbox = get_trim(im)
            if bbox is None:
                bbox = (0, 0, im.size[0], im.size[1])  # bound entire image
            if optimal_box is None:
                optimal_box = bbox
            else:
                optimal_box = (
                    min(optimal_box[0], bbox[0]),
                    min(optimal_box[1], bbox[1]),
                    max(optimal_box[2], bbox[2]),
                    max(optimal_box[3], bbox[3]),
                )

        # Image.crop returns a new image rather than cropping in place, so the returned image is what gets saved.
        for idx, im in enumerate(images):
            cropped = im.crop(optimal_box)
            cropped.save(os.path.join(save_path, f"{idx}.png"))
    finally:
        # Release every opened file handle, even if loading or saving failed part-way.
        for im in images:
            im.close()
def create_gif(image_folder_path: str, num_samples: int, gif_filename: str = "output.gif") -> None:
    """
    Compile numbered frames from a folder into an animated gif saved in that same folder.

    :param image_folder_path: path to folder containing images (non-recursive). Frames are expected to be named
        {i}.png for each i from 0 up to, but not including, num_samples.
    :param num_samples: number of sampled images to compile into gif.
    :param gif_filename: filename for gif, defaults to "output.gif"
    """
    # Load the frames 0.png, 1.png, ... in order.
    images = []
    for i in range(num_samples):
        images.append(imageio.imread(f"{image_folder_path}/{i}.png"))

    # Every frame must have the same shape as the first one.
    shapes_match = all(images[0].shape == im.shape for im in images)
    assert shapes_match, f"Found some images with a different shape: {[im.shape for im in images]}"

    # Write the animation next to the frames, 0.1 passed as the per-frame duration.
    imageio.mimsave(f"{image_folder_path}/{gif_filename}", images, duration=0.1)