Spaces:
Running
on
Zero
Running
on
Zero
| # Copyright (c) 2017-present, Facebook, Inc. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| ############################################################################## | |
| """Keypoint utilities (somewhat specific to COCO keypoints).""" | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| from __future__ import unicode_literals | |
| import cv2 | |
| import numpy as np | |
| import torch | |
| import torch.nn.functional as F | |
| import torch.cuda.comm | |
| # from core.config import cfg | |
| # import utils.blob as blob_utils | |
| def get_keypoints(): | |
| """Get the COCO keypoints and their left/right flip coorespondence map.""" | |
| # Keypoints are not available in the COCO json for the test split, so we | |
| # provide them here. | |
| keypoints = [ | |
| 'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear', 'left_shoulder', 'right_shoulder', | |
| 'left_elbow', 'right_elbow', 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', | |
| 'left_knee', 'right_knee', 'left_ankle', 'right_ankle' | |
| ] | |
| keypoint_flip_map = { | |
| 'left_eye': 'right_eye', | |
| 'left_ear': 'right_ear', | |
| 'left_shoulder': 'right_shoulder', | |
| 'left_elbow': 'right_elbow', | |
| 'left_wrist': 'right_wrist', | |
| 'left_hip': 'right_hip', | |
| 'left_knee': 'right_knee', | |
| 'left_ankle': 'right_ankle' | |
| } | |
| return keypoints, keypoint_flip_map | |
| def get_person_class_index(): | |
| """Index of the person class in COCO.""" | |
| return 1 | |
| def flip_keypoints(keypoints, keypoint_flip_map, keypoint_coords, width): | |
| """Left/right flip keypoint_coords. keypoints and keypoint_flip_map are | |
| accessible from get_keypoints(). | |
| """ | |
| flipped_kps = keypoint_coords.copy() | |
| for lkp, rkp in keypoint_flip_map.items(): | |
| lid = keypoints.index(lkp) | |
| rid = keypoints.index(rkp) | |
| flipped_kps[:, :, lid] = keypoint_coords[:, :, rid] | |
| flipped_kps[:, :, rid] = keypoint_coords[:, :, lid] | |
| # Flip x coordinates | |
| flipped_kps[:, 0, :] = width - flipped_kps[:, 0, :] - 1 | |
| # Maintain COCO convention that if visibility == 0, then x, y = 0 | |
| inds = np.where(flipped_kps[:, 2, :] == 0) | |
| flipped_kps[inds[0], 0, inds[1]] = 0 | |
| return flipped_kps | |
| def flip_heatmaps(heatmaps): | |
| """Flip heatmaps horizontally.""" | |
| keypoints, flip_map = get_keypoints() | |
| heatmaps_flipped = heatmaps.copy() | |
| for lkp, rkp in flip_map.items(): | |
| lid = keypoints.index(lkp) | |
| rid = keypoints.index(rkp) | |
| heatmaps_flipped[:, rid, :, :] = heatmaps[:, lid, :, :] | |
| heatmaps_flipped[:, lid, :, :] = heatmaps[:, rid, :, :] | |
| heatmaps_flipped = heatmaps_flipped[:, :, :, ::-1] | |
| return heatmaps_flipped | |
| def heatmaps_to_keypoints(maps, rois): | |
| """Extract predicted keypoint locations from heatmaps. Output has shape | |
| (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob) | |
| for each keypoint. | |
| """ | |
| # This function converts a discrete image coordinate in a HEATMAP_SIZE x | |
| # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain | |
| # consistency with keypoints_to_heatmap_labels by using the conversion from | |
| # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a | |
| # continuous coordinate. | |
| offset_x = rois[:, 0] | |
| offset_y = rois[:, 1] | |
| widths = rois[:, 2] - rois[:, 0] | |
| heights = rois[:, 3] - rois[:, 1] | |
| widths = np.maximum(widths, 1) | |
| heights = np.maximum(heights, 1) | |
| widths_ceil = np.ceil(widths) | |
| heights_ceil = np.ceil(heights) | |
| # NCHW to NHWC for use with OpenCV | |
| maps = np.transpose(maps, [0, 2, 3, 1]) | |
| min_size = cfg.KRCNN.INFERENCE_MIN_SIZE | |
| xy_preds = np.zeros((len(rois), 4, cfg.KRCNN.NUM_KEYPOINTS), dtype=np.float32) | |
| for i in range(len(rois)): | |
| if min_size > 0: | |
| roi_map_width = int(np.maximum(widths_ceil[i], min_size)) | |
| roi_map_height = int(np.maximum(heights_ceil[i], min_size)) | |
| else: | |
| roi_map_width = widths_ceil[i] | |
| roi_map_height = heights_ceil[i] | |
| width_correction = widths[i] / roi_map_width | |
| height_correction = heights[i] / roi_map_height | |
| roi_map = cv2.resize( | |
| maps[i], (roi_map_width, roi_map_height), interpolation=cv2.INTER_CUBIC | |
| ) | |
| # Bring back to CHW | |
| roi_map = np.transpose(roi_map, [2, 0, 1]) | |
| roi_map_probs = scores_to_probs(roi_map.copy()) | |
| w = roi_map.shape[2] | |
| for k in range(cfg.KRCNN.NUM_KEYPOINTS): | |
| pos = roi_map[k, :, :].argmax() | |
| x_int = pos % w | |
| y_int = (pos - x_int) // w | |
| assert (roi_map_probs[k, y_int, x_int] == roi_map_probs[k, :, :].max()) | |
| x = (x_int + 0.5) * width_correction | |
| y = (y_int + 0.5) * height_correction | |
| xy_preds[i, 0, k] = x + offset_x[i] | |
| xy_preds[i, 1, k] = y + offset_y[i] | |
| xy_preds[i, 2, k] = roi_map[k, y_int, x_int] | |
| xy_preds[i, 3, k] = roi_map_probs[k, y_int, x_int] | |
| return xy_preds | |
| def keypoints_to_heatmap_labels(keypoints, rois): | |
| """Encode keypoint location in the target heatmap for use in | |
| SoftmaxWithLoss. | |
| """ | |
| # Maps keypoints from the half-open interval [x1, x2) on continuous image | |
| # coordinates to the closed interval [0, HEATMAP_SIZE - 1] on discrete image | |
| # coordinates. We use the continuous <-> discrete conversion from Heckbert | |
| # 1990 ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5, | |
| # where d is a discrete coordinate and c is a continuous coordinate. | |
| assert keypoints.shape[2] == cfg.KRCNN.NUM_KEYPOINTS | |
| shape = (len(rois), cfg.KRCNN.NUM_KEYPOINTS) | |
| heatmaps = blob_utils.zeros(shape) | |
| weights = blob_utils.zeros(shape) | |
| offset_x = rois[:, 0] | |
| offset_y = rois[:, 1] | |
| scale_x = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 2] - rois[:, 0]) | |
| scale_y = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 3] - rois[:, 1]) | |
| for kp in range(keypoints.shape[2]): | |
| vis = keypoints[:, 2, kp] > 0 | |
| x = keypoints[:, 0, kp].astype(np.float32) | |
| y = keypoints[:, 1, kp].astype(np.float32) | |
| # Since we use floor below, if a keypoint is exactly on the roi's right | |
| # or bottom boundary, we shift it in by eps (conceptually) to keep it in | |
| # the ground truth heatmap. | |
| x_boundary_inds = np.where(x == rois[:, 2])[0] | |
| y_boundary_inds = np.where(y == rois[:, 3])[0] | |
| x = (x - offset_x) * scale_x | |
| x = np.floor(x) | |
| if len(x_boundary_inds) > 0: | |
| x[x_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1 | |
| y = (y - offset_y) * scale_y | |
| y = np.floor(y) | |
| if len(y_boundary_inds) > 0: | |
| y[y_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1 | |
| valid_loc = np.logical_and( | |
| np.logical_and(x >= 0, y >= 0), | |
| np.logical_and(x < cfg.KRCNN.HEATMAP_SIZE, y < cfg.KRCNN.HEATMAP_SIZE) | |
| ) | |
| valid = np.logical_and(valid_loc, vis) | |
| valid = valid.astype(np.int32) | |
| lin_ind = y * cfg.KRCNN.HEATMAP_SIZE + x | |
| heatmaps[:, kp] = lin_ind * valid | |
| weights[:, kp] = valid | |
| return heatmaps, weights | |
| def scores_to_probs(scores): | |
| """Transforms CxHxW of scores to probabilities spatially.""" | |
| channels = scores.shape[0] | |
| for c in range(channels): | |
| temp = scores[c, :, :] | |
| max_score = temp.max() | |
| temp = np.exp(temp - max_score) / np.sum(np.exp(temp - max_score)) | |
| scores[c, :, :] = temp | |
| return scores | |
| def nms_oks(kp_predictions, rois, thresh): | |
| """Nms based on kp predictions.""" | |
| scores = np.mean(kp_predictions[:, 2, :], axis=1) | |
| order = scores.argsort()[::-1] | |
| keep = [] | |
| while order.size > 0: | |
| i = order[0] | |
| keep.append(i) | |
| ovr = compute_oks(kp_predictions[i], rois[i], kp_predictions[order[1:]], rois[order[1:]]) | |
| inds = np.where(ovr <= thresh)[0] | |
| order = order[inds + 1] | |
| return keep | |
| def compute_oks(src_keypoints, src_roi, dst_keypoints, dst_roi): | |
| """Compute OKS for predicted keypoints wrt gt_keypoints. | |
| src_keypoints: 4xK | |
| src_roi: 4x1 | |
| dst_keypoints: Nx4xK | |
| dst_roi: Nx4 | |
| """ | |
| sigmas = np.array( | |
| [.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89] | |
| ) / 10.0 | |
| vars = (sigmas * 2)**2 | |
| # area | |
| src_area = (src_roi[2] - src_roi[0] + 1) * (src_roi[3] - src_roi[1] + 1) | |
| # measure the per-keypoint distance if keypoints visible | |
| dx = dst_keypoints[:, 0, :] - src_keypoints[0, :] | |
| dy = dst_keypoints[:, 1, :] - src_keypoints[1, :] | |
| e = (dx**2 + dy**2) / vars / (src_area + np.spacing(1)) / 2 | |
| e = np.sum(np.exp(-e), axis=1) / e.shape[1] | |
| return e | |
| def get_max_preds(batch_heatmaps): | |
| ''' | |
| get predictions from score maps | |
| heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) | |
| ''' | |
| assert isinstance(batch_heatmaps, np.ndarray), \ | |
| 'batch_heatmaps should be numpy.ndarray' | |
| assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' | |
| batch_size = batch_heatmaps.shape[0] | |
| num_joints = batch_heatmaps.shape[1] | |
| width = batch_heatmaps.shape[3] | |
| heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) | |
| idx = np.argmax(heatmaps_reshaped, 2) | |
| maxvals = np.amax(heatmaps_reshaped, 2) | |
| maxvals = maxvals.reshape((batch_size, num_joints, 1)) | |
| idx = idx.reshape((batch_size, num_joints, 1)) | |
| preds = np.tile(idx, (1, 1, 2)).astype(np.float32) | |
| preds[:, :, 0] = (preds[:, :, 0]) % width | |
| preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) | |
| pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) | |
| pred_mask = pred_mask.astype(np.float32) | |
| preds *= pred_mask | |
| return preds, maxvals | |
| def generate_3d_integral_preds_tensor(heatmaps, num_joints, x_dim, y_dim, z_dim): | |
| assert isinstance(heatmaps, torch.Tensor) | |
| if z_dim is not None: | |
| heatmaps = heatmaps.reshape((heatmaps.shape[0], num_joints, z_dim, y_dim, x_dim)) | |
| accu_x = heatmaps.sum(dim=2) | |
| accu_x = accu_x.sum(dim=2) | |
| accu_y = heatmaps.sum(dim=2) | |
| accu_y = accu_y.sum(dim=3) | |
| accu_z = heatmaps.sum(dim=3) | |
| accu_z = accu_z.sum(dim=3) | |
| accu_x = accu_x * torch.cuda.comm.broadcast( | |
| torch.arange(x_dim, dtype=torch.float32), devices=[accu_x.device.index] | |
| )[0] | |
| accu_y = accu_y * torch.cuda.comm.broadcast( | |
| torch.arange(y_dim, dtype=torch.float32), devices=[accu_y.device.index] | |
| )[0] | |
| accu_z = accu_z * torch.cuda.comm.broadcast( | |
| torch.arange(z_dim, dtype=torch.float32), devices=[accu_z.device.index] | |
| )[0] | |
| accu_x = accu_x.sum(dim=2, keepdim=True) | |
| accu_y = accu_y.sum(dim=2, keepdim=True) | |
| accu_z = accu_z.sum(dim=2, keepdim=True) | |
| else: | |
| heatmaps = heatmaps.reshape((heatmaps.shape[0], num_joints, y_dim, x_dim)) | |
| accu_x = heatmaps.sum(dim=2) | |
| accu_y = heatmaps.sum(dim=3) | |
| accu_x = accu_x * torch.cuda.comm.broadcast( | |
| torch.arange(x_dim, dtype=torch.float32), devices=[accu_x.device.index] | |
| )[0] | |
| accu_y = accu_y * torch.cuda.comm.broadcast( | |
| torch.arange(y_dim, dtype=torch.float32), devices=[accu_y.device.index] | |
| )[0] | |
| accu_x = accu_x.sum(dim=2, keepdim=True) | |
| accu_y = accu_y.sum(dim=2, keepdim=True) | |
| accu_z = None | |
| return accu_x, accu_y, accu_z | |
| # integral pose estimation | |
| # https://github.com/JimmySuen/integral-human-pose/blob/99647e40ec93dfa4e3b6a1382c935cebb35440da/pytorch_projects/common_pytorch/common_loss/integral.py#L28 | |
| def softmax_integral_tensor(preds, num_joints, hm_width, hm_height, hm_depth=None): | |
| # global soft max | |
| preds = preds.reshape((preds.shape[0], num_joints, -1)) | |
| preds = F.softmax(preds, 2) | |
| output_3d = False if hm_depth is None else True | |
| # integrate heatmap into joint location | |
| if output_3d: | |
| x, y, z = generate_3d_integral_preds_tensor( | |
| preds, num_joints, hm_width, hm_height, hm_depth | |
| ) | |
| # x = x / float(hm_width) - 0.5 | |
| # y = y / float(hm_height) - 0.5 | |
| # z = z / float(hm_depth) - 0.5 | |
| preds = torch.cat((x, y, z), dim=2) | |
| # preds = preds.reshape((preds.shape[0], num_joints * 3)) | |
| else: | |
| x, y, _ = generate_3d_integral_preds_tensor( | |
| preds, num_joints, hm_width, hm_height, z_dim=None | |
| ) | |
| # x = x / float(hm_width) - 0.5 | |
| # y = y / float(hm_height) - 0.5 | |
| preds = torch.cat((x, y), dim=2) | |
| # preds = preds.reshape((preds.shape[0], num_joints * 2)) | |
| return preds | |