Source code for mmdet.core.bbox.transforms

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch


def bbox_flip(bboxes, img_shape, direction='horizontal'):
    """Flip bboxes horizontally or vertically.

    Args:
        bboxes (Tensor): Shape (..., 4*k)
        img_shape (tuple): Image shape.
        direction (str): Flip direction, options are "horizontal",
            "vertical", "diagonal". Default: "horizontal"

    Returns:
        Tensor: Flipped bboxes.
    """
    assert bboxes.shape[-1] % 4 == 0
    assert direction in ['horizontal', 'vertical', 'diagonal']
    flipped = bboxes.clone()
    if direction == 'horizontal':
        flipped[..., 0::4] = img_shape[1] - bboxes[..., 2::4]
        flipped[..., 2::4] = img_shape[1] - bboxes[..., 0::4]
    elif direction == 'vertical':
        flipped[..., 1::4] = img_shape[0] - bboxes[..., 3::4]
        flipped[..., 3::4] = img_shape[0] - bboxes[..., 1::4]
    else:
        flipped[..., 0::4] = img_shape[1] - bboxes[..., 2::4]
        flipped[..., 1::4] = img_shape[0] - bboxes[..., 3::4]
        flipped[..., 2::4] = img_shape[1] - bboxes[..., 0::4]
        flipped[..., 3::4] = img_shape[0] - bboxes[..., 1::4]
    return flipped
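A minimal usage sketch (illustrative, not part of the upstream module): in an image of shape (H=100, W=200), a horizontal flip mirrors the x coordinates, and flipping twice recovers the original box.

    >>> b = torch.tensor([[10., 20., 30., 40.]])
    >>> bbox_flip(b, img_shape=(100, 200))
    tensor([[170.,  20., 190.,  40.]])
    >>> bbox_flip(bbox_flip(b, (100, 200)), (100, 200))
    tensor([[10., 20., 30., 40.]])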

def bbox_mapping(bboxes,
                 img_shape,
                 scale_factor,
                 flip,
                 flip_direction='horizontal'):
    """Map bboxes from the original image scale to testing scale."""
    new_bboxes = bboxes * bboxes.new_tensor(scale_factor)
    if flip:
        new_bboxes = bbox_flip(new_bboxes, img_shape, flip_direction)
    return new_bboxes

def bbox_mapping_back(bboxes,
                      img_shape,
                      scale_factor,
                      flip,
                      flip_direction='horizontal'):
    """Map bboxes from testing scale to original image scale."""
    new_bboxes = bbox_flip(bboxes, img_shape,
                           flip_direction) if flip else bboxes
    new_bboxes = new_bboxes.view(-1, 4) / new_bboxes.new_tensor(scale_factor)
    return new_bboxes.view(bboxes.shape)
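An illustrative round trip (values assumed for the example; a scalar scale factor is used for simplicity, and img_shape is the testing-scale image shape): bbox_mapping scales and flips a box into the testing frame, and bbox_mapping_back inverts both steps.

    >>> b = torch.tensor([[10., 20., 30., 40.]])
    >>> mapped = bbox_mapping(b, img_shape=(200, 400), scale_factor=2.0, flip=True)
    >>> mapped
    tensor([[340.,  40., 380.,  80.]])
    >>> bbox_mapping_back(mapped, img_shape=(200, 400), scale_factor=2.0, flip=True)
    tensor([[10., 20., 30., 40.]])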

def bbox2roi(bbox_list):
    """Convert a list of bboxes to roi format.

    Args:
        bbox_list (list[Tensor]): a list of bboxes corresponding to a
            batch of images.

    Returns:
        Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
    """
    rois_list = []
    for img_id, bboxes in enumerate(bbox_list):
        if bboxes.size(0) > 0:
            img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)
            rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1)
        else:
            rois = bboxes.new_zeros((0, 5))
        rois_list.append(rois)
    rois = torch.cat(rois_list, 0)
    return rois

def roi2bbox(rois):
    """Convert rois to bounding box format.

    Args:
        rois (torch.Tensor): RoIs with the shape (n, 5) where the first
            column indicates batch id of each RoI.

    Returns:
        list[torch.Tensor]: Converted boxes of corresponding rois.
    """
    bbox_list = []
    img_ids = torch.unique(rois[:, 0].cpu(), sorted=True)
    for img_id in img_ids:
        inds = (rois[:, 0] == img_id.item())
        bbox = rois[inds, 1:]
        bbox_list.append(bbox)
    return bbox_list
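A small sketch of the round trip between the two formats (example values assumed): each per-image box gains a leading batch index, and roi2bbox splits the stacked RoIs back by that index.

    >>> boxes_img0 = torch.tensor([[10., 20., 30., 40.]])
    >>> boxes_img1 = torch.tensor([[50., 60., 70., 80.]])
    >>> rois = bbox2roi([boxes_img0, boxes_img1])
    >>> rois
    tensor([[ 0., 10., 20., 30., 40.],
            [ 1., 50., 60., 70., 80.]])
    >>> len(roi2bbox(rois))
    2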

def bbox2result(bboxes, labels, num_classes):
    """Convert detection results to a list of numpy arrays.

    Args:
        bboxes (torch.Tensor | np.ndarray): shape (n, 5)
        labels (torch.Tensor | np.ndarray): shape (n, )
        num_classes (int): class number, including background class

    Returns:
        list(ndarray): bbox results of each class
    """
    if bboxes.shape[0] == 0:
        return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)]
    else:
        if isinstance(bboxes, torch.Tensor):
            bboxes = bboxes.detach().cpu().numpy()
            labels = labels.detach().cpu().numpy()
        return [bboxes[labels == i, :] for i in range(num_classes)]
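For illustration (assumed values): with three classes, a single detection labelled 1 lands in the second per-class array, while the others stay empty.

    >>> dets = torch.tensor([[10., 20., 30., 40., 0.9]])
    >>> labels = torch.tensor([1])
    >>> [r.shape for r in bbox2result(dets, labels, num_classes=3)]
    [(0, 5), (1, 5), (0, 5)]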

def distance2bbox(points, distance, max_shape=None):
    """Decode distance prediction to bounding box.

    Args:
        points (Tensor): Shape (B, N, 2) or (N, 2).
        distance (Tensor): Distance from the given point to 4
            boundaries (left, top, right, bottom). Shape (B, N, 4)
            or (N, 4).
        max_shape (Sequence[int] or torch.Tensor or
            Sequence[Sequence[int]], optional): Maximum bounds for
            boxes, specifies (H, W, C) or (H, W). If points are
            batched with shape (B, N, 2), then max_shape should be a
            Sequence[Sequence[int]] of length B.

    Returns:
        Tensor: Boxes with shape (N, 4) or (B, N, 4)
    """
    x1 = points[..., 0] - distance[..., 0]
    y1 = points[..., 1] - distance[..., 1]
    x2 = points[..., 0] + distance[..., 2]
    y2 = points[..., 1] + distance[..., 3]

    bboxes = torch.stack([x1, y1, x2, y2], -1)

    if max_shape is not None:
        # clip bboxes with dynamic `min` and `max` for onnx
        if torch.onnx.is_in_onnx_export():
            from mmdet.core.export import dynamic_clip_for_onnx
            x1, y1, x2, y2 = dynamic_clip_for_onnx(x1, y1, x2, y2, max_shape)
            bboxes = torch.stack([x1, y1, x2, y2], dim=-1)
            return bboxes
        if not isinstance(max_shape, torch.Tensor):
            max_shape = x1.new_tensor(max_shape)
        max_shape = max_shape[..., :2].type_as(x1)
        if max_shape.ndim == 2:
            assert bboxes.ndim == 3
            assert max_shape.size(0) == bboxes.size(0)

        min_xy = x1.new_tensor(0)
        max_xy = torch.cat([max_shape, max_shape],
                           dim=-1).flip(-1).unsqueeze(-2)
        bboxes = torch.where(bboxes < min_xy, min_xy, bboxes)
        bboxes = torch.where(bboxes > max_xy, max_xy, bboxes)

    return bboxes
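A brief sketch of decoding and clipping (example values assumed): a point at (50, 50) with distances (10, 20, 30, 40) yields the box below; max_shape=(60, 70), i.e. (H, W), then clamps the right edge to W=70 and the bottom edge to H=60.

    >>> pts = torch.tensor([[50., 50.]])
    >>> dist = torch.tensor([[10., 20., 30., 40.]])
    >>> distance2bbox(pts, dist)
    tensor([[40., 30., 80., 90.]])
    >>> distance2bbox(pts, dist, max_shape=(60, 70))
    tensor([[40., 30., 70., 60.]])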

def bbox2distance(points, bbox, max_dis=None, eps=0.1):
    """Encode a bounding box as distances from the given points to its
    four boundaries.

    Args:
        points (Tensor): Shape (n, 2), [x, y].
        bbox (Tensor): Shape (n, 4), "xyxy" format.
        max_dis (float): Upper bound of the distance.
        eps (float): a small value to ensure target < max_dis
            rather than target <= max_dis.

    Returns:
        Tensor: Distances (left, top, right, bottom), shape (n, 4).
    """
    left = points[:, 0] - bbox[:, 0]
    top = points[:, 1] - bbox[:, 1]
    right = bbox[:, 2] - points[:, 0]
    bottom = bbox[:, 3] - points[:, 1]
    if max_dis is not None:
        left = left.clamp(min=0, max=max_dis - eps)
        top = top.clamp(min=0, max=max_dis - eps)
        right = right.clamp(min=0, max=max_dis - eps)
        bottom = bottom.clamp(min=0, max=max_dis - eps)
    return torch.stack([left, top, right, bottom], -1)
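An illustrative inverse of the distance2bbox example above (not part of the upstream file): encoding the decoded box against the same point recovers the original distances.

    >>> pts = torch.tensor([[50., 50.]])
    >>> box = torch.tensor([[40., 30., 80., 90.]])
    >>> bbox2distance(pts, box)
    tensor([[10., 20., 30., 40.]])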

def bbox_rescale(bboxes, scale_factor=1.0):
    """Rescale bounding box w.r.t. scale_factor.

    Args:
        bboxes (Tensor): Shape (n, 4) for bboxes or (n, 5) for rois
        scale_factor (float): rescale factor

    Returns:
        Tensor: Rescaled bboxes.
    """
    if bboxes.size(1) == 5:
        bboxes_ = bboxes[:, 1:]
        inds_ = bboxes[:, 0]
    else:
        bboxes_ = bboxes
    cx = (bboxes_[:, 0] + bboxes_[:, 2]) * 0.5
    cy = (bboxes_[:, 1] + bboxes_[:, 3]) * 0.5
    w = bboxes_[:, 2] - bboxes_[:, 0]
    h = bboxes_[:, 3] - bboxes_[:, 1]
    w = w * scale_factor
    h = h * scale_factor
    x1 = cx - 0.5 * w
    x2 = cx + 0.5 * w
    y1 = cy - 0.5 * h
    y2 = cy + 0.5 * h
    if bboxes.size(1) == 5:
        rescaled_bboxes = torch.stack([inds_, x1, y1, x2, y2], dim=-1)
    else:
        rescaled_bboxes = torch.stack([x1, y1, x2, y2], dim=-1)
    return rescaled_bboxes
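A quick sketch (assumed values): rescaling by 2.0 doubles the width and height about the box centre. Note the result is not clipped to any image boundary, so coordinates can go negative.

    >>> b = torch.tensor([[10., 10., 30., 50.]])
    >>> bbox_rescale(b, scale_factor=2.0)
    tensor([[  0., -10.,  40.,  70.]])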

def bbox_cxcywh_to_xyxy(bbox):
    """Convert bbox coordinates from (cx, cy, w, h) to (x1, y1, x2, y2).

    Args:
        bbox (Tensor): Shape (n, 4) for bboxes.

    Returns:
        Tensor: Converted bboxes.
    """
    cx, cy, w, h = bbox.split((1, 1, 1, 1), dim=-1)
    bbox_new = [(cx - 0.5 * w), (cy - 0.5 * h), (cx + 0.5 * w), (cy + 0.5 * h)]
    return torch.cat(bbox_new, dim=-1)

def bbox_xyxy_to_cxcywh(bbox):
    """Convert bbox coordinates from (x1, y1, x2, y2) to (cx, cy, w, h).

    Args:
        bbox (Tensor): Shape (n, 4) for bboxes.

    Returns:
        Tensor: Converted bboxes.
    """
    x1, y1, x2, y2 = bbox.split((1, 1, 1, 1), dim=-1)
    bbox_new = [(x1 + x2) / 2, (y1 + y2) / 2, (x2 - x1), (y2 - y1)]
    return torch.cat(bbox_new, dim=-1)
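A minimal round-trip sketch for the two conversions (example values assumed): the functions are exact inverses of each other.

    >>> xyxy = torch.tensor([[10., 20., 30., 60.]])
    >>> cxcywh = bbox_xyxy_to_cxcywh(xyxy)
    >>> cxcywh
    tensor([[20., 40., 20., 40.]])
    >>> bbox_cxcywh_to_xyxy(cxcywh)
    tensor([[10., 20., 30., 60.]])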