Source code for mmdet.core.bbox.transforms

import mmcv
import numpy as np
import torch


[docs]def bbox_flip(bboxes, img_shape): """Flip bboxes horizontally. Args: bboxes(Tensor or ndarray): Shape (..., 4*k) img_shape(tuple): Image shape. Returns: Same type as `bboxes`: Flipped bboxes. """ if isinstance(bboxes, torch.Tensor): assert bboxes.shape[-1] % 4 == 0 flipped = bboxes.clone() flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4] flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4] return flipped elif isinstance(bboxes, np.ndarray): return mmcv.bbox_flip(bboxes, img_shape)
[docs]def bbox_mapping(bboxes, img_shape, scale_factor, flip): """Map bboxes from the original image scale to testing scale""" new_bboxes = bboxes * bboxes.new_tensor(scale_factor) if flip: new_bboxes = bbox_flip(new_bboxes, img_shape) return new_bboxes
[docs]def bbox_mapping_back(bboxes, img_shape, scale_factor, flip): """Map bboxes from testing scale to original image scale""" new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes new_bboxes = new_bboxes / new_bboxes.new_tensor(scale_factor) return new_bboxes
[docs]def bbox2roi(bbox_list): """Convert a list of bboxes to roi format. Args: bbox_list (list[Tensor]): a list of bboxes corresponding to a batch of images. Returns: Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2] """ rois_list = [] for img_id, bboxes in enumerate(bbox_list): if bboxes.size(0) > 0: img_inds = bboxes.new_full((bboxes.size(0), 1), img_id) rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1) else: rois = bboxes.new_zeros((0, 5)) rois_list.append(rois) rois = torch.cat(rois_list, 0) return rois
def roi2bbox(rois): bbox_list = [] img_ids = torch.unique(rois[:, 0].cpu(), sorted=True) for img_id in img_ids: inds = (rois[:, 0] == img_id.item()) bbox = rois[inds, 1:] bbox_list.append(bbox) return bbox_list
[docs]def bbox2result(bboxes, labels, num_classes): """Convert detection results to a list of numpy arrays. Args: bboxes (Tensor): shape (n, 5) labels (Tensor): shape (n, ) num_classes (int): class number, including background class Returns: list(ndarray): bbox results of each class """ if bboxes.shape[0] == 0: return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)] else: bboxes = bboxes.cpu().numpy() labels = labels.cpu().numpy() return [bboxes[labels == i, :] for i in range(num_classes)]
[docs]def distance2bbox(points, distance, max_shape=None): """Decode distance prediction to bounding box. Args: points (Tensor): Shape (n, 2), [x, y]. distance (Tensor): Distance from the given point to 4 boundaries (left, top, right, bottom). max_shape (tuple): Shape of the image. Returns: Tensor: Decoded bboxes. """ x1 = points[:, 0] - distance[:, 0] y1 = points[:, 1] - distance[:, 1] x2 = points[:, 0] + distance[:, 2] y2 = points[:, 1] + distance[:, 3] if max_shape is not None: x1 = x1.clamp(min=0, max=max_shape[1]) y1 = y1.clamp(min=0, max=max_shape[0]) x2 = x2.clamp(min=0, max=max_shape[1]) y2 = y2.clamp(min=0, max=max_shape[0]) return torch.stack([x1, y1, x2, y2], -1)