
Source code for mmdet.models.dense_heads.ascend_ssd_head

# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn.functional as F
from mmcv.runner import force_fp32

from ..builder import HEADS
from ..losses import smooth_l1_loss
from .ascend_anchor_head import AscendAnchorHead
from .ssd_head import SSDHead


@HEADS.register_module()
class AscendSSDHead(SSDHead, AscendAnchorHead):
    """Ascend SSD head used in https://arxiv.org/abs/1512.02325.

    Args:
        num_classes (int): Number of categories excluding the background
            category.
        in_channels (int): Number of channels in the input feature map.
        stacked_convs (int): Number of conv layers in cls and reg tower.
            Default: 0.
        feat_channels (int): Number of hidden channels when stacked_convs
            > 0. Default: 256.
        use_depthwise (bool): Whether to use DepthwiseSeparableConv.
            Default: False.
        conv_cfg (dict): Dictionary to construct and config conv layer.
            Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: None.
        act_cfg (dict): Dictionary to construct and config activation layer.
            Default: None.
        anchor_generator (dict): Config dict for anchor generator.
        bbox_coder (dict): Config of bounding box coder.
        reg_decoded_bbox (bool): If true, the regression loss would be
            applied directly on decoded bounding boxes, converting both
            the predicted boxes and regression targets to absolute
            coordinates format. Default False. It should be `True` when
            using `IoULoss`, `GIoULoss`, or `DIoULoss` in the bbox head.
        train_cfg (dict): Training config of anchor head.
        test_cfg (dict): Testing config of anchor head.
        init_cfg (dict or list[dict], optional): Initialization config dict.
    """  # noqa: W605

    def __init__(self,
                 num_classes=80,
                 in_channels=(512, 1024, 512, 256, 256, 256),
                 stacked_convs=0,
                 feat_channels=256,
                 use_depthwise=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 anchor_generator=dict(
                     type='SSDAnchorGenerator',
                     scale_major=False,
                     input_size=300,
                     strides=[8, 16, 32, 64, 100, 300],
                     ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
                     basesize_ratio_range=(0.1, 0.9)),
                 bbox_coder=dict(
                     type='DeltaXYWHBBoxCoder',
                     clip_border=True,
                     target_means=[.0, .0, .0, .0],
                     target_stds=[1.0, 1.0, 1.0, 1.0]),
                 reg_decoded_bbox=False,
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=dict(
                     type='Xavier',
                     layer='Conv2d',
                     distribution='uniform',
                     bias=0)):
        super(AscendSSDHead, self).__init__(
            num_classes=num_classes,
            in_channels=in_channels,
            stacked_convs=stacked_convs,
            feat_channels=feat_channels,
            use_depthwise=use_depthwise,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            anchor_generator=anchor_generator,
            bbox_coder=bbox_coder,
            reg_decoded_bbox=reg_decoded_bbox,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            init_cfg=init_cfg)
        assert self.reg_decoded_bbox is False, \
            'reg_decoded_bbox only supports False now.'
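
As a hedged construction sketch (not part of this file), the head can be built from a config dict in the usual MMDetection way. The num_classes and train_cfg values below are illustrative assumptions; real settings come from the Ascend SSD config under configs/. ConfigDict provides the attribute access that train_cfg.neg_pos_ratio and train_cfg.smoothl1_beta rely on later.

from mmcv.utils import ConfigDict
from mmdet.models import build_head

# Illustrative config, not taken from this file. The assigner type is the
# batched Ascend variant used by the Ascend SSD configs.
head = build_head(
    ConfigDict(
        type='AscendSSDHead',
        num_classes=20,  # e.g. PASCAL VOC; an assumption for this sketch
        in_channels=(512, 1024, 512, 256, 256, 256),
        train_cfg=dict(
            assigner=dict(
                type='AscendMaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.,
                ignore_iof_thr=-1,
                gt_max_assign_all=False),
            smoothl1_beta=1.,
            allowed_border=-1,
            pos_weight=-1,
            neg_pos_ratio=3,
            debug=False)))
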
    def get_static_anchors(self, featmap_sizes, img_metas, device='cuda'):
        """Get static anchors according to feature map sizes.

        Args:
            featmap_sizes (list[tuple]): Multi-level feature map sizes.
            img_metas (list[dict]): Image meta info.
            device (torch.device | str): Device for returned tensors.

        Returns:
            tuple:
                anchor_list (list[Tensor]): Anchors of each image.
                valid_flag_list (list[Tensor]): Valid flags of each image.
        """
        if not hasattr(self, 'static_anchors') or \
                not hasattr(self, 'static_valid_flags'):
            static_anchors, static_valid_flags = self.get_anchors(
                featmap_sizes, img_metas, device)
            self.static_anchors = static_anchors
            self.static_valid_flags = static_valid_flags
        return self.static_anchors, self.static_valid_flags
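
Because SSD runs at a fixed input resolution, the anchors are identical for every batch, so the first call computes them and later calls return the cached tensors. A small usage sketch (the feature-map sizes assume the default 300x300 input, and the minimal img_metas entry is an assumption; get_anchors reads 'pad_shape' from it):

featmap_sizes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
img_metas = [dict(pad_shape=(300, 300, 3))]
anchors, flags = head.get_static_anchors(featmap_sizes, img_metas, 'cpu')
cached, _ = head.get_static_anchors(featmap_sizes, img_metas, 'cpu')
assert anchors is cached  # second call returns the cached tensors
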
    def get_targets(self,
                    anchor_list,
                    valid_flag_list,
                    gt_bboxes_list,
                    img_metas,
                    gt_bboxes_ignore_list=None,
                    gt_labels_list=None,
                    label_channels=1,
                    unmap_outputs=True,
                    return_sampling_results=False,
                    return_level=True):
        """Compute regression and classification targets for anchors in
        multiple images.

        Args:
            anchor_list (list[list[Tensor]]): Multi level anchors of each
                image. The outer list indicates images, and the inner list
                corresponds to feature levels of the image. Each element of
                the inner list is a tensor of shape (num_anchors, 4).
            valid_flag_list (list[list[Tensor]]): Multi level valid flags of
                each image. The outer list indicates images, and the inner
                list corresponds to feature levels of the image. Each element
                of the inner list is a tensor of shape (num_anchors, ).
            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
            img_metas (list[dict]): Meta info of each image.
            gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be
                ignored.
            gt_labels_list (list[Tensor]): Ground truth labels of each box.
            label_channels (int): Channel of label.
            unmap_outputs (bool): Whether to map outputs back to the original
                set of anchors.
            return_sampling_results (bool): Whether to return the sampling
                results.
            return_level (bool): Whether to map outputs back to the levels
                of feature map sizes.

        Returns:
            tuple: Usually returns a tuple containing learning targets.

                - labels_list (list[Tensor]): Labels of each level.
                - label_weights_list (list[Tensor]): Label weights of each
                  level.
                - bbox_targets_list (list[Tensor]): BBox targets of each
                  level.
                - bbox_weights_list (list[Tensor]): BBox weights of each
                  level.
                - num_total_pos (int): Number of positive samples in all
                  images.
                - num_total_neg (int): Number of negative samples in all
                  images.

            additional_returns: This function enables user-defined returns
                from `self._get_targets_single`. These returns are currently
                refined to properties at each feature map (i.e. having HxW
                dimension). The results will be concatenated at the end.
        """
        return AscendAnchorHead.get_targets(
            self,
            anchor_list,
            valid_flag_list,
            gt_bboxes_list,
            img_metas,
            gt_bboxes_ignore_list,
            gt_labels_list,
            label_channels,
            unmap_outputs,
            return_sampling_results,
            return_level,
        )

    def batch_loss(self, batch_cls_score, batch_bbox_pred, batch_anchor,
                   batch_labels, batch_label_weights, batch_bbox_targets,
                   batch_bbox_weights, batch_pos_mask, batch_neg_mask,
                   num_total_samples):
        """Compute loss of all images.

        Args:
            batch_cls_score (Tensor): Box scores for all images with shape
                (num_imgs, num_total_anchors, num_classes).
            batch_bbox_pred (Tensor): Box energies / deltas for all images
                with shape (num_imgs, num_total_anchors, 4).
            batch_anchor (Tensor): Box reference for all images with shape
                (num_imgs, num_total_anchors, 4).
            batch_labels (Tensor): Labels of all anchors with shape
                (num_imgs, num_total_anchors,).
            batch_label_weights (Tensor): Label weights of all anchors with
                shape (num_imgs, num_total_anchors,).
            batch_bbox_targets (Tensor): BBox regression targets of all
                anchors with shape (num_imgs, num_total_anchors, 4).
            batch_bbox_weights (Tensor): BBox regression loss weights of all
                anchors with shape (num_imgs, num_total_anchors, 4).
            batch_pos_mask (Tensor): Positive samples mask in all images.
            batch_neg_mask (Tensor): Negative samples mask in all images.
            num_total_samples (int): If sampling, num total samples equals
                the number of total anchors; otherwise, it is the number of
                positive anchors.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        num_images, num_anchors, _ = batch_anchor.size()
        batch_loss_cls_all = F.cross_entropy(
            batch_cls_score.view((-1, self.cls_out_channels)),
            batch_labels.view(-1),
            reduction='none').view(
                batch_label_weights.size()) * batch_label_weights
        # FG cat_id: [0, num_classes - 1], BG cat_id: num_classes
        batch_num_pos_samples = torch.sum(batch_pos_mask, dim=1)
        batch_num_neg_samples = \
            self.train_cfg.neg_pos_ratio * batch_num_pos_samples
        batch_num_neg_samples_max = torch.sum(batch_neg_mask, dim=1)
        batch_num_neg_samples = torch.min(batch_num_neg_samples,
                                          batch_num_neg_samples_max)
        batch_topk_loss_cls_neg, _ = torch.topk(
            batch_loss_cls_all * batch_neg_mask, k=num_anchors, dim=1)
        batch_loss_cls_pos = torch.sum(
            batch_loss_cls_all * batch_pos_mask, dim=1)

        anchor_index = torch.arange(
            end=num_anchors, dtype=torch.float,
            device=batch_anchor.device).view((1, -1))
        topk_loss_neg_mask = (anchor_index < batch_num_neg_samples.view(
            -1, 1)).float()
        batch_loss_cls_neg = torch.sum(
            batch_topk_loss_cls_neg * topk_loss_neg_mask, dim=1)
        loss_cls = \
            (batch_loss_cls_pos + batch_loss_cls_neg) / num_total_samples

        if self.reg_decoded_bbox:
            # TODO: support self.reg_decoded_bbox is True
            raise RuntimeError

        loss_bbox_all = smooth_l1_loss(
            batch_bbox_pred,
            batch_bbox_targets,
            batch_bbox_weights,
            reduction='none',
            beta=self.train_cfg.smoothl1_beta,
            avg_factor=num_total_samples)
        eps = torch.finfo(torch.float32).eps

        sum_dim = (i for i in range(1, len(loss_bbox_all.size())))
        loss_bbox = loss_bbox_all.sum(tuple(sum_dim)) / (
            num_total_samples + eps)
        return loss_cls[None], loss_bbox
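
The classification branch above performs hard negative mining without any data-dependent shapes: it takes a single full-length topk over the per-anchor negative losses, then zeroes out everything past the per-image negative budget with a comparison mask. Every tensor keeps a static shape, which suits the fixed-shape graphs Ascend devices favor. A standalone sketch with made-up numbers:

import torch

loss_neg = torch.tensor([[0.9, 0.1, 0.5, 0.3, 0.7]])  # negative cls losses
num_neg = torch.tensor([2.0])  # keep the 2 hardest negatives for this image

# Sort all anchors once; k is the (static) total anchor count.
topk_loss, _ = torch.topk(loss_neg, k=loss_neg.size(1), dim=1)
# topk_loss == [[0.9, 0.7, 0.5, 0.3, 0.1]]
index = torch.arange(loss_neg.size(1), dtype=torch.float).view(1, -1)
keep = (index < num_neg.view(-1, 1)).float()  # [[1., 1., 0., 0., 0.]]
print((topk_loss * keep).sum(dim=1))  # tensor([1.6000]) == 0.9 + 0.7
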
    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level with
                shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W).
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image in
                [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): Class indices corresponding to each
                box.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
                boxes can be ignored when computing the loss.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.prior_generator.num_levels

        device = cls_scores[0].device

        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, img_metas, device=device)
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=1,
            unmap_outputs=True,
            return_level=False)
        if cls_reg_targets is None:
            return None

        (batch_labels, batch_label_weights, batch_bbox_targets,
         batch_bbox_weights, batch_pos_mask, batch_neg_mask, sampling_result,
         num_total_pos, num_total_neg, batch_anchors) = cls_reg_targets

        num_imgs = len(img_metas)
        batch_cls_score = torch.cat([
            s.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.cls_out_channels)
            for s in cls_scores
        ], 1)

        batch_bbox_pred = torch.cat([
            b.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4)
            for b in bbox_preds
        ], -2)

        batch_losses_cls, batch_losses_bbox = self.batch_loss(
            batch_cls_score, batch_bbox_pred, batch_anchors, batch_labels,
            batch_label_weights, batch_bbox_targets, batch_bbox_weights,
            batch_pos_mask, batch_neg_mask, num_total_pos)
        losses_cls = [
            batch_losses_cls[:, index_imgs] for index_imgs in range(num_imgs)
        ]
        losses_bbox = [losses_bbox for losses_bbox in batch_losses_bbox]
        return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
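
To tie the pieces together, a shape-level sketch of the training path (not from the source): feature shapes follow the default SSD300 layout, the ground truth and image metas are invented, and head is the instance from the construction sketch above. The Ascend-specific target helpers are exercised on NPU devices in practice, so treat this as illustrative rather than a guaranteed CPU run.

feats = [torch.rand(1, c, s, s) for c, s in
         zip((512, 1024, 512, 256, 256, 256), (38, 19, 10, 5, 3, 1))]
cls_scores, bbox_preds = head(feats)  # SSDHead.forward
img_metas = [dict(img_shape=(300, 300, 3), pad_shape=(300, 300, 3),
                  scale_factor=1.0)]
gt_bboxes = [torch.tensor([[10., 20., 120., 160.]])]  # invented box
gt_labels = [torch.tensor([3])]  # invented class index
losses = head.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas)
# losses['loss_cls'] / losses['loss_bbox'] hold per-image loss tensors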