Shortcuts

Source code for mmdet.models.dense_heads.ddod_head

# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule, Scale, bias_init_with_prob, normal_init
from mmcv.runner import force_fp32

from mmdet.core import (anchor_inside_flags, build_assigner, build_sampler,
                        images_to_levels, multi_apply, reduce_mean, unmap)
from mmdet.core.bbox import bbox_overlaps
from ..builder import HEADS, build_loss
from .anchor_head import AnchorHead

EPS = 1e-12


[docs]@HEADS.register_module() class DDODHead(AnchorHead): """DDOD head decomposes conjunctions lying in most current one-stage detectors via label assignment disentanglement, spatial feature disentanglement, and pyramid supervision disentanglement. https://arxiv.org/abs/2107.02963 Args: num_classes (int): Number of categories excluding the background category. in_channels (int): Number of channels in the input feature map. stacked_convs (int): The number of stacked Conv. Default: 4. conv_cfg (dict): Conv config of ddod head. Default: None. use_dcn (bool): Use dcn, Same as ATSS when False. Default: True. norm_cfg (dict): Normal config of ddod head. Default: dict(type='GN', num_groups=32, requires_grad=True). loss_iou (dict): Config of IoU loss. Default: dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0). """ def __init__(self, num_classes, in_channels, stacked_convs=4, conv_cfg=None, use_dcn=True, norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), loss_iou=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), **kwargs): self.stacked_convs = stacked_convs self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.use_dcn = use_dcn super(DDODHead, self).__init__(num_classes, in_channels, **kwargs) self.sampling = False if self.train_cfg: self.cls_assigner = build_assigner(self.train_cfg.assigner) self.reg_assigner = build_assigner(self.train_cfg.reg_assigner) sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.loss_iou = build_loss(loss_iou) def _init_layers(self): """Initialize layers of the head.""" self.relu = nn.ReLU(inplace=True) self.cls_convs = nn.ModuleList() self.reg_convs = nn.ModuleList() for i in range(self.stacked_convs): chn = self.in_channels if i == 0 else self.feat_channels self.cls_convs.append( ConvModule( chn, self.feat_channels, 3, stride=1, padding=1, conv_cfg=dict(type='DCN', deform_groups=1) if i == 0 and self.use_dcn else self.conv_cfg, norm_cfg=self.norm_cfg)) self.reg_convs.append( ConvModule( chn, self.feat_channels, 3, stride=1, padding=1, conv_cfg=dict(type='DCN', deform_groups=1) if i == 0 and self.use_dcn else self.conv_cfg, norm_cfg=self.norm_cfg)) self.atss_cls = nn.Conv2d( self.feat_channels, self.num_base_priors * self.cls_out_channels, 3, padding=1) self.atss_reg = nn.Conv2d( self.feat_channels, self.num_base_priors * 4, 3, padding=1) self.atss_iou = nn.Conv2d( self.feat_channels, self.num_base_priors * 1, 3, padding=1) self.scales = nn.ModuleList( [Scale(1.0) for _ in self.prior_generator.strides]) # we use the global list in loss self.cls_num_pos_samples_per_level = [ 0. for _ in range(len(self.prior_generator.strides)) ] self.reg_num_pos_samples_per_level = [ 0. for _ in range(len(self.prior_generator.strides)) ]
[docs] def init_weights(self): """Initialize weights of the head.""" for m in self.cls_convs: normal_init(m.conv, std=0.01) for m in self.reg_convs: normal_init(m.conv, std=0.01) normal_init(self.atss_reg, std=0.01) normal_init(self.atss_iou, std=0.01) bias_cls = bias_init_with_prob(0.01) normal_init(self.atss_cls, std=0.01, bias=bias_cls)
[docs] def forward(self, feats): """Forward features from the upstream network. Args: feats (tuple[Tensor]): Features from the upstream network, each is a 4D-tensor. Returns: tuple: Usually a tuple of classification scores and bbox prediction cls_scores (list[Tensor]): Classification scores for all scale levels, each is a 4D-tensor, the channels number is num_base_priors * num_classes. bbox_preds (list[Tensor]): Box energies / deltas for all scale levels, each is a 4D-tensor, the channels number is num_base_priors * 4. iou_preds (list[Tensor]): IoU scores for all scale levels, each is a 4D-tensor, the channels number is num_base_priors * 1. """ return multi_apply(self.forward_single, feats, self.scales)
[docs] def forward_single(self, x, scale): """Forward feature of a single scale level. Args: x (Tensor): Features of a single scale level. scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize the bbox prediction. Returns: tuple: - cls_score (Tensor): Cls scores for a single scale level \ the channels number is num_base_priors * num_classes. - bbox_pred (Tensor): Box energies / deltas for a single \ scale level, the channels number is num_base_priors * 4. - iou_pred (Tensor): Iou for a single scale level, the \ channel number is (N, num_base_priors * 1, H, W). """ cls_feat = x reg_feat = x for cls_conv in self.cls_convs: cls_feat = cls_conv(cls_feat) for reg_conv in self.reg_convs: reg_feat = reg_conv(reg_feat) cls_score = self.atss_cls(cls_feat) # we just follow atss, not apply exp in bbox_pred bbox_pred = scale(self.atss_reg(reg_feat)).float() iou_pred = self.atss_iou(reg_feat) return cls_score, bbox_pred, iou_pred
[docs] def loss_cls_single(self, cls_score, labels, label_weights, reweight_factor, num_total_samples): """Compute cls loss of a single scale level. Args: cls_score (Tensor): Box scores for each scale level Has shape (N, num_base_priors * num_classes, H, W). labels (Tensor): Labels of each anchors with shape (N, num_total_anchors). label_weights (Tensor): Label weights of each anchor with shape (N, num_total_anchors) reweight_factor (list[int]): Reweight factor for cls and reg loss. num_total_samples (int): Number of positive samples that is reduced over all GPUs. Returns: tuple[Tensor]: A tuple of loss components. """ cls_score = cls_score.permute(0, 2, 3, 1).reshape( -1, self.cls_out_channels).contiguous() labels = labels.reshape(-1) label_weights = label_weights.reshape(-1) loss_cls = self.loss_cls( cls_score, labels, label_weights, avg_factor=num_total_samples) return reweight_factor * loss_cls,
[docs] def loss_reg_single(self, anchors, bbox_pred, iou_pred, labels, label_weights, bbox_targets, bbox_weights, reweight_factor, num_total_samples): """Compute reg loss of a single scale level. Args: anchors (Tensor): Box reference for each scale level with shape (N, num_total_anchors, 4). bbox_pred (Tensor): Box energies / deltas for each scale level with shape (N, num_base_priors * 4, H, W). iou_pred (Tensor): Iou for a single scale level, the channel number is (N, num_base_priors * 1, H, W). labels (Tensor): Labels of each anchors with shape (N, num_total_anchors). label_weights (Tensor): Label weights of each anchor with shape (N, num_total_anchors) bbox_targets (Tensor): BBox regression targets of each anchor weight shape (N, num_total_anchors, 4). bbox_weights (Tensor): BBox weights of all anchors in the image with shape (N, 4) reweight_factor (list[int]): Reweight factor for cls and reg loss. num_total_samples (int): Number of positive samples that is reduced over all GPUs. Returns: dict[str, Tensor]: A dictionary of loss components. """ anchors = anchors.reshape(-1, 4) bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4) iou_pred = iou_pred.permute(0, 2, 3, 1).reshape(-1, ) bbox_targets = bbox_targets.reshape(-1, 4) bbox_weights = bbox_weights.reshape(-1, 4) labels = labels.reshape(-1) label_weights = label_weights.reshape(-1) iou_targets = label_weights.new_zeros(labels.shape) iou_weights = label_weights.new_zeros(labels.shape) iou_weights[(bbox_weights.sum(axis=1) > 0).nonzero( as_tuple=False)] = 1. # FG cat_id: [0, num_classes -1], BG cat_id: num_classes bg_class_ind = self.num_classes pos_inds = ((labels >= 0) & (labels < bg_class_ind)).nonzero(as_tuple=False).squeeze(1) if len(pos_inds) > 0: pos_bbox_targets = bbox_targets[pos_inds] pos_bbox_pred = bbox_pred[pos_inds] pos_anchors = anchors[pos_inds] pos_decode_bbox_pred = self.bbox_coder.decode( pos_anchors, pos_bbox_pred) pos_decode_bbox_targets = self.bbox_coder.decode( pos_anchors, pos_bbox_targets) # regression loss loss_bbox = self.loss_bbox( pos_decode_bbox_pred, pos_decode_bbox_targets, avg_factor=num_total_samples) iou_targets[pos_inds] = bbox_overlaps( pos_decode_bbox_pred.detach(), pos_decode_bbox_targets, is_aligned=True) loss_iou = self.loss_iou( iou_pred, iou_targets, iou_weights, avg_factor=num_total_samples) else: loss_bbox = bbox_pred.sum() * 0 loss_iou = iou_pred.sum() * 0 return reweight_factor * loss_bbox, reweight_factor * loss_iou
[docs] def calc_reweight_factor(self, labels_list): """Compute reweight_factor for regression and classification loss.""" # get pos samples for each level bg_class_ind = self.num_classes for ii, each_level_label in enumerate(labels_list): pos_inds = ((each_level_label >= 0) & (each_level_label < bg_class_ind)).nonzero( as_tuple=False).squeeze(1) self.cls_num_pos_samples_per_level[ii] += len(pos_inds) # get reweight factor from 1 ~ 2 with bilinear interpolation min_pos_samples = min(self.cls_num_pos_samples_per_level) max_pos_samples = max(self.cls_num_pos_samples_per_level) interval = 1. / (max_pos_samples - min_pos_samples + 1e-10) reweight_factor_per_level = [] for pos_samples in self.cls_num_pos_samples_per_level: factor = 2. - (pos_samples - min_pos_samples) * interval reweight_factor_per_level.append(factor) return reweight_factor_per_level
[docs] @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'iou_preds')) def loss(self, cls_scores, bbox_preds, iou_preds, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore=None): """Compute losses of the head. Args: cls_scores (list[Tensor]): Box scores for each scale level Has shape (N, num_base_priors * num_classes, H, W) bbox_preds (list[Tensor]): Box energies / deltas for each scale level with shape (N, num_base_priors * 4, H, W) iou_preds (list[Tensor]): Score factor for all scale level, each is a 4D-tensor, has shape (batch_size, 1, H, W). gt_bboxes (list[Tensor]): Ground truth bboxes for each image with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format. gt_labels (list[Tensor]): class indices corresponding to each box img_metas (list[dict]): Meta information of each image, e.g., image size, scaling factor, etc. gt_bboxes_ignore (list[Tensor] | None): specify which bounding boxes can be ignored when computing the loss. Returns: dict[str, Tensor]: A dictionary of loss components. """ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] assert len(featmap_sizes) == self.prior_generator.num_levels device = cls_scores[0].device anchor_list, valid_flag_list = self.get_anchors( featmap_sizes, img_metas, device=device) label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1 # calculate common vars for cls and reg assigners at once targets_com = self.process_predictions_and_anchors( anchor_list, valid_flag_list, cls_scores, bbox_preds, img_metas, gt_bboxes_ignore) (anchor_list, valid_flag_list, num_level_anchors_list, cls_score_list, bbox_pred_list, gt_bboxes_ignore_list) = targets_com # classification branch assigner cls_targets = self.get_cls_targets( anchor_list, valid_flag_list, num_level_anchors_list, cls_score_list, bbox_pred_list, gt_bboxes, img_metas, gt_bboxes_ignore_list=gt_bboxes_ignore_list, gt_labels_list=gt_labels, label_channels=label_channels) if cls_targets is None: return None (cls_anchor_list, labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_pos, num_total_neg) = cls_targets num_total_samples = reduce_mean( torch.tensor(num_total_pos, dtype=torch.float, device=device)).item() num_total_samples = max(num_total_samples, 1.0) reweight_factor_per_level = self.calc_reweight_factor(labels_list) cls_losses_cls, = multi_apply( self.loss_cls_single, cls_scores, labels_list, label_weights_list, reweight_factor_per_level, num_total_samples=num_total_samples) # regression branch assigner reg_targets = self.get_reg_targets( anchor_list, valid_flag_list, num_level_anchors_list, cls_score_list, bbox_pred_list, gt_bboxes, img_metas, gt_bboxes_ignore_list=gt_bboxes_ignore_list, gt_labels_list=gt_labels, label_channels=label_channels) if reg_targets is None: return None (reg_anchor_list, labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_pos, num_total_neg) = reg_targets num_total_samples = reduce_mean( torch.tensor(num_total_pos, dtype=torch.float, device=device)).item() num_total_samples = max(num_total_samples, 1.0) reweight_factor_per_level = self.calc_reweight_factor(labels_list) reg_losses_bbox, reg_losses_iou = multi_apply( self.loss_reg_single, reg_anchor_list, bbox_preds, iou_preds, labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, reweight_factor_per_level, num_total_samples=num_total_samples) return dict( loss_cls=cls_losses_cls, loss_bbox=reg_losses_bbox, loss_iou=reg_losses_iou)
[docs] def process_predictions_and_anchors(self, anchor_list, valid_flag_list, cls_scores, bbox_preds, img_metas, gt_bboxes_ignore_list): """Compute common vars for regression and classification targets. Args: anchor_list (list[Tensor]): anchors of each image. valid_flag_list (list[Tensor]): Valid flags of each image. cls_scores (list[Tensor]): Classification scores for all scale levels, each is a 4D-tensor, the channels number is num_base_priors * num_classes. bbox_preds (list[Tensor]): Box energies / deltas for all scale levels, each is a 4D-tensor, the channels number is num_base_priors * 4. img_metas (list[dict]): Meta information of each image, e.g., image size, scaling factor, etc. gt_bboxes_ignore_list (list[Tensor] | None): specify which bounding boxes can be ignored when computing the loss. Return: tuple[Tensor]: A tuple of common loss vars. """ num_imgs = len(img_metas) assert len(anchor_list) == len(valid_flag_list) == num_imgs # anchor number of multi levels num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] num_level_anchors_list = [num_level_anchors] * num_imgs anchor_list_ = [] valid_flag_list_ = [] # concat all level anchors and flags to a single tensor for i in range(num_imgs): assert len(anchor_list[i]) == len(valid_flag_list[i]) anchor_list_.append(torch.cat(anchor_list[i])) valid_flag_list_.append(torch.cat(valid_flag_list[i])) # compute targets for each image if gt_bboxes_ignore_list is None: gt_bboxes_ignore_list = [None for _ in range(num_imgs)] num_levels = len(cls_scores) cls_score_list = [] bbox_pred_list = [] mlvl_cls_score_list = [ cls_score.permute(0, 2, 3, 1).reshape( num_imgs, -1, self.num_base_priors * self.cls_out_channels) for cls_score in cls_scores ] mlvl_bbox_pred_list = [ bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.num_base_priors * 4) for bbox_pred in bbox_preds ] for i in range(num_imgs): mlvl_cls_tensor_list = [ mlvl_cls_score_list[j][i] for j in range(num_levels) ] mlvl_bbox_tensor_list = [ mlvl_bbox_pred_list[j][i] for j in range(num_levels) ] cat_mlvl_cls_score = torch.cat(mlvl_cls_tensor_list, dim=0) cat_mlvl_bbox_pred = torch.cat(mlvl_bbox_tensor_list, dim=0) cls_score_list.append(cat_mlvl_cls_score) bbox_pred_list.append(cat_mlvl_bbox_pred) return (anchor_list_, valid_flag_list_, num_level_anchors_list, cls_score_list, bbox_pred_list, gt_bboxes_ignore_list)
[docs] def get_cls_targets(self, anchor_list, valid_flag_list, num_level_anchors_list, cls_score_list, bbox_pred_list, gt_bboxes_list, img_metas, gt_bboxes_ignore_list=None, gt_labels_list=None, label_channels=1, unmap_outputs=True): """Get cls targets for DDOD head. This method is almost the same as `AnchorHead.get_targets()`. Besides returning the targets as the parent method does, it also returns the anchors as the first element of the returned tuple. Args: anchor_list (list[Tensor]): anchors of each image. valid_flag_list (list[Tensor]): Valid flags of each image. num_level_anchors_list (list[Tensor]): Number of anchors of each scale level of all image. cls_score_list (list[Tensor]): Classification scores for all scale levels, each is a 4D-tensor, the channels number is num_base_priors * num_classes. bbox_pred_list (list[Tensor]): Box energies / deltas for all scale levels, each is a 4D-tensor, the channels number is num_base_priors * 4. gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image. img_metas (list[dict]): Meta information of each image, e.g., image size, scaling factor, etc. gt_bboxes_ignore_list (list[Tensor] | None): specify which bounding boxes can be ignored when computing the loss. gt_labels_list (list[Tensor]): class indices corresponding to each box. label_channels (int): Channel of label. unmap_outputs (bool): Whether to map outputs back to the original set of anchors. Return: tuple[Tensor]: A tuple of cls targets components. """ (all_anchors, all_labels, all_label_weights, all_bbox_targets, all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply( self._get_target_single, anchor_list, valid_flag_list, cls_score_list, bbox_pred_list, num_level_anchors_list, gt_bboxes_list, gt_bboxes_ignore_list, gt_labels_list, img_metas, label_channels=label_channels, unmap_outputs=unmap_outputs, is_cls_assigner=True) # no valid anchors if any([labels is None for labels in all_labels]): return None # sampled anchors of all images num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list]) num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list]) # split targets to a list w.r.t. multiple levels anchors_list = images_to_levels(all_anchors, num_level_anchors_list[0]) labels_list = images_to_levels(all_labels, num_level_anchors_list[0]) label_weights_list = images_to_levels(all_label_weights, num_level_anchors_list[0]) bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors_list[0]) bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors_list[0]) return (anchors_list, labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_pos, num_total_neg)
[docs] def get_reg_targets(self, anchor_list, valid_flag_list, num_level_anchors_list, cls_score_list, bbox_pred_list, gt_bboxes_list, img_metas, gt_bboxes_ignore_list=None, gt_labels_list=None, label_channels=1, unmap_outputs=True): """Get reg targets for DDOD head. This method is almost the same as `AnchorHead.get_targets()` when is_cls_assigner is False. Besides returning the targets as the parent method does, it also returns the anchors as the first element of the returned tuple. Args: anchor_list (list[Tensor]): anchors of each image. valid_flag_list (list[Tensor]): Valid flags of each image. num_level_anchors (int): Number of anchors of each scale level. cls_scores (list[Tensor]): Classification scores for all scale levels, each is a 4D-tensor, the channels number is num_base_priors * num_classes. bbox_preds (list[Tensor]): Box energies / deltas for all scale levels, each is a 4D-tensor, the channels number is num_base_priors * 4. gt_labels_list (list[Tensor]): class indices corresponding to each box. img_metas (list[dict]): Meta information of each image, e.g., image size, scaling factor, etc. gt_bboxes_ignore_list (list[Tensor] | None): specify which bounding boxes can be ignored when computing the loss. Return: tuple[Tensor]: A tuple of reg targets components. """ (all_anchors, all_labels, all_label_weights, all_bbox_targets, all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply( self._get_target_single, anchor_list, valid_flag_list, cls_score_list, bbox_pred_list, num_level_anchors_list, gt_bboxes_list, gt_bboxes_ignore_list, gt_labels_list, img_metas, label_channels=label_channels, unmap_outputs=unmap_outputs, is_cls_assigner=False) # no valid anchors if any([labels is None for labels in all_labels]): return None # sampled anchors of all images num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list]) num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list]) # split targets to a list w.r.t. multiple levels anchors_list = images_to_levels(all_anchors, num_level_anchors_list[0]) labels_list = images_to_levels(all_labels, num_level_anchors_list[0]) label_weights_list = images_to_levels(all_label_weights, num_level_anchors_list[0]) bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors_list[0]) bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors_list[0]) return (anchors_list, labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_pos, num_total_neg)
def _get_target_single(self, flat_anchors, valid_flags, cls_scores, bbox_preds, num_level_anchors, gt_bboxes, gt_bboxes_ignore, gt_labels, img_meta, label_channels=1, unmap_outputs=True, is_cls_assigner=True): """Compute regression, classification targets for anchors in a single image. Args: flat_anchors (Tensor): Multi-level anchors of the image, which are concatenated into a single tensor of shape (num_base_priors, 4). valid_flags (Tensor): Multi level valid flags of the image, which are concatenated into a single tensor of shape (num_base_priors,). cls_scores (Tensor): Classification scores for all scale levels of the image. bbox_preds (Tensor): Box energies / deltas for all scale levels of the image. num_level_anchors (list[int]): Number of anchors of each scale level. gt_bboxes (Tensor): Ground truth bboxes of the image, shape (num_gts, 4). gt_bboxes_ignore (Tensor): Ground truth bboxes to be ignored, shape (num_ignored_gts, ). gt_labels (Tensor): Ground truth labels of each box, shape (num_gts, ). img_meta (dict): Meta info of the image. label_channels (int): Channel of label. Default: 1. unmap_outputs (bool): Whether to map outputs back to the original set of anchors. Default: True. is_cls_assigner (bool): Classification or regression. Default: True. Returns: tuple: N is the number of total anchors in the image. - labels (Tensor): Labels of all anchors in the image with \ shape (N, ). - label_weights (Tensor): Label weights of all anchor in the \ image with shape (N, ). - bbox_targets (Tensor): BBox targets of all anchors in the \ image with shape (N, 4). - bbox_weights (Tensor): BBox weights of all anchors in the \ image with shape (N, 4) - pos_inds (Tensor): Indices of positive anchor with shape \ (num_pos, ). - neg_inds (Tensor): Indices of negative anchor with shape \ (num_neg, ). """ inside_flags = anchor_inside_flags(flat_anchors, valid_flags, img_meta['img_shape'][:2], self.train_cfg.allowed_border) if not inside_flags.any(): return (None, ) * 7 # assign gt and sample anchors anchors = flat_anchors[inside_flags, :] num_level_anchors_inside = self.get_num_level_anchors_inside( num_level_anchors, inside_flags) bbox_preds_valid = bbox_preds[inside_flags, :] cls_scores_valid = cls_scores[inside_flags, :] assigner = self.cls_assigner if is_cls_assigner else self.reg_assigner # decode prediction out of assigner bbox_preds_valid = self.bbox_coder.decode(anchors, bbox_preds_valid) assign_result = assigner.assign(anchors, num_level_anchors_inside, gt_bboxes, gt_bboxes_ignore, gt_labels, cls_scores_valid, bbox_preds_valid) sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes) num_valid_anchors = anchors.shape[0] bbox_targets = torch.zeros_like(anchors) bbox_weights = torch.zeros_like(anchors) labels = anchors.new_full((num_valid_anchors, ), self.num_classes, dtype=torch.long) label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: if hasattr(self, 'bbox_coder'): pos_bbox_targets = self.bbox_coder.encode( sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes) else: # used in VFNetHead pos_bbox_targets = sampling_result.pos_gt_bboxes bbox_targets[pos_inds, :] = pos_bbox_targets bbox_weights[pos_inds, :] = 1.0 if gt_labels is None: # Only rpn gives gt_labels as None # Foreground is the first class since v2.5.0 labels[pos_inds] = 0 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if self.train_cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of anchors if unmap_outputs: num_total_anchors = flat_anchors.size(0) anchors = unmap(anchors, num_total_anchors, inside_flags) labels = unmap( labels, num_total_anchors, inside_flags, fill=self.num_classes) label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) return (anchors, labels, label_weights, bbox_targets, bbox_weights, pos_inds, neg_inds)
[docs] def get_num_level_anchors_inside(self, num_level_anchors, inside_flags): """Get the anchors of each scale level inside. Args: num_level_anchors (list[int]): Number of anchors of each scale level. inside_flags (Tensor): Multi level inside flags of the image, which are concatenated into a single tensor of shape (num_base_priors,). Returns: list[int]: Number of anchors of each scale level inside. """ split_inside_flags = torch.split(inside_flags, num_level_anchors) num_level_anchors_inside = [ int(flags.sum()) for flags in split_inside_flags ] return num_level_anchors_inside
Read the Docs v: dev
Versions
latest
stable
3.x
v3.0.0rc0
v2.28.2
v2.28.1
v2.28.0
v2.27.0
v2.26.0
v2.25.3
v2.25.2
v2.25.1
v2.25.0
v2.24.1
v2.24.0
v2.23.0
v2.22.0
v2.21.0
v2.20.0
v2.19.1
v2.19.0
v2.18.1
v2.18.0
v2.17.0
v2.16.0
v2.15.1
v2.15.0
v2.14.0
v2.13.0
v2.12.0
v2.11.0
v2.10.0
v2.9.0
v2.8.0
v2.7.0
v2.6.0
v2.5.0
v2.4.0
v2.3.0
v2.2.1
v2.2.0
v2.1.0
v2.0.0
v1.2.0
test-3.0.0rc0
dev-3.x
dev
Downloads
pdf
html
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.