NormalizedBoxCenterEncoder in Object Detection


#1

Hi,
I am trying to understand the SSD code in gluoncv. I found a class, NormalizedBoxCenterEncoder, in gluoncv.model_zoo.ssd.ssd.py.
It looks like it takes box_preds and anchors, and moves the anchors onto the predicted boxes using the standard deviations and means. I would like to know why this is done and what effect this part has. Your time and consideration will be highly appreciated :smiley:
The code is here:

class NormalizedBoxCenterDecoder(gluon.HybridBlock):
    """Turn normalized center-offset predictions back into corner boxes.

    Every predicted component is first de-normalized as
    ``value * std + mean``. The first two components are then scaled by the
    anchor width/height and added to the anchor center; the last two are
    exponentiated and used as multiplicative factors on the anchor
    width/height.

    This decoder must cooperate with NormalizedBoxCenterEncoder of same `stds`
    in order to get properly reconstructed bounding boxes.

    Returned bounding boxes are using corner type: `x_{min}, y_{min}, x_{max}, y_{max}`.

    Parameters
    ----------
    stds : array-like of size 4
        Per-component scale multiplied onto the encoded values,
        default is (0.1, 0.1, 0.2, 0.2).
    means : array-like of size 4
        Per-component offset added to the scaled encoded values,
        default is (0., 0., 0., 0.).
    convert_anchor : bool, default is False
        If True, anchors are passed through ``BBoxCornerToCenter(split=True)``
        before decoding. If False, anchors are simply split into four
        components along the last axis, which are then used as
        center-x, center-y, width and height.
    clip : float, default is None
        If set to a truthy value, the exponentiated width/height factors are
        capped at this value before being applied to the anchor size.

    """
    def __init__(self, stds=(0.1, 0.1, 0.2, 0.2), means=(0., 0., 0., 0.),
                 convert_anchor=False, clip=None):
        super(NormalizedBoxCenterDecoder, self).__init__()
        assert len(stds) == 4, "Box Encoder requires 4 std values."
        self._stds = stds
        self._means = means
        self._clip = clip
        # Optional anchor pre-processing; None means "anchors are used as given".
        self.corner_to_center = BBoxCornerToCenter(split=True) if convert_anchor else None

    def hybrid_forward(self, F, x, anchors):
        """Decode predictions `x` against `anchors`; returns corner boxes."""
        # anc[0], anc[1]: anchor center; anc[2], anc[3]: anchor width/height.
        if self.corner_to_center is None:
            anc = F.split(anchors, axis=-1, num_outputs=4)
        else:
            anc = self.corner_to_center(anchors)
        pred = F.split(x, axis=-1, num_outputs=4)
        stds = self._stds
        means = self._means

        # Center: de-normalized offset, scaled by anchor size, shifted by anchor center.
        shift_x = F.broadcast_mul(pred[0] * stds[0] + means[0], anc[2])
        center_x = F.broadcast_add(shift_x, anc[0])
        shift_y = F.broadcast_mul(pred[1] * stds[1] + means[1], anc[3])
        center_y = F.broadcast_add(shift_y, anc[1])

        # Size: predictions live in log-space, so exponentiate to get scale factors.
        scale_w = F.exp(pred[2] * stds[2] + means[2])
        scale_h = F.exp(pred[3] * stds[3] + means[3])
        if self._clip:
            scale_w = F.minimum(scale_w, self._clip)
            scale_h = F.minimum(scale_h, self._clip)

        # Half extents, so corners are center -/+ half size.
        half_w = F.broadcast_mul(scale_w, anc[2]) / 2
        half_h = F.broadcast_mul(scale_h, anc[3]) / 2
        return F.concat(center_x - half_w, center_y - half_h,
                        center_x + half_w, center_y + half_h, dim=-1)


class MultiClassEncoder(gluon.HybridBlock):
    """Encode classification training target given matching results.

    This encoder will assign training target of matched bounding boxes to
    ground-truth label + 1 and negative samples with label 0.
    Ignored samples will be assigned with `ignore_label`, whose default is -1.

    Parameters
    ----------
    ignore_label : float
        Assigned to un-matched samples, they are neither positive or negative during
        training, and should be excluded in loss function. Default is -1.

    """
    def __init__(self, ignore_label=-1):
        super(MultiClassEncoder, self).__init__()
        self._ignore_label = ignore_label

    def hybrid_forward(self, F, samples, matches, refs):
        """HybridBlock, handle multi batch correctly

        Only operators from `F` are used and no array shape is read, so the
        same code runs on both the imperative and the hybridized (symbolic)
        path.

        Parameters
        ----------
        samples: (B, N), value +1 (positive), -1 (negative), 0 (ignore)
        matches: (B, N), value range [0, M)
        refs: (B, M), value range [0, num_fg_class), excluding background

        Returns
        -------
        targets: (B, N), value range [0, num_fg_class + 1), including background;
            ignored samples carry `ignore_label` instead.

        """
        # samples (B, N) (+1, -1, 0: ignore), matches (B, N) [0, M), refs (B, M)
        # reshape refs (B, M) -> (B, 1, M) -> (B, N, M); axis 1 is expanded to the
        # size of matches' axis 1 without touching `.shape`, which symbols lack
        refs = F.broadcast_like(F.reshape(refs, (0, 1, -1)), matches,
                                lhs_axes=(1,), rhs_axes=(1,))
        # pick the matched label per sample: (B, N, M) -> (B, N); +1 because
        # 0 is reserved for the background class
        target_ids = F.pick(refs, matches, axis=2) + 1
        # positives keep their class id, everything else starts as ignore_label
        targets = F.where(samples > 0.5, target_ids,
                          F.ones_like(target_ids) * self._ignore_label)
        # samples -1: set negative samples to 0 (background)
        targets = F.where(samples < -0.5, F.zeros_like(targets), targets)
        return targets

# Usage: build the decoder with the same `stds` the encoder was given ...
bbox_decoder = NormalizedBoxCenterDecoder(stds=stds)
# ... then call it on the raw box predictions and their anchors to obtain
# corner-format boxes (the result is not stored here).
bbox_decoder(box_preds, anchors)