import torch
import torch.nn as nn
class LabelSmoothing(nn.Module):

    def __init__(self, size, smoothing=0.0):
        """Implements the label smoothing loss function.

        Parameters
        ----------
        size : int
            Number of labels.

        smoothing : float, default=0.0
            Smoothing factor to apply.
        """
        # Initialise super
        super().__init__()
        # Set KL-divergence loss function; note that KLDivLoss expects
        # its input to contain log-probabilities
        self.criterion = nn.KLDivLoss(reduction='none')
        self.criterion_attn = nn.MSELoss()
        # Set size
        self.size = size
        # Set confidence and smoothing
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
    def forward(self, x, target, weights=None, attention=None):
        """Forward data, where x contains log-probabilities over labels."""
        # Check that the label dimension has the expected size
        assert x.size(-1) == self.size
        # Add a trailing label dimension to target if necessary
        if x.ndim != target.ndim:
            target = target.unsqueeze(-1)
        # Create the smoothed true distribution: spread the smoothing mass
        # uniformly over the non-target labels, then place the remaining
        # confidence on the target label
        true_dist = torch.full_like(x, self.smoothing / (self.size - 1))
        true_dist.scatter_(1, target, self.confidence)
        # Apply criterion; true_dist is constructed outside the autograd
        # graph, so no gradient flows through the target distribution
        result = self.criterion(x, true_dist)
        # Apply per-sample weights if given
        if weights is not None:
            result *= weights.to(torch.float).unsqueeze(1)
        # Sum the loss over labels and samples
        result = result.sum()
        # Apply attention criterion if given: regularise attention towards
        # a uniform distribution over its second dimension, with a fixed
        # weight of 0.05 on this auxiliary term
        if attention is not None:
            attn_target = torch.full_like(attention, 1.0 / attention.shape[1])
            return result + 0.05 * self.criterion_attn(attention, attn_target)
        return result
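

# A minimal usage sketch (an illustrative assumption, not part of the
# original module): LabelSmoothing expects log-probabilities, so raw
# logits are passed through log_softmax first. All shapes, the smoothing
# value, and the weight/attention tensors below are made up for the demo.
if __name__ == '__main__':
    batch_size, n_labels = 4, 10
    criterion = LabelSmoothing(size=n_labels, smoothing=0.1)
    # Log-probabilities over n_labels for each sample in the batch
    log_probs = torch.log_softmax(torch.randn(batch_size, n_labels), dim=-1)
    # Integer class labels, one per sample
    labels = torch.randint(0, n_labels, (batch_size,))
    loss = criterion(log_probs, labels)
    print('plain loss:', loss.item())
    # With per-sample weights and an auxiliary attention term that is
    # regularised towards a uniform distribution
    weights = torch.tensor([1.0, 1.0, 0.5, 0.5])
    attention = torch.softmax(torch.randn(batch_size, 6), dim=-1)
    loss = criterion(log_probs, labels, weights=weights, attention=attention)
    print('weighted loss + attention term:', loss.item())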