Source code for context_builder.loss

from torch.autograd import Variable
import torch
import torch.nn as nn

class LabelSmoothing(nn.Module):
    def __init__(self, size, smoothing=0.0):
        """Implements label smoothing loss function

        Parameters
        ----------
        size : int
            Number of labels

        smoothing : float, default=0.0
            Smoothing factor to apply
        """
        # Initialise super
        super(LabelSmoothing, self).__init__()

        # Set KL-Divergence loss function
        self.criterion = nn.KLDivLoss(reduction='none')
        self.criterion_attn = nn.MSELoss()

        # Set size
        self.size = size

        # Set confidence and smoothing
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
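    # Example (illustrative, not from the original source): with size=5 and
    # smoothing=0.1, the smoothed target distribution for true label 2 is
    #   [0.025, 0.025, 0.900, 0.025, 0.025]
    # i.e. confidence = 1 - 0.1 = 0.9 on the true label and
    # smoothing / (size - 1) = 0.1 / 4 = 0.025 on every other label.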
    def forward(self, x, target, weights=None, attention=None):
        """Compute the label smoothing loss.

        Parameters
        ----------
        x : torch.Tensor of shape=(n_samples, size)
            Predicted log-probabilities over labels.

        target : torch.Tensor of shape=(n_samples,)
            Ground-truth label indices.

        weights : torch.Tensor of shape=(n_samples,), optional
            Optional per-sample weights applied to the loss.

        attention : torch.Tensor, optional
            Optional attention values, penalised (via MSE) for deviating
            from a uniform distribution over their second dimension.
        """
        # Check if shape of data is the same as expected
        assert x.size(-1) == self.size

        # Match target dimensions to input if necessary
        if x.ndim != target.ndim:
            target = target.unsqueeze(-1)

        # Create true (smoothed) distribution
        true_dist = x.data.clone()
        true_dist.fill_(self.smoothing / (self.size - 1))
        true_dist.scatter_(1, target, self.confidence)

        # Apply criterion
        result = self.criterion(x, Variable(true_dist, requires_grad=False))

        # Apply weights if necessary
        if weights is not None:
            result *= weights.to(torch.float).unsqueeze(1)

        # Get result as sum
        result = result.sum()

        # Apply attention criterion if necessary
        if attention is not None:
            target = torch.ones(attention.shape, device=x.device) / attention.shape[1]
            return result + 0.05 * self.criterion_attn(attention, target)
        else:
            return result
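A minimal usage sketch (an assumption for illustration, not part of context_builder.loss itself): because the loss is built on nn.KLDivLoss, the input x must contain log-probabilities, e.g. the output of F.log_softmax. All names and shapes below are hypothetical.

import torch
import torch.nn.functional as F

# Hypothetical setup: 10 labels, smoothing factor 0.1
criterion = LabelSmoothing(size=10, smoothing=0.1)

# Hypothetical batch: 32 samples with raw scores (logits) over 10 labels
logits = torch.randn(32, 10, requires_grad=True)
target = torch.randint(0, 10, (32,))

# nn.KLDivLoss expects inputs in log-space, so apply log_softmax first
log_probs = F.log_softmax(logits, dim=-1)

# Compute the smoothed loss and backpropagate
loss = criterion(log_probs, target)
loss.backward()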