Source code for context_builder.decoders

import torch
import torch.nn as nn
import torch.nn.functional as F

class DecoderAttention(nn.Module):
    def __init__(self, embedding, context_size, attention_size, num_layers=1,
                 dropout=0.1, bidirectional=False, LSTM=False):
        """Attention decoder for retrieving attention from context vector.

            Parameters
            ----------
            embedding : nn.Embedding
                Embedding layer to use.

            context_size : int
                Size of context to expect as input.

            attention_size : int
                Size of attention vector.

            num_layers : int, default=1
                Number of recurrent layers to use.

            dropout : float, default=0.1
                Default dropout rate to use.

            bidirectional : boolean, default=False
                If True, use bidirectional recurrent layer.

            LSTM : boolean, default=False
                If True, use LSTM instead of GRU.
            """
        # Call super
        super().__init__()

        ################################################################
        #                       Initialise layers                      #
        ################################################################

        # Embedding layer
        self.embedding = embedding

        # Recurrency layer
        self.recurrent = (nn.LSTM if LSTM else nn.GRU)(
            input_size    = embedding.embedding_dim,
            hidden_size   = context_size,
            num_layers    = num_layers,
            batch_first   = True,
            bidirectional = bidirectional,
        )

        # Attention layer
        self.attn = nn.Linear(
            in_features  = context_size * num_layers * (1+bidirectional),
            out_features = attention_size,
        )

        # Dropout layer
        self.dropout = nn.Dropout(dropout)
    def forward(self, context_vector, previous_input=None):
        """Compute attention based on previous input and context vector.

            Parameters
            ----------
            context_vector : torch.Tensor of shape=(num_layers * num_directions, n_samples, context_size)
                Context vector (recurrent hidden state) from which to compute
                attention.

            previous_input : torch.Tensor of shape=(n_samples,)
                Previously predicted input events from which to compute
                attention.

            Returns
            -------
            attention : torch.Tensor of shape=(n_samples, attention_size)
                Computed attention weights, normalised to sum to 1.

            context_vector : torch.Tensor of shape=(num_layers * num_directions, n_samples, context_size)
                Updated context vector.
            """
        # Get embedding from input
        embedded = self.embedding(previous_input)\
                   .view(-1, 1, self.embedding.embedding_dim)

        # Apply dropout layer
        embedded = self.dropout(embedded)

        # Compute attention and pass hidden state through to the next step
        attention, context_vector = self.recurrent(embedded, context_vector)

        # Apply dropout layer
        # attention = self.dropout(attention)

        # Compute attention
        attention = self.attn(attention.squeeze(1))

        # Normalise attention weights, i.e. sum to 1
        attention = F.softmax(attention, dim=1)

        # Return result
        return attention, context_vector
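
As an illustration, the sketch below drives DecoderAttention for a single decoding step. It is not part of the original module; the concrete sizes (vocabulary, batch, context_size, attention_size) are hypothetical values chosen purely for demonstration, using the default single-layer unidirectional GRU.

import torch
import torch.nn as nn

# Hypothetical sizes, for illustration only
n_samples, vocab_size, embedding_dim = 16, 100, 32
context_size, attention_size         = 64, 10

embedding = nn.Embedding(vocab_size, embedding_dim)
decoder_attention = DecoderAttention(
    embedding      = embedding,
    context_size   = context_size,
    attention_size = attention_size,
)

# Initial context vector (GRU hidden state) and previously predicted events
context_vector = torch.zeros(1, n_samples, context_size)
previous_input = torch.zeros(n_samples, dtype=torch.long)

# Single decoding step
attention, context_vector = decoder_attention(context_vector, previous_input)
# attention      has shape (n_samples, attention_size); each row sums to 1
# context_vector has shape (1, n_samples, context_size)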
class DecoderEvent(nn.Module):
    def __init__(self, input_size, output_size, dropout=0.1):
        """Event decoder for predicting events from attention-weighted context.

            Parameters
            ----------
            input_size : int
                Size of the input vectors on which attention is applied.

            output_size : int
                Size of output, i.e. number of possible output events.

            dropout : float, default=0.1
                Default dropout rate to use.
            """
        # Call super
        super().__init__()

        # Initialise layers
        self.hidden  = nn.Linear(input_size, input_size)
        self.out     = nn.Linear(input_size, output_size)
        self.dropout = nn.Dropout(dropout)
    def forward(self, X, attention):
        """Decode X with given attention.

            Parameters
            ----------
            X : torch.Tensor of shape=(n_samples, context_size, hidden_size)
                Input samples on which to apply attention.

            attention : torch.Tensor of shape=(n_samples, context_size)
                Attention to use for decoding step.

            Returns
            -------
            output : torch.Tensor of shape=(n_samples, output_size)
                Decoded output.
            """
        # Apply attention (by computing batch matrix-matrix product)
        attn_applied = torch.bmm(attention.unsqueeze(1), X).squeeze(1)
        # attn_applied = self.dropout(attn_applied)

        # Compute prediction based on latent dimension
        output = self.hidden(attn_applied).relu()
        # output = self.dropout(output)
        output = self.out(output)

        # Apply softmax for distribution
        output = F.log_softmax(output, dim=1)

        # Return result
        return output
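
The two decoders are naturally used together: DecoderAttention produces attention weights over the events in a context, and DecoderEvent applies those weights to the context to predict an output event. The sketch below wires them together under assumed shapes; in particular, feeding the embedded context to DecoderEvent and all concrete sizes are illustrative assumptions, not necessarily how the surrounding context_builder module combines them.

import torch
import torch.nn as nn

# Hypothetical sizes, for illustration only
n_samples, vocab_size, embedding_dim = 16, 100, 32
context_size, context_length         = 64, 10    # context_size is the recurrent hidden size

embedding = nn.Embedding(vocab_size, embedding_dim)

# attention_size must equal the number of events in the context, since the
# attention weights are applied over that dimension in DecoderEvent.forward
decoder_attention = DecoderAttention(
    embedding      = embedding,
    context_size   = context_size,
    attention_size = context_length,
)
decoder_event = DecoderEvent(
    input_size  = embedding_dim,
    output_size = vocab_size,
)

# Context of previous events, embedded to (n_samples, context_length, embedding_dim)
context = torch.randint(0, vocab_size, (n_samples, context_length))
X       = embedding(context)

# Initial context vector and previously predicted events
context_vector = torch.zeros(1, n_samples, context_size)
previous_input = torch.zeros(n_samples, dtype=torch.long)

# Attention step followed by event decoding step
attention, context_vector = decoder_attention(context_vector, previous_input)
output = decoder_event(X, attention)
# output has shape (n_samples, vocab_size) and contains log-probabilities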