Source code for hyperion.torch.layer_blocks.transformer_feedforward

"""
 Copyright 2019 Johns Hopkins University  (Author: Jesus Villalba, Nanxin Chen)
 Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
"""

import torch
import torch.nn as nn

from ..layers import ActivationFactory as AF
from ..layers import Dropout1d


class PositionwiseFeedForward(nn.Module):
    """Position-wise feed-forward layer for transformer.

    Applies Linear -> activation -> (optional dropout) -> Linear over the
    last dimension of the input, independently for every time step.

    Attributes:
      num_feats: input/output dimension
      hid_feats: number of hidden units
      activation: activation function for hidden layers
      dropout_rate: dropout rate
      time_dim: time dimension in the input tensor
    """

    def __init__(
        self, num_feats, hid_feats, activation="relu6", dropout_rate=0, time_dim=1
    ):
        super().__init__()
        self.w_1 = nn.Linear(num_feats, hid_feats)
        self.w_2 = nn.Linear(hid_feats, num_feats)
        self.dropout_rate = dropout_rate
        self.time_dim = time_dim
        self.activation = AF.create(activation)
        # The dropout module only exists when it is actually used;
        # forward() checks dropout_rate before touching it.
        if self.dropout_rate > 0:
            self.dropout = torch.nn.Dropout(dropout_rate)

    def forward(self, x):
        """Forward function.

        Args:
          x: input with size=(batch, time, num_feats) when time_dim == 1;
             otherwise dimension 1 and ``time_dim`` are swapped on entry so
             that the linear layers act on the feature dimension.

        Returns:
          Tensor with the same size and layout as the input.
        """
        # nn.Linear operates on the last dimension, so move time to dim 1.
        swap_dims = self.time_dim != 1
        if swap_dims:
            x = x.transpose(1, self.time_dim)

        x = self.activation(self.w_1(x))
        if self.dropout_rate > 0:
            x = self.dropout(x)

        x = self.w_2(x)

        # Restore the caller's original layout.
        if swap_dims:
            x = x.transpose(1, self.time_dim)

        return x
class Conv1dx2(nn.Module):
    """Two layer Conv1d for transformer feed-forward block

    Introduced in `FastSpeech: Fast, Robust and Controllable Text to Speech`_.

    .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`:
        https://arxiv.org/pdf/1905.09263.pdf

    Attributes:
      num_channels: input/output channels.
      hid_channels: hidden channels
      kernel_size: conv kernel size
      dropout_rate: dropout rate
      time_dim: indicates what is the time dimension in the input tensor.
      activation: activation function for hidden layers
    """

    def __init__(
        self,
        num_channels,
        hid_channels,
        kernel_size,
        dropout_rate=0,
        time_dim=-1,
        activation="relu6",
    ):
        # `activation` is appended after the pre-existing parameters so that
        # positional calls written against the old signature keep working.
        super().__init__()
        # Both convolutions use stride 1 and padding (kernel_size - 1) // 2.
        padding = (kernel_size - 1) // 2
        self.w_1 = nn.Conv1d(
            num_channels,
            hid_channels,
            kernel_size,
            stride=1,
            padding=padding,
        )
        self.w_2 = nn.Conv1d(
            hid_channels,
            num_channels,
            kernel_size,
            stride=1,
            padding=padding,
        )
        self.dropout_rate = dropout_rate
        self.time_dim = time_dim
        self.activation = AF.create(activation)
        # The dropout module only exists when it is actually used;
        # forward() checks dropout_rate before touching it.
        if self.dropout_rate > 0:
            self.dropout = Dropout1d(dropout_rate)

    def forward(self, x):
        """Calculates forward propagation.

        Args:
          x: input tensors with size=(batch, time, num_channels) or
             size=(batch, num_channels, time).

        Returns:
          Output tensor in the same layout as the input.
        """
        # nn.Conv1d expects (batch, channels, time). Tensor.transpose returns
        # a new view rather than modifying x, so the result must be assigned.
        swap_dims = self.time_dim != -1
        if swap_dims:
            x = x.transpose(-1, self.time_dim)

        x = self.activation(self.w_1(x))
        if self.dropout_rate > 0:
            x = self.dropout(x)

        x = self.w_2(x)

        # Restore the caller's original layout.
        if swap_dims:
            x = x.transpose(-1, self.time_dim)

        return x
class Conv1dLinear(nn.Module):
    """Conv1D + Linear for Transformer block.

    The second layer is a Conv1d with kernel size 1.

    Attributes:
      num_channels: input/output channels.
      hid_channels: hidden channels
      kernel_size: conv kernel size
      dropout_rate: dropout rate
      time_dim: indicates what is the time dimension in the input tensor.
      activation: activation function for hidden layers
    """

    def __init__(
        self,
        num_channels,
        hid_channels,
        kernel_size,
        dropout_rate=0,
        time_dim=-1,
        activation="relu6",
    ):
        # `activation` is appended after the pre-existing parameters so that
        # positional calls written against the old signature keep working.
        super().__init__()
        self.w_1 = nn.Conv1d(
            num_channels,
            hid_channels,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
        )
        # Kernel size 1: mixes channels only, one time step at a time.
        self.w_2 = nn.Conv1d(hid_channels, num_channels, 1)
        self.dropout_rate = dropout_rate
        self.time_dim = time_dim
        self.activation = AF.create(activation)
        # The dropout module only exists when it is actually used;
        # forward() checks dropout_rate before touching it.
        if self.dropout_rate > 0:
            self.dropout = Dropout1d(dropout_rate)

    def forward(self, x):
        """Calculates forward propagation.

        Args:
          x: input tensors with size=(batch, time, num_channels) or
             size=(batch, num_channels, time).

        Returns:
          Output tensor in the same layout as the input.
        """
        # nn.Conv1d expects (batch, channels, time). Tensor.transpose returns
        # a new view rather than modifying x, so the result must be assigned.
        swap_dims = self.time_dim != -1
        if swap_dims:
            x = x.transpose(-1, self.time_dim)

        x = self.activation(self.w_1(x))
        if self.dropout_rate > 0:
            x = self.dropout(x)

        x = self.w_2(x)

        # Restore the caller's original layout.
        if swap_dims:
            x = x.transpose(-1, self.time_dim)

        return x