# Source code for hyperion.torch.layer_blocks.transformer_feedforward

"""
 Copyright 2019 Johns Hopkins University  (Author: Jesus Villalba, Nanxin Chen)
 Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
"""

import torch
import torch.nn as nn

from ..layers import ActivationFactory as AF
from ..layers import Dropout1d


class PositionwiseFeedForward(nn.Module):
    """Positionwise feed forward layer for transformer.

    Applies Linear -> activation -> (dropout) -> Linear independently to
    every time step of the input.

    Attributes:
       num_feats: input/output dimension
       hid_feats: number of hidden units
       activation: activation function for hidden layers
       dropout_rate: dropout rate
       time_dim: time dimension in the input tensor
    """

    def __init__(
        self, num_feats, hid_feats, activation="relu6", dropout_rate=0, time_dim=1
    ):
        """Builds the two linear layers, the activation and optional dropout.

        Args:
          num_feats: input/output dimension.
          hid_feats: number of hidden units.
          activation: activation spec passed to ``ActivationFactory.create``.
          dropout_rate: dropout rate; dropout is only created when > 0.
          time_dim: time dimension in the input tensor.
        """
        super().__init__()
        self.w_1 = nn.Linear(num_feats, hid_feats)
        self.w_2 = nn.Linear(hid_feats, num_feats)
        self.dropout_rate = dropout_rate
        self.time_dim = time_dim
        self.activation = AF.create(activation)
        if self.dropout_rate > 0:
            self.dropout = torch.nn.Dropout(dropout_rate)

    def forward(self, x):
        """Forward function.

        Args:
          x: input size=(batch, time, num_feats) when ``time_dim == 1``;
             otherwise dimension ``time_dim`` holds time and dimension 1
             holds the features.

        Returns:
          tensor with the same layout and size as the input.
        """
        # nn.Linear acts on the last dimension, so the features must be moved
        # there when the time axis is not at position 1.
        swap_axes = self.time_dim != 1
        if swap_axes:
            x = x.transpose(1, self.time_dim)

        x = self.activation(self.w_1(x))
        if self.dropout_rate > 0:
            x = self.dropout(x)

        x = self.w_2(x)
        if swap_axes:
            # Restore the caller's original layout.
            x = x.transpose(1, self.time_dim)

        return x


class Conv1dx2(nn.Module):
    """Two layer Conv1d for transformer feed-forward block

    Introduced in `FastSpeech: Fast, Robust and Controllable Text to Speech`_.
    .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`:
        https://arxiv.org/pdf/1905.09263.pdf

    Attributes:
      num_channels: input/output channels.
      hid_channels: hidden channels
      kernel_size: conv kernel size
      dropout_rate: dropout rate
      time_dim: indicates what is the time dimension in the input tensor.
      activation: activation function for hidden layers
    """

    def __init__(
        self,
        num_channels,
        hid_channels,
        kernel_size,
        dropout_rate=0,
        time_dim=-1,
        activation="relu6",
    ):
        """Builds the two convolutions, the activation and optional dropout.

        Args:
          num_channels: input/output channels.
          hid_channels: hidden channels.
          kernel_size: conv kernel size; with stride 1 and padding
            ``(kernel_size - 1) // 2`` an odd kernel keeps the time length.
          dropout_rate: dropout rate; dropout is only created when > 0.
          time_dim: time dimension in the input tensor.
          activation: activation spec passed to ``ActivationFactory.create``.
            Placed last so the existing positional argument order is kept.
        """
        super().__init__()
        padding = (kernel_size - 1) // 2
        self.w_1 = nn.Conv1d(
            num_channels,
            hid_channels,
            kernel_size,
            stride=1,
            padding=padding,
        )
        self.w_2 = nn.Conv1d(
            hid_channels,
            num_channels,
            kernel_size,
            stride=1,
            padding=padding,
        )
        self.dropout_rate = dropout_rate
        self.time_dim = time_dim
        self.activation = AF.create(activation)
        if self.dropout_rate > 0:
            self.dropout = Dropout1d(dropout_rate)

    def forward(self, x):
        """Calculates forward propagation.
        Args:
            x: input tensors with size=(batch, time, num_channels) or
               size=(batch, num_channels, time).
        Returns:
            output tensor same size as input
        """
        # nn.Conv1d convolves over the last dimension, so time must be moved
        # there. Tensor.transpose returns a new tensor; it must be assigned.
        swap_axes = self.time_dim != -1
        if swap_axes:
            x = x.transpose(-1, self.time_dim)

        x = self.activation(self.w_1(x))
        if self.dropout_rate > 0:
            x = self.dropout(x)
        x = self.w_2(x)

        if swap_axes:
            # Restore the caller's original layout.
            x = x.transpose(-1, self.time_dim)

        return x


class Conv1dLinear(nn.Module):
    """Conv1D + Linear for Transformer block.

    The "linear" layer is implemented as a Conv1d with kernel size 1.

    Attributes:
      num_channels: input/output channels.
      hid_channels: hidden channels
      kernel_size: conv kernel size
      dropout_rate: dropout rate
      time_dim: indicates what is the time dimension in the input tensor.
      activation: activation function for hidden layers

    """

    def __init__(
        self,
        num_channels,
        hid_channels,
        kernel_size,
        dropout_rate=0,
        time_dim=-1,
        activation="relu6",
    ):
        """Builds the convolution, the pointwise projection and optional dropout.

        Args:
          num_channels: input/output channels.
          hid_channels: hidden channels.
          kernel_size: kernel size of the first convolution; with stride 1 and
            padding ``(kernel_size - 1) // 2`` an odd kernel keeps the time
            length.
          dropout_rate: dropout rate; dropout is only created when > 0.
          time_dim: time dimension in the input tensor.
          activation: activation spec passed to ``ActivationFactory.create``.
            Placed last so the existing positional argument order is kept.
        """
        super().__init__()
        self.w_1 = nn.Conv1d(
            num_channels,
            hid_channels,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
        )
        # Kernel size 1: acts as a per-time-step linear projection.
        self.w_2 = nn.Conv1d(hid_channels, num_channels, 1)

        self.dropout_rate = dropout_rate
        self.time_dim = time_dim
        self.activation = AF.create(activation)
        if self.dropout_rate > 0:
            self.dropout = Dropout1d(dropout_rate)

    def forward(self, x):
        """Calculates forward propagation.
        Args:
            x: input tensors with size=(batch, time, num_channels) or
               size=(batch, num_channels, time).
        Returns:
            output tensor same size as input
        """
        # nn.Conv1d convolves over the last dimension, so time must be moved
        # there. Tensor.transpose returns a new tensor; it must be assigned.
        swap_axes = self.time_dim != -1
        if swap_axes:
            x = x.transpose(-1, self.time_dim)

        x = self.activation(self.w_1(x))
        if self.dropout_rate > 0:
            x = self.dropout(x)
        x = self.w_2(x)

        if swap_axes:
            # Restore the caller's original layout.
            x = x.transpose(-1, self.time_dim)

        return x