Source code for hyperion.torch.layer_blocks.resnet1d_blocks

"""
 Copyright 2020 Johns Hopkins University  (Author: Jesus Villalba)
 Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
"""
import torch.nn as nn
from torch.nn import Conv1d, BatchNorm1d

from ..layers import ActivationFactory as AF
from ..layers import Dropout1d, DropConnect1d, Interpolate
from ..layers.subpixel_convs import SubPixelConv1d
from .se_blocks import SEBlock1d


def _convk(
    in_channels, out_channels, kernel_size=3, stride=1, groups=1, dilation=1, bias=False
):
    """Build a 1d convolution of width ``kernel_size`` with symmetric padding.

    The padding on each side is ``dilation * (kernel_size - 1) // 2``.

    Args:
      in_channels: number of input channels.
      out_channels: number of output channels.
      kernel_size: convolution kernel size.
      stride: convolution stride.
      groups: number of convolution groups.
      dilation: kernel dilation factor.
      bias: if True, the convolution has a learnable bias.

    Returns:
      The configured ``Conv1d`` module.
    """
    conv_kwargs = dict(
        kernel_size=kernel_size,
        stride=stride,
        padding=dilation * (kernel_size - 1) // 2,
        dilation=dilation,
        groups=groups,
        bias=bias,
    )
    return Conv1d(in_channels, out_channels, **conv_kwargs)
def _conv1(in_channels, out_channels, stride=1, bias=False):
    """Build a point-wise (kernel size 1) 1d convolution.

    Args:
      in_channels: number of input channels.
      out_channels: number of output channels.
      stride: convolution stride.
      bias: if True, the convolution has a learnable bias.

    Returns:
      The configured ``Conv1d`` module.
    """
    pointwise = Conv1d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=1,
        stride=stride,
        bias=bias,
    )
    return pointwise
def _subpixel_conv1(in_channels, out_channels, stride=1, bias=False):
    """Build a point-wise (kernel size 1) subpixel 1d convolution.

    Args:
      in_channels: number of input channels.
      out_channels: number of output channels.
      stride: stride forwarded to ``SubPixelConv1d``.
      bias: bias flag forwarded to ``SubPixelConv1d``.

    Returns:
      The configured ``SubPixelConv1d`` module.
    """
    layer_kwargs = dict(kernel_size=1, stride=stride, bias=bias)
    return SubPixelConv1d(in_channels, out_channels, **layer_kwargs)
def _subpixel_convk(
    in_channels, out_channels, kernel_size=3, stride=1, groups=1, dilation=1, bias=False
):
    """Build a kernel-k subpixel 1d convolution with symmetric padding.

    The padding on each side is ``dilation * (kernel_size - 1) // 2``.

    Args:
      in_channels: number of input channels.
      out_channels: number of output channels.
      kernel_size: convolution kernel size.
      stride: stride forwarded to ``SubPixelConv1d``.
      groups: number of convolution groups.
      dilation: kernel dilation factor.
      bias: bias flag forwarded to ``SubPixelConv1d``.

    Returns:
      The configured ``SubPixelConv1d`` module.
    """
    layer_kwargs = dict(
        kernel_size=kernel_size,
        stride=stride,
        padding=dilation * (kernel_size - 1) // 2,
        dilation=dilation,
        groups=groups,
        bias=bias,
    )
    return SubPixelConv1d(in_channels, out_channels, **layer_kwargs)
def _make_downsample(in_channels, out_channels, stride, norm_layer, norm_before):
    """Build the shortcut projection used when a block downsamples.

    The total stride is split in two stages: a point-wise convolution
    (stride 2 when ``stride`` is even, otherwise 1) followed, if any stride
    remains, by a max-pooling layer that applies the rest.

    Args:
      in_channels: number of input channels.
      out_channels: number of output channels.
      stride: total downsampling factor.
      norm_layer: callable building a normalization layer from a number of
        channels; only called when ``norm_before`` is true.
      norm_before: if True, the convolution has no bias and is followed by
        the normalization layer; otherwise the convolution has a bias.

    Returns:
      ``nn.Sequential`` with the projection layers.
    """
    is_even = stride % 2 == 0
    conv_stride = 2 if is_even else 1
    pool_stride = stride // 2 if is_even else stride

    stages = [_conv1(in_channels, out_channels, conv_stride, bias=not norm_before)]
    if norm_before:
        stages.append(norm_layer(out_channels))

    if pool_stride > 1:
        # Odd pooling window so the padding can be symmetric.
        pool_width = 2 * (pool_stride // 2) + 1
        stages.append(
            nn.MaxPool1d(
                kernel_size=pool_width,
                stride=pool_stride,
                padding=(pool_width - 1) // 2,
            )
        )

    return nn.Sequential(*stages)


def _make_upsample(
    in_channels, out_channels, stride, norm_layer, norm_before, mode="nearest"
):
    """Build the shortcut projection used when a block upsamples.

    With ``mode == "subpixel"`` a point-wise subpixel convolution does both
    the channel projection and the upsampling. For any other mode a
    point-wise convolution is followed by ``Interpolate`` with that mode.

    Args:
      in_channels: number of input channels.
      out_channels: number of output channels.
      stride: upsampling factor.
      norm_layer: callable building a normalization layer from a number of
        channels; only called when ``norm_before`` is true.
      norm_before: if True, the convolution has no bias and is followed by
        the normalization layer; otherwise the convolution has a bias.
      mode: "subpixel" or an interpolation mode passed to ``Interpolate``.

    Returns:
      A module performing the projection (``nn.Sequential``, or the bare
      subpixel convolution when ``mode == "subpixel"`` without norm).
    """
    use_bias = not norm_before

    if mode == "subpixel":
        projection = _subpixel_conv1(in_channels, out_channels, stride, bias=use_bias)
        if not norm_before:
            return projection
        return nn.Sequential(projection, norm_layer(out_channels))

    stages = [_conv1(in_channels, out_channels, stride=1, bias=use_bias)]
    if norm_before:
        stages.append(norm_layer(out_channels))
    stages.append(Interpolate(scale_factor=stride, mode=mode))
    return nn.Sequential(*stages)
class ResNet1dBasicBlock(nn.Module):
    """Basic 1d ResNet block: two kernel-k convolutions plus a shortcut.

    The shortcut is the identity unless ``stride != 1`` or
    ``in_channels != channels``, in which case a projection built by
    ``_make_downsample`` is used.

    Args:
      in_channels: number of input channels.
      channels: number of output channels.
      kernel_size: kernel size of both convolutions.
      activation: activation specification passed to ``AF.create``.
      stride: stride of the first convolution and of the shortcut projection.
      dropout_rate: if > 0, ``Dropout1d`` is applied to the block output.
      drop_connect_rate: if > 0, ``DropConnect1d`` is applied to the residual
        branch before it is added to the shortcut.
      groups: number of groups of both convolutions.
      dilation: dilation of the first convolution (the second one always
        uses dilation 1).
      use_norm: if True, normalization layers are created and used.
      norm_layer: callable building a normalization layer from a number of
        channels; defaults to ``BatchNorm1d``.
      norm_before: if True, normalization is applied between convolution and
        activation; otherwise after the activation.
    """

    expansion = 1

    def __init__(
        self,
        in_channels,
        channels,
        kernel_size=3,
        activation="relu6",
        stride=1,
        dropout_rate=0,
        drop_connect_rate=0,
        groups=1,
        dilation=1,
        use_norm=True,
        norm_layer=None,
        norm_before=True,
    ):
        super().__init__()

        # Effective flags: both stay False when use_norm is False.
        self.norm_before = False
        self.norm_after = False
        if use_norm:
            if norm_layer is None:
                norm_layer = BatchNorm1d

            self.bn1 = norm_layer(channels)
            self.bn2 = norm_layer(channels)
            if norm_before:
                self.norm_before = True
            else:
                self.norm_after = True

        self.in_channels = in_channels
        self.channels = channels

        # NOTE(review): bias follows the raw `norm_before` argument, so with
        # use_norm=False and norm_before=True these convolutions have neither
        # bias nor normalization. Kept unchanged to preserve the parameter
        # layout of existing checkpoints.
        bias = not norm_before
        self.conv1 = _convk(
            in_channels, channels, kernel_size, stride, groups, dilation, bias=bias
        )
        self.act1 = AF.create(activation)
        self.conv2 = _convk(channels, channels, kernel_size, groups=groups, bias=bias)
        self.act2 = AF.create(activation)
        self.stride = stride

        self.downsample = None
        if stride != 1 or in_channels != channels:
            # Pass the effective flag: with use_norm=False, norm_layer may be
            # None and must not be called inside the projection.
            self.downsample = _make_downsample(
                in_channels, channels, stride, norm_layer, self.norm_before
            )

        self.dropout_rate = dropout_rate
        self.dropout = None
        if dropout_rate > 0:
            self.dropout = Dropout1d(dropout_rate)

        self.drop_connect_rate = drop_connect_rate
        self.drop_connect = None
        if drop_connect_rate > 0:
            self.drop_connect = DropConnect1d(drop_connect_rate)

        # Presumably the one-sided temporal context of the block -- TODO confirm.
        self.context = (stride + dilation) * (kernel_size - 1) // 2
        self.downsample_factor = stride

    @property
    def out_channels(self):
        """Number of output channels of the block."""
        return self.channels

    def forward(self, x):
        """Apply the residual block to ``x`` and return the result."""
        residual = x

        x = self.conv1(x)
        if self.norm_before:
            x = self.bn1(x)
        x = self.act1(x)
        if self.norm_after:
            x = self.bn1(x)

        x = self.conv2(x)
        if self.norm_before:
            x = self.bn2(x)

        if self.drop_connect_rate > 0:
            x = self.drop_connect(x)

        if self.downsample is not None:
            residual = self.downsample(residual)

        x += residual
        x = self.act2(x)
        if self.norm_after:
            x = self.bn2(x)

        if self.dropout_rate > 0:
            x = self.dropout(x)

        return x
class ResNet1dBasicDecBlock(nn.Module):
    """Basic 1d ResNet decoder block.

    Same layout as the basic block, but the first convolution is a subpixel
    convolution built by ``_subpixel_convk`` and the shortcut projection is
    built by ``_make_upsample``. The shortcut is the identity unless
    ``stride != 1`` or ``in_channels != channels``.

    Args:
      in_channels: number of input channels.
      channels: number of output channels.
      kernel_size: kernel size of both convolutions.
      activation: activation specification passed to ``AF.create``.
      stride: stride given to the subpixel convolution and to the shortcut
        projection.
      dropout_rate: if > 0, ``Dropout1d`` is applied to the block output.
      drop_connect_rate: if > 0, ``DropConnect1d`` is applied to the residual
        branch before it is added to the shortcut.
      groups: number of groups of both convolutions.
      dilation: dilation of the first convolution (the second one always
        uses dilation 1).
      use_norm: if True, normalization layers are created and used.
      norm_layer: callable building a normalization layer from a number of
        channels; defaults to ``BatchNorm1d``.
      norm_before: if True, normalization is applied between convolution and
        activation; otherwise after the activation.
    """

    expansion = 1

    def __init__(
        self,
        in_channels,
        channels,
        kernel_size=3,
        activation="relu6",
        stride=1,
        dropout_rate=0,
        drop_connect_rate=0,
        groups=1,
        dilation=1,
        use_norm=True,
        norm_layer=None,
        norm_before=True,
    ):
        super().__init__()

        # Effective flags: both stay False when use_norm is False.
        self.norm_before = False
        self.norm_after = False
        if use_norm:
            if norm_layer is None:
                norm_layer = BatchNorm1d

            self.bn1 = norm_layer(channels)
            self.bn2 = norm_layer(channels)
            if norm_before:
                self.norm_before = True
            else:
                self.norm_after = True

        self.in_channels = in_channels
        self.channels = channels

        # NOTE(review): bias follows the raw `norm_before` argument, so with
        # use_norm=False and norm_before=True these convolutions have neither
        # bias nor normalization. Kept unchanged to preserve the parameter
        # layout of existing checkpoints.
        bias = not norm_before
        self.conv1 = _subpixel_convk(
            in_channels, channels, kernel_size, stride, groups, dilation, bias=bias
        )
        self.act1 = AF.create(activation)
        self.conv2 = _convk(channels, channels, kernel_size, groups=groups, bias=bias)
        self.act2 = AF.create(activation)
        self.stride = stride

        self.upsample = None
        if stride != 1 or in_channels != channels:
            # Pass the effective flag: with use_norm=False, norm_layer may be
            # None and must not be called inside the projection.
            self.upsample = _make_upsample(
                in_channels, channels, stride, norm_layer, self.norm_before
            )

        self.dropout_rate = dropout_rate
        self.dropout = None
        if dropout_rate > 0:
            self.dropout = Dropout1d(dropout_rate)

        self.drop_connect_rate = drop_connect_rate
        self.drop_connect = None
        if drop_connect_rate > 0:
            self.drop_connect = DropConnect1d(drop_connect_rate)

        # Presumably the one-sided temporal context of the block -- TODO confirm.
        self.context = (stride + dilation) * (kernel_size - 1) // 2
        self.upsample_factor = stride

    @property
    def out_channels(self):
        """Number of output channels of the block."""
        return self.channels

    def forward(self, x):
        """Apply the decoder residual block to ``x`` and return the result."""
        residual = x

        x = self.conv1(x)
        if self.norm_before:
            x = self.bn1(x)
        x = self.act1(x)
        if self.norm_after:
            x = self.bn1(x)

        x = self.conv2(x)
        if self.norm_before:
            x = self.bn2(x)

        if self.drop_connect_rate > 0:
            x = self.drop_connect(x)

        if self.upsample is not None:
            residual = self.upsample(residual)

        x += residual
        x = self.act2(x)
        if self.norm_after:
            x = self.bn2(x)

        if self.dropout_rate > 0:
            x = self.dropout(x)

        return x
class ResNet1dBNBlock(nn.Module):
    """Bottleneck 1d ResNet block.

    A point-wise convolution reduces the input to ``channels // expansion``
    channels, a kernel-k convolution processes it, and a second point-wise
    convolution expands it to ``channels``. The shortcut is the identity
    unless ``stride != 1`` or ``in_channels != channels``, in which case a
    projection built by ``_make_downsample`` is used.

    Args:
      in_channels: number of input channels.
      channels: number of output channels.
      kernel_size: kernel size of the middle convolution.
      activation: activation specification passed to ``AF.create``.
      stride: stride of the middle convolution and of the shortcut projection.
      dropout_rate: if > 0, ``Dropout1d`` is applied to the block output.
      drop_connect_rate: if > 0, ``DropConnect1d`` is applied to the residual
        branch before it is added to the shortcut.
      groups: number of groups of the middle convolution.
      dilation: dilation of the middle convolution.
      expansion: ratio between ``channels`` and the bottleneck width.
      use_norm: if True, normalization layers are created and used.
      norm_layer: callable building a normalization layer from a number of
        channels; defaults to ``BatchNorm1d``.
      norm_before: if True, normalization is applied between convolution and
        activation; otherwise after the activation.
    """

    def __init__(
        self,
        in_channels,
        channels,
        kernel_size=3,
        activation="relu6",
        stride=1,
        dropout_rate=0,
        drop_connect_rate=0,
        groups=1,
        dilation=1,
        expansion=4,
        use_norm=True,
        norm_layer=None,
        norm_before=True,
    ):
        super().__init__()

        # Effective flags: both stay False when use_norm is False.
        self.norm_before = False
        self.norm_after = False
        self.expansion = expansion
        bn_channels = channels // expansion
        if use_norm:
            if norm_layer is None:
                norm_layer = BatchNorm1d

            self.bn1 = norm_layer(bn_channels)
            self.bn2 = norm_layer(bn_channels)
            self.bn3 = norm_layer(channels)
            if norm_before:
                self.norm_before = True
            else:
                self.norm_after = True

        self.in_channels = in_channels
        self.channels = channels

        # NOTE(review): bias follows the raw `norm_before` argument, so with
        # use_norm=False and norm_before=True these convolutions have neither
        # bias nor normalization. Kept unchanged to preserve the parameter
        # layout of existing checkpoints.
        bias = not norm_before
        self.conv1 = _conv1(in_channels, bn_channels, stride=1, bias=bias)
        self.conv2 = _convk(
            bn_channels,
            bn_channels,
            kernel_size,
            stride,
            groups=groups,
            dilation=dilation,
            bias=bias,
        )
        self.conv3 = _conv1(bn_channels, channels, stride=1, bias=bias)
        self.act1 = AF.create(activation)
        self.act2 = AF.create(activation)
        self.act3 = AF.create(activation)
        self.stride = stride

        self.downsample = None
        if stride != 1 or in_channels != channels:
            # Pass the effective flag: with use_norm=False, norm_layer may be
            # None and must not be called inside the projection.
            self.downsample = _make_downsample(
                in_channels, channels, stride, norm_layer, self.norm_before
            )

        self.dropout_rate = dropout_rate
        self.dropout = None
        if dropout_rate > 0:
            self.dropout = Dropout1d(dropout_rate)

        self.drop_connect_rate = drop_connect_rate
        self.drop_connect = None
        if drop_connect_rate > 0:
            self.drop_connect = DropConnect1d(drop_connect_rate)

        # Presumably the one-sided temporal context of the block -- TODO confirm.
        self.context = dilation * (kernel_size - 1) // 2
        self.downsample_factor = stride

    @property
    def out_channels(self):
        """Number of output channels of the block."""
        return self.channels

    def forward(self, x):
        """Apply the bottleneck residual block to ``x`` and return the result."""
        residual = x

        x = self.conv1(x)
        if self.norm_before:
            x = self.bn1(x)
        x = self.act1(x)
        if self.norm_after:
            x = self.bn1(x)

        x = self.conv2(x)
        if self.norm_before:
            x = self.bn2(x)
        x = self.act2(x)
        if self.norm_after:
            x = self.bn2(x)

        x = self.conv3(x)
        if self.norm_before:
            x = self.bn3(x)

        if self.drop_connect_rate > 0:
            x = self.drop_connect(x)

        if self.downsample is not None:
            residual = self.downsample(residual)

        x += residual
        x = self.act3(x)
        if self.norm_after:
            x = self.bn3(x)

        if self.dropout_rate > 0:
            x = self.dropout(x)

        return x
class ResNet1dBNDecBlock(nn.Module):
    """Bottleneck 1d ResNet decoder block.

    A point-wise convolution reduces the input to ``channels // expansion``
    channels, a kernel-k subpixel convolution built by ``_subpixel_convk``
    processes it, and a second point-wise convolution expands it to
    ``channels``. The shortcut is the identity unless ``stride != 1`` or
    ``in_channels != channels``, in which case a projection built by
    ``_make_upsample`` is used.

    Args:
      in_channels: number of input channels.
      channels: number of output channels.
      kernel_size: kernel size of the middle convolution.
      activation: activation specification passed to ``AF.create``.
      stride: stride given to the subpixel convolution and to the shortcut
        projection.
      dropout_rate: if > 0, ``Dropout1d`` is applied to the block output.
      drop_connect_rate: if > 0, ``DropConnect1d`` is applied to the residual
        branch before it is added to the shortcut.
      groups: number of groups of the middle convolution.
      dilation: dilation of the middle convolution.
      expansion: ratio between ``channels`` and the bottleneck width.
      use_norm: if True, normalization layers are created and used.
      norm_layer: callable building a normalization layer from a number of
        channels; defaults to ``BatchNorm1d``.
      norm_before: if True, normalization is applied between convolution and
        activation; otherwise after the activation.
    """

    def __init__(
        self,
        in_channels,
        channels,
        kernel_size=3,
        activation="relu6",
        stride=1,
        dropout_rate=0,
        drop_connect_rate=0,
        groups=1,
        dilation=1,
        expansion=4,
        use_norm=True,
        norm_layer=None,
        norm_before=True,
    ):
        super().__init__()

        # Effective flags: both stay False when use_norm is False.
        self.norm_before = False
        self.norm_after = False
        self.expansion = expansion
        bn_channels = channels // expansion
        if use_norm:
            if norm_layer is None:
                norm_layer = BatchNorm1d

            self.bn1 = norm_layer(bn_channels)
            self.bn2 = norm_layer(bn_channels)
            self.bn3 = norm_layer(channels)
            if norm_before:
                self.norm_before = True
            else:
                self.norm_after = True

        self.in_channels = in_channels
        self.channels = channels

        # NOTE(review): bias follows the raw `norm_before` argument, so with
        # use_norm=False and norm_before=True these convolutions have neither
        # bias nor normalization. Kept unchanged to preserve the parameter
        # layout of existing checkpoints.
        bias = not norm_before
        self.conv1 = _conv1(in_channels, bn_channels, stride=1, bias=bias)
        self.conv2 = _subpixel_convk(
            bn_channels, bn_channels, kernel_size, stride, groups, dilation, bias=bias
        )
        self.conv3 = _conv1(bn_channels, channels, stride=1, bias=bias)
        self.act1 = AF.create(activation)
        self.act2 = AF.create(activation)
        self.act3 = AF.create(activation)
        self.stride = stride

        self.upsample = None
        if stride != 1 or in_channels != channels:
            # Pass the effective flag: with use_norm=False, norm_layer may be
            # None and must not be called inside the projection.
            self.upsample = _make_upsample(
                in_channels, channels, stride, norm_layer, self.norm_before
            )

        self.dropout_rate = dropout_rate
        self.dropout = None
        if dropout_rate > 0:
            self.dropout = Dropout1d(dropout_rate)

        self.drop_connect_rate = drop_connect_rate
        self.drop_connect = None
        if drop_connect_rate > 0:
            self.drop_connect = DropConnect1d(drop_connect_rate)

        # Presumably the one-sided temporal context of the block -- TODO confirm.
        self.context = dilation * (kernel_size - 1) // 2
        self.upsample_factor = stride

    @property
    def out_channels(self):
        """Number of output channels of the block."""
        return self.channels

    def forward(self, x):
        """Apply the bottleneck decoder block to ``x`` and return the result."""
        residual = x

        x = self.conv1(x)
        if self.norm_before:
            x = self.bn1(x)
        x = self.act1(x)
        if self.norm_after:
            x = self.bn1(x)

        x = self.conv2(x)
        if self.norm_before:
            x = self.bn2(x)
        x = self.act2(x)
        if self.norm_after:
            x = self.bn2(x)

        x = self.conv3(x)
        if self.norm_before:
            x = self.bn3(x)

        if self.drop_connect_rate > 0:
            x = self.drop_connect(x)

        if self.upsample is not None:
            residual = self.upsample(residual)

        x += residual
        x = self.act3(x)
        if self.norm_after:
            # bn3 is the layer sized for `channels`; bn2 is sized for the
            # bottleneck width and does not match the tensor at this point.
            x = self.bn3(x)

        if self.dropout_rate > 0:
            x = self.dropout(x)

        return x
class SEResNet1dBasicBlock(ResNet1dBasicBlock):
    """Basic 1d ResNet block with a squeeze-excitation layer.

    Extends ``ResNet1dBasicBlock`` with an ``SEBlock1d`` applied to the
    residual branch after the second convolution (and its normalization when
    ``norm_before`` is set), before the shortcut is added.

    Args:
      se_r: second argument passed to ``SEBlock1d`` (presumably the
        squeeze-excitation reduction ratio -- TODO confirm).
      Remaining arguments are forwarded unchanged to ``ResNet1dBasicBlock``.
    """

    expansion = 1

    def __init__(
        self,
        in_channels,
        channels,
        kernel_size=3,
        activation="relu6",
        stride=1,
        dropout_rate=0,
        drop_connect_rate=0,
        groups=1,
        dilation=1,
        se_r=16,
        use_norm=True,
        norm_layer=None,
        norm_before=True,
    ):
        base_kwargs = dict(
            kernel_size=kernel_size,
            activation=activation,
            stride=stride,
            dropout_rate=dropout_rate,
            drop_connect_rate=drop_connect_rate,
            groups=groups,
            dilation=dilation,
            use_norm=use_norm,
            norm_layer=norm_layer,
            norm_before=norm_before,
        )
        super().__init__(in_channels, channels, **base_kwargs)
        self.se_layer = SEBlock1d(channels, se_r, activation)

    def forward(self, x):
        """Apply the squeeze-excitation residual block to ``x``."""
        pre_norm = self.norm_before
        post_norm = self.norm_after

        out = self.conv1(x)
        if pre_norm:
            out = self.bn1(out)
        out = self.act1(out)
        if post_norm:
            out = self.bn1(out)

        out = self.conv2(out)
        if pre_norm:
            out = self.bn2(out)
        out = self.se_layer(out)

        if self.drop_connect_rate > 0:
            out = self.drop_connect(out)

        # Identity shortcut unless a projection was created in __init__.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        out = self.act2(out)
        if post_norm:
            out = self.bn2(out)

        if self.dropout_rate > 0:
            out = self.dropout(out)

        return out
class SEResNet1dBasicDecBlock(ResNet1dBasicDecBlock):
    """Basic 1d ResNet decoder block with a squeeze-excitation layer.

    Extends ``ResNet1dBasicDecBlock`` with an ``SEBlock1d`` applied to the
    residual branch after the second convolution (and its normalization when
    ``norm_before`` is set), before the shortcut is added.

    Args:
      se_r: second argument passed to ``SEBlock1d`` (presumably the
        squeeze-excitation reduction ratio -- TODO confirm).
      Remaining arguments are forwarded unchanged to ``ResNet1dBasicDecBlock``.
    """

    expansion = 1

    def __init__(
        self,
        in_channels,
        channels,
        kernel_size=3,
        activation="relu6",
        stride=1,
        dropout_rate=0,
        drop_connect_rate=0,
        groups=1,
        dilation=1,
        se_r=16,
        use_norm=True,
        norm_layer=None,
        norm_before=True,
    ):
        base_kwargs = dict(
            kernel_size=kernel_size,
            activation=activation,
            stride=stride,
            dropout_rate=dropout_rate,
            drop_connect_rate=drop_connect_rate,
            groups=groups,
            dilation=dilation,
            use_norm=use_norm,
            norm_layer=norm_layer,
            norm_before=norm_before,
        )
        super().__init__(in_channels, channels, **base_kwargs)
        self.se_layer = SEBlock1d(channels, se_r, activation)

    @property
    def out_channels(self):
        """Number of output channels of the block."""
        return self.channels

    def forward(self, x):
        """Apply the squeeze-excitation decoder block to ``x``."""
        pre_norm = self.norm_before
        post_norm = self.norm_after

        out = self.conv1(x)
        if pre_norm:
            out = self.bn1(out)
        out = self.act1(out)
        if post_norm:
            out = self.bn1(out)

        out = self.conv2(out)
        if pre_norm:
            out = self.bn2(out)
        out = self.se_layer(out)

        if self.drop_connect_rate > 0:
            out = self.drop_connect(out)

        # Identity shortcut unless a projection was created in __init__.
        shortcut = x if self.upsample is None else self.upsample(x)

        out += shortcut
        out = self.act2(out)
        if post_norm:
            out = self.bn2(out)

        if self.dropout_rate > 0:
            out = self.dropout(out)

        return out
class SEResNet1dBNBlock(ResNet1dBNBlock):
    """Bottleneck 1d ResNet block with a squeeze-excitation layer.

    Extends ``ResNet1dBNBlock`` with an ``SEBlock1d`` applied to the residual
    branch after the third convolution (and its normalization when
    ``norm_before`` is set), before the shortcut is added.

    Args:
      se_r: second argument passed to ``SEBlock1d`` (presumably the
        squeeze-excitation reduction ratio -- TODO confirm).
      Remaining arguments are forwarded unchanged to ``ResNet1dBNBlock``.
    """

    def __init__(
        self,
        in_channels,
        channels,
        kernel_size=3,
        activation="relu6",
        stride=1,
        dropout_rate=0,
        drop_connect_rate=0,
        groups=1,
        dilation=1,
        expansion=4,
        se_r=16,
        use_norm=True,
        norm_layer=None,
        norm_before=True,
    ):
        base_kwargs = dict(
            kernel_size=kernel_size,
            activation=activation,
            stride=stride,
            dropout_rate=dropout_rate,
            drop_connect_rate=drop_connect_rate,
            groups=groups,
            dilation=dilation,
            expansion=expansion,
            use_norm=use_norm,
            norm_layer=norm_layer,
            norm_before=norm_before,
        )
        super().__init__(in_channels, channels, **base_kwargs)
        self.se_layer = SEBlock1d(channels, se_r, activation)

    def forward(self, x):
        """Apply the squeeze-excitation bottleneck block to ``x``."""
        pre_norm = self.norm_before
        post_norm = self.norm_after

        out = self.conv1(x)
        if pre_norm:
            out = self.bn1(out)
        out = self.act1(out)
        if post_norm:
            out = self.bn1(out)

        out = self.conv2(out)
        if pre_norm:
            out = self.bn2(out)
        out = self.act2(out)
        if post_norm:
            out = self.bn2(out)

        out = self.conv3(out)
        if pre_norm:
            out = self.bn3(out)
        out = self.se_layer(out)

        if self.drop_connect_rate > 0:
            out = self.drop_connect(out)

        # Identity shortcut unless a projection was created in __init__.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        out = self.act3(out)
        if post_norm:
            out = self.bn3(out)

        if self.dropout_rate > 0:
            out = self.dropout(out)

        return out
class SEResNet1dBNDecBlock(ResNet1dBNDecBlock):
    """Bottleneck 1d ResNet decoder block with a squeeze-excitation layer.

    Extends ``ResNet1dBNDecBlock`` with an ``SEBlock1d`` applied to the
    residual branch after the third convolution (and its normalization when
    ``norm_before`` is set), before the shortcut is added.

    Args:
      se_r: second argument passed to ``SEBlock1d`` (presumably the
        squeeze-excitation reduction ratio -- TODO confirm).
      Remaining arguments are forwarded unchanged to ``ResNet1dBNDecBlock``.
    """

    def __init__(
        self,
        in_channels,
        channels,
        kernel_size=3,
        activation="relu6",
        stride=1,
        dropout_rate=0,
        drop_connect_rate=0,
        groups=1,
        dilation=1,
        expansion=4,
        se_r=16,
        use_norm=True,
        norm_layer=None,
        norm_before=True,
    ):
        base_kwargs = dict(
            kernel_size=kernel_size,
            activation=activation,
            stride=stride,
            dropout_rate=dropout_rate,
            drop_connect_rate=drop_connect_rate,
            groups=groups,
            dilation=dilation,
            expansion=expansion,
            use_norm=use_norm,
            norm_layer=norm_layer,
            norm_before=norm_before,
        )
        super().__init__(in_channels, channels, **base_kwargs)
        self.se_layer = SEBlock1d(channels, se_r, activation)

    def forward(self, x):
        """Apply the squeeze-excitation bottleneck decoder block to ``x``."""
        pre_norm = self.norm_before
        post_norm = self.norm_after

        out = self.conv1(x)
        if pre_norm:
            out = self.bn1(out)
        out = self.act1(out)
        if post_norm:
            out = self.bn1(out)

        out = self.conv2(out)
        if pre_norm:
            out = self.bn2(out)
        out = self.act2(out)
        if post_norm:
            out = self.bn2(out)

        out = self.conv3(out)
        if pre_norm:
            out = self.bn3(out)
        out = self.se_layer(out)

        if self.drop_connect_rate > 0:
            out = self.drop_connect(out)

        # Identity shortcut unless a projection was created in __init__.
        shortcut = x if self.upsample is None else self.upsample(x)

        out += shortcut
        out = self.act3(out)
        if post_norm:
            out = self.bn3(out)

        if self.dropout_rate > 0:
            out = self.dropout(out)

        return out
class ResNet1dEndpoint(nn.Module):
    """Connects the outputs of the ResNet1d to the rest of the network
    when using multilevel feature aggregation.

    It converts the features of all the levels that are going to be
    aggregated to the same temporal scale.

    Args:
      in_channels: number of input channels.
      channels: number of output channels.
      in_scale: scale of the input features.
      scale: target scale. When ``scale >= in_scale`` the input is resampled
        with ``_make_downsample`` using stride ``int(scale / in_scale)``;
        otherwise with ``_make_upsample`` using stride
        ``int(in_scale / scale)``.
      upsampling_mode: ``mode`` passed to ``_make_upsample``.
      activation: activation specification passed to ``AF.create``; when
        None, ``{"name": "relu6", "inplace": True}`` is used.
      norm_layer: callable building a normalization layer from a number of
        channels; defaults to ``nn.BatchNorm1d``.
      norm_before: if True, normalization is part of the resampling module;
        otherwise a normalization layer is applied after the activation.
    """

    def __init__(
        self,
        in_channels,
        channels,
        in_scale,
        scale,
        upsampling_mode="nearest",
        activation=None,
        norm_layer=None,
        norm_before=True,
    ):
        super().__init__()

        # Build the default spec per instance instead of sharing one mutable
        # dict object between every endpoint through the signature default.
        if activation is None:
            activation = {"name": "relu6", "inplace": True}

        if norm_layer is None:
            norm_layer = nn.BatchNorm1d

        self.in_channels = in_channels
        self.channels = channels
        self.norm_before = norm_before
        self.rel_scale = in_scale / scale

        if scale >= in_scale:
            stride = int(scale / in_scale)
            self.resample = _make_downsample(
                in_channels, channels, stride, norm_layer, norm_before
            )
        else:
            stride = int(in_scale / scale)
            self.resample = _make_upsample(
                in_channels,
                channels,
                stride,
                norm_layer,
                norm_before,
                mode=upsampling_mode,
            )

        self.act = AF.create(activation)
        if not self.norm_before:
            self.bn = norm_layer(channels)

    def forward(self, x):
        """Resample ``x``, apply the activation and, if configured, the norm."""
        x = self.resample(x)
        x = self.act(x)
        if not self.norm_before:
            x = self.bn(x)

        return x