Source code for hyperion.feats.filter_banks

"""
 Copyright 2018 Jesus Villalba (Johns Hopkins University)
 Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
"""

from jsonargparse import ArgumentParser, ActionParser
import logging

import numpy as np
from librosa.filters import mel as make_mel_librosa

from ..hyp_defs import float_cpu
from ..utils.misc import str2bool


[docs]class FilterBankFactory(object):
[docs]    @staticmethod
    def create(
        filter_bank_type, num_filters, fft_length, fs, low_freq, high_freq, norm_filters
    ):
        """Build the filter-bank matrix for the requested filter-bank type.

        Args:
          filter_bank_type: One of "mel_kaldi", "mel_etsi", "mel_librosa",
            "mel_librosa_htk" or "linear".
          num_filters: Number of filters in the bank.
          fft_length: FFT length, forwarded to the selected builder.
          fs: Sampling frequency, forwarded to the selected builder.
          low_freq: Low cutoff frequency, forwarded to the selected builder.
          high_freq: High cutoff frequency, forwarded to the selected builder
            (the builders treat values <= 0 as an offset from fs / 2).
          norm_filters: If true, every filter (column of the matrix) is scaled
            so that its coefficients sum to 1. For the librosa types the flag
            is forwarded to make_mel_librosa instead and no scaling is done
            here.

        Returns:
          The filter-bank matrix produced by the selected builder.

        Raises:
          ValueError: If filter_bank_type is not one of the supported types.
        """
        if filter_bank_type in ("mel_librosa", "mel_librosa_htk"):
            # librosa applies its own normalization, so the result is returned
            # without the sum-to-one scaling below.
            return FilterBankFactory.make_mel_librosa(
                num_filters,
                fft_length,
                fs,
                low_freq,
                high_freq,
                htk=(filter_bank_type == "mel_librosa_htk"),
                norm_filters=norm_filters,
            )

        if filter_bank_type == "mel_kaldi":
            build = FilterBankFactory.make_mel_kaldi
        elif filter_bank_type == "mel_etsi":
            build = FilterBankFactory.make_mel_etsi
        elif filter_bank_type == "linear":
            build = FilterBankFactory.make_linear
        else:
            raise ValueError("Invalid filter-bank type %s" % filter_bank_type)

        B = build(num_filters, fft_length, fs, low_freq, high_freq)

        if norm_filters:
            filter_sums = np.sum(B, axis=0, keepdims=True)
            # A filter that covers no FFT bin sums to zero; dividing by it
            # would fill the column with NaN, so such columns are left as zeros.
            filter_sums[filter_sums == 0] = 1
            B = B / filter_sums

        return B

[docs]    @staticmethod
    def lin2mel(x):
        """Map a linear frequency (scalar or array) to the mel scale.

        Computes 1127 * ln(1 + x / 700).
        """
        scaled = x / 700
        return np.log(1 + scaled) * 1127.0

[docs]    @staticmethod
    def mel2lin(x):
        """Map a mel-scale value (scalar or array) back to linear frequency.

        Computes 700 * (exp(x / 1127) - 1), the inverse of lin2mel.
        """
        growth = np.exp(x / 1127.0)
        return 700 * (growth - 1)

[docs]    @staticmethod
    def make_mel_kaldi(num_filters, fft_length, fs, low_freq, high_freq):
        """Build triangular filters whose edges are equally spaced in mel.

        Each filter rises linearly (in the mel domain) from its left edge to
        its center and falls linearly to its right edge; bins outside the open
        interval (left edge, right edge) get weight zero.

        Args:
          num_filters: Number of filters (columns of the result).
          fft_length: FFT length; the result has int(fft_length / 2 + 1) rows.
          fs: Sampling frequency.
          low_freq: Lower edge of the first filter.
          high_freq: Upper edge of the last filter; values <= 0 are taken as
            an offset from fs / 2.

        Returns:
          Array of shape (int(fft_length / 2 + 1), num_filters) with dtype
          float_cpu(). The last row is never written and stays zero.
        """
        if high_freq <= 0:
            high_freq = fs / 2 + high_freq

        # num_filters + 2 edges: filter k uses edges k, k + 1 and k + 2.
        edges = np.linspace(
            FilterBankFactory.lin2mel(low_freq),
            FilterBankFactory.lin2mel(high_freq),
            num_filters + 2,
        )
        # NOTE(review): np.linspace includes the end point, so bin j is placed
        # at j * fs / (fft_length - 1) rather than j * fs / fft_length. Confirm
        # whether that is intended before changing it; a fix shifts every weight.
        bin_mels = FilterBankFactory.lin2mel(np.linspace(0, fs, fft_length))

        num_bins = int(fft_length / 2)
        half_mels = bin_mels[:num_bins]

        B = np.zeros((int(fft_length / 2 + 1), num_filters), dtype=float_cpu())
        for k in range(num_filters):
            left, center, right = edges[k : k + 3]
            inside = (half_mels > left) & (half_mels < right)
            below_center = half_mels <= center
            rising = inside & below_center
            falling = inside & ~below_center

            weights = np.zeros(num_bins)
            weights[rising] = (half_mels[rising] - left) / (center - left)
            weights[falling] = (right - half_mels[falling]) / (right - center)
            B[:num_bins, k] = weights

        return B

[docs]    @staticmethod
    def make_mel_etsi(num_filters, fft_length, fs, low_freq, high_freq):
        """Build triangular filters defined on rounded FFT-bin indices.

        Center frequencies are equally spaced on the mel scale between
        low_freq and high_freq, converted back to Hz and rounded to the
        nearest FFT bin. Filter k rises over bins cbin[k]..cbin[k + 1] and
        falls over bins cbin[k + 1] + 1..cbin[k + 2].

        Args:
          num_filters: Number of filters (columns of the result).
          fft_length: FFT length; the result has int(fft_length / 2 + 1) rows.
          fs: Sampling frequency.
          low_freq: Lower edge of the first filter.
          high_freq: Upper edge of the last filter; values <= 0 are taken as
            an offset from fs / 2.

        Returns:
          Array of shape (int(fft_length / 2 + 1), num_filters) with dtype
          float_cpu().
        """
        if high_freq <= 0:
            high_freq = fs / 2 + high_freq

        mel_low_freq = FilterBankFactory.lin2mel(low_freq)
        mel_high_freq = FilterBankFactory.lin2mel(high_freq)
        fc = FilterBankFactory.mel2lin(
            np.linspace(mel_low_freq, mel_high_freq, num_filters + 2)
        )
        # Nearest FFT bin for each of the num_filters + 2 edge frequencies.
        cbin = np.round(fc / fs * fft_length).astype(int)

        B = np.zeros((int(fft_length / 2 + 1), num_filters), dtype=float_cpu())
        for k in range(num_filters):
            left, center, right = cbin[k], cbin[k + 1], cbin[k + 2]

            # Rising slope, reaching 1 at the center bin.
            rising = np.arange(left, center + 1)
            B[rising, k] = (rising - left + 1) / (center - left + 1)

            # Falling slope over the bins after the center, up to the right edge.
            falling = np.arange(center + 1, right + 1)
            B[falling, k] = (right - falling + 1) / (right - center + 1)

        return B

[docs]    @staticmethod
    def make_linear(num_filters, fft_length, fs, low_freq, high_freq):
        """Build triangular filters equally spaced on the linear frequency axis.

        Edge frequencies are equally spaced between low_freq and high_freq and
        rounded to the nearest FFT bin. Filter k rises over bins
        cbin[k]..cbin[k + 1] and falls over bins cbin[k + 1] + 1..cbin[k + 2].

        Args:
          num_filters: Number of filters (columns of the result).
          fft_length: FFT length; the result has int(fft_length / 2 + 1) rows.
          fs: Sampling frequency.
          low_freq: Lower edge of the first filter.
          high_freq: Upper edge of the last filter; values <= 0 are taken as
            an offset from fs / 2.

        Returns:
          Array of shape (int(fft_length / 2 + 1), num_filters) with dtype
          float_cpu().
        """
        if high_freq <= 0:
            high_freq = fs / 2 + high_freq

        fc = np.linspace(low_freq, high_freq, num_filters + 2)
        # Nearest FFT bin for each of the num_filters + 2 edge frequencies.
        cbin = np.round(fc / fs * fft_length).astype(int)

        B = np.zeros((int(fft_length / 2 + 1), num_filters), dtype=float_cpu())
        for k in range(num_filters):
            left, center, right = cbin[k], cbin[k + 1], cbin[k + 2]

            # Rising slope, reaching 1 at the center bin.
            rising = np.arange(left, center + 1)
            B[rising, k] = (rising - left + 1) / (center - left + 1)

            # Falling slope over the bins after the center, up to the right edge.
            falling = np.arange(center + 1, right + 1)
            B[falling, k] = (right - falling + 1) / (right - center + 1)

        return B

[docs]    @staticmethod
    def make_mel_librosa(
        num_filters, fft_length, fs, low_freq, high_freq, htk=False, norm_filters=False
    ):
        """Build a mel filter bank with ``librosa.filters.mel``.

        Args:
          num_filters: Number of mel filters (librosa ``n_mels``).
          fft_length: FFT length (librosa ``n_fft``).
          fs: Sampling frequency (librosa ``sr``).
          low_freq: Lowest frequency (librosa ``fmin``).
          high_freq: Highest frequency (librosa ``fmax``); values <= 0 are
            taken as an offset from fs / 2.
          htk: Forwarded unchanged as librosa's ``htk`` argument.
          norm_filters: If true librosa is called with ``norm="slaney"``,
            otherwise with ``norm=None``.

        Returns:
          The transpose of the matrix returned by librosa.
        """
        if high_freq <= 0:
            high_freq = fs / 2 + high_freq

        norm = "slaney" if norm_filters else None

        # Inside a method body the bare name resolves to the module-level
        # import of librosa.filters.mel, not to this static method.
        # sr / n_fft / n_mels are passed by keyword because they are
        # keyword-only in librosa >= 0.10; older releases accept the same names.
        return make_mel_librosa(
            sr=fs,
            n_fft=fft_length,
            n_mels=num_filters,
            fmin=low_freq,
            fmax=high_freq,
            htk=htk,
            norm=norm,
        ).T

[docs]    @staticmethod
    def add_class_args(parser, prefix=None):
        """Register the filter-bank command line options on a parser.

        Adds --fb-type, --num-filters, --low-freq, --high-freq and
        --norm-filters.

        Args:
          parser: Parser that receives the options.
          prefix: If not None, the options are added to a fresh
            ArgumentParser which is then attached to ``parser`` as a nested
            parser under the option ``--<prefix>``.
        """
        if prefix is not None:
            outer_parser = parser
            parser = ArgumentParser(prog="")

        parser.add_argument(
            "--fb-type",
            default="mel_kaldi",
            choices=[
                "mel_kaldi",
                "mel_etsi",
                "mel_librosa",
                "mel_librosa_htk",
                "linear",
            ],
            help=(
                "Filter-bank type: mel_kaldi, mel_etsi, mel_librosa, "
                "mel_librosa_htk, linear"
            ),
        )

        parser.add_argument(
            "--num-filters",
            type=int,
            default=23,
            help="Number of triangular mel-frequency bins",
        )

        parser.add_argument(
            "--low-freq",
            type=float,
            default=20,
            help="Low cutoff frequency for mel bins",
        )

        # The builders treat any value <= 0 (including the default 0) as an
        # offset from the Nyquist frequency, so the help text says "<= 0".
        parser.add_argument(
            "--high-freq",
            type=float,
            default=0,
            help="High cutoff frequency for mel bins (if <= 0, offset from Nyquist)",
        )

        parser.add_argument(
            "--norm-filters",
            default=False,
            action="store_true",
            help="Normalize filters coeff to sum up to 1",
        )

        if prefix is not None:
            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
            # help='filter-bank options')

    # Alias: the same static method is also reachable as add_argparse_args.
    add_argparse_args = add_class_args