# Source code for hyperion.augment.speed_augment

"""
 Copyright 2020 Johns Hopkins University  (Author: Jesus Villalba)
 Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
"""

import logging
from copy import deepcopy
import multiprocessing
import numpy as np
from librosa.effects import time_stretch

from ..hyp_defs import float_cpu


class SpeedAugment(object):
    """Class to augment speech with speed perturbation.

    Attributes:
      speed_prob: probability of applying speed perturbation.
      speed_ratios: list of speed perturbation ratios.
      keep_length: applies padding or cropping to keep the length of the signal.
      random_seed: random seed for random number generator.
      rng: Random number generator returned by
           np.random.RandomState (optional).
    """

    def __init__(
        self,
        speed_prob,
        speed_ratios=[0.9, 1.1],
        keep_length=False,
        random_seed=112358,
        rng=None,
    ):
        logging.info(
            "init speed augment with prob={}, speed_ratios={}, keep_length={}".format(
                speed_prob, speed_ratios, keep_length
            )
        )
        self.speed_prob = speed_prob
        # Store a private copy: the default argument is a single list object
        # shared by every call, so keeping a reference to it would let one
        # instance's edits leak into all later instances.
        self.speed_ratios = list(speed_ratios)
        self.keep_length = keep_length

        if rng is None:
            self.rng = np.random.RandomState(seed=random_seed)
        else:
            # Deep copy so drawing numbers here does not advance the
            # caller's generator.
            self.rng = deepcopy(rng)

    @classmethod
    def create(cls, cfg, random_seed=112358, rng=None):
        """Creates a SpeedAugment object from options dictionary or YAML file.

        Args:
          cfg: YAML file path or dictionary with speed augmentation options.
               It must provide the keys "speed_prob", "speed_ratios" and
               "keep_length".
          random_seed: random seed used when rng is None.
          rng: Random number generator returned by
               np.random.RandomState (optional).

        Returns:
          SpeedAugment object.
        """
        if isinstance(cfg, str):
            # Imported here because the module does not import yaml at the
            # top level; it is only needed when cfg is a file path.
            import yaml

            # NOTE(review): FullLoader can build some non-trivial Python
            # objects; only load configuration files from trusted sources.
            with open(cfg, "r") as f:
                cfg = yaml.load(f, Loader=yaml.FullLoader)

        assert isinstance(cfg, dict), "wrong object type for cfg={}".format(cfg)

        return cls(
            speed_prob=cfg["speed_prob"],
            speed_ratios=cfg["speed_ratios"],
            keep_length=cfg["keep_length"],
            random_seed=random_seed,
            rng=rng,
        )

    @staticmethod
    def _fit_length(y, target_len, pad_value):
        """Pads or crops y along its last axis so that it has target_len samples.

        Args:
          y: signal array.
          target_len: desired number of samples in the last axis.
          pad_value: constant used to fill the padded samples.

        Returns:
          Array whose last axis has exactly target_len samples.
        """
        cur_len = y.shape[-1]
        if cur_len < target_len:
            pad_shape = y.shape[:-1] + (target_len - cur_len,)
            pad_y = pad_value * np.ones(pad_shape, dtype=y.dtype)
            return np.concatenate((y, pad_y), axis=-1)

        if cur_len > target_len:
            # Ellipsis keeps any leading (e.g. channel) axes intact.
            return y[..., :target_len]

        return y

    def forward(self, x):
        """Applies speed perturbation to the signal with probability speed_prob.

        Args:
          x: input signal array, time in the last axis.

        Returns:
          Augmented signal (the input object itself when no perturbation
          is applied).
          Dictionary with key "speed_ratio" holding the ratio that was used
          (1 when no perturbation is applied).
        """
        # decide whether to apply speed perturbation or not
        p = self.rng.random_sample()
        if p > self.speed_prob:
            # we don't add speed perturbation
            info = {"speed_ratio": 1}
            return x, info

        speed_idx = self.rng.choice(len(self.speed_ratios))
        # change speed
        r = self.speed_ratios[speed_idx]
        info = {"speed_ratio": r}
        # rate must be passed by keyword: it is keyword-only in librosa >= 0.10
        # and the keyword form is also accepted by earlier versions.
        y = time_stretch(x, rate=r)
        if self.keep_length:
            # we add some dither in the padding instead of exact zeros
            dither = np.max(x) / 2 ** 15
            y = self._fit_length(y, x.shape[-1], dither)

        return y, info

    def __call__(self, x):
        """Calls forward(x)."""
        return self.forward(x)