Source code for hyperion.io.audio_writer

"""
 Copyright 2018 Johns Hopkins University  (Author: Jesus Villalba)
 Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
"""

import os
import re
import soundfile as sf

import numpy as np

from ..hyp_defs import float_cpu
from ..utils.scp_list import SCPList
from ..utils.kaldi_io_funcs import is_token
from .audio_reader import valid_ext

subtype_to_npdtype = {
    "PCM_32": "int32",
    "ALAW": "int16",
    "IMA_ADPCM": "int16",
    "FLOAT": "float32",
    "PCM_16": "int16",
    "DOUBLE": "float64",
    "MS_ADPCM": "int16",
    "ULAW": "int16",
    "PCM_U8": "uint8",
    "PCM_S8": "int8",
    "VORBIS": "float32",
    "GSM610": "int16",
    "G721_32": "int16",
    "PCM_24": "int24",
}


[docs]class AudioWriter(object):
    """Abstract base class to write audio files.

    Attributes:
      output_path: output data file path.
      script_path: optional output scp file.
      audio_format:   audio file format
      audio_subtype: subtype of audio in [PCM_16, PCM_32, FLOAT, DOUBLE, ...],
               if None, it uses soundfile defaults (recommended)
      scp_sep: Separator for scp files (default ' ').
    """

[docs]    def __init__(
        self,
        output_path,
        script_path=None,
        audio_format="wav",
        audio_subtype=None,
        scp_sep=" ",
    ):
        self.output_path = output_path
        self.script_path = script_path
        self.audio_format = audio_format
        self.scp_sep = scp_sep

        assert "." + self.audio_format in valid_ext
        if audio_subtype is None:
            self.subtype = sf.default_subtype(self.audio_format)
        else:
            self.subtype = audio_subtype
            assert sf.check_format(self.audio_format, self.subtype)

        if not os.path.exists(output_path):
            try:
                os.makedirs(output_path)
            except FileExistsError:
                pass

        if script_path is not None:
            self.f_script = open(script_path, "w")
        else:
            self.f_script = None

[docs]    def __enter__(self):
        """Function required when entering contructions of type

        with AudioWriter('./path') as f:
           f.write(key, data)
        """
        return self

[docs]    def __exit__(self, exc_type, exc_value, traceback):
        """Function required when exiting from contructions of type

        with AudioWriter('./path') as f:
           f.write(key, data)
        """
        self.close()

[docs]    def close(self):
        """Closes the script file if open"""
        if self.f_script is not None:
            self.f_script.close()

[docs]    def write(self, keys, data, fs):
        """Writes waveform to audio file.

        Args:
          key: List of recodings names.
          data: List of waveforms
          fs:
        """
        if isinstance(keys, str):
            keys = [keys]
            data = [data]

        fs_is_list = isinstance(fs, (list, np.ndarray))
        assert self.subtype in subtype_to_npdtype
        dtype = subtype_to_npdtype[self.subtype]
        output_files = []
        for i, key_i in enumerate(keys):
            assert is_token(key_i), "Token %s not valid" % key_i
            file_basename = re.sub("/", "-", key_i)
            output_file = "%s/%s.%s" % (
                self.output_path,
                file_basename,
                self.audio_format,
            )
            fs_i = fs[i] if fs_is_list else fs
            data_i = data[i].astype(dtype, copy=False)
            sf.write(output_file, data_i, fs_i, subtype=self.subtype)

            output_files.append(output_file)

            if self.f_script is not None:
                self.f_script.write("%s%s%s\n" % (key_i, self.scp_sep, output_file))
                self.f_script.flush()

        return output_files

[docs]    @staticmethod
    def filter_args(**kwargs):
        valid_args = (
            "output_fs",
            "output_wav_scale",
            "output_audio_format",
            "output_audio_subtype",
        )
        return dict(
            (re.sub("output_", "", k), kwargs[k]) for k in valid_args if k in kwargs
        )

[docs]    @staticmethod
    def add_class_args(parser, prefix=None):
        if prefix is None:
            p1 = "--"
        else:
            p1 = "--" + prefix + "."

        # parser.add_argument(p1+'output-wav-scale', default=1, type=float,
        #                      help=('scale to divide the waveform before writing'))

        parser.add_argument(
            p1 + "output-audio-format",
            default="flac",
            choices=["flac", "ogg", "wav"],
            help=("ouput audio format"),
        )

        parser.add_argument(
            p1 + "output-audio-subtype",
            default=None,
            choices=["pcm_16", "pcm_24", "float", "double", "vorbis"],
            help=("coding format for audio file"),
        )

        # parser.add_argument(p1+'output-fs', default=16000, type=int,
        #                      help=('output sample frequency'))

    add_argparse_args = add_class_args