# Source code for hyperion.torch.utils.eval_utils

"""
 Copyright 2019 Johns Hopkins University  (Author: Jesus Villalba)
 Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
"""

import math
import torch


def eval_nnet_by_chunks(x, nnet, chunk_length=0, detach_chunks=True, time_dim=-1):
    """Evaluates a network on an input tensor chunk by chunk along time.

    If ``chunk_length`` is 0, or the input has no more than ``chunk_length``
    frames, the network is applied to the whole input in a single call.
    Otherwise the input is cut into chunks of ``chunk_length`` frames whose
    starts are ``chunk_length - left_context - right_context`` frames apart,
    where the context is what ``nnet.in_context()`` reports (0 if that call
    fails). From every chunk output the frames that fall in the (rescaled)
    context are dropped, except the left context of the first chunk, and the
    remaining frames are written one after another into the output tensor.

    Args:
      x: input tensor.
      nnet: callable applied to each chunk. It must expose a ``device``
        attribute; ``in_context()`` -> (left, right) and
        ``out_shape(in_shape)`` are used when available.
      chunk_length: chunk length in input frames; 0 disables chunking.
      detach_chunks: if True, every network output is detached.
      time_dim: index of the time dimension in ``x`` and in the output.

    Returns:
      Output tensor with time in ``time_dim``. In the chunked path it is
      allocated with ``torch.zeros`` (default device) using the dtype of the
      network output; frames not covered by any chunk stay at zero.

    Raises:
      ValueError: if ``chunk_length`` is not larger than the input context.
    """
    # only move data when the input is not already on the network device
    device = None if nnet.device == x.device else nnet.device

    T = x.shape[time_dim]
    if T <= chunk_length or chunk_length == 0:
        if device is not None:
            x = x.to(device)

        y = nnet(x)
        if detach_chunks:
            y = y.detach()
        return y

    # best effort: networks that cannot report their context are treated as
    # having none
    try:
        left_context, right_context = nnet.in_context()
    except Exception:
        left_context = right_context = 0

    in_shape = x.shape
    chunk_shift_in = chunk_length - left_context - right_context
    if chunk_shift_in <= 0:
        raise ValueError(
            "chunk_length=%d must be larger than the network input context "
            "(left=%d, right=%d)" % (chunk_length, left_context, right_context)
        )

    # best effort: if the network cannot predict its output shape, it is
    # inferred from the first chunk inside the loop
    try:
        out_shape = nnet.out_shape(in_shape)
        T_out = out_shape[time_dim]
        r = float(T_out) / T
    except Exception:
        out_shape = None

    num_chunks = int(math.ceil((T - chunk_length) / chunk_shift_in + 1))
    # swap the time dimension with dim 0 so chunks can be sliced directly
    x = x.transpose(0, time_dim)
    y = None
    tbeg_in = 0
    tbeg_out = 0
    for i in range(num_chunks):
        tend_in = min(tbeg_in + chunk_length, x.shape[0])
        # get the slice and put the time dimension back in its place
        x_i = x[tbeg_in:tend_in].transpose(0, time_dim)
        if device is not None:
            x_i = x_i.to(device)

        y_i = nnet(x_i)
        if detach_chunks:
            y_i = y_i.detach()

        chunk_length_out = y_i.shape[time_dim]
        if out_shape is None:
            # infer the output/input frame-rate ratio from the first chunk
            r = float(chunk_length_out) / chunk_length
            # infer the total output length
            T_out = int(r * T)
            out_shape = list(y_i.shape)
            out_shape[time_dim] = T_out

        if y is None:
            right_context_out = int(math.floor(r * right_context))
            left_context_out = int(math.floor(r * left_context))
            chunk_shift_out = chunk_length_out - right_context_out - left_context_out
            # create the output tensor with the dtype produced by the network
            # and swap its time dimension with dim 0
            y = torch.zeros(out_shape, dtype=y_i.dtype).transpose(0, time_dim)

        y_i = y_i.transpose(0, time_dim)
        if i == 0:
            # the first chunk is copied whole; the next chunk starts
            # overwriting where its right context begins
            tend_out = min(tbeg_out + chunk_length_out, T_out)
            y[tbeg_out:tend_out] = y_i
            tbeg_out = chunk_length_out - right_context_out
        else:
            tend_out = min(tbeg_out + chunk_length_out - left_context_out, T_out)
            dt = tend_out - tbeg_out
            if dt > 0:
                # skip the left-context frames of this chunk
                y[tbeg_out:tend_out] = y_i[left_context_out : left_context_out + dt]
                tbeg_out += chunk_shift_out

        tbeg_in += chunk_shift_in

    # put the time dimension back in its place
    y = y.transpose(0, time_dim)
    return y
def eval_nnet_overlap_add(
    x, nnet, chunk_length=0, chunk_overlap=None, detach_chunks=True, time_dim=-1
):
    """Evaluates a network on overlapping chunks and averages the overlaps.

    If ``chunk_length`` is 0, or the input has no more than ``chunk_length``
    frames, the network is applied to the whole input in a single call.
    Otherwise the input is cut into chunks of ``chunk_length`` frames whose
    starts are ``chunk_length - chunk_overlap`` frames apart. The chunk
    outputs are summed into the output tensor at their (rescaled) positions
    and every output frame is divided by the number of chunks that
    contributed to it.

    Args:
      x: input tensor.
      nnet: callable applied to each chunk. It must expose a ``device``
        attribute; ``in_context()`` -> (left, right) and
        ``out_shape(in_shape)`` are used when available.
      chunk_length: chunk length in input frames; 0 disables chunking.
      chunk_overlap: overlap between consecutive chunks in input frames. If
        None, it is set to left + right context from ``nnet.in_context()``
        (0 if that call fails).
      detach_chunks: if True, every network output is detached.
      time_dim: index of the time dimension in ``x`` and in the output.

    Returns:
      Output tensor with time in ``time_dim``. In the chunked path it is
      allocated with ``torch.zeros`` (default device) using the dtype of the
      network output; frames not covered by any chunk stay at zero.

    Raises:
      ValueError: if ``chunk_length`` is not larger than the chunk overlap.
    """
    # only move data when the input is not already on the network device
    device = None if nnet.device == x.device else nnet.device

    T = x.shape[time_dim]
    if T <= chunk_length or chunk_length == 0:
        if device is not None:
            x = x.to(device)

        y = nnet(x)
        if detach_chunks:
            y = y.detach()
        return y

    if chunk_overlap is None:
        # infer the chunk overlap from the network input context; networks
        # that cannot report it are treated as having none
        try:
            left_context, right_context = nnet.in_context()
        except Exception:
            left_context = right_context = 0

        chunk_overlap = left_context + right_context

    in_shape = x.shape
    chunk_shift_in = chunk_length - chunk_overlap
    if chunk_shift_in <= 0:
        raise ValueError(
            "chunk_length=%d must be larger than chunk_overlap=%d"
            % (chunk_length, chunk_overlap)
        )

    # best effort: if the network cannot predict its output shape, it is
    # inferred from the first chunk inside the loop
    try:
        out_shape = nnet.out_shape(in_shape)
        T_out = out_shape[time_dim]
        r = float(T_out) / T
    except Exception:
        out_shape = None

    num_chunks = int(math.ceil((T - chunk_length) / chunk_shift_in + 1))
    # swap the time dimension with dim 0 so chunks can be sliced directly
    x = x.transpose(0, time_dim)
    y = None
    count = None
    tbeg_in = 0
    # the output position is tracked as a float because the output shift
    # r * chunk_shift_in may be fractional; it is rounded only for indexing
    tbeg_out = 0.0
    for _ in range(num_chunks):
        tend_in = min(tbeg_in + chunk_length, x.shape[0])
        # get the slice and put the time dimension back in its place
        x_i = x[tbeg_in:tend_in].transpose(0, time_dim)
        if device is not None:
            x_i = x_i.to(device)

        y_i = nnet(x_i)
        if detach_chunks:
            y_i = y_i.detach()

        chunk_length_out = y_i.shape[time_dim]
        if out_shape is None:
            # infer the output/input frame-rate ratio from the first chunk
            r = float(chunk_length_out) / chunk_length
            # infer the total output length
            T_out = int(r * T)
            out_shape = list(y_i.shape)
            out_shape[time_dim] = T_out

        if y is None:
            chunk_shift_out = r * chunk_shift_in
            # accumulator with the dtype produced by the network, time
            # dimension swapped with dim 0
            y = torch.zeros(out_shape, dtype=y_i.dtype).transpose(0, time_dim)
            # number of chunks that contributed to each output frame
            count = torch.zeros(T_out, dtype=y.dtype)

        y_i = y_i.transpose(0, time_dim)
        # integer slice bounds: tensors cannot be sliced with float indices
        tb = int(round(tbeg_out))
        te = min(tb + chunk_length_out, T_out)
        dt = te - tb
        if dt > 0:
            # in-place add needs both operands on the same device
            y[tb:te] += y_i[:dt].to(y.device)
            count[tb:te] += 1

        tbeg_out += chunk_shift_out
        tbeg_in += chunk_shift_in

    # normalize while time is still dim 0 so the counts line up with the time
    # axis for any time_dim; clamping leaves uncovered frames at zero instead
    # of dividing by zero
    count = count.clamp(min=1).reshape((T_out,) + (1,) * (y.dim() - 1))
    y = y / count
    # put the time dimension back in its place
    y = y.transpose(0, time_dim)
    return y
# """ # Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) # """ # import math # import torch # def eval_nnet_by_chunks(x, nnet, chunk_length=0, device=None, time_dim=-1): # # model_device = next(nnet.parameters()).device # # print(device, model_device, x.device) # #assume time is the last dimension # T = x.shape[time_dim] # if T <= chunk_length or chunk_length == 0: # if device is not None: # x = x.to(device) # return nnet(x) #.detach() # try: # left_context, right_context = nnet.in_context() # except: # left_context = right_context = 0 # in_shape = x.shape # chunk_shift_in = chunk_length - left_context - right_context # try: # out_shape = nnet.out_shape(in_shape) # T_out = out_shape[time_dim] # r = float(T_out)/T # except: # out_shape = None # num_chunks = int(math.ceil((T-chunk_length)/chunk_shift_in+1)) # #move time dimension to dim 0 # x = x.transpose(0, time_dim) # y = None # tbeg_in = 0 # tbeg_out = 0 # for i in range(num_chunks): # tend_in = min(tbeg_in + chunk_length, x.shape[0]) # #get slice and move back time dimension to last dim # x_i = x[tbeg_in:tend_in].transpose(0, time_dim) # if device is not None: # x_i = x_i.to(device) # y_i = nnet(x_i).detach() # chunk_length_out = y_i.shape[time_dim] # if out_shape is None: # # infer chunk_shift in the output # r = float(chunk_length_out)/chunk_length # # infer total output length # T_out = int(r * T) # out_shape = list(y_i.shape) # out_shape[time_dim] = T_out # if y is None: # right_context_out = int(math.floor(r*right_context)) # left_context_out = int(math.floor(r*left_context)) # chunk_shift_out = chunk_length_out - right_context_out - left_context_out # # create output tensor # y = torch.zeros(out_shape) # #move time dimension to dim 0 # y = y.transpose(0, time_dim) # y_i = y_i.transpose(0, time_dim) # if i == 0: # tend_out = min(tbeg_out + chunk_length_out, T_out) # y[tbeg_out:tend_out] = y_i # tbeg_out =+ (chunk_length_out - 
right_context_out) # else: # tend_out = min(int(round(tbeg_out)) + chunk_length_out - left_context_out, T_out) # dt = tend_out - tbeg_out # if dt > 0: # #print('eu', tbeg_out, tend_out, left_context_out,left_context_out+dt, T_out, chunk_length, chunk_length_out, tbeg_in, tend_in) # y[tbeg_out:tend_out] = y_i[left_context_out:left_context_out+dt] # tbeg_out += chunk_shift_out # tbeg_in += chunk_shift_in # # put time dimension back in his place # y = y.transpose(0, time_dim) # return y # def eval_nnet_overlap_add(x, nnet, chunk_length=0, chunk_overlap=None, device=None, time_dim=-1): # #assume time is the last dimension # T = x.shape[time_dim] # if T <= chunk_length or chunk_length == 0: # if device is not None: # x = x.to(device) # return nnet(x).detach() # if chunk_overlap is None: # #infer chunk overlap from network input context # try: # left_context, right_context = nnet.in_context() # except: # left_context = right_context = 0 # chunk_overlap = left_context + right_context # in_shape = x.shape # chunk_shift_in = chunk_length - chunk_overlap # try: # out_shape = nnet.out_shape(in_shape) # T_out = out_shape[time_dim] # r = float(T_out)/T # except: # out_shape = None # num_chunks = int(math.ceil((T-chunk_length)/chunk_shift_in+1)) # #move time dimension to dim 0 # x = x.transpose(0, time_dim) # y = None # N = None # tbeg_in = 0 # tbeg_out = 0 # for i in range(num_chunks): # tend_in = min(tbeg_in + chunk_length, x.shape[0]) # #get slice and move back time dimension to last dim # x_i = x[tbeg_in:tend_in].transpose(0, time_dim) # if device is not None: # x_i = x_i.to(device) # y_i = nnet(x_i).detach() # chunk_length_out = y_i.shape[time_dim] # if out_shape is None: # # infer chunk_shift in the output # r = float(chunk_length_out)/chunk_length # # infer total output length # T_out = int(r * T) # out_shape = list(y_i.shape) # out_shape[time_dim] = T_out # if y is None: # chunk_shift_out = r*chunk_shift_in # # create output tensor # y = torch.zeros(out_shape) # #move 
time dimension to dim 0 # y = y.transpose(0, time_dim) # count = torch.zeros(T_out) # y_i = y_i.transpose(0, time_dim) # tend_out = min(int(round(tbeg_out)) + chunk_length_out, T_out) # dt = tend_out - tbeg_out # y[tbeg_out:tend_out] += y_i[:dt] # count[tbeg_out:tend_out] += 1 # tbeg_out += chunk_shift_out # tbeg_in += chunk_shift_in # # put time dimension back in his place and normalize # y = y.transpose(0, time_dim)/count # return y