Format code with black #1

Status: Open · wants to merge 1 commit into master
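This PR reformats the repository's Python files with black; the changes below are whitespace and line-wrapping only, with no intended change in behavior. For readers unfamiliar with the tool, the following is a minimal, illustrative sketch of applying black programmatically through its format_str API (the snippet and its sample input are for illustration only and are not part of this PR; availability of format_str/FileMode in your installed black version is an assumption):

import black

# Sample input resembling the old signature in audio_processing.py.
src = (
    "def window_sumsquare(window, n_frames, hop_length=200, win_length=800,\n"
    "                     n_fft=800, dtype=np.float32, norm=None):\n"
    "    return None\n"
)

# format_str applies black's default style (88-character lines, trailing
# commas), exploding the long signature onto one argument per line,
# exactly the kind of change shown in the diff below.
print(black.format_str(src, mode=black.FileMode()))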
audio_processing.py · 15 changes: 11 additions & 4 deletions
@@ -4,8 +4,15 @@
import librosa.util as librosa_util


def window_sumsquare(window, n_frames, hop_length=200, win_length=800,
n_fft=800, dtype=np.float32, norm=None):
def window_sumsquare(
window,
n_frames,
hop_length=200,
win_length=800,
n_fft=800,
dtype=np.float32,
norm=None,
):
"""
# from librosa 0.6
Compute the sum-square envelope of a window function at a given hop length.
@@ -46,13 +53,13 @@ def window_sumsquare(window, n_frames, hop_length=200, win_length=800,

# Compute the squared window at the desired length
win_sq = get_window(window, win_length, fftbins=True)
win_sq = librosa_util.normalize(win_sq, norm=norm)**2
win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2
win_sq = librosa_util.pad_center(win_sq, n_fft)

# Fill the envelope
for i in range(n_frames):
sample = i * hop_length
x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))]
x[sample : min(n, sample + n_fft)] += win_sq[: max(0, min(n_fft, n - sample))]
return x


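For context on the function reformatted above (the change itself is formatting only): window_sumsquare, ported from librosa 0.6, computes the sum-square envelope of a window at a given hop length, typically used to renormalize overlap-added inverse-STFT output. A minimal usage sketch, assuming audio_processing.py is importable from the repository root and numpy, scipy, and librosa are installed; the argument values are illustrative, not taken from this PR:

import numpy as np

from audio_processing import window_sumsquare  # module touched by this PR

n_frames = 100
env = window_sumsquare(
    "hann",
    n_frames,
    hop_length=200,
    win_length=800,
    n_fft=800,
    dtype=np.float32,
)

# One envelope value per output sample:
# n_fft + hop_length * (n_frames - 1) samples in total.
assert env.shape == (800 + 200 * (n_frames - 1),)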
data_utils.py · 70 changes: 48 additions & 22 deletions
@@ -14,6 +14,7 @@ class TextMelLoader(torch.utils.data.Dataset):
2) normalizes text and converts them to sequences of one-hot vectors
3) computes mel-spectrograms from audio files.
"""

def __init__(self, audiopaths_and_text, hparams):
self.audiopaths_and_text = load_filepaths_and_text(audiopaths_and_text)
self.text_cleaners = hparams.text_cleaners
@@ -23,41 +24,56 @@ def __init__(self, audiopaths_and_text, hparams):
self.audio_dtype = hparams.audio_dtype
self.use_librosa = hparams.use_librosa
self.stft = layers.TacotronSTFT(
hparams.filter_length, hparams.hop_length, hparams.win_length,
hparams.n_mel_channels, hparams.sampling_rate, hparams.mel_fmin,
hparams.mel_fmax)
hparams.filter_length,
hparams.hop_length,
hparams.win_length,
hparams.n_mel_channels,
hparams.sampling_rate,
hparams.mel_fmin,
hparams.mel_fmax,
)
random.seed(hparams.seed)
random.shuffle(self.audiopaths_and_text)

def get_mel_text_pair(self, audiopath_and_text):
# separate filename and text
audiopath, text = audiopath_and_text[0], audiopath_and_text[1]
speaker, lang = int(float(audiopath_and_text[2])), int(float(audiopath_and_text[3]))
speaker, lang = (
int(float(audiopath_and_text[2])),
int(float(audiopath_and_text[3])),
)
text = self.get_text(text)
mel = self.get_mel(audiopath)
return (text, mel, speaker, lang)

def get_mel(self, filename):
if not self.load_mel_from_disk:
audio, sampling_rate = load_wav_to_torch(filename, self.use_librosa, self.audio_dtype, self.sampling_rate)
audio, sampling_rate = load_wav_to_torch(
filename, self.use_librosa, self.audio_dtype, self.sampling_rate
)
if sampling_rate != self.stft.sampling_rate:
raise ValueError("{} SR doesn't match target {} SR".format(
sampling_rate, self.stft.sampling_rate))
raise ValueError(
"{} SR doesn't match target {} SR".format(
sampling_rate, self.stft.sampling_rate
)
)
audio_norm = audio / self.max_wav_value
audio_norm = audio_norm.unsqueeze(0)
audio_norm = torch.autograd.Variable(audio_norm, requires_grad=False)
melspec = self.stft.mel_spectrogram(audio_norm)
melspec = torch.squeeze(melspec, 0)
else:
melspec = torch.from_numpy(np.load(filename))
assert melspec.size(0) == self.stft.n_mel_channels, (
'Mel dimension mismatch: given {}, expected {}'.format(
melspec.size(0), self.stft.n_mel_channels))
assert (
melspec.size(0) == self.stft.n_mel_channels
), "Mel dimension mismatch: given {}, expected {}".format(
melspec.size(0), self.stft.n_mel_channels
)

return melspec

def get_text(self, text):
text = '*'+text+'`'
text = "*" + text + "`"
text_norm = torch.IntTensor(text_to_sequence(text, self.text_cleaners))
return text_norm

@@ -68,9 +84,10 @@ def __len__(self):
return len(self.audiopaths_and_text)


class TextMelCollate():
class TextMelCollate:
""" Zero-pads model inputs and targets based on number of frames per setep
"""

def __init__(self, n_frames_per_step):
self.n_frames_per_step = n_frames_per_step

@@ -80,27 +97,29 @@ def __call__(self, batch):
------
batch: [text_normalized, mel_normalized]
"""

speakers = torch.tensor([batch[i][2] for i in range(len(batch))])
langs = torch.tensor([batch[i][3] for i in range(len(batch))])

# Right zero-pad all one-hot text sequences to max input length
input_lengths, ids_sorted_decreasing = torch.sort(
torch.LongTensor([len(x[0]) for x in batch]),
dim=0, descending=True)
torch.LongTensor([len(x[0]) for x in batch]), dim=0, descending=True
)
max_input_len = input_lengths[0]

text_padded = torch.LongTensor(len(batch), max_input_len)
text_padded.zero_()
for i in range(len(ids_sorted_decreasing)):
text = batch[ids_sorted_decreasing[i]][0]
text_padded[i, :text.size(0)] = text
text_padded[i, : text.size(0)] = text

# Right zero-pad mel-spec
num_mels = batch[0][1].size(0)
max_target_len = max([x[1].size(1) for x in batch])
if max_target_len % self.n_frames_per_step != 0:
max_target_len += self.n_frames_per_step - max_target_len % self.n_frames_per_step
max_target_len += (
self.n_frames_per_step - max_target_len % self.n_frames_per_step
)
assert max_target_len % self.n_frames_per_step == 0

# include mel padded and gate padded
@@ -111,9 +130,16 @@ def __call__(self, batch):
output_lengths = torch.LongTensor(len(batch))
for i in range(len(ids_sorted_decreasing)):
mel = batch[ids_sorted_decreasing[i]][1]
mel_padded[i, :, :mel.size(1)] = mel
gate_padded[i, mel.size(1)-1:] = 1
mel_padded[i, :, : mel.size(1)] = mel
gate_padded[i, mel.size(1) - 1 :] = 1
output_lengths[i] = mel.size(1)

return text_padded, input_lengths, mel_padded, gate_padded, \
output_lengths, speakers, langs
return (
text_padded,
input_lengths,
mel_padded,
gate_padded,
output_lengths,
speakers,
langs,
)
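For context on the two classes reformatted above: TextMelLoader maps (audiopath, text, speaker, lang) entries to (text, mel, speaker, lang) tensors, and TextMelCollate zero-pads a batch of those tuples to a common text and mel length. A rough sketch of how they are typically wired into a DataLoader follows; the create_hparams helper, the filelist path, and the hparams field names are assumptions following NVIDIA's Tacotron 2 layout, not part of this PR:

from torch.utils.data import DataLoader

from data_utils import TextMelLoader, TextMelCollate
from hparams import create_hparams  # assumed helper, as in NVIDIA's Tacotron 2

hparams = create_hparams()
train_set = TextMelLoader("filelists/train_filelist.txt", hparams)  # path is illustrative
collate_fn = TextMelCollate(hparams.n_frames_per_step)

train_loader = DataLoader(
    train_set,
    batch_size=hparams.batch_size,
    shuffle=True,
    collate_fn=collate_fn,
    drop_last=True,
)

# Each batch matches the 7-tuple returned by TextMelCollate.__call__.
(
    text_padded,
    input_lengths,
    mel_padded,
    gate_padded,
    output_lengths,
    speakers,
    langs,
) = next(iter(train_loader))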
distributed.py · 141 changes: 81 additions & 60 deletions
@@ -3,6 +3,7 @@
from torch.nn.modules import Module
from torch.autograd import Variable


def _flatten_dense_tensors(tensors):
"""Flatten dense tensors into a contiguous 1D buffer. Assume tensors are of
same dense type.
@@ -19,6 +20,7 @@ def _flatten_dense_tensors(tensors):
flat = torch.cat([t.contiguous().view(-1) for t in tensors], dim=0)
return flat


def _unflatten_dense_tensors(flat, tensors):
"""View a flat buffer using the sizes of tensors. Assume that tensors are of
same dense type, and that flat is given by _flatten_dense_tensors.
@@ -39,24 +41,27 @@ def _unflatten_dense_tensors(flat, tensors):
return tuple(outputs)


'''
"""
This version of DistributedDataParallel is designed to be used in conjunction with the multiproc.py
launcher included with this example. It assumes that your run is using multiprocess with 1
GPU/process, that the model is on the correct device, and that torch.set_device has been
used to set the device.

Parameters are broadcasted to the other processes on initialization of DistributedDataParallel,
and will be allreduced at the finish of the backward pass.
'''
class DistributedDataParallel(Module):
"""


class DistributedDataParallel(Module):
def __init__(self, module):
super(DistributedDataParallel, self).__init__()
#fallback for PyTorch 0.3
if not hasattr(dist, '_backend'):
# fallback for PyTorch 0.3
if not hasattr(dist, "_backend"):
self.warn_on_half = True
else:
self.warn_on_half = True if dist._backend == dist.dist_backend.GLOO else False
self.warn_on_half = (
True if dist._backend == dist.dist_backend.GLOO else False
)

self.module = module

@@ -66,7 +71,7 @@ def __init__(self, module):
dist.broadcast(p, 0)

def allreduce_params():
if(self.needs_reduction):
if self.needs_reduction:
self.needs_reduction = False
buckets = {}
for param in self.module.parameters():
@@ -77,9 +82,11 @@ def allreduce_params():
buckets[tp].append(param)
if self.warn_on_half:
if torch.cuda.HalfTensor in buckets:
print("WARNING: gloo dist backend for half parameters may be extremely slow." +
" It is recommended to use the NCCL backend in this case. This currently requires" +
"PyTorch built from top of tree master.")
print(
"WARNING: gloo dist backend for half parameters may be extremely slow."
+ " It is recommended to use the NCCL backend in this case. This currently requires"
+ "PyTorch built from top of tree master."
)
self.warn_on_half = False

for tp in buckets:
@@ -88,20 +95,24 @@ def allreduce_params():
coalesced = _flatten_dense_tensors(grads)
dist.all_reduce(coalesced)
coalesced /= dist.get_world_size()
for buf, synced in zip(grads, _unflatten_dense_tensors(coalesced, grads)):
for buf, synced in zip(
grads, _unflatten_dense_tensors(coalesced, grads)
):
buf.copy_(synced)

for param in list(self.module.parameters()):

def allreduce_hook(*unused):
param._execution_engine.queue_callback(allreduce_params)

if param.requires_grad:
param.register_hook(allreduce_hook)

def forward(self, *inputs, **kwargs):
self.needs_reduction = True
return self.module(*inputs, **kwargs)

'''
"""
def _sync_buffers(self):
buffers = list(self.module._all_buffers())
if len(buffers) > 0:
@@ -118,56 +129,66 @@ def train(self, mode=True):
dist._clear_group_cache()
super(DistributedDataParallel, self).train(mode)
self.module.train(mode)
'''
'''
Modifies existing model to do gradient allreduce, but doesn't change class
so you don't need "module"
'''
def apply_gradient_allreduce(module):
if not hasattr(dist, '_backend'):
module.warn_on_half = True
else:
module.warn_on_half = True if dist._backend == dist.dist_backend.GLOO else False

for p in module.state_dict().values():
if not torch.is_tensor(p):
continue
dist.broadcast(p, 0)
"""

def allreduce_params():
if(module.needs_reduction):
module.needs_reduction = False
buckets = {}
for param in module.parameters():
if param.requires_grad and param.grad is not None:
tp = param.data.dtype
if tp not in buckets:
buckets[tp] = []
buckets[tp].append(param)
if module.warn_on_half:
if torch.cuda.HalfTensor in buckets:
print("WARNING: gloo dist backend for half parameters may be extremely slow." +
" It is recommended to use the NCCL backend in this case. This currently requires" +
"PyTorch built from top of tree master.")
module.warn_on_half = False

for tp in buckets:
bucket = buckets[tp]
grads = [param.grad.data for param in bucket]
coalesced = _flatten_dense_tensors(grads)
dist.all_reduce(coalesced)
coalesced /= dist.get_world_size()
for buf, synced in zip(grads, _unflatten_dense_tensors(coalesced, grads)):
buf.copy_(synced)
"""
Modifies existing model to do gradient allreduce, but doesn't change class
so you don't need "module"
"""

for param in list(module.parameters()):
def allreduce_hook(*unused):
Variable._execution_engine.queue_callback(allreduce_params)
if param.requires_grad:
param.register_hook(allreduce_hook)

def set_needs_reduction(self, input, output):
self.needs_reduction = True
def apply_gradient_allreduce(module):
if not hasattr(dist, "_backend"):
module.warn_on_half = True
else:
module.warn_on_half = True if dist._backend == dist.dist_backend.GLOO else False

for p in module.state_dict().values():
if not torch.is_tensor(p):
continue
dist.broadcast(p, 0)

def allreduce_params():
if module.needs_reduction:
module.needs_reduction = False
buckets = {}
for param in module.parameters():
if param.requires_grad and param.grad is not None:
tp = param.data.dtype
if tp not in buckets:
buckets[tp] = []
buckets[tp].append(param)
if module.warn_on_half:
if torch.cuda.HalfTensor in buckets:
print(
"WARNING: gloo dist backend for half parameters may be extremely slow."
+ " It is recommended to use the NCCL backend in this case. This currently requires"
+ "PyTorch built from top of tree master."
)
module.warn_on_half = False

for tp in buckets:
bucket = buckets[tp]
grads = [param.grad.data for param in bucket]
coalesced = _flatten_dense_tensors(grads)
dist.all_reduce(coalesced)
coalesced /= dist.get_world_size()
for buf, synced in zip(
grads, _unflatten_dense_tensors(coalesced, grads)
):
buf.copy_(synced)

for param in list(module.parameters()):

def allreduce_hook(*unused):
Variable._execution_engine.queue_callback(allreduce_params)

if param.requires_grad:
param.register_hook(allreduce_hook)

def set_needs_reduction(self, input, output):
self.needs_reduction = True

module.register_forward_hook(set_needs_reduction)
return module
module.register_forward_hook(set_needs_reduction)
return module
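As the docstrings above describe, this module assumes one process per GPU (launched via multiproc.py), with parameters broadcast from rank 0 at setup and gradients all-reduced after each backward pass. A minimal sketch of wiring a model through apply_gradient_allreduce follows; the backend, init_method, and other argument values are placeholders, not taken from this PR:

import torch
import torch.distributed as dist

from distributed import apply_gradient_allreduce

def init_distributed(model, rank, world_size):
    # One GPU per process, as assumed by the module above.
    torch.cuda.set_device(rank % torch.cuda.device_count())

    # Placeholder rendezvous settings; adjust to the actual cluster.
    dist.init_process_group(
        backend="nccl",
        init_method="tcp://localhost:54321",
        world_size=world_size,
        rank=rank,
    )

    # Broadcasts parameters from rank 0 and registers hooks that
    # all-reduce gradients after backward; the model keeps its class.
    return apply_gradient_allreduce(model)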