|
1 | 1 | import os
|
2 |
| -import unittest |
3 | 2 |
|
4 |
| -from tests import get_tests_input_path, get_tests_output_path, get_tests_path |
| 3 | +import pytest |
| 4 | + |
| 5 | +from tests import get_tests_input_path |
5 | 6 | from TTS.config import BaseAudioConfig
|
6 | 7 | from TTS.utils.audio.processor import AudioProcessor
|
7 | 8 |
|
8 |
| -TESTS_PATH = get_tests_path() |
9 |
| -OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests") |
10 | 9 | WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")
|
11 | 10 |
|
12 |
| -os.makedirs(OUT_PATH, exist_ok=True) |
13 | 11 | conf = BaseAudioConfig(mel_fmax=8000, pitch_fmax=640, pitch_fmin=1)
|
14 | 12 |
|
15 | 13 |
|
16 |
| -# pylint: disable=protected-access |
17 |
| -class TestAudio(unittest.TestCase): |
18 |
| - def __init__(self, *args, **kwargs): |
19 |
| - super().__init__(*args, **kwargs) |
20 |
| - self.ap = AudioProcessor(**conf) |
21 |
| - |
22 |
| - def test_audio_synthesis(self): |
23 |
| - """1. load wav |
24 |
| - 2. set normalization parameters |
25 |
| - 3. extract mel-spec |
26 |
| - 4. invert to wav and save the output |
27 |
| - """ |
28 |
| - print(" > Sanity check for the process wav -> mel -> wav") |
29 |
| - |
30 |
| - def _test(max_norm, signal_norm, symmetric_norm, clip_norm): |
31 |
| - self.ap.max_norm = max_norm |
32 |
| - self.ap.signal_norm = signal_norm |
33 |
| - self.ap.symmetric_norm = symmetric_norm |
34 |
| - self.ap.clip_norm = clip_norm |
35 |
| - wav = self.ap.load_wav(WAV_FILE) |
36 |
| - mel = self.ap.melspectrogram(wav) |
37 |
| - wav_ = self.ap.inv_melspectrogram(mel) |
38 |
| - file_name = "/audio_test-melspec_max_norm_{}-signal_norm_{}-symmetric_{}-clip_norm_{}.wav".format( |
39 |
| - max_norm, signal_norm, symmetric_norm, clip_norm |
40 |
| - ) |
41 |
| - print(" | > Creating wav file at : ", file_name) |
42 |
| - self.ap.save_wav(wav_, OUT_PATH + file_name) |
43 |
| - |
44 |
| - # maxnorm = 1.0 |
45 |
| - _test(1.0, False, False, False) |
46 |
| - _test(1.0, True, False, False) |
47 |
| - _test(1.0, True, True, False) |
48 |
| - _test(1.0, True, False, True) |
49 |
| - _test(1.0, True, True, True) |
50 |
| - # maxnorm = 4.0 |
51 |
| - _test(4.0, False, False, False) |
52 |
| - _test(4.0, True, False, False) |
53 |
| - _test(4.0, True, True, False) |
54 |
| - _test(4.0, True, False, True) |
55 |
| - _test(4.0, True, True, True) |
56 |
| - |
57 |
| - def test_normalize(self): |
58 |
| - """Check normalization and denormalization for range values and consistency""" |
59 |
| - print(" > Testing normalization and denormalization.") |
60 |
| - wav = self.ap.load_wav(WAV_FILE) |
61 |
| - wav = self.ap.sound_norm(wav) # normalize audio to get abetter normalization range below. |
62 |
| - self.ap.signal_norm = False |
63 |
| - x = self.ap.melspectrogram(wav) |
64 |
| - x_old = x |
65 |
| - |
66 |
| - self.ap.signal_norm = True |
67 |
| - self.ap.symmetric_norm = False |
68 |
| - self.ap.clip_norm = False |
69 |
| - self.ap.max_norm = 4.0 |
70 |
| - x_norm = self.ap.normalize(x) |
71 |
| - print( |
72 |
| - f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}" |
73 |
| - ) |
74 |
| - assert (x_old - x).sum() == 0 |
75 |
| - # check value range |
76 |
| - assert x_norm.max() <= self.ap.max_norm + 1, x_norm.max() |
77 |
| - assert x_norm.min() >= 0 - 1, x_norm.min() |
78 |
| - # check denorm. |
79 |
| - x_ = self.ap.denormalize(x_norm) |
80 |
| - assert (x - x_).sum() < 1e-3, (x - x_).mean() |
81 |
| - |
82 |
| - self.ap.signal_norm = True |
83 |
| - self.ap.symmetric_norm = False |
84 |
| - self.ap.clip_norm = True |
85 |
| - self.ap.max_norm = 4.0 |
86 |
| - x_norm = self.ap.normalize(x) |
87 |
| - print( |
88 |
| - f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}" |
89 |
| - ) |
90 |
| - |
91 |
| - assert (x_old - x).sum() == 0 |
92 |
| - # check value range |
93 |
| - assert x_norm.max() <= self.ap.max_norm, x_norm.max() |
94 |
| - assert x_norm.min() >= 0, x_norm.min() |
95 |
| - # check denorm. |
96 |
| - x_ = self.ap.denormalize(x_norm) |
97 |
| - assert (x - x_).sum() < 1e-3, (x - x_).mean() |
98 |
| - |
99 |
| - self.ap.signal_norm = True |
100 |
| - self.ap.symmetric_norm = True |
101 |
| - self.ap.clip_norm = False |
102 |
| - self.ap.max_norm = 4.0 |
103 |
| - x_norm = self.ap.normalize(x) |
104 |
| - print( |
105 |
| - f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}" |
106 |
| - ) |
107 |
| - |
108 |
| - assert (x_old - x).sum() == 0 |
109 |
| - # check value range |
110 |
| - assert x_norm.max() <= self.ap.max_norm + 1, x_norm.max() |
111 |
| - assert x_norm.min() >= -self.ap.max_norm - 2, x_norm.min() # pylint: disable=invalid-unary-operand-type |
112 |
| - assert x_norm.min() <= 0, x_norm.min() |
113 |
| - # check denorm. |
114 |
| - x_ = self.ap.denormalize(x_norm) |
115 |
| - assert (x - x_).sum() < 1e-3, (x - x_).mean() |
116 |
| - |
117 |
| - self.ap.signal_norm = True |
118 |
| - self.ap.symmetric_norm = True |
119 |
| - self.ap.clip_norm = True |
120 |
| - self.ap.max_norm = 4.0 |
121 |
| - x_norm = self.ap.normalize(x) |
122 |
| - print( |
123 |
| - f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}" |
124 |
| - ) |
125 |
| - |
126 |
| - assert (x_old - x).sum() == 0 |
127 |
| - # check value range |
128 |
| - assert x_norm.max() <= self.ap.max_norm, x_norm.max() |
129 |
| - assert x_norm.min() >= -self.ap.max_norm, x_norm.min() # pylint: disable=invalid-unary-operand-type |
130 |
| - assert x_norm.min() <= 0, x_norm.min() |
131 |
| - # check denorm. |
132 |
| - x_ = self.ap.denormalize(x_norm) |
133 |
| - assert (x - x_).sum() < 1e-3, (x - x_).mean() |
134 |
| - |
135 |
| - self.ap.signal_norm = True |
136 |
| - self.ap.symmetric_norm = False |
137 |
| - self.ap.max_norm = 1.0 |
138 |
| - x_norm = self.ap.normalize(x) |
139 |
| - print( |
140 |
| - f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}" |
141 |
| - ) |
142 |
| - |
143 |
| - assert (x_old - x).sum() == 0 |
144 |
| - assert x_norm.max() <= self.ap.max_norm, x_norm.max() |
145 |
| - assert x_norm.min() >= 0, x_norm.min() |
146 |
| - x_ = self.ap.denormalize(x_norm) |
147 |
| - assert (x - x_).sum() < 1e-3 |
148 |
| - |
149 |
| - self.ap.signal_norm = True |
150 |
| - self.ap.symmetric_norm = True |
151 |
| - self.ap.max_norm = 1.0 |
152 |
| - x_norm = self.ap.normalize(x) |
153 |
| - print( |
154 |
| - f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}" |
155 |
| - ) |
156 |
| - |
157 |
| - assert (x_old - x).sum() == 0 |
158 |
| - assert x_norm.max() <= self.ap.max_norm, x_norm.max() |
159 |
| - assert x_norm.min() >= -self.ap.max_norm, x_norm.min() # pylint: disable=invalid-unary-operand-type |
160 |
| - assert x_norm.min() < 0, x_norm.min() |
161 |
| - x_ = self.ap.denormalize(x_norm) |
162 |
| - assert (x - x_).sum() < 1e-3 |
163 |
| - |
164 |
| - def test_scaler(self): |
165 |
| - scaler_stats_path = os.path.join(get_tests_input_path(), "scale_stats.npy") |
166 |
| - conf.stats_path = scaler_stats_path |
167 |
| - conf.preemphasis = 0.0 |
168 |
| - conf.do_trim_silence = True |
169 |
| - conf.signal_norm = True |
170 |
| - |
171 |
| - ap = AudioProcessor(**conf) |
172 |
| - mel_mean, mel_std, linear_mean, linear_std, _ = ap.load_stats(scaler_stats_path) |
173 |
| - ap.setup_scaler(mel_mean, mel_std, linear_mean, linear_std) |
174 |
| - |
175 |
| - self.ap.signal_norm = False |
176 |
| - self.ap.preemphasis = 0.0 |
177 |
| - |
178 |
| - # test scaler forward and backward transforms |
179 |
| - wav = self.ap.load_wav(WAV_FILE) |
180 |
| - mel_reference = self.ap.melspectrogram(wav) |
181 |
| - mel_norm = ap.melspectrogram(wav) |
182 |
| - mel_denorm = ap.denormalize(mel_norm) |
183 |
| - assert abs(mel_reference - mel_denorm).max() < 1e-4 |
184 |
| - |
185 |
| - def test_compute_f0(self): # pylint: disable=no-self-use |
186 |
| - ap = AudioProcessor(**conf) |
187 |
| - wav = ap.load_wav(WAV_FILE) |
188 |
| - pitch = ap.compute_f0(wav) |
189 |
| - mel = ap.melspectrogram(wav) |
190 |
| - assert pitch.shape[0] == mel.shape[1] |
@pytest.fixture
def ap():
    """Provide an ``AudioProcessor`` built from the module-level ``conf``.

    The fixture uses pytest's default (function) scope, so each test that
    requests it receives its own instance and may freely reassign attributes
    on it.
    """
    processor = AudioProcessor(**conf)
    return processor
| 18 | + |
| 19 | + |
# (signal_norm, symmetric_norm, clip_norm) combinations exercised for every
# ``max_norm`` value below.
_norm_flag_combos = (
    (False, False, False),
    (True, False, False),
    (True, True, False),
    (True, False, True),
    (True, True, True),
)

# Parametrization table of (max_norm, signal_norm, symmetric_norm, clip_norm)
# tuples: all flag combinations for max_norm = 1.0 first, then for 4.0.
norms = [(max_norm, *flags) for max_norm in (1.0, 4.0) for flags in _norm_flag_combos]
| 34 | + |
| 35 | + |
@pytest.mark.parametrize("norms", norms)
def test_audio_synthesis(tmp_path, ap, norms):
    """Run the wav -> mel -> wav round trip for one normalization setting.

    Steps:
    1. apply the (max_norm, signal_norm, symmetric_norm, clip_norm) tuple to
       the processor
    2. load the example wav
    3. extract the mel-spectrogram
    4. invert it back to a waveform and save it under ``tmp_path``

    The test has no assertions; it passes when every step runs without raising.
    """
    print(" > Sanity check for the process wav -> mel -> wav")

    # Assign the settings in the same order as the tuple layout.
    setting_names = ("max_norm", "signal_norm", "symmetric_norm", "clip_norm")
    for setting, value in zip(setting_names, norms):
        setattr(ap, setting, value)
    max_norm, signal_norm, symmetric_norm, clip_norm = norms

    source_wav = ap.load_wav(WAV_FILE)
    reconstructed_wav = ap.inv_melspectrogram(ap.melspectrogram(source_wav))

    file_name = "-".join(
        [
            f"audio_test-melspec_max_norm_{max_norm}",
            f"signal_norm_{signal_norm}",
            f"symmetric_{symmetric_norm}",
            f"clip_norm_{clip_norm}.wav",
        ]
    )
    print(" | > Creating wav file at : ", file_name)
    ap.save_wav(reconstructed_wav, tmp_path / file_name)
| 58 | + |
| 59 | + |
def test_normalize(ap):
    """Check normalization and denormalization for range values and consistency.

    A reference mel-spectrogram is computed once with ``signal_norm`` disabled
    and then normalized under several ``symmetric_norm`` / ``clip_norm`` /
    ``max_norm`` settings. For every setting the test checks that:

    * ``normalize`` did not modify its input,
    * the normalized values stay inside the expected range,
    * ``denormalize`` maps the normalized values back (signed residual sum
      below 1e-3).
    """
    print(" > Testing normalization and denormalization.")
    wav = ap.load_wav(WAV_FILE)
    wav = ap.sound_norm(wav)  # normalize audio to get a better normalization range below.
    ap.signal_norm = False
    x = ap.melspectrogram(wav)
    # Keep an independent snapshot. A plain alias (``x_old = x``) refers to the
    # same array, so comparing it with ``x`` could never reveal an in-place
    # modification made by ``normalize``.
    x_old = x.copy()

    # Columns: symmetric_norm, clip_norm, max_norm,
    #          upper limit for x_norm.max(), lower limit for x_norm.min(),
    #          sign requirement on x_norm.min() (None = not checked).
    # Without clipping the limits carry some slack beyond the nominal range.
    cases = [
        (False, False, 4.0, 5.0, -1.0, None),
        (False, True, 4.0, 4.0, 0.0, None),
        (True, False, 4.0, 5.0, -6.0, "nonpositive"),
        (True, True, 4.0, 4.0, -4.0, "nonpositive"),
        # The max_norm = 1.0 cases previously inherited clip_norm=True from the
        # preceding section; it is now spelled out.
        (False, True, 1.0, 1.0, 0.0, None),
        (True, True, 1.0, 1.0, -1.0, "negative"),
    ]

    for symmetric_norm, clip_norm, max_norm, max_limit, min_limit, min_sign in cases:
        ap.signal_norm = True
        ap.symmetric_norm = symmetric_norm
        ap.clip_norm = clip_norm
        ap.max_norm = max_norm
        x_norm = ap.normalize(x)
        print(
            f" > MaxNorm: {ap.max_norm}, ClipNorm:{ap.clip_norm}, SymmetricNorm:{ap.symmetric_norm}, SignalNorm:{ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}"
        )

        # normalize() must leave its input untouched
        assert abs(x_old - x).max() == 0
        # check value range
        assert x_norm.max() <= max_limit, x_norm.max()
        assert x_norm.min() >= min_limit, x_norm.min()
        if min_sign == "nonpositive":
            assert x_norm.min() <= 0, x_norm.min()
        elif min_sign == "negative":
            assert x_norm.min() < 0, x_norm.min()
        # check denorm.
        # NOTE(review): this is a signed sum, so negative residuals always pass.
        # Kept as in the original because a symmetric (absolute) bound may not
        # hold for the clipping configurations - confirm before tightening.
        x_ = ap.denormalize(x_norm)
        assert (x - x_).sum() < 1e-3, (x - x_).mean()
| 166 | + |
| 167 | + |
def test_scaler(ap):
    """Check that statistics-based scaling is undone by ``denormalize``.

    Two processors are involved:

    * ``ap`` (the fixture) acts as the reference: with ``signal_norm`` turned
      off it yields the un-normalized mel-spectrogram.
    * ``scaler_ap`` is configured with ``stats_path`` and ``signal_norm=True``
      and has its scaler set up from the stored statistics.

    The mel-spectrogram produced by ``scaler_ap`` and passed back through its
    ``denormalize`` must match the reference to within 1e-4.
    """
    scaler_stats_path = os.path.join(get_tests_input_path(), "scale_stats.npy")

    # Derive the scaler settings as one-off keyword arguments instead of
    # assigning onto the module-level ``conf``; mutating the shared config
    # would leak these settings into every processor created afterwards.
    scaler_kwargs = {
        **conf,
        "stats_path": scaler_stats_path,
        "preemphasis": 0.0,
        "do_trim_silence": True,
        "signal_norm": True,
    }

    # Use a distinct name: rebinding ``ap`` here would shadow the fixture and
    # make the test compare the scaler processor with itself.
    scaler_ap = AudioProcessor(**scaler_kwargs)
    mel_mean, mel_std, linear_mean, linear_std, _ = scaler_ap.load_stats(scaler_stats_path)
    scaler_ap.setup_scaler(mel_mean, mel_std, linear_mean, linear_std)

    # Reference processor: no signal normalization, no pre-emphasis.
    ap.signal_norm = False
    ap.preemphasis = 0.0

    # test scaler forward and backward transforms
    wav = ap.load_wav(WAV_FILE)
    mel_reference = ap.melspectrogram(wav)
    mel_norm = scaler_ap.melspectrogram(wav)
    mel_denorm = scaler_ap.denormalize(mel_norm)
    assert abs(mel_reference - mel_denorm).max() < 1e-4
| 188 | + |
| 189 | + |
def test_compute_f0(ap):
    """Check that the pitch track and the mel-spectrogram agree in length.

    The first dimension of the ``compute_f0`` output must equal the second
    dimension of the mel-spectrogram (presumably the frame axis).
    """
    audio = ap.load_wav(WAV_FILE)
    f0 = ap.compute_f0(audio)
    spectrogram = ap.melspectrogram(audio)

    pitch_length = f0.shape[0]
    mel_length = spectrogram.shape[1]
    assert pitch_length == mel_length
0 commit comments