-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathspsModel.py
119 lines (104 loc) · 7.12 KB
/
spsModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# functions that implement analysis and synthesis of sounds using the Sinusoidal plus Stochastic Model
# (for example usage check the models_interface directory)
import numpy as np
from scipy.signal import resample, blackmanharris, triang, hanning
from scipy.fftpack import fft, ifft, fftshift
import math
import utilFunctions as UF
import dftModel as DFT
import sineModel as SM
import stochasticModel as STM
def spsModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope, stocf):
"""
Analysis of a sound using the sinusoidal plus stochastic model
x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB,
minSineDur: minimum duration of sinusoidal tracks
maxnSines: maximum number of parallel sinusoids
freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
stocf: decimation factor used for the stochastic approximation
returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; stocEnv: stochastic residual
"""
# perform sinusoidal analysis
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
Ns = 512
xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs) # subtract sinusoids from original sound
stocEnv = STM.stochasticModelAnal(xr, H, H*2, stocf) # compute stochastic model of residual
return tfreq, tmag, tphase, stocEnv
def spsModelSynth(tfreq, tmag, tphase, stocEnv, N, H, fs):
"""
Synthesis of a sound using the sinusoidal plus stochastic model
tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; stocEnv: stochastic envelope
N: synthesis FFT size; H: hop size, fs: sampling rate
returns y: output sound, ys: sinusoidal component, yst: stochastic component
"""
ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs) # synthesize sinusoids
yst = STM.stochasticModelSynth(stocEnv, H, H*2) # synthesize stochastic residual
y = ys[:min(ys.size, yst.size)]+yst[:min(ys.size, yst.size)] # sum sinusoids and stochastic components
return y, ys, yst
def spsModel(x, fs, w, N, t, stocf):
"""
Analysis/synthesis of a sound using the sinusoidal plus stochastic model
x: input sound, fs: sampling rate, w: analysis window,
N: FFT size (minimum 512), t: threshold in negative dB,
stocf: decimation factor of mag spectrum for stochastic analysis
returns y: output sound, ys: sinusoidal component, yst: stochastic component
"""
hN = N//2 # size of positive spectrum
hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding
hM2 = int(math.floor(w.size/2)) # half analysis window size by floor
Ns = 512 # FFT size for synthesis (even)
H = Ns//4 # Hop size used for analysis and synthesis
hNs = Ns//2
pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window
pend = x.size - max(hNs, hM1) # last sample to start a frame
fftbuffer = np.zeros(N) # initialize buffer for FFT
ysw = np.zeros(Ns) # initialize output sound frame
ystw = np.zeros(Ns) # initialize output sound frame
ys = np.zeros(x.size) # initialize output array
yst = np.zeros(x.size) # initialize output array
w = w / sum(w) # normalize analysis window
sw = np.zeros(Ns)
ow = triang(2*H) # overlapping window
sw[hNs-H:hNs+H] = ow
bh = blackmanharris(Ns) # synthesis window
bh = bh / sum(bh) # normalize synthesis window
wr = bh # window for residual
sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]
sws = H*hanning(Ns)/2 # synthesis window for stochastic
while pin<pend:
#-----analysis-----
x1 = x[pin-hM1:pin+hM2] # select frame
mX, pX = DFT.dftAnal(x1, w, N) # compute dft
ploc = UF.peakDetection(mX, t) # find peaks
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values
ipfreq = fs*iploc/float(N) # convert peak locations to Hertz
ri = pin-hNs-1 # input sound pointer for residual analysis
xw2 = x[ri:ri+Ns]*wr # window the input sound
fftbuffer = np.zeros(Ns) # reset buffer
fftbuffer[:hNs] = xw2[hNs:] # zero-phase window in fftbuffer
fftbuffer[hNs:] = xw2[:hNs]
X2 = fft(fftbuffer) # compute FFT for residual analysis
#-----synthesis-----
Ys = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs) # generate spec of sinusoidal component
Xr = X2-Ys; # get the residual complex spectrum
mXr = 20 * np.log10(abs(Xr[:hNs])) # magnitude spectrum of residual
mXrenv = resample(np.maximum(-200, mXr), mXr.size*stocf) # decimate the magnitude spectrum and avoid -Inf
stocEnv = resample(mXrenv, hNs) # interpolate to original size
pYst = 2*np.pi*np.random.rand(hNs) # generate phase random values
Yst = np.zeros(Ns, dtype = complex)
Yst[:hNs] = 10**(stocEnv/20) * np.exp(1j*pYst) # generate positive freq.
Yst[hNs+1:] = 10**(stocEnv[:0:-1]/20) * np.exp(-1j*pYst[:0:-1]) # generate negative freq.
fftbuffer = np.zeros(Ns)
fftbuffer = np.real(ifft(Ys)) # inverse FFT of harmonic spectrum
ysw[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window
ysw[hNs-1:] = fftbuffer[:hNs+1]
fftbuffer = np.zeros(Ns)
fftbuffer = np.real(ifft(Yst)) # inverse FFT of stochastic spectrum
ystw[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window
ystw[hNs-1:] = fftbuffer[:hNs+1]
ys[ri:ri+Ns] += sw*ysw # overlap-add for sines
yst[ri:ri+Ns] += sws*ystw # overlap-add for stochastic
pin += H # advance sound pointer
y = ys+yst # sum of sinusoidal and residual components
return y, ys, yst