-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathsineModel.py
222 lines (206 loc) · 13.9 KB
/
sineModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# functions that implement analysis and synthesis of sounds using the Sinusoidal Model
# (for example usage check the examples models_interface)
import numpy as np
from scipy.signal import blackmanharris, triang
from scipy.fftpack import ifft, fftshift
import math
import dftModel as DFT
import utilFunctions as UF
def sineTracking(pfreq, pmag, pphase, tfreq, freqDevOffset=20, freqDevSlope=0.01):
"""
Tracking sinusoids from one frame to the next
pfreq, pmag, pphase: frequencies and magnitude of current frame
tfreq: frequencies of incoming tracks from previous frame
freqDevOffset: minimum frequency deviation at 0Hz
freqDevSlope: slope increase of minimum frequency deviation
returns tfreqn, tmagn, tphasen: frequency, magnitude and phase of tracks
"""
tfreqn = np.zeros(tfreq.size) # initialize array for output frequencies
tmagn = np.zeros(tfreq.size) # initialize array for output magnitudes
tphasen = np.zeros(tfreq.size) # initialize array for output phases
pindexes = np.array(np.nonzero(pfreq), dtype=np.int)[0] # indexes of current peaks
incomingTracks = np.array(np.nonzero(tfreq), dtype=np.int)[0] # indexes of incoming tracks
newTracks = np.zeros(tfreq.size, dtype=np.int) -1 # initialize to -1 new tracks
magOrder = np.argsort(-pmag[pindexes]) # order current peaks by magnitude
pfreqt = np.copy(pfreq) # copy current peaks to temporary array
pmagt = np.copy(pmag) # copy current peaks to temporary array
pphaset = np.copy(pphase) # copy current peaks to temporary array
# continue incoming tracks
if incomingTracks.size > 0: # if incoming tracks exist
for i in magOrder: # iterate over current peaks
if incomingTracks.size == 0: # break when no more incoming tracks
break
track = np.argmin(abs(pfreqt[i] - tfreq[incomingTracks])) # closest incoming track to peak
freqDistance = abs(pfreq[i] - tfreq[incomingTracks[track]]) # measure freq distance
if freqDistance < (freqDevOffset + freqDevSlope * pfreq[i]): # choose track if distance is small
newTracks[incomingTracks[track]] = i # assign peak index to track index
incomingTracks = np.delete(incomingTracks, track) # delete index of track in incomming tracks
indext = np.array(np.nonzero(newTracks != -1), dtype=np.int)[0] # indexes of assigned tracks
if indext.size > 0:
indexp = newTracks[indext] # indexes of assigned peaks
tfreqn[indext] = pfreqt[indexp] # output freq tracks
tmagn[indext] = pmagt[indexp] # output mag tracks
tphasen[indext] = pphaset[indexp] # output phase tracks
pfreqt= np.delete(pfreqt, indexp) # delete used peaks
pmagt= np.delete(pmagt, indexp) # delete used peaks
pphaset= np.delete(pphaset, indexp) # delete used peaks
# create new tracks from non used peaks
emptyt = np.array(np.nonzero(tfreq == 0), dtype=np.int)[0] # indexes of empty incoming tracks
peaksleft = np.argsort(-pmagt) # sort left peaks by magnitude
if ((peaksleft.size > 0) & (emptyt.size >= peaksleft.size)): # fill empty tracks
tfreqn[emptyt[:peaksleft.size]] = pfreqt[peaksleft]
tmagn[emptyt[:peaksleft.size]] = pmagt[peaksleft]
tphasen[emptyt[:peaksleft.size]] = pphaset[peaksleft]
elif ((peaksleft.size > 0) & (emptyt.size < peaksleft.size)): # add more tracks if necessary
tfreqn[emptyt] = pfreqt[peaksleft[:emptyt.size]]
tmagn[emptyt] = pmagt[peaksleft[:emptyt.size]]
tphasen[emptyt] = pphaset[peaksleft[:emptyt.size]]
tfreqn = np.append(tfreqn, pfreqt[peaksleft[emptyt.size:]])
tmagn = np.append(tmagn, pmagt[peaksleft[emptyt.size:]])
tphasen = np.append(tphasen, pphaset[peaksleft[emptyt.size:]])
return tfreqn, tmagn, tphasen
def cleaningSineTracks(tfreq, minTrackLength=3):
"""
Delete short fragments of a collection of sinusoidal tracks
tfreq: frequency of tracks
minTrackLength: minimum duration of tracks in number of frames
returns tfreqn: output frequency of tracks
"""
if tfreq.shape[1] == 0: # if no tracks return input
return tfreq
nFrames = tfreq[:,0].size # number of frames
nTracks = tfreq[0,:].size # number of tracks in a frame
for t in range(nTracks): # iterate over all tracks
trackFreqs = tfreq[:,t] # frequencies of one track
trackBegs = np.nonzero((trackFreqs[:nFrames-1] <= 0) # begining of track contours
& (trackFreqs[1:]>0))[0] + 1
if trackFreqs[0]>0:
trackBegs = np.insert(trackBegs, 0, 0)
trackEnds = np.nonzero((trackFreqs[:nFrames-1] > 0) # end of track contours
& (trackFreqs[1:] <=0))[0] + 1
if trackFreqs[nFrames-1]>0:
trackEnds = np.append(trackEnds, nFrames-1)
trackLengths = 1 + trackEnds - trackBegs # lengths of trach contours
for i,j in zip(trackBegs, trackLengths): # delete short track contours
if j <= minTrackLength:
trackFreqs[i:i+j] = 0
return tfreq
def sineModel(x, fs, w, N, t):
"""
Analysis/synthesis of a sound using the sinusoidal model, without sine tracking
x: input array sound, w: analysis window, N: size of complex spectrum, t: threshold in negative dB
returns y: output array sound
"""
hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding
hM2 = int(math.floor(w.size/2)) # half analysis window size by floor
Ns = 512 # FFT size for synthesis (even)
H = Ns//4 # Hop size used for analysis and synthesis
hNs = Ns//2 # half of synthesis FFT size
pin = max(hNs, hM1) # init sound pointer in middle of anal window
pend = x.size - max(hNs, hM1) # last sample to start a frame
fftbuffer = np.zeros(N) # initialize buffer for FFT
yw = np.zeros(Ns) # initialize output sound frame
y = np.zeros(x.size) # initialize output array
w = w / sum(w) # normalize analysis window
sw = np.zeros(Ns) # initialize synthesis window
ow = triang(2*H) # triangular window
sw[hNs-H:hNs+H] = ow # add triangular window
bh = blackmanharris(Ns) # blackmanharris window
bh = bh / sum(bh) # normalized blackmanharris window
sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] # normalized synthesis window
while pin<pend: # while input sound pointer is within sound
#-----analysis-----
x1 = x[pin-hM1:pin+hM2] # select frame
mX, pX = DFT.dftAnal(x1, w, N) # compute dft
ploc = UF.peakDetection(mX, t) # detect locations of peaks
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values by interpolation
ipfreq = fs*iploc/float(N) # convert peak locations to Hertz
#-----synthesis-----
Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs) # generate sines in the spectrum
fftbuffer = np.real(ifft(Y)) # compute inverse FFT
yw[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window
yw[hNs-1:] = fftbuffer[:hNs+1]
y[pin-hNs:pin+hNs] += sw*yw # overlap-add and apply a synthesis window
pin += H # advance sound pointer
return y
def sineModelAnal(x, fs, w, N, H, t, maxnSines = 100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
"""
Analysis of a sound using the sinusoidal model with sine tracking
x: input array sound, w: analysis window, N: size of complex spectrum, H: hop-size, t: threshold in negative dB
maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds
freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation
returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks
"""
if (minSineDur <0): # raise error if minSineDur is smaller than 0
raise ValueError("Minimum duration of sine tracks smaller than 0")
hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding
hM2 = int(math.floor(w.size/2)) # half analysis window size by floor
x = np.append(np.zeros(hM2),x) # add zeros at beginning to center first window at sample 0
x = np.append(x,np.zeros(hM2)) # add zeros at the end to analyze last sample
pin = hM1 # initialize sound pointer in middle of analysis window
pend = x.size - hM1 # last sample to start a frame
w = w / sum(w) # normalize analysis window
tfreq = np.array([])
while pin<pend: # while input sound pointer is within sound
x1 = x[pin-hM1:pin+hM2] # select frame
mX, pX = DFT.dftAnal(x1, w, N) # compute dft
ploc = UF.peakDetection(mX, t) # detect locations of peaks
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values by interpolation
ipfreq = fs*iploc/float(N) # convert peak locations to Hertz
# perform sinusoidal tracking by adding peaks to trajectories
tfreq, tmag, tphase = sineTracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope)
tfreq = np.resize(tfreq, min(maxnSines, tfreq.size)) # limit number of tracks to maxnSines
tmag = np.resize(tmag, min(maxnSines, tmag.size)) # limit number of tracks to maxnSines
tphase = np.resize(tphase, min(maxnSines, tphase.size)) # limit number of tracks to maxnSines
jtfreq = np.zeros(maxnSines) # temporary output array
jtmag = np.zeros(maxnSines) # temporary output array
jtphase = np.zeros(maxnSines) # temporary output array
jtfreq[:tfreq.size]=tfreq # save track frequencies to temporary array
jtmag[:tmag.size]=tmag # save track magnitudes to temporary array
jtphase[:tphase.size]=tphase # save track magnitudes to temporary array
if pin == hM1: # if first frame initialize output sine tracks
xtfreq = jtfreq
xtmag = jtmag
xtphase = jtphase
else: # rest of frames append values to sine tracks
xtfreq = np.vstack((xtfreq, jtfreq))
xtmag = np.vstack((xtmag, jtmag))
xtphase = np.vstack((xtphase, jtphase))
pin += H
# delete sine tracks shorter than minSineDur
xtfreq = cleaningSineTracks(xtfreq, round(fs*minSineDur/H))
return xtfreq, xtmag, xtphase
def sineModelSynth(tfreq, tmag, tphase, N, H, fs):
"""
Synthesis of a sound using the sinusoidal model
tfreq,tmag,tphase: frequencies, magnitudes and phases of sinusoids
N: synthesis FFT size, H: hop size, fs: sampling rate
returns y: output array sound
"""
hN = N//2 # half of FFT size for synthesis
L = tfreq.shape[0] # number of frames
pout = 0 # initialize output sound pointer
ysize = H*(L+3) # output sound size
y = np.zeros(ysize) # initialize output array
sw = np.zeros(N) # initialize synthesis window
ow = triang(2*H) # triangular window
sw[hN-H:hN+H] = ow # add triangular window
bh = blackmanharris(N) # blackmanharris window
bh = bh / sum(bh) # normalized blackmanharris window
sw[hN-H:hN+H] = sw[hN-H:hN+H]/bh[hN-H:hN+H] # normalized synthesis window
lastytfreq = tfreq[0,:] # initialize synthesis frequencies
ytphase = 2*np.pi*np.random.rand(tfreq[0,:].size) # initialize synthesis phases
for l in range(L): # iterate over all frames
if (tphase.size > 0): # if no phases generate them
ytphase = tphase[l,:]
else:
ytphase += (np.pi*(lastytfreq+tfreq[l,:])/fs)*H # propagate phases
Y = UF.genSpecSines(tfreq[l,:], tmag[l,:], ytphase, N, fs) # generate sines in the spectrum
lastytfreq = tfreq[l,:] # save frequency for phase propagation
ytphase = ytphase % (2*np.pi) # make phase inside 2*pi
yw = np.real(fftshift(ifft(Y))) # compute inverse FFT
y[pout:pout+N] += sw*yw # overlap-add and apply a synthesis window
pout += H # advance sound pointer
y = np.delete(y, range(hN)) # delete half of first window
y = np.delete(y, range(y.size-hN, y.size)) # delete half of the last window
return y