Skip to content

Commit 76cb980

Browse files
add param isconstant to ostinato (#864)
* add param isconstant to naive function ostinato * add test func, expecting error * fix performant func and update test func * minor changes * add param isconstant to ostinatoed, also fix converage * fix decorator * fix coverage
1 parent e3b16d3 commit 76cb980

File tree

3 files changed

+143
-18
lines changed

3 files changed

+143
-18
lines changed

stumpy/ostinato.py

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,14 @@ def _ostinato(
236236
else:
237237
h = 0
238238

239-
mp = partial_mp_func(Ts[j], m, Ts[h], ignore_trivial=False)
239+
mp = partial_mp_func(
240+
Ts[j],
241+
m,
242+
Ts[h],
243+
ignore_trivial=False,
244+
T_A_subseq_isconstant=Ts_subseq_isconstant[j],
245+
T_B_subseq_isconstant=Ts_subseq_isconstant[h],
246+
)
240247
si = np.argsort(mp[:, 0])
241248
for q in si:
242249
radius = mp[q, 0]
@@ -271,8 +278,11 @@ def _ostinato(
271278
return bsf_radius, bsf_Ts_idx, bsf_subseq_idx
272279

273280

274-
@core.non_normalized(aamp_ostinato)
275-
def ostinato(Ts, m, normalize=True, p=2.0):
281+
@core.non_normalized(
282+
aamp_ostinato,
283+
exclude=["normalize", "p", "Ts_subseq_isconstant"],
284+
)
285+
def ostinato(Ts, m, normalize=True, p=2.0, Ts_subseq_isconstant=None):
276286
"""
277287
Find the z-normalized consensus motif of multiple time series
278288
@@ -297,6 +307,9 @@ def ostinato(Ts, m, normalize=True, p=2.0):
297307
The p-norm to apply for computing the Minkowski distance. This parameter is
298308
ignored when `normalize == True`.
299309
310+
Ts_subseq_isconstant : list, default None
311+
A list of rolling window isconstant for each time series in `Ts`.
312+
300313
Returns
301314
-------
302315
central_radius : float
@@ -352,9 +365,12 @@ def ostinato(Ts, m, normalize=True, p=2.0):
352365

353366
M_Ts = [None] * len(Ts)
354367
Σ_Ts = [None] * len(Ts)
355-
Ts_subseq_isconstant = [None] * len(Ts)
368+
if Ts_subseq_isconstant is None:
369+
Ts_subseq_isconstant = [None] * len(Ts)
356370
for i, T in enumerate(Ts):
357-
Ts[i], M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(T, m)
371+
Ts[i], M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
372+
T, m, T_subseq_isconstant=Ts_subseq_isconstant[i]
373+
)
358374

359375
bsf_radius, bsf_Ts_idx, bsf_subseq_idx = _ostinato(
360376
Ts, m, M_Ts, Σ_Ts, Ts_subseq_isconstant
@@ -371,8 +387,11 @@ def ostinato(Ts, m, normalize=True, p=2.0):
371387
return central_radius, central_Ts_idx, central_subseq_idx
372388

373389

374-
@core.non_normalized(aamp_ostinatoed)
375-
def ostinatoed(client, Ts, m, normalize=True, p=2.0):
390+
@core.non_normalized(
391+
aamp_ostinatoed,
392+
exclude=["normalize", "p", "Ts_subseq_isconstant"],
393+
)
394+
def ostinatoed(client, Ts, m, normalize=True, p=2.0, Ts_subseq_isconstant=None):
376395
"""
377396
Find the z-normalized consensus motif of multiple time series with a distributed
378397
cluster
@@ -403,6 +422,9 @@ def ostinatoed(client, Ts, m, normalize=True, p=2.0):
403422
The p-norm to apply for computing the Minkowski distance. This parameter is
404423
ignored when `normalize == True`.
405424
425+
Ts_subseq_isconstant : list, default None
426+
A list of rolling window isconstant for each time series in `Ts`.
427+
406428
Returns
407429
-------
408430
central_radius : float
@@ -461,9 +483,13 @@ def ostinatoed(client, Ts, m, normalize=True, p=2.0):
461483

462484
M_Ts = [None] * len(Ts)
463485
Σ_Ts = [None] * len(Ts)
464-
Ts_subseq_isconstant = [None] * len(Ts)
486+
487+
if Ts_subseq_isconstant is None:
488+
Ts_subseq_isconstant = [None] * len(Ts)
465489
for i, T in enumerate(Ts):
466-
Ts[i], M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(T, m)
490+
Ts[i], M_Ts[i], Σ_Ts[i], Ts_subseq_isconstant[i] = core.preprocess(
491+
T, m, T_subseq_isconstant=Ts_subseq_isconstant[i]
492+
)
467493

468494
bsf_radius, bsf_Ts_idx, bsf_subseq_idx = _ostinato(
469495
Ts,

tests/naive.py

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,7 +1063,7 @@ def left_I_(self):
10631063
return self._left_I.astype(np.int64)
10641064

10651065

1066-
def across_series_nearest_neighbors(Ts, Ts_idx, subseq_idx, m):
1066+
def across_series_nearest_neighbors(Ts, Ts_idx, subseq_idx, m, Ts_subseq_isconstant):
10671067
"""
10681068
For multiple time series find, per individual time series, the subsequences closest
10691069
to a query.
@@ -1085,6 +1085,9 @@ def across_series_nearest_neighbors(Ts, Ts_idx, subseq_idx, m):
10851085
m : int
10861086
Subsequence window size
10871087
1088+
Ts_subseq_isconstant : list
1089+
A list of `T_subseq_isconstant`, where the i-th item corresponds to `Ts[i]`
1090+
10881091
Returns
10891092
-------
10901093
nns_radii : ndarray
@@ -1096,19 +1099,33 @@ def across_series_nearest_neighbors(Ts, Ts_idx, subseq_idx, m):
10961099
`Ts[Ts_idx][subseq_idx : subseq_idx + m]`
10971100
"""
10981101
k = len(Ts)
1102+
10991103
Q = Ts[Ts_idx][subseq_idx : subseq_idx + m]
1104+
Q_subseq_isconstant = Ts_subseq_isconstant[Ts_idx][subseq_idx]
1105+
11001106
nns_radii = np.zeros(k, dtype=np.float64)
11011107
nns_subseq_idx = np.zeros(k, dtype=np.int64)
11021108

11031109
for i in range(k):
11041110
dist_profile = distance_profile(Q, Ts[i], len(Q))
1111+
for j in range(len(dist_profile)):
1112+
if np.isfinite(dist_profile[j]):
1113+
if Q_subseq_isconstant and Ts_subseq_isconstant[i][j]:
1114+
dist_profile[j] = 0
1115+
elif Q_subseq_isconstant or Ts_subseq_isconstant[i][j]:
1116+
dist_profile[j] = np.sqrt(m)
1117+
else: # pragma: no cover
1118+
pass
1119+
11051120
nns_subseq_idx[i] = np.argmin(dist_profile)
11061121
nns_radii[i] = dist_profile[nns_subseq_idx[i]]
11071122

11081123
return nns_radii, nns_subseq_idx
11091124

11101125

1111-
def get_central_motif(Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m):
1126+
def get_central_motif(
1127+
Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m, Ts_subseq_isconstant
1128+
):
11121129
"""
11131130
Compare subsequences with the same radius and return the most central motif
11141131
@@ -1129,6 +1146,9 @@ def get_central_motif(Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m):
11291146
m : int
11301147
Window size
11311148
1149+
Ts_subseq_isconstant : list
1150+
A list of boolean arrays, each corresponds to a time series in `Ts`
1151+
11321152
Returns
11331153
-------
11341154
bsf_radius : float
@@ -1142,7 +1162,7 @@ def get_central_motif(Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m):
11421162
series `bsf_Ts_idx` that contains it
11431163
"""
11441164
bsf_nns_radii, bsf_nns_subseq_idx = across_series_nearest_neighbors(
1145-
Ts, bsf_Ts_idx, bsf_subseq_idx, m
1165+
Ts, bsf_Ts_idx, bsf_subseq_idx, m, Ts_subseq_isconstant
11461166
)
11471167
bsf_nns_mean_radii = bsf_nns_radii.mean()
11481168

@@ -1151,7 +1171,7 @@ def get_central_motif(Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m):
11511171

11521172
for Ts_idx, subseq_idx in zip(candidate_nns_Ts_idx, candidate_nns_subseq_idx):
11531173
candidate_nns_radii, _ = across_series_nearest_neighbors(
1154-
Ts, Ts_idx, subseq_idx, m
1174+
Ts, Ts_idx, subseq_idx, m, Ts_subseq_isconstant
11551175
)
11561176
if (
11571177
np.isclose(candidate_nns_radii.max(), bsf_radius)
@@ -1164,7 +1184,7 @@ def get_central_motif(Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m):
11641184
return bsf_radius, bsf_Ts_idx, bsf_subseq_idx
11651185

11661186

1167-
def consensus_search(Ts, m):
1187+
def consensus_search(Ts, m, Ts_subseq_isconstant):
11681188
"""
11691189
Brute force consensus motif from
11701190
<https://www.cs.ucr.edu/~eamonn/consensus_Motif_ICDM_Long_version.pdf>
@@ -1184,7 +1204,13 @@ def consensus_search(Ts, m):
11841204
radii = np.zeros(len(Ts[j]) - m + 1)
11851205
for i in range(k):
11861206
if i != j:
1187-
mp = stump(Ts[j], m, Ts[i])
1207+
mp = stump(
1208+
Ts[j],
1209+
m,
1210+
Ts[i],
1211+
T_A_subseq_isconstant=Ts_subseq_isconstant[j],
1212+
T_B_subseq_isconstant=Ts_subseq_isconstant[i],
1213+
)
11881214
radii = np.maximum(radii, mp[:, 0])
11891215
min_radius_idx = np.argmin(radii)
11901216
min_radius = radii[min_radius_idx]
@@ -1196,10 +1222,24 @@ def consensus_search(Ts, m):
11961222
return bsf_radius, bsf_Ts_idx, bsf_subseq_idx
11971223

11981224

1199-
def ostinato(Ts, m):
1200-
bsf_radius, bsf_Ts_idx, bsf_subseq_idx = consensus_search(Ts, m)
1225+
def ostinato(Ts, m, Ts_subseq_isconstant=None):
1226+
if Ts_subseq_isconstant is None:
1227+
Ts_subseq_isconstant = [None] * len(Ts)
1228+
1229+
Ts_subseq_isconstant = [
1230+
rolling_isconstant(Ts[i], m, Ts_subseq_isconstant[i]) for i in range(len(Ts))
1231+
]
1232+
1233+
bsf_radius, bsf_Ts_idx, bsf_subseq_idx = consensus_search(
1234+
Ts, m, Ts_subseq_isconstant
1235+
)
12011236
radius, Ts_idx, subseq_idx = get_central_motif(
1202-
Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m
1237+
Ts,
1238+
bsf_radius,
1239+
bsf_Ts_idx,
1240+
bsf_subseq_idx,
1241+
m,
1242+
Ts_subseq_isconstant,
12031243
)
12041244
return radius, Ts_idx, subseq_idx
12051245

tests/test_ostinato.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import functools
2+
13
import naive
24
import numpy as np
35
import numpy.testing as npt
@@ -78,3 +80,60 @@ def test_deterministic_ostinatoed(seed, dask_cluster):
7880
npt.assert_almost_equal(ref_radius, comp_radius)
7981
npt.assert_almost_equal(ref_Ts_idx, comp_Ts_idx)
8082
npt.assert_almost_equal(ref_subseq_idx, comp_subseq_idx)
83+
84+
85+
@pytest.mark.parametrize(
86+
"seed", np.random.choice(np.arange(10000), size=25, replace=False)
87+
)
88+
def test_random_ostinato_with_isconstant(seed):
89+
isconstant_custom_func = functools.partial(
90+
naive.isconstant_func_stddev_threshold, quantile_threshold=0.05
91+
)
92+
93+
m = 50
94+
np.random.seed(seed)
95+
Ts = [np.random.rand(n) for n in [64, 128, 256]]
96+
Ts_subseq_isconstant = [isconstant_custom_func for _ in range(len(Ts))]
97+
98+
ref_radius, ref_Ts_idx, ref_subseq_idx = naive.ostinato(
99+
Ts, m, Ts_subseq_isconstant=Ts_subseq_isconstant
100+
)
101+
comp_radius, comp_Ts_idx, comp_subseq_idx = stumpy.ostinato(
102+
Ts, m, Ts_subseq_isconstant=Ts_subseq_isconstant
103+
)
104+
105+
npt.assert_almost_equal(ref_radius, comp_radius)
106+
npt.assert_almost_equal(ref_Ts_idx, comp_Ts_idx)
107+
npt.assert_almost_equal(ref_subseq_idx, comp_subseq_idx)
108+
109+
110+
@pytest.mark.parametrize("seed", [79, 109, 112, 133, 151, 161, 251, 275, 309, 355])
111+
def test_deterministic_ostinatoed_with_isconstant(seed, dask_cluster):
112+
isconstant_custom_func = functools.partial(
113+
naive.isconstant_func_stddev_threshold, quantile_threshold=0.05
114+
)
115+
116+
with Client(dask_cluster) as dask_client:
117+
m = 50
118+
np.random.seed(seed)
119+
Ts = [np.random.rand(n) for n in [64, 128, 256]]
120+
121+
l = 64 - m + 1
122+
subseq_isconsant = np.full(l, 0, dtype=bool)
123+
subseq_isconsant[np.random.randint(0, l)] = True
124+
Ts_subseq_isconstant = [
125+
subseq_isconsant,
126+
None,
127+
isconstant_custom_func,
128+
]
129+
130+
ref_radius, ref_Ts_idx, ref_subseq_idx = naive.ostinato(
131+
Ts, m, Ts_subseq_isconstant=Ts_subseq_isconstant
132+
)
133+
comp_radius, comp_Ts_idx, comp_subseq_idx = stumpy.ostinatoed(
134+
dask_client, Ts, m, Ts_subseq_isconstant=Ts_subseq_isconstant
135+
)
136+
137+
npt.assert_almost_equal(ref_radius, comp_radius)
138+
npt.assert_almost_equal(ref_Ts_idx, comp_Ts_idx)
139+
npt.assert_almost_equal(ref_subseq_idx, comp_subseq_idx)

0 commit comments

Comments
 (0)