Skip to content

Commit 210c2ef

Browse files
Add the introduction part to Snippet Tutorial (#397)
* Add the introduction part * Add textual context and Improve the flow and figures * [WIP] Revise the intro section and legend of figures * Add the introduction part * Add textual context and Improve the flow and figures * [WIP] Revise the intro section and legend of figures * [WIP]: save temp changes * add the slices of indices for each snippet in the output * use stumpy.mplstyle to refactor the rcparams * remove blank line after docstring of a function * add the plot of snippets regime to check out the functionality of snippets_regime added to snippets modules * correct the block of code for plotting the snippets regimes * Change snippet_regime from list to numpy array * update the notebook according to the updated version of snippet module * resolve issues raised by flake8 * remove snippet tutorial local file from its developing branch to allow merge from the main * Copy back Snippet Tutorial notebook to the Snipperts_Tutorial branch * allow git to track the file (?) * copy back Snippet Tutorial notebook * checking out older version of snippet tutorial notebook * checkout file in a parent * get back to the ours modified version of the notebook file * remove snippet tutorial notebook on the branch * add snippet tutorial notebook from ancestor common file * handle merge manually: add changes of ours to theirs version of the file * move the function _get_mask_slices(mask) to core module * revise the snippet_regime calculation: use list to append new slices since it is faster than appending to numpy array * slight modification on snippet_regime calculation * revise the notebook since the structure of snippet_regimes, returned by the snippets and aampdist_snippets modules, is changed * add dtype=object according to numpy warning. 
Revise docstring to include snippets_regime * Run all cells of notebooks to make sure things are okay * revise docstring (small modification) * revise the snippet_regime calculation to get 2D NumPy array * revise slicing the snippets regimes returned by the snippet module * create a new variable to reduce the length of lines of code that caused error in flake8 * add unit test for function _get_mask_slices * resolve indentation and move naive implementation to the TOP of the test file * move the naive implementation of _get_mask_slices function to naive module * add snippets_regimes in the naive implementation mpdist_snippets and aampdist_snippet * add snippet_regime test to the unit test * add snippet_regime to the unit test * minor modification on the format * reformat the code to follow Black code formatter style * add snippet_regimes to the non_normalized decorator unit test
1 parent 3df750b commit 210c2ef

File tree

9 files changed

+490
-83
lines changed

9 files changed

+490
-83
lines changed

docs/Tutorial_Time_Series_Snippets.ipynb

Lines changed: 333 additions & 72 deletions
Large diffs are not rendered by default.

stumpy/aampdist_snippets.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import math
66
import numpy as np
7-
from .core import check_window_size
7+
from .core import check_window_size, _get_mask_slices
88
from .aampdist import _aampdist_vect
99

1010

@@ -178,6 +178,9 @@ def aampdist_snippets(
178178
The area under the curve corresponding to each profile for each of the top `k`
179179
snippets
180180
181+
snippets_regimes: ndarray
182+
The slices of indices that show the starting and ending indices of snippets
183+
181184
Notes
182185
-----
183186
`DOI: 10.1109/ICBK.2018.00058 \
@@ -213,6 +216,7 @@ def aampdist_snippets(
213216
snippets_areas = np.empty(k)
214217
Q = np.full(D.shape[-1], np.inf)
215218
indices = np.arange(0, n_padded - m, m)
219+
snippets_regimes_list = []
216220

217221
for i in range(k):
218222
profile_areas = np.sum(np.minimum(D, Q), axis=1)
@@ -231,11 +235,19 @@ def aampdist_snippets(
231235
mask = snippets_profiles[i] <= total_min
232236
snippets_fractions[i] = np.sum(mask) / total_min.shape[0]
233237
total_min = total_min - mask.astype(np.float64)
238+
slices = _get_mask_slices(mask)
239+
snippets_regimes_list.append(slices)
240+
241+
n_slices = [regime.shape[0] for regime in snippets_regimes_list]
242+
snippets_regimes = np.empty((sum(n_slices), 3), dtype=object)
243+
snippets_regimes[:, 0] = np.repeat(np.arange(len(snippets_regimes_list)), n_slices)
244+
snippets_regimes[:, 1:] = np.vstack(snippets_regimes_list)
234245

235246
return (
236247
snippets,
237248
snippets_indices,
238249
snippets_profiles,
239250
snippets_fractions,
240251
snippets_areas,
252+
snippets_regimes,
241253
)

stumpy/core.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1806,3 +1806,31 @@ def _jagged_list_to_array(a, fill_value, dtype):
18061806
out[i, : row.size] = row
18071807

18081808
return out
1809+
1810+
1811+
def _get_mask_slices(mask):
    """
    For a boolean vector mask, return the slices of indices at which the mask is
    True.

    Parameters
    ----------
    mask : ndarray
        A boolean 1D array. Any 1D array-like is accepted; its elements are
        interpreted by truthiness.

    Returns
    -------
    slices : ndarray
        A 2D array with one row per contiguous run of `True` values in `mask`.
        Each row has a size of two:
        The first number is the start index (inclusive)
        The second number is the end index (exclusive)
        An empty or all-`False` mask produces an array of shape `(0, 2)`.
    """
    # Coerce to `bool` so that `~` and `&` below act as logical operators.
    # On an integer array they are bitwise and give wrong edges for values
    # other than 0/1, and on a float array (e.g., an empty list) `~` raises.
    mask = np.asarray(mask, dtype=bool)

    # Pad with `False` on both sides so that a run touching either end of the
    # mask still produces both a rising and a falling edge.
    padded = np.zeros(mask.shape[0] + 2, dtype=bool)
    padded[1:-1] = mask

    prev = padded[:-1]
    curr = padded[1:]

    (starts,) = np.where(~prev & curr)  # False -> True transitions
    (ends,) = np.where(prev & ~curr)  # True -> False transitions

    slices = np.c_[starts, ends]

    return slices

stumpy/snippets.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import math
66
import numpy as np
77
from . import core
8-
from .core import check_window_size
8+
from .core import check_window_size, _get_mask_slices
99
from .mpdist import _mpdist_vect
1010
from .aampdist_snippets import aampdist_snippets
1111

@@ -187,6 +187,9 @@ def snippets(
187187
The area under the curve corresponding to each profile for each of the top `k`
188188
snippets
189189
190+
snippets_regimes: ndarray
191+
The slices of indices that show the starting and ending indices of snippets
192+
190193
Notes
191194
-----
192195
`DOI: 10.1109/ICBK.2018.00058 \
@@ -222,6 +225,7 @@ def snippets(
222225
snippets_areas = np.empty(k)
223226
Q = np.full(D.shape[-1], np.inf)
224227
indices = np.arange(0, n_padded - m, m)
228+
snippets_regimes_list = []
225229

226230
for i in range(k):
227231
profile_areas = np.sum(np.minimum(D, Q), axis=1)
@@ -240,11 +244,19 @@ def snippets(
240244
mask = snippets_profiles[i] <= total_min
241245
snippets_fractions[i] = np.sum(mask) / total_min.shape[0]
242246
total_min = total_min - mask.astype(np.float64)
247+
slices = _get_mask_slices(mask)
248+
snippets_regimes_list.append(slices)
249+
250+
n_slices = [regime.shape[0] for regime in snippets_regimes_list]
251+
snippets_regimes = np.empty((sum(n_slices), 3), dtype=object)
252+
snippets_regimes[:, 0] = np.repeat(np.arange(len(snippets_regimes_list)), n_slices)
253+
snippets_regimes[:, 1:] = np.vstack(snippets_regimes_list)
243254

244255
return (
245256
snippets,
246257
snippets_indices,
247258
snippets_profiles,
248259
snippets_fractions,
249260
snippets_areas,
261+
snippets_regimes,
250262
)

tests/naive.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,6 +1110,7 @@ def mpdist_snippets(
11101110
snippets_areas = np.empty(k)
11111111
Q = np.inf
11121112
indices = np.arange(0, n_padded - m, m)
1113+
snippets_regimes_list = []
11131114

11141115
for snippet_idx in range(k):
11151116
min_area = np.inf
@@ -1132,13 +1133,21 @@ def mpdist_snippets(
11321133
mask = snippets_profiles[i] <= total_min
11331134
snippets_fractions[i] = np.sum(mask) / total_min.shape[0]
11341135
total_min = total_min - mask.astype(float)
1136+
slices = _get_mask_slices(mask)
1137+
snippets_regimes_list.append(slices)
1138+
1139+
n_slices = [regime.shape[0] for regime in snippets_regimes_list]
1140+
snippets_regimes = np.empty((sum(n_slices), 3), dtype=object)
1141+
snippets_regimes[:, 0] = np.repeat(np.arange(len(snippets_regimes_list)), n_slices)
1142+
snippets_regimes[:, 1:] = np.vstack(snippets_regimes_list)
11351143

11361144
return (
11371145
snippets,
11381146
snippets_indices,
11391147
snippets_profiles,
11401148
snippets_fractions,
11411149
snippets_areas,
1150+
snippets_regimes,
11421151
)
11431152

11441153

@@ -1173,6 +1182,7 @@ def aampdist_snippets(
11731182
snippets_areas = np.empty(k)
11741183
Q = np.inf
11751184
indices = np.arange(0, n_padded - m, m)
1185+
snippets_regimes_list = []
11761186

11771187
for snippet_idx in range(k):
11781188
min_area = np.inf
@@ -1195,13 +1205,21 @@ def aampdist_snippets(
11951205
mask = snippets_profiles[i] <= total_min
11961206
snippets_fractions[i] = np.sum(mask) / total_min.shape[0]
11971207
total_min = total_min - mask.astype(float)
1208+
slices = _get_mask_slices(mask)
1209+
snippets_regimes_list.append(slices)
1210+
1211+
n_slices = [regime.shape[0] for regime in snippets_regimes_list]
1212+
snippets_regimes = np.empty((sum(n_slices), 3), dtype=object)
1213+
snippets_regimes[:, 0] = np.repeat(np.arange(len(snippets_regimes_list)), n_slices)
1214+
snippets_regimes[:, 1:] = np.vstack(snippets_regimes_list)
11981215

11991216
return (
12001217
snippets,
12011218
snippets_indices,
12021219
snippets_profiles,
12031220
snippets_fractions,
12041221
snippets_areas,
1222+
snippets_regimes,
12051223
)
12061224

12071225

@@ -1351,3 +1369,19 @@ def transform_pan(pan, ms, threshold, bfs_indices, n_processed):
13511369
pan[np.isnan(pan)] = np.nanmax(pan)
13521370

13531371
return pan
1372+
1373+
1374+
def _get_mask_slices(mask):
    # Naive reference: locate the boundaries of each run of True values in
    # `mask` and return them as rows of (start inclusive, end exclusive).
    padded = np.r_[0, mask]  # leading 0 so a run starting at index 0 is seen
    steps = np.diff(padded)

    # A step of +1 marks the start of a run and a step of -1 marks its end
    boundaries = [
        pos for pos, step in enumerate(steps) if step == 1 or step == -1
    ]

    # A run that reaches the end of the mask has no falling edge; close it
    if padded[-1]:
        boundaries.append(len(mask))

    n_runs = len(boundaries) // 2

    return np.array(boundaries).reshape(n_runs, 2)

tests/test_aampdist_snippets.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@ def test_aampdist_snippets(T, m, k):
2323
ref_profiles,
2424
ref_fractions,
2525
ref_areas,
26+
ref_regimes,
2627
) = naive.aampdist_snippets(T, m, k)
2728
(
2829
cmp_snippets,
2930
cmp_indices,
3031
cmp_profiles,
3132
cmp_fractions,
3233
cmp_areas,
34+
cmp_regimes,
3335
) = aampdist_snippets(T, m, k)
3436

3537
npt.assert_almost_equal(
@@ -47,6 +49,9 @@ def test_aampdist_snippets(T, m, k):
4749
# npt.assert_almost_equal(
4850
# ref_areas, cmp_areas, decimal=config.STUMPY_TEST_PRECISION
4951
# )
52+
npt.assert_almost_equal(
53+
ref_regimes, cmp_regimes, decimal=config.STUMPY_TEST_PRECISION
54+
)
5055

5156

5257
@pytest.mark.parametrize("T", test_data)
@@ -60,13 +65,15 @@ def test_mpdist_snippets_percentage(T, m, k, percentage):
6065
ref_profiles,
6166
ref_fractions,
6267
ref_areas,
68+
ref_regimes,
6369
) = naive.aampdist_snippets(T, m, k, percentage=percentage)
6470
(
6571
cmp_snippets,
6672
cmp_indices,
6773
cmp_profiles,
6874
cmp_fractions,
6975
cmp_areas,
76+
cmp_regimes,
7077
) = aampdist_snippets(T, m, k, percentage=percentage)
7178

7279
npt.assert_almost_equal(
@@ -84,6 +91,9 @@ def test_mpdist_snippets_percentage(T, m, k, percentage):
8491
# npt.assert_almost_equal(
8592
# ref_areas, cmp_areas, decimal=config.STUMPY_TEST_PRECISION
8693
# )
94+
npt.assert_almost_equal(
95+
ref_regimes, cmp_regimes, decimal=config.STUMPY_TEST_PRECISION
96+
)
8797

8898

8999
@pytest.mark.parametrize("T", test_data)
@@ -97,13 +107,15 @@ def test_mpdist_snippets_s(T, m, k, s):
97107
ref_profiles,
98108
ref_fractions,
99109
ref_areas,
110+
ref_regimes,
100111
) = naive.aampdist_snippets(T, m, k, s=s)
101112
(
102113
cmp_snippets,
103114
cmp_indices,
104115
cmp_profiles,
105116
cmp_fractions,
106117
cmp_areas,
118+
cmp_regimes,
107119
) = aampdist_snippets(T, m, k, s=s)
108120

109121
npt.assert_almost_equal(
@@ -121,3 +133,6 @@ def test_mpdist_snippets_s(T, m, k, s):
121133
# npt.assert_almost_equal(
122134
# ref_areas, cmp_areas, decimal=config.STUMPY_TEST_PRECISION
123135
# )
136+
npt.assert_almost_equal(
137+
ref_regimes, cmp_regimes, decimal=config.STUMPY_TEST_PRECISION
138+
)

tests/test_core.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,3 +827,19 @@ def test_jagged_list_to_array_empty():
827827
left = np.array([[]], dtype="float64")
828828
right = core._jagged_list_to_array(arr, fill_value=np.nan, dtype="float64")
829829
npt.assert_array_equal(left, right)
830+
831+
832+
def test_get_mask_slices():
    # Exhaustively compare the naive and the core implementations on every
    # boolean mask of length four. Masks are generated from the bit patterns
    # 0..15, most significant bit first, i.e., from all-False to all-True.
    n_bits = 4
    all_masks = [
        [bool((code >> shift) & 1) for shift in range(n_bits - 1, -1, -1)]
        for code in range(2 ** n_bits)
    ]

    for candidate in all_masks:
        expected = naive._get_mask_slices(candidate)
        actual = core._get_mask_slices(candidate)
        npt.assert_array_equal(expected, actual)

tests/test_non_normalized_decorator.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,12 +300,14 @@ def test_snippets():
300300
ref_profiles,
301301
ref_fractions,
302302
ref_areas,
303+
ref_regimes,
303304
) = stumpy.aampdist_snippets(T, m, k)
304305
(
305306
cmp_snippets,
306307
cmp_indices,
307308
cmp_profiles,
308309
cmp_fractions,
309310
cmp_areas,
311+
cmp_regimes,
310312
) = stumpy.snippets(T, m, k, normalize=False)
311313
npt.assert_almost_equal(ref_snippets, cmp_snippets)

0 commit comments

Comments
 (0)