From 62ee497ed89f5bc4b5ef6db298b19338f58685b8 Mon Sep 17 00:00:00 2001 From: ahmadtourei Date: Tue, 26 Mar 2024 20:04:57 -0600 Subject: [PATCH 1/4] coords_size and range methods --- dascore/core/coordmanager.py | 20 +++++++++++++++++++- docs/tutorial/patch.qmd | 4 ++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/dascore/core/coordmanager.py b/dascore/core/coordmanager.py index 1bdc2bdb..1d3b1546 100644 --- a/dascore/core/coordmanager.py +++ b/dascore/core/coordmanager.py @@ -677,7 +677,7 @@ def shape(self): @property def size(self): - """Return the shape of the dimensions.""" + """Return the size of the patch data matrix.""" return np.prod(self.shape) def validate_data(self, data): @@ -945,6 +945,24 @@ def get_array(self, coord_name) -> np.ndarray: """Return the coordinate values as a numpy array.""" return np.array(self.get_coord(coord_name)) + def coords_size(self, coord_name): + """Return the coordinate size.""" + # a better name for this function? (size is already taken) + # returning self.shape[self.coord_name] would be more efficient since + # we have already defined shape as a property? + return len(self.get_coord(coord_name)) + + def range(self, coord_name): + """Return the coordinate scaler value (e.g., number of seconds).""" + # not sure if we want to go with the "range" name. 
+ if coord_name == "time": + sampling_interval = self.attrs["time_step"] / np.timedelta64(1, "s") + sec_max = self.attrs["time_max"] / np.timedelta64(1, "s") + sec_min = self.attrs["time_min"] / np.timedelta64(1, "s") + return sec_max - sec_min + sampling_interval + else: + return self.coords_size(coord_name) + def get_coord_manager( coords: Mapping[str, BaseCoord | np.ndarray] | CoordManager | None = None, diff --git a/docs/tutorial/patch.qmd b/docs/tutorial/patch.qmd index 656865c9..de39802a 100644 --- a/docs/tutorial/patch.qmd +++ b/docs/tutorial/patch.qmd @@ -36,8 +36,8 @@ A single file can be loaded like this: import dascore as dc from dascore.utils.downloader import fetch -# first we download an example data file. You can replace -# this with the path to your file. +# here we download an example data file. You can replace +# the next line with the path to your file. path = fetch("terra15_das_1_trimmed.hdf5") # then we get the first patch in the spool From a48a84b4f38043ea7aad229f6b9f58affa4810fd Mon Sep 17 00:00:00 2001 From: ahmadtourei Date: Wed, 27 Mar 2024 11:22:16 -0600 Subject: [PATCH 2/4] added patch shortcuts - more efficient methods --- dascore/core/coordmanager.py | 31 +++++++++++-------------------- dascore/core/coords.py | 5 +++++ dascore/core/patch.py | 11 +++++++++++ 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/dascore/core/coordmanager.py b/dascore/core/coordmanager.py index 1d3b1546..e74c419d 100644 --- a/dascore/core/coordmanager.py +++ b/dascore/core/coordmanager.py @@ -82,7 +82,7 @@ MaybeArray = TypeVar("MaybeArray", ArrayLike, np.ndarray, None) -def _validate_select_coords(coord, coord_name): +def _validate_select_coords(coord, coord_name: str): """Ensure multi-dims are not used.""" if not len(coord.shape) == 1: msg = ( @@ -92,7 +92,7 @@ def _validate_select_coords(coord, coord_name): raise CoordError(msg) -def _indirect_coord_updates(cm, dim_name, coord_name, reduction, new_coords): +def _indirect_coord_updates(cm, 
dim_name, coord_name: str, reduction, new_coords): """ Applies trim to coordinates. @@ -929,39 +929,30 @@ def get_coord(self, coord_name: str) -> BaseCoord: raise CoordError(msg) return self.coord_map[coord_name] - def min(self, coord_name): + def min(self, coord_name: str): """Return the minimum value of a coordinate.""" return self.get_coord(coord_name).min() - def max(self, coord_name): + def max(self, coord_name: str): """Return the maximum value of a coordinate.""" return self.get_coord(coord_name).max() - def step(self, coord_name): + def step(self, coord_name: str): """Return the coordinate step.""" return self.get_coord(coord_name).step - def get_array(self, coord_name) -> np.ndarray: + def get_array(self, coord_name: str) -> np.ndarray: """Return the coordinate values as a numpy array.""" return np.array(self.get_coord(coord_name)) - def coords_size(self, coord_name): + def coord_size(self, coord_name: str) -> int: """Return the coordinate size.""" - # a better name for this function? (size is already taken) - # returning self.shape[self.coord_name] would be more efficient since - # we have already defined shape as a property? - return len(self.get_coord(coord_name)) + return self.get_coord(coord_name).size - def range(self, coord_name): + def coord_range(self, coord_name: str): """Return the coordinate scaler value (e.g., number of seconds).""" - # not sure if we want to go with the "range" name. 
- if coord_name == "time": - sampling_interval = self.attrs["time_step"] / np.timedelta64(1, "s") - sec_max = self.attrs["time_max"] / np.timedelta64(1, "s") - sec_min = self.attrs["time_min"] / np.timedelta64(1, "s") - return sec_max - sec_min + sampling_interval - else: - return self.coords_size(coord_name) + coord = self.get_coord(coord_name) + return coord.max() - coord.min() + coord.step def get_coord_manager( diff --git a/dascore/core/coords.py b/dascore/core/coords.py index 325ea0a8..aa04f426 100644 --- a/dascore/core/coords.py +++ b/dascore/core/coords.py @@ -323,6 +323,11 @@ def shape(self) -> tuple[int, ...]: """Return the shape of the coordinate data.""" return self.data.shape + @property + def size(self) -> int: + """Return the size of the coordinate data.""" + return np.prod(self.shape) + @property def evenly_sampled(self) -> tuple[int, ...]: """Returns True if the coord is evenly sampled.""" diff --git a/dascore/core/patch.py b/dascore/core/patch.py index fc2660f5..ee5045d9 100644 --- a/dascore/core/patch.py +++ b/dascore/core/patch.py @@ -18,6 +18,7 @@ from dascore.core.coords import BaseCoord from dascore.utils.display import array_to_text, attrs_to_text, get_dascore_text from dascore.utils.models import ArrayLike +from dascore.utils.time import to_float from dascore.viz import VizPatchNameSpace @@ -173,6 +174,16 @@ def size(self) -> tuple[int, ...]: """Return the shape of the data array.""" return self.coords.size + @property + def seconds(self) -> float: + """Return number of seconds in the time coordinate.""" + return to_float(self.coords.coord_range("time")) + + @property + def channel_count(self) -> int: + """Return number of channels in the distance coordinate.""" + return self.coords.coord_size("distance") + # --- basic patch functionality. 
update = dascore.proc.update From 42ea36298624c91222e1640bf4558d7ab87a0f14 Mon Sep 17 00:00:00 2001 From: ahmadtourei Date: Wed, 27 Mar 2024 15:58:37 -0600 Subject: [PATCH 3/4] tests added --- dascore/core/coordmanager.py | 5 ++--- dascore/core/coords.py | 6 ++++++ dascore/core/patch.py | 4 ++-- tests/test_core/test_coordmanager.py | 19 ++++++++++++++++--- tests/test_core/test_coords.py | 10 +++++++++- tests/test_core/test_patch.py | 16 ++++++++++++++++ 6 files changed, 51 insertions(+), 9 deletions(-) diff --git a/dascore/core/coordmanager.py b/dascore/core/coordmanager.py index e74c419d..eb3bc3a1 100644 --- a/dascore/core/coordmanager.py +++ b/dascore/core/coordmanager.py @@ -950,9 +950,8 @@ def coord_size(self, coord_name: str) -> int: return self.get_coord(coord_name).size def coord_range(self, coord_name: str): - """Return the coordinate scaler value (e.g., number of seconds).""" - coord = self.get_coord(coord_name) - return coord.max() - coord.min() + coord.step + """Return a scalar value for the coordinate (e.g., number of seconds).""" + return self.get_coord(coord_name).coord_range() def get_coord_manager( diff --git a/dascore/core/coords.py b/dascore/core/coords.py index aa04f426..b4a28c13 100644 --- a/dascore/core/coords.py +++ b/dascore/core/coords.py @@ -354,6 +354,12 @@ def simplify_units(self) -> Self: _, unit = get_factor_and_unit(self.units, simplify=True) return self.convert_units(unit) + def coord_range(self): + """Return a scalar value for the coordinate (e.g., number of seconds).""" + if not self.evenly_sampled: + raise CoordError("coord_range has to be called on an evenly sampled data.") + return self.max() - self.min() + self.step + @abc.abstractmethod def sort(self, reverse=False) -> tuple[BaseCoord, slice | ArrayLike]: """Sort the contents of the coord. 
Return new coord and slice for sorting.""" diff --git a/dascore/core/patch.py b/dascore/core/patch.py index ee5045d9..96bed91e 100644 --- a/dascore/core/patch.py +++ b/dascore/core/patch.py @@ -161,7 +161,7 @@ def coords(self) -> CoordManager: @property def data(self) -> ArrayLike: - """Return the dimensions contained in patch.""" + """Return the data contained in patch.""" return self._data @property @@ -171,7 +171,7 @@ def shape(self) -> tuple[int, ...]: @property def size(self) -> tuple[int, ...]: - """Return the shape of the data array.""" + """Return the size of the data array.""" return self.coords.size @property diff --git a/tests/test_core/test_coordmanager.py b/tests/test_core/test_coordmanager.py index afe32dd8..8e584ed6 100644 --- a/tests/test_core/test_coordmanager.py +++ b/tests/test_core/test_coordmanager.py @@ -256,19 +256,19 @@ def test_size(self, coord_manager): assert isinstance(coord_manager.size, int | np.int_) def test_min(self, basic_coord_manager): - """Ensure we can git min value.""" + """Ensure we can get min value.""" expected = np.min(basic_coord_manager.time.data).astype(np.int64) got = basic_coord_manager.min("time").astype(np.int64) assert np.isclose(got, expected) def test_max(self, basic_coord_manager): - """Ensure we can git max value.""" + """Ensure we can get max value.""" expected = np.max(basic_coord_manager.time.data).astype(np.int64) got = basic_coord_manager.max("time").astype(np.int64) assert np.isclose(got, expected) def test_step(self, basic_coord_manager): - """Ensure we can git min value.""" + """Ensure we can get step value.""" expected = basic_coord_manager.time.step assert basic_coord_manager.step("time") == expected @@ -297,6 +297,19 @@ def test_iterate(self, basic_coord_manager): expected = basic_coord_manager.get_coord(dim) assert all_close(coord, expected) + def test_coord_size(self, random_patch): + """Ensure we can get size of the coordinate.""" + expected = len(random_patch.coords["time"]) + assert 
random_patch.coords.coord_size("time") == expected + + def test_coord_range(self, random_patch): + """Ensure we can get a scalar value for the coordinate.""" + coord_array = random_patch.coords["time"] + expected = ( + np.max(coord_array) - np.min(coord_array) + random_patch.attrs["time_step"] + ) + assert random_patch.coords.coord_range("time") == expected + class TestCoordManagerInputs: """Tests for coordinates management.""" diff --git a/tests/test_core/test_coords.py b/tests/test_core/test_coords.py index a807f8b8..09768c29 100644 --- a/tests/test_core/test_coords.py +++ b/tests/test_core/test_coords.py @@ -80,7 +80,7 @@ def evenly_sampled_time_delta_coord(): @pytest.fixture(scope="session") @register_func(COORDS) def monotonic_float_coord(): - """Create coordinates which are evenly sampled.""" + """Create coordinates which are not evenly sampled.""" ar = np.cumsum(np.abs(np.random.rand(100))) return get_coord(data=ar) @@ -242,6 +242,7 @@ def test_snap(self, coord): out = coord.snap() assert isinstance(out, BaseCoord) assert out.shape == coord.shape + assert out.size == coord.size # sort order should stay the same if coord.reverse_sorted: assert out.reverse_sorted @@ -328,6 +329,12 @@ def test_both_values_and_data_raises(self): with pytest.raises(CoordError, match=msg): get_coord(data=data, values=data) + def test_coord_range(self, monotonic_float_coord): + """Ensure that coord_range raises an error for not evenly sampled patches.""" + msg = "has to be called on an evenly sampled" + with pytest.raises(CoordError, match=msg): + monotonic_float_coord.coord_range() + class TestCoordSummary: """tests for converting to and from summary coords.""" @@ -859,6 +866,7 @@ def test_arrange_equiv(self): ar = np.arange(start, stop, step) coord = get_coord(start=start, stop=stop, step=step) assert coord.shape == ar.shape + assert coord.size == ar.size def test_unchanged_len(self): """Ensure an array converted to Coord range has same len. 
See #229.""" diff --git a/tests/test_core/test_patch.py b/tests/test_core/test_patch.py index 3a3971f6..63d77ad9 100644 --- a/tests/test_core/test_patch.py +++ b/tests/test_core/test_patch.py @@ -552,6 +552,22 @@ def test_coord_time_narrow_select(self, multi_dim_coords_patch): new_coords = new.coords.coord_map assert isinstance(new_coords["time"], CoordRange) + def test_seconds(self, random_patch_with_lat): + """Ensure we can get number of seconds in the patch.""" + sampling_interval = random_patch_with_lat.attrs["time_step"] / np.timedelta64( + 1, "s" + ) + expected = ( + random_patch_with_lat.attrs["time_max"] + - random_patch_with_lat.attrs["time_min"] + ) / np.timedelta64(1, "s") + sampling_interval + assert random_patch_with_lat.seconds == expected + + def test_channel_count(self, random_patch_with_lat): + """Ensure we can get number of channels in the patch.""" + expected = len(random_patch_with_lat.coords["distance"]) + assert random_patch_with_lat.channel_count == expected + class TestApplyOperator: """Tests for applying various ufunc-type operators.""" From 3584d102cc140f18df0e742ee2656a27940ec66b Mon Sep 17 00:00:00 2001 From: ahmadtourei Date: Wed, 27 Mar 2024 16:07:04 -0600 Subject: [PATCH 4/4] docs in patch tutorial --- docs/tutorial/patch.qmd | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/tutorial/patch.qmd b/docs/tutorial/patch.qmd index de39802a..e67fb579 100644 --- a/docs/tutorial/patch.qmd +++ b/docs/tutorial/patch.qmd @@ -180,7 +180,7 @@ Specific data formats may also add attributes (e.g. 
"gauge_length", "pulse_width ## String representation -DASCore Patches have as useful string representation: +DASCore Patches have a useful string representation: ```{python} import dascore as dc @@ -189,6 +189,19 @@ patch = dc.get_example_patch() print(patch) ``` +## Shortcuts + +DASCore Patches offer useful shortcuts for quickly accessing information: + +```{python} +import dascore as dc + +patch = dc.get_example_patch() +print(patch.seconds) # to get the number of seconds in the patch. +print(patch.channel_count) # to get the number of channels in the patch. +``` + + # Selecting (trimming) Patches are trimmed using the [`select`](`dascore.Patch.select`) method. Most commonly, `select` takes the coordinate name and a tuple of (lower_limit, upper_limit) as the values. Either limit can be `...` indicating an open interval.