Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Examples to support data registry entries #348

Merged
merged 1 commit into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dascore/data_registry.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ brady_hs_DAS_DTS_coords.csv b2e766136aac6516ddbb757d7dc26a8df0d5de48af03c8be769c
dispersion_event.h5 598c8baa2a5610c930e1c003f2ba02da13f8d8686e3ccf2a034e94bfc5e1990c https://github.com/dasdae/test_data/raw/master/das/dispersion_event.h5
PoroTomo_iDAS_1.h5 967a2885e79937ac0426b2022a9c03d5f24790ecf3abbaa9a16eb28055566fc6 https://github.com/dasdae/test_data/raw/master/das/PoroTomo_iDAS_1.h5
DASDMSShot00_20230328155653619.das 12ac53f78b32d8b0e32cc674c43ff5b4c79a6c8b19de2ad577fd481679b2b7b3 https://github.com/dasdae/test_data/raw/master/das/DASDMSShot00_20230328155653619.das
whale_1.hdf5 a09922969e740307bf26dc6ffa7fb9fbb834dc7cd7d4ced02c66b159fb1ce0cd http://piweb.ooirsn.uw.edu/das/data/Optasense/NorthCable/TransmitFiber/North-C1-LR-P1kHz-GL50m-Sp2m-FS200Hz_2021-11-03T15_06_51-0700/North-C1-LR-P1kHz-GL50m-Sp2m-FS200Hz_2021-11-04T020002Z.h5
31 changes: 29 additions & 2 deletions dascore/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import tempfile
from collections.abc import Sequence
from contextlib import suppress
from pathlib import Path

import numpy as np
Expand Down Expand Up @@ -413,10 +414,20 @@ def get_example_patch(example_name="random_das", **kwargs) -> dc.Patch:
print(df.to_markdown(index=False, stralign="center"))
```

Using an entry from the data_registry file is also supported.
If multiple patches are contained in the specified file, only the
first is returned. Data registry files are:
```{python}
#| echo: false
#| output: asis
from dascore.utils.downloader import get_registry_df
print(get_registry_df()[['name']].to_markdown(index=False, stralign="center"))
```

Parameters
----------
example_name
The name of the example to load. Options are:
The name of the example to load. Options are listed above.
**kwargs
Passed to the corresponding functions to generate data.

Expand All @@ -426,6 +437,9 @@ def get_example_patch(example_name="random_das", **kwargs) -> dc.Patch:
unregistered patch is requested.
"""
if example_name not in EXAMPLE_PATCHES:
# Allow the example name to be a data registry entry.
with suppress(ValueError):
return dc.spool(fetch(example_name))[0]
msg = (
f"No example patch registered with name {example_name} "
f"Registered example patches are {list(EXAMPLE_PATCHES)}"
Expand All @@ -451,6 +465,15 @@ def get_example_spool(example_name="random_das", **kwargs) -> dc.BaseSpool:
print(df.to_markdown(index=False, stralign="center"))
```

Using an entry from the data_registry file is also supported.
These include:
```{python}
#| echo: false
#| output: asis
from dascore.utils.downloader import get_registry_df
print(get_registry_df()[['name']].to_markdown(index=False, stralign="center"))
```

Parameters
----------
example_name
Expand All @@ -460,9 +483,13 @@ def get_example_spool(example_name="random_das", **kwargs) -> dc.BaseSpool:

Raises
------
UnknownExample if unregistered patch is requested.
[`UnknownExampleError`](`dascore.examples.UnknownExampleError`) if an
unregistered spool is requested.
"""
if example_name not in EXAMPLE_SPOOLS:
# Allow the example spool to be a data registry file.
with suppress(ValueError):
return dc.spool(fetch(example_name))
msg = (
f"No example spool registered with name {example_name} "
f"Registered example spools are {list(EXAMPLE_SPOOLS)}"
Expand Down
2 changes: 1 addition & 1 deletion dascore/io/prodml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def _get_prodml_attrs(fi, extras=None) -> list[dict]:
t_coord = _get_time_coord(node)
info.update(t_coord.get_attrs_dict("time"))
info.update(_get_data_unit_and_type(node))
info["dims"] = ["time", "distance"]
info["dims"] = _get_dims(node)
if extras is not None:
info.update(extras)
info["coords"] = {"time": t_coord, "distance": d_coord}
Expand Down
10 changes: 10 additions & 0 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ def test_sin_wav(self):
out = dc.get_example_patch("sin_wav")
assert isinstance(out, dc.Patch)

def test_data_file_name(self):
"""Ensure get_example_spool works on a datafile."""
spool = dc.get_example_spool("dispersion_event.h5")
assert isinstance(spool, dc.BaseSpool)


class TestGetExampleSpool:
"""Test suite for `get_example_spool`."""
Expand All @@ -51,6 +56,11 @@ def test_raises_on_bad_key(self):
with pytest.raises(UnknownExampleError, match="No example spool"):
dc.get_example_spool("NotAnExampleRight????")

def test_data_file_name(self):
"""Ensure get_example_spool works on a datafile."""
spool = dc.get_example_spool("dispersion_event.h5")
assert isinstance(spool, dc.BaseSpool)


class TestRickerMoveout:
"""Tests for Ricker moveout patch."""
Expand Down
9 changes: 6 additions & 3 deletions tests/test_io/test_common_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@
# only flat patches can be written to WAV, don't put it here.
COMMON_IO_WRITE_TESTS = (PickleIO(), DASDAEV1())

# Specifies data registry entries which should not be tested.
SKIP_DATA_FILES = {"whale_1.hdf5", "brady_hs_DAS_DTS_coords.csv"}


@cache
def _cached_read(path, io=None):
Expand Down Expand Up @@ -111,10 +114,10 @@ def io_path_tuple(request):
@pytest.fixture(scope="session", params=get_registry_df()["name"])
def data_file_path(request):
"""A fixture of all data files. Will download if needed."""
# TODO remove this segy skip once we support it.
param = request.param
if param.endswith("csv"):
pytest.skip("Not a DAS file.")
# Some files should be skipped if not DAS or too big.
if str(param) in SKIP_DATA_FILES:
pytest.skip(f"Skipping {param}")
return fetch(request.param)


Expand Down
Loading