Skip to content

Commit

Permalink
Resolving channel filter redundancy (#291)
Browse files Browse the repository at this point in the history
* merging channel filter

* checking out notebooks
  • Loading branch information
niyiyu authored Feb 17, 2024
1 parent 773b90b commit 6a44a88
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 58 deletions.
9 changes: 7 additions & 2 deletions integration_tests/cc_stack_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
cross_correlate,
stack_cross_correlations,
)
from noisepy.seis.channel_filter_store import channel_filter
from noisepy.seis.channelcatalog import (
XMLStationChannelCatalog, # Required stationXML handling object
)
Expand All @@ -19,7 +20,6 @@
from noisepy.seis.numpystore import NumpyCCStore, NumpyStackStore
from noisepy.seis.scedc_s3store import ( # Object to query SCEDC data from on S3
SCEDCS3DataStore,
channel_filter,
)

S3_STORAGE_OPTIONS = {"s3": {"anon": True}}
Expand Down Expand Up @@ -52,10 +52,15 @@ def test_cc_stack(tmp_path, stack_method, substack, cc_method):
# timeframe for analysis
timerange = DateTimeRange(config.start_date, config.end_date)

networks = ["CI"]
stations = "RPV,SVD".split(",")
catalog = XMLStationChannelCatalog(S3_STATION_XML, storage_options=S3_STORAGE_OPTIONS) # Station catalog
raw_store = SCEDCS3DataStore(
S3_DATA, catalog, channel_filter(stations, "BH"), timerange, storage_options=S3_STORAGE_OPTIONS
S3_DATA,
catalog,
channel_filter(networks, stations, ["BHE", "BHN", "BHZ"]),
timerange,
storage_options=S3_STORAGE_OPTIONS,
) # Store for reading raw data from S3 bucket
cc_store = NumpyCCStore(cc_data_path) # Store for writing CC data

Expand Down
18 changes: 17 additions & 1 deletion src/noisepy/seis/channel_filter_store.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from typing import List
from typing import Callable, List

import obspy
from datetimerange import DateTimeRange

from .constants import WILD_CARD
from .datatypes import Channel, ChannelData, Station
from .stores import RawDataStore

Expand Down Expand Up @@ -38,3 +39,18 @@ def get_channels(self, timespan: DateTimeRange) -> List[Channel]:
elif ch.type.location < min_chans[key].type.location:
min_chans[key] = ch
return list(min_chans.values())


def channel_filter(net_list: List[str], sta_list: List[str], cha_list: List[str]) -> Callable[[Channel], bool]:
    """
    Helper function for creating a channel filter from lists of networks, stations and channels.

    Every criterion is an exact match against the corresponding channel attribute.
    A list that contains WILD_CARD accepts any value for that attribute.

    NOTE: pass lists (or other iterables of strings), not a bare string. Each argument is
    turned into a set, so a plain string would be split into single characters.

    Args:
        net_list: Accepted network codes, compared with ``ch.station.network``.
        sta_list: Accepted station names, compared with ``ch.station.name``.
        cha_list: Accepted channel names, compared with ``ch.type.name``.

    Returns:
        A function taking a Channel and returning True only when the station, the network
        and the channel name are all accepted.
    """
    stations = set(sta_list)
    networks = set(net_list)
    channels = set(cha_list)

    # Whether a criterion is wild-carded does not depend on the channel being tested,
    # so resolve it once here instead of on every call of the predicate.
    any_station = WILD_CARD in stations
    any_network = WILD_CARD in networks
    any_channel = WILD_CARD in channels

    # Named `_accept` instead of `filter` so the builtin `filter` is not shadowed.
    def _accept(ch: Channel) -> bool:
        return (
            (any_station or ch.station.name in stations)
            and (any_network or ch.station.network in networks)
            and (any_channel or ch.type.name in channels)
        )

    return _accept
23 changes: 4 additions & 19 deletions src/noisepy/seis/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@

from . import __version__
from .asdfstore import ASDFCCStore, ASDFRawDataStore, ASDFStackStore
from .channel_filter_store import LocationChannelFilterStore
from .channel_filter_store import LocationChannelFilterStore, channel_filter
from .channelcatalog import CSVChannelCatalog, XMLStationChannelCatalog
from .constants import CONFIG_FILE, STATION_FILE, WILD_CARD
from .constants import CONFIG_FILE, STATION_FILE
from .correlate import cross_correlate
from .datatypes import Channel, ConfigParameters
from .datatypes import ConfigParameters
from .fdsn_download import download
from .numpystore import NumpyCCStore, NumpyStackStore
from .scedc_s3store import SCEDCS3DataStore
Expand Down Expand Up @@ -124,21 +124,6 @@ def initialize_params(args, data_dir: str) -> ConfigParameters:
return cpy


def get_channel_filter(net_list: List[str], sta_list: List[str], chan_list: List[str]) -> Callable[[Channel], bool]:
    """
    Create a channel predicate from the configured network, station and channel lists.

    Matching is exact for each attribute. Including WILD_CARD in a list disables the
    check for that attribute.

    Args:
        net_list: Accepted network codes, compared with ``ch.station.network``.
        sta_list: Accepted station names, compared with ``ch.station.name``.
        chan_list: Accepted channel names, compared with ``ch.type.name``.

    Returns:
        A function taking a Channel and returning True when all three checks pass.
    """
    stations = set(sta_list)
    networks = set(net_list)
    channels = set(chan_list)

    # The wildcard tests are independent of the channel, so evaluate them a single time
    # at construction rather than once per filtered channel.
    any_station = WILD_CARD in stations
    any_network = WILD_CARD in networks
    any_channel = WILD_CARD in channels

    # Avoid calling the predicate `filter`, which would shadow the builtin.
    def _accept(ch: Channel) -> bool:
        return (
            (any_station or ch.station.name in stations)
            and (any_network or ch.station.network in networks)
            and (any_channel or ch.type.name in channels)
        )

    return _accept


def create_raw_store(args, params: ConfigParameters):
raw_dir = args.raw_data_path

Expand All @@ -162,7 +147,7 @@ def count(pat):
store = SCEDCS3DataStore(
raw_dir,
catalog,
get_channel_filter(params.net_list, params.stations, params.channels),
channel_filter(params.net_list, params.stations, params.channels),
DateTimeRange(params.start_date, params.end_date),
params.storage_options,
)
Expand Down
16 changes: 0 additions & 16 deletions src/noisepy/seis/scedc_s3store.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,6 @@
logger = logging.getLogger(__name__)


def channel_filter(stations: List[str], ch_prefixes: str) -> Callable[[Channel], bool]:
    """
    Helper function for creating a channel filter to be used in the constructor of the store.
    This filter uses a list of allowed station names along with a channel filter prefix.

    Args:
        stations: Allowed station names, compared with ``ch.station.name``. When the list
            contains only "*", the station check is skipped.
        ch_prefixes: Comma-separated channel name prefixes (e.g. "BH,EH"). The comparison
            with ``ch.type.name`` is case-insensitive.

    Returns:
        A function taking a Channel and returning True when the station is allowed and the
        channel name starts with one of the prefixes.
    """
    sta_set = set(stations)
    # Both values are independent of the channel under test; compute them once here
    # instead of re-splitting the prefix string on every call of the predicate.
    all_stations = sta_set == {"*"}
    prefixes = tuple(ch_prefixes.lower().split(","))

    # Named `_accept` instead of `filter` so the builtin `filter` is not shadowed.
    def _accept(ch: Channel) -> bool:
        if not all_stations and ch.station.name not in sta_set:
            return False
        return ch.type.name.lower().startswith(prefixes)

    return _accept


class MiniSeedS3DataStore(RawDataStore):
"""
A data store implementation to read from a directory of miniSEED (.ms) files from an S3 bucket.
Expand Down
5 changes: 3 additions & 2 deletions tests/test_scedc_s3store.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
from datetimerange import DateTimeRange
from test_channelcatalog import MockCatalog

from noisepy.seis.channel_filter_store import channel_filter
from noisepy.seis.datatypes import Channel, ChannelType, Station
from noisepy.seis.scedc_s3store import SCEDCS3DataStore, channel_filter
from noisepy.seis.scedc_s3store import SCEDCS3DataStore

timespan1 = DateTimeRange(datetime(2022, 1, 2, tzinfo=timezone.utc), datetime(2022, 1, 3, tzinfo=timezone.utc))
timespan2 = DateTimeRange(datetime(2021, 2, 3, tzinfo=timezone.utc), datetime(2021, 2, 4, tzinfo=timezone.utc))
Expand Down Expand Up @@ -65,7 +66,7 @@ def test_timespan_channels(store: SCEDCS3DataStore):

def test_filter():
# filter for station 'staX' or 'staY' and channel type starts with 'B'
f = channel_filter(["staX", "staY"], "B")
f = channel_filter(["CI"], ["staX", "staY"], ["BHE", "BBB"])
staX = Station("CI", "staX")
staZ = Station("CI", "staZ")

Expand Down
23 changes: 17 additions & 6 deletions tutorials/noisepy_datastore.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
"outputs": [],
"source": [
"from noisepy.seis import __version__ # noisepy core functions\n",
"from noisepy.seis.scedc_s3store import SCEDCS3DataStore, channel_filter # Object to query SCEDC data from on S3\n",
"from noisepy.seis.scedc_s3store import SCEDCS3DataStore # Object to query SCEDC data from on S3\n",
"from noisepy.seis.channel_filter_store import channel_filter\n",
"from noisepy.seis.channelcatalog import XMLStationChannelCatalog # Required stationXML handling object\n",
"from datetime import datetime\n",
"from datetimerange import DateTimeRange\n",
Expand Down Expand Up @@ -87,8 +88,8 @@
"\n",
"### S3 DataStore\n",
"Here, we instantiate a `SCEDCS3DataStore` class as `raw_store` as an example of Data Store on the cloud. This variable allows reading data from the real data storage backend during the later processing. The initialization parameters of `SCEDCS3DataStore` are\n",
"- S3_DATA: path to the data in the \"s3://\" format. \n",
"- catalog: path to the station XML available in the \"s3://\" format.\n",
"- S3_DATA: path to the data in the `\"s3://\"` format. \n",
"- catalog: path to the station XML available in the `\"s3://\"` format.\n",
"- channel_filter: channel selection, based on station name and/or channel type.\n",
"- time_range: DateTimeRange of data for processing.\n",
"- storage_options: optional storage options for reading S3 data. This is where you can put AWS keys/credentials if applicable.\n",
Expand All @@ -114,7 +115,10 @@
"\n",
"stations = \"SBC,RIO,DEV\".split(\",\") # filter to these stations\n",
"catalog = XMLStationChannelCatalog(S3_STATION_XML, storage_options=S3_STORAGE_OPTIONS) # Station catalog\n",
"raw_store = SCEDCS3DataStore(S3_DATA, catalog, channel_filter(stations, \"BH,EH\"), time_range, \n",
"raw_store = SCEDCS3DataStore(S3_DATA, catalog, \n",
" channel_filter([\"CI\"], stations, [\"BHE\", \"BHN\", \"BHZ\",\n",
" \"EHE\", \"EHN\", \"EHZ\"]), \n",
" time_range, \n",
" storage_options=S3_STORAGE_OPTIONS) # Store for reading raw data from S3 bucket\n",
"raw_store.fs"
]
Expand Down Expand Up @@ -198,8 +202,15 @@
"metadata": {},
"outputs": [],
"source": [
"d.stream.plot()"
"d.stream.plot();"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -212,7 +223,7 @@
},
"gpuClass": "standard",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": ".env",
"language": "python",
"name": "python3"
},
Expand Down
12 changes: 7 additions & 5 deletions tutorials/noisepy_ncedc_tutorial.ipynb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "PIA2IaqUOeOA"
Expand Down Expand Up @@ -68,7 +67,8 @@
"%autoreload 2\n",
"from noisepy.seis import cross_correlate, stack_cross_correlations, __version__ # noisepy core functions\n",
"from noisepy.seis.asdfstore import ASDFCCStore, ASDFStackStore # Object to store ASDF data within noisepy\n",
"from noisepy.seis.scedc_s3store import NCEDCS3DataStore, channel_filter # Object to query SCEDC data from on S3\n",
"from noisepy.seis.scedc_s3store import NCEDCS3DataStore # Object to query SCEDC data from on S3\n",
"from noisepy.seis.channel_filter_store import channel_filter\n",
"from noisepy.seis.datatypes import CCMethod, ConfigParameters, FreqNorm, RmResp, StackMethod, TimeNorm # Main configuration object\n",
"from noisepy.seis.channelcatalog import XMLStationChannelCatalog # Required stationXML handling object\n",
"import os\n",
Expand Down Expand Up @@ -275,7 +275,9 @@
"source": [
"stations = \"KCT,KRP,KHMB\".split(\",\") # filter to these stations\n",
"catalog = XMLStationChannelCatalog(S3_STATION_XML, \"{network}.{name}.xml\", storage_options=S3_STORAGE_OPTIONS) # Station catalog\n",
"raw_store = NCEDCS3DataStore(S3_DATA, catalog, channel_filter(stations, \"HH\"), timerange, storage_options=S3_STORAGE_OPTIONS) # Store for reading raw data from S3 bucket\n",
"raw_store = NCEDCS3DataStore(S3_DATA, catalog, \n",
" channel_filter(config.net_list, stations, [\"HHE\", \"HHN\", \"HHZ\"]), \n",
" timerange, storage_options=S3_STORAGE_OPTIONS) # Store for reading raw data from S3 bucket\n",
"cc_store = ASDFCCStore(cc_data_path) # Store for writing CC data"
]
},
Expand Down Expand Up @@ -478,7 +480,7 @@
},
"gpuClass": "standard",
"kernelspec": {
"display_name": "Python 3",
"display_name": ".env",
"language": "python",
"name": "python3"
},
Expand All @@ -492,7 +494,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down
7 changes: 5 additions & 2 deletions tutorials/noisepy_pnwstore_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
"source": [
"from noisepy.seis import cross_correlate, stack_cross_correlations, plotting_modules # noisepy core functions\n",
"from noisepy.seis.asdfstore import ASDFCCStore, ASDFStackStore # Object to store ASDF data within noisepy\n",
"from noisepy.seis.scedc_s3store import channel_filter\n",
"from noisepy.seis.channel_filter_store import channel_filter\n",
"from noisepy.seis.pnwstore import PNWDataStore\n",
"from noisepy.seis.datatypes import CCMethod, ConfigParameters, Channel, ChannelData, ChannelType, FreqNorm, RmResp, Station, TimeNorm # Main configuration object\n",
"from noisepy.seis.channelcatalog import XMLStationChannelCatalog # Required stationXML handling object\n",
Expand Down Expand Up @@ -239,7 +239,10 @@
"# CC.PANH..BH\n",
"stations = \"BBO,BABR,SHUK,PANH\".split(\",\") # filter to these stations\n",
"catalog = XMLStationChannelCatalog(STATION_XML, path_format=\"{network}\" + os.path.sep + \"{network}.{name}.xml\")\n",
"raw_store = PNWDataStore(DATA, catalog, DB_PATH, channel_filter(stations, \"BH,HH\"), date_range=range) # Store for reading raw data from S3 bucket\n",
"raw_store = PNWDataStore(DATA, catalog, DB_PATH, \n",
" channel_filter([\"UW\", \"UO\", \"PB\", \"CC\"], stations, \n",
" [\"BHE\", \"BHN\", \"BHZ\",\n",
" \"HHE\", \"HHN\", \"HHZ\"]), date_range=range) # Store for reading raw data from S3 bucket\n",
"cc_store = ASDFCCStore(cc_data_path) # Store for writing CC data\n",
"# print the configuration parameters. Some are chosen by default but we can modify them\n",
"# print(config)"
Expand Down
10 changes: 6 additions & 4 deletions tutorials/noisepy_scedc_tutorial.ipynb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "PIA2IaqUOeOA"
Expand Down Expand Up @@ -68,7 +67,8 @@
"%autoreload 2\n",
"from noisepy.seis import cross_correlate, stack_cross_correlations, __version__ # noisepy core functions\n",
"from noisepy.seis.asdfstore import ASDFCCStore, ASDFStackStore # Object to store ASDF data within noisepy\n",
"from noisepy.seis.scedc_s3store import SCEDCS3DataStore, channel_filter # Object to query SCEDC data from on S3\n",
"from noisepy.seis.scedc_s3store import SCEDCS3DataStore # Object to query SCEDC data from on S3\n",
"from noisepy.seis.channel_filter_store import channel_filter\n",
"from noisepy.seis.datatypes import CCMethod, ConfigParameters, FreqNorm, RmResp, StackMethod, TimeNorm # Main configuration object\n",
"from noisepy.seis.channelcatalog import XMLStationChannelCatalog # Required stationXML handling object\n",
"import os\n",
Expand Down Expand Up @@ -289,7 +289,9 @@
"# config.load_stations()\n",
"\n",
"catalog = XMLStationChannelCatalog(S3_STATION_XML, storage_options=S3_STORAGE_OPTIONS) # Station catalog\n",
"raw_store = SCEDCS3DataStore(S3_DATA, catalog, channel_filter(config.stations, \"BH\"), timerange, storage_options=S3_STORAGE_OPTIONS) # Store for reading raw data from S3 bucket\n",
"raw_store = SCEDCS3DataStore(S3_DATA, catalog, \n",
" channel_filter(config.net_list, config.stations, [\"BHE\", \"BHN\", \"BHZ\"]), \n",
" timerange, storage_options=S3_STORAGE_OPTIONS) # Store for reading raw data from S3 bucket\n",
"cc_store = ASDFCCStore(cc_data_path) # Store for writing CC data"
]
},
Expand Down Expand Up @@ -483,7 +485,7 @@
},
"gpuClass": "standard",
"kernelspec": {
"display_name": "Python 3",
"display_name": ".env",
"language": "python",
"name": "python3"
},
Expand Down
3 changes: 2 additions & 1 deletion tutorials/run_mpi_scedc.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"source": [
"from noisepy.seis import cross_correlate, stack, plotting_modules # noisepy core functions\n",
"from noisepy.seis.asdfstore import ASDFCCStore # Object to store ASDF data within noisepy\n",
"from noisepy.seis.scedc_s3store import SCEDCS3DataStore, channel_filter # Object to query SCEDC data from on S3\n",
"from noisepy.seis.scedc_s3store import SCEDCS3DataStore # Object to query SCEDC data from on S3\n",
"from noisepy.seis.channel_filter_store import channel_filter\n",
"from noisepy.seis.datatypes import ConfigParameters, FreqNorm # Main configuration object\n",
"from noisepy.seis.channelcatalog import XMLStationChannelCatalog # Required stationXML handling object\n",
"import os\n",
Expand Down

0 comments on commit 6a44a88

Please sign in to comment.