diff --git a/justfile b/justfile index 51f12e5..c466eb1 100644 --- a/justfile +++ b/justfile @@ -102,6 +102,26 @@ main-mb05 *more_args="": # --max-segments=5 \ # --output-dir=/Volumes/PAM_Analysis/pypam-space/test_output \ +# Exercise program with `gs://` URIs +main-google date='20200101' *more_args="--max-segments=5": + #!/usr/bin/env bash + WS=noaa-passive-bioacoustic_nrs_11_2019-2021 + mkdir -p $WS/DOWNLOADS + mkdir -p $WS/OUTPUT + PYTHONPATH=. EXCLUDE_LOG_TIME=yes \ + python src/main.py \ + --date={{date}} \ + --gs \ + --json-base-dir=$WS \ + --voltage-multiplier=1 \ + --sensitivity-flat-value=1 \ + --output-dir=$WS/OUTPUT \ + --output-prefix=NRS_ \ + --download-dir=$WS/DOWNLOADS \ + --retain-downloaded-files \ + {{more_args}} +# --assume-downloaded-files \ + # Basic test for cloud processing main-cloud-basic-test max_segments="1" date="20220902": #!/usr/bin/env bash diff --git a/src/file_helper.py b/src/file_helper.py index 4eed1a8..edcde55 100644 --- a/src/file_helper.py +++ b/src/file_helper.py @@ -35,9 +35,10 @@ def __init__( audio_base_dir: Optional[str], audio_path_map_prefix: str, audio_path_prefix: str, - s3_client: Optional[BaseClient], download_dir: Optional[str], assume_downloaded_files: bool, + s3_client: Optional[BaseClient] = None, + gs_client: Optional[GsClient] = None, ): self.logger = logger @@ -47,6 +48,7 @@ def __init__( self.audio_base_dir = audio_base_dir self.audio_path_prefix = audio_path_prefix self.s3_client = s3_client + self.gs_client = gs_client self.download_dir: str = download_dir if download_dir else "." self.assume_downloaded_files = assume_downloaded_files @@ -83,6 +85,7 @@ def _get_sound_filename(self) -> Optional[str]: self.download_dir, self.assume_downloaded_files, self.s3_client, + self.gs_client, ) # otherwise assuming local file, so we only inspect the `path` attribute: @@ -99,7 +102,9 @@ def remove_downloaded_file(self): if not pathlib.Path(self.sound_filename).exists(): return - if self.s3_client is None or self.parsed_uri.scheme != "s3": + if ( + self.s3_client is None and self.gs_client is None + ) or self.parsed_uri.scheme not in ("s3", "gs"): self.logger.debug(f"No file download involved for {self.uri=}") return @@ -294,6 +299,7 @@ def get_local_filename(self, uri: Optional[str]) -> Optional[str]: self.download_dir, self.assume_downloaded_files, self.s3_client, + self.gs_client, ) return parsed_uri.path @@ -337,6 +343,7 @@ def _get_json_s3(self, parsed_uri: ParseResult) -> Optional[str]: self.download_dir, self.assume_downloaded_files, self.s3_client, + self.gs_client, ) if local_filename is None: return None @@ -467,9 +474,10 @@ def _get_sound_status(self, uri: str) -> SoundStatus: audio_base_dir=self.audio_base_dir, audio_path_map_prefix=self.audio_path_map_prefix, audio_path_prefix=self.audio_path_prefix, - s3_client=self.s3_client, download_dir=self.download_dir, assume_downloaded_files=self.assume_downloaded_files, + s3_client=self.s3_client, + gs_client=self.gs_client, ) self.sound_cache[uri] = ss else: diff --git a/src/json_support.py b/src/json_support.py index 1d28e88..763cb76 100644 --- a/src/json_support.py +++ b/src/json_support.py @@ -72,6 +72,8 @@ def get_intersecting_entries( Gets the list of intersecting entries for the UTC "start minute" given by (year, month, day, at_hour, at_minute). + :param logger: + The logger to be used :param json_entries: JSON entries for the day :param year: @@ -90,6 +92,12 @@ def get_intersecting_entries( :return: The list of intersecting entries """ + # for logging purposes: + time_spec = ( + f"year={year} month={month} day={day} at_hour={at_hour} at_minute={at_minute}" + ) + logger.debug(f"get_intersecting_entries: {time_spec} {len(json_entries)=}") + # the requested start minute as datetime: dt = datetime(year, month, day, at_hour, at_minute, tzinfo=timezone.utc) # the start of the requested start minute in seconds: @@ -116,14 +124,14 @@ def get_intersecting_entries( ) tot_duration_secs += duration_secs - # for logging purposes: - time_spec = ( - f"year={year} month={month} day={day} at_hour={at_hour} at_minute={at_minute}" - ) - - if logger.is_enabled_for(logging.DEBUG): + warning = 0 == len(intersecting_entries) + if warning or logger.is_enabled_for(logging.DEBUG): uris = [i.entry.uri for i in intersecting_entries] uris_str = "\n ".join([f"[{e}] {uri}" for e, uri in enumerate(uris)]) - logger.debug(f"{time_spec}: intersection uris({len(uris)}):\n {uris_str}") + msg = f"{time_spec}: intersection uris({len(uris)}):\n {uris_str}" + if warning: + logger.warn(msg) + else: + logger.debug(msg) return intersecting_entries diff --git a/src/main.py b/src/main.py index 10e964b..3772379 100644 --- a/src/main.py +++ b/src/main.py @@ -35,7 +35,7 @@ def main(opts): s3_client = boto3.client("s3", **kwargs) gs_client = None - if opts.s3: + if opts.gs: # pylint: disable=import-outside-toplevel from google.cloud.storage import Client as GsClient