From c078ac549c03e982fa8fdd8e9ccd0dbfbf66ea62 Mon Sep 17 00:00:00 2001 From: Meaghan Freund Date: Wed, 2 Oct 2024 16:37:38 -0700 Subject: [PATCH 1/6] Basic progress and data estimation implementation. --- doc/devlog/2024-08-26-prism-adrio-demo.ipynb | 44 +++-- epymorph/adrio/prism.py | 166 +++++++++++++++---- 2 files changed, 170 insertions(+), 40 deletions(-) diff --git a/doc/devlog/2024-08-26-prism-adrio-demo.ipynb b/doc/devlog/2024-08-26-prism-adrio-demo.ipynb index a5685e14..b9322c29 100644 --- a/doc/devlog/2024-08-26-prism-adrio-demo.ipynb +++ b/doc/devlog/2024-08-26-prism-adrio-demo.ipynb @@ -47,9 +47,20 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", + " |####################| 100% (10.638s)\n", + "Loading epymorph.adrio.prism.Precipitation:\n", + " |####################| 100% (2.736s)\n" + ] + } + ], "source": [ "from epymorph.geography.us_census import CountyScope\n", "from epymorph import *\n", @@ -76,13 +87,13 @@ " \"precipitation\": prism.Precipitation(time_period),\n", " },\n", ")\n", - "\n", - "precipitation = evaluate_param(rume, \"precipitation\")" + "with sim_messaging():\n", + " precipitation = evaluate_param(rume, \"precipitation\")\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -164,9 +175,18 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading epymorph.adrio.prism.Temperature:\n", + " |####################| 100% (81.437s)\n" + ] + } + ], "source": [ "from epymorph.geography.us_census import StateScope\n", "from epymorph import *\n", @@ -191,20 +211,20 @@ " \"temperature\": prism.Temperature(time_period, \"Mean\"),\n", " },\n", ")\n", - "\n", - "temperature = evaluate_param(rume, \"temperature\")" + "with sim_messaging():\n", + " temperature = evaluate_param(rume, \"temperature\")" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "States: ['04']\n", + "Counties: ['04']\n", "\n", "Temperature in Celsius:\n", " [[18.10199928]\n", @@ -242,7 +262,7 @@ } ], "source": [ - "print(f\"States: {geoids}\\n\")\n", + "print(f\"Counties: {geoids}\\n\")\n", "\n", "print(f\"Temperature in Celsius:\\n {temperature}\\n\")" ] diff --git a/epymorph/adrio/prism.py b/epymorph/adrio/prism.py index 0d8188e9..ef8dcb87 100644 --- a/epymorph/adrio/prism.py +++ b/epymorph/adrio/prism.py @@ -10,20 +10,53 @@ from numpy.typing import NDArray from typing_extensions import override -from epymorph.adrio.adrio import Adrio -from epymorph.cache import load_or_fetch_url, module_cache_path +from epymorph.adrio.adrio import Adrio, ProgressCallback, adrio_cache +from epymorph.cache import check_file_in_cache, load_or_fetch_url, module_cache_path from epymorph.data_shape import Shapes from epymorph.data_type import CentroidType +from epymorph.data_usage import DataEstimate from epymorph.error import DataResourceException from epymorph.geography.scope import GeoScope from epymorph.geography.us_census import STATE, CensusScope +from epymorph.geography.us_tiger import CacheEstimate, _url_to_cache_path from epymorph.simulation import AttributeDef, TimeFrame _PRISM_CACHE_PATH = module_cache_path(__name__) +def _generate_file_name( + attribute: str, + latest_date: datetype, + last_completed_month: datetype, + date: datetype, +) -> tuple[str, str, str]: + """ + Generates the url for the given date and climate attribute. Returns a tuple + of strings with the url, stability, and formatted date for other file name usage. + """ + + if date.year == latest_date.year and date.month == latest_date.month: + stability = "early" + + # if it is before the last finished month + elif date > last_completed_month: + stability = "provisional" + + # if it is older than 6 completed months + else: + stability = "stable" + + # format the date for the url + formatted_date = date.strftime("%Y%m%d") + year = date.year + + url = f"https://ftp.prism.oregonstate.edu/daily/{attribute}/{year}/PRISM_{attribute}_{stability}_4kmD2_{formatted_date}_bil.zip" + + return url, stability, formatted_date + + def _fetch_raster( - attribute: str, date_range: TimeFrame + attribute: str, date_range: TimeFrame, progress: ProgressCallback ) -> Generator[BytesIO, None, None]: """ Fetches the raster values at the url with the given attribute and date range. @@ -43,26 +76,14 @@ def _fetch_raster( six_months_ago = datetype.today() + relativedelta(months=-6) last_completed_month = six_months_ago.replace(day=1) - timedelta(days=1) - for single_date in date_list: - if ( - single_date.year == latest_date.year - and single_date.month == latest_date.month - ): - stability = "early" - - # if it is before the last finished month - elif single_date > last_completed_month: - stability = "provisional" - - # if it is older than 6 completed months - else: - stability = "stable" - - # format the date for the url - formatted_date = single_date.strftime("%Y%m%d") - year = single_date.year + # for progress tracking + processing_steps = len(date_list) + 1 - url = f"https://ftp.prism.oregonstate.edu/daily/{attribute}/{year}/PRISM_{attribute}_{stability}_4kmD2_{formatted_date}_bil.zip" + # include i in the loop, representing the processing steps + for i, single_date in enumerate(date_list): + url, stability, formatted_date = _generate_file_name( + attribute, latest_date, last_completed_month, single_date + ) # load/fetch the url for the file try: @@ -73,16 +94,22 @@ def _fetch_raster( file.name = f"PRISM_{attribute}_{stability}_4kmD2_{formatted_date}_bil.bil" + # if the progress isnt None + if progress is not None: + # incremement progress (i + 1), and divide by the processing steps + progress((i + 1) / processing_steps, None) + # (representing the progress by how many total files have been completed) + yield file def _make_centroid_strategy_adrio( - attribute: str, date: TimeFrame, centroids: NDArray + attribute: str, date: TimeFrame, centroids: NDArray, progress: ProgressCallback ) -> NDArray[np.float64]: """ Retrieves the raster value at a centroid of a granularity. """ - raster_files = _fetch_raster(attribute, date) + raster_files = _fetch_raster(attribute, date, progress) results = [] # read in each file @@ -128,6 +155,65 @@ def _validate_scope(scope: GeoScope) -> CensusScope: return scope +def _estimate_size( + file_size: int, date_range: TimeFrame, attribute: str +) -> CacheEstimate: + """ + Estimate the size of all of the files, whether in cache or fetching. + """ + # setup urls as list to check if theyre in the cache + + # set up date variables to check stability + first_day = date_range.start_date + last_day = date_range.end_date + latest_date = datetype.today() - timedelta(days=1) + + # the stability of PRISM data is defined by date, specified around the 6 month mark + six_months_ago = datetype.today() + relativedelta(months=-6) + last_completed_month = six_months_ago.replace(day=1) - timedelta(days=1) + + date_list = [ + first_day + timedelta(days=x) for x in range((last_day - first_day).days + 1) + ] + + urls, _, _ = [ + _generate_file_name(attribute, latest_date, last_completed_month, day) + for day in date_list + ] + + total_files = date_range.duration_days + missing_files = total_files - sum( + 1 for u in urls if check_file_in_cache(_url_to_cache_path(u)) + ) + + est_file_size = Literal[file_size] + + return CacheEstimate( + total_cache_size=total_files * est_file_size, + missing_cache_size=missing_files * est_file_size, + ) + + +def _estimate_data( + self, file_size: int, date_range: TimeFrame, attribute: str +) -> DataEstimate: + """ + Grab estimates for the PRISM simulation. + """ + est = _estimate_size(file_size, date_range, attribute) + + key = "" + return DataEstimate( + name=self.full_name, + cache_key=key, + new_network_bytes=est.missing_cache_size, + new_cache_bytes=est.missing_cache_size, + total_cache_bytes=est.total_cache_size, + max_bandwidth=None, + ) + + +@adrio_cache class Precipitation(Adrio[np.float64]): """ Creates an TxN matrix of floats representing the amount of precipitation in an area, @@ -141,12 +227,19 @@ class Precipitation(Adrio[np.float64]): def __init__(self, date_range: TimeFrame): self.date_range = _validate_dates(date_range) + # estimating data simulation + def estimate_sim(self) -> DataEstimate: + file_size = 1_200_00 + return _estimate_data(self, file_size, self.date_range, "ppt") + @override def evaluate_adrio(self) -> NDArray[np.float64]: scope = self.scope scope = _validate_scope(scope) centroids = self.data("centroid") - raster_vals = _make_centroid_strategy_adrio("ppt", self.date_range, centroids) + raster_vals = _make_centroid_strategy_adrio( + "ppt", self.date_range, centroids, self.progress + ) return raster_vals @@ -163,13 +256,18 @@ class DewPoint(Adrio[np.float64]): def __init__(self, date_range: TimeFrame): self.date_range = _validate_dates(date_range) + # estimating data simulation + def estimate_sim(self) -> DataEstimate: + file_size = 1_700_00 + return _estimate_data(self, file_size, self.date_range, "tdmean") + @override def evaluate_adrio(self) -> NDArray[np.float64]: scope = self.scope scope = _validate_scope(scope) centroids = self.data("centroid") raster_vals = _make_centroid_strategy_adrio( - "tdmean", self.date_range, centroids + "tdmean", self.date_range, centroids, self.progress ) return raster_vals @@ -198,6 +296,11 @@ def __init__(self, date_range: TimeFrame, temp_var: TemperatureType): self.temp_var = temp_var self.date_range = _validate_dates(date_range) + # estimating data simulation + def estimate_sim(self) -> DataEstimate: + file_size = 1_700_00 + return _estimate_data(self, file_size, self.date_range, self.temp_var) + @override def evaluate_adrio(self) -> NDArray[np.float64]: scope = self.scope @@ -205,7 +308,7 @@ def evaluate_adrio(self) -> NDArray[np.float64]: temp_var = self.temp_variables[self.temp_var] centroids = self.data("centroid") raster_vals = _make_centroid_strategy_adrio( - temp_var, self.date_range, centroids + temp_var, self.date_range, centroids, self.progress ) return raster_vals @@ -231,11 +334,18 @@ def __init__(self, date_range: TimeFrame, vpd_var: VPDType): self.vpd_var = vpd_var self.date_range = _validate_dates(date_range) + # estimating data simulation + def estimate_sim(self) -> DataEstimate: + file_size = 1_600_00 + return _estimate_data(self, file_size, self.date_range, self.vpd_var) + @override def evaluate_adrio(self) -> NDArray[np.float64]: scope = self.scope scope = _validate_scope(scope) vpd_var = self.vpd_variables[self.vpd_var] centroids = self.data("centroid") - raster_vals = _make_centroid_strategy_adrio(vpd_var, self.date_range, centroids) + raster_vals = _make_centroid_strategy_adrio( + vpd_var, self.date_range, centroids, self.progress + ) return raster_vals From 304d8cbc02cebfe6ea00e36b4ede6eb3ce13defe Mon Sep 17 00:00:00 2001 From: Meaghan Freund Date: Fri, 4 Oct 2024 09:45:54 -0700 Subject: [PATCH 2/6] Corrected estimate_data and functions. --- doc/devlog/2024-08-26-prism-adrio-demo.ipynb | 60 ++++++++++++--- epymorph/adrio/prism.py | 79 ++++++++++---------- 2 files changed, 87 insertions(+), 52 deletions(-) diff --git a/doc/devlog/2024-08-26-prism-adrio-demo.ipynb b/doc/devlog/2024-08-26-prism-adrio-demo.ipynb index b9322c29..ea3ab1f2 100644 --- a/doc/devlog/2024-08-26-prism-adrio-demo.ipynb +++ b/doc/devlog/2024-08-26-prism-adrio-demo.ipynb @@ -47,17 +47,19 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", - " |####################| 100% (10.638s)\n", - "Loading epymorph.adrio.prism.Precipitation:\n", - " |####################| 100% (2.736s)\n" + "ADRIO data usage estimation:\n", + "- epymorph.adrio.us_tiger.GeometricCentroid will download 0 Bytes and write 0 Bytes to disk\n", + "- epymorph.adrio.prism.Precipitation will download 1.2 MB and write 1.2 MB to disk\n", + "In total we will:\n", + "- Download 1.2 MB, taking a second (assuming 1.0 MB/s)\n", + "- Write 1.2 MB to disk cache (you have 249.2 GB free space)\n" ] } ], @@ -88,12 +90,22 @@ " },\n", ")\n", "with sim_messaging():\n", - " precipitation = evaluate_param(rume, \"precipitation\")\n" + " rume.estimate_data()\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "with sim_messaging():\n", + " precipitation = evaluate_param(rume, \"precipitation\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -175,19 +187,24 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Loading epymorph.adrio.prism.Temperature:\n", - " |####################| 100% (81.437s)\n" + "ADRIO data usage estimation:\n", + "- epymorph.adrio.prism.Temperature will download 51.0 MB and write 51.0 MB to disk\n", + "- epymorph.adrio.us_tiger.GeometricCentroid will download 0 Bytes and write 0 Bytes to disk\n", + "In total we will:\n", + "- Download 51.0 MB, taking 51 seconds (assuming 1.0 MB/s)\n", + "- Write 51.0 MB to disk cache (you have 249.2 GB free space)\n" ] } ], "source": [ + "from epymorph.rume import estimate_report\n", "from epymorph.geography.us_census import StateScope\n", "from epymorph import *\n", "from epymorph.adrio import acs5, us_tiger\n", @@ -206,18 +223,37 @@ " scope=state_scope,\n", " time_frame=time_period,\n", " params={\n", + " \"temperature\": prism.Temperature(time_period, \"Mean\"),\n", " \"population\": acs5.Population(),\n", " \"centroid\": us_tiger.GeometricCentroid(),\n", - " \"temperature\": prism.Temperature(time_period, \"Mean\"),\n", " },\n", ")\n", + "with sim_messaging():\n", + " rume.estimate_data()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading epymorph.adrio.prism.Temperature:\n", + " |####################| 100% (0.499s)\n" + ] + } + ], + "source": [ "with sim_messaging():\n", " temperature = evaluate_param(rume, \"temperature\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "metadata": {}, "outputs": [ { diff --git a/epymorph/adrio/prism.py b/epymorph/adrio/prism.py index ef8dcb87..7766d8f4 100644 --- a/epymorph/adrio/prism.py +++ b/epymorph/adrio/prism.py @@ -98,7 +98,6 @@ def _fetch_raster( if progress is not None: # incremement progress (i + 1), and divide by the processing steps progress((i + 1) / processing_steps, None) - # (representing the progress by how many total files have been completed) yield file @@ -155,53 +154,44 @@ def _validate_scope(scope: GeoScope) -> CensusScope: return scope -def _estimate_size( - file_size: int, date_range: TimeFrame, attribute: str -) -> CacheEstimate: +def _estimate_prism( + self, file_size: int, date_range: TimeFrame, attribute: str +) -> DataEstimate: """ - Estimate the size of all of the files, whether in cache or fetching. + Grab estimates for the PRISM simulation. """ + est_file_size = file_size + total_files = date_range.duration_days + # setup urls as list to check if theyre in the cache - # set up date variables to check stability + # setup date variables first_day = date_range.start_date last_day = date_range.end_date latest_date = datetype.today() - timedelta(days=1) - - # the stability of PRISM data is defined by date, specified around the 6 month mark six_months_ago = datetype.today() + relativedelta(months=-6) last_completed_month = six_months_ago.replace(day=1) - timedelta(days=1) - date_list = [ first_day + timedelta(days=x) for x in range((last_day - first_day).days + 1) ] - urls, _, _ = [ - _generate_file_name(attribute, latest_date, last_completed_month, day) + # get url names to check in cache + urls = [ + _generate_file_name(attribute, latest_date, last_completed_month, day)[0] for day in date_list ] - total_files = date_range.duration_days + # sum the missing files missing_files = total_files - sum( 1 for u in urls if check_file_in_cache(_url_to_cache_path(u)) ) - est_file_size = Literal[file_size] - - return CacheEstimate( + # calculate the cache estimate + est = CacheEstimate( total_cache_size=total_files * est_file_size, missing_cache_size=missing_files * est_file_size, ) - -def _estimate_data( - self, file_size: int, date_range: TimeFrame, attribute: str -) -> DataEstimate: - """ - Grab estimates for the PRISM simulation. - """ - est = _estimate_size(file_size, date_range, attribute) - key = "" return DataEstimate( name=self.full_name, @@ -227,10 +217,10 @@ class Precipitation(Adrio[np.float64]): def __init__(self, date_range: TimeFrame): self.date_range = _validate_dates(date_range) - # estimating data simulation - def estimate_sim(self) -> DataEstimate: - file_size = 1_200_00 - return _estimate_data(self, file_size, self.date_range, "ppt") + def estimate_data(self) -> DataEstimate: + file_size = 1_200_000 + est = _estimate_prism(self, file_size, self.date_range, "ppt") + return est @override def evaluate_adrio(self) -> NDArray[np.float64]: @@ -256,10 +246,13 @@ class DewPoint(Adrio[np.float64]): def __init__(self, date_range: TimeFrame): self.date_range = _validate_dates(date_range) - # estimating data simulation - def estimate_sim(self) -> DataEstimate: - file_size = 1_700_00 - return _estimate_data(self, file_size, self.date_range, "tdmean") + def estimate_data(self) -> DataEstimate: + year = self.date_range.end_date.year + if year > 2020: + file_size = 1_800_000 + else: + file_size = 1_400_000 + return _estimate_prism(self, file_size, self.date_range, "tdmean") @override def evaluate_adrio(self) -> NDArray[np.float64]: @@ -296,10 +289,13 @@ def __init__(self, date_range: TimeFrame, temp_var: TemperatureType): self.temp_var = temp_var self.date_range = _validate_dates(date_range) - # estimating data simulation - def estimate_sim(self) -> DataEstimate: - file_size = 1_700_00 - return _estimate_data(self, file_size, self.date_range, self.temp_var) + def estimate_data(self) -> DataEstimate: + year = self.date_range.end_date.year + if year > 2020: + file_size = 1_700_000 + else: + file_size = 1_400_000 + return _estimate_prism(self, file_size, self.date_range, self.temp_var) @override def evaluate_adrio(self) -> NDArray[np.float64]: @@ -334,10 +330,13 @@ def __init__(self, date_range: TimeFrame, vpd_var: VPDType): self.vpd_var = vpd_var self.date_range = _validate_dates(date_range) - # estimating data simulation - def estimate_sim(self) -> DataEstimate: - file_size = 1_600_00 - return _estimate_data(self, file_size, self.date_range, self.vpd_var) + def estimate_data(self) -> DataEstimate: + year = self.date_range.end_date.year + if year > 2020: + file_size = 1_700_000 + else: + file_size = 1_300_000 + return _estimate_prism(self, file_size, self.date_range, self.vpd_var) @override def evaluate_adrio(self) -> NDArray[np.float64]: From 14aa35517ece8e322700e08d174a5f09227c1ae2 Mon Sep 17 00:00:00 2001 From: Meaghan Freund Date: Fri, 4 Oct 2024 10:49:54 -0700 Subject: [PATCH 3/6] Fixed caching estimation. --- doc/devlog/2024-08-26-prism-adrio-demo.ipynb | 37 +++++++++++++++----- epymorph/adrio/prism.py | 23 ++++++------ 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/doc/devlog/2024-08-26-prism-adrio-demo.ipynb b/doc/devlog/2024-08-26-prism-adrio-demo.ipynb index ea3ab1f2..5ea61d34 100644 --- a/doc/devlog/2024-08-26-prism-adrio-demo.ipynb +++ b/doc/devlog/2024-08-26-prism-adrio-demo.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -95,9 +95,20 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", + " |####################| 100% (15.719s)\n", + "Loading epymorph.adrio.prism.Precipitation:\n", + " |####################| 100% (1.521s)\n" + ] + } + ], "source": [ "with sim_messaging():\n", " precipitation = evaluate_param(rume, \"precipitation\")" @@ -105,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -187,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -204,7 +215,6 @@ } ], "source": [ - "from epymorph.rume import estimate_report\n", "from epymorph.geography.us_census import StateScope\n", "from epymorph import *\n", "from epymorph.adrio import acs5, us_tiger\n", @@ -234,15 +244,24 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", + " | | 0% \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " |####################| 100% (2.716s)\n", "Loading epymorph.adrio.prism.Temperature:\n", - " |####################| 100% (0.499s)\n" + " |####################| 100% (24.449s)\n" ] } ], @@ -253,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "metadata": {}, "outputs": [ { diff --git a/epymorph/adrio/prism.py b/epymorph/adrio/prism.py index 7766d8f4..76b38d5d 100644 --- a/epymorph/adrio/prism.py +++ b/epymorph/adrio/prism.py @@ -18,7 +18,7 @@ from epymorph.error import DataResourceException from epymorph.geography.scope import GeoScope from epymorph.geography.us_census import STATE, CensusScope -from epymorph.geography.us_tiger import CacheEstimate, _url_to_cache_path +from epymorph.geography.us_tiger import CacheEstimate from epymorph.simulation import AttributeDef, TimeFrame _PRISM_CACHE_PATH = module_cache_path(__name__) @@ -29,7 +29,7 @@ def _generate_file_name( latest_date: datetype, last_completed_month: datetype, date: datetype, -) -> tuple[str, str, str]: +) -> tuple[str, str]: """ Generates the url for the given date and climate attribute. Returns a tuple of strings with the url, stability, and formatted date for other file name usage. @@ -52,7 +52,9 @@ def _generate_file_name( url = f"https://ftp.prism.oregonstate.edu/daily/{attribute}/{year}/PRISM_{attribute}_{stability}_4kmD2_{formatted_date}_bil.zip" - return url, stability, formatted_date + bil_name = f"PRISM_{attribute}_{stability}_4kmD2_{formatted_date}_bil.bil" + + return url, bil_name def _fetch_raster( @@ -81,7 +83,7 @@ def _fetch_raster( # include i in the loop, representing the processing steps for i, single_date in enumerate(date_list): - url, stability, formatted_date = _generate_file_name( + url, bil_name = _generate_file_name( attribute, latest_date, last_completed_month, single_date ) @@ -92,11 +94,11 @@ def _fetch_raster( except Exception as e: raise DataResourceException("Unable to fetch PRISM data.") from e - file.name = f"PRISM_{attribute}_{stability}_4kmD2_{formatted_date}_bil.bil" + file.name = bil_name # if the progress isnt None if progress is not None: - # incremement progress (i + 1), and divide by the processing steps + # progress by one, increasing percentage done progress((i + 1) / processing_steps, None) yield file @@ -181,9 +183,9 @@ def _estimate_prism( for day in date_list ] - # sum the missing files + # sum the files needed to download missing_files = total_files - sum( - 1 for u in urls if check_file_in_cache(_url_to_cache_path(u)) + 1 for u in urls if check_file_in_cache(_PRISM_CACHE_PATH / Path(u).name) ) # calculate the cache estimate @@ -192,7 +194,7 @@ def _estimate_prism( missing_cache_size=missing_files * est_file_size, ) - key = "" + key = f"prism:{attribute}:{date_range}" return DataEstimate( name=self.full_name, cache_key=key, @@ -291,11 +293,12 @@ def __init__(self, date_range: TimeFrame, temp_var: TemperatureType): def estimate_data(self) -> DataEstimate: year = self.date_range.end_date.year + temp_var = self.temp_variables[self.temp_var] if year > 2020: file_size = 1_700_000 else: file_size = 1_400_000 - return _estimate_prism(self, file_size, self.date_range, self.temp_var) + return _estimate_prism(self, file_size, self.date_range, temp_var) @override def evaluate_adrio(self) -> NDArray[np.float64]: From ed151e988efbbed3a720dbb22ea958486f807571 Mon Sep 17 00:00:00 2001 From: Meaghan Freund Date: Fri, 4 Oct 2024 18:18:22 -0700 Subject: [PATCH 4/6] Minor comment changes. --- epymorph/adrio/prism.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/epymorph/adrio/prism.py b/epymorph/adrio/prism.py index 76b38d5d..67ceb6d4 100644 --- a/epymorph/adrio/prism.py +++ b/epymorph/adrio/prism.py @@ -32,7 +32,7 @@ def _generate_file_name( ) -> tuple[str, str]: """ Generates the url for the given date and climate attribute. Returns a tuple - of strings with the url, stability, and formatted date for other file name usage. + of strings with the url and the name of the bil file within the zip file. """ if date.year == latest_date.year and date.month == latest_date.month: @@ -81,7 +81,6 @@ def _fetch_raster( # for progress tracking processing_steps = len(date_list) + 1 - # include i in the loop, representing the processing steps for i, single_date in enumerate(date_list): url, bil_name = _generate_file_name( attribute, latest_date, last_completed_month, single_date @@ -94,13 +93,13 @@ def _fetch_raster( except Exception as e: raise DataResourceException("Unable to fetch PRISM data.") from e - file.name = bil_name - # if the progress isnt None if progress is not None: # progress by one, increasing percentage done progress((i + 1) / processing_steps, None) + file.name = bil_name + yield file @@ -160,7 +159,7 @@ def _estimate_prism( self, file_size: int, date_range: TimeFrame, attribute: str ) -> DataEstimate: """ - Grab estimates for the PRISM simulation. + Calculate estimates for downloading PRISM files. """ est_file_size = file_size total_files = date_range.duration_days @@ -220,7 +219,7 @@ def __init__(self, date_range: TimeFrame): self.date_range = _validate_dates(date_range) def estimate_data(self) -> DataEstimate: - file_size = 1_200_000 + file_size = 1_200_000 # no significant change in size, average to about 1.2MB est = _estimate_prism(self, file_size, self.date_range, "ppt") return est @@ -250,10 +249,12 @@ def __init__(self, date_range: TimeFrame): def estimate_data(self) -> DataEstimate: year = self.date_range.end_date.year + + # file sizes are larger after the year 2020 if year > 2020: - file_size = 1_800_000 + file_size = 1_800_000 # average to 1.8MB after 2020 else: - file_size = 1_400_000 + file_size = 1_400_000 # average to 1.4MB 2020 and before return _estimate_prism(self, file_size, self.date_range, "tdmean") @override @@ -270,7 +271,7 @@ def evaluate_adrio(self) -> NDArray[np.float64]: class Temperature(Adrio[np.float64]): """ Creates an TxN matrix of floats representing the temperature in an area, represented - in degrees Celsius (°C). + in degrees Celsius (°C). """ date_range: TimeFrame @@ -294,10 +295,12 @@ def __init__(self, date_range: TimeFrame, temp_var: TemperatureType): def estimate_data(self) -> DataEstimate: year = self.date_range.end_date.year temp_var = self.temp_variables[self.temp_var] + + # file sizes are larger after the year 2020 if year > 2020: - file_size = 1_700_000 + file_size = 1_700_000 # average to 1.7MB after 2020 else: - file_size = 1_400_000 + file_size = 1_400_000 # average to 1.4MB 2020 and before return _estimate_prism(self, file_size, self.date_range, temp_var) @override @@ -316,7 +319,7 @@ def evaluate_adrio(self) -> NDArray[np.float64]: class VaporPressureDeficit(Adrio[np.float64]): """ Creates an TxN matrix of floats representing the vapor pressure deficit in an area, - represented in hectopascals (hPa). + represented in hectopascals (hPa). """ date_range: TimeFrame @@ -335,10 +338,12 @@ def __init__(self, date_range: TimeFrame, vpd_var: VPDType): def estimate_data(self) -> DataEstimate: year = self.date_range.end_date.year + + # file sizes are larger after the year 2020 if year > 2020: - file_size = 1_700_000 + file_size = 1_700_000 # average to 1.7MB after 2020 else: - file_size = 1_300_000 + file_size = 1_300_000 # average to 1.3MB 2020 and before return _estimate_prism(self, file_size, self.date_range, self.vpd_var) @override From 625d69c5a5d24583596ef85bfef5691e9e91aa3b Mon Sep 17 00:00:00 2001 From: Meaghan Freund Date: Fri, 4 Oct 2024 21:38:41 -0700 Subject: [PATCH 5/6] Removed estimate_data in devlog. --- doc/devlog/2024-08-26-prism-adrio-demo.ipynb | 75 +++----------------- 1 file changed, 11 insertions(+), 64 deletions(-) diff --git a/doc/devlog/2024-08-26-prism-adrio-demo.ipynb b/doc/devlog/2024-08-26-prism-adrio-demo.ipynb index 5ea61d34..7ac30ea9 100644 --- a/doc/devlog/2024-08-26-prism-adrio-demo.ipynb +++ b/doc/devlog/2024-08-26-prism-adrio-demo.ipynb @@ -54,12 +54,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "ADRIO data usage estimation:\n", - "- epymorph.adrio.us_tiger.GeometricCentroid will download 0 Bytes and write 0 Bytes to disk\n", - "- epymorph.adrio.prism.Precipitation will download 1.2 MB and write 1.2 MB to disk\n", - "In total we will:\n", - "- Download 1.2 MB, taking a second (assuming 1.0 MB/s)\n", - "- Write 1.2 MB to disk cache (you have 249.2 GB free space)\n" + "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", + " |####################| 100% (11.971s)\n", + "Loading epymorph.adrio.prism.Precipitation:\n", + " |####################| 100% (0.227s)\n" ] } ], @@ -90,28 +88,7 @@ " },\n", ")\n", "with sim_messaging():\n", - " rume.estimate_data()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", - " |####################| 100% (15.719s)\n", - "Loading epymorph.adrio.prism.Precipitation:\n", - " |####################| 100% (1.521s)\n" - ] - } - ], - "source": [ - "with sim_messaging():\n", - " precipitation = evaluate_param(rume, \"precipitation\")" + " precipitation = evaluate_param(rume, \"precipitation\")\n" ] }, { @@ -198,19 +175,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "ADRIO data usage estimation:\n", - "- epymorph.adrio.prism.Temperature will download 51.0 MB and write 51.0 MB to disk\n", - "- epymorph.adrio.us_tiger.GeometricCentroid will download 0 Bytes and write 0 Bytes to disk\n", - "In total we will:\n", - "- Download 51.0 MB, taking 51 seconds (assuming 1.0 MB/s)\n", - "- Write 51.0 MB to disk cache (you have 249.2 GB free space)\n" + "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", + " |####################| 100% (3.046s)\n", + "Loading epymorph.adrio.prism.Temperature:\n", + " |####################| 100% (22.394s)\n" ] } ], @@ -238,41 +213,13 @@ " \"centroid\": us_tiger.GeometricCentroid(),\n", " },\n", ")\n", - "with sim_messaging():\n", - " rume.estimate_data()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", - " | | 0% \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " |####################| 100% (2.716s)\n", - "Loading epymorph.adrio.prism.Temperature:\n", - " |####################| 100% (24.449s)\n" - ] - } - ], - "source": [ "with sim_messaging():\n", " temperature = evaluate_param(rume, \"temperature\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [ { From c21652b6b60814dcefecbb4d0dedd9792e77f371 Mon Sep 17 00:00:00 2001 From: Meaghan Freund Date: Fri, 4 Oct 2024 21:45:15 -0700 Subject: [PATCH 6/6] Reformatted estimate_data in devlog. --- doc/devlog/2024-08-26-prism-adrio-demo.ipynb | 61 +++++++++++++++----- 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/doc/devlog/2024-08-26-prism-adrio-demo.ipynb b/doc/devlog/2024-08-26-prism-adrio-demo.ipynb index 7ac30ea9..35b66f31 100644 --- a/doc/devlog/2024-08-26-prism-adrio-demo.ipynb +++ b/doc/devlog/2024-08-26-prism-adrio-demo.ipynb @@ -47,17 +47,19 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", - " |####################| 100% (11.971s)\n", - "Loading epymorph.adrio.prism.Precipitation:\n", - " |####################| 100% (0.227s)\n" + "ADRIO data usage estimation:\n", + "- epymorph.adrio.us_tiger.GeometricCentroid will download 0 Bytes and write 0 Bytes to disk\n", + "- epymorph.adrio.prism.Precipitation will download 1.2 MB and write 1.2 MB to disk\n", + "In total we will:\n", + "- Download 1.2 MB, taking a second (assuming 1.0 MB/s)\n", + "- Write 1.2 MB to disk cache (you have 249.1 GB free space)\n" ] } ], @@ -87,13 +89,23 @@ " \"precipitation\": prism.Precipitation(time_period),\n", " },\n", ")\n", + "\n", + "rume.estimate_data()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ "with sim_messaging():\n", - " precipitation = evaluate_param(rume, \"precipitation\")\n" + " precipitation = evaluate_param(rume, \"precipitation\")" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -175,17 +187,19 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", - " |####################| 100% (3.046s)\n", - "Loading epymorph.adrio.prism.Temperature:\n", - " |####################| 100% (22.394s)\n" + "ADRIO data usage estimation:\n", + "- epymorph.adrio.prism.Temperature will download 51.0 MB and write 51.0 MB to disk\n", + "- epymorph.adrio.us_tiger.GeometricCentroid will download 0 Bytes and write 0 Bytes to disk\n", + "In total we will:\n", + "- Download 51.0 MB, taking 51 seconds (assuming 1.0 MB/s)\n", + "- Write 51.0 MB to disk cache (you have 249.1 GB free space)\n" ] } ], @@ -213,13 +227,34 @@ " \"centroid\": us_tiger.GeometricCentroid(),\n", " },\n", ")\n", + "\n", + "rume.estimate_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading epymorph.adrio.us_tiger.GeometricCentroid:\n", + " |####################| 100% (4.787s)\n", + "Loading epymorph.adrio.prism.Temperature:\n", + " |####################| 100% (21.742s)\n" + ] + } + ], + "source": [ "with sim_messaging():\n", " temperature = evaluate_param(rume, \"temperature\")" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 12, "metadata": {}, "outputs": [ {