LODES and PRISM estimate fix. #236

Merged 3 commits on Feb 18, 2025
Changes from all commits
epymorph/adrio/lodes.py (38 changes: 20 additions, 18 deletions)
@@ -297,8 +297,25 @@ def _validate_scope(scope: GeoScope) -> CensusScope:
return scope


def _estimate_lodes(self, scope: CensusScope, job_type: str, year: int) -> DataEstimate:
scope = _validate_scope(self.scope)
class _LodesADRIO(ADRIO[np.int64], ABC):
_override_year: int | None
"""The year for the commuting data.
If None, defaults to the year in which the simulation time frame starts."""

def __init__(self, year: int | None = None):
self._override_year = year

@property
def data_year(self) -> int:
if self._override_year is not None:
return self._override_year
return self.time_frame.start_date.year


def _estimate_lodes(
adrio_instance: _LodesADRIO, scope: CensusScope, job_type: str, year: int
) -> DataEstimate:
scope = _validate_scope(scope)
est_main_size = 0
est_aux_size = 0
urls_aux = []
@@ -371,7 +388,7 @@ def _estimate_lodes(self, scope: CensusScope, job_type: str, year: int) -> DataE

key = f"lodes:{year}:{job_type}"
return AvailableDataEstimate(
name=self.full_name,
name=adrio_instance.class_name,
cache_key=key,
new_network_bytes=est.missing_cache_size,
new_cache_bytes=est.missing_cache_size,
@@ -380,21 +397,6 @@ def _estimate_lodes(self, scope: CensusScope, job_type: str, year: int) -> DataE
)


class _LodesADRIO(ADRIO[np.int64], ABC):
_override_year: int | None
"""The year for the commuting data.
If None, defaults to the year in which the simulation time frame starts."""

def __init__(self, year: int | None = None):
self._override_year = year

@property
def data_year(self) -> int:
if self._override_year is not None:
return self._override_year
return self.time_frame.start_date.year


@adrio_cache
class Commuters(_LodesADRIO):
"""
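Editor's note: the lodes.py change converts _estimate_lodes from an instance method into a module-level helper that receives the calling ADRIO explicitly and reports adrio_instance.class_name instead of self.full_name; the _LodesADRIO base class is moved above the helper so its type hint can reference it. Below is a minimal sketch of a call site, assuming it sits inside epymorph/adrio/lodes.py alongside the definitions shown above; the class name, the estimate_data hook, and the "JT00" job-type code are illustrative assumptions, not taken from this diff.

class ExampleCommutersADRIO(_LodesADRIO):
    # Hypothetical subclass for illustration only; estimate_data and "JT00"
    # are assumptions, not part of this pull request.
    def estimate_data(self) -> DataEstimate:
        # _estimate_lodes validates the scope itself and reads class_name from
        # the instance passed as its first argument, so the subclass only
        # supplies its own parameters.
        return _estimate_lodes(self, self.scope, job_type="JT00", year=self.data_year)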
epymorph/adrio/prism.py (98 changes: 49 additions, 49 deletions)
@@ -139,55 +139,6 @@ def _validate_dates(date_range: TimeFrame) -> TimeFrame:
return date_range


def _estimate_prism(
self, file_size: int, date_range: TimeFrame, attribute: str
) -> DataEstimate:
"""
Calculate estimates for downloading PRISM files.
"""
est_file_size = file_size
total_files = date_range.duration_days

# setup urls as list to check if they're in the cache

# setup date variables
first_day = date_range.start_date
last_day = date_range.end_date
latest_date = datetype.today() - timedelta(days=1)
six_months_ago = datetype.today() + relativedelta(months=-6)
last_completed_month = six_months_ago.replace(day=1) - timedelta(days=1)
date_list = [
first_day + timedelta(days=x) for x in range((last_day - first_day).days + 1)
]

# get url names to check in cache
urls = [
_generate_file_name(attribute, latest_date, last_completed_month, day)[0]
for day in date_list
]

# sum the files needed to download
missing_files = total_files - sum(
1 for u in urls if check_file_in_cache(_PRISM_CACHE_PATH / Path(u).name)
)

# calculate the cache estimate
est = CacheEstimate(
total_cache_size=total_files * est_file_size,
missing_cache_size=missing_files * est_file_size,
)

key = f"prism:{attribute}:{date_range}"
return AvailableDataEstimate(
name=self.full_name,
cache_key=key,
new_network_bytes=est.missing_cache_size,
new_cache_bytes=est.missing_cache_size,
total_cache_bytes=est.total_cache_size,
max_bandwidth=None,
)


class _PRISMAdrio(ADRIO[np.float64], ABC):
_override_time_frame: TimeFrame | None
"""An override time frame for which to fetch data.
@@ -298,6 +249,55 @@ def evaluate_adrio(self) -> NDArray[np.float64]:
return raster_vals


def _estimate_prism(
adrio_instance: _PRISMAdrio, file_size: int, date_range: TimeFrame, attribute: str
) -> DataEstimate:
"""
Calculate estimates for downloading PRISM files.
"""
est_file_size = file_size
total_files = date_range.duration_days

# setup urls as list to check if they're in the cache

# setup date variables
first_day = date_range.start_date
last_day = date_range.end_date
latest_date = datetype.today() - timedelta(days=1)
six_months_ago = datetype.today() + relativedelta(months=-6)
last_completed_month = six_months_ago.replace(day=1) - timedelta(days=1)
date_list = [
first_day + timedelta(days=x) for x in range((last_day - first_day).days + 1)
]

# get url names to check in cache
urls = [
_generate_file_name(attribute, latest_date, last_completed_month, day)[0]
for day in date_list
]

# sum the files needed to download
missing_files = total_files - sum(
1 for u in urls if check_file_in_cache(_PRISM_CACHE_PATH / Path(u).name)
)

# calculate the cache estimate
est = CacheEstimate(
total_cache_size=total_files * est_file_size,
missing_cache_size=missing_files * est_file_size,
)

key = f"prism:{attribute}:{date_range}"
return AvailableDataEstimate(
name=adrio_instance.class_name,
cache_key=key,
new_network_bytes=est.missing_cache_size,
new_cache_bytes=est.missing_cache_size,
total_cache_bytes=est.total_cache_size,
max_bandwidth=None,
)


@adrio_cache
class Precipitation(_PRISMAdrio):
"""
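Editor's note: the prism.py change mirrors the lodes.py one. _estimate_prism is moved below the _PRISMAdrio base class, takes the ADRIO instance as an explicit first argument, and reports adrio_instance.class_name rather than self.full_name. Below is a minimal sketch of a call site, again assuming it sits inside epymorph/adrio/prism.py; the class name, the estimate_data hook, the per-file size figure, and the "ppt" attribute code are illustrative assumptions, and the real subclasses may resolve _override_time_frame before passing a time frame.

class ExamplePrecipitationADRIO(_PRISMAdrio):
    # Hypothetical subclass for illustration only; nothing below is taken
    # verbatim from this pull request.
    def estimate_data(self) -> DataEstimate:
        # Assume roughly 1.5 MB per daily PRISM raster file; _estimate_prism
        # multiplies that by the number of days in the range and subtracts
        # whatever is already present in the local PRISM cache.
        est_file_size = 1_500_000
        return _estimate_prism(self, est_file_size, self.time_frame, "ppt")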