diff --git a/deployment/docker/requirements.txt b/deployment/docker/requirements.txt index 82b3af18..85449226 100644 --- a/deployment/docker/requirements.txt +++ b/deployment/docker/requirements.txt @@ -46,3 +46,5 @@ xarray==2024.6.0 netCDF4==1.7.1.post1 h5netcdf==1.3.0 scipy==1.14.0 +regionmask==0.12.1 +zarr==2.18.2 diff --git a/django_project/gap/admin.py b/django_project/gap/admin.py index 8d531fd7..518e434e 100644 --- a/django_project/gap/admin.py +++ b/django_project/gap/admin.py @@ -8,7 +8,7 @@ from .models import ( Attribute, Country, Provider, Measurement, Station, IngestorSession, - Dataset, DatasetAttribute, NetCDFFile, DatasetType, Unit + Dataset, DatasetAttribute, DataSourceFile, DatasetType, Unit ) @@ -112,11 +112,12 @@ class IngestorSessionAdmin(admin.ModelAdmin): list_filter = ('ingestor_type', 'status') -@admin.register(NetCDFFile) -class NetCDFFileAdmin(admin.ModelAdmin): - """NetCDFFile admin.""" +@admin.register(DataSourceFile) +class DataSourceFileAdmin(admin.ModelAdmin): + """DataSourceFile admin.""" list_display = ( - 'name', 'dataset', 'start_date_time', 'end_date_time', 'created_on' + 'name', 'dataset', 'format', 'start_date_time', + 'end_date_time', 'created_on' ) list_filter = ('dataset',) diff --git a/django_project/gap/factories.py b/django_project/gap/factories.py index 028cba32..1b3de461 100644 --- a/django_project/gap/factories.py +++ b/django_project/gap/factories.py @@ -22,7 +22,7 @@ ObservationType, DatasetTimeStep, DatasetStore, - NetCDFFile + DataSourceFile ) @@ -182,16 +182,17 @@ class Meta: # noqa value = factory.Faker('pyfloat') -class NetCDFFileFactory( - BaseFactory[NetCDFFile], metaclass=BaseMetaFactory[NetCDFFile] +class DataSourceFileFactory( + BaseFactory[DataSourceFile], metaclass=BaseMetaFactory[DataSourceFile] ): - """Factory class for NetCDFFile model.""" + """Factory class for DataSourceFile model.""" class Meta: # noqa - model = NetCDFFile + model = DataSourceFile name = factory.Faker('text') dataset = factory.SubFactory(DatasetFactory) start_date_time = factory.Faker('date_time') end_date_time = factory.Faker('date_time') created_on = factory.Faker('date_time') + format = DatasetStore.NETCDF diff --git a/django_project/gap/migrations/0001_initial.py b/django_project/gap/migrations/0001_initial.py index ffdb557b..fdb7044b 100644 --- a/django_project/gap/migrations/0001_initial.py +++ b/django_project/gap/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.7 on 2024-07-14 21:07 +# Generated by Django 4.2.7 on 2024-07-17 02:56 import django.contrib.gis.db.models.fields from django.db import migrations, models @@ -46,7 +46,8 @@ class Migration(migrations.Migration): ('name', models.CharField(max_length=512)), ('description', models.TextField(blank=True, null=True)), ('time_step', models.CharField(choices=[('DAILY', 'DAILY'), ('HOURLY', 'HOURLY')], max_length=512)), - ('store_type', models.CharField(choices=[('TABLE', 'TABLE'), ('NETCDF', 'NETCDF'), ('EXT_API', 'EXT_API')], max_length=512)), + ('store_type', models.CharField(choices=[('TABLE', 'TABLE'), ('NETCDF', 'NETCDF'), ('ZARR', 'ZARR'), ('EXT_API', 'EXT_API')], max_length=512)), + ('is_internal_use', models.BooleanField(default=False)), ], options={ 'abstract': False, @@ -126,13 +127,14 @@ class Migration(migrations.Migration): }, ), migrations.CreateModel( - name='NetCDFFile', + name='DataSourceFile', fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('name', models.CharField(help_text='Filename with its path 
in the object storage (S3)', max_length=512)), ('start_date_time', models.DateTimeField()), ('end_date_time', models.DateTimeField()), ('created_on', models.DateTimeField()), + ('format', models.CharField(choices=[('NETCDF', 'NETCDF'), ('ZARR', 'ZARR')], max_length=512)), ('dataset', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='gap.dataset')), ], ), diff --git a/django_project/gap/models/__init__.py b/django_project/gap/models/__init__.py index 89243bb6..f7ff6ac9 100644 --- a/django_project/gap/models/__init__.py +++ b/django_project/gap/models/__init__.py @@ -10,4 +10,3 @@ from gap.models.dataset import * from gap.models.measurement import * from gap.models.ingestor import * -from gap.models.netcdf import * diff --git a/django_project/gap/models/dataset.py b/django_project/gap/models/dataset.py index 0291acca..ab25f480 100644 --- a/django_project/gap/models/dataset.py +++ b/django_project/gap/models/dataset.py @@ -35,6 +35,7 @@ class DatasetStore: TABLE = 'TABLE' NETCDF = 'NETCDF' + ZARR = 'ZARR' EXT_API = 'EXT_API' @@ -65,7 +66,31 @@ class Dataset(Definition): choices=( (DatasetStore.TABLE, DatasetStore.TABLE), (DatasetStore.NETCDF, DatasetStore.NETCDF), + (DatasetStore.ZARR, DatasetStore.ZARR), (DatasetStore.EXT_API, DatasetStore.EXT_API), ), max_length=512 ) + is_internal_use = models.BooleanField(default=False) + + +class DataSourceFile(models.Model): + """Model representing a datasource file that is stored in S3 Storage.""" + + name = models.CharField( + max_length=512, + help_text="Filename with its path in the object storage (S3)" + ) + dataset = models.ForeignKey( + Dataset, on_delete=models.CASCADE + ) + start_date_time = models.DateTimeField() + end_date_time = models.DateTimeField() + created_on = models.DateTimeField() + format = models.CharField( + choices=( + (DatasetStore.NETCDF, DatasetStore.NETCDF), + (DatasetStore.ZARR, DatasetStore.ZARR), + ), + max_length=512 + ) diff --git a/django_project/gap/providers/cbam.py b/django_project/gap/providers/cbam.py index fc02c544..5a389589 100644 --- a/django_project/gap/providers/cbam.py +++ b/django_project/gap/providers/cbam.py @@ -10,14 +10,20 @@ from django.contrib.gis.geos import Point import numpy as np import xarray as xr +from xarray.core.dataset import Dataset as xrDataset from gap.models import ( Dataset, DatasetAttribute, - NetCDFFile + DataSourceFile +) +from gap.utils.reader import ( + LocationInputType, + DatasetReaderInput, + DatasetTimelineValue, + DatasetReaderValue, + LocationDatasetReaderValue ) - -from gap.utils.reader import DatasetTimelineValue, DatasetReaderValue from gap.utils.netcdf import ( daterange_inc, BaseNetCDFReader @@ -30,21 +36,23 @@ class CBAMNetCDFReader(BaseNetCDFReader): def __init__( self, dataset: Dataset, attributes: List[DatasetAttribute], - point: Point, start_date: datetime, end_date: datetime) -> None: + location_input: DatasetReaderInput, start_date: datetime, + end_date: datetime) -> None: """Initialize CBAMNetCDFReader class. 
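# Illustrative sketch (not from this diff): querying the renamed model once
# the migration above has been applied. `dataset` stands for any
# gap.models.Dataset instance; the filter mirrors the lookup the readers use.
from gap.models import DataSourceFile, DatasetStore

def latest_netcdf_source(dataset):
    """Return the most recent NetCDF DataSourceFile for a dataset, or None."""
    return DataSourceFile.objects.filter(
        dataset=dataset,
        format=DatasetStore.NETCDF
    ).order_by('id').last()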
:param dataset: Dataset from CBAM provider :type dataset: Dataset :param attributes: List of attributes to be queried :type attributes: List[DatasetAttribute] - :param point: Location to be queried - :type point: Point + :param location_input: Location to be queried + :type location_input: DatasetReaderInput :param start_date: Start date time filter :type start_date: datetime :param end_date: End date time filter :type end_date: datetime """ - super().__init__(dataset, attributes, point, start_date, end_date) + super().__init__( + dataset, attributes, location_input, start_date, end_date) def read_historical_data(self, start_date: datetime, end_date: datetime): """Read historical data from dataset. @@ -57,7 +65,7 @@ def read_historical_data(self, start_date: datetime, end_date: datetime): self.setup_netcdf_reader() self.xrDatasets = [] for filter_date in daterange_inc(start_date, end_date): - netcdf_file = NetCDFFile.objects.filter( + netcdf_file = DataSourceFile.objects.filter( dataset=self.dataset, start_date_time__gte=filter_date, end_date_time__lte=filter_date @@ -65,25 +73,23 @@ def read_historical_data(self, start_date: datetime, end_date: datetime): if netcdf_file is None: continue ds = self.open_dataset(netcdf_file) - val = self.read_variables(ds) + val = self.read_variables(ds, filter_date, filter_date) + if val is None: + continue self.xrDatasets.append(val) - def get_data_values(self) -> DatasetReaderValue: - """Fetch data values from list of xArray Dataset object. + def _get_data_values_from_single_location( + self, point: Point, val: xrDataset) -> DatasetReaderValue: + """Read data values from xrDataset. - :return: Data Value. + :param point: grid cell from the query + :type point: Point + :param val: dataset to be read + :type val: xrDataset + :return: Data Values :rtype: DatasetReaderValue """ results = [] - metadata = { - 'dataset': [self.dataset.name], - 'start_date': self.start_date.isoformat(), - 'end_date': self.end_date.isoformat() - } - if len(self.xrDatasets) == 0: - return DatasetReaderValue(metadata, results) - val = xr.combine_nested( - self.xrDatasets, concat_dim=[self.date_variable]) for dt_idx, dt in enumerate(val[self.date_variable].values): value_data = {} for attribute in self.attributes: @@ -95,4 +101,63 @@ def get_data_values(self) -> DatasetReaderValue: dt, value_data )) - return DatasetReaderValue(metadata, results) + return DatasetReaderValue(point, results) + + def _get_data_values_from_multiple_locations( + self, val: xrDataset, locations: List[Point], + lat_dim: int, lon_dim: int) -> DatasetReaderValue: + """Read data values from xrDataset from list of locations. 
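# Illustrative sketch: the nested lat/lon loops in
# _get_data_values_from_multiple_locations walk the grid in row-major order,
# so the flat index into `locations` is idx_lat * lon_dim + idx_lon -- the
# same order in which find_locations builds the list. A standalone check:
import numpy as np

lat_dim, lon_dim = 2, 3
grid = np.arange(lat_dim * lon_dim).reshape(lat_dim, lon_dim)
flat = [grid[idx_lat, idx_lon]
        for idx_lat in range(lat_dim)
        for idx_lon in range(lon_dim)]
assert flat == list(range(lat_dim * lon_dim))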
+ + :param val: dataset to be read + :type val: xrDataset + :param locations: list of location + :type locations: List[Point] + :param lat_dim: latitude dimension + :type lat_dim: int + :param lon_dim: longitude dimension + :type lon_dim: int + :return: Data Values + :rtype: DatasetReaderValue + """ + results = {} + for dt_idx, dt in enumerate(val[self.date_variable].values): + idx_lat_lon = 0 + for idx_lat in range(lat_dim): + for idx_lon in range(lon_dim): + value_data = {} + for attribute in self.attributes: + v = val[attribute.source].values[ + dt_idx, idx_lat, idx_lon] + value_data[attribute.attribute.variable_name] = ( + v if not np.isnan(v) else None + ) + loc = locations[idx_lat_lon] + if loc in results: + results[loc].append(DatasetTimelineValue( + dt, + value_data + )) + else: + results[loc] = [DatasetTimelineValue( + dt, + value_data + )] + idx_lat_lon += 1 + return LocationDatasetReaderValue(results) + + def get_data_values(self) -> DatasetReaderValue: + """Fetch data values from list of xArray Dataset object. + + :return: Data Value. + :rtype: DatasetReaderValue + """ + if len(self.xrDatasets) == 0: + return DatasetReaderValue(None, []) + val = xr.combine_nested( + self.xrDatasets, concat_dim=[self.date_variable]) + locations, lat_dim, lon_dim = self.find_locations(val) + if self.location_input.type != LocationInputType.POINT: + return self._get_data_values_from_multiple_locations( + val, locations, lat_dim, lon_dim + ) + return self._get_data_values_from_single_location(locations[0], val) diff --git a/django_project/gap/providers/salient.py b/django_project/gap/providers/salient.py index ad78e23e..749643fa 100644 --- a/django_project/gap/providers/salient.py +++ b/django_project/gap/providers/salient.py @@ -5,19 +5,29 @@ .. note:: CBAM Data Reader """ +import json from typing import List from datetime import datetime from django.contrib.gis.geos import Point import numpy as np +import regionmask +import xarray as xr from xarray.core.dataset import Dataset as xrDataset +from shapely.geometry import shape from gap.models import ( Dataset, DatasetAttribute, - NetCDFFile + DataSourceFile ) -from gap.utils.reader import DatasetTimelineValue, DatasetReaderValue +from gap.utils.reader import ( + LocationInputType, + DatasetReaderInput, + DatasetTimelineValue, + DatasetReaderValue, + LocationDatasetReaderValue +) from gap.utils.netcdf import ( BaseNetCDFReader ) @@ -30,21 +40,23 @@ class SalientNetCDFReader(BaseNetCDFReader): def __init__( self, dataset: Dataset, attributes: List[DatasetAttribute], - point: Point, start_date: datetime, end_date: datetime) -> None: + location_input: DatasetReaderInput, start_date: datetime, + end_date: datetime) -> None: """Initialize CBAMNetCDFReader class. :param dataset: Dataset from Salient provider :type dataset: Dataset :param attributes: List of attributes to be queried :type attributes: List[DatasetAttribute] - :param point: Location to be queried - :type point: Point + :param location_input: Location to be queried + :type location_input: DatasetReaderInput :param start_date: Start date time filter :type start_date: datetime :param end_date: End date time filter :type end_date: datetime """ - super().__init__(dataset, attributes, point, start_date, end_date) + super().__init__( + dataset, attributes, location_input, start_date, end_date) def read_forecast_data(self, start_date: datetime, end_date: datetime): """Read forecast data from dataset. 
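# Illustrative sketch of the new calling convention (coordinates and dates are
# made up; `dataset` and `attributes` are assumed to exist, built as in the
# tests further down this diff):
from datetime import datetime
from gap.utils.reader import DatasetReaderInput

location_input = DatasetReaderInput.from_bbox([29.0, -3.0, 30.0, -2.0])
reader = SalientNetCDFReader(
    dataset, attributes, location_input,
    datetime(2024, 3, 15), datetime(2024, 3, 17))
reader.read_forecast_data(datetime(2024, 3, 15), datetime(2024, 3, 17))
data_value = reader.get_data_values()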
@@ -56,59 +68,104 @@ def read_forecast_data(self, start_date: datetime, end_date: datetime): """ self.setup_netcdf_reader() self.xrDatasets = [] - netcdf_file = NetCDFFile.objects.filter( + netcdf_file = DataSourceFile.objects.filter( dataset=self.dataset ).order_by('id').last() if netcdf_file is None: return ds = self.open_dataset(netcdf_file) val = self.read_variables(ds, start_date, end_date) + if val is None: + return self.xrDatasets.append(val) - def read_variables( - self, dataset: xrDataset, start_date: datetime = None, - end_date: datetime = None) -> xrDataset: - """Read data from list variable with filter from given Point. + def _read_variables_by_point( + self, dataset: xrDataset, variables: List[str], + start_dt: np.datetime64, + end_dt: np.datetime64) -> xrDataset: + point = self.location_input.point + return dataset[variables].sel( + lat=point.y, + lon=point.x, method='nearest').where( + (dataset[self.date_variable] >= start_dt) & + (dataset[self.date_variable] <= end_dt), + drop=True + ) - :param dataset: xArray Dataset object - :type dataset: xrDataset - :param start_date: start date for reading forecast data - :type start_date: datetime - :param end_date: end date for reading forecast data - :type end_date: datetime - :return: filtered xArray Dataset object - :rtype: xrDataset - """ - start_dt = np.datetime64(start_date) - end_dt = np.datetime64(end_date) - variables = [a.source for a in self.attributes] - variables.append(self.date_variable) - val = dataset[variables].sel( - lat=self.point.y, lon=self.point.x, - method='nearest' - ).where( + def _read_variables_by_bbox( + self, dataset: xrDataset, variables: List[str], + start_dt: np.datetime64, + end_dt: np.datetime64) -> xrDataset: + points = self.location_input.points + lat_min = points[0].y + lat_max = points[1].y + lon_min = points[0].x + lon_max = points[1].x + # output results is in two dimensional array + return dataset[variables].where( + (dataset.lat >= lat_min) & (dataset.lat <= lat_max) & + (dataset.lon >= lon_min) & (dataset.lon <= lon_max) & (dataset[self.date_variable] >= start_dt) & - (dataset[self.date_variable] <= end_dt), - drop=True + (dataset[self.date_variable] <= end_dt), drop=True) + + def _read_variables_by_polygon( + self, dataset: xrDataset, variables: List[str], + start_dt: np.datetime64, + end_dt: np.datetime64) -> xrDataset: + # Convert the Django GIS Polygon to a format compatible with shapely + shapely_multipolygon = shape( + json.loads(self.location_input.polygon.geojson)) + + # Create a mask using regionmask from the shapely polygon + mask = regionmask.Regions([shapely_multipolygon]).mask(dataset) + # Mask the dataset + return dataset[variables].where( + (mask == 0) & + (dataset[self.date_variable] >= start_dt) & + (dataset[self.date_variable] <= end_dt), drop=True) + + def _read_variables_by_points( + self, dataset: xrDataset, variables: List[str], + start_dt: np.datetime64, + end_dt: np.datetime64) -> xrDataset: + var_dims = dataset[variables[0]].dims + # use the first variable to get its dimension + if 'ensemble' in var_dims: + # use 0 idx ensemble and 0 idx forecast_day + mask = np.zeros_like(dataset[variables[0]][0][0], dtype=bool) + else: + # use the 0 index for it's date variable + mask = np.zeros_like(dataset[variables[0]][0], dtype=bool) + # Iterate through the points and update the mask + for lon, lat in self.location_input.points: + # Find nearest lat and lon indices + lat_idx = np.abs(dataset['lat'] - lat).argmin() + lon_idx = np.abs(dataset['lon'] - lon).argmin() + 
mask[lat_idx, lon_idx] = True + mask_da = xr.DataArray( + mask, + coords={ + 'lat': dataset['lat'], 'lon': dataset['lon'] + }, dims=['lat', 'lon'] ) - return val + # Apply the mask to the dataset + return dataset[variables].where( + (mask_da) & + (dataset[self.date_variable] >= start_dt) & + (dataset[self.date_variable] <= end_dt), drop=True) - def get_data_values(self) -> DatasetReaderValue: - """Fetch data values from list of xArray Dataset object. + def _get_data_values_from_single_location( + self, location: Point, val: xrDataset) -> DatasetReaderValue: + """Read data values from xrDataset. - :return: Data Value. + :param location: grid cell from query + :type location: Point + :param val: dataset to be read + :type val: xrDataset + :return: Data Values :rtype: DatasetReaderValue """ results = [] - metadata = { - 'dataset': [self.dataset.name], - 'start_date': self.start_date.isoformat(), - 'end_date': self.end_date.isoformat() - } - if len(self.xrDatasets) == 0: - return DatasetReaderValue(metadata, results) - # forecast will always use latest dataset - val = self.xrDatasets[0] for dt_idx, dt in enumerate(val[self.date_variable].values): value_data = {} for attribute in self.attributes: @@ -125,4 +182,69 @@ def get_data_values(self) -> DatasetReaderValue: dt, value_data )) - return DatasetReaderValue(metadata, results) + return DatasetReaderValue(location, results) + + def _get_data_values_from_multiple_locations( + self, val: xrDataset, locations: List[Point], + lat_dim: int, lon_dim: int) -> DatasetReaderValue: + """Read data values from xrDataset from list of locations. + + :param val: dataset to be read + :type val: xrDataset + :param locations: list of location + :type locations: List[Point] + :param lat_dim: latitude dimension + :type lat_dim: int + :param lon_dim: longitude dimension + :type lon_dim: int + :return: Data Values + :rtype: DatasetReaderValue + """ + results = {} + for dt_idx, dt in enumerate(val[self.date_variable].values): + idx_lat_lon = 0 + for idx_lat in range(lat_dim): + for idx_lon in range(lon_dim): + value_data = {} + for attribute in self.attributes: + if 'ensemble' in val[attribute.source].dims: + value_data[attribute.attribute.variable_name] = ( + val[attribute.source].values[ + :, dt_idx, idx_lat, idx_lon] + ) + else: + v = val[attribute.source].values[ + dt_idx, idx_lat, idx_lon] + value_data[attribute.attribute.variable_name] = ( + v if not np.isnan(v) else None + ) + loc = locations[idx_lat_lon] + if loc in results: + results[loc].append(DatasetTimelineValue( + dt, + value_data + )) + else: + results[loc] = [DatasetTimelineValue( + dt, + value_data + )] + idx_lat_lon += 1 + return LocationDatasetReaderValue(results) + + def get_data_values(self) -> DatasetReaderValue: + """Fetch data values from list of xArray Dataset object. + + :return: Data Value. 
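# Standalone illustration of the point-mask technique used by
# _read_variables_by_points above: mark grid cells in a boolean array, wrap it
# as a DataArray sharing the lat/lon coords, and let xarray broadcast it.
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'var1': (['lat', 'lon'], np.arange(9.0).reshape(3, 3))},
    coords={'lat': [0.0, 1.0, 2.0], 'lon': [0.0, 1.0, 2.0]}
)
mask = np.zeros((3, 3), dtype=bool)
mask[1, 1] = True  # keep only the cell nearest the requested point
mask_da = xr.DataArray(
    mask, coords={'lat': ds['lat'], 'lon': ds['lon']}, dims=['lat', 'lon'])
subset = ds['var1'].where(mask_da, drop=True)
# drop=True trims lat/lon to the bounding box of the selected cells; any
# unselected cells left inside that box become NaN.
assert float(subset.sel(lat=1.0, lon=1.0)) == 4.0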
+ :rtype: DatasetReaderValue + """ + if len(self.xrDatasets) == 0: + return DatasetReaderValue(None, []) + # forecast will always use latest dataset + val = self.xrDatasets[0] + locations, lat_dim, lon_dim = self.find_locations(val) + if self.location_input.type != LocationInputType.POINT: + return self._get_data_values_from_multiple_locations( + val, locations, lat_dim, lon_dim + ) + return self._get_data_values_from_single_location(locations[0], val) diff --git a/django_project/gap/providers/tahmo.py b/django_project/gap/providers/tahmo.py index f7a019ae..95ea226e 100644 --- a/django_project/gap/providers/tahmo.py +++ b/django_project/gap/providers/tahmo.py @@ -7,7 +7,7 @@ from typing import List from datetime import datetime -from django.contrib.gis.geos import Point +from django.contrib.gis.geos import Polygon, Point from django.contrib.gis.db.models.functions import Distance from gap.models import ( @@ -17,9 +17,12 @@ Measurement ) from gap.utils.reader import ( + LocationInputType, + DatasetReaderInput, DatasetTimelineValue, DatasetReaderValue, - BaseDatasetReader + BaseDatasetReader, + LocationDatasetReaderValue ) @@ -28,22 +31,68 @@ class TahmoDatasetReader(BaseDatasetReader): def __init__( self, dataset: Dataset, attributes: List[DatasetAttribute], - point: Point, start_date: datetime, end_date: datetime) -> None: + location_input: DatasetReaderInput, start_date: datetime, + end_date: datetime) -> None: """Initialize TahmoDatasetReader class. :param dataset: Dataset from Tahmo provider :type dataset: Dataset :param attributes: List of attributes to be queried :type attributes: List[DatasetAttribute] - :param point: Location to be queried - :type point: Point + :param location_input: Location to be queried + :type location_input: DatasetReaderInput :param start_date: Start date time filter :type start_date: datetime :param end_date: End date time filter :type end_date: datetime """ - super().__init__(dataset, attributes, point, start_date, end_date) - self.results = [] + super().__init__( + dataset, attributes, location_input, start_date, end_date) + self.results = {} + + def _find_nearest_station_by_point(self, point: Point = None): + p = point + if p is None: + p = self.location_input.point + qs = Station.objects.annotate( + distance=Distance('geometry', p) + ).filter( + provider=self.dataset.provider + ).order_by('distance').first() + if qs is None: + return None + return [qs] + + def _find_nearest_station_by_bbox(self): + points = self.location_input.points + polygon = Polygon.from_bbox( + (points[0].x, points[0].y, points[1].x, points[1].y)) + qs = Station.objects.filter( + geometry__within=polygon + ).order_by('id') + if not qs.exists(): + return None + return qs + + def _find_nearest_station_by_polygon(self): + qs = Station.objects.filter( + geometry__within=self.location_input.polygon + ).order_by('id') + if not qs.exists(): + return None + return qs + + def _find_nearest_station_by_points(self): + points = self.location_input.points + results = {} + for point in points: + rs = self._find_nearest_station_by_point(point) + if rs is None: + continue + if rs[0].id in results: + continue + results[rs[0].id] = rs[0] + return results.values() def read_historical_data(self, start_date: datetime, end_date: datetime): """Read historical data from dataset. 
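# Quick standalone check of the (xmin, ymin, xmax, ymax) ordering that
# _find_nearest_station_by_bbox relies on when building its filter polygon:
from django.contrib.gis.geos import Point, Polygon

bbox_polygon = Polygon.from_bbox((26.0, -13.0, 27.0, -12.0))
assert bbox_polygon.contains(Point(26.97, -12.56))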
@@ -53,36 +102,57 @@ def read_historical_data(self, start_date: datetime, end_date: datetime): :param end_date: end date for reading historical data :type end_date: datetime """ - nearest_station = Station.objects.annotate( - distance=Distance('geometry', self.point) - ).filter( - provider=self.dataset.provider - ).order_by('distance').first() - if nearest_station is None: + nearest_stations = None + if self.location_input.type == LocationInputType.POINT: + nearest_stations = self._find_nearest_station_by_point() + elif self.location_input.type == LocationInputType.POLYGON: + nearest_stations = self._find_nearest_station_by_polygon() + elif self.location_input.type == LocationInputType.LIST_OF_POINT: + nearest_stations = self._find_nearest_station_by_points() + elif self.location_input.type == LocationInputType.BBOX: + nearest_stations = self._find_nearest_station_by_bbox() + if nearest_stations is None: return measurements = Measurement.objects.select_related( - 'dataset_attribute', 'dataset_attribute__attribute' + 'dataset_attribute', 'dataset_attribute__attribute', + 'station' ).filter( date_time__gte=start_date, date_time__lte=end_date, dataset_attribute__in=self.attributes, - station=nearest_station - ).order_by('date_time') + station__in=nearest_stations + ).order_by('station', 'date_time', 'dataset_attribute') + # final result, group by point + self.results = {} + curr_point = None curr_dt = None + station_results = [] + # group by date_time measurement_dict = {} for measurement in measurements: - if curr_dt is None: + if curr_point is None: + curr_point = measurement.station.geometry curr_dt = measurement.date_time - elif curr_dt != measurement.date_time: - self.results.append( + elif curr_point != measurement.station.geometry: + station_results.append( DatasetTimelineValue(curr_dt, measurement_dict)) curr_dt = measurement.date_time measurement_dict = {} + self.results[curr_point] = station_results + curr_point = measurement.station.geometry + station_results = [] + else: + if curr_dt != measurement.date_time: + station_results.append( + DatasetTimelineValue(curr_dt, measurement_dict)) + curr_dt = measurement.date_time + measurement_dict = {} measurement_dict[ measurement.dataset_attribute.attribute.variable_name ] = measurement.value - self.results.append( + station_results.append( DatasetTimelineValue(curr_dt, measurement_dict)) + self.results[curr_point] = station_results def get_data_values(self) -> DatasetReaderValue: """Fetch results. @@ -90,9 +160,7 @@ def get_data_values(self) -> DatasetReaderValue: :return: Data Value. 
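# The single-pass grouping below depends on measurements arriving ordered by
# station, then date_time. The same shape expressed with itertools.groupby
# over plain tuples (standalone sketch; Measurement rows simplified):
from itertools import groupby

rows = [  # (station_point, date_time, variable_name, value)
    ('P1', 't1', 'temp', 100), ('P1', 't2', 'temp', 200),
    ('P2', 't1', 'temp', 300),
]
results = {}
for point, by_station in groupby(rows, key=lambda r: r[0]):
    results[point] = [
        (dt, {r[2]: r[3] for r in by_time})
        for dt, by_time in groupby(by_station, key=lambda r: r[1])
    ]
assert results['P1'] == [('t1', {'temp': 100}), ('t2', {'temp': 200})]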
:rtype: DatasetReaderValue """ - metadata = { - 'dataset': [self.dataset.name], - 'start_date': self.start_date.isoformat(), - 'end_date': self.end_date.isoformat() - } - return DatasetReaderValue(metadata, self.results) + if len(self.results.keys()) == 1: + key = list(self.results.keys())[0] + return DatasetReaderValue(key, self.results[key]) + return LocationDatasetReaderValue(self.results) diff --git a/django_project/gap/tasks/netcdf_sync.py b/django_project/gap/tasks/netcdf_sync.py index 4f84dcd7..bde762df 100644 --- a/django_project/gap/tasks/netcdf_sync.py +++ b/django_project/gap/tasks/netcdf_sync.py @@ -17,7 +17,7 @@ from gap.models import ( Attribute, Provider, - NetCDFFile, + DataSourceFile, Dataset, DatasetAttribute, DatasetStore, @@ -134,7 +134,12 @@ def sync_by_dataset(dataset: Dataset): file_path = filename if file_path.startswith('/'): file_path = file_path[1:] - if NetCDFFile.objects.filter(name=file_path).exists(): + check_exist = DataSourceFile.objects.filter( + name=file_path, + dataset=dataset, + format=DatasetStore.NETCDF + ).exists() + if check_exist: continue netcdf_filename = os.path.split(file_path)[1] file_date = datetime.strptime( @@ -143,12 +148,13 @@ def sync_by_dataset(dataset: Dataset): file_date.year, file_date.month, file_date.day, 0, 0, 0, tzinfo=pytz.UTC ) - NetCDFFile.objects.create( + DataSourceFile.objects.create( name=file_path, dataset=dataset, start_date_time=start_datetime, end_date_time=start_datetime, - created_on=timezone.now() + created_on=timezone.now(), + format=DatasetStore.NETCDF ) count += 1 if count > 0: diff --git a/django_project/gap/tests/providers/test_cbam.py b/django_project/gap/tests/providers/test_cbam.py index 868bec2d..7ff36446 100644 --- a/django_project/gap/tests/providers/test_cbam.py +++ b/django_project/gap/tests/providers/test_cbam.py @@ -12,6 +12,7 @@ from unittest.mock import Mock, patch from core.settings.utils import absolute_path +from gap.utils.reader import DatasetReaderInput from gap.utils.netcdf import ( NetCDFProvider, ) @@ -23,16 +24,29 @@ DatasetFactory, DatasetAttributeFactory, AttributeFactory, - NetCDFFileFactory + DataSourceFileFactory ) class TestCBAMNetCDFReader(TestCase): """Unit test for CBAM NetCDFReader class.""" + def setUp(self) -> None: + """Set test class.""" + self.dataset = DatasetFactory.create( + provider=ProviderFactory(name=NetCDFProvider.CBAM)) + self.attribute = AttributeFactory.create( + name='Max Total Temperature', + variable_name='max_total_temperature') + self.dataset_attr = DatasetAttributeFactory.create( + dataset=self.dataset, + attribute=self.attribute, + source='max_total_temperature' + ) + @patch('gap.utils.netcdf.daterange_inc', return_value=[datetime(2023, 1, 1)]) - @patch('gap.models.NetCDFFile.objects.filter') + @patch('gap.models.DataSourceFile.objects.filter') def test_read_historical_data_empty( self, mock_filter, mock_daterange_inc): """Test for reading historical data that returns empty.""" @@ -49,20 +63,10 @@ def test_read_historical_data_empty( def test_read_historical_data(self): """Test for reading historical data from CBAM sample.""" - dataset = DatasetFactory.create( - provider=ProviderFactory(name=NetCDFProvider.CBAM)) - attribute = AttributeFactory.create( - name='Max Total Temperature', - variable_name='max_total_temperature') - dataset_attr = DatasetAttributeFactory.create( - dataset=dataset, - attribute=attribute, - source='max_total_temperature' - ) dt = datetime(2019, 11, 1, 0, 0, 0) p = Point(x=26.97, y=-12.56) - NetCDFFileFactory.create( - 
dataset=dataset, + DataSourceFileFactory.create( + dataset=self.dataset, start_date_time=dt, end_date_time=dt ) @@ -73,7 +77,10 @@ def test_read_historical_data(self): mock_open.return_value = ( xr.open_dataset(file_path) ) - reader = CBAMNetCDFReader(dataset, [dataset_attr], p, dt, dt) + reader = CBAMNetCDFReader( + self.dataset, [self.dataset_attr], + DatasetReaderInput.from_point(p), + dt, dt) reader.read_historical_data(dt, dt) mock_open.assert_called_once() self.assertEqual(len(reader.xrDatasets), 1) @@ -82,3 +89,44 @@ def test_read_historical_data(self): self.assertEqual( data_value.results[0].values['max_total_temperature'], 33.371735) + + def test_get_data_values_from_multiple_locations(self): + """Test get data values from several locations.""" + dt = datetime(2019, 11, 1, 0, 0, 0) + p = Point(x=26.97, y=-12.56) + attr1 = DatasetAttributeFactory.create( + dataset=self.dataset, + source='var1' + ) + attr2 = DatasetAttributeFactory.create( + dataset=self.dataset, + source='var2' + ) + reader = CBAMNetCDFReader( + self.dataset, [attr1, attr2], + DatasetReaderInput.from_point(p), + dt, dt) + date_variable = 'time' + date_values = [datetime(2020, 1, 1), datetime(2020, 1, 2)] + data_var1 = [[[10, 20], [30, 40]], [[50, 60], [70, 80]]] + data_var2 = [[[90, 100], [110, 120]], [[130, 140], [150, 160]]] + + val = xr.Dataset( + { + 'var1': (['time', 'lat', 'lon'], data_var1), + 'var2': (['time', 'lat', 'lon'], data_var2) + }, + coords={'time': date_values, 'lat': [0, 1], 'lon': [0, 1]} + ) + + locations = [Point(0, 0), Point(1, 1), Point(0, 1), Point(1, 0)] + + reader.date_variable = date_variable + result = reader._get_data_values_from_multiple_locations( + val, locations, 2, 2) + self.assertIn(locations[0], result.results) + self.assertIn(locations[1], result.results) + self.assertIn(locations[2], result.results) + self.assertIn(locations[3], result.results) + data = result.results[locations[3]] + self.assertEqual(len(data), 2) diff --git a/django_project/gap/tests/providers/test_salient.py b/django_project/gap/tests/providers/test_salient.py index e3204463..e56ceb2a 100644 --- a/django_project/gap/tests/providers/test_salient.py +++ b/django_project/gap/tests/providers/test_salient.py @@ -8,10 +8,16 @@ from django.test import TestCase from datetime import datetime import xarray as xr -from django.contrib.gis.geos import Point +from django.contrib.gis.geos import Point, MultiPoint from unittest.mock import Mock, patch from core.settings.utils import absolute_path +from gap.models import DatasetAttribute +from gap.utils.reader import ( + DatasetReaderInput, + LocationDatasetReaderValue, + LocationInputType +) from gap.utils.netcdf import ( NetCDFProvider, ) @@ -23,14 +29,46 @@ DatasetFactory, DatasetAttributeFactory, AttributeFactory, - NetCDFFileFactory + DataSourceFileFactory ) class TestSalientNetCDFReader(TestCase): """Unit test for Salient NetCDFReader class.""" - @patch('gap.models.NetCDFFile.objects.filter') + def setUp(self): + """Set Test class for SalientNetCDFReader.""" + self.dataset = DatasetFactory.create( + provider=ProviderFactory(name=NetCDFProvider.SALIENT)) + self.attribute1 = AttributeFactory.create( + name='Temperature Climatology', + variable_name='temp_clim') + self.dataset_attr1 = DatasetAttributeFactory.create( + dataset=self.dataset, + attribute=self.attribute1, + source='temp_clim' + ) + self.attribute2 = AttributeFactory.create( + name='Precipitation Anomaly', + variable_name='precip_anom') + self.dataset_attr2 = DatasetAttributeFactory.create( + 
dataset=self.dataset, + attribute=self.attribute2, + source='precip_anom' + ) + self.attributes = [DatasetAttribute(source='var1'), + DatasetAttribute(source='var2')] + self.location_input = DatasetReaderInput.from_point( + Point(1.0, 2.0) + ) + self.start_date = datetime(2020, 1, 1) + self.end_date = datetime(2020, 1, 31) + self.reader = SalientNetCDFReader( + self.dataset, self.attributes, self.location_input, + self.start_date, self.end_date + ) + + @patch('gap.models.DataSourceFile.objects.filter') @patch('xarray.open_dataset') def test_read_forecast_data_empty(self, mock_open_dataset, mock_filter): """Test for reading forecast data.""" @@ -49,30 +87,12 @@ def test_read_forecast_data_empty(self, mock_open_dataset, mock_filter): def test_read_forecast_data(self): """Test for reading forecast data from Salient sample.""" - dataset = DatasetFactory.create( - provider=ProviderFactory(name=NetCDFProvider.SALIENT)) - attribute1 = AttributeFactory.create( - name='Temperature Climatology', - variable_name='temp_clim') - dataset_attr1 = DatasetAttributeFactory.create( - dataset=dataset, - attribute=attribute1, - source='temp_clim' - ) - attribute2 = AttributeFactory.create( - name='Precipitation Anomaly', - variable_name='precip_anom') - dataset_attr2 = DatasetAttributeFactory.create( - dataset=dataset, - attribute=attribute2, - source='precip_anom' - ) dt = datetime(2024, 3, 14, 0, 0, 0) dt1 = datetime(2024, 3, 15, 0, 0, 0) dt2 = datetime(2024, 3, 17, 0, 0, 0) p = Point(x=29.12, y=-2.625) - NetCDFFileFactory.create( - dataset=dataset, + DataSourceFileFactory.create( + dataset=self.dataset, start_date_time=dt, end_date_time=dt ) @@ -84,7 +104,8 @@ def test_read_forecast_data(self): xr.open_dataset(file_path) ) reader = SalientNetCDFReader( - dataset, [dataset_attr1, dataset_attr2], p, dt1, dt2) + self.dataset, [self.dataset_attr1, self.dataset_attr2], + DatasetReaderInput.from_point(p), dt1, dt2) reader.read_forecast_data(dt1, dt2) self.assertEqual(len(reader.xrDatasets), 1) data_value = reader.get_data_values() @@ -94,3 +115,73 @@ def test_read_forecast_data(self): data_value.results[0].values['temp_clim'], 19.461235) self.assertEqual( len(data_value.results[0].values['precip_anom']), 50) + + def test_read_from_bbox(self): + """Test for reading forecast data using bbox.""" + dt = datetime(2024, 3, 14, 0, 0, 0) + dt1 = datetime(2024, 3, 15, 0, 0, 0) + dt2 = datetime(2024, 3, 17, 0, 0, 0) + p = Point(x=29.125, y=-2.625) + DataSourceFileFactory.create( + dataset=self.dataset, + start_date_time=dt, + end_date_time=dt + ) + file_path = absolute_path( + 'gap', 'tests', 'netcdf', 'salient.nc' + ) + with patch.object(SalientNetCDFReader, 'open_dataset') as mock_open: + mock_open.return_value = ( + xr.open_dataset(file_path) + ) + reader = SalientNetCDFReader( + self.dataset, [self.dataset_attr1, self.dataset_attr2], + DatasetReaderInput.from_bbox( + [p.x, p.y, p.x + 0.5, p.y + 0.5]), + dt1, dt2 + ) + reader.read_forecast_data(dt1, dt2) + self.assertEqual(len(reader.xrDatasets), 1) + data_value = reader.get_data_values() + mock_open.assert_called_once() + self.assertTrue(isinstance(data_value, LocationDatasetReaderValue)) + self.assertEqual(len(data_value.results), 2) + self.assertIn(p, data_value.results) + val = data_value.results[p] + self.assertEqual(len(val), 3) + self.assertEqual( + val[0].values['temp_clim'], 19.461235) + self.assertEqual( + len(val[0].values['precip_anom']), 50) + + def test_read_from_points(self): + """Test for reading forecast data using points.""" + dt = datetime(2024, 3, 
14, 0, 0, 0) + dt1 = datetime(2024, 3, 15, 0, 0, 0) + dt2 = datetime(2024, 3, 17, 0, 0, 0) + p = Point(x=29.125, y=-2.625) + DataSourceFileFactory.create( + dataset=self.dataset, + start_date_time=dt, + end_date_time=dt + ) + file_path = absolute_path( + 'gap', 'tests', 'netcdf', 'salient.nc' + ) + with patch.object(SalientNetCDFReader, 'open_dataset') as mock_open: + mock_open.return_value = ( + xr.open_dataset(file_path) + ) + location_input = DatasetReaderInput( + MultiPoint([p, Point(x=p.x + 0.5, y=p.y + 0.5)]), + LocationInputType.LIST_OF_POINT) + reader = SalientNetCDFReader( + self.dataset, [self.dataset_attr1, self.dataset_attr2], + location_input, dt1, dt2) + reader.read_forecast_data(dt1, dt2) + self.assertEqual(len(reader.xrDatasets), 1) + data_value = reader.get_data_values() + mock_open.assert_called_once() + self.assertTrue(isinstance(data_value, LocationDatasetReaderValue)) + self.assertEqual(len(data_value.results), 2) + self.assertIn(p, data_value.results) diff --git a/django_project/gap/tests/providers/test_tahmo.py b/django_project/gap/tests/providers/test_tahmo.py index 3e4cca3e..1b9f60c4 100644 --- a/django_project/gap/tests/providers/test_tahmo.py +++ b/django_project/gap/tests/providers/test_tahmo.py @@ -7,7 +7,9 @@ from django.test import TestCase from datetime import datetime -from django.contrib.gis.geos import Point +from django.contrib.gis.geos import ( + Point, MultiPoint, MultiPolygon, Polygon +) from gap.providers import ( TahmoDatasetReader @@ -20,40 +22,126 @@ StationFactory, MeasurementFactory ) +from gap.utils.reader import ( + DatasetReaderInput, + LocationInputType, + LocationDatasetReaderValue +) class TestTahmoReader(TestCase): """Unit test for Tahmo NetCDFReader class.""" - def test_read_historical_data(self): - """Test for reading historical data from Tahmo.""" - dataset = DatasetFactory.create( + def setUp(self): + """Set test for TahmoReader.""" + self.dataset = DatasetFactory.create( provider=ProviderFactory(name='Tahmo')) - attribute = AttributeFactory.create( + self.attribute = AttributeFactory.create( name='Surface air temperature', variable_name='surface_air_temperature') - dataset_attr = DatasetAttributeFactory.create( - dataset=dataset, - attribute=attribute, + self.dataset_attr = DatasetAttributeFactory.create( + dataset=self.dataset, + attribute=self.attribute, source='surface_air_temperature' ) - dt = datetime(2019, 11, 1, 0, 0, 0) p = Point(x=26.97, y=-12.56, srid=4326) - station = StationFactory.create( + self.station = StationFactory.create( geometry=p, - provider=dataset.provider + provider=self.dataset.provider + ) + self.location_input = DatasetReaderInput.from_point(p) + self.start_date = datetime(2020, 1, 1) + self.end_date = datetime(2020, 1, 31) + self.reader = TahmoDatasetReader( + self.dataset, [self.dataset_attr], self.location_input, + self.start_date, self.end_date ) + + def test_find_nearest_station_by_point(self): + """Test find nearest station from single point.""" + result = self.reader._find_nearest_station_by_point() + self.assertEqual(result, [self.station]) + + def test_find_nearest_station_by_bbox(self): + """Test find nearest station from bbox.""" + self.reader.location_input = DatasetReaderInput.from_bbox( + [-180, -90, 180, 90]) + result = self.reader._find_nearest_station_by_bbox() + self.assertEqual(list(result), [self.station]) + + def test_find_nearest_station_by_polygon(self): + """Test find nearest station from polygon.""" + self.reader.location_input.type = LocationInputType.POLYGON + 
self.reader.location_input.geom_collection = MultiPolygon( + Polygon.from_bbox((-180, -90, 180, 90))) + result = self.reader._find_nearest_station_by_polygon() + self.assertEqual(list(result), [self.station]) + + def test_find_nearest_station_by_points(self): + """Test find nearest station from list of point.""" + self.reader.location_input.type = LocationInputType.LIST_OF_POINT + self.reader.location_input.geom_collection = MultiPoint( + [Point(0, 0), self.station.geometry]) + result = self.reader._find_nearest_station_by_points() + self.assertEqual(list(result), [self.station]) + + def test_read_historical_data(self): + """Test for reading historical data from Tahmo.""" + dt = datetime(2019, 11, 1, 0, 0, 0) MeasurementFactory.create( - station=station, - dataset_attribute=dataset_attr, + station=self.station, + dataset_attribute=self.dataset_attr, date_time=dt, value=100 ) reader = TahmoDatasetReader( - dataset, [dataset_attr], p, dt, dt) + self.dataset, [self.dataset_attr], DatasetReaderInput.from_point( + self.station.geometry + ), dt, dt) reader.read_historical_data(dt, dt) data_value = reader.get_data_values() self.assertEqual(len(data_value.results), 1) self.assertEqual( data_value.results[0].values['surface_air_temperature'], 100) + + def test_read_historical_data_multiple_locations(self): + """Test for reading historical data from multiple locations.""" + dt1 = datetime(2019, 11, 1, 0, 0, 0) + dt2 = datetime(2019, 11, 2, 0, 0, 0) + MeasurementFactory.create( + station=self.station, + dataset_attribute=self.dataset_attr, + date_time=dt1, + value=100 + ) + MeasurementFactory.create( + station=self.station, + dataset_attribute=self.dataset_attr, + date_time=dt2, + value=200 + ) + p = Point(x=28.97, y=-10.56, srid=4326) + station2 = StationFactory.create( + geometry=p, + provider=self.dataset.provider + ) + MeasurementFactory.create( + station=station2, + dataset_attribute=self.dataset_attr, + date_time=dt1, + value=300 + ) + location_input = DatasetReaderInput( + MultiPoint([self.station.geometry, p]), + LocationInputType.LIST_OF_POINT) + reader = TahmoDatasetReader( + self.dataset, [self.dataset_attr], location_input, dt1, dt2) + reader.read_historical_data(dt1, dt2) + data_value = reader.get_data_values() + self.assertTrue(isinstance(data_value, LocationDatasetReaderValue)) + self.assertEqual(len(data_value.results), 2) + self.assertIn(p, data_value.results) + self.assertIn(self.station.geometry, data_value.results) + results = data_value.results[self.station.geometry] + self.assertEqual(len(results), 2) diff --git a/django_project/gap/tests/test_task_netcdf_sync.py b/django_project/gap/tests/test_task_netcdf_sync.py index 622a072a..a2cef254 100644 --- a/django_project/gap/tests/test_task_netcdf_sync.py +++ b/django_project/gap/tests/test_task_netcdf_sync.py @@ -14,7 +14,7 @@ Unit, Attribute, DatasetAttribute, - NetCDFFile, + DataSourceFile, CastType ) from gap.utils.netcdf import ( @@ -29,7 +29,7 @@ from gap.factories import ( ProviderFactory, DatasetFactory, - NetCDFFileFactory + DataSourceFileFactory ) @@ -108,30 +108,30 @@ def test_sync_by_dataset( provider = ProviderFactory.create(name=NetCDFProvider.CBAM) dataset = DatasetFactory.create(provider=provider) # add existing NetCDF File - NetCDFFileFactory.create( + DataSourceFileFactory.create( dataset=dataset, name='2023-01-02.nc' ) sync_by_dataset(dataset) mock_fs.walk.assert_called_with('s3://test_bucket/cbam') self.assertEqual( - NetCDFFile.objects.filter( + DataSourceFile.objects.filter( dataset=dataset, name='2023-01-02.nc' 
).count(), 1 ) self.assertFalse( - NetCDFFile.objects.filter( + DataSourceFile.objects.filter( dataset=dataset, name='dmrpp/2023-01-01.nc.dmrpp' ).exists() ) self.assertTrue( - NetCDFFile.objects.filter( + DataSourceFile.objects.filter( dataset=dataset, name='2023-01-01.nc' ).exists() ) self.assertTrue( - NetCDFFile.objects.filter( + DataSourceFile.objects.filter( dataset=dataset, name='2023/2023-02-01.nc' ).exists() ) diff --git a/django_project/gap/tests/test_utils_netcdf.py b/django_project/gap/tests/test_utils_netcdf.py index d962bb65..6d407980 100644 --- a/django_project/gap/tests/test_utils_netcdf.py +++ b/django_project/gap/tests/test_utils_netcdf.py @@ -5,15 +5,25 @@ .. note:: Unit tests for NetCDF Utilities. """ +import os +import json from django.test import TestCase from datetime import datetime import numpy as np -from django.contrib.gis.geos import Point +import xarray as xr +from django.contrib.gis.geos import ( + Point, GeometryCollection, Polygon, MultiPolygon, + MultiPoint +) from unittest.mock import Mock, MagicMock, patch +from gap.models import Provider, Dataset, DatasetAttribute from gap.utils.reader import ( DatasetTimelineValue, - DatasetReaderValue + DatasetReaderValue, + LocationDatasetReaderValue, + DatasetReaderInput, + LocationInputType ) from gap.utils.netcdf import ( NetCDFProvider, @@ -36,11 +46,57 @@ class TestNetCDFProvider(TestCase): """Unit test for NetCDFProvider class.""" + def setUp(self): + """Set test for NetCDFProvider class.""" + self.provider = Provider(name='CBAM') + self.env_vars = { + 'CBAM_AWS_ACCESS_KEY_ID': 'test_key', + 'CBAM_AWS_SECRET_ACCESS_KEY': 'test_secret', + 'CBAM_AWS_ENDPOINT_URL': 'https://test.endpoint', + 'CBAM_AWS_BUCKET_NAME': 'test_bucket', + 'CBAM_AWS_DIR_PREFIX': 'test_prefix', + 'CBAM_AWS_REGION_NAME': 'test_region' + } + def test_constants(self): """Test for correct constants.""" self.assertEqual(NetCDFProvider.CBAM, 'CBAM') self.assertEqual(NetCDFProvider.SALIENT, 'Salient') + @patch.dict(os.environ, { + 'CBAM_AWS_ACCESS_KEY_ID': 'test_key', + 'CBAM_AWS_SECRET_ACCESS_KEY': 'test_secret', + 'CBAM_AWS_ENDPOINT_URL': 'https://test.endpoint', + 'CBAM_AWS_BUCKET_NAME': 'test_bucket', + 'CBAM_AWS_DIR_PREFIX': 'test_prefix', + 'CBAM_AWS_REGION_NAME': 'test_region' + }) + def test_get_s3_variables(self): + """Test get_s3_variables method.""" + expected = { + 'AWS_ACCESS_KEY_ID': 'test_key', + 'AWS_SECRET_ACCESS_KEY': 'test_secret', + 'AWS_ENDPOINT_URL': 'https://test.endpoint', + 'AWS_BUCKET_NAME': 'test_bucket', + 'AWS_DIR_PREFIX': 'test_prefix', + 'AWS_REGION_NAME': 'test_region' + } + self.assertEqual( + NetCDFProvider.get_s3_variables(self.provider), expected) + + @patch.dict(os.environ, { + 'CBAM_AWS_ENDPOINT_URL': 'https://test.endpoint', + 'CBAM_AWS_REGION_NAME': 'test_region' + }) + def test_get_s3_client_kwargs(self): + """Test for get_s3_client_kwargs.""" + expected = { + 'endpoint_url': 'https://test.endpoint', + 'region_name': 'test_region' + } + self.assertEqual( + NetCDFProvider.get_s3_client_kwargs(self.provider), expected) + class TestDaterangeInc(TestCase): """Unit test for daterange_inc function.""" @@ -61,39 +117,164 @@ def test_daterange(self): class TestDatasetTimelineValue(TestCase): """Unit test for class DatasetTimelineValue.""" - def test_to_dict(self): - """Test convert to dict.""" - dt = np.datetime64('2023-01-01T00:00:00') - values = {'temperature': 20.5} + def test_to_dict_with_datetime(self): + """Test to_dict with python datetime object.""" + dt = datetime(2023, 7, 16, 12, 0, 0) + values = 
{"temperature": 25} + dtv = DatasetTimelineValue(dt, values) + expected = { + 'datetime': '2023-07-16T12:00:00', + 'values': values + } + self.assertEqual(dtv.to_dict(), expected) + + def test_to_dict_with_np_datetime64(self): + """Test to_dict with numpy datetime64 object.""" + dt = np.datetime64('2023-07-16T12:00:00') + values = {"temperature": 25} dtv = DatasetTimelineValue(dt, values) - expected_dict = { - 'datetime': np.str_('2023-01-01T00:00:00Z'), + expected = { + 'datetime': np.datetime_as_string(dt, unit='s', timezone='UTC'), 'values': values } - self.assertEqual(dtv.to_dict(), expected_dict) + self.assertEqual(dtv.to_dict(), expected) + + def test_to_dict_with_none_datetime(self): + """Test to_dict with empty datetime.""" + dtv = DatasetTimelineValue(None, {"temperature": 25}) + expected = { + 'datetime': '', + 'values': {"temperature": 25} + } + self.assertEqual(dtv.to_dict(), expected) class TestDatasetReaderValue(TestCase): """Unit test for class DatasetReaderValue.""" + def test_to_dict_with_location(self): + """Test to_dict with location.""" + location = Point(1, 1) + dtv = DatasetTimelineValue( + datetime(2023, 7, 16, 12, 0, 0), {"temperature": 25}) + drv = DatasetReaderValue(location, [dtv]) + expected = { + 'geometry': json.loads(location.json), + 'data': [dtv.to_dict()] + } + self.assertEqual(drv.to_dict(), expected) + + def test_to_dict_with_none_location(self): + """Test to_dict with empty location.""" + drv = DatasetReaderValue(None, []) + expected = {} + self.assertEqual(drv.to_dict(), expected) + + +class TestLocationDatasetReaderValue(TestCase): + """Unit test for LocationDatasetReaderValue class.""" + def test_to_dict(self): - """Test convert to dict.""" - metadata = {'source': 'test'} + """Test to_dict method returrning dictionary.""" + location1 = Point(1, 1) + location2 = Point(2, 2) dtv1 = DatasetTimelineValue( - np.datetime64('2023-01-01T00:00:00'), {'temp': 20.5}) + datetime(2023, 7, 16, 12, 0, 0), {"temperature": 25}) dtv2 = DatasetTimelineValue( - np.datetime64('2023-01-02T00:00:00'), {'temp': 21.0}) - drv = DatasetReaderValue(metadata, [dtv1, dtv2]) - expected_dict = { - 'metadata': metadata, - 'data': [dtv1.to_dict(), dtv2.to_dict()] - } - self.assertEqual(drv.to_dict(), expected_dict) + datetime(2023, 7, 16, 13, 0, 0), {"temperature": 26}) + results = {location1: [dtv1], location2: [dtv2]} + ldrv = LocationDatasetReaderValue(results) + expected = [ + DatasetReaderValue(location1, [dtv1]).to_dict(), + DatasetReaderValue(location2, [dtv2]).to_dict() + ] + self.assertEqual(ldrv.to_dict(), expected) + + +class TestDatasetReaderInput(TestCase): + """Unit test for DatasetReaderInput class.""" + + def test_point(self): + """Test get property point.""" + geom_collection = GeometryCollection(Point(1, 1)) + dri = DatasetReaderInput(geom_collection, LocationInputType.POINT) + self.assertEqual(dri.point, Point(1, 1, srid=4326)) + + def test_polygon(self): + """Test get property polygon.""" + polygon = MultiPolygon(Polygon(((0, 0), (1, 0), (1, 1), (0, 0)))) + geom_collection = GeometryCollection(polygon) + dri = DatasetReaderInput(geom_collection, LocationInputType.POLYGON) + self.assertEqual(dri.polygon, geom_collection) + + def test_points(self): + """Test get property points.""" + points = [Point(1, 1), Point(2, 2)] + geom_collection = GeometryCollection(*points) + dri = DatasetReaderInput( + geom_collection, LocationInputType.LIST_OF_POINT) + self.assertEqual( + dri.points, [Point(1, 1, srid=4326), Point(2, 2, srid=4326)]) + + def test_from_point(self): + 
"""Test create object from point.""" + point = Point(1, 1, srid=4326) + dri = DatasetReaderInput.from_point(point) + self.assertEqual(dri.type, LocationInputType.POINT) + self.assertEqual(dri.point, point) + + def test_from_bbox(self): + """Test create object from bbox.""" + bbox_list = [1.0, 1.0, 2.0, 2.0] + dri = DatasetReaderInput.from_bbox(bbox_list) + self.assertEqual(dri.type, LocationInputType.BBOX) + expected_points = [ + Point(x=bbox_list[0], y=bbox_list[1], srid=4326), + Point(x=bbox_list[2], y=bbox_list[3], srid=4326) + ] + self.assertEqual(dri.points, expected_points) + + def test_invalid_point_access(self): + """Test get property point with invalid type.""" + geom_collection = GeometryCollection(Point(1, 1)) + dri = DatasetReaderInput(geom_collection, LocationInputType.BBOX) + with self.assertRaises(TypeError): + _ = dri.point + + def test_invalid_polygon_access(self): + """Test get property polygon with invalid type.""" + geom_collection = GeometryCollection(Point(1, 1)) + dri = DatasetReaderInput(geom_collection, LocationInputType.POINT) + with self.assertRaises(TypeError): + _ = dri.polygon + + def test_invalid_points_access(self): + """Test get property points with invalid type.""" + geom_collection = GeometryCollection(Point(1, 1)) + dri = DatasetReaderInput(geom_collection, LocationInputType.POINT) + with self.assertRaises(TypeError): + _ = dri.points class TestBaseNetCDFReader(TestCase): """Unit test for class BaseNetCDFReader.""" + def setUp(self): + """Set test for BaseNetCDFReader.""" + self.provider = Provider(name='CBAM') + self.dataset = Dataset(provider=self.provider) + self.attributes = [DatasetAttribute(source='var1'), + DatasetAttribute(source='var2')] + self.location_input = DatasetReaderInput.from_point( + Point(1.0, 2.0) + ) + self.start_date = datetime(2020, 1, 1) + self.end_date = datetime(2020, 1, 31) + self.reader = BaseNetCDFReader( + self.dataset, self.attributes, self.location_input, + self.start_date, self.end_date + ) + def test_add_attribute(self): """Test adding a new attribute to Reader.""" reader = BaseNetCDFReader(Mock(), [], Mock(), Mock(), Mock()) @@ -101,6 +282,21 @@ def test_add_attribute(self): reader.add_attribute(DatasetAttributeFactory.create()) self.assertEqual(len(reader.attributes), 1) + def test_get_attributes_metadata(self): + """Test get attributes metadata dict.""" + attrib = DatasetAttributeFactory.create() + attrib.attribute.variable_name = 'temperature' + attrib.attribute.unit.name = 'C' + attrib.attribute.name = 'Temperature' + reader = BaseNetCDFReader(Mock(), [attrib], Mock(), Mock(), Mock()) + expected = { + 'temperature': { + 'units': 'C', + 'longname': 'Temperature' + } + } + self.assertEqual(reader.get_attributes_metadata(), expected) + def test_read_variables(self): """Test reading variables.""" dataset = DatasetFactory.create(name=NetCDFProvider.CBAM) @@ -109,7 +305,8 @@ def test_read_variables(self): dataset_attr = DatasetAttributeFactory( dataset=dataset, attribute=attribute) reader = BaseNetCDFReader( - dataset, [dataset_attr], Point(x=29.125, y=-2.215), + dataset, [dataset_attr], + DatasetReaderInput.from_point(Point(x=29.125, y=-2.215)), Mock(), Mock()) xrArray = MagicMock() xrArray.sel.return_value = [] @@ -121,7 +318,7 @@ def test_read_variables(self): @patch('gap.utils.netcdf.NetCDFProvider.get_s3_client_kwargs') @patch('gap.utils.netcdf.NetCDFProvider.get_s3_variables') @patch('fsspec.filesystem') - def test_setupNetCDFReader( + def test_setup_netcdf_reader( self, mock_filesystem, mock_get_s3_vars, 
mock_get_s3_kwargs): """Test for setup NetCDFReader class.""" mock_get_s3_kwargs.return_value = { @@ -167,3 +364,157 @@ def test_from_dataset(self): dataset3 = DatasetFactory.create() with self.assertRaises(TypeError): get_reader_from_dataset(dataset3) + + def test_read_variables_by_point(self): + """Test read variables xarray by point.""" + # Prepare a mock dataset + data = np.random.rand(2, 10, 10) + lats = np.linspace(-90, 90, 10) + lons = np.linspace(-180, 180, 10) + times = np.array(['2020-01-01', '2020-01-02'], dtype='datetime64') + dataset = xr.Dataset( + {'var1': (['time', 'lat', 'lon'], data), + 'var2': (['time', 'lat', 'lon'], data)}, + coords={'time': times, 'lat': lats, 'lon': lons} + ) + result = self.reader._read_variables_by_point( + dataset, ['var1', 'var2'], + np.datetime64(self.start_date), np.datetime64(self.end_date) + ) + self.assertIsInstance(result, xr.Dataset) + + def test_read_variables_by_bbox(self): + """Test read variables xarray by bbox.""" + data = np.random.rand(2, 10, 10) + lats = np.linspace(-90, 90, 10) + lons = np.linspace(-180, 180, 10) + times = np.array(['2020-01-01', '2020-01-02'], dtype='datetime64') + dataset = xr.Dataset( + {'var1': (['time', 'lat', 'lon'], data), + 'var2': (['time', 'lat', 'lon'], data)}, + coords={'time': times, 'lat': lats, 'lon': lons} + ) + self.reader.location_input = DatasetReaderInput.from_bbox( + [-180, -90, 180, 90]) + result = self.reader._read_variables_by_bbox( + dataset, ['var1', 'var2'], np.datetime64(self.start_date), + np.datetime64(self.end_date) + ) + self.assertIsInstance(result, xr.Dataset) + + def test_read_variables_by_points(self): + """Test read variables xarray by points.""" + data = np.random.rand(2, 10, 10) + lats = np.linspace(-90, 90, 10) + lons = np.linspace(-180, 180, 10) + times = np.array(['2020-01-01', '2020-01-02'], dtype='datetime64') + dataset = xr.Dataset( + {'var1': (['time', 'lat', 'lon'], data), + 'var2': (['time', 'lat', 'lon'], data)}, + coords={'time': times, 'lat': lats, 'lon': lons} + ) + self.reader.location_input.type = LocationInputType.LIST_OF_POINT + self.reader.location_input.geom_collection = MultiPoint( + [Point(0, 0), Point(10, 10)]) + result = self.reader._read_variables_by_points( + dataset, ['var1', 'var2'], np.datetime64(self.start_date), + np.datetime64(self.end_date) + ) + self.assertIsInstance(result, xr.Dataset) + + def test_find_locations(self): + """Test find locations method.""" + data = np.random.rand(10, 10) + lats = np.linspace(-90, 90, 10) + lons = np.linspace(-180, 180, 10) + dataset = xr.Dataset( + {'var1': (['lat', 'lon'], data)}, + coords={'lat': lats, 'lon': lons} + ) + locations, lat_len, lon_len = self.reader.find_locations(dataset) + self.assertEqual(len(locations), 100) + self.assertEqual(lat_len, 10) + self.assertEqual(lon_len, 10) + + @patch.object(BaseNetCDFReader, '_read_variables_by_point') + @patch.object(BaseNetCDFReader, '_read_variables_by_bbox') + @patch.object(BaseNetCDFReader, '_read_variables_by_polygon') + @patch.object(BaseNetCDFReader, '_read_variables_by_points') + def test_read_variables_several_cases( + self, mock_read_by_points, mock_read_by_polygon, + mock_read_by_bbox, mock_read_by_point): + """Test several cases in read_variables function.""" + data = np.random.rand(2, 10, 10) + lats = np.linspace(-90, 90, 10) + lons = np.linspace(-180, 180, 10) + times = np.array(['2020-01-01', '2020-01-02'], dtype='datetime64') + dataset = xr.Dataset( + {'var1': (['time', 'lat', 'lon'], data), + 'var2': (['time', 'lat', 'lon'], data)}, + 
coords={'time': times, 'lat': lats, 'lon': lons} + ) + + # Mock results + mock_read_by_point.return_value = dataset + mock_read_by_bbox.return_value = dataset + mock_read_by_polygon.return_value = dataset + mock_read_by_points.return_value = dataset + + # Test POINT input + self.reader.location_input.type = LocationInputType.POINT + result = self.reader.read_variables( + dataset, self.start_date, self.end_date) + self.assertIsInstance(result, xr.Dataset) + mock_read_by_point.assert_called_once_with( + dataset, ['var1', 'var2', 'date'], np.datetime64(self.start_date), + np.datetime64(self.end_date) + ) + + # Test BBOX input + self.reader.location_input = DatasetReaderInput.from_bbox( + [-180, -90, 180, 90]) + result = self.reader.read_variables( + dataset, self.start_date, self.end_date) + self.assertIsInstance(result, xr.Dataset) + mock_read_by_bbox.assert_called_once_with( + dataset, ['var1', 'var2', 'date'], np.datetime64(self.start_date), + np.datetime64(self.end_date) + ) + + # Test POLYGON input + self.reader.location_input.type = LocationInputType.POLYGON + polygon = Polygon(((0, 0), (0, 10), (10, 10), (10, 0), (0, 0))) + self.reader.location_input.geom_collection = MultiPolygon(polygon) + result = self.reader.read_variables( + dataset, self.start_date, self.end_date) + self.assertIsInstance(result, xr.Dataset) + mock_read_by_polygon.assert_called_once_with( + dataset, ['var1', 'var2', 'date'], np.datetime64(self.start_date), + np.datetime64(self.end_date) + ) + + # Test LIST_OF_POINT input + self.reader.location_input.type = LocationInputType.LIST_OF_POINT + points = [Point(0, 0), Point(10, 10)] + self.reader.location_input.geom_collection = GeometryCollection( + *points) + result = self.reader.read_variables( + dataset, self.start_date, self.end_date) + self.assertIsInstance(result, xr.Dataset) + mock_read_by_points.assert_called_once_with( + dataset, ['var1', 'var2', 'date'], np.datetime64(self.start_date), + np.datetime64(self.end_date) + ) + + # Test exception handling + mock_read_by_bbox.reset_mock() + mock_read_by_bbox.side_effect = Exception('Unexpected error') + self.reader.location_input = DatasetReaderInput.from_bbox( + [-180, -90, 180, 90]) + result = self.reader.read_variables( + dataset, self.start_date, self.end_date) + self.assertIsNone(result) + mock_read_by_bbox.assert_called_once_with( + dataset, ['var1', 'var2', 'date'], np.datetime64(self.start_date), + np.datetime64(self.end_date) + ) diff --git a/django_project/gap/utils/netcdf.py b/django_project/gap/utils/netcdf.py index 651fdf74..f930cd9b 100644 --- a/django_project/gap/utils/netcdf.py +++ b/django_project/gap/utils/netcdf.py @@ -6,24 +6,34 @@ """ import os +import json +import logging from typing import List from datetime import datetime, timedelta from django.contrib.gis.geos import Point +import numpy as np import xarray as xr +import regionmask from xarray.core.dataset import Dataset as xrDataset import fsspec +from shapely.geometry import shape from gap.models import ( Provider, Dataset, DatasetAttribute, - NetCDFFile + DataSourceFile ) from gap.utils.reader import ( - BaseDatasetReader + LocationInputType, + BaseDatasetReader, + DatasetReaderInput ) +logger = logging.getLogger(__name__) + + class NetCDFProvider: """Class contains NetCDF Provider.""" @@ -170,21 +180,23 @@ class BaseNetCDFReader(BaseDatasetReader): def __init__( self, dataset: Dataset, attributes: List[DatasetAttribute], - point: Point, start_date: datetime, end_date: datetime) -> None: + location_input: DatasetReaderInput, + start_date: 
datetime, end_date: datetime) -> None:
         """Initialize BaseNetCDFReader class.

         :param dataset: Dataset for reading
         :type dataset: Dataset
         :param attributes: List of attributes to be queried
         :type attributes: List[DatasetAttribute]
-        :param point: Location to be queried
-        :type point: Point
+        :param location_input: Location to be queried
+        :type location_input: DatasetReaderInput
         :param start_date: Start date time filter
         :type start_date: datetime
         :param end_date: End date time filter
         :type end_date: datetime
         """
-        super().__init__(dataset, attributes, point, start_date, end_date)
+        super().__init__(
+            dataset, attributes, location_input, start_date, end_date)
         self.xrDatasets = []

     def setup_netcdf_reader(self):
@@ -199,11 +211,11 @@ def setup_netcdf_reader(self):
             )
         )

-    def open_dataset(self, netcdf_file: NetCDFFile) -> xrDataset:
+    def open_dataset(self, netcdf_file: DataSourceFile) -> xrDataset:
         """Open a NetCDFFile using xArray.

         :param netcdf_file: NetCDF from a dataset
-        :type netcdf_file: NetCDFFile
+        :type netcdf_file: DataSourceFile
         :return: xArray Dataset object
         :rtype: xrDataset
         """
@@ -215,6 +227,65 @@ def open_dataset(self, netcdf_file: NetCDFFile) -> xrDataset:
             netcdf_url += f'{netcdf_file.name}'
         return xr.open_dataset(self.fs.open(netcdf_url))

+    def _read_variables_by_point(
+            self, dataset: xrDataset, variables: List[str],
+            start_dt: np.datetime64,
+            end_dt: np.datetime64) -> xrDataset:
+        point = self.location_input.point
+        return dataset[variables].sel(
+            lat=point.y,
+            lon=point.x, method='nearest')
+
+    def _read_variables_by_bbox(
+            self, dataset: xrDataset, variables: List[str],
+            start_dt: np.datetime64,
+            end_dt: np.datetime64) -> xrDataset:
+        points = self.location_input.points
+        lat_min = points[0].y
+        lat_max = points[1].y
+        lon_min = points[0].x
+        lon_max = points[1].x
+        # output results are in a two-dimensional array
+        return dataset[variables].where(
+            (dataset.lat >= lat_min) & (dataset.lat <= lat_max) &
+            (dataset.lon >= lon_min) & (dataset.lon <= lon_max), drop=True)
+
+    def _read_variables_by_polygon(
+            self, dataset: xrDataset, variables: List[str],
+            start_dt: np.datetime64,
+            end_dt: np.datetime64) -> xrDataset:
+        # Convert the Django GIS MultiPolygon to a shapely geometry
+        shapely_multipolygon = shape(
+            json.loads(self.location_input.polygon.geojson))
+
+        # Create a mask using regionmask from the shapely polygon
+        mask = regionmask.Regions([shapely_multipolygon]).mask(dataset)
+        # Mask the dataset
+        return dataset[variables].where(mask == 0, drop=True)
+
+    def _read_variables_by_points(
+            self, dataset: xrDataset, variables: List[str],
+            start_dt: np.datetime64,
+            end_dt: np.datetime64) -> xrDataset:
+        # use the first variable to get its dimension
+        # use the 0 index for its date variable
+        mask = np.zeros_like(dataset[variables[0]][0], dtype=bool)
+        # Iterate through the points and update the mask
+        for lon, lat in self.location_input.points:
+            # Find nearest lat and lon indices
+            lat_idx = np.abs(dataset['lat'] - lat).argmin()
+            lon_idx = np.abs(dataset['lon'] - lon).argmin()
+            mask[lat_idx, lon_idx] = True
+        mask_da = xr.DataArray(
+            mask,
+            coords={
+                'lat': dataset['lat'], 'lon': dataset['lon']
+            },
+            dims=['lat', 'lon']
+        )
+        # Apply the mask to the dataset
+        return dataset[variables].where(mask_da, drop=True)
+
     def read_variables(
             self, dataset: xrDataset, start_date: datetime = None,
             end_date: datetime = None) -> xrDataset:
@@ -225,7 +296,47 @@ def read_variables(
         :return: filtered xArray Dataset object
         :rtype: xrDataset
         """
+        start_dt = 
np.datetime64(start_date, 'ns')
+        end_dt = np.datetime64(end_date, 'ns')
         variables = [a.source for a in self.attributes]
         variables.append(self.date_variable)
-        return dataset[variables].sel(
-            lat=self.point.y, lon=self.point.x, method='nearest')
+        result: xrDataset = None
+        try:
+            if self.location_input.type == LocationInputType.BBOX:
+                result = self._read_variables_by_bbox(
+                    dataset, variables, start_dt, end_dt)
+            elif self.location_input.type == LocationInputType.POLYGON:
+                result = self._read_variables_by_polygon(
+                    dataset, variables, start_dt, end_dt)
+            elif self.location_input.type == LocationInputType.LIST_OF_POINT:
+                result = self._read_variables_by_points(
+                    dataset, variables, start_dt, end_dt)
+            else:
+                result = self._read_variables_by_point(
+                    dataset, variables, start_dt, end_dt)
+        except Exception as ex:
+            logger.error(
+                'Failed to read_variables from '
+                f'netcdf dataset {self.dataset.provider.name} '
+                f'date {start_date} - {end_date} with vars: {variables}'
+            )
+            logger.error(ex)
+        return result
+
+    def find_locations(self, val: xrDataset) -> tuple:
+        """Find locations from dataset.
+
+        :param val: dataset to be read
+        :type val: xrDataset
+        :return: tuple of (points, number of lat, number of lon)
+        :rtype: (List[Point], int, int)
+        """
+        locations = []
+        lat_values = val['lat'].values
+        lon_values = val['lon'].values
+        if lat_values.ndim == 0 and lon_values.ndim == 0:
+            return [Point(x=float(lon_values), y=float(lat_values))], 1, 1
+        for lat in lat_values:
+            for lon in lon_values:
+                locations.append(Point(x=float(lon), y=float(lat)))
+        return locations, len(lat_values), len(lon_values)
diff --git a/django_project/gap/utils/reader.py b/django_project/gap/utils/reader.py
index be1f9480..ae0abbec 100644
--- a/django_project/gap/utils/reader.py
+++ b/django_project/gap/utils/reader.py
@@ -5,11 +5,14 @@
 .. note:: Helper for reading dataset
 """

-from typing import Union, List
+import json
+from typing import Union, List, Dict
 import numpy as np
 from datetime import datetime
 import pytz
-from django.contrib.gis.geos import Point
+from django.contrib.gis.geos import (
+    Point, MultiPolygon, GeometryCollection, MultiPoint
+)

 from gap.models import (
     Dataset,
@@ -35,6 +38,8 @@ def __init__(

     def _datetime_as_str(self):
         """Convert datetime object to string."""
+        if self.datetime is None:
+            return ''
         if isinstance(self.datetime, np.datetime64):
             return np.datetime_as_string(
                 self.datetime, unit='s', timezone='UTC')
@@ -56,16 +61,16 @@ class DatasetReaderValue:
     """Class representing all values from reader."""

     def __init__(
-            self, metadata: dict,
+            self, location: Point,
             results: List[DatasetTimelineValue]) -> None:
         """Initialize DatasetReaderValue object.
-        :param metadata: Dictionary of metadata
-        :type metadata: dict
+        :param location: point of the observed station/grid cell
+        :type location: Point
         :param results: Data value list
         :type results: List[DatasetTimelineValue]
         """
-        self.metadata = metadata
+        self.location = location
         self.results = results

     def to_dict(self):
@@ -74,26 +79,127 @@ def to_dict(self):
         :return: Dictionary of metadata and data
         :rtype: dict
         """
+        if self.location is None:
+            return {}
         return {
-            'metadata': self.metadata,
+            'geometry': json.loads(self.location.json),
             'data': [result.to_dict() for result in self.results]
         }


+class LocationDatasetReaderValue(DatasetReaderValue):
+    """Class representing data values for multiple locations."""
+
+    def __init__(
+            self, results: Dict[Point, List[DatasetTimelineValue]]) -> None:
+        """Initialize LocationDatasetReaderValue."""
+        super().__init__(None, [])
+        self.results = results
+
+    def to_dict(self):
+        """Convert into dict.
+
+        :return: list of geometry and data dictionaries
+        :rtype: list
+        """
+        location_data = []
+        for location, values in self.results.items():
+            val = DatasetReaderValue(location, values)
+            location_data.append(val.to_dict())
+        return location_data
+
+
+class LocationInputType:
+    """Class for location input type."""
+
+    POINT = 'point'
+    BBOX = 'bbox'
+    POLYGON = 'polygon'
+    LIST_OF_POINT = 'list_of_point'
+
+
+class DatasetReaderInput:
+    """Class to store the dataset reader input.
+
+    Input type: Point, bbox, polygon, list of point
+    """
+
+    def __init__(self, geom_collection: GeometryCollection, type: str):
+        """Initialize DatasetReaderInput class."""
+        self.geom_collection = geom_collection
+        self.type = type
+
+    @property
+    def point(self) -> Point:
+        """Get single point from input."""
+        if self.type != LocationInputType.POINT:
+            raise TypeError('Location input type is not point!')
+        return Point(
+            x=self.geom_collection[0].x,
+            y=self.geom_collection[0].y, srid=4326)
+
+    @property
+    def polygon(self) -> MultiPolygon:
+        """Get MultiPolygon object from input."""
+        if self.type != LocationInputType.POLYGON:
+            raise TypeError('Location input type is not polygon!')
+        return self.geom_collection
+
+    @property
+    def points(self) -> List[Point]:
+        """Get list of points from input."""
+        if self.type not in [
+            LocationInputType.BBOX, LocationInputType.LIST_OF_POINT
+        ]:
+            raise TypeError('Location input type is not bbox/list_of_point!')
+        return [
+            Point(x=point.x, y=point.y, srid=4326) for
+            point in self.geom_collection
+        ]
+
+    @classmethod
+    def from_point(cls, point: Point):
+        """Create input from single point.
+
+        :param point: single point
+        :type point: Point
+        :return: DatasetReaderInput with POINT type
+        :rtype: DatasetReaderInput
+        """
+        return DatasetReaderInput(
+            MultiPoint([point]), LocationInputType.POINT)
+
+    @classmethod
+    def from_bbox(cls, bbox_list: List[float]):
+        """Create input from bbox (xmin, ymin, xmax, ymax).
+
+        :param bbox_list: (xmin, ymin, xmax, ymax)
+        :type bbox_list: List[float]
+        :return: DatasetReaderInput with BBOX type
+        :rtype: DatasetReaderInput
+        """
+        return DatasetReaderInput(
+            MultiPoint([
+                Point(x=bbox_list[0], y=bbox_list[1], srid=4326),
+                Point(x=bbox_list[2], y=bbox_list[3], srid=4326)
+            ]), LocationInputType.BBOX)
+
+
 class BaseDatasetReader:
     """Base class for Dataset Reader."""

     def __init__(
             self, dataset: Dataset, attributes: List[DatasetAttribute],
-            point: Point, start_date: datetime, end_date: datetime) -> None:
+            location_input: DatasetReaderInput,
+            start_date: datetime, end_date: datetime) -> None:
         """Initialize BaseDatasetReader class.

         :param dataset: Dataset for reading
         :type dataset: Dataset
         :param attributes: List of attributes to be queried
         :type attributes: List[DatasetAttribute]
-        :param point: Location to be queried
-        :type point: Point
+        :param location_input: Location to be queried
+        :type location_input: DatasetReaderInput
         :param start_date: Start date time filter
         :type start_date: datetime
         :param end_date: End date time filter
@@ -101,7 +207,7 @@ def __init__(
         """
         self.dataset = dataset
         self.attributes = attributes
-        self.point = point
+        self.location_input = location_input
         self.start_date = start_date
         self.end_date = end_date

diff --git a/django_project/gap_api/api_views/measurement.py b/django_project/gap_api/api_views/measurement.py
index c7252d93..45fb87d8 100644
--- a/django_project/gap_api/api_views/measurement.py
+++ b/django_project/gap_api/api_views/measurement.py
@@ -7,6 +7,7 @@

 from typing import Dict
 import pytz
+import json
 from datetime import date, datetime, time
 from drf_yasg.utils import swagger_auto_schema
 from drf_yasg import openapi
@@ -14,13 +15,23 @@
 from rest_framework.response import Response
 from rest_framework.views import APIView
 from django.db.models.functions import Lower
-from django.contrib.gis.geos import Point
+from django.contrib.gis.geos import (
+    GEOSGeometry,
+    Point,
+    MultiPoint,
+    MultiPolygon
+)

 from gap.models import (
     Attribute,
     DatasetAttribute
 )
-from gap.utils.reader import DatasetReaderValue, BaseDatasetReader
+from gap.utils.reader import (
+    LocationInputType,
+    DatasetReaderInput,
+    DatasetReaderValue,
+    BaseDatasetReader
+)
 from gap_api.serializers.common import APIErrorSerializer
 from gap_api.utils.helper import ApiTag
 from gap.providers import get_reader_from_dataset
@@ -31,6 +42,27 @@ class MeasurementAPI(APIView):

     date_format = '%Y-%m-%d'
     permission_classes = [IsAuthenticated]
+    api_parameters = [
+        openapi.Parameter(
+            'attributes', openapi.IN_QUERY,
+            description='List of attribute names', type=openapi.TYPE_STRING
+        ),
+        openapi.Parameter(
+            'start_date', openapi.IN_QUERY,
+            description='Start Date',
+            type=openapi.TYPE_STRING
+        ),
+        openapi.Parameter(
+            'end_date', openapi.IN_QUERY,
+            description='End Date',
+            type=openapi.TYPE_STRING
+        ),
+        openapi.Parameter(
+            'providers', openapi.IN_QUERY,
+            description='List of provider names',
+            type=openapi.TYPE_STRING
+        ),
+    ]

     def _get_attribute_filter(self):
         """Get list of attributes in the query parameter.
@@ -56,17 +88,41 @@ def _get_date_filter(self, attr_name):
             datetime.strptime(date_str, self.date_format).date()
         )

-    def _get_location_filter(self):
+    def _get_location_filter(self) -> DatasetReaderInput:
         """Get location from lon and lat in the request parameters.
:return: Location to be queried - :rtype: Point + :rtype: DatasetReaderInput """ + if self.request.method == 'POST': + features = self.request.data['features'] + geom = None + point_list = [] + for geojson in features: + geom = GEOSGeometry( + json.dumps(geojson['geometry']), srid=4326 + ) + if isinstance(geom, MultiPolygon): + break + point_list.append(geom[0]) + if geom is None: + raise TypeError('Unknown geometry type!') + if isinstance(geom, MultiPolygon): + return DatasetReaderInput( + geom, LocationInputType.POLYGON) + return DatasetReaderInput( + MultiPoint(point_list), LocationInputType.LIST_OF_POINT) lon = self.request.GET.get('lon', None) lat = self.request.GET.get('lat', None) - if lon is None or lat is None: - return None - return Point(x=float(lon), y=float(lat), srid=4326) + if lon is not None and lat is not None: + return DatasetReaderInput.from_point( + Point(x=float(lon), y=float(lat), srid=4326)) + # (xmin, ymin, xmax, ymax) + bbox = self.request.GET.get('bbox', None) + if bbox is not None: + number_list = [float(a) for a in bbox.split(',')] + return DatasetReaderInput.from_bbox(number_list) + return None def _get_provider_filter(self): """Get provider name filter in the request parameters. @@ -110,7 +166,8 @@ def get_response_data(self): if location is None: return data dataset_attributes = DatasetAttribute.objects.filter( - attribute__in=attributes + attribute__in=attributes, + dataset__is_internal_use=False ) provider_filter = self._get_provider_filter() if provider_filter: @@ -127,44 +184,29 @@ def get_response_data(self): reader = get_reader_from_dataset(da.dataset) dataset_dict[da.dataset.id] = reader( da.dataset, [da], location, start_dt, end_dt) + data = { + 'metadata': { + 'start_date': start_dt.isoformat(timespec='seconds'), + 'end_date': end_dt.isoformat(timespec='seconds'), + 'dataset': [] + }, + 'results': [] + } for reader in dataset_dict.values(): + data['metadata']['dataset'].append({ + 'provider': reader.dataset.provider.name, + 'attributes': reader.get_attributes_metadata() + }) values = self._read_data(reader).to_dict() - if 'metadata' in data: - data['metadata']['dataset'].append( - reader.dataset.name) - data['metadata']['attributes'].update( - reader.get_attributes_metadata()) - else: - data['metadata'] = values['metadata'] - data['metadata']['attributes'] = ( - reader.get_attributes_metadata() - ) - if 'data' in data: - data['data'][reader.dataset.name] = values['data'] - else: - data['data'] = { - reader.dataset.name: values['data'] - } + if values: + data['results'].append(values) return data @swagger_auto_schema( operation_id='get-measurement', tags=[ApiTag.Measurement], manual_parameters=[ - openapi.Parameter( - 'attributes', openapi.IN_QUERY, - description='List of attribute name', type=openapi.TYPE_STRING - ), - openapi.Parameter( - 'start_date', openapi.IN_QUERY, - description='Start Date', - type=openapi.TYPE_STRING - ), - openapi.Parameter( - 'end_date', openapi.IN_QUERY, - description='End Date', - type=openapi.TYPE_STRING - ), + *api_parameters, openapi.Parameter( 'lat', openapi.IN_QUERY, description='Latitude', @@ -176,8 +218,8 @@ def get_response_data(self): type=openapi.TYPE_NUMBER ), openapi.Parameter( - 'providers', openapi.IN_QUERY, - description='List of provider name', + 'bbox', openapi.IN_QUERY, + description='Bounding box: xmin, ymin, xmax, ymax', type=openapi.TYPE_STRING ) ], @@ -198,3 +240,31 @@ def get(self, request, *args, **kwargs): status=200, data=self.get_response_data() ) + + @swagger_auto_schema( + 
operation_id='get-measurement-by-polygon',
+        tags=[ApiTag.Measurement],
+        manual_parameters=[
+            *api_parameters
+        ],
+        request_body=openapi.Schema(
+            description='Polygon (SRID 4326) in GeoJSON format',
+            type=openapi.TYPE_STRING
+        ),
+        responses={
+            200: openapi.Schema(
+                description=(
+                    'Measurement data'
+                ),
+                type=openapi.TYPE_OBJECT,
+                properties={}
+            ),
+            400: APIErrorSerializer
+        }
+    )
+    def post(self, request, *args, **kwargs):
+        """Fetch measurement data by polygon."""
+        return Response(
+            status=200,
+            data=self.get_response_data()
+        )
diff --git a/django_project/gap_api/tests/test_measurement_api.py b/django_project/gap_api/tests/test_measurement_api.py
index 067db687..13438447 100644
--- a/django_project/gap_api/tests/test_measurement_api.py
+++ b/django_project/gap_api/tests/test_measurement_api.py
@@ -5,20 +5,22 @@

 .. note:: Unit tests for User API.
 """
+import json
 from datetime import datetime
 from typing import List
-from django.contrib.gis.geos import Point
 from django.urls import reverse
 from unittest.mock import patch
+from django.contrib.gis.geos import Polygon, MultiPolygon

 from core.tests.common import FakeResolverMatchV1, BaseAPIViewTest
 from django_project.gap.models import DatasetAttribute
 from django_project.gap.utils.reader import (
     DatasetReaderValue,
-    DatasetTimelineValue
+    DatasetTimelineValue,
+    DatasetReaderInput
 )
 from gap_api.api_views.measurement import MeasurementAPI
-from gap.utils.reader import BaseDatasetReader
+from gap.utils.reader import BaseDatasetReader, LocationInputType
 from gap.factories import DatasetAttributeFactory


@@ -26,17 +28,20 @@ class MockDatasetReader(BaseDatasetReader):
     """Class to mock a dataset reader."""

     def __init__(self, dataset, attributes: List[DatasetAttribute],
-                 point: Point, start_date: datetime,
+                 location_input: DatasetReaderInput, start_date: datetime,
                  end_date: datetime) -> None:
         """Initialize MockDatasetReader class."""
-        super().__init__(dataset, attributes, point, start_date, end_date)
+        super().__init__(
+            dataset, attributes, location_input, start_date, end_date)

     def get_data_values(self) -> DatasetReaderValue:
         """Override data values with a mock object."""
+        if self.location_input.type == LocationInputType.POLYGON:
+            p = self.location_input.polygon[0]
+        else:
+            p = self.location_input.point
         return DatasetReaderValue(
-            {
-                'dataset': [self.dataset.name]
-            },
+            p,
             [DatasetTimelineValue(self.start_date, {
                 'test': 100
             })]
@@ -79,6 +84,52 @@ def _get_measurement_request(

         return request

+    def _post_measurement_request(
+            self, lat=-2.215, lon=29.125, attributes='max_total_temperature',
+            start_dt='2024-04-01', end_dt='2024-04-04', providers=None):
+        """Create a POST request for the Measurement API.
+
+        :param lat: latitude, defaults to -2.215
+        :type lat: float, optional
+        :param lon: longitude, defaults to 29.125
+        :type lon: float, optional
+        :param attributes: comma separated list of attributes,
+            defaults to 'max_total_temperature'
+        :type attributes: str, optional
+        :param start_dt: start date range, defaults to '2024-04-01'
+        :type start_dt: str, optional
+        :param end_dt: end date range, defaults to '2024-04-04'
+        :type end_dt: str, optional
+        :return: Request object
+        :rtype: WSGIRequest
+        """
+        request_params = (
+            f'?lat={lat}&lon={lon}&attributes={attributes}'
+            f'&start_date={start_dt}&end_date={end_dt}'
+        )
+        if providers:
+            request_params = request_params + f'&providers={providers}'
+        polygon = Polygon(((0, 0), (0, 10), (10, 10), (10, 0), (0, 0)))
+        data = {
+            "type": "FeatureCollection",
+            "name": "polygon",
+            "features": [
+                {
+                    "type": "Feature",
+                    "properties": {"name": "1"},
+                    "geometry": json.loads(MultiPolygon(polygon).json)
+                }
+            ]
+        }
+        request = self.factory.post(
+            reverse('api:v1:get-measurement') + request_params,
+            data=data, format='json'
+        )
+        request.user = self.superuser
+        request.resolver_match = FakeResolverMatchV1
+        return request
+

 class HistoricalAPITest(CommonMeasurementAPITest):
     """Historical api test case."""
@@ -88,7 +139,8 @@ def test_read_historical_data_empty(self):
         request = self._get_measurement_request()
         response = view(request)
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.data, {})
+        self.assertIn('metadata', response.data)
+        self.assertEqual(response.data['results'], [])

     @patch('gap_api.api_views.measurement.get_reader_from_dataset')
     def test_read_historical_data(self, mocked_reader):
@@ -110,14 +162,13 @@ def test_read_historical_data(self, mocked_reader):
         self.assertEqual(response.status_code, 200)
         mocked_reader.assert_called_once_with(attribute1.dataset)
         self.assertIn('metadata', response.data)
-        self.assertIn('data', response.data)
-        response_data = response.data['data']
-        self.assertIn(attribute1.dataset.name, response_data)
-        results = response_data[attribute1.dataset.name]
+        self.assertIn('results', response.data)
+        results = response.data['results']
         self.assertEqual(len(results), 1)
-        self.assertIn('values', results[0])
-        self.assertIn('test', results[0]['values'])
-        self.assertEqual(100, results[0]['values']['test'])
+        result_data = results[0]['data']
+        self.assertIn('values', result_data[0])
+        self.assertIn('test', result_data[0]['values'])
+        self.assertEqual(100, result_data[0]['values']['test'])
         # with providers
         request = self._get_measurement_request(
             attributes=','.join(attribs),
@@ -125,16 +176,30 @@ def test_read_historical_data(self, mocked_reader):
         )
         response = view(request)
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.data, {})
-
-
-class ForecastAPITest(CommonMeasurementAPITest):
-    """Forecast api test case."""
+        self.assertIn('metadata', response.data)
+        self.assertIn('results', response.data)
+        self.assertEqual(response.data['results'], [])

-    def test_read_forecast_data_empty(self):
-        """Test read forecast data that returns empty."""
+    @patch('gap_api.api_views.measurement.get_reader_from_dataset')
+    def test_read_historical_data_by_polygon(self, mocked_reader):
+        """Test read historical data by polygon."""
         view = MeasurementAPI.as_view()
-        request = self._get_measurement_request()
+        mocked_reader.return_value = MockDatasetReader
+        attribute1 = DatasetAttributeFactory.create()
+        attribute2 = DatasetAttributeFactory.create(
+            dataset=attribute1.dataset
+        )
+        attribs = [
attribute1.attribute.variable_name, + attribute2.attribute.variable_name + ] + request = self._post_measurement_request( + attributes=','.join(attribs) + ) response = view(request) self.assertEqual(response.status_code, 200) - self.assertEqual(response.data, {}) + mocked_reader.assert_called_once_with(attribute1.dataset) + self.assertIn('metadata', response.data) + self.assertIn('results', response.data) + results = response.data['results'] + self.assertEqual(len(results), 1)
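
For reference, a minimal sketch of how the pieces in this change compose for callers. It assumes a configured Django environment with at least one non-internal DatasetAttribute, and that get_data_values() is the read hook on concrete readers (the tests above mock exactly that method); the wiring mirrors MeasurementAPI.get_response_data.

from datetime import datetime

import pytz
from django.contrib.gis.geos import Point

from gap.models import DatasetAttribute
from gap.providers import get_reader_from_dataset
from gap.utils.reader import DatasetReaderInput

# A location input can be a single point (GET ?lat=..&lon=..) ...
location = DatasetReaderInput.from_point(
    Point(x=29.125, y=-2.215, srid=4326))
# ... or a bounding box (GET ?bbox=xmin,ymin,xmax,ymax); values are examples:
# location = DatasetReaderInput.from_bbox([33.9, -4.7, 41.9, 5.0])

start_date = datetime(2024, 4, 1, tzinfo=pytz.UTC)
end_date = datetime(2024, 4, 4, tzinfo=pytz.UTC)

# One dataset attribute drives the reader choice, as in get_response_data.
da = DatasetAttribute.objects.filter(
    dataset__is_internal_use=False
).first()
reader = get_reader_from_dataset(da.dataset)(
    da.dataset, [da], location, start_date, end_date)
# DatasetReaderValue.to_dict() now returns {'geometry': ..., 'data': [...]};
# depending on the concrete reader, a read step may run before values exist.
print(reader.get_data_values().to_dict())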