Skip to content

Commit

Permalink
GAP API: add query by bbox, polygon and list of points (#44)
Browse files Browse the repository at this point in the history
* add bbox query to return multiple points

* add post method to query by polygon or points

* tahmo measurement find nearest station by points

* update response format

* tahmo group by locations

* add exception handler when reading variables from netcdf

* fix unit tests

* fix api measurement test

* fix lint in unit test measurement api

* refactor netcdffile model to data source file

* fix test for netcdf and tahmo

* add tests to provider classes
  • Loading branch information
danangmassandy authored Jul 17, 2024
1 parent fdaf120 commit b75e9ad
Show file tree
Hide file tree
Showing 19 changed files with 1,498 additions and 277 deletions.
2 changes: 2 additions & 0 deletions deployment/docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,5 @@ xarray==2024.6.0
netCDF4==1.7.1.post1
h5netcdf==1.3.0
scipy==1.14.0
regionmask==0.12.1
zarr==2.18.2
11 changes: 6 additions & 5 deletions django_project/gap/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from .models import (
Attribute, Country, Provider, Measurement, Station, IngestorSession,
Dataset, DatasetAttribute, NetCDFFile, DatasetType, Unit
Dataset, DatasetAttribute, DataSourceFile, DatasetType, Unit
)


Expand Down Expand Up @@ -112,11 +112,12 @@ class IngestorSessionAdmin(admin.ModelAdmin):
list_filter = ('ingestor_type', 'status')


@admin.register(DataSourceFile)
class DataSourceFileAdmin(admin.ModelAdmin):
    """Admin page for the DataSourceFile model (renamed from NetCDFFile)."""

    # Columns shown in the changelist; 'format' distinguishes NETCDF vs ZARR.
    list_display = (
        'name', 'dataset', 'format', 'start_date_time',
        'end_date_time', 'created_on'
    )
    # Allow narrowing the list to a single parent dataset.
    list_filter = ('dataset',)
11 changes: 6 additions & 5 deletions django_project/gap/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
ObservationType,
DatasetTimeStep,
DatasetStore,
NetCDFFile
DataSourceFile
)


Expand Down Expand Up @@ -182,16 +182,17 @@ class Meta: # noqa
value = factory.Faker('pyfloat')


class DataSourceFileFactory(
    BaseFactory[DataSourceFile], metaclass=BaseMetaFactory[DataSourceFile]
):
    """Factory class for DataSourceFile model."""

    class Meta:  # noqa
        model = DataSourceFile

    # Fake values for every required DataSourceFile field.
    name = factory.Faker('text')
    dataset = factory.SubFactory(DatasetFactory)
    start_date_time = factory.Faker('date_time')
    end_date_time = factory.Faker('date_time')
    created_on = factory.Faker('date_time')
    # Defaults to NETCDF; override in tests that exercise ZARR sources.
    format = DatasetStore.NETCDF
8 changes: 5 additions & 3 deletions django_project/gap/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 4.2.7 on 2024-07-14 21:07
# Generated by Django 4.2.7 on 2024-07-17 02:56

import django.contrib.gis.db.models.fields
from django.db import migrations, models
Expand Down Expand Up @@ -46,7 +46,8 @@ class Migration(migrations.Migration):
('name', models.CharField(max_length=512)),
('description', models.TextField(blank=True, null=True)),
('time_step', models.CharField(choices=[('DAILY', 'DAILY'), ('HOURLY', 'HOURLY')], max_length=512)),
('store_type', models.CharField(choices=[('TABLE', 'TABLE'), ('NETCDF', 'NETCDF'), ('EXT_API', 'EXT_API')], max_length=512)),
('store_type', models.CharField(choices=[('TABLE', 'TABLE'), ('NETCDF', 'NETCDF'), ('ZARR', 'ZARR'), ('EXT_API', 'EXT_API')], max_length=512)),
('is_internal_use', models.BooleanField(default=False)),
],
options={
'abstract': False,
Expand Down Expand Up @@ -126,13 +127,14 @@ class Migration(migrations.Migration):
},
),
migrations.CreateModel(
name='NetCDFFile',
name='DataSourceFile',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(help_text='Filename with its path in the object storage (S3)', max_length=512)),
('start_date_time', models.DateTimeField()),
('end_date_time', models.DateTimeField()),
('created_on', models.DateTimeField()),
('format', models.CharField(choices=[('NETCDF', 'NETCDF'), ('ZARR', 'ZARR')], max_length=512)),
('dataset', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='gap.dataset')),
],
),
Expand Down
1 change: 0 additions & 1 deletion django_project/gap/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,3 @@
from gap.models.dataset import *
from gap.models.measurement import *
from gap.models.ingestor import *
from gap.models.netcdf import *
25 changes: 25 additions & 0 deletions django_project/gap/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class DatasetStore:

TABLE = 'TABLE'
NETCDF = 'NETCDF'
ZARR = 'ZARR'
EXT_API = 'EXT_API'


Expand Down Expand Up @@ -65,7 +66,31 @@ class Dataset(Definition):
choices=(
(DatasetStore.TABLE, DatasetStore.TABLE),
(DatasetStore.NETCDF, DatasetStore.NETCDF),
(DatasetStore.ZARR, DatasetStore.ZARR),
(DatasetStore.EXT_API, DatasetStore.EXT_API),
),
max_length=512
)
is_internal_use = models.BooleanField(default=False)


class DataSourceFile(models.Model):
    """Model representing a datasource file that is stored in S3 Storage."""

    # Filename including its path inside the object storage (S3) bucket.
    name = models.CharField(
        max_length=512,
        help_text="Filename with its path in the object storage (S3)"
    )
    # Owning dataset; deleting the dataset removes its source files too.
    dataset = models.ForeignKey(
        Dataset, on_delete=models.CASCADE
    )
    # Inclusive time range that this file's data covers.
    start_date_time = models.DateTimeField()
    end_date_time = models.DateTimeField()
    # When the file record was created (set by the ingestor, not auto_now).
    created_on = models.DateTimeField()
    # Storage format of the file; restricted to the file-backed
    # DatasetStore values (NETCDF or ZARR).
    # NOTE(review): 'format' shadows the builtin, but renaming would
    # require a migration — left as-is.
    format = models.CharField(
        choices=(
            (DatasetStore.NETCDF, DatasetStore.NETCDF),
            (DatasetStore.ZARR, DatasetStore.ZARR),
        ),
        max_length=512
    )
109 changes: 87 additions & 22 deletions django_project/gap/providers/cbam.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,20 @@
from django.contrib.gis.geos import Point
import numpy as np
import xarray as xr
from xarray.core.dataset import Dataset as xrDataset

from gap.models import (
Dataset,
DatasetAttribute,
NetCDFFile
DataSourceFile
)
from gap.utils.reader import (
LocationInputType,
DatasetReaderInput,
DatasetTimelineValue,
DatasetReaderValue,
LocationDatasetReaderValue
)

from gap.utils.reader import DatasetTimelineValue, DatasetReaderValue
from gap.utils.netcdf import (
daterange_inc,
BaseNetCDFReader
Expand All @@ -30,21 +36,23 @@ class CBAMNetCDFReader(BaseNetCDFReader):

def __init__(
        self, dataset: Dataset, attributes: List[DatasetAttribute],
        location_input: DatasetReaderInput, start_date: datetime,
        end_date: datetime) -> None:
    """Initialize CBAMNetCDFReader class.

    :param dataset: Dataset from CBAM provider
    :type dataset: Dataset
    :param attributes: List of attributes to be queried
    :type attributes: List[DatasetAttribute]
    :param location_input: Location to be queried
    :type location_input: DatasetReaderInput
    :param start_date: Start date time filter
    :type start_date: datetime
    :param end_date: End date time filter
    :type end_date: datetime
    """
    # All state handling lives in BaseNetCDFReader; this subclass only
    # forwards the arguments.
    super().__init__(
        dataset, attributes, location_input, start_date, end_date)

def read_historical_data(self, start_date: datetime, end_date: datetime):
"""Read historical data from dataset.
Expand All @@ -57,33 +65,31 @@ def read_historical_data(self, start_date: datetime, end_date: datetime):
self.setup_netcdf_reader()
self.xrDatasets = []
for filter_date in daterange_inc(start_date, end_date):
netcdf_file = NetCDFFile.objects.filter(
netcdf_file = DataSourceFile.objects.filter(
dataset=self.dataset,
start_date_time__gte=filter_date,
end_date_time__lte=filter_date
).first()
if netcdf_file is None:
continue
ds = self.open_dataset(netcdf_file)
val = self.read_variables(ds)
val = self.read_variables(ds, filter_date, filter_date)
if val is None:
continue
self.xrDatasets.append(val)

def get_data_values(self) -> DatasetReaderValue:
"""Fetch data values from list of xArray Dataset object.
def _get_data_values_from_single_location(
self, point: Point, val: xrDataset) -> DatasetReaderValue:
"""Read data values from xrDataset.
:return: Data Value.
:param point: grid cell from the query
:type point: Point
:param val: dataset to be read
:type val: xrDataset
:return: Data Values
:rtype: DatasetReaderValue
"""
results = []
metadata = {
'dataset': [self.dataset.name],
'start_date': self.start_date.isoformat(),
'end_date': self.end_date.isoformat()
}
if len(self.xrDatasets) == 0:
return DatasetReaderValue(metadata, results)
val = xr.combine_nested(
self.xrDatasets, concat_dim=[self.date_variable])
for dt_idx, dt in enumerate(val[self.date_variable].values):
value_data = {}
for attribute in self.attributes:
Expand All @@ -95,4 +101,63 @@ def get_data_values(self) -> DatasetReaderValue:
dt,
value_data
))
return DatasetReaderValue(metadata, results)
return DatasetReaderValue(point, results)

def _get_data_values_from_multiple_locations(
        self, val: xrDataset, locations: List[Point],
        lat_dim: int, lon_dim: int) -> DatasetReaderValue:
    """Collect per-location time series from an xrDataset grid.

    :param val: dataset to be read
    :type val: xrDataset
    :param locations: list of location
    :type locations: List[Point]
    :param lat_dim: latitude dimension
    :type lat_dim: int
    :param lon_dim: longitude dimension
    :type lon_dim: int
    :return: Data Values
    :rtype: DatasetReaderValue
    """
    results = {}
    for time_idx, timestamp in enumerate(val[self.date_variable].values):
        # Walk the lat/lon grid in row-major order; flat_idx lines up
        # with the ordering of `locations`.
        for flat_idx in range(lat_dim * lon_dim):
            row = flat_idx // lon_dim
            col = flat_idx % lon_dim
            attr_values = {}
            for attribute in self.attributes:
                raw = val[attribute.source].values[time_idx, row, col]
                # NaN from the grid is reported as a missing value.
                attr_values[attribute.attribute.variable_name] = (
                    None if np.isnan(raw) else raw
                )
            location = locations[flat_idx]
            results.setdefault(location, []).append(
                DatasetTimelineValue(timestamp, attr_values)
            )
    return LocationDatasetReaderValue(results)

def get_data_values(self) -> DatasetReaderValue:
    """Fetch data values from list of xArray Dataset object.

    :return: Data Value.
    :rtype: DatasetReaderValue
    """
    # Nothing was read (e.g. no matching source files): empty result.
    if not self.xrDatasets:
        return DatasetReaderValue(None, [])
    combined = xr.combine_nested(
        self.xrDatasets, concat_dim=[self.date_variable])
    locations, lat_dim, lon_dim = self.find_locations(combined)
    # Single-point queries return a flat series; bbox/polygon/point-list
    # queries are grouped per grid-cell location.
    if self.location_input.type == LocationInputType.POINT:
        return self._get_data_values_from_single_location(
            locations[0], combined)
    return self._get_data_values_from_multiple_locations(
        combined, locations, lat_dim, lon_dim)
Loading

0 comments on commit b75e9ad

Please sign in to comment.