Skip to content

Commit 99eace2

Browse files
Review netcdf to_xarray args (#369)
* Review netcdf to_xarray args
1 parent 0212f7f commit 99eace2

File tree

7 files changed

+174
-20
lines changed

7 files changed

+174
-20
lines changed

docs/guide/sources.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ We can get data from a given source by using :func:`from_source`:
3838
- read data from a stream
3939
* - :ref:`data-sources-memory`
4040
- read data from a memory buffer
41+
* - :ref:`data-sources-multi`
42+
- read data from multiple sources
4143
* - :ref:`data-sources-ads`
4244
- retrieve data from the `Copernicus Atmosphere Data Store <https://ads.atmosphere.copernicus.eu/>`_ (ADS)
4345
* - :ref:`data-sources-cds`
@@ -418,6 +420,27 @@ memory
418420
print(f.metadata("param"))
419421
420422
423+
.. _data-sources-multi:
424+
425+
multi
426+
--------------
427+
428+
.. py:function:: from_source("multi", *sources, merger=None, **kwargs)
429+
:noindex:
430+
431+
The ``multi`` source reads multiple sources.
432+
433+
:param tuple *sources: the sources
434+
:param merger: if it is None, an attempt is made to merge/concatenate the sources by their classes (using the nearest common class). Otherwise the sources are merged/concatenated using the merger in a lazy way. The merger can be one of the following:
435+
436+
- class/object implementing the :func:`to_xarray` or :func:`to_pandas` methods
437+
- callable
438+
- str, describing a call either to "concat" or "merge". E.g.: "concat(concat_dim=time)"
439+
- tuple with 2 elements. The first element is a str, either "concat" or "merge", and the second element is a dict with the keyword arguments for the call. E.g.: ("concat", {"concat_dim": "time"})
440+
:param dict **kwargs: other keyword arguments
441+
442+
443+
421444
.. _data-sources-ads:
422445

423446
ads

docs/howtos.rst

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,31 @@ save the results of a :ref:`MARS retrieval <data-sources-mars>` into a file:
2424
)
2525
2626
ds.save("my_data.grib")
27+
28+
29+
How to call to_xarray() with arguments for NetCDF data?
30+
---------------------------------------------------------
31+
32+
When :func:`to_xarray` is called on NetCDF data, it calls ``xarray.open_mfdataset`` internally. You can pass arguments to this xarray function by using the ``xarray_open_mfdataset_kwargs`` option. For example:
33+
34+
35+
.. code-block:: python
36+
37+
import earthkit.data
38+
39+
req = {
40+
"format": "zip",
41+
"origin": "c3s",
42+
"sensor": "olci",
43+
"version": "1_1",
44+
"year": "2022",
45+
"month": "04",
46+
"nominal_day": "01",
47+
"variable": "pixel_variables",
48+
"region": "europe",
49+
}
50+
51+
ds = earthkit.data.from_source("cds", "satellite-fire-burned-area", req)
52+
r = ds.to_xarray(
53+
xarray_open_mfdataset_kwargs=dict(decode_cf=False, decode_times=False)
54+
)

earthkit/data/mergers/__init__.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
LOG = logging.getLogger(__name__)
1717

18-
FORWARDS = ("to_xarray", "to_pandas", "to_tfdataset")
18+
FORWARDS = ("to_xarray", "to_pandas")
1919

2020

2121
def _nearest_common_class(objects):
@@ -86,16 +86,6 @@ def to_pandas(self, **kwargs):
8686
**kwargs,
8787
)
8888

89-
def to_tfdataset(self, **kwargs):
90-
from .tfdataset import merge
91-
92-
return merge(
93-
sources=self.sources,
94-
paths=self.paths,
95-
reader_class=self.reader_class,
96-
**kwargs,
97-
)
98-
9989
def to_xarray(self, **kwargs):
10090
from .xarray import merge
10191

@@ -118,9 +108,6 @@ def to_xarray(self, *args, **kwargs):
118108
def to_pandas(self, *args, **kwargs):
119109
return self.obj.to_pandas(self.paths_or_sources, **kwargs)
120110

121-
def to_tfdataset(self, *args, **kwargs):
122-
return self.obj.to_tfdataset(self.paths_or_sources, **kwargs)
123-
124111

125112
class CallableMerger(Merger):
126113
def __init__(self, func, sources, *args, **kwargs):
@@ -132,7 +119,6 @@ def _call_func(self, *args, **kwargs):
132119

133120
to_xarray = _call_func
134121
to_pandas = _call_func
135-
to_tfdataset = _call_func
136122

137123

138124
class XarrayGenericMerger(Merger):

earthkit/data/readers/netcdf/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ def to_xarray_multi_from_paths(cls, paths, **kwargs):
6666

6767
options = dict()
6868
options.update(kwargs.get("xarray_open_mfdataset_kwargs", {}))
69+
if not options:
70+
options = dict(**kwargs)
6971

7072
return xr.open_mfdataset(
7173
paths,

earthkit/data/readers/netcdf/fieldlist.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ def to_xarray_multi_from_paths(cls, paths, **kwargs):
188188

189189
options = dict()
190190
options.update(kwargs.get("xarray_open_mfdataset_kwargs", {}))
191+
if not options:
192+
options = dict(**kwargs)
191193

192194
return xr.open_mfdataset(
193195
paths,
@@ -248,7 +250,7 @@ def __init__(self, *args, **kwargs):
248250
def to_xarray(self, **kwargs):
249251
import xarray as xr
250252

251-
return xr.merge([x.ds for x in self._indexes], **kwargs)
253+
return xr.merge([x._ds for x in self._indexes], **kwargs)
252254

253255

254256
class NetCDFFieldList(XArrayFieldListCore):
@@ -268,10 +270,8 @@ def new_mask_index(cls, *args, **kwargs):
268270
return NetCDFMaskFieldList(*args, **kwargs)
269271

270272
def to_xarray(self, **kwargs):
271-
import xarray as xr
272-
273-
if self.path.startswith("http"):
274-
return xr.open_dataset(self.path, **kwargs)
273+
# if self.path.startswith("http"):
274+
# return xr.open_dataset(self.path, **kwargs)
275275
return type(self).to_xarray_multi_from_paths([self.path], **kwargs)
276276

277277
def write(self, *args, **kwargs):

tests/netcdf/test_netcdf_concat.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,55 @@ def test_netcdf_concat(mode):
7474
],
7575
}
7676

77+
import xarray as xr
78+
79+
target = xr.merge([ds1.to_xarray(), ds2.to_xarray()])
80+
merged = ds.to_xarray()
81+
assert target.identical(merged)
82+
83+
84+
def test_netcdf_read_multiple_files():
85+
ds = from_source(
86+
"file",
87+
[
88+
earthkit_test_data_file("era5_2t_1.nc"),
89+
earthkit_test_data_file("era5_2t_2.nc"),
90+
],
91+
)
92+
93+
assert len(ds) == 2
94+
assert ds.metadata("variable") == ["t2m", "t2m"]
95+
96+
assert ds[0].datetime() == {
97+
"base_time": datetime.datetime(2021, 3, 1, 12, 0),
98+
"valid_time": datetime.datetime(2021, 3, 1, 12, 0),
99+
}
100+
assert ds[1].datetime() == {
101+
"base_time": datetime.datetime(2021, 3, 2, 12, 0),
102+
"valid_time": datetime.datetime(2021, 3, 2, 12, 0),
103+
}
104+
assert ds.datetime() == {
105+
"base_time": [
106+
datetime.datetime(2021, 3, 1, 12, 0),
107+
datetime.datetime(2021, 3, 2, 12, 0),
108+
],
109+
"valid_time": [
110+
datetime.datetime(2021, 3, 1, 12, 0),
111+
datetime.datetime(2021, 3, 2, 12, 0),
112+
],
113+
}
114+
115+
import xarray as xr
116+
117+
target = xr.merge(
118+
[
119+
xr.open_dataset(earthkit_test_data_file("era5_2t_1.nc")),
120+
xr.open_dataset(earthkit_test_data_file("era5_2t_2.nc")),
121+
]
122+
)
123+
merged = ds.to_xarray()
124+
assert target.identical(merged)
125+
77126

78127
@pytest.mark.parametrize("custom_merger", (merger_func, Merger_obj()))
79128
def test_netdcf_merge_custom(custom_merger):

tests/netcdf/test_netcdf_convert.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env python3
2+
3+
# (C) Copyright 2020 ECMWF.
4+
#
5+
# This software is licensed under the terms of the Apache Licence Version 2.0
6+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
7+
# In applying this licence, ECMWF does not waive the privileges and immunities
8+
# granted to it by virtue of its status as an intergovernmental organisation
9+
# nor does it submit to any jurisdiction.
10+
#
11+
12+
import numpy as np
13+
import pytest
14+
15+
from earthkit.data import from_source
16+
from earthkit.data.testing import earthkit_remote_test_data_file
17+
18+
19+
@pytest.mark.long_test
20+
@pytest.mark.download
21+
def test_netcdf_to_xarray_args():
22+
# The JD variable in the NetCDF is defined as follows:
23+
#
24+
# short JD(time, lat, lon) ;
25+
# string JD:long_name = "Date of the first detection" ;
26+
# string JD:units = "days since 2022-01-01" ;
27+
# string JD:comment = "Possible values: 0 when the pixel is not burned; 1 to 366 day of
28+
# the first detection when the pixel is burned; -1 when the pixel is not observed
29+
# in the month; -2 when pixel is not burnable: water bodies, bare areas, urban areas,
30+
# and permanent snow and ice.
31+
#
32+
# when loaded with xarray.open_dataset/xarray.open_mfdataset without any kwargs the
33+
# type of the JD variable is datetime64[ns], which is wrong. The correct type should
34+
# be int16.
35+
36+
ds = from_source(
37+
"url",
38+
earthkit_remote_test_data_file(
39+
"test-data", "20220401-C3S-L3S_FIRE-BA-OLCI-AREA_3-fv1.1.nc"
40+
),
41+
)
42+
43+
r = ds.to_xarray(
44+
xarray_open_mfdataset_kwargs=dict(decode_cf=False, decode_times=False)
45+
)
46+
assert r["JD"].dtype == "int16"
47+
r["JD"].shape == (1, 20880, 28440)
48+
assert np.isclose(r["JD"].values.min(), -2)
49+
assert np.isclose(r["JD"].values.max(), 120)
50+
51+
r = ds.to_xarray(decode_cf=False, decode_times=False)
52+
assert r["JD"].dtype == "int16"
53+
r["JD"].shape == (1, 20880, 28440)
54+
assert np.isclose(r["JD"].values.min(), -2)
55+
assert np.isclose(r["JD"].values.max(), 120)
56+
57+
r = ds.to_xarray()
58+
assert r["JD"].dtype == "<M8[ns]"
59+
r["JD"].shape == (1, 20880, 28440)
60+
61+
62+
if __name__ == "__main__":
63+
from earthkit.data.testing import main
64+
65+
# test_datetime()
66+
main(__file__)

0 commit comments

Comments
 (0)