Skip to content

Commit 81fb43a

Browse files
authored
Merge pull request #10985 from rouault/gdal_fsspec
Python bindings: add an osgeo.gdal_fsspec module that, on import, registers GDAL VSI file system handlers as an fsspec AbstractFileSystem
2 parents 8fb79c4 + 0b07ea7 commit 81fb43a

File tree

14 files changed

+624
-84
lines changed

14 files changed

+624
-84
lines changed

.github/workflows/cmake_builds.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ jobs:
433433
cfitsio freexl geotiff libjpeg-turbo libpq libspatialite libwebp-base pcre pcre2 postgresql \
434434
sqlite tiledb zstd cryptopp cgal doxygen librttopo libkml openssl xz \
435435
openjdk ant qhull armadillo blas blas-devel libblas libcblas liblapack liblapacke blosc libarchive \
436-
arrow-cpp pyarrow libaec libheif libavif cmake
436+
arrow-cpp pyarrow libaec libheif libavif cmake fsspec
437437
- name: Check CMake version
438438
shell: bash -l {0}
439439
run: |

.github/workflows/ubuntu_24.04/Dockerfile.ci

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,5 @@ RUN python3 -m pip install -U --break-system-packages -r /tmp/requirements.txt
158158
# cfchecker requires udunits2
159159
RUN apt-get install -y --allow-unauthenticated libudunits2-0 libudunits2-data
160160
RUN python3 -m pip install --break-system-packages cfchecker
161+
162+
RUN python3 -m pip install --break-system-packages fsspec

.pre-commit-config.yaml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ repos:
55
- id: black
66
exclude: >
77
(?x)^(
8-
swig/python/osgeo/|
8+
swig/python/osgeo/__init__.py|
9+
swig/python/osgeo/gdalnumeric.py|
910
autotest/ogr/data/
1011
)
1112
- repo: https://github.com/timothycrosley/isort
@@ -14,7 +15,8 @@ repos:
1415
- id: isort
1516
exclude: >
1617
(?x)^(
17-
swig/python/osgeo/|
18+
swig/python/osgeo/__init__.py|
19+
swig/python/osgeo/gdalnumeric.py|
1820
autotest/ogr/data/
1921
)
2022
- repo: https://github.com/pycqa/flake8
@@ -23,7 +25,8 @@ repos:
2325
- id: flake8
2426
exclude: >
2527
(?x)^(
26-
swig/python/osgeo/|
28+
swig/python/osgeo/__init__.py|
29+
swig/python/osgeo/gdalnumeric.py|
2730
examples/|
2831
autotest/ogr/data/
2932
)

autotest/gcore/test_gdal_fsspec.py

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
#!/usr/bin/env pytest
2+
# -*- coding: utf-8 -*-
3+
###############################################################################
4+
# Project: GDAL/OGR Test Suite
5+
# Purpose: Test gdal_fsspec module
6+
# Author: Even Rouault <even dot rouault at spatialys.com>
7+
#
8+
###############################################################################
9+
# Copyright (c) 2024, Even Rouault <even dot rouault at spatialys.com>
10+
#
11+
# SPDX-License-Identifier: MIT
12+
###############################################################################
13+
14+
import pytest
15+
16+
from osgeo import gdal
17+
18+
fsspec = pytest.importorskip("fsspec")
19+
pytest.importorskip("fsspec.spec")
20+
21+
from osgeo import gdal_fsspec # NOQA
22+
23+
24+
def test_gdal_fsspec_open_read():
25+
26+
with fsspec.open("gdalvsi://data/byte.tif") as f:
27+
assert len(f.read()) == gdal.VSIStatL("data/byte.tif").size
28+
29+
30+
def test_gdal_fsspec_info_file():
31+
32+
fs = fsspec.filesystem("gdalvsi")
33+
info = fs.info("data/byte.tif")
34+
assert "mtime" in info
35+
del info["mtime"]
36+
assert (info["mode"] & 32768) != 0
37+
del info["mode"]
38+
assert info == {
39+
"name": "data/byte.tif",
40+
"size": 736,
41+
"type": "file",
42+
}
43+
44+
45+
def test_gdal_fsspec_info_dir():
46+
47+
fs = fsspec.filesystem("gdalvsi")
48+
info = fs.info("data")
49+
assert (info["mode"] & 16384) != 0
50+
del info["mode"]
51+
assert info == {
52+
"name": "data",
53+
"size": 0,
54+
"type": "directory",
55+
}
56+
57+
58+
def test_gdal_fsspec_info_error():
59+
60+
fs = fsspec.filesystem("gdalvsi")
61+
with pytest.raises(FileNotFoundError):
62+
fs.info("/i/do/not/exist")
63+
64+
65+
def test_gdal_fsspec_ls():
66+
67+
fs = fsspec.filesystem("gdalvsi")
68+
ret = fs.ls("data")
69+
assert len(ret) > 2
70+
item_of_interest = None
71+
for item in ret:
72+
if item["name"] == "data/byte.tif":
73+
item_of_interest = item
74+
break
75+
assert item_of_interest
76+
assert "mtime" in item_of_interest
77+
del item_of_interest["mtime"]
78+
assert item_of_interest == {
79+
"name": "data/byte.tif",
80+
"size": 736,
81+
"type": "file",
82+
}
83+
84+
85+
def test_gdal_fsspec_ls_file():
86+
87+
fs = fsspec.filesystem("gdalvsi")
88+
ret = fs.ls("data/byte.tif")
89+
assert ret == ["data/byte.tif"]
90+
91+
92+
def test_gdal_fsspec_ls_error():
93+
94+
fs = fsspec.filesystem("gdalvsi")
95+
with pytest.raises(FileNotFoundError):
96+
fs.ls("gdalvsi://i/do/not/exist")
97+
98+
99+
def test_gdal_fsspec_modified():
100+
101+
fs = fsspec.filesystem("gdalvsi")
102+
modified = fs.modified("data/byte.tif")
103+
assert modified is not None
104+
import datetime
105+
106+
assert isinstance(modified, datetime.datetime)
107+
108+
109+
def test_gdal_fsspec_modified_error():
110+
111+
fs = fsspec.filesystem("gdalvsi")
112+
with pytest.raises(FileNotFoundError):
113+
fs.modified("gdalvsi://i/do/not/exist")
114+
115+
116+
def test_gdal_fsspec_rm():
117+
118+
with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f:
119+
f.write(b"""bar""")
120+
fs = fsspec.filesystem("gdalvsi")
121+
fs.info("/vsimem/foo.bin")
122+
fs.rm("/vsimem/foo.bin")
123+
with pytest.raises(FileNotFoundError):
124+
fs.info("/vsimem/foo.bin")
125+
126+
127+
def test_gdal_fsspec_rm_error():
128+
129+
fs = fsspec.filesystem("gdalvsi")
130+
with pytest.raises(FileNotFoundError):
131+
fs.rm("/vsimem/foo.bin")
132+
133+
134+
def test_gdal_fsspec_copy():
135+
136+
with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f:
137+
f.write(b"""bar""")
138+
fs = fsspec.filesystem("gdalvsi")
139+
fs.copy("/vsimem/foo.bin", "/vsimem/bar.bin")
140+
assert fs.info("/vsimem/bar.bin")["size"] == 3
141+
assert fs.info("/vsimem/foo.bin")["size"] == 3
142+
fs.rm("/vsimem/foo.bin")
143+
fs.rm("/vsimem/bar.bin")
144+
145+
146+
def test_gdal_fsspec_copy_error():
147+
148+
fs = fsspec.filesystem("gdalvsi")
149+
with pytest.raises(FileNotFoundError):
150+
fs.copy("/vsimem/foo.bin", "/vsimem/bar.bin")
151+
152+
153+
def test_gdal_fsspec_mv():
154+
155+
with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f:
156+
f.write(b"""bar""")
157+
fs = fsspec.filesystem("gdalvsi")
158+
fs.mv("/vsimem/foo.bin", "/vsimem/bar.bin")
159+
assert fs.info("/vsimem/bar.bin")["size"] == 3
160+
with pytest.raises(FileNotFoundError):
161+
fs.info("/vsimem/foo.bin")
162+
fs.rm("/vsimem/bar.bin")
163+
164+
165+
def test_gdal_fsspec_mv_error():
166+
167+
fs = fsspec.filesystem("gdalvsi")
168+
with pytest.raises(FileNotFoundError):
169+
fs.mv("/vsimem/foo.bin", "/bar.bin")
170+
171+
172+
def test_gdal_fsspec_mkdir(tmp_path):
173+
174+
fs = fsspec.filesystem("gdalvsi")
175+
176+
my_path = str(tmp_path) + "/my_dir"
177+
178+
fs.mkdir(my_path)
179+
assert fs.info(my_path)["type"] == "directory"
180+
with pytest.raises(FileExistsError):
181+
fs.mkdir(my_path)
182+
fs.rmdir(my_path)
183+
184+
fs.mkdir(my_path + "/my_subdir")
185+
assert fs.info(my_path)["type"] == "directory"
186+
assert fs.info(my_path + "/my_subdir")["type"] == "directory"
187+
fs.rmdir(my_path + "/my_subdir")
188+
fs.rmdir(my_path)
189+
with pytest.raises(FileNotFoundError):
190+
fs.info(my_path)
191+
192+
fs = fsspec.filesystem("gdalvsi")
193+
with pytest.raises(Exception):
194+
fs.mkdir(my_path + "/my_subdir", create_parents=False)
195+
with pytest.raises(FileNotFoundError):
196+
fs.info(my_path)
197+
198+
199+
def test_gdal_fsspec_makedirs(tmp_path):
200+
201+
fs = fsspec.filesystem("gdalvsi")
202+
203+
my_path = str(tmp_path) + "/my_dir"
204+
fs.makedirs(my_path)
205+
assert fs.info(my_path)["type"] == "directory"
206+
with pytest.raises(FileExistsError):
207+
fs.makedirs(my_path)
208+
fs.makedirs(my_path, exist_ok=True)
209+
fs.rmdir(my_path)
210+
211+
212+
def test_gdal_fsspec_usable_by_pyarrow_dataset(tmp_vsimem):
213+
214+
ds = pytest.importorskip("pyarrow.dataset")
215+
216+
tmp_vsimem_file = str(tmp_vsimem / "tmp.parquet")
217+
gdal.FileFromMemBuffer(
218+
tmp_vsimem_file, open("../ogr/data/parquet/test.parquet", "rb").read()
219+
)
220+
221+
fs_vsimem = fsspec.filesystem("gdalvsi")
222+
223+
assert ds.dataset(tmp_vsimem_file, filesystem=fs_vsimem) is not None
224+
225+
assert ds.dataset(str(tmp_vsimem), filesystem=fs_vsimem) is not None

autotest/pymod/gdaltest.py

Lines changed: 1 addition & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2102,87 +2102,8 @@ def reopen(ds, update=False, open_options=None):
21022102
)
21032103

21042104

2105-
# VSIFile helper class
2106-
2107-
2108-
class VSIFile:
2109-
def __init__(self, path, mode, encoding="utf-8"):
2110-
self._path = path
2111-
self._mode = mode
2112-
2113-
self._binary = "b" in mode
2114-
self._encoding = encoding
2115-
2116-
self._fp = gdal.VSIFOpenExL(self._path, self._mode, True)
2117-
if self._fp is None:
2118-
raise OSError(gdal.VSIGetLastErrorMsg())
2119-
2120-
self._closed = False
2121-
2122-
def __enter__(self):
2123-
return self
2124-
2125-
def __exit__(self, *args):
2126-
self.close()
2127-
2128-
def __iter__(self):
2129-
return self
2130-
2131-
def __next__(self):
2132-
line = gdal.CPLReadLineL(self._fp)
2133-
if line is None:
2134-
raise StopIteration
2135-
if self._binary:
2136-
return line.encode()
2137-
return line
2138-
2139-
def close(self):
2140-
if self._closed:
2141-
return
2142-
2143-
self._closed = True
2144-
gdal.VSIFCloseL(self._fp)
2145-
2146-
def read(self, size=-1):
2147-
if size == -1:
2148-
pos = self.tell()
2149-
self.seek(0, 2)
2150-
size = self.tell()
2151-
self.seek(pos)
2152-
2153-
raw = gdal.VSIFReadL(1, size, self._fp)
2154-
2155-
if self._binary:
2156-
return bytes(raw)
2157-
else:
2158-
return raw.decode(self._encoding)
2159-
2160-
def write(self, x):
2161-
2162-
if self._binary:
2163-
assert type(x) in (bytes, bytearray, memoryview)
2164-
else:
2165-
assert type(x) is str
2166-
x = x.encode(self._encoding)
2167-
2168-
planned_write = len(x)
2169-
actual_write = gdal.VSIFWriteL(x, 1, planned_write, self._fp)
2170-
2171-
if planned_write != actual_write:
2172-
raise OSError(
2173-
f"Expected to write {planned_write} bytes but {actual_write} were written"
2174-
)
2175-
2176-
def seek(self, offset, whence=0):
2177-
if gdal.VSIFSeekL(self._fp, offset, whence) != 0:
2178-
raise OSError(gdal.VSIGetLastErrorMsg())
2179-
2180-
def tell(self):
2181-
return gdal.VSIFTellL(self._fp)
2182-
2183-
21842105
def vsi_open(path, mode="r"):
2185-
return VSIFile(path, mode)
2106+
return gdal.VSIFile(path, mode)
21862107

21872108

21882109
def vrt_has_open_support():

doc/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# This file may be used to create an environment using:
22
# $ pip install --upgrade -r <this file>
3+
fsspec
34
numpy
45
sphinx
56
breathe

doc/source/api/python/general.rst

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,26 @@ Error Handling
9595
File Management
9696
---------------
9797

98+
osgeo.gdal_fsspec module
99+
++++++++++++++++++++++++
100+
101+
.. automodule:: osgeo.gdal_fsspec
102+
:members:
103+
:undoc-members:
104+
:show-inheritance:
105+
:noindex:
106+
107+
osgeo.gdal.VSIFile class
108+
++++++++++++++++++++++++
109+
110+
.. autoclass:: osgeo.gdal.VSIFile
111+
:members:
112+
:undoc-members:
113+
:noindex:
114+
115+
Low level functions
116+
+++++++++++++++++++
117+
98118
.. autofunction:: osgeo.gdal.CloseDir
99119

100120
.. autofunction:: osgeo.gdal.CopyFile

0 commit comments

Comments
 (0)