Skip to content

Commit a60aa37

Browse files
authored
Merge pull request duckdb#464 from Maxxen/main
Only check external access flag on raw GDAL vis handlers, disable filter pushdown in st_read
2 parents cd02956 + 4cda975 commit a60aa37

File tree

6 files changed

+39
-73
lines changed

6 files changed

+39
-73
lines changed

Makefile

+6
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,9 @@ EXT_CONFIG=${PROJ_DIR}extension_config.cmake
66

77
# Include the Makefile from extension-ci-tools
88
include extension-ci-tools/makefiles/duckdb_extension.Makefile
9+
10+
#### Override the included format target because we have different source tree layout
11+
format:
12+
find spatial/src -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i
13+
find spatial/include -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i
14+
cmake-format -i CMakeLists.txt

spatial/include/spatial/gdal/file_handler.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ namespace gdal {
1010
class DuckDBFileSystemHandler;
1111

1212
class GDALClientContextState : public ClientContextState {
13+
ClientContext &context;
1314
string client_prefix;
1415
DuckDBFileSystemHandler *fs_handler;
1516

spatial/src/spatial/core/io/osm/st_read_osm.cpp

-5
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,6 @@ static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindIn
8484
names.push_back("ref_types");
8585

8686
// Create bind data
87-
auto &config = DBConfig::GetConfig(context);
88-
if (!config.options.enable_external_access) {
89-
throw PermissionException("Scanning OSM files is disabled through configuration");
90-
}
91-
9287
auto file_name = StringValue::Get(input.inputs[0]);
9388
auto result = make_uniq<BindData>(file_name);
9489
return std::move(result);

spatial/src/spatial/gdal/file_handler.cpp

+21-7
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ class DuckDBFileHandle : public VSIVirtualHandle {
2424
bool is_eof;
2525

2626
public:
27-
explicit DuckDBFileHandle(unique_ptr<FileHandle> file_handle_p) : file_handle(std::move(file_handle_p)), is_eof(false) {
27+
explicit DuckDBFileHandle(unique_ptr<FileHandle> file_handle_p)
28+
: file_handle(std::move(file_handle_p)), is_eof(false) {
2829
}
2930

3031
vsi_l_offset Tell() override {
@@ -216,16 +217,24 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler {
216217
return nullptr;
217218
}
218219

219-
// Fall back to GDAL instead
220-
auto handler = VSIFileManager::GetHandler(file_name);
221-
if (handler) {
222-
return handler->Open(file_name, access);
223-
} else {
220+
// Fall back to GDAL instead (if external access is enabled)
221+
if (!context.db->config.options.enable_external_access) {
222+
if (bSetError) {
223+
VSIError(VSIE_FileError, "Failed to open file %s with GDAL: External access is disabled",
224+
file_name);
225+
}
226+
return nullptr;
227+
}
228+
229+
const auto handler = VSIFileManager::GetHandler(file_name);
230+
if (!handler) {
224231
if (bSetError) {
225232
VSIError(VSIE_FileError, "Failed to open file %s: %s", file_name, ex.what());
226233
}
227234
return nullptr;
228235
}
236+
237+
return handler->Open(file_name, access);
229238
}
230239
}
231240

@@ -377,7 +386,7 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler {
377386
// use their own attached file systems. This is necessary because GDAL is
378387
// not otherwise aware of the connection context.
379388
//
380-
GDALClientContextState::GDALClientContextState(ClientContext &context) {
389+
GDALClientContextState::GDALClientContextState(ClientContext &context) : context(context) {
381390

382391
// Create a new random prefix for this client
383392
client_prefix = StringUtil::Format("/vsiduckdb-%s/", UUID::ToString(UUID::GenerateRandomUUID()));
@@ -387,6 +396,8 @@ GDALClientContextState::GDALClientContextState(ClientContext &context) {
387396

388397
// Register the file handler
389398
VSIFileManager::InstallHandler(client_prefix, fs_handler);
399+
400+
// Also pass a reference to the client context
390401
}
391402

392403
GDALClientContextState::~GDALClientContextState() {
@@ -404,6 +415,9 @@ void GDALClientContextState::QueryEnd() {
404415
string GDALClientContextState::GetPrefix(const string &value) const {
405416
// If the user explicitly asked for a VSI prefix, we don't add our own
406417
if (StringUtil::StartsWith(value, "/vsi")) {
418+
if (!context.db->config.options.enable_external_access) {
419+
throw PermissionException("Cannot open file '%s' with VSI prefix: External access is disabled", value);
420+
}
407421
return value;
408422
}
409423
return client_prefix + value;

spatial/src/spatial/gdal/functions/st_read.cpp

-61
Original file line numberDiff line numberDiff line change
@@ -61,54 +61,6 @@ static void TryApplySpatialFilter(OGRLayer *layer, SpatialFilter *spatial_filter
6161
}
6262
}
6363

64-
// TODO: Verify that this actually corresponds to the same sql subset expected by OGR SQL
65-
static string FilterToGdal(const TableFilter &filter, const string &column_name) {
66-
67-
switch (filter.filter_type) {
68-
case TableFilterType::CONSTANT_COMPARISON: {
69-
auto &constant_filter = filter.Cast<ConstantFilter>();
70-
return KeywordHelper::WriteOptionallyQuoted(column_name) +
71-
ExpressionTypeToOperator(constant_filter.comparison_type) + constant_filter.constant.ToSQLString();
72-
}
73-
case TableFilterType::CONJUNCTION_AND: {
74-
auto &and_filter = filter.Cast<ConjunctionAndFilter>();
75-
vector<string> filters;
76-
for (const auto &child_filter : and_filter.child_filters) {
77-
filters.push_back(FilterToGdal(*child_filter, column_name));
78-
}
79-
return StringUtil::Join(filters, " AND ");
80-
}
81-
case TableFilterType::CONJUNCTION_OR: {
82-
auto &or_filter = filter.Cast<ConjunctionOrFilter>();
83-
vector<string> filters;
84-
for (const auto &child_filter : or_filter.child_filters) {
85-
filters.push_back(FilterToGdal(*child_filter, column_name));
86-
}
87-
return StringUtil::Join(filters, " OR ");
88-
}
89-
case TableFilterType::IS_NOT_NULL: {
90-
return KeywordHelper::WriteOptionallyQuoted(column_name) + " IS NOT NULL";
91-
}
92-
case TableFilterType::IS_NULL: {
93-
return KeywordHelper::WriteOptionallyQuoted(column_name) + " IS NULL";
94-
}
95-
default:
96-
throw NotImplementedException("FilterToGdal: filter type not implemented");
97-
}
98-
}
99-
100-
static string FilterToGdal(const TableFilterSet &set, const vector<idx_t> &column_ids,
101-
const vector<string> &column_names) {
102-
103-
vector<string> filters;
104-
for (auto &input_filter : set.filters) {
105-
auto col_idx = column_ids[input_filter.first];
106-
auto &col_name = column_names[col_idx];
107-
filters.push_back(FilterToGdal(*input_filter.second, col_name));
108-
}
109-
return StringUtil::Join(filters, " AND ");
110-
}
111-
11264
struct GdalScanFunctionData : public TableFunctionData {
11365
int layer_idx;
11466
bool sequential_layer_scan = false;
@@ -153,11 +105,6 @@ struct GdalScanGlobalState : ArrowScanGlobalState {
153105
unique_ptr<FunctionData> GdalTableFunction::Bind(ClientContext &context, TableFunctionBindInput &input,
154106
vector<LogicalType> &return_types, vector<string> &names) {
155107

156-
auto &config = DBConfig::GetConfig(context);
157-
if (!config.options.enable_external_access) {
158-
throw PermissionException("Scanning GDAL files is disabled through configuration");
159-
}
160-
161108
auto result = make_uniq<GdalScanFunctionData>();
162109

163110
// First scan for "options" parameter
@@ -470,13 +417,6 @@ unique_ptr<GlobalTableFunctionState> GdalTableFunction::InitGlobal(ClientContext
470417
TryApplySpatialFilter(layer, data.spatial_filter.get());
471418
// TODO: Apply projection pushdown
472419

473-
// Apply predicate pushdown
474-
// We simply create a string out of the predicates and pass it to GDAL.
475-
if (input.filters) {
476-
auto filter_clause = FilterToGdal(*input.filters, input.column_ids, data.all_names);
477-
layer->SetAttributeFilter(filter_clause.c_str());
478-
}
479-
480420
// Create arrow stream from layer
481421

482422
gstate.stream = make_uniq<ArrowArrayStreamWrapper>();
@@ -678,7 +618,6 @@ void GdalTableFunction::Register(DatabaseInstance &db) {
678618
scan.get_partition_data = ArrowTableFunction::ArrowGetPartitionData;
679619

680620
scan.projection_pushdown = true;
681-
scan.filter_pushdown = true;
682621

683622
scan.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR);
684623
scan.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR);

test/sql/gdal/gdal_vsi.test

+11
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,14 @@ query I
55
SELECT COUNT(*) FROM st_read('/vsigzip/__WORKING_DIRECTORY__/test/data/amsterdam_roads_50.geojson.gz');
66
----
77
50
8+
9+
10+
# Disable external access
11+
statement ok
12+
set enable_external_access = false;
13+
14+
# Test read via VSI
15+
statement error
16+
SELECT COUNT(*) FROM st_read('/vsigzip/__WORKING_DIRECTORY__/test/data/amsterdam_roads_50.geojson.gz');
17+
----
18+
with VSI prefix: External access is disabled

0 commit comments

Comments
 (0)