From 02cf60098c2ec4712b7b50535f40043735360002 Mon Sep 17 00:00:00 2001 From: Pramod Satya Date: Mon, 19 May 2025 20:11:11 -0700 Subject: [PATCH] Add hive configs for supported read and write formats --- .../src/main/sphinx/connector/hive.rst | 6 ++++ .../hive/s3select/S3SelectTestHelper.java | 3 +- .../presto/hive/HiveClientConfig.java | 30 ++++++++++++++++++- .../facebook/presto/hive/HiveMetadata.java | 8 ++++- .../presto/hive/HiveSessionProperties.java | 13 ++++++++ .../presto/hive/HiveSplitManager.java | 22 ++++++++++++-- .../presto/hive/AbstractTestHiveClient.java | 3 +- .../hive/AbstractTestHiveFileSystem.java | 3 +- .../presto/hive/TestHiveClientConfig.java | 10 +++++-- .../presto/hive/TestHiveSplitManager.java | 6 ++-- 10 files changed, 93 insertions(+), 11 deletions(-) diff --git a/presto-docs/src/main/sphinx/connector/hive.rst b/presto-docs/src/main/sphinx/connector/hive.rst index 113c5520dd12c..441b11e4f9f21 100644 --- a/presto-docs/src/main/sphinx/connector/hive.rst +++ b/presto-docs/src/main/sphinx/connector/hive.rst @@ -208,6 +208,12 @@ Property Name Description ``hive.file-status-cache.max-retained-size`` Maximum size in bytes of the directory listing cache ``0KB`` ``hive.metastore.catalog.name`` Specifies the catalog name to be passed to the metastore. + + ``hive.read-formats`` Comma separated list of file formats supported for reads + from tables. + + ``hive.write-formats`` Comma separated list of file formats supported for writes + to tables. ================================================== ============================================================ ============ Metastore Configuration Properties diff --git a/presto-hive-hadoop2/src/test/java/com/facebook/presto/hive/s3select/S3SelectTestHelper.java b/presto-hive-hadoop2/src/test/java/com/facebook/presto/hive/s3select/S3SelectTestHelper.java index e28d632f64354..f8f7fd42e26e9 100644 --- a/presto-hive-hadoop2/src/test/java/com/facebook/presto/hive/s3select/S3SelectTestHelper.java +++ b/presto-hive-hadoop2/src/test/java/com/facebook/presto/hive/s3select/S3SelectTestHelper.java @@ -199,7 +199,8 @@ public S3SelectTestHelper(String host, config.getRecursiveDirWalkerEnabled(), new ConfigBasedCacheQuotaRequirementProvider(cacheConfig), new HiveEncryptionInformationProvider(ImmutableSet.of()), - new HivePartitionSkippabilityChecker()); + new HivePartitionSkippabilityChecker(), + ImmutableList.of()); pageSourceProvider = new HivePageSourceProvider( config, hdfsEnvironment, diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java index 6c569bb7ea255..ee4289b505ad4 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java @@ -222,6 +222,8 @@ public class HiveClientConfig private int parquetQuickStatsMaxConcurrentCalls = 500; private int quickStatsMaxConcurrentCalls = 100; private boolean legacyTimestampBucketing; + private List readFormats = ImmutableList.of(); + private String writeFormats; @Min(0) public int getMaxInitialSplits() @@ -464,7 +466,7 @@ public List getResourceConfigFiles() @Config("hive.config.resources") public HiveClientConfig setResourceConfigFiles(String files) { - this.resourceConfigFiles = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(files); + this.resourceConfigFiles = SPLITTER.splitToList(files); return this; } @@ -1831,4 +1833,30 @@ public HiveClientConfig setLegacyTimestampBucketing(boolean legacyTimestampBucke this.legacyTimestampBucketing = legacyTimestampBucketing; return this; } + + @Config("hive.read-formats") + @ConfigDescription("File formats supported for read operation.") + public HiveClientConfig setReadFormats(String formats) + { + this.readFormats = SPLITTER.splitToList(formats); + return this; + } + + public List getReadFormats() + { + return readFormats; + } + + @Config("hive.write-formats") + @ConfigDescription("File formats supported for write operation.") + public HiveClientConfig setWriteFormats(String formats) + { + this.writeFormats = formats; + return this; + } + + public String getWriteFormats() + { + return writeFormats; + } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java index f20393703e9c3..4795c247bef77 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java @@ -215,6 +215,7 @@ import static com.facebook.presto.hive.HiveSessionProperties.getTemporaryTableSchema; import static com.facebook.presto.hive.HiveSessionProperties.getTemporaryTableStorageFormat; import static com.facebook.presto.hive.HiveSessionProperties.getVirtualBucketCount; +import static com.facebook.presto.hive.HiveSessionProperties.getWriteFormats; import static com.facebook.presto.hive.HiveSessionProperties.isBucketExecutionEnabled; import static com.facebook.presto.hive.HiveSessionProperties.isCollectColumnStatisticsOnWrite; import static com.facebook.presto.hive.HiveSessionProperties.isCreateEmptyBucketFiles; @@ -1927,6 +1928,12 @@ private HiveInsertTableHandle beginInsertInternal(ConnectorSession session, Conn SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName(); Table table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName()) .orElseThrow(() -> new TableNotFoundException(tableName)); + HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table); + List writeFormats = getWriteFormats(session); + if (!writeFormats.isEmpty() && !writeFormats.contains(tableStorageFormat)) { + throw new PrestoException(NOT_SUPPORTED, + format("File format %s not supported for write operation.", tableStorageFormat)); + } tableWritabilityChecker.checkTableWritable(table); @@ -1947,7 +1954,6 @@ private HiveInsertTableHandle beginInsertInternal(ConnectorSession session, Conn .filter(columnHandle -> !columnHandle.isHidden()) .collect(toList()); - HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table); LocationHandle locationHandle; boolean isTemporaryTable = table.getTableType().equals(TEMPORARY_TABLE); boolean tempPathRequired = isTempPathRequired( diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java index 12249c8dd5186..f1efbffaae2cf 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java @@ -17,6 +17,7 @@ import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.session.PropertyMetadata; +import com.google.common.base.Splitter; import com.google.common.collect.ImmutableList; import io.airlift.units.DataSize; import io.airlift.units.Duration; @@ -134,6 +135,7 @@ public final class HiveSessionProperties public static final String DYNAMIC_SPLIT_SIZES_ENABLED = "dynamic_split_sizes_enabled"; public static final String SKIP_EMPTY_FILES = "skip_empty_files"; public static final String LEGACY_TIMESTAMP_BUCKETING = "legacy_timestamp_bucketing"; + public static final String WRITE_FORMATS = "write_formats"; public static final String NATIVE_STATS_BASED_FILTER_REORDER_DISABLED = "native_stats_based_filter_reorder_disabled"; @@ -660,6 +662,11 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon NATIVE_STATS_BASED_FILTER_REORDER_DISABLED, "Native Execution only. Disable stats based filter reordering.", false, + true), + stringProperty( + WRITE_FORMATS, + "File formats supported for write operation.", + hiveClientConfig.getWriteFormats(), true)); } @@ -1148,4 +1155,10 @@ public static boolean isLegacyTimestampBucketing(ConnectorSession session) { return session.getProperty(LEGACY_TIMESTAMP_BUCKETING, Boolean.class); } + + public static List getWriteFormats(ConnectorSession session) + { + String formats = session.getProperty(WRITE_FORMATS, String.class); + return (formats == null) ? ImmutableList.of() : Splitter.on(',').trimResults().omitEmptyStrings().splitToList(formats); + } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitManager.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitManager.java index 62e620bee3e2f..3ade3a024b645 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitManager.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitManager.java @@ -151,6 +151,7 @@ public class HiveSplitManager private final CacheQuotaRequirementProvider cacheQuotaRequirementProvider; private final HiveEncryptionInformationProvider encryptionInformationProvider; private final PartitionSkippabilityChecker partitionSkippabilityChecker; + private final List readFormats; @Inject public HiveSplitManager( @@ -181,7 +182,8 @@ public HiveSplitManager( hiveClientConfig.getRecursiveDirWalkerEnabled(), cacheQuotaRequirementProvider, encryptionInformationProvider, - partitionSkippabilityChecker); + partitionSkippabilityChecker, + hiveClientConfig.getReadFormats()); } public HiveSplitManager( @@ -200,7 +202,8 @@ public HiveSplitManager( boolean recursiveDfsWalkerEnabled, CacheQuotaRequirementProvider cacheQuotaRequirementProvider, HiveEncryptionInformationProvider encryptionInformationProvider, - PartitionSkippabilityChecker partitionSkippabilityChecker) + PartitionSkippabilityChecker partitionSkippabilityChecker, + List readFormats) { this.hiveTransactionManager = requireNonNull(hiveTransactionManager, "hiveTransactionManager is null"); this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null"); @@ -219,6 +222,7 @@ public HiveSplitManager( this.cacheQuotaRequirementProvider = requireNonNull(cacheQuotaRequirementProvider, "cacheQuotaRequirementProvider is null"); this.encryptionInformationProvider = requireNonNull(encryptionInformationProvider, "encryptionInformationProvider is null"); this.partitionSkippabilityChecker = requireNonNull(partitionSkippabilityChecker, "partitionSkippabilityChecker is null"); + this.readFormats = requireNonNull(readFormats, "readFormats is null"); } @Override @@ -250,6 +254,20 @@ public ConnectorSplitSource getSplits( session.getRuntimeStats()); Table table = layout.getTable(metastore, metastoreContext); + if (!readFormats.isEmpty()) { + StorageFormat storageFormat = table.getStorage().getStorageFormat(); + Optional hiveStorageFormat = getHiveStorageFormat(storageFormat); + if (hiveStorageFormat.isPresent()) { + if (!readFormats.contains(hiveStorageFormat.get())) { + throw new HiveNotReadableException(tableName, Optional.empty(), + format("File format %s not supported for read operation.", hiveStorageFormat.get())); + } + } + else { + throw new HiveNotReadableException(tableName, Optional.empty(), "Storage format is null."); + } + } + if (!isOfflineDataDebugModeEnabled(session)) { // verify table is not marked as non-readable String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java index 9035d5b5de72d..9b91f01cb7cf9 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java @@ -1067,7 +1067,8 @@ protected final void setup(String databaseName, HiveClientConfig hiveClientConfi false, new ConfigBasedCacheQuotaRequirementProvider(cacheConfig), encryptionInformationProvider, - new HivePartitionSkippabilityChecker()); + new HivePartitionSkippabilityChecker(), + ImmutableList.of()); pageSinkProvider = new HivePageSinkProvider( getDefaultHiveFileWriterFactories(hiveClientConfig, metastoreClientConfig), hdfsEnvironment, diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java index e27b042b04a66..5c96d303bbafc 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java @@ -247,7 +247,8 @@ protected void setup(String host, int port, String databaseName, BiFunction