Skip to content

Commit 8bb9a88

Browse files
authored
feat: Introduce snapshot summary properties (#1336)
## Which issue does this PR close? - Closes #1329. TL;DR: I want to attach customized metadata (with its size kept within 200 B) to each snapshot, and the snapshot summary properties are the natural place for this feature request. ## What changes are included in this PR? This PR adds a new table update action for snapshot summary properties. ## Are these changes tested? Yes, unit tests have been added.
1 parent 322e674 commit 8bb9a88

File tree

4 files changed

+120
-0
lines changed

4 files changed

+120
-0
lines changed

crates/iceberg/src/catalog/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,12 @@ pub enum TableUpdate {
495495
/// Schema IDs to remove.
496496
schema_ids: Vec<i32>,
497497
},
498+
/// Add snapshot summary properties.
499+
#[serde(rename_all = "kebab-case")]
500+
AddSnapshotSummaryProperties {
501+
/// Additional properties to add.
502+
properties: HashMap<String, String>,
503+
},
498504
}
499505

500506
impl TableUpdate {
@@ -539,6 +545,9 @@ impl TableUpdate {
539545
Ok(builder.remove_partition_statistics(snapshot_id))
540546
}
541547
TableUpdate::RemoveSchemas { schema_ids } => builder.remove_schemas(&schema_ids),
548+
TableUpdate::AddSnapshotSummaryProperties { properties } => {
549+
builder.add_snapshot_summary_properties(properties)
550+
}
542551
}
543552
}
544553
}
@@ -2098,4 +2107,24 @@ mod tests {
20982107
},
20992108
);
21002109
}
2110+
2111+
#[test]
fn test_add_snapshot_summary_properties() {
    // JSON form of the update: the action tag and field names are kebab-case.
    let json = r#"
{
    "action": "add-snapshot-summary-properties",
    "properties": {
        "prop-key": "prop-value"
    }
}
"#;

    // The in-memory value the JSON above is checked against.
    let update = TableUpdate::AddSnapshotSummaryProperties {
        properties: HashMap::from([("prop-key".to_string(), "prop-value".to_string())]),
    };

    // NOTE(review): `test_serde_json` is defined elsewhere in this module;
    // presumably it checks both serialization and deserialization — confirm.
    test_serde_json(json, update);
}
21012130
}

crates/iceberg/src/spec/snapshot.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,11 @@ impl Snapshot {
214214
snapshot_id: self.snapshot_id,
215215
}
216216
}
217+
218+
/// Add the given properties map to snapshot summary.
219+
pub(crate) fn add_summary_properties(&mut self, props: HashMap<String, String>) {
220+
self.summary.additional_properties.extend(props);
221+
}
217222
}
218223

219224
pub(super) mod _serde {

crates/iceberg/src/spec/table_metadata_builder.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,33 @@ impl TableMetadataBuilder {
12441244

12451245
Ok(self)
12461246
}
1247+
1248+
/// Add summary properties to the latest snapshot for the table metadata.
1249+
pub fn add_snapshot_summary_properties(
1250+
mut self,
1251+
properties: HashMap<String, String>,
1252+
) -> Result<Self> {
1253+
if properties.is_empty() {
1254+
return Ok(self);
1255+
}
1256+
1257+
let snapshot_id = self.metadata.current_snapshot_id.unwrap();
1258+
let mut cur_snapshot = self
1259+
.metadata
1260+
.snapshots
1261+
.remove(&snapshot_id)
1262+
.unwrap()
1263+
.as_ref()
1264+
.clone();
1265+
cur_snapshot.add_summary_properties(properties.clone());
1266+
self.metadata
1267+
.snapshots
1268+
.insert(snapshot_id, Arc::new(cur_snapshot));
1269+
self.changes
1270+
.push(TableUpdate::AddSnapshotSummaryProperties { properties });
1271+
1272+
Ok(self)
1273+
}
12471274
}
12481275

12491276
impl From<TableMetadataBuildResult> for TableMetadata {
@@ -2496,4 +2523,51 @@ mod tests {
24962523
};
24972524
assert_eq!(remove_schema_ids, &[0]);
24982525
}
2526+
2527+
#[test]
fn test_add_snapshot_summary_properties() {
    // Load the table metadata fixture from the crate's test data directory.
    let fixture_path = format!(
        "{}/testdata/table_metadata/{}",
        env!("CARGO_MANIFEST_DIR"),
        "TableMetadataV2Valid.json"
    );
    let fixture = BufReader::new(File::open(fixture_path).unwrap());
    let metadata: TableMetadata = serde_json::from_reader(fixture).unwrap();

    let table = Table::builder()
        .metadata(metadata)
        .metadata_location("s3://bucket/test/location/metadata/v1.json".to_string())
        .identifier(TableIdent::from_strs(["ns1", "test1"]).unwrap())
        .file_io(FileIOBuilder::new("memory").build().unwrap())
        .build()
        .unwrap();

    // Precondition: the current snapshot starts without additional summary properties.
    let initial_snapshot = table.metadata().current_snapshot().unwrap();
    assert!(initial_snapshot.summary().additional_properties.is_empty());

    let new_properties =
        HashMap::from([("prop-key".to_string(), "prop-value".to_string())]);

    // Apply the update through the metadata builder and build the new metadata.
    let build_result = table
        .metadata()
        .clone()
        .into_builder(None)
        .add_snapshot_summary_properties(new_properties.clone())
        .unwrap()
        .build()
        .unwrap();

    // The current snapshot's summary must now contain exactly the added properties.
    let updated_snapshot = build_result.metadata.current_snapshot().unwrap();
    assert_eq!(
        updated_snapshot.summary().additional_properties,
        new_properties
    );
}
24992573
}

crates/iceberg/src/transaction/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,18 @@ impl<'a> Transaction<'a> {
128128
Ok(self)
129129
}
130130

131+
/// Add snapshot summary properties.
132+
pub fn add_snapshot_summary_properties(
133+
mut self,
134+
props: HashMap<String, String>,
135+
) -> Result<Self> {
136+
self.apply(
137+
vec![TableUpdate::AddSnapshotSummaryProperties { properties: props }],
138+
vec![],
139+
)?;
140+
Ok(self)
141+
}
142+
131143
fn generate_unique_snapshot_id(&self) -> i64 {
132144
let generate_random_id = || -> i64 {
133145
let (lhs, rhs) = Uuid::new_v4().as_u64_pair();

0 commit comments

Comments
 (0)