Skip to content

Commit 24618cc

Browse files
authored
Improve the handling of conflicting properties while merging schemas (#549)
1 parent 739717c commit 24618cc

File tree

8 files changed

+11752
-1758
lines changed

8 files changed

+11752
-1758
lines changed

typify-impl/src/merge.rs

+36-40
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2023 Oxide Computer Company
1+
// Copyright 2024 Oxide Computer Company
22

33
use std::{
44
collections::{BTreeMap, BTreeSet},
@@ -25,17 +25,19 @@ fn try_merge_all(schemas: &[Schema], defs: &BTreeMap<RefKey, Schema>) -> Result<
2525
serde_json::to_string_pretty(schemas).unwrap(),
2626
);
2727

28-
match schemas {
28+
let merged_schema = match schemas {
2929
[] => panic!("we should not be trying to merge an empty array of schemas"),
30-
[only] => Ok(only.clone()),
30+
[only] => only.clone(),
3131
[first, second, rest @ ..] => {
3232
let mut out = try_merge_schema(first, second, defs)?;
3333
for schema in rest {
3434
out = try_merge_schema(&out, schema, defs)?;
3535
}
36-
Ok(out)
36+
out
3737
}
38-
}
38+
};
39+
40+
Ok(merged_schema)
3941
}
4042

4143
/// Given two additionalItems schemas that might be None--which is equivalent
@@ -71,12 +73,16 @@ fn merge_additional_properties(
7173
}
7274
}
7375

76+
fn merge_schema(a: &Schema, b: &Schema, defs: &BTreeMap<RefKey, Schema>) -> Schema {
77+
try_merge_schema(a, b, defs).unwrap_or(Schema::Bool(false))
78+
}
79+
7480
/// Merge two schemas returning the resulting schema. If the two schemas are
7581
/// incompatible (i.e. if there is no data that can satisfy them both
7682
/// simultaneously) then this returns Err.
7783
fn try_merge_schema(a: &Schema, b: &Schema, defs: &BTreeMap<RefKey, Schema>) -> Result<Schema, ()> {
7884
match (a, b) {
79-
(Schema::Bool(false), _) | (_, Schema::Bool(false)) => Ok(Schema::Bool(false)),
85+
(Schema::Bool(false), _) | (_, Schema::Bool(false)) => Err(()),
8086
(Schema::Bool(true), other) | (other, Schema::Bool(true)) => Ok(other.clone()),
8187

8288
// If we have two references to the same schema, that's easy!
@@ -489,9 +495,11 @@ fn try_merge_schema_not(
489495
if required.contains(not_required) {
490496
return Err(());
491497
}
492-
// We don't care if a property we're saying is not
493-
// required was not present in the properties map.
494-
let _ = properties.remove(not_required);
498+
// Set the property's schema to false i.e. that the
499+
// presence of any value would be invalid. We ignore
500+
// the return value as it doesn't matter if the
501+
// property was there previously or not.
502+
let _ = properties.insert(not_required.clone(), Schema::Bool(false));
495503
}
496504
}
497505
}
@@ -947,6 +955,11 @@ fn merge_so_object(
947955
match (a, b) {
948956
(None, other) | (other, None) => Ok(other.cloned().map(Box::new)),
949957
(Some(aa), Some(bb)) => {
958+
let required = aa
959+
.required
960+
.union(&bb.required)
961+
.cloned()
962+
.collect::<BTreeSet<_>>();
950963
let additional_properties = merge_additional_properties(
951964
aa.additional_properties.as_deref(),
952965
bb.additional_properties.as_deref(),
@@ -984,29 +997,29 @@ fn merge_so_object(
984997
let resolved_schema = match ab_schema {
985998
AOrB::A(a_schema) => filter_prop(name, a_schema, bb),
986999
AOrB::B(b_schema) => filter_prop(name, b_schema, aa),
987-
AOrB::Both(a_schema, b_schema) => {
988-
try_merge_schema(a_schema, b_schema, defs)
989-
}
1000+
AOrB::Both(a_schema, b_schema) => merge_schema(a_schema, b_schema, defs),
9901001
};
9911002
match resolved_schema {
9921003
// If a required field is incompatible with the
9931004
// other schema, this object is unsatisfiable.
994-
Err(()) if aa.required.contains(name) => Some(Err(())),
1005+
Schema::Bool(false) if required.contains(name) => Some(Err(())),
9951006

9961007
// For incompatible, non-required fields we need to
9971008
// exclude the property from any values. If
9981009
// `additionalProperties` is `false` (i.e. excludes all
9991010
// other properties) then we can simply omit the
1000-
// property. Otherwise we include the optional property
1001-
// but with the `false` schema that means that no value
1002-
// will satisfy that property--the value would always
1003-
// be None and any serialization that included the
1004-
// named property would fail to deserialize.
1011+
// property knowing that it (like all other unnamed
1012+
// properties) will not be permitted. Otherwise we
1013+
// include the optional property but with the `false`
1014+
// schema that means that no value will satisfy that
1015+
// property--the value would always be None and any
1016+
// serialization that included the named property would
1017+
// fail to deserialize.
10051018
//
10061019
// If we ever make use of `propertyNames`, it's
10071020
// conceivable that we might check it or modify it in
10081021
// this case, but that may be overly complex.
1009-
Err(()) => {
1022+
Schema::Bool(false) => {
10101023
if let Some(Schema::Bool(false)) = additional_properties {
10111024
None
10121025
} else {
@@ -1015,25 +1028,11 @@ fn merge_so_object(
10151028
}
10161029

10171030
// Compatible schema; proceed.
1018-
Ok(schema) => Some(Ok((name.clone(), schema))),
1031+
schema => Some(Ok((name.clone(), schema))),
10191032
}
10201033
})
10211034
.collect::<Result<schemars::Map<_, _>, _>>()?;
10221035

1023-
let additional_properties = additional_properties.map(Box::new);
1024-
let required = aa.required.union(&bb.required).cloned().collect();
1025-
1026-
// So. We merged the properties and we merged the array of
1027-
// properties that are required. We use the absence of a property
1028-
// in the properties map to indicate that the property must *not*
1029-
// be present. This is imprecise, but allows us to make progress
1030-
// without a most significant conversion to a new representation.
1031-
for req_prop in &required {
1032-
if !properties.contains_key(req_prop) {
1033-
return Err(());
1034-
}
1035-
}
1036-
10371036
let max_properties = choose_value(aa.max_properties, bb.max_properties, Ord::min);
10381037
let min_properties = choose_value(aa.min_properties, bb.min_properties, Ord::max);
10391038

@@ -1046,7 +1045,7 @@ fn merge_so_object(
10461045
let object_validation = ObjectValidation {
10471046
required,
10481047
properties,
1049-
additional_properties,
1048+
additional_properties: additional_properties.map(Box::new),
10501049
max_properties,
10511050
min_properties,
10521051
pattern_properties: Default::default(), // TODO
@@ -1057,11 +1056,7 @@ fn merge_so_object(
10571056
}
10581057
}
10591058

1060-
fn filter_prop(
1061-
name: &str,
1062-
prop_schema: &Schema,
1063-
object_schema: &ObjectValidation,
1064-
) -> Result<Schema, ()> {
1059+
fn filter_prop(name: &str, prop_schema: &Schema, object_schema: &ObjectValidation) -> Schema {
10651060
// We're only considering properties we *know* do not appear in the other
10661061
// object's schema.
10671062
assert!(!object_schema.properties.contains_key(name));
@@ -1077,6 +1072,7 @@ fn filter_prop(
10771072
assert!(object_schema.pattern_properties.is_empty());
10781073

10791074
merge_additional(object_schema.additional_properties.as_deref(), prop_schema)
1075+
.unwrap_or(Schema::Bool(false))
10801076
}
10811077

10821078
fn merge_additional(additional: Option<&Schema>, prop_schema: &Schema) -> Result<Schema, ()> {

typify-impl/src/structs.rs

+42-9
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2022 Oxide Computer Company
1+
// Copyright 2024 Oxide Computer Company
22

33
use heck::ToSnakeCase;
44
use proc_macro2::TokenStream;
@@ -27,16 +27,49 @@ impl TypeSpace {
2727
//assert!(validation.pattern_properties.is_empty());
2828
//assert!(validation.property_names.is_none());
2929

30+
// Gather up the properties that are required but for which we have no
31+
// schema. In those cases any value will do.
32+
let required_unspecified = validation.required.iter().filter_map(|prop_name| {
33+
validation
34+
.properties
35+
.get(prop_name)
36+
.is_none()
37+
.then_some((prop_name, &Schema::Bool(true)))
38+
});
39+
3040
let mut properties = validation
3141
.properties
3242
.iter()
33-
.map(|(name, ty)| {
34-
// Generate a name we can use for the type of this property
35-
// should there not be a one defined in the schema.
36-
let sub_type_name = type_name
37-
.as_ref()
38-
.map(|base| format!("{}_{}", base, name.to_snake_case()));
39-
self.struct_property(sub_type_name, &validation.required, name, ty)
43+
.chain(required_unspecified)
44+
.filter_map(|(prop_name, schema)| {
45+
match schema {
46+
// TODO We use the schema `false` to indicate an
47+
// unsatisfiable schema. We take a shortcut here and simply
48+
// ignore these. This is wrong in two subtle and important
49+
// ways that we'll need to address at some point. First,
50+
// there are other schemas in some non-trivial,
51+
// non-canonical form that might indicate the same thing.
52+
// We should handle those in the same way. In addition,
53+
// ignoring them isn't really right. We need to actively
54+
// exclude them. Specifically this would look like a custom
55+
// serde::Deserialize implementation that failed in the
56+
// presence of these values.
57+
Schema::Bool(false) => None,
58+
_ => {
59+
// Generate a name we can use for the type of this
60+
// property should there not be one specified by the
61+
// schema itself (i.e. via the title field).
62+
let sub_type_name = type_name
63+
.as_ref()
64+
.map(|base| format!("{}_{}", base, prop_name.to_snake_case()));
65+
Some(self.struct_property(
66+
sub_type_name,
67+
&validation.required,
68+
prop_name,
69+
schema,
70+
))
71+
}
72+
}
4073
})
4174
.collect::<Result<Vec<_>>>()?;
4275

@@ -107,7 +140,7 @@ impl TypeSpace {
107140
Ok((properties, deny_unknown_fields))
108141
}
109142

110-
pub(crate) fn struct_property(
143+
fn struct_property(
111144
&mut self,
112145
type_name: Option<String>,
113146
required: &schemars::Set<String>,

typify-impl/tests/test_github.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,13 @@ fn test_vega() {
5959
}
6060
};
6161
let schema = serde_json::from_value(raw_schema).unwrap();
62-
settings.with_conversion(schema, "MyEnum", [TypeSpaceImpl::FromStr].into_iter());
62+
settings
63+
.with_conversion(schema, "MyEnum", [TypeSpaceImpl::FromStr].into_iter())
64+
// TODO ColorValue has resulted in an extremely expensive type; to
65+
// resolve this we need to do a better job of canonicalizing the schema
66+
// once rather than repeatedly doing quadratic expansions over it.
67+
// Alternatively we can memoize conversions rather than repeating them.
68+
.with_replacement("ColorValue", "ColorValue", [].into_iter());
6369

6470
let mut type_space = TypeSpace::new(&settings);
6571

0 commit comments

Comments
 (0)