
Commit dd747b2

Merge pull request #565 from boozallen/560-relation-record-spark-schema-validation
#560: Spark schema validation retains meta-model record relations with 1:M multiplicity
2 parents 4728f16 + 0e7d5c3 commit dd747b2

File tree: 6 files changed (+106 / -62 lines)


foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/BaseRecordRelationDecorator.java

+1 / -1
@@ -133,7 +133,7 @@ public String getCapitalizedName() {
      *
      * @return uncapitalized name
      */
-    public String getUncapitalizeName() {
+    public String getUncapitalizedName() {
         return StringUtils.uncapitalize(getName());
     }
 }
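
Note: this change only renames the typo'd accessor; the behavior is unchanged. A minimal standalone sketch of that behavior, assuming commons-lang3 is on the classpath (the wrapper class and main method below are illustrative, not part of the decorator):

import org.apache.commons.lang3.StringUtils;

class RelationNameExample {
    private final String name;

    RelationNameExample(String name) {
        this.name = name;
    }

    // Mirrors BaseRecordRelationDecorator#getUncapitalizedName()
    public String getUncapitalizedName() {
        return StringUtils.uncapitalize(name);
    }

    public static void main(String[] args) {
        // Prints "address" for a relation named "Address"
        System.out.println(new RelationNameExample("Address").getUncapitalizedName());
    }
}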

foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/java/JavaRecordRelation.java

+8 / -18
@@ -31,16 +31,6 @@ public JavaRecordRelation(Relation relationToDecorate) {
         super(relationToDecorate);
     }

-    /**
-     * Returns the reference record name formatted into camelcase but starts with a lowercase letter
-     * @return the reference record name formatted into camelcase but starts with a lowercase letter
-     */
-    public String getLowercaseName() {
-        char[] name = getName().toCharArray();
-        name[0] = Character.toLowerCase(name[0]);
-        return new String(name);
-    }
-
     /**
      * Returns the import for the generating the setters/getters of the reference record.
      * @return generated class import
@@ -62,9 +52,9 @@ public Set<String> getGeneratedClassImport() {
     public String getRelationSetterSignature() {
         String className = getName();
         if (this.isOneToManyRelation()) {
-            return String.format("public void set%s(List<%s> %s)", className, className, this.getLowercaseName());
+            return String.format("public void set%s(List<%s> %s)", className, className, this.getUncapitalizedName());
         } else {
-            return String.format("public void set%s(%s %s)", className, className, getLowercaseName());
+            return String.format("public void set%s(%s %s)", className, className, getUncapitalizedName());
         }
     }

@@ -87,7 +77,7 @@ public String getRelationGetterSignature() {
      */
     public String getRelationPropDeclaration() {
         String type = String.format(isOneToManyRelation()? "List<%s>": "%s", getName());
-        return String.format("private %s %s = null;", type, getLowercaseName());
+        return String.format("private %s %s = null;", type, getUncapitalizedName());
     }

     /**
@@ -100,9 +90,9 @@ public String getRelationValidate() {
                 for (%s record : this.%s) {
                     record.validate();
                 }""";
-            return String.format(validate, getName(), getLowercaseName());
+            return String.format(validate, getName(), getUncapitalizedName());
         } else {
-            return String.format("this.%s.validate();",getLowercaseName());
+            return String.format("this.%s.validate();",getUncapitalizedName());
         }
     }

@@ -111,23 +101,23 @@ public String getRelationValidate() {
      * @return the required reference record validation logic
      */
     public String getRequiredRelationValidate() {
-        String lowercaseName = getLowercaseName();
+        String uncapitalizedName = getUncapitalizedName();
         if (this.isOneToManyRelation()) {
             String validate = """
                     if (this.%s == null || this.%s.size() == 0) {
                         throw new ValidationException("Relation record '%s' is required");
                     } else {
                         %s
                     }""";
-            return String.format(validate, lowercaseName, lowercaseName, getName(), getRelationValidate());
+            return String.format(validate, uncapitalizedName, uncapitalizedName, getName(), getRelationValidate());
         } else {
             String validate = """
                     if (this.%s == null) {
                         throw new ValidationException("Relation record '%s' is required");
                     } else {
                         %s
                     }""";
-            return String.format(validate, lowercaseName, getName(), getRelationValidate());
+            return String.format(validate, uncapitalizedName, getName(), getRelationValidate());
         }
     }
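
Note: to make the effect of these helpers concrete, here is a sketch of the strings they produce for a hypothetical 1:M relation named Address. The format strings are taken from the methods above; the wrapper class and hard-coded values are illustrative only.

import java.util.List;

public class GeneratedRelationCodeExample {
    public static void main(String[] args) {
        // getRelationSetterSignature() output for a 1:M relation
        String setter = String.format("public void set%s(List<%s> %s)", "Address", "Address", "address");
        // getRelationPropDeclaration() output for a 1:M relation
        String prop = String.format("private %s %s = null;", "List<Address>", "address");
        // getRelationValidate() output: loop over the list and validate each element
        String validate = """
                for (%s record : this.%s) {
                    record.validate();
                }""";

        System.out.println(setter);   // public void setAddress(List<Address> address)
        System.out.println(prop);     // private List<Address> address = null;
        System.out.println(String.format(validate, "Address", "address"));
    }
}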

foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/record.base.java.vm

+3 / -3
@@ -49,11 +49,11 @@ public abstract class ${record.capitalizedName}Base {
 #if ($record.hasRelations())
 #foreach ($relation in $record.relations)
     ${relation.relationSetterSignature} {
-        this.${relation.lowercaseName} = ${relation.lowercaseName};
+        this.${relation.uncapitalizedName} = ${relation.uncapitalizedName};
     }

     ${relation.relationGetterSignature} {
-        return this.${relation.lowercaseName};
+        return this.${relation.uncapitalizedName};
     }
 #end
 #end
@@ -105,7 +105,7 @@ public abstract class ${record.capitalizedName}Base {
 #if (!$relation.isNullable())
         ${relation.requiredRelationValidate}
 #else
-        if (this.${relation.lowercaseName} != null) {
+        if (this.${relation.uncapitalizedName} != null) {
             ${relation.relationValidate}
         }
 #end
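
Note: roughly what this template fragment expands to for a hypothetical record with a required 1:M relation named Address. Address and ValidationException are stubbed here so the sketch compiles; the real types are generated elsewhere in the model.

import java.util.List;

public class PersonWithOneToMRelationBaseSketch {

    static class ValidationException extends RuntimeException {
        ValidationException(String message) { super(message); }
    }

    static class Address {
        void validate() { /* field-level checks generated from the dictionary */ }
    }

    private List<Address> address = null;

    public void setAddress(List<Address> address) {  // ${relation.relationSetterSignature}
        this.address = address;
    }

    public List<Address> getAddress() {              // ${relation.relationGetterSignature}
        return this.address;
    }

    public void validate() {
        // ${relation.requiredRelationValidate} for a non-nullable 1:M relation
        if (this.address == null || this.address.size() == 0) {
            throw new ValidationException("Relation record 'Address' is required");
        } else {
            for (Address record : this.address) {
                record.validate();
            }
        }
    }
}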

foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/spark.schema.base.java.vm

+35 / -16
@@ -28,6 +28,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;

 import static org.apache.spark.sql.functions.col;
 import static org.apache.spark.sql.functions.lit;
+import static org.apache.spark.sql.functions.explode;

 /**
  * Base implementation of the Spark schema for ${record.capitalizedName}.
@@ -82,15 +83,15 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
         DataType ${field.name}Type = getDataType(${columnVars[$field.name]});
 #end
 #foreach ($relation in $record.relations)
-        DataType ${relation.uncapitalizeName}Type = getDataType(${relationVars[$relation.name]});
+        DataType ${relation.uncapitalizedName}Type = getDataType(${relationVars[$relation.name]});
 #end

         return dataset
 #foreach ($field in $record.fields)
             .withColumn(${columnVars[$field.name]}, col(${columnVars[$field.name]}).cast(${field.name}Type))#if((!$foreach.hasNext) && ($relationVars == {}));#end
 #end
 #foreach ($relation in $record.relations)
-            .withColumn(${relationVars[$relation.name]}, col(${relationVars[$relation.name]}).cast(${relation.uncapitalizeName}Type))#if(!$foreach.hasNext);#end
+            .withColumn(${relationVars[$relation.name]}, col(${relationVars[$relation.name]}).cast(${relation.uncapitalizedName}Type))#if(!$foreach.hasNext);#end
 #end
     }
 #end
@@ -131,12 +132,6 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
      * @return Dataset with appended validation logic
      */
     protected Dataset<Row> validateDataFrame(Dataset<Row> data, String columnPrefix) {
-        for (String headerField : getStructType().fieldNames()) {
-            if (StringUtils.equals(ARRAY, getDataType(headerField).typeName())) {
-                throw new NotImplementedException(
-                        "Validation against relations with One to Many multiplicity is not yet implemented");
-            }
-        }
         Dataset<Row> dataWithValidations = data
 #foreach ($field in $record.fields)
 #if (${field.isRequired()})
@@ -156,7 +151,7 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
 #end
 #if (${field.getValidation().getMaxLength()})
 #set($max = ${field.getValidation().getMaxLength()} + 1)
-            .withColumn(${columnVars[$field.name]} + "_LESS_THAN_OR_EQUAL_TO_MAX_LENGTH", col(columnPrefix + ${columnVars[$field.name]}).rlike(("^.{$max},}")).equalTo(lit(false)))
+            .withColumn(${columnVars[$field.name]} + "_LESS_THAN_OR_EQUAL_TO_MAX_LENGTH", col(columnPrefix + ${columnVars[$field.name]}).rlike(("^.{$max,}")).equalTo(lit(false)))
 #end
 #foreach ($format in $field.getValidation().getFormats())
 #if ($foreach.first)
@@ -172,10 +167,10 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {

 #foreach($relation in $record.relations)
 #if($relation.isOneToManyRelation())
-## TODO implement validation for records with one to Many relations
+        dataWithValidations = dataWithValidations.withColumn(${relationVars[$relation.name]} + "_VALID", lit(validateWith${relation.capitalizedName}Schema(data.select(col(${relationVars[$relation.name]})))));
 #else
-        ${relation.capitalizedName}Schema ${relation.uncapitalizeName}Schema = new ${relation.capitalizedName}Schema();
-        dataWithValidations = dataWithValidations.withColumn(${relationVars[$relation.name]} + "_VALID", lit(!${relation.uncapitalizeName}Schema.validateDataFrame(data.select(col(${relationVars[$relation.name]})), ${relationVars[$relation.name]} + ".").isEmpty()));
+        ${relation.capitalizedName}Schema ${relation.uncapitalizedName}Schema = new ${relation.capitalizedName}Schema();
+        dataWithValidations = dataWithValidations.withColumn(${relationVars[$relation.name]} + "_VALID", lit(!${relation.uncapitalizedName}Schema.validateDataFrame(data.select(col(${relationVars[$relation.name]})), ${relationVars[$relation.name]} + ".").isEmpty()));
 #end
 #end

@@ -259,11 +254,11 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
 #end
 #foreach ($relation in $record.relations)
 #if ($relation.isOneToManyRelation())
-        Row[] ${relation.uncapitalizeName}Rows = (Row[])((WrappedArray<Row>) SparkDatasetUtils.getRowValue(row, "${relation.columnName}")).array();
-        record.set${relation.capitalizedName}(${relation.capitalizedName}Schema.mapRows(Arrays.asList(${relation.uncapitalizeName}Rows)));
+        Row[] ${relation.uncapitalizedName}Rows = (Row[])((WrappedArray<Row>) SparkDatasetUtils.getRowValue(row, "${relation.columnName}")).array();
+        record.set${relation.capitalizedName}(${relation.capitalizedName}Schema.mapRows(Arrays.asList(${relation.uncapitalizedName}Rows)));
 #else
-        ${relation.capitalizedName} ${relation.uncapitalizeName}Value = ${relation.capitalizedName}Schema.mapRow((Row) SparkDatasetUtils.getRowValue(row, "${relation.columnName}"));
-        record.set${relation.capitalizedName}(${relation.uncapitalizeName}Value);
+        ${relation.capitalizedName} ${relation.uncapitalizedName}Value = ${relation.capitalizedName}Schema.mapRow((Row) SparkDatasetUtils.getRowValue(row, "${relation.columnName}"));
+        record.set${relation.capitalizedName}(${relation.uncapitalizedName}Value);
 #end
 #end
         return record;
@@ -281,4 +276,28 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
         }
         return ${record.name};
     }
+
+#if ($record.hasRelations())
+#foreach ($relation in $record.relations)
+#if ($relation.isOneToManyRelation())
+
+    /**
+     * Validate the given ${relation.capitalizedName} 1:M multiplicity relation dataset against ${relation.capitalizedName}Schema
+     * @param ${relation.uncapitalizedName}Dataset
+     * @return boolean value to indicate validation result
+     */
+    private boolean validateWith${relation.capitalizedName}Schema(Dataset<Row> ${relation.uncapitalizedName}Dataset) {
+        ${relation.capitalizedName}Schema ${relation.uncapitalizedName}Schema = new ${relation.capitalizedName}Schema();
+        // flatten ${relation.uncapitalizedName} data
+        Dataset<Row> flattenDataset = ${relation.uncapitalizedName}Dataset.select(explode(col(${relationVars[$relation.name]})));

+        // validate ${relation.capitalizedName}Schema
+        Dataset<Row> validData = ${relation.uncapitalizedName}Schema.validateDataFrame(flattenDataset, "col.");
+        return flattenDataset.count() == validData.count();
+    }
+
+
+#end
+#end
+#end
 }
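
Note: expanded for a hypothetical Address 1:M relation, the new validation branch plus the private helper boil down to something like the sketch below. AddressSchema and the "address" column name stand in for the generated relation schema and the ${relationVars} constant; the stub validateDataFrame only makes the sketch compile, whereas the generated one applies the dictionary-driven field checks and returns the passing rows.

import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.explode;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

public class OneToManyRelationValidationSketch {

    static class AddressSchema {
        Dataset<Row> validateDataFrame(Dataset<Row> data, String columnPrefix) {
            return data; // stub so the sketch compiles
        }
    }

    private boolean validateWithAddressSchema(Dataset<Row> addressDataset) {
        AddressSchema addressSchema = new AddressSchema();
        // explode() turns the array-typed relation column into one row per element;
        // a single exploded column is named "col" by default, hence the "col." prefix
        Dataset<Row> flattenDataset = addressDataset.select(explode(col("address")));
        Dataset<Row> validData = addressSchema.validateDataFrame(flattenDataset, "col.");
        // the relation is valid only if validation dropped none of the exploded rows
        return flattenDataset.count() == validData.count();
    }
}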

test/test-mda-models/test-data-delivery-spark-model/src/test/java/com/boozallen/aiops/mda/pattern/SparkSchemaTest.java

+42 / -17
@@ -21,8 +21,9 @@
 import java.util.Map;

 import com.boozallen.aiops.mda.pattern.dictionary.Zipcode;
+import com.boozallen.aiops.mda.pattern.record.PersonWithOneToMRelation;
+import com.boozallen.aiops.mda.pattern.record.PersonWithOneToMRelationSchema;
 import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang3.NotImplementedException;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
@@ -32,7 +33,6 @@
 import com.boozallen.aiops.mda.pattern.record.City;
 import com.boozallen.aiops.mda.pattern.record.CitySchema;
 import com.boozallen.aiops.mda.pattern.record.Mayor;
-import com.boozallen.aiops.mda.pattern.record.MayorSchema;
 import com.boozallen.aiops.mda.pattern.record.PersonWithMToOneRelation;
 import com.boozallen.aiops.mda.pattern.record.PersonWithMToOneRelationSchema;
 import com.boozallen.aiops.mda.pattern.record.PersonWithOneToOneRelation;
@@ -41,7 +41,6 @@
 import com.boozallen.aiops.mda.pattern.record.Street;

 import io.cucumber.java.Before;
-import io.cucumber.java.en.And;
 import io.cucumber.java.en.Given;
 import io.cucumber.java.en.Then;
 import io.cucumber.java.en.When;
@@ -50,10 +49,12 @@ public class SparkSchemaTest {
     CitySchema citySchema;
     PersonWithOneToOneRelationSchema personWithOneToOneRelationSchema;
     PersonWithMToOneRelationSchema personWithMToOneRelationSchema;
+    PersonWithOneToMRelationSchema personWithOneToMRelationSchema;
     SparkSession spark;
     Dataset<Row> cityDataSet;
     Dataset<Row> personWithOneToOneRelationDataSet;
     Dataset<Row> personWithMToOneRelationDataSet;
+    Dataset<Row> personWithOneToMRelationDataSet;
     Dataset<Row> validatedDataSet;
     Exception exception;

@@ -67,8 +68,8 @@ public void theRecordExistsWithTheFollowingRelations(Map<String, String> multipl
         // Handled with MDA generation
     }

-    @Given("the spark schema is generate for the \"PersonWithOneToOneRelation\" record")
-    public void theSparkSchemaIsGenerateForThePersonWithOneToOneRelationRecord() {
+    @Given("the spark schema is generated for the \"PersonWithOneToOneRelation\" record")
+    public void theSparkSchemaIsGeneratedForThePersonWithOneToOneRelationRecord() {
         this.personWithOneToOneRelationSchema = new PersonWithOneToOneRelationSchema();
     }

@@ -88,8 +89,8 @@ public void aValidPersonWithOneToOneRelationDataSetExists(String validity) {
                 this.personWithOneToOneRelationSchema.getStructType());
     }

-    @Given("the spark schema is generate for the \"PersonWithMToOneRelation\" record")
-    public void theSparkSchemaIsGenerateForThePersonWithMToOneRelationRecord() {
+    @Given("the spark schema is generated for the \"PersonWithMToOneRelation\" record")
+    public void theSparkSchemaIsGeneratedForThePersonWithMToOneRelationRecord() {
         this.personWithMToOneRelationSchema = new PersonWithMToOneRelationSchema();
     }

@@ -109,6 +110,27 @@ public void aValidPersonWithManyToOneRelationDataSetExists(String validity) {
                 this.personWithMToOneRelationSchema.getStructType());
     }

+    @Given("the spark schema is generated for the \"PersonWithOneToMRelation\" record")
+    public void theSparkSchemaIsGeneratedForThePersonWithOneToMRelationRecord() {
+        this.personWithOneToMRelationSchema = new PersonWithOneToMRelationSchema();
+    }
+
+    @Given("a {string} \"PersonWithOneToMRelation\" dataSet exists")
+    public void aValidPersonWithOneToManyRelationDataSetExists(String validity) {
+        PersonWithOneToMRelation personWithOneToMRelation = new PersonWithOneToMRelation();
+        if (StringUtils.equals("valid", validity)){
+            personWithOneToMRelation.setAddress(List.of(createAddress(), createAddress()));
+        } else {
+            Address address = createAddress();
+            address.setZipcode(new Zipcode("0"));
+            personWithOneToMRelation.setAddress(List.of(address, createAddress()));
+        }
+
+        List<Row> rows = Collections.singletonList(PersonWithOneToMRelationSchema.asRow(personWithOneToMRelation));
+        this.personWithOneToMRelationDataSet = spark.createDataFrame(rows,
+                this.personWithOneToMRelationSchema.getStructType());
+    }
+
     @Given("a valid \"City\" dataSet exists")
     public void aValidDataSetExists() {
         List<Row> rows = Collections.singletonList(CitySchema.asRow(createCity()));
@@ -128,8 +150,8 @@ public void aCityDataSetWithAnInvalidRelationExists() {
         this.cityDataSet = spark.createDataFrame(rows, this.citySchema.getStructType());
     }

-    @When("the spark schema is generate for the \"City\" record")
-    public void theSparkSchemaIsGenerateForTheCityRecord() {
+    @When("the spark schema is generated for the \"City\" record")
+    public void theSparkSchemaIsGeneratedForTheCityRecord() {
         this.citySchema = new CitySchema();
     }

@@ -169,6 +191,16 @@ public void sparkSchemaValidationIsPerformedOnThePersonWithMToOneRelationDataSet
         }
     }

+    @When("spark schema validation is performed on the \"PersonWithOneToMRelation\" dataSet")
+    public void sparkSchemaValidationIsPerformedOnThePersonWithOneToMRelationDataSet() {
+        try {
+            this.validatedDataSet =
+                    this.personWithOneToMRelationSchema.validateDataFrame(this.personWithOneToMRelationDataSet);
+        }catch (Exception e) {
+            this.exception = e;
+        }
+    }
+
     @Then("the schema data type for {string} is {string}")
     public void theSchemaDataTypeForIs(String record, String type) {
         assertEquals("The type for record is not correct", type,
@@ -191,13 +223,6 @@ public void aPOJOCanBeMappedToASparkRow() {
         }
     }

-    @Then("the validation fails with NotYetImplementedException")
-    public void theValidationFailsWithNotYetImplementedException() {
-        assertNotNull("No exception was thrown", this.exception);
-        assertNotNull("Throw exception is not of instance NotImplementedException", this.exception instanceof
-                NotImplementedException ? (this.exception) : null);
-    }
-
     @Then("the dataSet validation {string}")
     public void theDataSetValidationIsSuccessful(String succeed) {
         if(StringUtils.equals("fails", succeed)) {
@@ -209,7 +234,7 @@ public void theDataSetValidationIsSuccessful(String succeed) {
     }

     private City createCity(){
-        IntegerWithValidation integerWithValidation = new IntegerWithValidation(0);
+        IntegerWithValidation integerWithValidation = new IntegerWithValidation(100);

         List<Street> streets = new ArrayList<>();
         Street street = new Street();
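
Note: the new steps exercise both the passing and failing paths end to end. A hedged sketch of the equivalent caller-side usage, assuming the MDA-generated PersonWithOneToMRelationSchema from the test model above; this wrapper class is illustrative and not part of the repository.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

public class PersonWithOneToMRelationValidationExample {

    public static boolean isValid(Dataset<Row> personRows) {
        PersonWithOneToMRelationSchema schema = new PersonWithOneToMRelationSchema();
        // validateDataFrame is expected to return only the rows that pass the
        // generated field and relation checks, so "no rows dropped" means valid
        Dataset<Row> validRows = schema.validateDataFrame(personRows);
        return validRows.count() == personRows.count();
    }
}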

0 commit comments
