
Commit 7e6706d

Merge pull request #566 from boozallen/560-relation-record-spark-schema-validation
#560 address pr comments
2 parents dd747b2 + d3951b8 commit 7e6706d

4 files changed (+4, -31 lines)


DRAFT_RELEASE_NOTES.md (-1)
@@ -15,7 +15,6 @@ Spark and PySpark have been upgraded from version 3.5.2 to 3.5.4.
 ## Record Relation
 To enable nested data records, we have added a new relation feature to the record metamodel. This allows records to reference other records. For more details, refer to the [Record Relation Options](https://boozallen.github.io/aissemble/aissemble/current-dev/record-metamodel.html#_record_relation_options).
 Several features are still a work in progress:
-- Spark-based validation for records with a One to Many multiplicity. (POJO validation is available.)
 - PySpark schema generation for records with any multiplicity

 ## Helm Charts Resource Specification
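
As background for the release note above: a record relation lets one generated record hold instances of another. A minimal, purely illustrative Java sketch of the One to Many shape follows; City appears in the test model changed later in this commit, while Street and the field names are hypothetical (real record classes are generated from the metamodel, not hand-written).

```java
import java.util.ArrayList;
import java.util.List;

// Illustrative only: sketches the shape of a 1:M record relation at the POJO level.
// Street and all field names here are hypothetical placeholders.
class Street {
    private String name;

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
}

class City {
    private String name;

    // A One to Many (1:M) relation: the owning record references a collection of related records.
    private List<Street> streets = new ArrayList<>();

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
    public List<Street> getStreets() { return streets; }
}
```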

foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/spark.schema.base.java.vm (+3, -4)
@@ -39,7 +39,6 @@ import static org.apache.spark.sql.functions.explode;
  */
 public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {

-    private static final String ARRAY = "array";
 #set($columnVars = {})
 #foreach ($field in $record.fields)
 #set ($columnVars[$field.name] = "${field.upperSnakecaseName}_COLUMN")
@@ -277,12 +276,13 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
         return ${record.name};
     }

-#if ($record.hasRelations())
+
 #foreach ($relation in $record.relations)
 #if ($relation.isOneToManyRelation())

     /**
-     * Validate the given ${relation.capitalizedName} 1:M multiplicity relation dataset against ${relation.capitalizedName}Schema
+     * Validate the given ${relation.capitalizedName} 1:M multiplicity relation dataset against ${relation.capitalizedName}Schema.
+     * Returns false if schema validation fails for any one of the relation records.
      * @param ${relation.uncapitalizedName}Dataset
      * @return boolean value to indicate validation result
      */
@@ -299,5 +299,4 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {

 #end
 #end
-#end
 }
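
For orientation, the 1:M branch of this template emits a per-relation validation helper into each generated ${record.capitalizedName}SchemaBase class. Below is a hedged sketch of roughly what such a generated helper could look like for a hypothetical Street relation: only the Javadoc shape comes from the template hunk above; the method name, enclosing class name, and body are illustrative assumptions.

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

// Illustrative sketch only: the real method name and body are produced by template sections
// outside this hunk. StreetSchema stands in for a generated schema class; SparkSchema and
// validateDataFrame(...) come from the diff itself.
public abstract class CitySchemaBaseSketch extends SparkSchema {

    /**
     * Validate the given Street 1:M multiplicity relation dataset against StreetSchema.
     * Returns false if schema validation fails for any one of the relation records.
     * @param streetDataset
     * @return boolean value to indicate validation result
     */
    protected boolean validateStreetDataset(Dataset<Row> streetDataset) {
        StreetSchema streetSchema = new StreetSchema();
        Dataset<Row> validated = streetSchema.validateDataFrame(streetDataset);

        // Assumption: the relation dataset is treated as valid only if no rows are dropped
        // during schema validation.
        return validated.count() == streetDataset.count();
    }
}
```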

test/test-mda-models/test-data-delivery-spark-model/src/test/java/com/boozallen/aiops/mda/pattern/SparkSchemaTest.java (+1, -20)
@@ -131,12 +131,6 @@ public void aValidPersonWithOneToManyRelationDataSetExists(String validity) {
                 this.personWithOneToMRelationSchema.getStructType());
     }

-    @Given("a valid \"City\" dataSet exists")
-    public void aValidDataSetExists() {
-        List<Row> rows = Collections.singletonList(CitySchema.asRow(createCity()));
-        this.cityDataSet = spark.createDataFrame(rows, this.citySchema.getStructType());
-    }
-
     @Given("a \"City\" dataSet with an invalid relation exists")
     public void aCityDataSetWithAnInvalidRelationExists() {
         IntegerWithValidation integerWithValidation = new IntegerWithValidation(0);
@@ -163,15 +157,6 @@ public void aSparkDatasetExists() {
         this.cityDataSet = this.spark.createDataFrame(cityRows, this.citySchema.getStructType());
     }

-    @When("spark schema validation is performed on the dataSet")
-    public void sparkSchemaValidationIsPerformedOnTheDataSet() {
-        try {
-            this.validatedDataSet = this.citySchema.validateDataFrame(this.cityDataSet);
-        } catch (Exception e) {
-            this.exception = e;
-        }
-    }
-
     @When("spark schema validation is performed on the \"PersonWithOneToOneRelation\" dataSet")
     public void sparkSchemaValidationIsPerformedOnThePersonWithOneToOneRelationDataSet() {
         try {
@@ -193,12 +178,8 @@ public void sparkSchemaValidationIsPerformedOnThePersonWithMToOneRelationDataSet

     @When("spark schema validation is performed on the \"PersonWithOneToMRelation\" dataSet")
     public void sparkSchemaValidationIsPerformedOnThePersonWithOneToMRelationDataSet() {
-        try {
-            this.validatedDataSet =
+        this.validatedDataSet =
                 this.personWithOneToMRelationSchema.validateDataFrame(this.personWithOneToMRelationDataSet);
-        } catch (Exception e) {
-            this.exception = e;
-        }
     }

     @Then("the schema data type for {string} is {string}")

test/test-mda-models/test-data-delivery-spark-model/src/test/resources/specifications/sparkSchema.feature (-6)
@@ -22,12 +22,6 @@ Feature: Records with relations are generated correctly and function as expected
     When a "City" POJO is mapped to a spark dataset using the schema
     Then the dataset has the correct values for the relational objects

-  Scenario: Spark schemas generated fails to validate with not yet implemented exception
-    Given the spark schema is generated for the "City" record
-    And a valid "City" dataSet exists
-    When spark schema validation is performed on the dataSet
-    Then the dataSet validation "passes"
-
   Scenario Outline: Records with a One to One relation can be validated using the spark schema
     Given the spark schema is generated for the "PersonWithOneToOneRelation" record
     And a "<validity>" "PersonWithOneToOneRelation" dataSet exists
