From 7f77b61a8221351e895e2eb6da9cfe20877fb0f3 Mon Sep 17 00:00:00 2001 From: techncl Date: Fri, 31 Jan 2025 18:17:12 +0000 Subject: [PATCH] Make max chars in cell error message more helpful --- .../csv/validator/MetaDataValidator.scala | 18 ++++++++++++++---- .../api/metaDataWithALongCellLength.csv | 2 ++ .../api/CsvValidatorMaxCharsPerCellSpec.scala | 12 ++++++++++-- 3 files changed, 26 insertions(+), 6 deletions(-) create mode 100644 csv-validator-core/src/test/resources/uk/gov/nationalarchives/csv/validator/api/metaDataWithALongCellLength.csv diff --git a/csv-validator-core/src/main/scala/uk/gov/nationalarchives/csv/validator/MetaDataValidator.scala b/csv-validator-core/src/main/scala/uk/gov/nationalarchives/csv/validator/MetaDataValidator.scala index 27e8cafa..1da9584a 100644 --- a/csv-validator-core/src/main/scala/uk/gov/nationalarchives/csv/validator/MetaDataValidator.scala +++ b/csv-validator-core/src/main/scala/uk/gov/nationalarchives/csv/validator/MetaDataValidator.scala @@ -355,15 +355,23 @@ class RowIterator(parser: CsvParser, progress: Option[ProgressFor], maxCharsPerC private var index = 1 private var current = toRow(Try(parser.parseNext())) + private var potentialHeaderRow: Option[Row] = None @throws(classOf[IOException]) override def next(): Row = { val row = current match { - case Success(row) => row + case Success(row) => + if(index == 1 && potentialHeaderRow.isEmpty) potentialHeaderRow = Some(row) // this is here in case the old API is used that doesn't call 'skipHeader' + row case Failure(ex: TextParsingException) if(ex.toString.contains("exceeds the maximum number of characters")) => + val cellLocationMsg = + potentialHeaderRow match { + case Some(headerRow) => s"in the cell located at line: ${ex.getLineIndex}, column: ${headerRow.cells(ex.getColumnIndex).value}," + case None => s"in column ${ex.getColumnIndex + 1} of the header row" + } + val customMessage = - s"The number of characters in the cell located at line: ${ex.getLineIndex + 1}, column: ${ex.getColumnIndex + 1}, " + - s"is larger than the maximum number of characters allowed in a cell ($maxCharsPerCell); increase this limit and re-run." + s"The number of characters $cellLocationMsg is larger than the maximum number of characters allowed in a cell ($maxCharsPerCell); increase this limit and re-run." throw new Exception(customMessage) case Failure(ex) => throw ex } @@ -385,7 +393,9 @@ class RowIterator(parser: CsvParser, progress: Option[ProgressFor], maxCharsPerC @throws(classOf[IOException]) def skipHeader(): Row = { this.index = index - 1 - next() + val row = next() + this.potentialHeaderRow = Some(row) + row } override def hasNext: Boolean = current match { diff --git a/csv-validator-core/src/test/resources/uk/gov/nationalarchives/csv/validator/api/metaDataWithALongCellLength.csv b/csv-validator-core/src/test/resources/uk/gov/nationalarchives/csv/validator/api/metaDataWithALongCellLength.csv new file mode 100644 index 00000000..445be4bf --- /dev/null +++ b/csv-validator-core/src/test/resources/uk/gov/nationalarchives/csv/validator/api/metaDataWithALongCellLength.csv @@ -0,0 +1,2 @@ +col1,col2 +row1Col1,row1Col2LongCellLength \ No newline at end of file diff --git a/csv-validator-core/src/test/scala/uk/gov/nationalarchives/csv/validator/api/CsvValidatorMaxCharsPerCellSpec.scala b/csv-validator-core/src/test/scala/uk/gov/nationalarchives/csv/validator/api/CsvValidatorMaxCharsPerCellSpec.scala index 7679f743..025e9208 100644 --- a/csv-validator-core/src/test/scala/uk/gov/nationalarchives/csv/validator/api/CsvValidatorMaxCharsPerCellSpec.scala +++ b/csv-validator-core/src/test/scala/uk/gov/nationalarchives/csv/validator/api/CsvValidatorMaxCharsPerCellSpec.scala @@ -25,11 +25,19 @@ class CsvValidatorMaxCharsPerCellSpec extends Specification with TestResources { def app(maxChars: Int=4096) = new CsvValidator with AllErrorsMetaDataValidator { val pathSubstitutions: List[(String, String)] = List[(String,String)](); val enforceCaseSensitivePathChecks = false; val trace = false; val skipFileChecks = false; val maxCharsPerCell: Int = maxChars } def parse(filePath: String, maxChars: Int=4096): Schema = app(maxChars).parseSchema(TextFile(Paths.get(filePath))) fold (f => throw new IllegalArgumentException(f.toString()), s => s) - "fail if the number of characters in a cell is more than the maxCharsPerCell number" in { + "fail if the number of characters in a cell in the header is more than the maxCharsPerCell number and indicate the column number" in { val maxCharsAllowed = 2 val validatedNel = app(maxCharsAllowed).validate(TextFile(Paths.get(baseResourcePkgPath).resolve("metaData.csv")), parse(baseResourcePkgPath + "/schema.csvs", maxCharsAllowed), None).swap validatedNel.toList.head.toList must beEqualTo( - List(FailMessage(ValidationError,"java.lang.Exception: The number of characters in the cell located at line: 1, column: 1, is larger than the maximum number of characters allowed in a cell (2); increase this limit and re-run.",None,None)) + List(FailMessage(ValidationError,"java.lang.Exception: The number of characters in column 1 of the header row is larger than the maximum number of characters allowed in a cell (2); increase this limit and re-run.",None,None)) + ) + } + + "fail if the number of characters in a cell in a non-header row is more than the maxCharsPerCell number and indicate the column name" in { + val maxCharsAllowed = 15 + val validatedNel = app(maxCharsAllowed).validate(TextFile(Paths.get(baseResourcePkgPath).resolve("metaDataWithALongCellLength.csv")), parse(baseResourcePkgPath + "/schema.csvs"), None).swap + validatedNel.toList.head.toList must beEqualTo( + List(FailMessage(ValidationError,"java.lang.Exception: The number of characters in the cell located at line: 1, column: col2, is larger than the maximum number of characters allowed in a cell (15); increase this limit and re-run.",None,None)) ) }