Skip to content

Commit

Permalink
Update Java API with 'skipFileChecks' and 'maxCharsPerCell'
Browse files Browse the repository at this point in the history
This commit also deprecates the previous Java API 'validate' methods and replaces them with a ValidateBuilder to make it easier
to add more parameters in the future
  • Loading branch information
techncl committed Jan 31, 2025
1 parent 7f77b61 commit dfc925c
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 34 deletions.
62 changes: 44 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,27 +49,53 @@ The Javadoc, can be found in either Maven Central or you can build it locally by

Example Java code of using the CSV Validator through the Java API:
```java
Boolean failFast = false;
List<Substitution> pathSubstitutions = new ArrayList<Substitution>();

List<FailMessage> messages = CsvValidator.validate(
"/data/csv/data.csv",
"/data/csv/data-schema.csvs",
failFast,
pathSubstitutions,
true,
false);
Charset csvEncoding = Charset.forName("UTF-8"); // default is UTF-8
Charset csvSchemaEncoding = Charset.forName("UTF-8"); // default is UTF-8
boolean failFast = true; // default is false
List<Substitution> pathSubstitutions = new ArrayList<Substitution>(); // default is an empty ArrayList
boolean enforceCaseSensitivePathChecks = true; // default is false
boolean trace = false; // default is false
ProgressCallback progress = null; // default is null
boolean skipFileChecks = true; // default is false
int maxCharsPerCell = 8096; // default is 4096

// add a substitution path
pathSubstitutions.add(new Substitution("file://something", "/home/xxx"));

CsvValidator.ValidatorBuilder validateWithStringNames = new CsvValidator.ValidatorBuilder(
"/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data.csv",
"/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data-schema.csvs"
);

// alternatively, you can pass in Readers for each file
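Reader csvReader = new FileReader("/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data.csv");
Reader csvSchemaReader = new FileReader("/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data-schema.csvs");
CsvValidator.ValidatorBuilder validateWithReaders = new CsvValidator.ValidatorBuilder(
csvReader, csvSchemaReader
);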

List<FailMessage> messages = validateWithStringNames
.csvEncoding(csvEncoding)
.csvSchemaEncoding(csvSchemaEncoding)
.failFast(failFast)
.pathSubstitutions(pathSubstitutions)
.enforceCaseSensitivePathChecks(enforceCaseSensitivePathChecks)
.trace(trace)
.progress(progress)
.skipFileChecks(skipFileChecks)
.maxCharsPerCell(maxCharsPerCell)
.build();

if(messages.isEmpty()) {
System.out.println("Completed validation OK");
System.out.println("All worked OK");
} else {
for(FailMessage message : messages) {
if(message instanceof WarningMessage) {
System.out.println("[WARN] " + message.getMessage());
} else {
System.out.println("[ERROR] " + message.getMessage());
}
}
for(FailMessage message : messages) {
if(message instanceof WarningMessage) {
System.out.println("Warning: " + message.getMessage());
} else {
System.out.println("Error: " + message.getMessage());
}
}
}
}
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import static uk.gov.nationalarchives.csv.validator.api.CsvValidator$.MODULE$;

Expand All @@ -19,18 +20,41 @@
*
* <p> A typical invocation sequence:</p>
* <blockquote><pre>{@code
* Boolean failFast = false;
* List<Substitution> pathSubstitutions = new ArrayList<Substitution>();
* Charset csvEncoding = Charset.forName("UTF-8"); // default is UTF-8
* Charset csvSchemaEncoding = Charset.forName("UTF-8"); // default is UTF-8
* boolean failFast = true; // default is false
* List<Substitution> pathSubstitutions = new ArrayList<Substitution>(); // default is an empty ArrayList
* boolean enforceCaseSensitivePathChecks = true; // default is false
* boolean trace = false; // default is false
* ProgressCallback progress = null; // default is null
* boolean skipFileChecks = true; // default is false
* int maxCharsPerCell = 8096; // default is 4096
*
* //add a substitution path
* pathSubstitutions.add(new Substitution("file://something", "/home/xxx"));
*
* List<FailMessage> messages = CsvValidator.validate(
* "/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data.csv",
* "/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data-schema.csvs",
* failFast,
* pathSubstitutions,
* true);
* CsvValidator.ValidatorBuilder validateWithStringNames = new CsvValidator.ValidatorBuilder(
* "/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data.csv",
* "/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data-schema.csvs"
* );
*
* // alternatively, you can pass in Readers for each file
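* Reader csvReader = new FileReader("/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data.csv");
* Reader csvSchemaReader = new FileReader("/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data-schema.csvs");
* CsvValidator.ValidatorBuilder validateWithReaders = new CsvValidator.ValidatorBuilder(
* csvReader, csvSchemaReader
* );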
*
* List<FailMessage> messages = validateWithStringNames
* .csvEncoding(csvEncoding)
* .csvSchemaEncoding(csvSchemaEncoding)
* .failFast(failFast)
* .pathSubstitutions(pathSubstitutions)
* .enforceCaseSensitivePathChecks(enforceCaseSensitivePathChecks)
* .trace(trace)
* .progress(progress)
* .skipFileChecks(skipFileChecks)
* .maxCharsPerCell(maxCharsPerCell)
* .build();
*
* if(messages.isEmpty()) {
* System.out.println("All worked OK");
Expand Down Expand Up @@ -63,6 +87,7 @@ public class CsvValidator {
*
* @return empty list of (if there are no errors), or list of error strings.
*/
@Deprecated
public static List<FailMessage> validate(
        final String csvFilename,
        final String csvSchemaFilename,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace) {
    // No encodings were supplied, so both files use the library default encoding.
    final Charset csvEncoding = MODULE$.DEFAULT_ENCODING();
    final Charset csvSchemaEncoding = MODULE$.DEFAULT_ENCODING();
    // Delegate to the overload that takes explicit encodings.
    return validate(
            csvFilename, csvEncoding,
            csvSchemaFilename, csvSchemaEncoding,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace);
}
Expand All @@ -82,6 +107,7 @@ public static List<FailMessage> validate(final String csvFilename, final String
*
* @return empty list of (if there are no errors), or list of error strings.
*/
@Deprecated
public static List<FailMessage> validate(
        final String csvFilename,
        final Charset csvEncoding,
        final String csvSchemaFilename,
        final Charset csvSchemaEncoding,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace) {
    // Hand the call straight to the Scala bridge; no progress callback is involved here.
    final List<FailMessage> failures = CsvValidatorJavaBridge.validate(
            csvFilename, csvEncoding,
            csvSchemaFilename, csvSchemaEncoding,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace);
    return failures;
}
Expand All @@ -104,6 +130,7 @@ public static List<FailMessage> validate(final String csvFilename, final Charset
*
* @return empty list of (if there are no errors), or list of error strings.
*/
@Deprecated
public static List<FailMessage> validate(
        final String csvFilename,
        final String csvSchemaFilename,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace,
        final ProgressCallback progress) {
    // No encodings were supplied, so both files use the library default encoding.
    final Charset csvEncoding = MODULE$.DEFAULT_ENCODING();
    final Charset csvSchemaEncoding = MODULE$.DEFAULT_ENCODING();
    // Delegate to the overload that takes explicit encodings plus the progress callback.
    return validate(
            csvFilename, csvEncoding,
            csvSchemaFilename, csvSchemaEncoding,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace, progress);
}
Expand All @@ -125,6 +152,7 @@ public static List<FailMessage> validate(final String csvFilename, final String
*
* @return empty list of (if there are no errors), or list of error strings.
*/
@Deprecated
public static List<FailMessage> validate(
        final String csvFilename,
        final Charset csvEncoding,
        final String csvSchemaFilename,
        final Charset csvSchemaEncoding,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace,
        final ProgressCallback progress) {
    // Hand the call straight to the Scala bridge, forwarding the caller's progress callback.
    final List<FailMessage> failures = CsvValidatorJavaBridge.validate(
            csvFilename, csvEncoding,
            csvSchemaFilename, csvSchemaEncoding,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace, progress);
    return failures;
}
Expand All @@ -142,6 +170,7 @@ public static List<FailMessage> validate(final String csvFilename, final Charset
*
* @return empty list of (if there are no errors), or list of error strings.
*/
@Deprecated
public static List<FailMessage> validate(
        final Reader csvData,
        final Reader csvSchema,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace) {
    // Reader-based variant: the bridge consumes the two readers directly.
    final List<FailMessage> failures = CsvValidatorJavaBridge.validate(
            csvData, csvSchema,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace);
    return failures;
}
Expand All @@ -161,8 +190,91 @@ public static List<FailMessage> validate(final Reader csvData, final Reader csvS
*
* @return empty list of (if there are no errors), or list of error strings.
*/
@Deprecated
public static List<FailMessage> validate(
        final Reader csvData,
        final Reader csvSchema,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace,
        final ProgressCallback progress) {
    // Reader-based variant that also forwards the caller's progress callback to the bridge.
    final List<FailMessage> failures = CsvValidatorJavaBridge.validate(
            csvData, csvSchema,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace, progress);
    return failures;
}

/**
 * Fluent builder that collects the validation options and then runs the
 * validation when {@link #build()} is called.
 *
 * <p>Create it either from two file names or from two {@link Reader}s, chain
 * the optional setters, and finish with {@link #build()}. Every option that is
 * not set keeps the default shown on its field.</p>
 *
 * <p>The class is {@code public} so that code outside this package can use it
 * as {@code new CsvValidator.ValidatorBuilder(...)}.</p>
 */
public static class ValidatorBuilder {
    // Exactly one pair (file names or readers) is populated, depending on the constructor used.
    private final String csvFileName;
    private final String csvSchemaFilename;
    private final Reader csvReader;
    private final Reader csvSchemaReader;

    // True when the builder was created from file names, false when created from readers.
    private final boolean textFileValidation;

    private Charset csvEncoding = MODULE$.DEFAULT_ENCODING();
    private Charset csvSchemaEncoding = MODULE$.DEFAULT_ENCODING();
    private boolean failFast = false;
    private List<Substitution> pathSubstitutions = new ArrayList<>();
    private boolean enforceCaseSensitivePathChecks = false;
    private boolean trace = false;
    private ProgressCallback progress; // null means "no progress reporting"
    private boolean skipFileChecks = false;
    private int maxCharsPerCell = 4096;

    /**
     * Creates a builder that validates a CSV file against a CSV Schema file.
     *
     * @param csvFileName       name of the CSV file to validate
     * @param csvSchemaFilename name of the CSV Schema file to validate against
     */
    public ValidatorBuilder(String csvFileName, String csvSchemaFilename) {
        this.csvFileName = csvFileName;
        this.csvSchemaFilename = csvSchemaFilename;
        this.csvReader = null;
        this.csvSchemaReader = null;
        this.textFileValidation = true;
    }

    /**
     * Creates a builder that validates CSV data read from a reader against a
     * CSV Schema read from another reader.
     *
     * @param csvReader       reader supplying the CSV data
     * @param csvSchemaReader reader supplying the CSV Schema
     */
    public ValidatorBuilder(Reader csvReader, Reader csvSchemaReader) {
        this.csvFileName = null;
        this.csvSchemaFilename = null;
        this.csvReader = csvReader;
        this.csvSchemaReader = csvSchemaReader;
        this.textFileValidation = false;
    }

    /**
     * Sets the encoding of the CSV file. Only used by the file-name based builder.
     *
     * @param encoding the CSV file encoding
     * @return this builder
     */
    public ValidatorBuilder csvEncoding(Charset encoding) {
        this.csvEncoding = encoding;
        return this;
    }

    /**
     * Sets the encoding of the CSV Schema file. Only used by the file-name based builder.
     *
     * @param schemaEncoding the CSV Schema file encoding
     * @return this builder
     */
    public ValidatorBuilder csvSchemaEncoding(Charset schemaEncoding) {
        this.csvSchemaEncoding = schemaEncoding;
        return this;
    }

    /**
     * @param failFast the fail-fast option forwarded to the validator
     * @return this builder
     */
    public ValidatorBuilder failFast(boolean failFast) {
        this.failFast = failFast;
        return this;
    }

    /**
     * @param pathSubstitutions the path substitutions forwarded to the validator
     * @return this builder
     */
    public ValidatorBuilder pathSubstitutions(List<Substitution> pathSubstitutions) {
        this.pathSubstitutions = pathSubstitutions;
        return this;
    }

    /**
     * @param enforceCaseSensitivePathChecks the case-sensitivity option forwarded to the validator
     * @return this builder
     */
    public ValidatorBuilder enforceCaseSensitivePathChecks(boolean enforceCaseSensitivePathChecks) {
        this.enforceCaseSensitivePathChecks = enforceCaseSensitivePathChecks;
        return this;
    }

    /**
     * @param trace the trace option forwarded to the validator
     * @return this builder
     */
    public ValidatorBuilder trace(boolean trace) {
        this.trace = trace;
        return this;
    }

    /**
     * @param progress callback to receive progress updates, or {@code null} for none
     * @return this builder
     */
    public ValidatorBuilder progress(ProgressCallback progress) {
        this.progress = progress;
        return this;
    }

    /**
     * @param skipFileChecks the skip-file-checks option forwarded to the validator
     * @return this builder
     */
    public ValidatorBuilder skipFileChecks(boolean skipFileChecks) {
        this.skipFileChecks = skipFileChecks;
        return this;
    }

    /**
     * @param maxCharsPerCell the per-cell character limit forwarded to the validator
     * @return this builder
     */
    public ValidatorBuilder maxCharsPerCell(int maxCharsPerCell) {
        this.maxCharsPerCell = maxCharsPerCell;
        return this;
    }

    /**
     * Runs the validation with the options collected so far.
     *
     * <p>This calls the bridge overloads that accept {@code skipFileChecks}
     * and {@code maxCharsPerCell}; the older overloads do not take those two
     * options and do not tolerate a {@code null} progress callback.</p>
     *
     * @return the list of messages produced by the validation
     */
    public List<FailMessage> build() {
        if (textFileValidation) {
            // Same encoding-validation flags the pre-builder file-name overloads used:
            // validate the CSV encoding (true), do not validate the schema encoding (false).
            final boolean validateCsvEncoding = true;
            final boolean validateCsvSchemaEncoding = false;
            return CsvValidatorJavaBridge.validate(
                    this.csvFileName, this.csvEncoding, validateCsvEncoding,
                    this.csvSchemaFilename, this.csvSchemaEncoding, validateCsvSchemaEncoding,
                    this.failFast, this.pathSubstitutions, this.enforceCaseSensitivePathChecks,
                    this.trace, this.progress, this.skipFileChecks, this.maxCharsPerCell);
        }
        return CsvValidatorJavaBridge.validate(
                this.csvReader, this.csvSchemaReader,
                this.failFast, this.pathSubstitutions, this.enforceCaseSensitivePathChecks,
                this.trace, this.progress, this.skipFileChecks, this.maxCharsPerCell);
    }
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,36 @@ object CsvValidatorJavaBridge {

@deprecated
def validate(csvFile: String, csvEncoding: Charset, csvSchemaFile: String, csvSchemaEncoding: Charset, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean): JList[FailMessage] =
validate(csvFile, csvEncoding, true, csvSchemaFile, csvSchemaEncoding, false, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, None, false, 4096)
validateTextFile(csvFile, csvEncoding, true, csvSchemaFile, csvSchemaEncoding, false, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, None)

@deprecated
def validate(csvFile: String, csvEncoding: Charset, csvSchemaFile: String, csvSchemaEncoding: Charset, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean, progress: ProgressCallback): JList[FailMessage] = {
val sProgressCallback = new SProgressCallback {
override def update(complete: this.type#Percentage) = progress.update(complete)
}
validate(csvFile, csvEncoding, true, csvSchemaFile, csvSchemaEncoding, false, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, Some(sProgressCallback), false, 4096)
validateTextFile(csvFile, csvEncoding, true, csvSchemaFile, csvSchemaEncoding, false, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, Some(sProgressCallback))
}

@deprecated("use latest validate")
def validate(csvFile: String, csvEncoding: Charset, validateCsvEncoding: Boolean, csvSchemaFile: String, csvSchemaEncoding: Charset, validateCsvSchemaEncoding: Boolean, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean): JList[FailMessage] =
validate(csvFile, csvEncoding, validateCsvEncoding, csvSchemaFile, csvSchemaEncoding, validateCsvSchemaEncoding, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, None, false, 4096)
validateTextFile(csvFile, csvEncoding, validateCsvEncoding, csvSchemaFile, csvSchemaEncoding, validateCsvSchemaEncoding, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, None)

@deprecated("use latest validate")
def validate(csvFile: String, csvEncoding: Charset, validateCsvEncoding: Boolean, csvSchemaFile: String, csvSchemaEncoding: Charset, validateCsvSchemaEncoding: Boolean, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean, progress: ProgressCallback): JList[FailMessage] = {
val sProgressCallback = new SProgressCallback {
override def update(complete: this.type#Percentage) = progress.update(complete)
}
validate(csvFile, csvEncoding, validateCsvEncoding, csvSchemaFile, csvSchemaEncoding, validateCsvSchemaEncoding, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, Some(sProgressCallback), false, 4096)
validateTextFile(csvFile, csvEncoding, validateCsvEncoding, csvSchemaFile, csvSchemaEncoding, validateCsvSchemaEncoding, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, Some(sProgressCallback))
}

private def validate(csvFile: String, csvEncoding: Charset, validateCsvEncoding: Boolean, csvSchemaFile: String, csvSchemaEncoding: Charset, validateCsvSchemaEncoding: Boolean, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean, progress: Option[SProgressCallback], skipFileChecks: Boolean, maxCharsPerCellLimit: Int): JList[FailMessage] = {
/**
 * Validates a CSV file against a CSV Schema file, with every option supplied by the caller.
 * `progress` may be null, in which case no progress callback is passed on.
 */
def validate(csvFile: String, csvEncoding: Charset, validateCsvEncoding: Boolean, csvSchemaFile: String, csvSchemaEncoding: Charset, validateCsvSchemaEncoding: Boolean, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean, progress: ProgressCallback, skipFileChecks: Boolean, maxCharsPerCellLimit: Int): JList[FailMessage] = {
  // Option(null) is None, so a missing Java callback becomes "no callback".
  val scalaCallback: Option[SProgressCallback] = Option(progress).map { javaCallback =>
    new SProgressCallback {
      override def update(complete: this.type#Percentage) = javaCallback.update(complete)
    }
  }
  validateTextFile(
    csvFile, csvEncoding, validateCsvEncoding,
    csvSchemaFile, csvSchemaEncoding, validateCsvSchemaEncoding,
    failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace,
    scalaCallback, skipFileChecks, maxCharsPerCellLimit)
}

private def validateTextFile(csvFile: String, csvEncoding: Charset, validateCsvEncoding: Boolean, csvSchemaFile: String, csvSchemaEncoding: Charset, validateCsvSchemaEncoding: Boolean, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean, progress: Option[SProgressCallback], skipFileChecks: Boolean=false, maxCharsPerCellLimit: Int=4096): JList[FailMessage] = {

import scala.jdk.CollectionConverters._

Expand Down Expand Up @@ -78,17 +87,26 @@ object CsvValidatorJavaBridge {
}
}

@deprecated("use latest validate")
def validate(csvData: JReader, csvSchema: JReader, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean): JList[FailMessage] =
validate(csvData, csvSchema, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, None)
validateReader(csvData, csvSchema, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, None)

@deprecated("use latest validate")
def validate(csvData: JReader, csvSchema: JReader, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean, progress: ProgressCallback): JList[FailMessage] = {
val sProgressCallback = new SProgressCallback {
override def update(complete: this.type#Percentage) = progress.update(complete)
}
validate(csvData, csvSchema, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, Some(sProgressCallback))
validateReader(csvData, csvSchema, failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace, Some(sProgressCallback))
}

/**
 * Validates CSV data from a reader against a CSV Schema from a reader, with every option
 * supplied by the caller. `progress` may be null, in which case no progress callback is passed on.
 */
def validate(csvData: JReader, csvSchema: JReader, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean, progress: ProgressCallback, skipFileChecks: Boolean, maxCharsPerCellLimit: Int): JList[FailMessage] = {
  // Option(null) is None, so a missing Java callback becomes "no callback".
  val scalaCallback: Option[SProgressCallback] = Option(progress).map { javaCallback =>
    new SProgressCallback {
      override def update(complete: this.type#Percentage) = javaCallback.update(complete)
    }
  }
  validateReader(
    csvData, csvSchema,
    failFast, pathSubstitutionsList, enforceCaseSensitivePathChecks, trace,
    scalaCallback, skipFileChecks, maxCharsPerCellLimit)
}

private def validate(csvData: JReader, csvSchema: JReader, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean, progress: Option[SProgressCallback], skipFileChecks: Boolean = false, maxCharsPerCell: Int = 4096): JList[FailMessage] = {
private def validateReader(csvData: JReader, csvSchema: JReader, failFast: Boolean, pathSubstitutionsList: JList[Substitution], enforceCaseSensitivePathChecks: Boolean, trace: Boolean, progress: Option[SProgressCallback], skipFileChecks: Boolean = false, maxCharsPerCell: Int = 4096): JList[FailMessage] = {

import scala.jdk.CollectionConverters._

Expand Down

0 comments on commit dfc925c

Please sign in to comment.