Skip to content

Commit

Permalink
Fix the JSON output.
Browse files Browse the repository at this point in the history
Previously, only the last file was output if there was more than
one in the profile.

The problem is that valid JSON needs a [ at the beginning and a ] at the
end, with the objects separated by commas in between, which is hard to
do incrementally the way we do with the CSV output.

So I've changed this so that the json array is kept in memory and then
written once everything has finished processing. This is working.

It's possible that someone will have such a huge number of files that
they run out of memory. If that happens, I'll think of something else.
  • Loading branch information
MancunianSam committed Mar 5, 2025
1 parent baeecf2 commit 8f3ab01
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,9 @@ public interface ItemWriter<T> {
*/
void setExportTemplate(ExportTemplate template);

/**
* Writes the json output to the Writer.
* <p>
* The JSON rows are collected while items are written and are only emitted
* when this method is called, so it is presumably meant to be invoked once,
* after the last item has been written and before the writer is flushed or
* closed (TODO confirm that no caller invokes it more than once per export).
*/
void writeJson();

}
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ public void onItem(List<? extends ProfileResourceNode> itemChunk)
cancelled = true;
} finally {
log.info(String.format("Closing export file: %s", destinationDescription));
itemWriter.writeJson();
itemWriter.close();
if (cancelled && destination != null) {
final Path toDelete = Paths.get(destination);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public class ItemWriterImpl implements ItemWriter<ProfileResourceNode> {

private CsvWriter csvWriter;
private Writer writer;
private FormattedDataWriter.OutputJson outputJson;
private ExportOptions options = ExportOptions.ONE_ROW_PER_FILE;
private ExportOutputOptions outputOptions;

Expand Down Expand Up @@ -121,15 +122,15 @@ public void write(List<? extends ProfileResourceNode> nodes) {
switch (options) {
case ONE_ROW_PER_FILE: {
if (outputOptions == ExportOutputOptions.JSON_OUTPUT) {
dataWriter.writeJsonForOneRowPerFile(nodes, allHeaders, this.writer);
dataWriter.writeJsonForOneRowPerFile(nodes, allHeaders, this.outputJson);
} else {
writeOneRowPerFile(nodes, dataWriter);
}
break;
}
case ONE_ROW_PER_FORMAT: {
if (outputOptions == ExportOutputOptions.JSON_OUTPUT) {
dataWriter.writeJsonForOneRowPerFormat(nodes, allHeaders, this.writer);
dataWriter.writeJsonForOneRowPerFormat(nodes, allHeaders, this.outputJson);
} else {
writeOneRowPerFormat(nodes, dataWriter);
}
Expand Down Expand Up @@ -179,6 +180,7 @@ void setCsvWriter(CsvWriter csvWriter) {
@Override
public void open(final Writer outputWriter) {
this.writer = outputWriter;
this.outputJson = new FormattedDataWriter.OutputJson(outputWriter);
final CsvWriterSettings csvWriterSettings = new CsvWriterSettings();
csvWriterSettings.setQuoteAllFields(quoteAllFields);
CsvFormat format = new CsvFormat();
Expand Down Expand Up @@ -291,6 +293,13 @@ public void setExportTemplate(ExportTemplate template) {
this.exportTemplate = template;
}

/**
 * Emits the collected JSON output by delegating to the {@code outputJson}
 * holder. Does nothing unless the configured output option is
 * {@code ExportOutputOptions.JSON_OUTPUT}.
 */
@Override
public void writeJson() {
    // Only the JSON export path accumulates rows in outputJson.
    if (outputOptions != ExportOutputOptions.JSON_OUTPUT) {
        return;
    }
    outputJson.writeJson();
}

private Set<String> getColumnsToWrite(String columnNames) {
if (columnNames != null && !columnNames.isEmpty()) {
String[] columns = columnNames.split(BLANK_SPACE_DELIMITER);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
import uk.gov.nationalarchives.droid.profile.ProfileResourceNode;
import uk.gov.nationalarchives.droid.profile.referencedata.Format;

import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand Down Expand Up @@ -76,8 +75,7 @@ public void writeDataRowsForOneRowPerFile(List<? extends ProfileResourceNode> no
}

@Override
public void writeJsonForOneRowPerFile(List<? extends ProfileResourceNode> nodes, String[] headers, Writer writer) {
OutputJson outputJson = new OutputJson();
public void writeJsonForOneRowPerFile(List<? extends ProfileResourceNode> nodes, String[] headers, OutputJson outputJson) {
int maxIdCount = getMaxIdentificationCount(nodes);
String hashHeader = headers[WriterConstants.HASH_ARRAY_INDEX];
for (ProfileResourceNode node : nodes) {
Expand All @@ -93,7 +91,7 @@ public void writeJsonForOneRowPerFile(List<? extends ProfileResourceNode> nodes,
}
outputJson.getArrayNode().add(objectNode);
}
outputJson.writeJson(writer);


}

Expand All @@ -114,8 +112,7 @@ public void writeDataRowsForOneRowPerFormat(List<? extends ProfileResourceNode>
}

@Override
public void writeJsonForOneRowPerFormat(List<? extends ProfileResourceNode> nodes, String[] headers, Writer writer) {
OutputJson outputJson = new OutputJson();
public void writeJsonForOneRowPerFormat(List<? extends ProfileResourceNode> nodes, String[] headers, OutputJson outputJson) {
String hashHeader = headers[WriterConstants.HASH_ARRAY_INDEX];
for (ProfileResourceNode node : nodes) {
for (Format format : node.getFormatIdentifications()) {
Expand All @@ -129,7 +126,6 @@ public void writeJsonForOneRowPerFormat(List<? extends ProfileResourceNode> node
outputJson.getArrayNode().add(objectNode);
}
}
outputJson.writeJson(writer);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ public abstract class FormattedDataWriter {

public abstract void writeHeadersForOneRowPerFile(List<? extends ProfileResourceNode> nodes, String[] headers, CsvWriter csvWriter);
public abstract void writeDataRowsForOneRowPerFile(List<? extends ProfileResourceNode> nodes, CsvWriter csvWriter);
public abstract void writeJsonForOneRowPerFile(List<? extends ProfileResourceNode> nodes, String[] headers, Writer writer);
public abstract void writeJsonForOneRowPerFile(List<? extends ProfileResourceNode> nodes, String[] headers, OutputJson outputJson);
public abstract void writeHeadersForOneRowPerFormat(List<? extends ProfileResourceNode> nodes, String[] headers, CsvWriter csvWriter);
public abstract void writeDataRowsForOneRowPerFormat(List<? extends ProfileResourceNode> nodes, CsvWriter csvWriter);
public abstract void writeJsonForOneRowPerFormat(List<? extends ProfileResourceNode> nodes, String[] headers, Writer writer);
public abstract void writeJsonForOneRowPerFormat(List<? extends ProfileResourceNode> nodes, String[] headers, OutputJson outputJson);

protected static String nullSafeName(Enum<?> value) {
return value == null ? WriterConstants.EMPTY_STRING : value.toString();
Expand Down Expand Up @@ -127,18 +127,20 @@ protected String[] getCustomisedHeaders() {
return this.customisedHeaders;
}

protected static class OutputJson {
public static class OutputJson {
private final ArrayNode arrayNode;
private final ObjectMapper objectMapper;
private final Writer writer;

public OutputJson() {
public OutputJson(Writer writer) {
this.objectMapper = new ObjectMapper();
this.arrayNode = objectMapper.createArrayNode();
this.writer = writer;
}

public void writeJson(Writer writer) {
public void writeJson() {
try {
objectMapper.writeValue(writer, arrayNode);
writer.write(objectMapper.writeValueAsString(arrayNode));
} catch (IOException e) {
throw new RuntimeException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
import uk.gov.nationalarchives.droid.profile.ProfileResourceNode;
import uk.gov.nationalarchives.droid.profile.referencedata.Format;

import java.io.Writer;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
Expand Down Expand Up @@ -69,9 +68,7 @@ public void writeDataRowsForOneRowPerFile(List<? extends ProfileResourceNode> no
}

@Override
public void writeJsonForOneRowPerFile(List<? extends ProfileResourceNode> nodes, String[] headers, Writer writer) {
OutputJson outputJson = new OutputJson();

public void writeJsonForOneRowPerFile(List<? extends ProfileResourceNode> nodes, String[] headers, OutputJson outputJson) {
int maxIdCount = getMaxIdentificationCount(nodes);
List<String> headersToWrite = getHeadersToWrite(maxIdCount);
Map<Integer, ExportTemplateColumnDef> columnPositions = template.getColumnOrderMap();
Expand All @@ -84,7 +81,6 @@ public void writeJsonForOneRowPerFile(List<? extends ProfileResourceNode> nodes,
}
outputJson.getArrayNode().add(rowNode);
}
outputJson.writeJson(writer);

}

Expand All @@ -102,9 +98,8 @@ public void writeDataRowsForOneRowPerFormat(List<? extends ProfileResourceNode>
}

@Override
public void writeJsonForOneRowPerFormat(List<? extends ProfileResourceNode> nodes, String[] headers, Writer writer) {
public void writeJsonForOneRowPerFormat(List<? extends ProfileResourceNode> nodes, String[] headers, OutputJson outputJson) {
super.setCustomisedHeaders(headers);
OutputJson outputJson = new OutputJson();
int maxIdCount = 1;
List<String> headersToWrite = getHeadersToWrite(maxIdCount);
Map<Integer, ExportTemplateColumnDef> columnPositions = template.getColumnOrderMap();
Expand All @@ -119,7 +114,6 @@ public void writeJsonForOneRowPerFormat(List<? extends ProfileResourceNode> node
outputJson.getArrayNode().add(rowNode);
}
}
outputJson.writeJson(writer);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ public synchronized void setFilter(Filter filter) {
@Override
public synchronized void commit() {
try {
itemWriter.writeJson();
writer.flush();
} catch (IOException e) {
LOG.error("Error flushing writer: " + e.getMessage(), e);
Expand Down

0 comments on commit 8f3ab01

Please sign in to comment.