diff --git a/droid-api/pom.xml b/droid-api/pom.xml index 20f117c4b..5a618566c 100644 --- a/droid-api/pom.xml +++ b/droid-api/pom.xml @@ -134,10 +134,26 @@ junit test - - org.hamcrest - hamcrest - test - + + jakarta.xml.bind + jakarta.xml.bind-api + test + + + xml-apis + xml-apis + test + + + org.apache.poi + poi + 5.2.5 + test + + + org.hamcrest + hamcrest + test + diff --git a/droid-api/src/main/java/uk/gov/nationalarchives/droid/internal/api/ContainerApi.java b/droid-api/src/main/java/uk/gov/nationalarchives/droid/internal/api/ContainerApi.java index c56eab86c..4303d67de 100644 --- a/droid-api/src/main/java/uk/gov/nationalarchives/droid/internal/api/ContainerApi.java +++ b/droid-api/src/main/java/uk/gov/nationalarchives/droid/internal/api/ContainerApi.java @@ -34,9 +34,9 @@ import java.io.InputStream; import java.nio.file.Path; -import uk.gov.nationalarchives.droid.container.ContainerFileIdentificationRequestFactory; -import uk.gov.nationalarchives.droid.container.ContainerSignatureFileReader; -import uk.gov.nationalarchives.droid.container.IdentifierEngine; +import uk.gov.nationalarchives.droid.container.*; +import uk.gov.nationalarchives.droid.container.gz.GzIdentifier; +import uk.gov.nationalarchives.droid.container.gz.GzIdentifierEngine; import uk.gov.nationalarchives.droid.container.ole2.Ole2Identifier; import uk.gov.nationalarchives.droid.container.ole2.Ole2IdentifierEngine; import uk.gov.nationalarchives.droid.container.zip.ZipIdentifier; @@ -70,12 +70,24 @@ private IdentificationRequestFactory requestFactory() { return new ContainerFileIdentificationRequestFactory(); } - private IdentifierEngine identifierEngine() { + private IdentifierEngine zipIdentifierEngine() { ZipIdentifierEngine engine = new ZipIdentifierEngine(); engine.setRequestFactory(requestFactory()); return engine; } + private IdentifierEngine gzIdentifierEngine() { + GzIdentifierEngine engine = new GzIdentifierEngine(); + engine.setRequestFactory(requestFactory()); + return engine; + } + + 
private Ole2IdentifierEngine ole2IdentifierEngine() { + Ole2IdentifierEngine engine = new Ole2IdentifierEngine(); + engine.setRequestFactory(requestFactory()); + return engine; + } + private ArchiveFormatResolver archiveFormatResolver() { return new ArchiveFormatResolverImpl(); } @@ -84,42 +96,29 @@ private ContainerIdentifierFactory identifierFactory() { return new ContainerIdentifierFactoryImpl(); } - public ZipIdentifier zipIdentifier() { - ZipIdentifier zip = new ZipIdentifier(); - zip.setContainerType("ZIP"); - zip.setContainerIdentifierFactory(identifierFactory()); - zip.setContainerFormatResolver(archiveFormatResolver()); - zip.setDroidCore(droid); - zip.setIdentifierEngine(identifierEngine()); - zip.setSignatureReader(signatureReader()); - - try { - zip.init(); - } catch (SignatureFileException e) { - throw new RuntimeException("Unable to init zip identifier", e); - } - return zip; + public GzIdentifier gzIdentifier() { + return initialiseContainerIdentifier(new GzIdentifier(), gzIdentifierEngine()); } - private Ole2IdentifierEngine ole2IdentifierEngine() { - Ole2IdentifierEngine engine = new Ole2IdentifierEngine(); - engine.setRequestFactory(requestFactory()); - return engine; + public ZipIdentifier zipIdentifier() { + return initialiseContainerIdentifier(new ZipIdentifier(), zipIdentifierEngine()); } public Ole2Identifier ole2Identifier() { - Ole2Identifier ole2 = new Ole2Identifier(); - ole2.setContainerType("OLE2"); - ole2.setContainerIdentifierFactory(identifierFactory()); - ole2.setContainerFormatResolver(archiveFormatResolver()); - ole2.setDroidCore(droid); - ole2.setIdentifierEngine(ole2IdentifierEngine()); - ole2.setSignatureReader(signatureReader()); + return initialiseContainerIdentifier(new Ole2Identifier(), ole2IdentifierEngine()); + } + + private T initialiseContainerIdentifier(T containerIdentifier, U identifierEngine) { + containerIdentifier.setContainerIdentifierFactory(identifierFactory()); + 
containerIdentifier.setContainerFormatResolver(archiveFormatResolver()); + containerIdentifier.setDroidCore(droid); + containerIdentifier.setIdentifierEngine(identifierEngine); + containerIdentifier.setSignatureReader(signatureReader()); try { - ole2.init(); + containerIdentifier.init(); } catch (SignatureFileException ex) { - throw new RuntimeException("Unable to init Ole2Identifier", ex); + throw new RuntimeException("Unable to init " + containerIdentifier.getClass().getSimpleName(), ex); } - return ole2; + return containerIdentifier; } } diff --git a/droid-api/src/main/java/uk/gov/nationalarchives/droid/internal/api/DroidAPI.java b/droid-api/src/main/java/uk/gov/nationalarchives/droid/internal/api/DroidAPI.java index a832794b4..5c5c7fa30 100644 --- a/droid-api/src/main/java/uk/gov/nationalarchives/droid/internal/api/DroidAPI.java +++ b/droid-api/src/main/java/uk/gov/nationalarchives/droid/internal/api/DroidAPI.java @@ -34,6 +34,7 @@ import java.nio.file.Files; import java.io.IOException; import java.nio.file.Path; +import java.util.Arrays; import java.util.List; import java.util.Optional; import java.util.ResourceBundle; @@ -73,6 +74,7 @@ public final class DroidAPI { private static final String ZIP_PUID = "x-fmt/263"; private static final String OLE2_PUID = "fmt/111"; + private static final String GZIP_PUID = "x-fmt/266"; private static final AtomicLong ID_GENERATOR = new AtomicLong(); @@ -82,16 +84,19 @@ public final class DroidAPI { private final ContainerIdentifier ole2Identifier; + private final ContainerIdentifier gzIdentifier; + private final String containerSignatureVersion; private final String binarySignatureVersion; private final String droidVersion; - private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, ContainerIdentifier ole2Identifier, String containerSignatureVersion, String binarySignatureVersion, String droidVersion) { + private DroidAPI(DroidCore droidCore, ContainerIdentifier zipIdentifier, ContainerIdentifier 
ole2Identifier, ContainerIdentifier gzIdentifier, String containerSignatureVersion, String binarySignatureVersion, String droidVersion) { this.droidCore = droidCore; this.zipIdentifier = zipIdentifier; this.ole2Identifier = ole2Identifier; + this.gzIdentifier = gzIdentifier; this.containerSignatureVersion = containerSignatureVersion; this.binarySignatureVersion = binarySignatureVersion; this.droidVersion = droidVersion; @@ -114,7 +119,7 @@ public static DroidAPI getInstance(final Path binarySignature, final Path contai String containerVersion = StringUtils.substringAfterLast(containerSignature.getFileName().toString(), "-").split("\\.")[0]; String droidVersion = ResourceBundle.getBundle("options").getString("version_no"); ContainerApi containerApi = new ContainerApi(droidCore, containerSignature); - return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion); + return new DroidAPI(droidCore, containerApi.zipIdentifier(), containerApi.ole2Identifier(), containerApi.gzIdentifier(), containerVersion, droidCore.getSigFile().getVersion(), droidVersion); } /** @@ -180,25 +185,20 @@ private IdentificationResultCollection identifyByExtension(final FileSystemIdent } private Optional getContainerPuid(final IdentificationResultCollection binaryResult) { - return binaryResult.getResults().stream().filter(x -> - ZIP_PUID.equals(x.getPuid()) || OLE2_PUID.equals(x.getPuid()) - ).map(IdentificationResult::getPuid).findFirst(); + List containerPuids = Arrays.asList(ZIP_PUID, OLE2_PUID, GZIP_PUID); + return binaryResult.getResults().stream() + .map(IdentificationResult::getPuid) + .filter(containerPuids::contains).findFirst(); } private IdentificationResultCollection handleContainer(final IdentificationResultCollection binaryResult, final FileSystemIdentificationRequest identificationRequest, final String containerPuid) throws IOException { - ContainerIdentifier identifier; - - 
switch (containerPuid) { - case ZIP_PUID: - identifier = zipIdentifier; - break; - case OLE2_PUID: - identifier = ole2Identifier; - break; - default: - throw new RuntimeException("Unknown container PUID : " + containerPuid); - } + ContainerIdentifier identifier = switch (containerPuid) { + case ZIP_PUID -> zipIdentifier; + case OLE2_PUID -> ole2Identifier; + case GZIP_PUID -> gzIdentifier; + default -> throw new RuntimeException("Unknown container PUID : " + containerPuid); + }; IdentificationResultCollection containerResults = identifier.submit(identificationRequest); droidCore.removeLowerPriorityHits(containerResults); diff --git a/droid-api/src/test/java/uk/gov/nationalarchives/droid/internal/api/DroidAPITest.java b/droid-api/src/test/java/uk/gov/nationalarchives/droid/internal/api/DroidAPITest.java index 6735c3883..ee4957d9a 100644 --- a/droid-api/src/test/java/uk/gov/nationalarchives/droid/internal/api/DroidAPITest.java +++ b/droid-api/src/test/java/uk/gov/nationalarchives/droid/internal/api/DroidAPITest.java @@ -35,10 +35,12 @@ import org.junit.Test; import uk.gov.nationalarchives.droid.core.SignatureParseException; import uk.gov.nationalarchives.droid.core.interfaces.IdentificationMethod; - +import uk.gov.nationalarchives.droid.internal.api.DroidAPITestUtils.ContainerType; import java.io.IOException; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; +import java.util.Optional; import java.util.ResourceBundle; import java.util.stream.Collectors; @@ -47,6 +49,7 @@ import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.notNullValue; +import static uk.gov.nationalarchives.droid.internal.api.DroidAPITestUtils.*; public class DroidAPITest { @@ -62,6 +65,66 @@ public void should_create_non_null_instance_using_test_utility_class() { assertThat(api, is(notNullValue())); } + @Test + public void should_match_gzip_container_file() { + String data = "TEST"; + ContainerType 
containerType = new ContainerType("GZIP", generateId(),"x-fmt/266"); + DroidAPI api = DroidAPITestUtils.createApiForContainer(new DroidAPITestUtils.ContainerFile(containerType, data, "fmt/12345", Optional.empty())); + try { + List results = api.submit(DroidAPITestUtils.generateGzFile(data)); + assertThat(results, hasSize(1)); + assertThat(results.getFirst().getPuid(), is("fmt/12345")); + assertThat(results.getFirst().getMethod(), is(IdentificationMethod.CONTAINER)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Test + public void should_match_zip_container_file() { + String data = "TEST"; + ContainerType containerType = new ContainerType("ZIP", generateId(),"x-fmt/263"); + DroidAPI api = DroidAPITestUtils.createApiForContainer(new DroidAPITestUtils.ContainerFile(containerType, data, "fmt/12345", Optional.of(data))); + try { + List results = api.submit(DroidAPITestUtils.generateZipFile(data, data)); + assertThat(results, hasSize(1)); + assertThat(results.getFirst().getPuid(), is("fmt/12345")); + assertThat(results.getFirst().getMethod(), is(IdentificationMethod.CONTAINER)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Test + public void should_match_ole2_container_file() { + String data = "TEST"; + ContainerType containerType = new ContainerType("OLE2", generateId(),"fmt/111"); + DroidAPI api = DroidAPITestUtils.createApiForContainer(new DroidAPITestUtils.ContainerFile(containerType, data, "fmt/12345", Optional.of(data))); + try { + List results = api.submit(DroidAPITestUtils.generateOle2File(data, data)); + assertThat(results, hasSize(1)); + assertThat(results.getFirst().getPuid(), is("fmt/12345")); + assertThat(results.getFirst().getMethod(), is(IdentificationMethod.CONTAINER)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Test(expected = IOException.class) + public void should_throw_an_exception_if_file_cannot_be_read() throws IOException { + api.submit(Path.of("/invalidpath")); 
+ } + + @Test(expected = RuntimeException.class) + public void should_throw_an_exception_if_container_file_cannot_be_read() throws SignatureParseException { + DroidAPI.getInstance(signaturePath, Path.of("/invalidContainerPath")); + } + + @Test(expected = SignatureParseException.class) + public void should_throw_an_exception_if_signature_file_cannot_be_read() throws SignatureParseException { + DroidAPI.getInstance(Path.of("/invalidSignaturePath"), containerPath); + } + @Test public void should_identify_given_file_with_binary_signature() throws IOException { List results = api.submit( @@ -70,7 +133,7 @@ public void should_identify_given_file_with_binary_signature() throws IOExceptio assertThat(results.size(), is(1)); - ApiResult identificationResult = results.get(0); + ApiResult identificationResult = results.getFirst(); assertThat(identificationResult.getPuid(), is("x-fmt/263")); assertThat(identificationResult.getName(), is("ZIP Format")); @@ -85,7 +148,7 @@ public void should_identify_given_file_using_container_signature() throws IOExce assertThat(results.size(), is(1)); - ApiResult identificationResult = results.get(0); + ApiResult identificationResult = results.getFirst(); assertThat(identificationResult.getPuid(), is("fmt/291")); assertThat(identificationResult.getName(), is("OpenDocument Text")); @@ -98,7 +161,7 @@ public void should_identify_given_file_using_file_extension() throws IOException assertThat(results, is(notNullValue())); assertThat(results, hasSize(1)); - ApiResult singleResult = results.get(0); + ApiResult singleResult = results.getFirst(); assertThat(singleResult.getPuid(), is("x-fmt/111")); assertThat(singleResult.getMethod(), is(IdentificationMethod.EXTENSION)); @@ -109,8 +172,8 @@ public void should_report_extension_of_the_file_under_identification_test() thro List resultsWithExtension = api.submit(Paths.get("src/test/resources/test.txt")); List resultsWithoutExtension = api.submit(Paths.get("src/test/resources/word97")); - 
assertThat(resultsWithExtension.get(0).getExtension(), is("txt")); - assertThat(resultsWithoutExtension.get(0).getExtension(), is("")); + assertThat(resultsWithExtension.getFirst().getExtension(), is("txt")); + assertThat(resultsWithoutExtension.getFirst().getExtension(), is("")); } @Test @@ -127,8 +190,8 @@ public void should_report_all_puids_when_there_are_more_than_one_identification_ public void should_report_when_there_is_an_extension_mismatch() throws IOException { List results = api.submit(Paths.get("src/test/resources/docx-file-as-xls.xlsx")); assertThat(results.size(), is(1)); - assertThat(results.get(0).getPuid(), is("fmt/412")); - assertThat(results.get(0).isFileExtensionMismatch(), is(true)); + assertThat(results.getFirst().getPuid(), is("fmt/412")); + assertThat(results.getFirst().isFileExtensionMismatch(), is(true)); } @Test @@ -160,6 +223,6 @@ public void should_produce_results_for_every_time_a_file_is_submitted_for_identi public void should_identify_fmt_40_correctly_with_container_identification_method() throws IOException { List results = api.submit( Paths.get("../droid-container/src/test/resources/word97.doc")); - assertThat(results.get(0).getName(), is("Microsoft Word Document")); + assertThat(results.getFirst().getName(), is("Microsoft Word Document")); } } diff --git a/droid-api/src/test/java/uk/gov/nationalarchives/droid/internal/api/DroidAPITestUtils.java b/droid-api/src/test/java/uk/gov/nationalarchives/droid/internal/api/DroidAPITestUtils.java index 50585d368..63395760d 100644 --- a/droid-api/src/test/java/uk/gov/nationalarchives/droid/internal/api/DroidAPITestUtils.java +++ b/droid-api/src/test/java/uk/gov/nationalarchives/droid/internal/api/DroidAPITestUtils.java @@ -31,19 +31,286 @@ */ package uk.gov.nationalarchives.droid.internal.api; +import jakarta.xml.bind.JAXBException; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.w3c.dom.Document; +import org.w3c.dom.Element; import 
uk.gov.nationalarchives.droid.core.SignatureParseException; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import java.io.ByteArrayInputStream; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Optional; +import java.util.zip.GZIPOutputStream; +import java.util.zip.ZipOutputStream; +import java.util.zip.ZipEntry; /** * Class to create an instance of DroidAPI for testing purpose. * It makes use of hardcoded signature paths for current version */ public class DroidAPITestUtils { + static Path signaturePath = Paths.get("../droid-results/custom_home/signature_files/DROID_SignatureFile_V119.xml"); + static Path containerPath = Paths.get("../droid-results/custom_home/container_sigs/container-signature-20240715.xml"); + public static DroidAPI createApi() throws SignatureParseException { - Path signaturePath = Paths.get("../droid-results/custom_home/signature_files/DROID_SignatureFile_V119.xml"); - Path containerPath = Paths.get("../droid-results/custom_home/container_sigs/container-signature-20240715.xml"); return DroidAPI.getInstance(signaturePath, containerPath); //Create only once instance of Droid. 
} + + public record ContainerType(String name, String id, String puid) {} + public record ContainerFile(ContainerType containerType, String sequence, String puid, Optional path) {} + + public static String generateId() { + return Long.toString(Math.round(Math.random() * 1000)); + } + + private static Path generateFile(String extension) { + try { + return Files.createTempDirectory("test").resolve("test.%sm".formatted(extension)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public static Path generateZipFile(String data, String fileName) { + Path testFile = generateFile("zip"); + + try (FileOutputStream fileOutputStream = new FileOutputStream(testFile.toFile()); + ZipOutputStream zipOutputStream = new ZipOutputStream(fileOutputStream)) { + + ZipEntry zipEntry = new ZipEntry(fileName); + zipOutputStream.putNextEntry(zipEntry); + + zipOutputStream.write(data.getBytes()); + zipOutputStream.closeEntry(); + + } catch (IOException e) { + throw new RuntimeException(e); + } + return testFile; + } + + public static Path generateOle2File(String data, String entryName) { + Path testFile = generateFile("ole2"); + try (POIFSFileSystem fs = new POIFSFileSystem(); + FileOutputStream fos = new FileOutputStream(testFile.toFile())) { + + fs.createDocument(new ByteArrayInputStream(data.getBytes()), entryName); + + fs.writeFilesystem(fos); + + } catch (IOException e) { + throw new RuntimeException(e); + } + return testFile; + } + + public static Path generateGzFile(String data) { + Path outputFilePath = generateFile("gz"); + try (FileOutputStream fileOutputStream = new FileOutputStream(outputFilePath.toFile()); + GZIPOutputStream gzipOutputStream = new GZIPOutputStream(fileOutputStream)) { + gzipOutputStream.write(data.getBytes()); + } catch (IOException e) { + throw new RuntimeException(e); + } + return outputFilePath; + } + + public static DroidAPI createApiForContainer(ContainerFile signatureFile) { + try { + Path containerFilePath = 
generateContainerSignatureFile(signatureFile); + Path signatureFilePath = generateSignatureFile(signatureFile.puid, signatureFile.containerType); + return DroidAPI.getInstance(signatureFilePath, containerFilePath); + } catch (ParserConfigurationException | IOException | TransformerException | JAXBException | + SignatureParseException e) { + throw new RuntimeException(e); + } + } + + private static Path generateSignatureFile(String puid, ContainerType containerType) throws ParserConfigurationException, IOException, TransformerException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document doc = builder.newDocument(); + + Element root = doc.createElement("FFSignatureFile"); + doc.appendChild(root); + + Element fileFormatCollection = doc.createElement("FileFormatCollection"); + root.appendChild(fileFormatCollection); + + + Element internalSignatureCollection = createContainerInternalSignature(doc, containerType); + root.appendChild(internalSignatureCollection); + + Element fileFormat = doc.createElement("FileFormat"); + fileFormat.setAttribute("ID", generateId()); + fileFormat.setAttribute("PUID", puid); + + Element containerFormat = doc.createElement("FileFormat"); + containerFormat.setAttribute("ID", generateId()); + containerFormat.setAttribute("PUID", containerType.puid); + + Element internalSignatureId = doc.createElement("InternalSignatureID"); + internalSignatureId.setTextContent(containerType.id); + containerFormat.appendChild(internalSignatureId); + + fileFormatCollection.appendChild(fileFormat); + fileFormatCollection.appendChild(containerFormat); + + return getXmlFile(doc, "signatures"); + } + + private static Element createContainerInternalSignature(Document doc, ContainerType containerType) { + String sequenceText = switch (containerType.puid) { + case "x-fmt/266" -> "1F8B08"; + case "fmt/189" -> "5B436F6E74656E745F54797065735D2E786D6C20A2"; + case "x-fmt/263" -> 
"504B0304"; + case "fmt/111" -> "D0CF11E0A1B11AE1"; + default -> throw new RuntimeException("Unknown container type: " + containerType.name); + }; + + Element internalSignatureCollection = doc.createElement("InternalSignatureCollection"); + + Element internalSignature = doc.createElement("InternalSignature"); + internalSignature.setAttribute("ID", containerType.id); + + Element byteSequence = doc.createElement("ByteSequence"); + byteSequence.setAttribute("Reference", "BOFoffset"); + + Element subsequence = doc.createElement("SubSequence"); + subsequence.setAttribute("MinFragLength", "0"); + subsequence.setAttribute("Position", "1"); + subsequence.setAttribute("SubSeqMaxOffset", "0"); + subsequence.setAttribute("SubSeqMinOffset", "0"); + + Element sequence = doc.createElement("Sequence"); + sequence.setTextContent(sequenceText); + + subsequence.appendChild(sequence); + byteSequence.appendChild(subsequence); + internalSignature.appendChild(byteSequence); + internalSignatureCollection.appendChild(internalSignature); + + return internalSignatureCollection; + } + + private static Path generateContainerSignatureFile(ContainerFile signatureFile) throws JAXBException, IOException, TransformerException, ParserConfigurationException { + String signatureId = generateId(); + + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document doc = builder.newDocument(); + + // Root element + Element root = doc.createElement("ContainerSignatureMapping"); + root.setAttribute("schemaVersion", "1.0"); + root.setAttribute("signatureVersion", "38"); + doc.appendChild(root); + + // ContainerSignatures + Element containerSignatures = doc.createElement("ContainerSignatures"); + root.appendChild(containerSignatures); + + // ContainerSignature + Element containerSignature = doc.createElement("ContainerSignature"); + containerSignature.setAttribute("Id", signatureId); + 
containerSignature.setAttribute("ContainerType", signatureFile.containerType.name); + containerSignatures.appendChild(containerSignature); + + // Files + Element files = doc.createElement("Files"); + containerSignature.appendChild(files); + + // File + Element file = doc.createElement("File"); + files.appendChild(file); + + if (signatureFile.path.isPresent()) { + Element path = doc.createElement("Path"); + path.setTextContent(signatureFile.path.get()); + file.appendChild(path); + } + + // BinarySignatures + Element binarySignatures = doc.createElement("BinarySignatures"); + file.appendChild(binarySignatures); + + // InternalSignatureCollection + Element internalSignatureCollection = doc.createElement("InternalSignatureCollection"); + binarySignatures.appendChild(internalSignatureCollection); + + // InternalSignature + Element internalSignature = doc.createElement("InternalSignature"); + internalSignature.setAttribute("ID", signatureId); + internalSignatureCollection.appendChild(internalSignature); + + // ByteSequence + Element byteSequence = doc.createElement("ByteSequence"); + byteSequence.setAttribute("Reference", "BOFoffset"); + internalSignature.appendChild(byteSequence); + + // SubSequence + Element subSequence = doc.createElement("SubSequence"); + subSequence.setAttribute("Position", "1"); + subSequence.setAttribute("SubSeqMinOffset", "0"); + subSequence.setAttribute("SubSeqMaxOffset", "1024"); + byteSequence.appendChild(subSequence); + + // Sequence + Element sequence = doc.createElement("Sequence"); + sequence.setTextContent("'%s'".formatted(signatureFile.sequence)); + subSequence.appendChild(sequence); + + // FileFormatMappings + Element fileFormatMappings = doc.createElement("FileFormatMappings"); + root.appendChild(fileFormatMappings); + + // FileFormatMapping + Element fileFormatMapping = doc.createElement("FileFormatMapping"); + fileFormatMapping.setAttribute("signatureId", signatureId); + fileFormatMapping.setAttribute("Puid", signatureFile.puid); + 
fileFormatMappings.appendChild(fileFormatMapping); + + // TriggerPuids + Element triggerPuids = doc.createElement("TriggerPuids"); + root.appendChild(triggerPuids); + + // TriggerPuid + Element triggerPuid = doc.createElement("TriggerPuid"); + triggerPuid.setAttribute("ContainerType", signatureFile.containerType.name); + triggerPuid.setAttribute("Puid", signatureFile.containerType.puid); + triggerPuids.appendChild(triggerPuid); + + return getXmlFile(doc, "containers"); + } + /** Writes {@code doc} as indented XML to {@code fileType + ".xml"} inside a newly created temporary directory and returns that file's path. */ + private static Path getXmlFile(Document doc, String fileType) throws IOException, TransformerException { + TransformerFactory transformerFactory = TransformerFactory.newInstance(); + Transformer transformer = transformerFactory.newTransformer(); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); + transformer.setOutputProperty(OutputKeys.METHOD, "xml"); + transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); + + Path containersDirectory = Files.createTempDirectory(fileType); + Path containersFile = containersDirectory.resolve(fileType + ".xml"); + DOMSource source = new DOMSource(doc); + try (FileWriter writer = new FileWriter(containersFile.toString())) { // writer is closed (and flushed) even if transform() throws + transformer.transform(source, new StreamResult(writer)); + } + return containersFile; + } } diff --git a/droid-build-tools/src/main/resources/checkstyle-main.xml b/droid-build-tools/src/main/resources/checkstyle-main.xml index 4bd572999..e1bb8b181 100644 --- a/droid-build-tools/src/main/resources/checkstyle-main.xml +++ b/droid-build-tools/src/main/resources/checkstyle-main.xml @@ -53,7 +53,6 @@ - @@ -156,7 +155,7 @@ - + diff --git a/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ContainerFile.java b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ContainerFile.java index 68b9a7d78..b982b2eae 100644 --- 
a/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ContainerFile.java +++ b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ContainerFile.java @@ -57,7 +57,7 @@ public class ContainerFile { @XmlTransient private boolean compileError; - @XmlElement(name = "Path") + @XmlElement(name = "Path", required = false) private String path; //@XmlElement(name = "TextSignature") diff --git a/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ContainerSignature.java b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ContainerSignature.java index d55f7b521..594d96ce9 100644 --- a/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ContainerSignature.java +++ b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ContainerSignature.java @@ -88,7 +88,8 @@ public Map getFiles() { if (this.filesMap == null) { Map containerFileMap = new HashMap(); for (ContainerFile file : files) { - containerFileMap.put(file.getPath(), file); + String path = file.getPath() == null ? "." : file.getPath(); + containerFileMap.put(path, file); } this.filesMap = containerFileMap; } diff --git a/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/gz/GzIdentifier.java b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/gz/GzIdentifier.java new file mode 100644 index 000000000..e391a0f3e --- /dev/null +++ b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/gz/GzIdentifier.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, The National Archives + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the The National Archives nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +package uk.gov.nationalarchives.droid.container.gz; + +import uk.gov.nationalarchives.droid.container.AbstractContainerIdentifier; +import uk.gov.nationalarchives.droid.container.ContainerSignatureMatchCollection; +import uk.gov.nationalarchives.droid.core.interfaces.IdentificationRequest; + +import java.io.IOException; +/** Container identifier for the "GZIP" container type; it hands each request to its identifier engine. */ +public class GzIdentifier extends AbstractContainerIdentifier { + /** Installs a default GzIdentifierEngine and sets the container type to "GZIP". */ + public GzIdentifier() { + setIdentifierEngine(new GzIdentifierEngine()); + setContainerType("GZIP"); + } + /** Forwards the request and the match collection to the configured identifier engine. */ + @Override + protected void process(IdentificationRequest request, ContainerSignatureMatchCollection matches) throws IOException { + getIdentifierEngine().process(request, matches); + } +} diff --git a/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/gz/GzIdentifierEngine.java b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/gz/GzIdentifierEngine.java new file mode 100644 index 000000000..9e60b9f09 --- /dev/null +++ b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/gz/GzIdentifierEngine.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, The National Archives + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the The National Archives nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package uk.gov.nationalarchives.droid.container.gz;
+
+import uk.gov.nationalarchives.droid.container.AbstractIdentifierEngine;
+import uk.gov.nationalarchives.droid.container.ContainerSignatureMatch;
+import uk.gov.nationalarchives.droid.container.ContainerSignatureMatchCollection;
+import uk.gov.nationalarchives.droid.core.interfaces.IdentificationRequest;
+import uk.gov.nationalarchives.droid.core.signature.ByteReader;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Identifier engine for GZIP containers.
+ * <p>
+ * For every file entry named by the match collection, the request's source
+ * stream is decompressed with a {@link GZIPInputStream} and the decompressed
+ * bytes are offered to each container signature match.
+ */
+public class GzIdentifierEngine extends AbstractIdentifierEngine {
+
+    /**
+     * Runs every container signature match against the decompressed content of
+     * the request, once per file entry in the match collection.
+     *
+     * @param request the identification request whose source stream holds the GZIP data
+     * @param matches the signature matches to evaluate and the entry names to evaluate them for
+     * @throws IOException if the source cannot be opened or read, or if it is not
+     *         valid GZIP data (raised by the {@link GZIPInputStream} constructor)
+     */
+    @Override
+    public void process(IdentificationRequest request, ContainerSignatureMatchCollection matches) throws IOException {
+        for (String entryName : matches.getAllFileEntries()) {
+            // A fresh source stream, decompressor and reader are opened for each entry.
+            // The source stream is declared as its own resource so that it is still
+            // closed when the GZIPInputStream constructor rejects the header; opened
+            // inline it would leak in that case.
+            try (InputStream source = request.getSourceInputStream();
+                 GZIPInputStream stream = new GZIPInputStream(source);
+                 ByteReader reader = newByteReader(stream)) {
+                // Typed list: a raw List cannot be iterated as ContainerSignatureMatch.
+                List<ContainerSignatureMatch> matchList = matches.getContainerSignatureMatches();
+                for (ContainerSignatureMatch match : matchList) {
+                    match.matchBinaryContent(entryName, reader);
+                }
+            }
+        }
+    }
+}
diff --git
a/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ole2/Ole2Identifier.java b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ole2/Ole2Identifier.java index d4ec994e9..bf8d13211 100644 --- a/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ole2/Ole2Identifier.java +++ b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/ole2/Ole2Identifier.java @@ -48,6 +48,7 @@ public class Ole2Identifier extends AbstractContainerIdentifier { */ public Ole2Identifier() { setIdentifierEngine(new Ole2IdentifierEngine()); + setContainerType("OLE2"); } @Override diff --git a/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/zip/ZipIdentifier.java b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/zip/ZipIdentifier.java index 9ae1ea2dd..038d9493d 100644 --- a/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/zip/ZipIdentifier.java +++ b/droid-container/src/main/java/uk/gov/nationalarchives/droid/container/zip/ZipIdentifier.java @@ -48,6 +48,7 @@ public class ZipIdentifier extends AbstractContainerIdentifier { */ public ZipIdentifier() { setIdentifierEngine(new ZipIdentifierEngine()); + setContainerType("ZIP"); } @Override diff --git a/droid-container/src/test/java/uk/gov/nationalarchives/droid/container/ContainerSignatureSaxParserTest.java b/droid-container/src/test/java/uk/gov/nationalarchives/droid/container/ContainerSignatureSaxParserTest.java index 1cd449e3d..3218cd447 100644 --- a/droid-container/src/test/java/uk/gov/nationalarchives/droid/container/ContainerSignatureSaxParserTest.java +++ b/droid-container/src/test/java/uk/gov/nationalarchives/droid/container/ContainerSignatureSaxParserTest.java @@ -34,6 +34,7 @@ import java.io.ByteArrayInputStream; import java.io.InputStream; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; import java.util.Optional; @@ 
-52,44 +53,74 @@ */ public class ContainerSignatureSaxParserTest { + @Test + public void testParseSignatureNoPath() throws Exception { + String xml = + """ + + + + + + + + + + + """; + ContainerSignatureSaxParser parser = new ContainerSignatureSaxParser(); + InputStream in = new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)); + + List signatures = parser.parse(in).getContainerSignatures(); + + assertEquals(1, signatures.size()); + + ContainerSignature signatureNoPath = signatures.get(0); + + Map files = signatureNoPath.getFiles(); + assertEquals(1, files.size()); + assertTrue(files.containsKey(".")); + } + @Test public void testParseSignatures() throws Exception { - - String xml = - "" - + "" - + " " - + " " - + " Microsoft Word 97" - + " " - + " " - + " WordDocument" - + " " - + " " - + " " - + " " - + " Microsoft Excel 97" - + " " - + " " - + " Workbook" - + " " - + " " - + " " - + " " - + ""; - + + String xml = """ + + + + + Microsoft Word 97 + + + WordDocument + + + + + Microsoft Excel 97 + + + Workbook + + + + + + """; + ContainerSignatureSaxParser parser = new ContainerSignatureSaxParser(); InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8")); - + List signatures = parser.parse(in).getContainerSignatures(); assertEquals(2, signatures.size()); - + ContainerSignature wordSignature = signatures.get(0); assertEquals("Microsoft Word 97", wordSignature.getDescription()); assertEquals(9, wordSignature.getId()); assertEquals(1, wordSignature.getFiles().size()); assertEquals("WordDocument", wordSignature.listFiles().get(0).getPath()); - + ContainerSignature excelSignature = signatures.get(1); assertEquals("Microsoft Excel 97", excelSignature.getDescription()); assertEquals(10, excelSignature.getId()); @@ -100,38 +131,40 @@ public void testParseSignatures() throws Exception { @Test public void folderBasedContainerSignaturesShouldKeepThePathsAsPresentedInSignatureFileForContainerFileMap() throws JAXBException, UnsupportedEncodingException, 
SignatureParseException { String xml = - "" - + "" - + " " - + " " - + " SIARD 2.1" - + " " - + " " - + " header/siardversion/2.1/" - + " " - + " " - + " " - + " " - + " SIARD 2.0" - + " " - + " " - + " header/metadata.xml" - + " " - + " " - + " " - + " " - + " " - + " 'xmlns=\"http://www.bar.admin.ch/xmlns/siard/2.0/metadata.xsd\"'" - + " " - + " " - + " " - + " " - + " " - + " " - + " " - + " " - + " " - + ""; + """ + + + + + SIARD 2.1 + + + header/siardversion/2.1/ + + + + + SIARD 2.0 + + + header/metadata.xml + + + + + + 'xmlns="http://www.bar.admin.ch/xmlns/siard/2.0/metadata.xsd"' + + + + + + + + + + + """; ContainerSignatureSaxParser parser = new ContainerSignatureSaxParser(); InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8")); List signatures = parser.parse(in).getContainerSignatures(); diff --git a/droid-core-interfaces/src/test/java/uk/gov/nationalarchives/droid/core/interfaces/archive/ArchiveFormatResolverImplTest.java b/droid-core-interfaces/src/test/java/uk/gov/nationalarchives/droid/core/interfaces/archive/ArchiveFormatResolverImplTest.java index 90074f6fa..614a12c09 100644 --- a/droid-core-interfaces/src/test/java/uk/gov/nationalarchives/droid/core/interfaces/archive/ArchiveFormatResolverImplTest.java +++ b/droid-core-interfaces/src/test/java/uk/gov/nationalarchives/droid/core/interfaces/archive/ArchiveFormatResolverImplTest.java @@ -65,7 +65,7 @@ public class ArchiveFormatResolverImplTest { public void testForPuid() { assertEquals("ZIP", formatResolver.forPuid("x-fmt/263")); assertEquals("TAR", formatResolver.forPuid("x-fmt/265")); - assertEquals("GZ", formatResolver.forPuid("x-fmt/266")); + assertEquals("GZIP", formatResolver.forPuid("x-fmt/266")); assertNull(formatResolver.forPuid("")); } } diff --git a/droid-core-interfaces/src/test/java/uk/gov/nationalarchives/droid/core/interfaces/archive/ArchiveHandlerFactoryTest.java 
b/droid-core-interfaces/src/test/java/uk/gov/nationalarchives/droid/core/interfaces/archive/ArchiveHandlerFactoryTest.java index 215074c51..7713d3b94 100644 --- a/droid-core-interfaces/src/test/java/uk/gov/nationalarchives/droid/core/interfaces/archive/ArchiveHandlerFactoryTest.java +++ b/droid-core-interfaces/src/test/java/uk/gov/nationalarchives/droid/core/interfaces/archive/ArchiveHandlerFactoryTest.java @@ -72,7 +72,7 @@ public void setup() { handlers.put("ZIP", zipHandler); handlers.put("TAR", tarHandler); - handlers.put("GZ", gzHandler); + handlers.put("GZIP", gzHandler); handlers.put("ARC", arcHandler); handlers.put("BZ", bzHandler); @@ -87,7 +87,7 @@ public void testGetEachTypeOfHandler() { assertEquals(zipHandler, factory.getHandler("ZIP")); assertEquals(tarHandler, factory.getHandler("TAR")); - assertEquals(gzHandler, factory.getHandler("GZ")); + assertEquals(gzHandler, factory.getHandler("GZIP")); assertEquals(arcHandler, factory.getHandler("ARC")); assertEquals(bzHandler, factory.getHandler("BZ")); assertEquals(sevenZipHandler, factory.getHandler("7Z")); diff --git a/droid-core/src/main/java/uk/gov/nationalarchives/droid/core/signature/ByteReader.java b/droid-core/src/main/java/uk/gov/nationalarchives/droid/core/signature/ByteReader.java index 45b94ec23..2fa79d474 100644 --- a/droid-core/src/main/java/uk/gov/nationalarchives/droid/core/signature/ByteReader.java +++ b/droid-core/src/main/java/uk/gov/nationalarchives/droid/core/signature/ByteReader.java @@ -78,7 +78,7 @@ * * @author linb, boreilly */ -public interface ByteReader { +public interface ByteReader extends AutoCloseable { /* Setters for identification status */ /** diff --git a/droid-results/src/main/java/uk/gov/nationalarchives/droid/submitter/SubmissionGateway.java b/droid-results/src/main/java/uk/gov/nationalarchives/droid/submitter/SubmissionGateway.java index ba3fb20c9..9242cab99 100644 --- a/droid-results/src/main/java/uk/gov/nationalarchives/droid/submitter/SubmissionGateway.java +++ 
b/droid-results/src/main/java/uk/gov/nationalarchives/droid/submitter/SubmissionGateway.java @@ -348,7 +348,7 @@ private String getArchiveFormat(IdentificationResultCollection results) { private boolean isProcessedArchiveOrWebArchiveFormat(String format) { return "ZIP".equals(format) && !processZip || "TAR".equals(format) && !processTar - || "GZ".equals(format) && !processGzip + || "GZIP".equals(format) && !processGzip || "RAR".equals(format) && !processRar || "7Z".equals(format) && !process7zip || "ISO".equals(format) && !processIso diff --git a/droid-results/src/main/resources/META-INF/spring-results.xml b/droid-results/src/main/resources/META-INF/spring-results.xml index e956c534d..5522ff148 100644 --- a/droid-results/src/main/resources/META-INF/spring-results.xml +++ b/droid-results/src/main/resources/META-INF/spring-results.xml @@ -70,6 +70,10 @@ http://www.springframework.org/schema/context http://www.springframework.org/sch + + + + @@ -83,6 +87,15 @@ http://www.springframework.org/schema/context http://www.springframework.org/sch + + + + + + + + + diff --git a/droid-tools/src/main/java/uk/gov/nationalarchives/droid/tools/SigUtils.java b/droid-tools/src/main/java/uk/gov/nationalarchives/droid/tools/SigUtils.java index 8de649395..ccf3b0431 100644 --- a/droid-tools/src/main/java/uk/gov/nationalarchives/droid/tools/SigUtils.java +++ b/droid-tools/src/main/java/uk/gov/nationalarchives/droid/tools/SigUtils.java @@ -64,6 +64,7 @@ import uk.gov.nationalarchives.droid.container.ContainerSignatureMatchCollection; import uk.gov.nationalarchives.droid.container.ContainerSignatureSaxParser; import uk.gov.nationalarchives.droid.container.IdentifierEngine; +import uk.gov.nationalarchives.droid.container.gz.GzIdentifierEngine; import uk.gov.nationalarchives.droid.container.ole2.Ole2IdentifierEngine; import uk.gov.nationalarchives.droid.container.zip.ZipIdentifierEngine; import uk.gov.nationalarchives.droid.core.IdentificationRequestByteReaderAdapter; @@ -111,6 +112,7 @@ 
public final class SigUtils { private static final String IO_EXCEPTION_PROCESSING = "IO exception processing: "; private static final int ZIP_SIG_ID = 200; + private static final int GZ_SIG_ID = 201; private static final int OLE2_SIG_ID = 170; static { @@ -128,6 +130,7 @@ public final class SigUtils { } private static final IdentifierEngine ZIP_IDENTIFIER_ENGINE = new ZipIdentifierEngine(); + private static final IdentifierEngine GZ_IDENTIFIER_ENGINE = new GzIdentifierEngine(); private static final IdentifierEngine OLE2_IDENTIFIER_ENGINE = new Ole2IdentifierEngine(); /** @@ -660,9 +663,9 @@ public static ContainerSignatureMatchCollection getContainerMatchCollection(Cont } /** - * Returns an identification engine (zip or ole2) for a file, or null if it can' be recognised. + * Returns an identification engine (zip, ole2 or gzip) for a file, or null if it can't be recognised. * @param filename The filename to get an identifier engine for. - * @return an identification engine (zip or ole2) for a file, or null if it can' be recognised. + * @return an identification engine (zip, ole2 or gzip) for a file, or null if it can't be recognised. * @throws IOException If an IO problem happens reading the file. */ public static IdentifierEngine getContainerIdentifierEngine(String filename) throws IOException { @@ -679,6 +682,10 @@ public static IdentifierEngine getContainerIdentifierEngine(String filename) thr engine = OLE2_IDENTIFIER_ENGINE; break; } + case GZ_SIG_ID: { + engine = GZ_IDENTIFIER_ENGINE; + break; + } default : engine = null; } }