Skip to content

Commit

Permalink
Moved some utility classes from dd-dataverse-ingest to this lib
Browse files Browse the repository at this point in the history
  • Loading branch information
janvanmansum committed Dec 3, 2024
1 parent 477bba1 commit 61827db
Show file tree
Hide file tree
Showing 5 changed files with 424 additions and 1 deletion.
7 changes: 6 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
<relativePath />
</parent>
<artifactId>dans-java-utils</artifactId>
<version>1.2.3-SNAPSHOT</version>
<version>1.3.0-SNAPSHOT</version>
<name>DANS Java Utility Classes</name>
<inceptionYear>2021</inceptionYear>
<scm>
Expand Down Expand Up @@ -87,6 +87,11 @@
<artifactId>commons-lang3</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<scope>provided</scope>
</dependency>

<!-- for testing -->
<dependency>
Expand Down
62 changes: 62 additions & 0 deletions src/main/java/nl/knaw/dans/lib/util/MappingLoader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (C) 2021 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.lib.util;

import lombok.Builder;
import lombok.NonNull;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;

/**
 * Reads a key/value lookup table from two named columns of a CSV file.
 * <p>
 * The file is parsed as UTF-8 using the RFC 4180 format. The column names are taken from the first record of the file, which is not returned as data.
 */
@Builder
public class MappingLoader {
    /**
     * Location of the CSV file that holds the mapping.
     */
    @NonNull
    private final Path csvFile;
    /**
     * Header name of the column whose cells become the keys of the mapping.
     */
    @NonNull
    private final String keyColumn;
    /**
     * Header name of the column whose cells become the values of the mapping.
     */
    @NonNull
    private final String valueColumn;

    /**
     * Parses the CSV file and collects one map entry per data record.
     * <p>
     * When a key occurs in more than one record, the value of the last such record is the one that ends up in the map.
     *
     * @return a new mutable map from the cells of the key column to the cells of the value column
     * @throws IOException if the CSV file cannot be read or parsed
     */
    public Map<String, String> load() throws IOException {
        // Column names come from the first record, which must not show up as a data row.
        CSVFormat headerAwareFormat = CSVFormat.RFC4180.builder()
            .setHeader()
            .setSkipHeaderRecord(true)
            .build();

        Map<String, String> mapping = new HashMap<>();
        try (CSVParser csv = CSVParser.parse(csvFile.toFile(), StandardCharsets.UTF_8, headerAwareFormat)) {
            for (CSVRecord row : csv) {
                String key = row.get(keyColumn);
                String value = row.get(valueColumn);
                mapping.put(key, value);
            }
        }
        return mapping;
    }
}
47 changes: 47 additions & 0 deletions src/main/java/nl/knaw/dans/lib/util/PathIterator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright (C) 2021 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.lib.util;

import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;

import java.io.File;
import java.nio.file.Path;
import java.util.Iterator;

/**
 * Turns an iterator of {@link File}s into an iterator of {@link Path}s and keeps count of the number of paths returned so far.
 */
@RequiredArgsConstructor
public class PathIterator implements Iterator<Path> {
    /**
     * The wrapped iterator that supplies the files to convert.
     */
    @NonNull
    private final Iterator<File> fileIterator;

    /**
     * The number of paths that {@link #next()} has successfully returned so far.
     */
    @Getter
    private long iteratedCount = 0;

    /**
     * @return {@code true} if the wrapped file iterator has more elements
     */
    @Override
    public boolean hasNext() {
        return fileIterator.hasNext();
    }

    /**
     * Returns the next file of the wrapped iterator as a path and counts it.
     *
     * @return the next file, converted with {@link File#toPath()}
     */
    @Override
    public Path next() {
        // Fetch and convert first: if the wrapped iterator is exhausted (or yields null) this throws, and the
        // count must not be incremented for an element that was never returned.
        Path path = fileIterator.next().toPath();
        iteratedCount++;
        return path;
    }
}
140 changes: 140 additions & 0 deletions src/main/java/nl/knaw/dans/lib/util/PathIteratorZipper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* Copyright (C) 2021 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.lib.util;

import lombok.AccessLevel;
import lombok.Builder;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.io.IOUtils;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Zips files from an iterator of paths into a zip file up to a maximum number of files and bytes (the first limit reached). The resulting ZIP file can be compressed or not. The files in the ZIP file
 * can be renamed. The ZIP file can be overwritten if it already exists.
 * <p>
 * The limits are checked before each file is taken from the iterator, so the file that makes the total cross the byte limit is still added in full. Paths that are not regular files are consumed from
 * the iterator but are neither added nor counted.
 */
@Builder
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
public class PathIteratorZipper {
    /**
     * The root directory of the files to zip. Entry names in the ZIP file are relative to this directory.
     */
    @NonNull
    private final Path rootDir;
    /**
     * The iterator of paths to the files to zip.
     */
    @NonNull
    private final Iterator<Path> sourceIterator;
    /**
     * The path to the target zip file.
     */
    @NonNull
    private final Path targetZipFile;
    /**
     * Whether to overwrite the target zip file if it already exists.
     */
    @Builder.Default
    private final boolean overwrite = true;
    /**
     * Whether to compress (deflate) the files in the target zip file. If false, the files are stored uncompressed.
     */
    @Builder.Default
    private final boolean compress = false;
    /**
     * The maximum number of files to include in the target zip file.
     */
    @Builder.Default
    private final int maxNumberOfFiles = Integer.MAX_VALUE;
    /**
     * The maximum number of bytes to include in the target zip file, measured as the sum of the sizes of the source files.
     */
    @Builder.Default
    private final long maxNumberOfBytes = 1073741824; // 1 GB
    /**
     * A map of source file paths to target file paths (relative to the root directory, including the file name). If a source file path is not in the map, the file is zipped with its original file
     * path.
     */
    @Builder.Default
    private final Map<String, String> renameMap = new HashMap<>();

    /**
     * Zips files from the source iterator into the target zip file.
     *
     * @return the path to the target zip file.
     * @throws IOException              if the target zip file already exists and overwrite is false, or if the target zip file cannot be deleted, created or closed.
     * @throws RuntimeException         wrapping the {@link IOException} that occurred while adding a file to the zip file or while determining its size.
     * @throws IllegalArgumentException if a file from the source iterator is not located under the root directory.
     */
    public Path zip() throws IOException {
        if (Files.exists(targetZipFile)) {
            if (!overwrite) {
                throw new IOException("Target zip file already exists: " + targetZipFile);
            }
            Files.delete(targetZipFile);
        }

        // Open the archive on the path itself rather than on a plain OutputStream: this lets commons-compress use
        // random access, which it needs to write STORED (uncompressed) entries whose CRC is not known up front.
        try (ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(targetZipFile)) {
            int numberOfFilesAdded = 0;
            long numberOfBytesAdded = 0;
            while (sourceIterator.hasNext() && numberOfFilesAdded < maxNumberOfFiles && numberOfBytesAdded < maxNumberOfBytes) {
                Path path = sourceIterator.next();
                if (Files.isRegularFile(path)) {
                    try {
                        addFileToZipStream(zipArchiveOutputStream, path);
                        numberOfFilesAdded++;
                        numberOfBytesAdded += Files.size(path);
                    }
                    catch (IOException e) {
                        // Kept as an unchecked exception for backward compatibility with existing callers.
                        throw new RuntimeException(e);
                    }
                }
            }
        }
        return targetZipFile;
    }

    /**
     * Adds one file as an entry to the zip stream, applying the rename map to the entry name.
     *
     * @param zipArchiveOutputStream the zip stream to add the entry to
     * @param fileToZip              the file to add; must be located under the root directory
     * @throws IOException              if the file cannot be read or the entry cannot be written
     * @throws IllegalArgumentException if the file is not located under the root directory
     */
    private void addFileToZipStream(ZipArchiveOutputStream zipArchiveOutputStream, Path fileToZip) throws IOException {
        if (!fileToZip.startsWith(rootDir)) {
            throw new IllegalArgumentException("File to zip is not a descendant of root directory: " + fileToZip);
        }
        String entryName = rootDir.relativize(fileToZip).toString();
        if (renameMap.containsKey(entryName)) {
            entryName = renameMap.get(entryName);
        }
        ZipArchiveEntry zipArchiveEntry = new ZipArchiveEntry(fileToZip, entryName);
        // compress == true means deflate; otherwise the bytes are stored as-is.
        zipArchiveEntry.setMethod(compress ? ZipArchiveEntry.DEFLATED : ZipArchiveEntry.STORED);
        zipArchiveOutputStream.putArchiveEntry(zipArchiveEntry);
        try (InputStream fileInputStream = Files.newInputStream(fileToZip)) {
            IOUtils.copy(fileInputStream, zipArchiveOutputStream);
            zipArchiveOutputStream.closeArchiveEntry();
        }
    }
}
Loading

0 comments on commit 61827db

Please sign in to comment.