From ab22ecc23d246aabc0f3d4b59fea6c06be21eb5b Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Fri, 26 Jan 2024 23:55:01 +0800 Subject: [PATCH 1/9] factor out method to get a single cutoff grid --- .../RegionalAnalysisController.java | 154 ++++++++++-------- 1 file changed, 83 insertions(+), 71 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index b08cb8a9b..28c8bc65f 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -39,6 +39,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; +import java.util.Locale; import java.util.zip.GZIPOutputStream; import static com.conveyal.analysis.util.JsonUtil.toJson; @@ -156,6 +157,79 @@ private int getIntQueryParameter (Request req, String parameterName, int default } } + private FileStorageKey getSingleCutoffGrid ( + RegionalAnalysis analysis, + int cutoffIndex, + String destinationPointSetId, + int percentile, + FileStorageFormat fileFormat + ) throws IOException { + String regionalAnalysisId = analysis._id; + int cutoffMinutes = analysis.cutoffsMinutes != null ? analysis.cutoffsMinutes[cutoffIndex] : analysis.cutoffMinutes; + LOG.debug( + "Returning {} minute accessibility to pointset {} (percentile {}) for regional analysis {} in format {}.", + cutoffMinutes, destinationPointSetId, percentile, regionalAnalysisId, fileFormat + ); + // Analysis grids now have the percentile and cutoff in their S3 key, because there can be many of each. + // We do this even for results generated by older workers, so they will be re-extracted with the new name. + // These grids are reasonably small, we may be able to just send all cutoffs to the UI instead of selecting. + String singleCutoffKey = String.format( + "%s_%s_P%d_C%d.%s", + regionalAnalysisId, destinationPointSetId, percentile, cutoffMinutes, + fileFormat.extension.toLowerCase(Locale.ROOT) + ); + FileStorageKey singleCutoffFileStorageKey = new FileStorageKey(RESULTS, singleCutoffKey); + if (!fileStorage.exists(singleCutoffFileStorageKey)) { + // An accessibility grid for this particular cutoff has apparently never been extracted from the + // regional results file before. Extract one and save it for future reuse. Older regional analyses + // did not have arrays allowing multiple cutoffs, percentiles, or destination pointsets. The + // filenames of such regional accessibility results will not have a percentile or pointset ID. + // First try the newest form of regional results: multi-percentile, multi-destination-grid. + String multiCutoffKey = String.format("%s_%s_P%d.access", regionalAnalysisId, destinationPointSetId, percentile); + FileStorageKey multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); + if (!fileStorage.exists(multiCutoffFileStorageKey)) { + LOG.warn("Falling back to older file name formats for regional results file: " + multiCutoffKey); + // Fall back to second-oldest form: multi-percentile, single destination grid. + multiCutoffKey = String.format("%s_P%d.access", regionalAnalysisId, percentile); + multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); + if (fileStorage.exists(multiCutoffFileStorageKey)) { + checkArgument(analysis.destinationPointSetIds.length == 1); + } else { + // Fall back on oldest form of results, single-percentile, single-destination-grid. 
+ multiCutoffKey = regionalAnalysisId + ".access"; + multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); + if (fileStorage.exists(multiCutoffFileStorageKey)) { + checkArgument(analysis.travelTimePercentiles.length == 1); + checkArgument(analysis.destinationPointSetIds.length == 1); + } else { + throw AnalysisServerException.notFound("Cannot find original source regional analysis output."); + } + } + } + LOG.debug("Single-cutoff grid {} not found on S3, deriving it from {}.", singleCutoffKey, multiCutoffKey); + + InputStream multiCutoffInputStream = new FileInputStream(fileStorage.getFile(multiCutoffFileStorageKey)); + Grid grid = new SelectingGridReducer(cutoffIndex).compute(multiCutoffInputStream); + + File localFile = FileUtils.createScratchFile(fileFormat.toString()); + FileOutputStream fos = new FileOutputStream(localFile); + + switch (fileFormat) { + case GRID: + grid.write(new GZIPOutputStream(fos)); + break; + case PNG: + grid.writePng(fos); + break; + case GEOTIFF: + grid.writeGeotiff(fos); + break; + } + + fileStorage.moveIntoStorage(singleCutoffFileStorageKey, localFile); + } + } + /** * This used to extract a particular percentile of a regional analysis as a grid file. * Now it just gets the single percentile that exists for any one analysis, either from the local buffer file @@ -167,7 +241,7 @@ private Object getRegionalResults (Request req, Response res) throws IOException // The UUID of the regional analysis for which we want the output data final String regionalAnalysisId = req.params("_id"); // The response file format: PNG, TIFF, or GRID - final String fileFormatExtension = req.params("format"); + final String fileFormatExtension = req.params("format"); // FIXME this may be case sensitive (could make multiple files for different requests) RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted( QueryBuilder.start("_id").is(req.params("_id")).get(), @@ -186,8 +260,7 @@ private Object getRegionalResults (Request req, Response res) throws IOException // For newer analyses that have multiple cutoffs, percentiles, or destination pointsets, these initial values // are coming from deprecated fields, are not meaningful and will be overwritten below from query parameters. int percentile = analysis.travelTimePercentile; - int cutoffMinutes = analysis.cutoffMinutes; - int cutoffIndex = 0; + int cutoffIndex = 0; // For old single-cutoff outputs, we can still select the zeroth grid. String destinationPointSetId = analysis.grid; // Handle newer regional analyses with multiple cutoffs in an array. @@ -196,7 +269,7 @@ private Object getRegionalResults (Request req, Response res) throws IOException if (analysis.cutoffsMinutes != null) { int nCutoffs = analysis.cutoffsMinutes.length; checkState(nCutoffs > 0, "Regional analysis has no cutoffs."); - cutoffMinutes = getIntQueryParameter(req, "cutoff", analysis.cutoffsMinutes[nCutoffs / 2]); + int cutoffMinutes = getIntQueryParameter(req, "cutoff", analysis.cutoffsMinutes[nCutoffs / 2]); cutoffIndex = new TIntArrayList(analysis.cutoffsMinutes).indexOf(cutoffMinutes); checkState(cutoffIndex >= 0, "Travel time cutoff for this regional analysis must be taken from this list: (%s)", @@ -228,83 +301,22 @@ private Object getRegionalResults (Request req, Response res) throws IOException "Destination gridId must be one of: %s", String.join(",", analysis.destinationPointSetIds)); } - // We started implementing the ability to retrieve and display partially completed analyses. 
// We eventually decided these should not be available here at the same endpoint as complete, immutable results. - if (broker.findJob(regionalAnalysisId) != null) { throw AnalysisServerException.notFound("Analysis is incomplete, no results file is available."); } - // FIXME It is possible that regional analysis is complete, but UI is trying to fetch gridded results when there - // aren't any (only CSV, because origins are freeform). - // How can we determine whether this analysis is expected to have no gridded results and cleanly return a 404? - - // The analysis has already completed, results should be stored and retrieved from S3 via redirects. - LOG.debug("Returning {} minute accessibility to pointset {} (percentile {}) for regional analysis {}.", - cutoffMinutes, destinationPointSetId, percentile, regionalAnalysisId); + // aren't any (only CSV, because origins are freeform). + // How should we determine whether this analysis is expected to have no gridded results and cleanly return a 404? FileStorageFormat format = FileStorageFormat.valueOf(fileFormatExtension.toUpperCase()); if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) { throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"tiff\"."); } - - // Analysis grids now have the percentile and cutoff in their S3 key, because there can be many of each. - // We do this even for results generated by older workers, so they will be re-extracted with the new name. - // These grids are reasonably small, we may be able to just send all cutoffs to the UI instead of selecting. - String singleCutoffKey = - String.format("%s_%s_P%d_C%d.%s", regionalAnalysisId, destinationPointSetId, percentile, cutoffMinutes, fileFormatExtension); - - // A lot of overhead here - UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. - FileStorageKey singleCutoffFileStorageKey = new FileStorageKey(RESULTS, singleCutoffKey); - if (!fileStorage.exists(singleCutoffFileStorageKey)) { - // An accessibility grid for this particular cutoff has apparently never been extracted from the - // regional results file before. Extract one and save it for future reuse. Older regional analyses - // did not have arrays allowing multiple cutoffs, percentiles, or destination pointsets. The - // filenames of such regional accessibility results will not have a percentile or pointset ID. - // First try the newest form of regional results: multi-percentile, multi-destination-grid. - String multiCutoffKey = String.format("%s_%s_P%d.access", regionalAnalysisId, destinationPointSetId, percentile); - FileStorageKey multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); - if (!fileStorage.exists(multiCutoffFileStorageKey)) { - LOG.warn("Falling back to older file name formats for regional results file: " + multiCutoffKey); - // Fall back to second-oldest form: multi-percentile, single destination grid. - multiCutoffKey = String.format("%s_P%d.access", regionalAnalysisId, percentile); - multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); - if (fileStorage.exists(multiCutoffFileStorageKey)) { - checkArgument(analysis.destinationPointSetIds.length == 1); - } else { - // Fall back on oldest form of results, single-percentile, single-destination-grid. 
- multiCutoffKey = regionalAnalysisId + ".access"; - multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); - if (fileStorage.exists(multiCutoffFileStorageKey)) { - checkArgument(analysis.travelTimePercentiles.length == 1); - checkArgument(analysis.destinationPointSetIds.length == 1); - } else { - throw AnalysisServerException.notFound("Cannot find original source regional analysis output."); - } - } - } - LOG.debug("Single-cutoff grid {} not found on S3, deriving it from {}.", singleCutoffKey, multiCutoffKey); - - InputStream multiCutoffInputStream = new FileInputStream(fileStorage.getFile(multiCutoffFileStorageKey)); - Grid grid = new SelectingGridReducer(cutoffIndex).compute(multiCutoffInputStream); - - File localFile = FileUtils.createScratchFile(format.toString()); - FileOutputStream fos = new FileOutputStream(localFile); - - switch (format) { - case GRID: - grid.write(new GZIPOutputStream(fos)); - break; - case PNG: - grid.writePng(fos); - break; - case GEOTIFF: - grid.writeGeotiff(fos); - break; - } - - fileStorage.moveIntoStorage(singleCutoffFileStorageKey, localFile); - } + // Process has a lot of overhead: UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. + FileStorageKey singleCutoffFileStorageKey = getSingleCutoffGrid( + analysis, cutoffIndex, destinationPointSetId, percentile, format + ); return JsonUtil.toJsonString( JsonUtil.objectNode().put("url", fileStorage.getURL(singleCutoffFileStorageKey)) ); From 67fbb896c13aacf327d03e8ee8943514c2a35d97 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 27 Jan 2024 01:32:17 +0800 Subject: [PATCH 2/9] proof of concept: return URL of zipped rasters --- .../RegionalAnalysisController.java | 63 ++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 28c8bc65f..1ca232832 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -21,6 +21,7 @@ import com.conveyal.r5.analyst.PointSetCache; import com.conveyal.r5.analyst.cluster.RegionalTask; import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.net.HttpHeaders; import com.google.common.primitives.Ints; import com.mongodb.QueryBuilder; import gnu.trove.list.array.TIntArrayList; @@ -35,11 +36,18 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URI; +import java.nio.file.FileSystem; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.zip.GZIPOutputStream; import static com.conveyal.analysis.util.JsonUtil.toJson; @@ -166,7 +174,7 @@ private FileStorageKey getSingleCutoffGrid ( ) throws IOException { String regionalAnalysisId = analysis._id; int cutoffMinutes = analysis.cutoffsMinutes != null ? 
analysis.cutoffsMinutes[cutoffIndex] : analysis.cutoffMinutes;
-        LOG.debug(
+        LOG.info(
             "Returning {} minute accessibility to pointset {} (percentile {}) for regional analysis {} in format {}.",
             cutoffMinutes, destinationPointSetId, percentile, regionalAnalysisId, fileFormat
         );
@@ -228,6 +236,58 @@ private FileStorageKey getSingleCutoffGrid (
 
         fileStorage.moveIntoStorage(singleCutoffFileStorageKey, localFile);
     }
+        return singleCutoffFileStorageKey;
+    }
+
+    private Object getAllRegionalResults (Request req, Response res) throws IOException {
+        // Get the UUID of the regional analysis for which we want the output data.
+        final String regionalAnalysisId = req.params("_id");
+        RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted(
+                QueryBuilder.start("_id").is(req.params("_id")).get(),
+                DBProjection.exclude("request.scenario.modifications"),
+                UserPermissions.from(req)
+        ).iterator().next();
+        if (analysis == null || analysis.deleted) {
+            throw AnalysisServerException.notFound("The specified regional analysis is unknown or has been deleted.");
+        }
+        List<FileStorageKey> fileStorageKeys = new ArrayList<>();
+        if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) {
+            throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results.");
+        }
+        for (String destinationPointSetId : analysis.destinationPointSetIds) {
+            for (int cutoffIndex = 0; cutoffIndex < analysis.cutoffsMinutes.length; cutoffIndex++) {
+                for (int percentile : analysis.travelTimePercentiles) {
+                    FileStorageKey fileStorageKey = getSingleCutoffGrid(
+                            analysis,
+                            cutoffIndex,
+                            destinationPointSetId,
+                            percentile,
+                            FileStorageFormat.GEOTIFF
+                    );
+                    fileStorageKeys.add(fileStorageKey);
+                }
+            }
+        }
+        File tempZipFile = File.createTempFile("regional", ".zip");
+        tempZipFile.delete(); // FIXME: zipfs doesn't work on existing empty files, the file has to not exist.
+        tempZipFile.deleteOnExit();
+        Map<String, String> env = Map.of("create", "true");
+        URI uri = URI.create("jar:file:" + tempZipFile.getAbsolutePath());
+        try (FileSystem zipFilesystem = FileSystems.newFileSystem(uri, env)) {
+            for (FileStorageKey fileStorageKey : fileStorageKeys) {
+                Path storagePath = fileStorage.getFile(fileStorageKey).toPath();
+                Path zipPath = zipFilesystem.getPath(storagePath.getFileName().toString());
+                Files.copy(storagePath, zipPath, StandardCopyOption.REPLACE_EXISTING);
+            }
+        }
+        FileStorageKey fileStorageKey = new FileStorageKey(RESULTS, analysis._id + "_ALL.zip");
+        fileStorage.moveIntoStorage(fileStorageKey, tempZipFile);
+        String humanReadableName = analysis.name + ".zip";
+        res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableName));
+        return JsonUtil.toJsonString(JsonUtil.objectNode()
+                .put("url", fileStorage.getURL(fileStorageKey))
+                .put("name", humanReadableName)
+        );
+    }
 
     /**
@@ -563,6 +623,7 @@ public void registerEndpoints (spark.Service sparkService) {
         sparkService.path("/api/regional", () -> {
             // For grids, no transformer is supplied: render raw bytes or input stream rather than transforming to JSON.
sparkService.get("/:_id", this::getRegionalAnalysis); + sparkService.get("/:_id/all", this::getAllRegionalResults); sparkService.get("/:_id/grid/:format", this::getRegionalResults); sparkService.get("/:_id/csv/:resultType", this::getCsvResults); sparkService.get("/:_id/scenarioJsonUrl", this::getScenarioJsonUrl); From c0bd7ea622225f898182761a31f5abb016879ced Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 27 Jan 2024 01:32:53 +0800 Subject: [PATCH 3/9] partially fix MIME type lookup by file extension this is not particularly efficient but it's only used in local operation --- .../analysis/controllers/LocalFilesController.java | 4 +++- .../java/com/conveyal/file/FileStorageFormat.java | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java b/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java index c92fbbb33..a5ef44f74 100644 --- a/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java +++ b/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java @@ -33,7 +33,9 @@ private Object getFile (Request req, Response res) throws Exception { FileStorageKey key = new FileStorageKey(category, filename); File file = fileStorage.getFile(key); FileStorageFormat format = FileStorageFormat.fromFilename(filename); - res.type(format.mimeType); + if (format != null) { + res.type(format.mimeType); + } // If the content-encoding is set to gzip, Spark automatically gzips the response. This double-gzips anything // that was already gzipped. Some of our files are already gzipped, and we rely on the the client browser to diff --git a/src/main/java/com/conveyal/file/FileStorageFormat.java b/src/main/java/com/conveyal/file/FileStorageFormat.java index c33569de9..e3b6e0fe0 100644 --- a/src/main/java/com/conveyal/file/FileStorageFormat.java +++ b/src/main/java/com/conveyal/file/FileStorageFormat.java @@ -1,5 +1,7 @@ package com.conveyal.file; +import java.util.Locale; + /** * An enumeration of all the file types we handle as uploads, derived internal data, or work products. * Really this should be a union of several enumerated types (upload/internal/product) but Java does not allow this. 
@@ -37,7 +39,12 @@ public enum FileStorageFormat { } public static FileStorageFormat fromFilename (String filename) { - String extension = filename.substring(filename.lastIndexOf(".") + 1); - return FileStorageFormat.valueOf(extension.toUpperCase()); + String extension = filename.substring(filename.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT); + for (FileStorageFormat format : FileStorageFormat.values()) { + if (format.extension.equals(extension)) { + return format; + } + } + return null; } } From 980d60eb4d761d9d4812b1fa024580a58364ece8 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 27 Jan 2024 23:56:02 +0800 Subject: [PATCH 4/9] replace UUIDs with human readable filenames --- .../RegionalAnalysisController.java | 118 +++++++++++++----- .../analysis/results/BaseResultWriter.java | 1 + 2 files changed, 87 insertions(+), 32 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 1ca232832..7f6189322 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -45,9 +45,12 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.zip.GZIPOutputStream; import static com.conveyal.analysis.util.JsonUtil.toJson; @@ -165,6 +168,11 @@ private int getIntQueryParameter (Request req, String parameterName, int default } } + /** + * Get a regional analysis results raster for a single (percentile, cutoff, destination) combination, in one of + * several image file formats. This method was factored out for use from two different API endpoints, one for + * fetching a single grid, and another for fetching grids for all combinations of parameters at once. + */ private FileStorageKey getSingleCutoffGrid ( RegionalAnalysis analysis, int cutoffIndex, @@ -242,54 +250,100 @@ private FileStorageKey getSingleCutoffGrid ( private Object getAllRegionalResults (Request req, Response res) throws IOException { // Get the UUID of the regional analysis for which we want the output data. 
         final String regionalAnalysisId = req.params("_id");
+        final UserPermissions userPermissions = UserPermissions.from(req);
         RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted(
                 QueryBuilder.start("_id").is(req.params("_id")).get(),
                 DBProjection.exclude("request.scenario.modifications"),
-                UserPermissions.from(req)
+                userPermissions
         ).iterator().next();
         if (analysis == null || analysis.deleted) {
             throw AnalysisServerException.notFound("The specified regional analysis is unknown or has been deleted.");
         }
-        List<FileStorageKey> fileStorageKeys = new ArrayList<>();
-        if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) {
-            throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results.");
-        }
-        for (String destinationPointSetId : analysis.destinationPointSetIds) {
-            for (int cutoffIndex = 0; cutoffIndex < analysis.cutoffsMinutes.length; cutoffIndex++) {
-                for (int percentile : analysis.travelTimePercentiles) {
-                    FileStorageKey fileStorageKey = getSingleCutoffGrid(
-                            analysis,
-                            cutoffIndex,
-                            destinationPointSetId,
-                            percentile,
-                            FileStorageFormat.GEOTIFF
+        FileStorageKey zippedResultsKey = new FileStorageKey(RESULTS, analysis._id + "_ALL.zip");
+        // Keep only alphanumeric characters and underscores from user-specified analysis name.
+        String friendlyAnalysisName = filenameCleanString(analysis.name);
+        if (!fileStorage.exists(zippedResultsKey)) {
+            List<FileStorageKey> fileStorageKeys = new ArrayList<>();
+            if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) {
+                throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results.");
+            }
+            // Map from cryptic UUIDs to human readable strings that can replace them.
+            Map<String, String> friendlyReplacements = new HashMap<>();
+            {
+                // Replace the regional analysis ID with its name.
+                friendlyReplacements.put(analysis._id, friendlyAnalysisName);
+                // Replace each destination point set ID with its name.
+                int d = 0;
+                for (String destinationPointSetId : analysis.destinationPointSetIds) {
+                    OpportunityDataset opportunityDataset = Persistence.opportunityDatasets.findByIdIfPermitted(
+                            destinationPointSetId,
+                            userPermissions
                     );
-                    fileStorageKeys.add(fileStorageKey);
+                    checkNotNull(opportunityDataset, "Opportunity dataset could not be found in database.");
+                    // Avoid name collisions, prepend an integer.
+                    String friendlyDestinationName = "D%d_%s".formatted(d++, filenameCleanString(opportunityDataset.name));
+                    friendlyReplacements.put(destinationPointSetId, friendlyDestinationName);
                 }
             }
-        }
+            // Iterate over all combinations and generate one geotiff grid output for each one.
+            // TODO check whether origins are freeform: if (analysis.request.originPointSetKey == null) ?
+            for (String destinationPointSetId : analysis.destinationPointSetIds) {
+                for (int cutoffIndex = 0; cutoffIndex < analysis.cutoffsMinutes.length; cutoffIndex++) {
+                    for (int percentile : analysis.travelTimePercentiles) {
+                        FileStorageKey fileStorageKey = getSingleCutoffGrid(
+                                analysis,
+                                cutoffIndex,
+                                destinationPointSetId,
+                                percentile,
+                                FileStorageFormat.GEOTIFF
+                        );
+                        fileStorageKeys.add(fileStorageKey);
+                    }
+                }
+            }
+            // Also include any CSV files that were generated. TODO these are gzipped, decompress and re-compress.
+            // for (String csvStorageName : analysis.resultStorage.values()) {
+            //     LOG.info("Including {} in results zip file.", csvStorageName);
+            //     FileStorageKey csvKey = new FileStorageKey(RESULTS, csvStorageName);
+            //     fileStorageKeys.add(csvKey);
+            // }
         File tempZipFile = File.createTempFile("regional", ".zip");
-        tempZipFile.delete(); // FIXME: zipfs doesn't work on existing empty files, the file has to not exist.
-        tempZipFile.deleteOnExit();
+            // Zipfs can't open existing empty files, the file has to not exist. FIXME: Non-dangerous race condition
+            // Examining ZipFileSystemProvider reveals a "useTempFile" env parameter, but this is for the individual entries.
+            // May be better to just use zipOutputStream which would also allow gzip - zip CSV conversion.
+            tempZipFile.delete();
         Map<String, String> env = Map.of("create", "true");
         URI uri = URI.create("jar:file:" + tempZipFile.getAbsolutePath());
         try (FileSystem zipFilesystem = FileSystems.newFileSystem(uri, env)) {
             for (FileStorageKey fileStorageKey : fileStorageKeys) {
                 Path storagePath = fileStorage.getFile(fileStorageKey).toPath();
-                Path zipPath = zipFilesystem.getPath(storagePath.getFileName().toString());
+                String nameInZip = storagePath.getFileName().toString();
+                // This is so inefficient but it should work.
+                for (String replacementKey : friendlyReplacements.keySet()) {
+                    nameInZip = nameInZip.replace(replacementKey, friendlyReplacements.get(replacementKey));
+                }
+                Path zipPath = zipFilesystem.getPath(nameInZip);
                 Files.copy(storagePath, zipPath, StandardCopyOption.REPLACE_EXISTING);
             }
         }
-        FileStorageKey fileStorageKey = new FileStorageKey(RESULTS, analysis._id + "_ALL.zip");
-        fileStorage.moveIntoStorage(fileStorageKey, tempZipFile);
-        String humanReadableName = analysis.name + ".zip";
-        res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableName));
+            fileStorage.moveIntoStorage(zippedResultsKey, tempZipFile);
+        }
+        String humanReadableZipName = friendlyAnalysisName + ".zip";
+        res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableZipName));
         return JsonUtil.toJsonString(JsonUtil.objectNode()
-                .put("url", fileStorage.getURL(fileStorageKey))
-                .put("name", humanReadableName)
+                .put("url", fileStorage.getURL(zippedResultsKey))
+                .put("name", humanReadableZipName)
         );
     }
 
+    private String filenameCleanString (String original) {
+        String ret = original.replaceAll("\\W+", "_");
+        if (ret.length() > 20) {
+            ret = ret.substring(0, 20);
+        }
+        return ret;
+    }
+
     /**
      * This used to extract a particular percentile of a regional analysis as a grid file.
      * Now it just gets the single percentile that exists for any one analysis, either from the local buffer file
diff --git a/src/main/java/com/conveyal/analysis/results/BaseResultWriter.java b/src/main/java/com/conveyal/analysis/results/BaseResultWriter.java
index df289c9fe..8bcf94d26 100644
--- a/src/main/java/com/conveyal/analysis/results/BaseResultWriter.java
+++ b/src/main/java/com/conveyal/analysis/results/BaseResultWriter.java
@@ -61,6 +61,7 @@ protected synchronized void finish (String fileName) throws IOException {
 
         // There's probably a more elegant way to do this with NIO and without closing the buffer.
         // That would be Files.copy(File.toPath(),X) or ByteStreams.copy.
+        // Perhaps better: we could wrap the output buffer in a gzip output stream and zip as we write out.
         InputStream is = new BufferedInputStream(new FileInputStream(bufferFile));
         OutputStream os = new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(gzippedResultFile)));
         ByteStreams.copy(is, os);

From c0bd7ea622225f898182761a31f5abb016879ced Mon Sep 17 00:00:00 2001
From: Andrew Byrd
Date: Fri, 2 Feb 2024 22:43:58 +0800
Subject: [PATCH 5/9] human-readable name together with download URLs

This enables #673, but should be backward compatible with existing UI.
JSON responses containing a URL have an accompanying humanName field.
This can be used as the download attribute of an HTML link, or as the
attachment name in a content-disposition header. CSV download still
returns text/plain (requires JSON parsing on UI side)
---
 .../AggregationAreaController.java            |  8 ++-
 .../OpportunityDatasetController.java         | 30 ++++-----
 .../RegionalAnalysisController.java           | 67 +++++++++----------
 .../java/com/conveyal/file/FileStorage.java   |  5 ++
 .../com/conveyal/file/UrlWithHumanName.java   | 39 +++++++++++
 5 files changed, 94 insertions(+), 55 deletions(-)
 create mode 100644 src/main/java/com/conveyal/file/UrlWithHumanName.java

diff --git a/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java b/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java
index 286fa5593..fae1637b6 100644
--- a/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java
@@ -10,6 +10,7 @@
 import com.conveyal.analysis.persistence.AnalysisDB;
 import com.conveyal.analysis.util.JsonUtil;
 import com.conveyal.file.FileStorage;
+import com.conveyal.file.UrlWithHumanName;
 import com.conveyal.r5.analyst.progress.Task;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import org.bson.conversions.Bson;
@@ -27,6 +28,7 @@
 import static com.google.common.base.Preconditions.checkNotNull;
 import static com.mongodb.client.model.Filters.and;
 import static com.mongodb.client.model.Filters.eq;
+import static org.eclipse.jetty.http.MimeTypes.Type.APPLICATION_JSON;
 
 /**
  * Stores vector aggregationAreas (used to define the region of a weighted average accessibility metric).
@@ -98,10 +100,10 @@ private Collection<AggregationArea> getAggregationAreas (Request req, Response r
     }
 
     /** Returns a JSON-wrapped URL for the mask grid of the aggregation area whose id matches the path parameter.
      */
-    private ObjectNode getAggregationAreaGridUrl (Request req, Response res) {
+    private UrlWithHumanName getAggregationAreaGridUrl (Request req, Response res) {
         AggregationArea aggregationArea = aggregationAreaCollection.findPermittedByRequestParamId(req);
-        String url = fileStorage.getURL(aggregationArea.getStorageKey());
-        return JsonUtil.objectNode().put("url", url);
+        res.type(APPLICATION_JSON.asString());
+        return fileStorage.getJsonUrl(aggregationArea.getStorageKey(), aggregationArea.name, "grid");
     }
 
     @Override
diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java
index b1afcfc05..c28363e0a 100644
--- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java
@@ -18,6 +18,7 @@
 import com.conveyal.file.FileStorageFormat;
 import com.conveyal.file.FileStorageKey;
 import com.conveyal.file.FileUtils;
+import com.conveyal.file.UrlWithHumanName;
 import com.conveyal.r5.analyst.FreeFormPointSet;
 import com.conveyal.r5.analyst.Grid;
 import com.conveyal.r5.analyst.PointSet;
@@ -61,6 +62,7 @@
 import static com.conveyal.file.FileCategory.GRIDS;
 import static com.conveyal.r5.analyst.WebMercatorExtents.parseZoom;
 import static com.conveyal.r5.analyst.progress.WorkProductType.OPPORTUNITY_DATASET;
+import static org.eclipse.jetty.http.MimeTypes.Type.APPLICATION_JSON;
 
 /**
  * Controller that handles fetching opportunity datasets (grids and other pointset formats).
@@ -94,10 +96,6 @@ public OpportunityDatasetController (
     /** Store upload status objects FIXME trivial Javadoc */
     private final List<OpportunityDatasetUploadStatus> uploadStatuses = new ArrayList<>();
 
-    private ObjectNode getJsonUrl (FileStorageKey key) {
-        return JsonUtil.objectNode().put("url", fileStorage.getURL(key));
-    }
-
     private void addStatusAndRemoveOldStatuses(OpportunityDatasetUploadStatus status) {
         uploadStatuses.add(status);
         LocalDateTime now = LocalDateTime.now();
@@ -113,10 +111,11 @@ private Collection<OpportunityDataset> getRegionDatasets(Request req, Response r
         );
     }
 
-    private Object getOpportunityDataset(Request req, Response res) {
+    private UrlWithHumanName getOpportunityDataset(Request req, Response res) {
         OpportunityDataset dataset = Persistence.opportunityDatasets.findByIdFromRequestIfPermitted(req);
         if (dataset.format == FileStorageFormat.GRID) {
-            return getJsonUrl(dataset.getStorageKey());
+            res.type(APPLICATION_JSON.asString());
+            return fileStorage.getJsonUrl(dataset.getStorageKey(), dataset.sourceName + "_" + dataset.name, "grid");
         } else {
             // Currently the UI can only visualize grids, not other kinds of datasets (freeform points).
             // We do generate a rasterized grid for each of the freeform pointsets we create, so ideally we'd redirect
@@ -564,9 +563,10 @@ private List<Grid> createGridsFromShapefile(List<FileItem> fileItems,
      * Respond to a request with a redirect to a downloadable file.
* @param req should specify regionId, opportunityDatasetId, and an available download format (.tiff or .grid) */ - private Object downloadOpportunityDataset (Request req, Response res) throws IOException { + private UrlWithHumanName downloadOpportunityDataset (Request req, Response res) throws IOException { FileStorageFormat downloadFormat; String format = req.params("format"); + res.type(APPLICATION_JSON.asString()); try { downloadFormat = FileStorageFormat.valueOf(format.toUpperCase()); } catch (IllegalArgumentException iae) { @@ -576,38 +576,32 @@ private Object downloadOpportunityDataset (Request req, Response res) throws IOE String regionId = req.params("_id"); String gridKey = format; FileStorageKey storageKey = new FileStorageKey(GRIDS, String.format("%s/%s.grid", regionId, gridKey)); - return getJsonUrl(storageKey); + return fileStorage.getJsonUrl(storageKey, gridKey, "grid"); + } + if (FileStorageFormat.GRID.equals(downloadFormat)) { + return getOpportunityDataset(req, res); } - - if (FileStorageFormat.GRID.equals(downloadFormat)) return getOpportunityDataset(req, res); - final OpportunityDataset opportunityDataset = Persistence.opportunityDatasets.findByIdFromRequestIfPermitted(req); - FileStorageKey gridKey = opportunityDataset.getStorageKey(FileStorageFormat.GRID); FileStorageKey formatKey = opportunityDataset.getStorageKey(downloadFormat); - // if this grid is not on S3 in the requested format, try to get the .grid format if (!fileStorage.exists(gridKey)) { throw AnalysisServerException.notFound("Requested grid does not exist."); } - if (!fileStorage.exists(formatKey)) { // get the grid and convert it to the requested format File gridFile = fileStorage.getFile(gridKey); Grid grid = Grid.read(new GZIPInputStream(new FileInputStream(gridFile))); // closes input stream File localFile = FileUtils.createScratchFile(downloadFormat.toString()); FileOutputStream fos = new FileOutputStream(localFile); - if (FileStorageFormat.PNG.equals(downloadFormat)) { grid.writePng(fos); } else if (FileStorageFormat.GEOTIFF.equals(downloadFormat)) { grid.writeGeotiff(fos); } - fileStorage.moveIntoStorage(formatKey, localFile); } - - return getJsonUrl(formatKey); + return fileStorage.getJsonUrl(formatKey, opportunityDataset.sourceName + "_" + opportunityDataset.name, downloadFormat.extension); } /** diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 7f6189322..2d169187f 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -8,6 +8,7 @@ import com.conveyal.analysis.models.AnalysisRequest; import com.conveyal.analysis.models.OpportunityDataset; import com.conveyal.analysis.models.RegionalAnalysis; +import com.conveyal.file.UrlWithHumanName; import com.conveyal.analysis.persistence.Persistence; import com.conveyal.analysis.results.CsvResultType; import com.conveyal.analysis.util.JsonUtil; @@ -20,8 +21,6 @@ import com.conveyal.r5.analyst.PointSet; import com.conveyal.r5.analyst.PointSetCache; import com.conveyal.r5.analyst.cluster.RegionalTask; -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.net.HttpHeaders; import com.google.common.primitives.Ints; import com.mongodb.QueryBuilder; import gnu.trove.list.array.TIntArrayList; @@ -46,20 +45,21 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashMap; 
-import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Set; import java.util.zip.GZIPOutputStream; import static com.conveyal.analysis.util.JsonUtil.toJson; import static com.conveyal.file.FileCategory.BUNDLES; import static com.conveyal.file.FileCategory.RESULTS; +import static com.conveyal.file.UrlWithHumanName.filenameCleanString; import static com.conveyal.r5.transit.TransportNetworkCache.getScenarioFilename; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; +import static org.eclipse.jetty.http.MimeTypes.Type.APPLICATION_JSON; +import static org.eclipse.jetty.http.MimeTypes.Type.TEXT_HTML; /** * Spark HTTP handler methods that allow launching new regional analyses, as well as deleting them and fetching @@ -328,28 +328,15 @@ private Object getAllRegionalResults (Request req, Response res) throws IOExcept } fileStorage.moveIntoStorage(zippedResultsKey, tempZipFile); } - String humanReadableZipName = friendlyAnalysisName + ".zip"; - res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableZipName)); - return JsonUtil.toJsonString(JsonUtil.objectNode() - .put("url", fileStorage.getURL(zippedResultsKey)) - .put("name", humanReadableZipName) - ); - } - - private String filenameCleanString (String original) { - String ret = original.replaceAll("\\W+", "_"); - if (ret.length() > 20) { - ret = ret.substring(0, 20); - } - return ret; +// res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableZipName)); + res.type(APPLICATION_JSON.asString()); + return fileStorage.getJsonUrl(zippedResultsKey, analysis.name, "zip"); } /** * This used to extract a particular percentile of a regional analysis as a grid file. - * Now it just gets the single percentile that exists for any one analysis, either from the local buffer file - * for an analysis still in progress, or from S3 for a completed analysis. */ - private Object getRegionalResults (Request req, Response res) throws IOException { + private UrlWithHumanName getRegionalResults (Request req, Response res) throws IOException { // Get some path parameters out of the URL. // The UUID of the regional analysis for which we want the output data @@ -431,12 +418,20 @@ private Object getRegionalResults (Request req, Response res) throws IOException FileStorageKey singleCutoffFileStorageKey = getSingleCutoffGrid( analysis, cutoffIndex, destinationPointSetId, percentile, format ); - return JsonUtil.toJsonString( - JsonUtil.objectNode().put("url", fileStorage.getURL(singleCutoffFileStorageKey)) - ); + res.type(APPLICATION_JSON.asString()); + // Substitute human readable name and shorten destination UUID. + // TODO better system for resolving UUID to human readable names in single and multi-grid cases + // Or maybe only allow multi-grid download for saving by end user, and single grid is purely internal. 
+ int firstUnderscore = singleCutoffFileStorageKey.path.indexOf('_'); + int secondUnderscore = singleCutoffFileStorageKey.path.indexOf('_', firstUnderscore + 1); + int lastDot = singleCutoffFileStorageKey.path.lastIndexOf('.'); + String humanName = analysis.name + + singleCutoffFileStorageKey.path.substring(firstUnderscore, firstUnderscore + 6) + + singleCutoffFileStorageKey.path.substring(secondUnderscore, lastDot); + return fileStorage.getJsonUrl(singleCutoffFileStorageKey, humanName, format.extension); } - private String getCsvResults (Request req, Response res) { + private Object getCsvResults (Request req, Response res) { final String regionalAnalysisId = req.params("_id"); final CsvResultType resultType = CsvResultType.valueOf(req.params("resultType").toUpperCase()); // If the resultType parameter received on the API is unrecognized, valueOf throws IllegalArgumentException @@ -458,7 +453,10 @@ private String getCsvResults (Request req, Response res) { FileStorageKey fileStorageKey = new FileStorageKey(RESULTS, storageKey); - res.type("text/plain"); + // TODO handle JSON with human name on UI side + // res.type(APPLICATION_JSON.asString()); + // return fileStorage.getJsonUrl(fileStorageKey, analysis.name, resultType + ".csv"); + res.type(TEXT_HTML.asString()); return fileStorage.getURL(fileStorageKey); } @@ -652,7 +650,7 @@ private RegionalAnalysis updateRegionalAnalysis (Request request, Response respo * Return a JSON-wrapped URL for the file in FileStorage containing the JSON representation of the scenario for * the given regional analysis. */ - private JsonNode getScenarioJsonUrl (Request request, Response response) { + private UrlWithHumanName getScenarioJsonUrl (Request request, Response response) { RegionalAnalysis regionalAnalysis = Persistence.regionalAnalyses.findByIdIfPermitted( request.params("_id"), DBProjection.exclude("request.scenario.modifications"), @@ -663,9 +661,9 @@ private JsonNode getScenarioJsonUrl (Request request, Response response) { final String scenarioId = regionalAnalysis.request.scenarioId; checkNotNull(networkId, "RegionalAnalysis did not contain a network ID."); checkNotNull(scenarioId, "RegionalAnalysis did not contain an embedded request with scenario ID."); - String scenarioUrl = fileStorage.getURL( - new FileStorageKey(BUNDLES, getScenarioFilename(regionalAnalysis.bundleId, scenarioId))); - return JsonUtil.objectNode().put("url", scenarioUrl); + FileStorageKey scenarioKey = new FileStorageKey(BUNDLES, getScenarioFilename(regionalAnalysis.bundleId, scenarioId)); + response.type(APPLICATION_JSON.asString()); + return fileStorage.getJsonUrl(scenarioKey, regionalAnalysis.name, "scenario.json"); } @Override @@ -676,11 +674,12 @@ public void registerEndpoints (spark.Service sparkService) { }); sparkService.path("/api/regional", () -> { // For grids, no transformer is supplied: render raw bytes or input stream rather than transforming to JSON. + // Wait, do we have any endpoints that write grids into responses? It looks like they all return URLs now. 
sparkService.get("/:_id", this::getRegionalAnalysis); - sparkService.get("/:_id/all", this::getAllRegionalResults); - sparkService.get("/:_id/grid/:format", this::getRegionalResults); - sparkService.get("/:_id/csv/:resultType", this::getCsvResults); - sparkService.get("/:_id/scenarioJsonUrl", this::getScenarioJsonUrl); + sparkService.get("/:_id/all", this::getAllRegionalResults, toJson); + sparkService.get("/:_id/grid/:format", this::getRegionalResults, toJson); + sparkService.get("/:_id/csv/:resultType", this::getCsvResults, toJson); + sparkService.get("/:_id/scenarioJsonUrl", this::getScenarioJsonUrl, toJson); sparkService.delete("/:_id", this::deleteRegionalAnalysis, toJson); sparkService.post("", this::createRegionalAnalysis, toJson); sparkService.put("/:_id", this::updateRegionalAnalysis, toJson); diff --git a/src/main/java/com/conveyal/file/FileStorage.java b/src/main/java/com/conveyal/file/FileStorage.java index 52de21316..8c5ef0122 100644 --- a/src/main/java/com/conveyal/file/FileStorage.java +++ b/src/main/java/com/conveyal/file/FileStorage.java @@ -94,4 +94,9 @@ default InputStream getInputStream (FileCategory fileCategory, String fileName) } } + default UrlWithHumanName getJsonUrl (FileStorageKey key, String rawHumanName, String humanExtension) { + String url = this.getURL(key); + return UrlWithHumanName.fromCleanedName(url, rawHumanName, humanExtension); + } + } diff --git a/src/main/java/com/conveyal/file/UrlWithHumanName.java b/src/main/java/com/conveyal/file/UrlWithHumanName.java new file mode 100644 index 000000000..c80815b93 --- /dev/null +++ b/src/main/java/com/conveyal/file/UrlWithHumanName.java @@ -0,0 +1,39 @@ +package com.conveyal.file; + +/** + * Combines a url for downloading a file, which might include a globally unique but human-annoying UUID, with a + * suggested human-readable name for that file when saved by an end user. The humanName may not be globally unique, + * so is only appropriate for cases where it doesn't need to be machine discoverable using a UUID. The humanName can + * be used as the download attribute of an HTML link, or as the attachment name in a content-disposition header. + * Instances of this record are intended to be serialized to JSON as an HTTP API response. + * // TODO make this into a class with factory methods and move static cleanFilename method here. + */ +public class UrlWithHumanName { + public final String url; + public final String humanName; + + public UrlWithHumanName (String url, String humanName) { + this.url = url; + this.humanName = humanName; + } + + private static final int TRUNCATE_FILENAME_CHARS = 220; + + /** + * Given an arbitrary string, make it safe for use as a friendly human-readable filename. + * This can yield non-unique strings and is intended for files downloaded by end users that do not need to be + * machine-discoverable by unique IDs. + */ + public static String filenameCleanString (String original) { + String ret = original.replaceAll("\\W+", "_"); + if (ret.length() > TRUNCATE_FILENAME_CHARS) { + ret = ret.substring(0, TRUNCATE_FILENAME_CHARS); + } + return ret; + } + + public static UrlWithHumanName fromCleanedName (String url, String rawHumanName, String humanExtension) { + String humanName = UrlWithHumanName.filenameCleanString(rawHumanName) + "." 
+ humanExtension; + return new UrlWithHumanName(url, humanName); + } +} From 5e1ccd0c197958a06dad2c542724d7cf1c7ca10a Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Fri, 16 Feb 2024 23:55:47 +0800 Subject: [PATCH 6/9] Remove obsolete comment --- src/main/java/com/conveyal/file/UrlWithHumanName.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/com/conveyal/file/UrlWithHumanName.java b/src/main/java/com/conveyal/file/UrlWithHumanName.java index c80815b93..8cf447c75 100644 --- a/src/main/java/com/conveyal/file/UrlWithHumanName.java +++ b/src/main/java/com/conveyal/file/UrlWithHumanName.java @@ -6,7 +6,6 @@ * so is only appropriate for cases where it doesn't need to be machine discoverable using a UUID. The humanName can * be used as the download attribute of an HTML link, or as the attachment name in a content-disposition header. * Instances of this record are intended to be serialized to JSON as an HTTP API response. - * // TODO make this into a class with factory methods and move static cleanFilename method here. */ public class UrlWithHumanName { public final String url; From ac21c90362c100b0c0e84754d5e55e6e4c2b551f Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 21 Mar 2024 00:25:39 +0800 Subject: [PATCH 7/9] human readable names without string chopping also use last instead of first five chars of UUID --- .../RegionalAnalysisController.java | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 2d169187f..0b1af4d35 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -267,7 +267,7 @@ private Object getAllRegionalResults (Request req, Response res) throws IOExcept if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) { throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results."); } - // Map from cryptic UUIDs to human readable strings that can replace them. + // Map from cryptic UUIDs to human-readable strings that can replace them. Map friendlyReplacements = new HashMap<>(); { // Replace the regional analysis ID with its name. @@ -414,21 +414,22 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) { throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"tiff\"."); } - // Process has a lot of overhead: UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. + // Significant overhead here: UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. FileStorageKey singleCutoffFileStorageKey = getSingleCutoffGrid( analysis, cutoffIndex, destinationPointSetId, percentile, format ); + // Create alternative human-readable filename. Similar to internal storage key name, but using a shortened + // ID for the destinations and the user-specified name for the project. This project name may contain invalid + // characters, but will be sanitized by getJsonUrl. 
We use the end of the dataset ID rather than the beginning
+        // because it's more likely to be different when several datasets are created in rapid succession.
+        String shortDestinationId = destinationPointSetId.substring(
+                destinationPointSetId.length() - 5, destinationPointSetId.length()
+        );
+        int cutoffMinutes =
+                analysis.cutoffsMinutes != null ? analysis.cutoffsMinutes[cutoffIndex] : analysis.cutoffMinutes;
+        String rawHumanName = String.format("%s_%s_P%d_C%d", analysis.name, shortDestinationId, percentile, cutoffMinutes);
         res.type(APPLICATION_JSON.asString());
-        // Substitute human readable name and shorten destination UUID.
-        // TODO better system for resolving UUID to human readable names in single and multi-grid cases
-        // Or maybe only allow multi-grid download for saving by end user, and single grid is purely internal.
-        int firstUnderscore = singleCutoffFileStorageKey.path.indexOf('_');
-        int secondUnderscore = singleCutoffFileStorageKey.path.indexOf('_', firstUnderscore + 1);
-        int lastDot = singleCutoffFileStorageKey.path.lastIndexOf('.');
-        String humanName = analysis.name +
-                singleCutoffFileStorageKey.path.substring(firstUnderscore, firstUnderscore + 6) +
-                singleCutoffFileStorageKey.path.substring(secondUnderscore, lastDot);
-        return fileStorage.getJsonUrl(singleCutoffFileStorageKey, humanName, format.extension);
+        return fileStorage.getJsonUrl(singleCutoffFileStorageKey, rawHumanName, format.extension.toLowerCase(Locale.ROOT));
     }
 
     private Object getCsvResults (Request req, Response res) {

From b8f359ab042f855c8b326a836f4071a3d4fa4e02 Mon Sep 17 00:00:00 2001
From: Andrew Byrd
Date: Thu, 21 Mar 2024 17:48:48 +0800
Subject: [PATCH 8/9] convert and validate FileStorageFormat immediately

Eliminates poorly named intermediate string variable and need for
explanation in outdated comments.
---
 .../RegionalAnalysisController.java           | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java
index 0b1af4d35..896b53801 100644
--- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java
@@ -341,8 +341,13 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I
         // Get some path parameters out of the URL.
         // The UUID of the regional analysis for which we want the output data
         final String regionalAnalysisId = req.params("_id");
-        // The response file format: PNG, TIFF, or GRID
-        final String fileFormatExtension = req.params("format"); // FIXME this may be case sensitive (could make multiple files for different requests)
+        // FIXME It is possible that regional analysis is complete, but UI is trying to fetch gridded results when there
+        // aren't any (only CSV, because origins are freeform).
+        // How should we determine whether this analysis is expected to have no gridded results and cleanly return a 404?
+        FileStorageFormat format = FileStorageFormat.valueOf(req.params("format").toUpperCase());
+        if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) {
+            throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"geotiff\".");
+        }
 
         RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted(
                 QueryBuilder.start("_id").is(req.params("_id")).get(),
@@ -407,13 +412,6 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I
         if (broker.findJob(regionalAnalysisId) != null) {
             throw AnalysisServerException.notFound("Analysis is incomplete, no results file is available.");
         }
-        // FIXME It is possible that regional analysis is complete, but UI is trying to fetch gridded results when there
-        // aren't any (only CSV, because origins are freeform).
-        // How should we determine whether this analysis is expected to have no gridded results and cleanly return a 404?
-        FileStorageFormat format = FileStorageFormat.valueOf(fileFormatExtension.toUpperCase());
-        if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) {
-            throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"tiff\".");
-        }
         // Significant overhead here: UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3.
         FileStorageKey singleCutoffFileStorageKey = getSingleCutoffGrid(
                 analysis, cutoffIndex, destinationPointSetId, percentile, format
@@ -674,8 +672,6 @@ public void registerEndpoints (spark.Service sparkService) {
             sparkService.get("/:regionId/regional/running", this::getRunningAnalyses, toJson);
         });
         sparkService.path("/api/regional", () -> {
-            // For grids, no transformer is supplied: render raw bytes or input stream rather than transforming to JSON.
-            // Wait, do we have any endpoints that write grids into responses? It looks like they all return URLs now.
             sparkService.get("/:_id", this::getRegionalAnalysis);
             sparkService.get("/:_id/all", this::getAllRegionalResults, toJson);
             sparkService.get("/:_id/grid/:format", this::getRegionalResults, toJson);

From b4933c609e9df751e1d11232bc50a02abf4f5b2a Mon Sep 17 00:00:00 2001
From: Andrew Byrd
Date: Thu, 21 Mar 2024 22:41:20 +0800
Subject: [PATCH 9/9] reusable logic to derive human readable names
---
 .../RegionalAnalysisController.java           | 189 +++++++++---------
 .../java/com/conveyal/file/FileStorage.java   |   6 +
 .../com/conveyal/file/UrlWithHumanName.java   |  13 +-
 3 files changed, 105 insertions(+), 103 deletions(-)

diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java
index 896b53801..a60eccd2b 100644
--- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java
@@ -6,9 +6,9 @@
 import com.conveyal.analysis.components.broker.Broker;
 import com.conveyal.analysis.components.broker.JobStatus;
 import com.conveyal.analysis.models.AnalysisRequest;
+import com.conveyal.analysis.models.Model;
 import com.conveyal.analysis.models.OpportunityDataset;
 import com.conveyal.analysis.models.RegionalAnalysis;
-import com.conveyal.file.UrlWithHumanName;
 import com.conveyal.analysis.persistence.Persistence;
 import com.conveyal.analysis.results.CsvResultType;
 import com.conveyal.analysis.util.JsonUtil;
@@ -16,6 +16,7 @@
 import com.conveyal.file.FileStorageFormat;
 import com.conveyal.file.FileStorageKey;
 import com.conveyal.file.FileUtils;
+import com.conveyal.file.UrlWithHumanName;
 import com.conveyal.r5.analyst.FreeFormPointSet;
 import com.conveyal.r5.analyst.Grid;
 import com.conveyal.r5.analyst.PointSet;
@@ -44,7 +45,6 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -168,20 +168,33 @@ private int getIntQueryParameter (Request req, String parameterName, int default
         }
     }
 
+    /**
+     * Associate a storage key with a human-readable name.
+     * Currently, this record type is only used within the RegionalAnalysisController class.
+     */
+    private record HumanKey(FileStorageKey storageKey, String humanName) { }
+
     /**
      * Get a regional analysis results raster for a single (percentile, cutoff, destination) combination, in one of
     * several image file formats. This method was factored out for use from two different API endpoints, one for
     * fetching a single grid, and another for fetching grids for all combinations of parameters at once.
+     * It returns the unique FileStorageKey for those results, associated with a non-unique human-readable name.
      */
-    private FileStorageKey getSingleCutoffGrid (
+    private HumanKey getSingleCutoffGrid (
             RegionalAnalysis analysis,
-            int cutoffIndex,
-            String destinationPointSetId,
+            OpportunityDataset destinations,
+            int cutoffMinutes,
             int percentile,
             FileStorageFormat fileFormat
     ) throws IOException {
-        String regionalAnalysisId = analysis._id;
-        int cutoffMinutes = analysis.cutoffsMinutes != null ? analysis.cutoffsMinutes[cutoffIndex] : analysis.cutoffMinutes;
+        final String regionalAnalysisId = analysis._id;
+        final String destinationPointSetId = destinations._id;
+        // Selecting the zeroth cutoff still makes sense for older analyses that don't allow an array of N cutoffs.
+        int cutoffIndex = 0;
+        if (analysis.cutoffsMinutes != null) {
+            cutoffIndex = new TIntArrayList(analysis.cutoffsMinutes).indexOf(cutoffMinutes);
+            checkState(cutoffIndex >= 0);
+        }
         LOG.info(
                 "Returning {} minute accessibility to pointset {} (percentile {}) for regional analysis {} in format {}.",
                 cutoffMinutes, destinationPointSetId, percentile, regionalAnalysisId, fileFormat
@@ -244,69 +257,40 @@ private FileStorageKey getSingleCutoffGrid (
             fileStorage.moveIntoStorage(singleCutoffFileStorageKey, localFile);
         }
-        return singleCutoffFileStorageKey;
+        String analysisHumanName = humanNameForEntity(analysis);
+        String destinationHumanName = humanNameForEntity(destinations);
+        String resultHumanFilename = filenameCleanString(
+                String.format("%s_%s_P%d_C%d", analysisHumanName, destinationHumanName, percentile, cutoffMinutes)
+        ) + "." + fileFormat.extension.toLowerCase(Locale.ROOT);
+        // Note that the returned human filename already contains the appropriate extension.
+        return new HumanKey(singleCutoffFileStorageKey, resultHumanFilename);
     }
 
     private Object getAllRegionalResults (Request req, Response res) throws IOException {
-        // Get the UUID of the regional analysis for which we want the output data.
         final String regionalAnalysisId = req.params("_id");
         final UserPermissions userPermissions = UserPermissions.from(req);
-        RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted(
-                QueryBuilder.start("_id").is(req.params("_id")).get(),
-                DBProjection.exclude("request.scenario.modifications"),
-                userPermissions
-        ).iterator().next();
-        if (analysis == null || analysis.deleted) {
-            throw AnalysisServerException.notFound("The specified regional analysis is unknown or has been deleted.");
+        final RegionalAnalysis analysis = getAnalysis(regionalAnalysisId, userPermissions);
+        if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) {
+            throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results.");
+        }
+        if (analysis.request.originPointSetKey != null) {
+            throw AnalysisServerException.badRequest("Batch result download is only available for gridded origins.");
         }
         FileStorageKey zippedResultsKey = new FileStorageKey(RESULTS, analysis._id + "_ALL.zip");
-        // Keep only alphanumeric characters and underscores from user-specified analysis name.
-        String friendlyAnalysisName = filenameCleanString(analysis.name);
         if (!fileStorage.exists(zippedResultsKey)) {
-            List<FileStorageKey> fileStorageKeys = new ArrayList<>();
-            if (analysis.cutoffsMinutes == null || analysis.travelTimePercentiles == null || analysis.destinationPointSetIds == null) {
-                throw AnalysisServerException.badRequest("Batch result download is not available for legacy regional results.");
-            }
-            // Map from cryptic UUIDs to human-readable strings that can replace them.
-            Map<String, String> friendlyReplacements = new HashMap<>();
-            {
-                // Replace the regional analysis ID with its name.
-                friendlyReplacements.put(analysis._id, friendlyAnalysisName);
-                // Replace each destination point set ID with its name.
-                int d = 0;
-                for (String destinationPointSetId : analysis.destinationPointSetIds) {
-                    OpportunityDataset opportunityDataset = Persistence.opportunityDatasets.findByIdIfPermitted(
-                            destinationPointSetId,
-                            userPermissions
-                    );
-                    checkNotNull(opportunityDataset, "Opportunity dataset could not be found in database.");
-                    // Avoid name collisions, prepend an integer.
-                    String friendlyDestinationName = "D%d_%s".formatted(d++, filenameCleanString(opportunityDataset.name));
-                    friendlyReplacements.put(destinationPointSetId, friendlyDestinationName);
-                }
-            }
-            // Iterate over all combinations and generate one geotiff grid output for each one.
-            // TODO check whether origins are freeform: if (analysis.request.originPointSetKey == null) ?
+            // Iterate over all destination, cutoff, and percentile combinations, generating one geotiff grid output for each.
+            List<HumanKey> humanKeys = new ArrayList<>();
             for (String destinationPointSetId : analysis.destinationPointSetIds) {
-                for (int cutoffIndex = 0; cutoffIndex < analysis.cutoffsMinutes.length; cutoffIndex++) {
+                OpportunityDataset destinations = getDestinations(destinationPointSetId, userPermissions);
+                for (int cutoffMinutes : analysis.cutoffsMinutes) {
                     for (int percentile : analysis.travelTimePercentiles) {
-                        FileStorageKey fileStorageKey = getSingleCutoffGrid(
-                                analysis,
-                                cutoffIndex,
-                                destinationPointSetId,
-                                percentile,
-                                FileStorageFormat.GEOTIFF
+                        HumanKey gridKey = getSingleCutoffGrid(
                                analysis, destinations, cutoffMinutes, percentile, FileStorageFormat.GEOTIFF
                         );
-                        fileStorageKeys.add(fileStorageKey);
+                        humanKeys.add(gridKey);
                     }
                 }
             }
-            // Also include any CSV files that were generated. TODO these are gzipped, decompress and re-compress.
-            // for (String csvStorageName : analysis.resultStorage.values()) {
-            //     LOG.info("Including {} in results zip file.", csvStorageName);
-            //     FileStorageKey csvKey = new FileStorageKey(RESULTS, csvStorageName);
-            //     fileStorageKeys.add(csvKey);
-            // }
             File tempZipFile = File.createTempFile("regional", ".zip");
             // Zipfs can't open existing empty files, the file has to not exist. FIXME: Non-dangerous race condition
             // Examining ZipFileSystemProvider reveals a "useTempFile" env parameter, but this is for the individual entries.
@@ -315,49 +299,68 @@ private Object getAllRegionalResults (Request req, Response res) throws IOExcept
             Map<String, String> env = Map.of("create", "true");
             URI uri = URI.create("jar:file:" + tempZipFile.getAbsolutePath());
             try (FileSystem zipFilesystem = FileSystems.newFileSystem(uri, env)) {
-                for (FileStorageKey fileStorageKey : fileStorageKeys) {
-                    Path storagePath = fileStorage.getFile(fileStorageKey).toPath();
-                    String nameInZip = storagePath.getFileName().toString();
-                    // This is so inefficient but it should work.
-                    for (String replacementKey : friendlyReplacements.keySet()) {
-                        nameInZip = nameInZip.replace(replacementKey, friendlyReplacements.get(replacementKey));
-                    }
-                    Path zipPath = zipFilesystem.getPath(nameInZip);
+                for (HumanKey key : humanKeys) {
+                    Path storagePath = fileStorage.getFile(key.storageKey).toPath();
+                    Path zipPath = zipFilesystem.getPath(key.humanName);
                     Files.copy(storagePath, zipPath, StandardCopyOption.REPLACE_EXISTING);
                 }
             }
             fileStorage.moveIntoStorage(zippedResultsKey, tempZipFile);
         }
-//        res.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"%s\"".formatted(humanReadableZipName));
         res.type(APPLICATION_JSON.asString());
-        return fileStorage.getJsonUrl(zippedResultsKey, analysis.name, "zip");
+        String analysisHumanName = humanNameForEntity(analysis);
+        return fileStorage.getJsonUrl(zippedResultsKey, analysisHumanName, "zip");
     }
 
     /**
-     * This used to extract a particular percentile of a regional analysis as a grid file.
+     * Given an Entity, make a human-readable name for the entity composed of its user-supplied name as well as
+     * the most rapidly changing digits of its ID to disambiguate in case multiple entities have the same name.
+     * It is also possible to find the exact entity in many web UI fields using this suffix of its ID.
      */
-    private UrlWithHumanName getRegionalResults (Request req, Response res) throws IOException {
+    private static String humanNameForEntity (Model entity) {
+        // Most or all IDs encountered are MongoDB ObjectIDs. The first four and middle five bytes are slow-changing
+        // and would not disambiguate between data sets. Only the 3-byte counter at the end will be sure to change.
+        // See https://www.mongodb.com/docs/manual/reference/method/ObjectId/
+        final String id = entity._id;
+        checkArgument(id.length() > 6, "ID had too few characters.");
+        String shortId = id.substring(id.length() - 6, id.length());
+        String humanName = "%s_%s".formatted(filenameCleanString(entity.name), shortId);
+        return humanName;
+    }
 
-        // Get some path parameters out of the URL.
-        // The UUID of the regional analysis for which we want the output data
-        final String regionalAnalysisId = req.params("_id");
-        // FIXME It is possible that regional analysis is complete, but UI is trying to fetch gridded results when there
-        // aren't any (only CSV, because origins are freeform).
-        // How should we determine whether this analysis is expected to have no gridded results and cleanly return a 404?
-        FileStorageFormat format = FileStorageFormat.valueOf(req.params("format").toUpperCase());
-        if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) {
-            throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"geotiff\".");
-        }
+    /** Fetch destination OpportunityDataset from database, followed by a check that it was present. */
+    private static OpportunityDataset getDestinations (String destinationPointSetId, UserPermissions userPermissions) {
+        OpportunityDataset opportunityDataset =
+                Persistence.opportunityDatasets.findByIdIfPermitted(destinationPointSetId, userPermissions);
+        checkNotNull(opportunityDataset, "Opportunity dataset could not be found in database.");
+        return opportunityDataset;
+    }
 
+    /** Fetch RegionalAnalysis from database by ID, followed by a check that it was present and not deleted. */
+    private static RegionalAnalysis getAnalysis (String analysisId, UserPermissions userPermissions) {
         RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted(
-                QueryBuilder.start("_id").is(req.params("_id")).get(),
+                QueryBuilder.start("_id").is(analysisId).get(),
                 DBProjection.exclude("request.scenario.modifications"),
-                UserPermissions.from(req)
+                userPermissions
         ).iterator().next();
-
         if (analysis == null || analysis.deleted) {
             throw AnalysisServerException.notFound("The specified regional analysis is unknown or has been deleted.");
         }
+        return analysis;
+    }
+
+    /** Extract a particular percentile and cutoff of a regional analysis in one of several different raster formats. */
+    private UrlWithHumanName getRegionalResults (Request req, Response res) throws IOException {
+        // It is possible that a regional analysis is complete, but the UI is trying to fetch gridded results when
+        // there aren't any (only CSV, because origins are freeform). How should we determine whether this analysis
+        // is expected to have no gridded results and cleanly return a 404?
+        final String regionalAnalysisId = req.params("_id");
+        FileStorageFormat format = FileStorageFormat.valueOf(req.params("format").toUpperCase());
+        if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) {
+            throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"geotiff\".");
+        }
+        final UserPermissions userPermissions = UserPermissions.from(req);
+        RegionalAnalysis analysis = getAnalysis(regionalAnalysisId, userPermissions);
 
         // Which channel to extract from results with multiple values per origin (for different travel time cutoffs)
         // and multiple output files per analysis (for different percentiles of travel time and/or different
         // destination pointsets).
         // For newer analyses that have multiple cutoffs, percentiles, or destination pointsets, these initial values
         // are coming from deprecated fields, are not meaningful and will be overwritten below from query parameters.
         int percentile = analysis.travelTimePercentile;
-        int cutoffIndex = 0; // For old single-cutoff outputs, we can still select the zeroth grid.
+        int cutoffMinutes = analysis.cutoffMinutes;
         String destinationPointSetId = analysis.grid;
 
         // Handle newer regional analyses with multiple cutoffs in an array.
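A note on the format handling in the hunk above: FileStorageFormat.valueOf throws IllegalArgumentException whenever the path parameter names no enum constant at all (for example "tiff", which the pre-patch-8 error message even suggested), so such a request surfaces as a generic server error before the explicit badRequest check can run. A more defensive parse could look like the following sketch; parseRasterFormat is a hypothetical helper shown for illustration, not something these patches introduce.

    // Sketch only: map the raw ":format" path parameter onto a FileStorageFormat,
    // turning both unknown names and unsupported formats into a 400 response.
    private static FileStorageFormat parseRasterFormat (String rawFormat) {
        try {
            FileStorageFormat format = FileStorageFormat.valueOf(rawFormat.toUpperCase(Locale.ROOT));
            if (format == FileStorageFormat.GRID || format == FileStorageFormat.PNG
                    || format == FileStorageFormat.GEOTIFF) {
                return format;
            }
        } catch (IllegalArgumentException ignored) {
            // valueOf rejected the name outright; fall through to the bad request below.
        }
        throw AnalysisServerException.badRequest(
                "Format \"" + rawFormat + "\" is invalid. Request format must be \"grid\", \"png\", or \"geotiff\"."
        );
    }

With such a helper the endpoint would begin with FileStorageFormat format = parseRasterFormat(req.params("format")); and both failure modes would produce the 400 response that the error message already promises.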
@@ -375,9 +378,8 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I
         if (analysis.cutoffsMinutes != null) {
             int nCutoffs = analysis.cutoffsMinutes.length;
             checkState(nCutoffs > 0, "Regional analysis has no cutoffs.");
-            int cutoffMinutes = getIntQueryParameter(req, "cutoff", analysis.cutoffsMinutes[nCutoffs / 2]);
-            cutoffIndex = new TIntArrayList(analysis.cutoffsMinutes).indexOf(cutoffMinutes);
-            checkState(cutoffIndex >= 0,
+            cutoffMinutes = getIntQueryParameter(req, "cutoff", analysis.cutoffsMinutes[nCutoffs / 2]);
+            checkArgument(new TIntArrayList(analysis.cutoffsMinutes).contains(cutoffMinutes),
                 "Travel time cutoff for this regional analysis must be taken from this list: (%s)",
                 Ints.join(", ", analysis.cutoffsMinutes)
             );
@@ -413,21 +415,10 @@ private UrlWithHumanName getRegionalResults (Request req, Response res) throws I
             throw AnalysisServerException.notFound("Analysis is incomplete, no results file is available.");
         }
         // Significant overhead here: UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3.
-        FileStorageKey singleCutoffFileStorageKey = getSingleCutoffGrid(
-                analysis, cutoffIndex, destinationPointSetId, percentile, format
-        );
-        // Create alternative human-readable filename. Similar to internal storage key name, but using a shortened
-        // ID for the destinations and the user-specified name for the project. This project name may contain invalid
-        // characters, but will be sanitized by getJsonUrl. We use the end of the dataset ID rather than the beginning
-        // because it's more likely to be different when several datasets are created in rapid succession.
-        String shortDestinationId = destinationPointSetId.substring(
-                destinationPointSetId.length() - 5, destinationPointSetId.length()
-        );
-        int cutoffMinutes =
-                analysis.cutoffsMinutes != null ? analysis.cutoffsMinutes[cutoffIndex] : analysis.cutoffMinutes;
-        String rawHumanName = String.format("%s_%s_P%d_C%d", analysis.name, shortDestinationId, percentile, cutoffMinutes);
+        OpportunityDataset destinations = getDestinations(destinationPointSetId, userPermissions);
+        HumanKey gridKey = getSingleCutoffGrid(analysis, destinations, cutoffMinutes, percentile, format);
         res.type(APPLICATION_JSON.asString());
-        return fileStorage.getJsonUrl(singleCutoffFileStorageKey, rawHumanName, format.extension.toLowerCase(Locale.ROOT));
+        return fileStorage.getJsonUrl(gridKey.storageKey, gridKey.humanName);
     }
 
     private Object getCsvResults (Request req, Response res) {
diff --git a/src/main/java/com/conveyal/file/FileStorage.java b/src/main/java/com/conveyal/file/FileStorage.java
index 8c5ef0122..f9bd687cc 100644
--- a/src/main/java/com/conveyal/file/FileStorage.java
+++ b/src/main/java/com/conveyal/file/FileStorage.java
@@ -99,4 +99,10 @@ default UrlWithHumanName getJsonUrl (FileStorageKey key, String rawHumanName, St
         return UrlWithHumanName.fromCleanedName(url, rawHumanName, humanExtension);
     }
 
+    /** This assumes the humanFileName is already a complete filename (cleaned and truncated, with any extension already appended). */
+    default UrlWithHumanName getJsonUrl (FileStorageKey key, String humanFileName) {
+        String url = this.getURL(key);
+        return new UrlWithHumanName(url, humanFileName);
+    }
+
 }
diff --git a/src/main/java/com/conveyal/file/UrlWithHumanName.java b/src/main/java/com/conveyal/file/UrlWithHumanName.java
index 8cf447c75..f7a49c933 100644
--- a/src/main/java/com/conveyal/file/UrlWithHumanName.java
+++ b/src/main/java/com/conveyal/file/UrlWithHumanName.java
@@ -19,14 +19,19 @@ public UrlWithHumanName (String url, String humanName) {
     private static final int TRUNCATE_FILENAME_CHARS = 220;
 
     /**
-     * Given an arbitrary string, make it safe for use as a friendly human-readable filename.
-     * This can yield non-unique strings and is intended for files downloaded by end users that do not need to be
-     * machine-discoverable by unique IDs.
+     * Given an arbitrary string, make it safe for use in a friendly human-readable filename. This can yield non-unique
+     * strings and is intended for files downloaded by end users that do not need to be machine-discoverable by unique
+     * IDs. A length of up to 255 characters will work with most filesystems and within ZIP files. In all the names we
+     * generate, the end of the name identifies it most precisely (a fragment of a hex object ID, or distinguishing
+     * factors such as cutoff and percentile for files within a ZIP archive). Therefore, when a name is too long, we
+     * truncate it to a suffix rather than a prefix. We keep the length somewhat under 255 in case some other short
+     * suffix needs to be appended before use as a filename. Note that all non-word characters, including dots, are
+     * replaced with underscores, so any dot and filename extension must be appended after calling this method.
      */
     public static String filenameCleanString (String original) {
         String ret = original.replaceAll("\\W+", "_");
         if (ret.length() > TRUNCATE_FILENAME_CHARS) {
-            ret = ret.substring(0, TRUNCATE_FILENAME_CHARS);
+            ret = ret.substring(ret.length() - TRUNCATE_FILENAME_CHARS, ret.length());
         }
         return ret;
     }
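To make the cleaning and truncation rules concrete, here is a small usage sketch (illustrative only: the class name and input values are invented, and the asserts require running the JVM with -ea). It relies solely on filenameCleanString as defined above, with TRUNCATE_FILENAME_CHARS at 220.

    import static com.conveyal.file.UrlWithHumanName.filenameCleanString;

    public class FilenameCleanStringExample {
        public static void main (String[] args) {
            // All non-word characters, including spaces and dots, become underscores.
            assert filenameCleanString("my analysis.name").equals("my_analysis_name");
            // Overlong names lose their prefix, never their distinguishing suffix.
            String longName = "x".repeat(230) + "_P50_C45";
            String cleaned = filenameCleanString(longName);
            assert cleaned.length() == 220;
            assert cleaned.endsWith("_P50_C45");
        }
    }

Keeping the suffix matters because both humanNameForEntity and the per-grid filenames place their disambiguating fragments (the ObjectId counter bytes, the percentile and cutoff) at the end of the name.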