Skip to content

Commit 757058b

Browse files
Bugfix in URI conversion when working with directories (deephaven#5493)
1 parent cdd0e08 commit 757058b

File tree

5 files changed

+101
-23
lines changed

5 files changed

+101
-23
lines changed

Base/src/main/java/io/deephaven/base/FileUtils.java

+30-19
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ public boolean accept(File dir, String name) {
3939

4040
public static final Pattern REPEATED_URI_SEPARATOR_PATTERN = Pattern.compile("//+");
4141

42+
public static final String FILE_URI_SCHEME = "file";
43+
4244
/**
4345
* Cleans the specified path. All files and subdirectories in the path will be deleted. (ie you'll be left with an
4446
* empty directory).
@@ -82,7 +84,7 @@ public static void deleteRecursively(File file) {
8284
/**
8385
* Move files accepted by a filter from their relative path under source to the same relative path under
8486
* destination. Creates missing destination subdirectories as needed.
85-
*
87+
*
8688
* @param source Must be a directory.
8789
* @param destination Must be a directory if it exists.
8890
* @param filter Applied to normal files, only. We recurse on directories automatically.
@@ -129,7 +131,7 @@ private static void moveRecursivelyInternal(File source, File destination, FileF
129131

130132
/**
131133
* Recursive delete method that copes with .nfs files. Uses the file's parent as the trash directory.
132-
*
134+
*
133135
* @param file
134136
*/
135137
public static void deleteRecursivelyOnNFS(File file) {
@@ -138,7 +140,7 @@ public static void deleteRecursivelyOnNFS(File file) {
138140

139141
/**
140142
* Recursive delete method that copes with .nfs files.
141-
*
143+
*
142144
* @param trashFile Filename to move regular files to before deletion. .nfs files may be created in its parent
143145
* directory.
144146
* @param fileToBeDeleted File or directory at which to begin recursive deletion.
@@ -169,7 +171,7 @@ public static void deleteRecursivelyOnNFS(final File trashFile, final File fileT
169171

170172
/**
171173
* Scan directory recursively to find all files
172-
*
174+
*
173175
* @param dir
174176
* @return
175177
*/
@@ -282,21 +284,36 @@ public static URI convertToURI(final String source, final boolean isDirectory) {
282284
URI uri;
283285
try {
284286
uri = new URI(source);
287+
if (uri.getScheme() == null) {
288+
// Convert to a "file" URI
289+
return convertToURI(new File(source), isDirectory);
290+
}
291+
if (uri.getScheme().equals(FILE_URI_SCHEME)) {
292+
return convertToURI(new File(uri), isDirectory);
293+
}
294+
String path = uri.getPath();
295+
final boolean endsWithSlash = path.charAt(path.length() - 1) == URI_SEPARATOR_CHAR;
296+
if (!isDirectory && endsWithSlash) {
297+
throw new IllegalArgumentException("Non-directory URI should not end with a slash: " + uri);
298+
}
299+
boolean isUpdated = false;
300+
if (isDirectory && !endsWithSlash) {
301+
path = path + URI_SEPARATOR_CHAR;
302+
isUpdated = true;
303+
}
285304
// Replace two or more consecutive slashes in the path with a single slash
286-
final String path = uri.getPath();
287305
if (path.contains(REPEATED_URI_SEPARATOR)) {
288-
final String canonicalizedPath = REPEATED_URI_SEPARATOR_PATTERN.matcher(path).replaceAll(URI_SEPARATOR);
289-
uri = new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), uri.getPort(), canonicalizedPath,
290-
uri.getQuery(), uri.getFragment());
306+
path = REPEATED_URI_SEPARATOR_PATTERN.matcher(path).replaceAll(URI_SEPARATOR);
307+
isUpdated = true;
308+
}
309+
if (isUpdated) {
310+
uri = new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), uri.getPort(), path, uri.getQuery(),
311+
uri.getFragment());
291312
}
292313
} catch (final URISyntaxException e) {
293314
// If the URI is invalid, assume it's a file path
294315
return convertToURI(new File(source), isDirectory);
295316
}
296-
if (uri.getScheme() == null) {
297-
// Convert to a "file" URI
298-
return convertToURI(new File(source), isDirectory);
299-
}
300317
return uri;
301318
}
302319

@@ -314,17 +331,11 @@ public static URI convertToURI(final File file, final boolean isDirectory) {
314331
if (File.separatorChar != URI_SEPARATOR_CHAR) {
315332
absPath = absPath.replace(File.separatorChar, URI_SEPARATOR_CHAR);
316333
}
317-
if (absPath.charAt(0) != URI_SEPARATOR_CHAR) {
318-
absPath = URI_SEPARATOR_CHAR + absPath;
319-
}
320334
if (isDirectory && absPath.charAt(absPath.length() - 1) != URI_SEPARATOR_CHAR) {
321335
absPath = absPath + URI_SEPARATOR_CHAR;
322336
}
323-
if (absPath.startsWith(REPEATED_URI_SEPARATOR)) {
324-
absPath = REPEATED_URI_SEPARATOR + absPath;
325-
}
326337
try {
327-
return new URI("file", null, absPath, null);
338+
return new URI(FILE_URI_SCHEME, null, absPath, null);
328339
} catch (final URISyntaxException e) {
329340
throw new IllegalStateException("Failed to convert file to URI: " + file, e);
330341
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
//
2+
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
3+
//
4+
package io.deephaven.base;
5+
6+
import java.io.File;
7+
import java.io.IOException;
8+
import java.net.URISyntaxException;
9+
import java.nio.file.Path;
10+
11+
import junit.framework.TestCase;
12+
import org.junit.Assert;
13+
14+
public class FileUtilsTest extends TestCase {
15+
16+
public void testConvertToFileURI() throws IOException {
17+
final File currentDir = new File("").getAbsoluteFile();
18+
fileUriTestHelper(currentDir.toString(), true, currentDir.toURI().toString());
19+
20+
final File someFile = new File(currentDir, "tempFile");
21+
fileUriTestHelper(someFile.getPath(), false, someFile.toURI().toString());
22+
23+
// Check if trailing slash gets added for a directory
24+
final String expectedDirURI = "file:" + currentDir.getPath() + "/path/to/directory/";
25+
fileUriTestHelper(currentDir.getPath() + "/path/to/directory", true, expectedDirURI);
26+
27+
// Check if multiple slashes get normalized
28+
fileUriTestHelper(currentDir.getPath() + "////path///to////directory////", true, expectedDirURI);
29+
30+
// Check if multiple slashes in the beginning get normalized
31+
fileUriTestHelper("////" + currentDir.getPath() + "/path/to/directory", true, expectedDirURI);
32+
33+
// Check for bad inputs for files with trailing slashes
34+
final String expectedFileURI = someFile.toURI().toString();
35+
fileUriTestHelper(someFile.getPath() + "/", false, expectedFileURI);
36+
Assert.assertEquals(expectedFileURI,
37+
FileUtils.convertToURI("file:" + someFile.getPath() + "/", false).toString());
38+
}
39+
40+
private static void fileUriTestHelper(final String filePath, final boolean isDirectory,
41+
final String expectedURIString) {
42+
Assert.assertEquals(expectedURIString, FileUtils.convertToURI(filePath, isDirectory).toString());
43+
Assert.assertEquals(expectedURIString, FileUtils.convertToURI(new File(filePath), isDirectory).toString());
44+
Assert.assertEquals(expectedURIString, FileUtils.convertToURI(Path.of(filePath), isDirectory).toString());
45+
}
46+
47+
public void testConvertToS3URI() throws URISyntaxException {
48+
Assert.assertEquals("s3://bucket/key", FileUtils.convertToURI("s3://bucket/key", false).toString());
49+
50+
// Check if trailing slash gets added for a directory
51+
Assert.assertEquals("s3://bucket/key/".toString(), FileUtils.convertToURI("s3://bucket/key", true).toString());
52+
53+
// Check if multiple slashes get normalized
54+
Assert.assertEquals("s3://bucket/key/", FileUtils.convertToURI("s3://bucket///key///", true).toString());
55+
56+
try {
57+
FileUtils.convertToURI("", false);
58+
Assert.fail("Expected IllegalArgumentException");
59+
} catch (IllegalArgumentException expected) {
60+
}
61+
62+
try {
63+
FileUtils.convertToURI("s3://bucket/key/", false);
64+
Assert.fail("Expected IllegalArgumentException");
65+
} catch (IllegalArgumentException expected) {
66+
}
67+
}
68+
}

extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java

-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import org.apache.parquet.schema.MessageType;
1919
import org.jetbrains.annotations.NotNull;
2020

21-
import java.io.File;
2221
import java.io.IOException;
2322
import java.util.ArrayList;
2423
import java.util.HashMap;

extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProvider.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
2828
import java.util.stream.Stream;
2929

30-
import static io.deephaven.extensions.trackedfile.TrackedSeekableChannelsProviderPlugin.FILE_URI_SCHEME;
30+
import static io.deephaven.base.FileUtils.FILE_URI_SCHEME;
3131

3232
/**
3333
* {@link SeekableChannelsProvider} implementation that is constrained by a Deephaven {@link TrackedFileHandleFactory}.

extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProviderPlugin.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@
1212

1313
import java.net.URI;
1414

15+
import static io.deephaven.base.FileUtils.FILE_URI_SCHEME;
16+
1517
/**
1618
* {@link SeekableChannelsProviderPlugin} implementation used for reading files from local disk.
1719
*/
1820
@AutoService(SeekableChannelsProviderPlugin.class)
1921
public final class TrackedSeekableChannelsProviderPlugin implements SeekableChannelsProviderPlugin {
2022

21-
static final String FILE_URI_SCHEME = "file";
22-
2323
@Override
2424
public boolean isCompatible(@NotNull final URI uri, @Nullable final Object object) {
2525
return FILE_URI_SCHEME.equals(uri.getScheme());

0 commit comments

Comments
 (0)