Skip to content

Commit 914482c

Browse files
authored
Add ingest from S3 bucket support (#227)
1 parent d722363 commit 914482c

File tree

6 files changed

+404
-0
lines changed

6 files changed

+404
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
package au.org.ala.images
2+
3+
import com.amazonaws.auth.AWSStaticCredentialsProvider
4+
import com.amazonaws.auth.BasicAWSCredentials
5+
import com.amazonaws.services.s3.AmazonS3ClientBuilder
6+
import com.google.common.io.ByteSource
7+
8+
/**
9+
* Accepts a URL in the form s3://bucketname/key and returns a ByteSource for the object in the bucket with the given key.
10+
*
11+
* Authentication is supported by providing the access key and secret key in the userInfo part of the URL in the form accessKey:secretKey
12+
* or by the default AWS credentials provider chain if no userinfo is provided.
13+
*
14+
* This is not intended to be used with image service managed objects but for ingesting images from S3.
15+
*/
16+
class S3ByteSource extends ByteSource {
17+
18+
private URI url
19+
20+
S3ByteSource(URI url) {
21+
if (url.scheme != 's3') {
22+
throw new IllegalArgumentException("URL scheme must be 's3' for S3ByteSource")
23+
}
24+
if (!url.path) {
25+
throw new IllegalArgumentException("URL path is required for S3ByteSource")
26+
}
27+
if (!url.host) {
28+
throw new IllegalArgumentException("URL host is required for S3ByteSource")
29+
}
30+
if (url.userInfo && !url.userInfo.contains(':')) {
31+
throw new IllegalArgumentException("URL userInfo must be in the form 'accessKey:secretKey'")
32+
}
33+
this.url = url
34+
}
35+
36+
@Override
37+
InputStream openStream() throws IOException {
38+
def client
39+
if (url.userInfo) {
40+
def parts = url.userInfo.split(':')
41+
client = AmazonS3ClientBuilder.standard().withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(parts[0], parts[1]))).build()
42+
} else {
43+
client = AmazonS3ClientBuilder.standard().build()
44+
}
45+
46+
def bucketname = url.host
47+
def key = url.path
48+
49+
// if (url.host.matches('s3\\.(.*\\.)?amazonaws\\.com')) {
50+
// bucketname = url.path.substring(1, url.path.indexOf('/', 1))
51+
// key = url.path.substring(url.path.indexOf('/', 1) + 1)
52+
// } else {
53+
// bucketname = url.host.substring(0, url.host.indexOf('.'))
54+
// key = url.path.substring(1)
55+
// }
56+
// def path = url.path
57+
// if (path.startsWith('/')) {
58+
// path = path.substring(1)
59+
// }
60+
return client.getObject(bucketname, key).getObjectContent()
61+
}
62+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
package au.org.ala.images;
2+
3+
import com.amazonaws.auth.AWSStaticCredentialsProvider;
4+
import com.amazonaws.auth.BasicAWSCredentials;
5+
import com.amazonaws.services.s3.AmazonS3;
6+
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
7+
import com.amazonaws.services.s3.model.S3Object;
8+
import org.apache.logging.log4j.util.Strings;
9+
import org.slf4j.Logger;
10+
import org.slf4j.LoggerFactory;
11+
12+
import java.io.IOException;
13+
import java.io.InputStream;
14+
import java.net.URI;
15+
import java.net.URISyntaxException;
16+
import java.net.URL;
17+
import java.net.URLConnection;
18+
import java.util.regex.Pattern;
19+
20+
/**
21+
* Support for non standard s3:// URLs, the URL should be in the form
22+
* s3://accesskey:secretKey@bucketname/path
23+
*
24+
* The accesskey and secretKey are optional, if not provided the default credentials will be used.
25+
*/
26+
public class S3URLConnection extends URLConnection {
27+
28+
private static final Logger log = LoggerFactory.getLogger(S3URLConnection.class);
29+
30+
private S3Object object;
31+
32+
private String endpoint;
33+
34+
private boolean pathStyleAccessEnabled = false;
35+
36+
/**
37+
* Constructs a URL connection to the specified URL. A connection to
38+
* the object referenced by the URL is not created.
39+
*
40+
* @param url the specified URL.
41+
*/
42+
protected S3URLConnection(URL url) {
43+
super(url);
44+
45+
if (!url.getProtocol().equals("s3")) {
46+
throw new IllegalArgumentException("URL must use s3 protocol");
47+
}
48+
49+
if (Strings.isBlank(url.getHost())) {
50+
throw new IllegalArgumentException("URL must have a bucket name");
51+
}
52+
53+
// TODO support path based.
54+
//https://bucket-name.s3.region-code.amazonaws.com/key-name
55+
var host = url.getHost();
56+
if (!host.matches("(.*)\\.s3\\.(.*\\.)?amazonaws\\.com")) {
57+
throw new IllegalArgumentException("URL host must be in the form bucketname.s3.region-code.amazonaws.com");
58+
}
59+
60+
if (Strings.isBlank(url.getPath())) {
61+
throw new IllegalArgumentException("URL must have a key");
62+
}
63+
64+
if (url.getUserInfo() != null && url.getUserInfo().split(":").length != 2) {
65+
throw new IllegalArgumentException("URL user info must be in the form accesskey:secretKey");
66+
}
67+
}
68+
69+
@Override
70+
public void connect() throws IOException {
71+
AmazonS3 client;
72+
URI uri;
73+
74+
try {
75+
uri = url.toURI();
76+
} catch (URISyntaxException e) {
77+
throw new IOException(e);
78+
}
79+
80+
var host = uri.getHost();
81+
var pattern = Pattern.compile("(.*)\\.s3\\.(.*\\.)?amazonaws\\.com");
82+
83+
var matcher = pattern.matcher(host);
84+
String bucket = Strings.EMPTY;
85+
String region = Strings.EMPTY;
86+
if (matcher.find()) {
87+
var groupCount = matcher.groupCount();
88+
bucket = matcher.group(1);
89+
region = matcher.group(2);
90+
}
91+
if (!host.matches("(.*)\\.s3\\.(.*\\.)?amazonaws\\.com")) {
92+
throw new IllegalArgumentException("URL host must be in the form bucketname.s3.region-code.amazonaws.com");
93+
}
94+
95+
var builder = AmazonS3ClientBuilder.standard();
96+
if (Strings.isNotBlank(uri.getUserInfo())) {
97+
var parts = uri.getUserInfo().split(":");
98+
builder = builder.withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(parts[0], parts[1])));
99+
}
100+
if (Strings.isNotBlank(region)) {
101+
if (Strings.isNotBlank(endpoint)) {
102+
builder = builder.withEndpointConfiguration(new AmazonS3ClientBuilder.EndpointConfiguration(endpoint, region));
103+
} else {
104+
builder = builder.withRegion(region);
105+
}
106+
}
107+
if (pathStyleAccessEnabled) {
108+
builder = builder.withPathStyleAccessEnabled(true);
109+
}
110+
client = builder.build();
111+
112+
113+
var key = uri.getPath();
114+
if (key.startsWith("/")) {
115+
key = key.substring(1);
116+
}
117+
118+
log.trace("Connecting to s3 bucket: {} key: {}", bucket, key);
119+
object = client.getObject(bucket, key);
120+
121+
// if (url.host.matches('s3\\.(.*\\.)?amazonaws\\.com')) {
122+
// bucketname = url.path.substring(1, url.path.indexOf('/', 1))
123+
// key = url.path.substring(url.path.indexOf('/', 1) + 1)
124+
// } else {
125+
// bucketname = url.host.substring(0, url.host.indexOf('.'))
126+
// key = url.path.substring(1)
127+
// }
128+
129+
}
130+
131+
public void disconnect() {
132+
if (object != null) {
133+
try {
134+
object.close();
135+
} catch (IOException e) { /* ignored */ }
136+
object = null;
137+
}
138+
}
139+
140+
private void ensureConnected() {
141+
if (object == null) {
142+
try {
143+
connect();
144+
} catch (IOException e) {
145+
throw new RuntimeException(e);
146+
}
147+
}
148+
}
149+
150+
@Override
151+
public String getContentEncoding() {
152+
ensureConnected();
153+
return object.getObjectMetadata().getContentEncoding();
154+
}
155+
156+
@Override
157+
public long getContentLengthLong() {
158+
ensureConnected();
159+
return object.getObjectMetadata().getContentLength();
160+
}
161+
162+
@Override
163+
public String getContentType() {
164+
ensureConnected();
165+
return object.getObjectMetadata().getContentType();
166+
}
167+
168+
@Override
169+
public long getExpiration() {
170+
ensureConnected();
171+
return object.getObjectMetadata().getExpirationTime().getTime();
172+
}
173+
174+
@Override
175+
public long getLastModified() {
176+
ensureConnected();
177+
return object.getObjectMetadata().getLastModified().getTime();
178+
}
179+
180+
@Override
181+
public InputStream getInputStream() throws IOException {
182+
ensureConnected();
183+
return object.getObjectContent();
184+
}
185+
186+
@Override
187+
public String getHeaderField(String name) {
188+
ensureConnected();
189+
return object.getObjectMetadata().getRawMetadataValue(name).toString();
190+
}
191+
192+
/**
193+
* For testing, allows overriding the default amazon s3 endpoint
194+
*
195+
* @return the endpoint
196+
*/
197+
public String getEndpoint() {
198+
return endpoint;
199+
}
200+
201+
/**
202+
* For testing, allows overriding the default amazon s3 endpoint
203+
*
204+
* @param endpoint the new endpoint to use, eg localstack.local
205+
*/
206+
public void setEndpoint(String endpoint) {
207+
this.endpoint = endpoint;
208+
}
209+
210+
public boolean isPathStyleAccessEnabled() {
211+
return pathStyleAccessEnabled;
212+
}
213+
214+
public void setPathStyleAccessEnabled(boolean pathStyleAccessEnabled) {
215+
this.pathStyleAccessEnabled = pathStyleAccessEnabled;
216+
}
217+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package au.org.ala.images;
2+
3+
import java.io.IOException;
4+
import java.net.URL;
5+
import java.net.URLConnection;
6+
import java.net.URLStreamHandler;
7+
8+
/**
9+
* Support s3:// URLs
10+
*/
11+
public class S3URLStreamHandler extends URLStreamHandler {
12+
13+
@Override
14+
protected URLConnection openConnection(URL u) throws IOException {
15+
return new S3URLConnection(u);
16+
}
17+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package au.org.ala.images;
2+
3+
import java.net.URLStreamHandler;
4+
import java.net.spi.URLStreamHandlerProvider;
5+
6+
/**
7+
* Support s3:// URLs
8+
*/
9+
public class S3URLStreamHandlerProvider extends URLStreamHandlerProvider {
10+
11+
@Override
12+
public URLStreamHandler createURLStreamHandler(String protocol) {
13+
if ("s3".equals(protocol)) {
14+
return new S3URLStreamHandler();
15+
}
16+
return null;
17+
}
18+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
au.org.ala.images.S3URLStreamHandlerProvider

0 commit comments

Comments
 (0)