Skip to content

Commit 9d8bb4f

Browse files
committed
CMR-7108: Changed S3 link validation to only allow lowercase s3://
1 parent ae102ce commit 9d8bb4f

File tree

7 files changed

+20
-17
lines changed

7 files changed

+20
-17
lines changed

ingest-app/docs/api.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1101,7 +1101,7 @@ supported metadata formats:
11011101
- S3 url in OnlineAccessURLs for ECHO10 format
11021102
- S3 url in RelatedUrls for UMM-G format
11031103

1104-
The S3 url value provided in the granule bulk update request can be comma-separated urls. Each url must start with s3:// (case-insensitive). During bulk update, the provided S3 urls in the request will overwrite any existing S3 links already in the granule metadata.
1104+
The S3 url value provided in the granule bulk update request can be a comma-separated list of urls. Each url must start with lowercase s3:// (the check is case-sensitive, so S3:// is rejected). The lowercase s3:// prefix is required to keep the S3 links compatible with the AWS S3 API. During bulk update, the S3 urls provided in the request overwrite any existing S3 links already in the granule metadata.
11051105

11061106
Example: Add/update OPeNDAP url for 3 granules under PROV1.
11071107

ingest-app/src/cmr/ingest/services/granule_bulk_update/s3/s3_util.clj

+2-3
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,12 @@
1010

1111
(defn validate-url
1212
"Validate the given S3 url for granule bulk update. It can be multiple urls
13-
separated by comma, and each url must be started with s3:// (case insensitive).
13+
separated by commas, and each url must start with s3:// (case sensitive).
1414
Returns the parsed urls in a list."
1515
[input-url]
1616
(let [urls (map string/trim (string/split input-url #","))]
1717
(doseq [url urls]
18-
(when-not (or (string/starts-with? url "s3://")
19-
(string/starts-with? url "S3://"))
18+
(when-not (string/starts-with? url "s3://")
2019
(errors/throw-service-errors
2120
:invalid-data
2221
[(str "Invalid URL value, each S3 url must start with s3://, but was " url)])))

ingest-app/test/cmr/ingest/services/granule_bulk_update/s3/s3_util_test.clj

+9-5
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,22 @@
66

77
(deftest validate-url-test
88
(testing "validate url"
9-
(is (= ["s3://abc/foo" "S3://abc/bar"]
10-
(s3-util/validate-url "s3://abc/foo, S3://abc/bar"))))
9+
(is (= ["s3://abc/foo" "s3://abc/bar"]
10+
(s3-util/validate-url "s3://abc/foo, s3://abc/bar"))))
1111

1212
(testing "validate url error scenarios"
1313
(are3 [url-value re]
1414
(is (thrown-with-msg?
1515
Exception re (s3-util/validate-url url-value)))
1616

1717
"invalid s3 link"
18-
"http://example.com/foo"
19-
#"Invalid URL value, each S3 url must start with s3://, but was http://example.com/foo"
18+
"S3://abc/foo"
19+
#"Invalid URL value, each S3 url must start with s3://, but was S3://abc/foo"
2020

2121
"invalid s3 link in multiple urls"
2222
"s3://abc/foo,http://example.com/bar,s3://abc/baz"
23-
#"Invalid URL value, each S3 url must start with s3://, but was http://example.com/bar")))
23+
#"Invalid URL value, each S3 url must start with s3://, but was http://example.com/bar"
24+
25+
"invalid s3 links in multiple urls, only report the first error"
26+
"S3://abc/foo,http://example.com/bar,s3://abc/baz"
27+
#"Invalid URL value, each S3 url must start with s3://, but was S3://abc/foo")))

system-int-test/test/cmr/system_int_test/ingest/granule_bulk_update/granule_bulk_update_test.clj

+4-4
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@
163163
:operation "UPDATE_FIELD"
164164
:update-field "S3Link"
165165
:updates [["SC:AE_5DSno.002:30500511" "s3://url30500511"]
166-
["SC:AE_5DSno.002:30500512" "S3://url1, S3://url2,S3://url3"]
166+
["SC:AE_5DSno.002:30500512" "s3://url1, s3://url2,s3://url3"]
167167
["SC:AE_5DSno.002:30500514" "s3://url30500514"]]}
168168
response (ingest/bulk-update-granules "PROV1" bulk-update bulk-update-options)
169169
{:keys [status task-id]} response]
@@ -207,7 +207,7 @@
207207
(let [bulk-update {:name "add s3 links"
208208
:operation "UPDATE_FIELD"
209209
:update-field "S3Link"
210-
:updates [["SC:AE_5DSno.002:30500511" "S3://url30500511"]
210+
:updates [["SC:AE_5DSno.002:30500511" "s3://url30500511"]
211211
["SC:AE_5DSno.002:30500512" "s3://url30500512"]
212212
["SC:non-existent-ur" "s3://url30500513"]]}
213213
response (ingest/bulk-update-granules "PROV1" bulk-update bulk-update-options)
@@ -236,7 +236,7 @@
236236
:operation "UPDATE_FIELD"
237237
:update-field "S3Link"
238238
:updates [["SC:AE_5DSno.002:30500511" "https://foo"]
239-
["SC:AE_5DSno.002:30500512" "s3://foo,https://bar"]]}
239+
["SC:AE_5DSno.002:30500512" "s3://foo,S3://bar"]]}
240240
response (ingest/bulk-update-granules "PROV1" bulk-update bulk-update-options)
241241
{:keys [status task-id]} response]
242242
(index/wait-until-indexed)
@@ -253,7 +253,7 @@
253253
:status-message "Invalid URL value, each S3 url must start with s3://, but was https://foo"}
254254
{:granule-ur "SC:AE_5DSno.002:30500512"
255255
:status "FAILED"
256-
:status-message "Invalid URL value, each S3 url must start with s3://, but was https://bar"}]
256+
:status-message "Invalid URL value, each S3 url must start with s3://, but was S3://bar"}]
257257
granule-statuses))))))))
258258

259259
(deftest add-opendap-url

umm-spec-lib/resources/example-data/umm-json/granule/v1.6/GranuleExample.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@
251251
"Size": 10,
252252
"SizeUnit": "MB"
253253
}, {
254-
"URL": "S3://aws.com/asdf/asdf/dataproduct.nc",
254+
"URL": "s3://aws.com/asdf/asdf/dataproduct.nc",
255255
"Type": "GET DATA VIA DIRECT ACCESS",
256256
"Description": "ISLSCP II EARTH RADIATION BUDGET EXPERIMENT (ERBE) MONTHLY ALBEDO, 1986-1990",
257257
"Format": "NETCDF-4",

umm-spec-lib/src/cmr/umm_spec/test/umm_g/expected_util.clj

+1-1
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@
194194
:size 10})
195195
(umm-c/map->RelatedURL
196196
{:type "GET DATA VIA DIRECT ACCESS"
197-
:url "S3://aws.com/asdf/asdf/dataproduct.nc"
197+
:url "s3://aws.com/asdf/asdf/dataproduct.nc"
198198
:description "ISLSCP II EARTH RADIATION BUDGET EXPERIMENT (ERBE) MONTHLY ALBEDO, 1986-1990"
199199
:mime-type "application/x-netcdf"
200200
:title "ISLSCP II EARTH RADIATION BUDGET EXPERIMENT (ERBE) MONTHLY ALBEDO, 1986-1990"

umm-spec-lib/test/cmr/umm_spec/test/migration/version/granule.clj

+2-2
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@
595595
:Type "GET DATA"
596596
:Subtype "OPENDAP DATA"
597597
:MimeType "application/x-hdf5"}
598-
{:URL "S3://amazon.something.com/get-data"
598+
{:URL "s3://amazon.something.com/get-data"
599599
:Type "GET DATA"
600600
:Format "NETCDF-4"
601601
:MimeType "application/x-netcdf"}]})
@@ -616,7 +616,7 @@
616616
:Type "GET DATA"
617617
:Subtype "OPENDAP DATA"
618618
:MimeType "application/x-hdf5"}
619-
{:URL "S3://amazon.something.com/get-data"
619+
{:URL "s3://amazon.something.com/get-data"
620620
:Type "GET DATA VIA DIRECT ACCESS"
621621
:Format "NETCDF-4"
622622
:MimeType "application/x-netcdf"}]})

0 commit comments

Comments
 (0)