From 984153752a0040ea14ad98e44618fb27e12c72ca Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Fri, 14 Feb 2025 08:18:15 +0100 Subject: [PATCH 1/9] remove references to decrypted endpoint /s3-encrypted --- download/download.go | 2 +- download/download_test.go | 4 ++-- htsget/htsget_test.go | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/download/download.go b/download/download.go index 1ba2a073..05051777 100644 --- a/download/download.go +++ b/download/download.go @@ -401,7 +401,7 @@ func getFileIDURL(baseURL, token, pubKeyBase64, dataset, filename string) (strin if pubKeyBase64 == "" { url = baseURL + "/files/" + datasetFiles[idx].FileID } else { - url = baseURL + "/s3-encrypted/" + dataset + "/" + filename + url = baseURL + "/s3/" + dataset + "/" + filename } return url, datasetFiles[idx].FilePath, nil diff --git a/download/download_test.go b/download/download_test.go index 4cad790f..3130c1e7 100644 --- a/download/download_test.go +++ b/download/download_test.go @@ -171,7 +171,7 @@ func (suite *TestSuite) TestDownloadUrl() { //----------------------------------------------- // Test using a nonempty public key // Test with valid base_url, token, dataset, and filename - expectedURL = baseURL + "/s3-encrypted/" + datasetID + "/" + filepath + expectedURL = baseURL + "/s3/" + datasetID + "/" + filepath pubKey := "test-public-key" url, _, err = getFileIDURL(baseURL, token, pubKey, datasetID, filepath) assert.NoError(suite.T(), err) @@ -179,7 +179,7 @@ func (suite *TestSuite) TestDownloadUrl() { // Test with url as dataset datasetID = "https://doi.example/another/url/001" - expectedURL = baseURL + "/s3-encrypted/" + datasetID + "/" + filepath + expectedURL = baseURL + "/s3/" + datasetID + "/" + filepath url, _, err = getFileIDURL(baseURL, token, pubKey, datasetID, filepath) assert.NoError(suite.T(), err) assert.Equal(suite.T(), expectedURL, url) diff --git a/htsget/htsget_test.go b/htsget/htsget_test.go index 32bb3bb7..b8af352a 100644 --- 
a/htsget/htsget_test.go +++ b/htsget/htsget_test.go @@ -134,7 +134,7 @@ KKj6NUcJGZ2/HeqkYbxm57ZaFLP5cIHsdK+0nQubFVs= "url": "data:;base64,Y3J5cHQ0Z2gBAAAAAgAAAA==" }, { - "url": "http://localhost/s3-encrypted/DATASET0001/htsnexus_test_NA12878.bam.c4gh", + "url": "http://localhost/s3/DATASET0001/htsnexus_test_NA12878.bam.c4gh", "headers": { "Range": "bytes=16-123", "accept-encoding": "gzip", @@ -148,7 +148,7 @@ KKj6NUcJGZ2/HeqkYbxm57ZaFLP5cIHsdK+0nQubFVs= "url": "data:;base64,ZAAAAAAAAAB7zX5e64IzHWf5/X8nkdCKpwsX0eT4/AHU77sh2+EdIXwkSEyPQ5ZP2+vRHvytn6H1hf63Wo7gPdDc59KZfz+10kjywPqQUXYOoSbeQ6cxx2dxmf2nSwSd2Wh1jA==" }, { - "url": "http://localhost/s3-encrypted/DATASET0001/htsnexus_test_NA12878.bam.c4gh", + "url": "http://localhost/s3/DATASET0001/htsnexus_test_NA12878.bam.c4gh", "headers": { "Range": "bytes=124-1049147", "accept-encoding": "gzip", @@ -159,7 +159,7 @@ KKj6NUcJGZ2/HeqkYbxm57ZaFLP5cIHsdK+0nQubFVs= } }, { - "url": "http://localhost/s3-encrypted/DATASET0001/htsnexus_test_NA12878.bam.c4gh", + "url": "http://localhost/s3/DATASET0001/htsnexus_test_NA12878.bam.c4gh", "headers": { "Range": "bytes=2557120-2598042", "accept-encoding": "gzip", From e48ce833b771bfa6e503bfc943b62ab37b9d9ded Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Fri, 14 Feb 2025 08:20:58 +0100 Subject: [PATCH 2/9] Revert "use last version that supports /s3-encrypted" This reverts commit cf2298453e1fbe67e0652d50f8ef41d91eab1a9e. 
--- testing/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/docker-compose.yml b/testing/docker-compose.yml index d6834ff3..e304e6e8 100644 --- a/testing/docker-compose.yml +++ b/testing/docker-compose.yml @@ -201,7 +201,7 @@ services: - GRPC_PORT=50051 - GRPC_HOST=reencrypt - APP_SERVEUNENCRYPTEDDATA=true - image: "ghcr.io/neicnordic/sensitive-data-archive:v0.3.179-download" #this is the last version that supports /s3-encrypted + image: "ghcr.io/neicnordic/sensitive-data-archive:${TAG}-download" volumes: - ./archive_data/4293c9a7-dc50-46db-b79a-27ddc0dad1c6:/tmp/4293c9a7-dc50-46db-b79a-27ddc0dad1c6 mem_limit: 256m From f7d7992f43398374c088e59f6361391626a16c9c Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Fri, 14 Feb 2025 14:27:38 +0100 Subject: [PATCH 3/9] stop using files endpoint --- download/download.go | 8 +------- download/download_test.go | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/download/download.go b/download/download.go index 05051777..c705622d 100644 --- a/download/download.go +++ b/download/download.go @@ -396,13 +396,7 @@ func getFileIDURL(baseURL, token, pubKeyBase64, dataset, filename string) (strin return "", "", fmt.Errorf("File not found in dataset %s", filename) } - var url string - // If no public key is provided, retrieve the unencrypted file - if pubKeyBase64 == "" { - url = baseURL + "/files/" + datasetFiles[idx].FileID - } else { - url = baseURL + "/s3/" + dataset + "/" + filename - } + url := baseURL + "/s3/" + dataset + "/" + filename return url, datasetFiles[idx].FilePath, nil } diff --git a/download/download_test.go b/download/download_test.go index 3130c1e7..b49eea78 100644 --- a/download/download_test.go +++ b/download/download_test.go @@ -138,7 +138,7 @@ func (suite *TestSuite) TestDownloadUrl() { token := suite.accessToken datasetID := "test-dataset" filepath := "path/to/file1" - expectedURL := "https://some/url/files/file1id" + expectedURL := 
"https://some/url/s3/test-dataset/path/to/file1.c4gh" //----------------------------------------------- // Test with an empty public key From 88e42a8ed3b7435ec36a4b52f76e4994988efe64 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Tue, 18 Feb 2025 13:29:13 +0100 Subject: [PATCH 4/9] make sure public user key is used --- download/download.go | 50 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/download/download.go b/download/download.go index c705622d..7a855c89 100644 --- a/download/download.go +++ b/download/download.go @@ -47,7 +47,8 @@ Required options: -url The url of the download server. Optional options: - -pubkey Encrypt downloaded files server-side using the specified public key. + -pubkey Key to use for encrypting downloaded files server-side. + This key must be given here or in the config file. -outdir Directory to save the downloaded files. If not specified, files will be saved in the current directory. -dataset Download all files in the dataset specified by '-dataset-id'. @@ -188,6 +189,10 @@ func datasetCase(token string) error { if err != nil { return err } + pubKeyBase64, err := getPublicKey64() + if err != nil { + return err + } // Loop through the files and download them for _, file := range files { // Download URL for the file @@ -218,6 +223,10 @@ func recursiveCase(token string) error { } dirPaths = append(dirPaths, path) } + pubKeyBase64, err := getPublicKey64() + if err != nil { + return err + } var missingPaths []string // Loop over all the files of the dataset and // check if the provided path is part of their filepath. @@ -269,15 +278,9 @@ func fileCase(token string, fileList bool) error { files = append(files, Args.Args()...) 
} - *pubKeyPath = strings.TrimSpace(*pubKeyPath) - var pubKeyBase64 string - if *pubKeyPath != "" { - // Read the public key - pubKey, err := os.ReadFile(*pubKeyPath) - if err != nil { - return fmt.Errorf("failed to read public key, reason: %v", err) - } - pubKeyBase64 = base64.StdEncoding.EncodeToString(pubKey) + pubKeyBase64, err := getPublicKey64() + if err != nil { + return err } // Loop through the files and download them @@ -504,3 +507,30 @@ func GetURLsFile(urlsFilePath string) (urlsList []string, err error) { return urlsList, scanner.Err() } + +func AnonymizeFilepath(filePath string) string { + filePathSplit := strings.Split(filePath, "/") + if strings.Contains(filePathSplit[0], "_") { + _, err := mail.ParseAddress(strings.ReplaceAll(filePathSplit[0], "_", "@")) + if err == nil { + filePath = strings.Join(filePathSplit[1:], "/") + } + } + + return filePath +} + +func getPublicKey64() (string, error) { + *pubKeyPath = strings.TrimSpace(*pubKeyPath) + var pubKeyBase64 string + if *pubKeyPath != "" { + // Read the public key + pubKey, err := os.ReadFile(*pubKeyPath) + if err != nil { + return "", fmt.Errorf("failed to read public key, reason: %v", err) + } + pubKeyBase64 = base64.StdEncoding.EncodeToString(pubKey) + } + + return pubKeyBase64, nil +} From 203bae050ab68c1ce0107c4411cd5186d11172f8 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Tue, 18 Feb 2025 13:31:17 +0100 Subject: [PATCH 5/9] stop using files endpoint --- download/download.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/download/download.go b/download/download.go index 7a855c89..b91d83ad 100644 --- a/download/download.go +++ b/download/download.go @@ -196,8 +196,12 @@ func datasetCase(token string) error { // Loop through the files and download them for _, file := range files { // Download URL for the file - fileURL := *URL + "/files/" + file.FileID - err = downloadFile(fileURL, token, "", file.FilePath) + fileName := 
AnonymizeFilepath(file.FilePath) + fileURL := *URL + "/s3/" + file.DatasetID + "/" + fileName + if err != nil { + return err + } + err = downloadFile(fileURL, token, pubKeyBase64, file.FilePath) if err != nil { return err } @@ -236,8 +240,9 @@ func recursiveCase(token string) error { for _, file := range files { if strings.Contains(file.FilePath, dirPath) { pathExists = true - fileURL := *URL + "/files/" + file.FileID - err = downloadFile(fileURL, token, "", file.FilePath) + fileName := AnonymizeFilepath(file.FilePath) + fileURL := *URL + "/s3/" + file.DatasetID + "/" + fileName + err = downloadFile(fileURL, token, pubKeyBase64, file.FilePath) if err != nil { return err } @@ -303,13 +308,7 @@ func fileCase(token string, fileList bool) error { func downloadFile(uri, token, pubKeyBase64, filePath string) error { // Check if the file path contains a userID and if it does, // do not keep it in the file path - filePathSplit := strings.Split(filePath, "/") - if strings.Contains(filePathSplit[0], "_") { - _, err := mail.ParseAddress(strings.ReplaceAll(filePathSplit[0], "_", "@")) - if err == nil { - filePath = strings.Join(filePathSplit[1:], "/") - } - } + filePath = AnonymizeFilepath(filePath) outFilename := filePath if *outDir != "" { @@ -399,9 +398,10 @@ func getFileIDURL(baseURL, token, pubKeyBase64, dataset, filename string) (strin return "", "", fmt.Errorf("File not found in dataset %s", filename) } - url := baseURL + "/s3/" + dataset + "/" + filename + fileName := AnonymizeFilepath(datasetFiles[idx].FilePath) + url := baseURL + "/s3/" + dataset + "/" + fileName - return url, datasetFiles[idx].FilePath, nil + return url, fileName, nil } func GetDatasets(baseURL, token string) ([]string, error) { From 9903fe1a0333b939e5d3d236ad6ebfd15697fe5b Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Tue, 18 Feb 2025 13:33:33 +0100 Subject: [PATCH 6/9] integration tests: always work with encrypted data --- .github/integration/tests/40_download.sh | 43 +++++++++++++++--------- 
1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/.github/integration/tests/40_download.sh b/.github/integration/tests/40_download.sh index 1176c036..54668681 100755 --- a/.github/integration/tests/40_download.sh +++ b/.github/integration/tests/40_download.sh @@ -1,9 +1,18 @@ #!/bin/bash set -e +# Create a user key pair +if ( yes "" | ./sda-cli createKey user_key ) ; then + echo "Created a user key pair for downloading encrypted files" +else + echo "Failed to create a user key pair for downloading encrypted files" + exit 1 +fi + # Download file by using the sda-cli download command -./sda-cli -config testing/s3cmd-download.conf download -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir test-download main/subfolder/dummy_data.c4gh +./sda-cli -config testing/s3cmd-download.conf download -pubkey user_key.pub.pem -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir test-download main/subfolder/dummy_data.c4gh +C4GH_PASSWORD="" ./sda-cli decrypt -key user_key.sec.pem test-download/main/subfolder/dummy_data.c4gh # Check if file exists in the path if [ ! -f "test-download/main/subfolder/dummy_data" ]; then echo "Downloaded file not found" @@ -20,13 +29,13 @@ fi rm -r test-download # Download whole dataset by using the sda-cli download command -./sda-cli -config testing/s3cmd-download.conf download -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir download-dataset --dataset +./sda-cli -config testing/s3cmd-download.conf download -pubkey user_key.pub.pem -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir download-dataset --dataset filepaths="download-dataset/main/subfolder/dummy_data download-dataset/main/subfolder2/dummy_data2 download-dataset/main/subfolder2/random/dummy_data3" # Check if all the files of the dataset have been downloaded for filepath in $filepaths; do - if [ ! 
-f "$filepath" ]; then + if [ ! -f "$filepath.c4gh" ]; then echo "File $filepath does not exist" exit 1 fi @@ -34,13 +43,6 @@ done rm -r download-dataset -# Create a user key pair -if ( yes "" | ./sda-cli createKey user_key ) ; then - echo "Created a user key pair for downloading encrypted files" -else - echo "Failed to create a user key pair for downloading encrypted files" - exit 1 -fi # Download encrypted file by using the sda-cli download command ./sda-cli -config testing/s3cmd-download.conf download -pubkey user_key.pub.pem -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir test-download main/subfolder/dummy_data.c4gh @@ -69,13 +71,13 @@ fi # Download recursively a folder echo "Downloading content of folder" -./sda-cli -config testing/s3cmd-download.conf download -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir download-folder --recursive main/subfolder2 +./sda-cli -config testing/s3cmd-download.conf download -pubkey user_key.pub.pem -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir download-folder --recursive main/subfolder2 folderpaths="download-folder/main/subfolder2/dummy_data2 download-folder/main/subfolder2/random/dummy_data3" # Check if the content of the folder has been downloaded for folderpath in $folderpaths; do - if [ ! -f "$folderpath" ]; then + if [ ! 
-f "$folderpath.c4gh" ]; then echo "Content of folder $folderpath is missing" exit 1 fi @@ -84,14 +86,15 @@ done rm -r download-folder # Download dataset by providing the dataset id -./sda-cli -config testing/s3cmd-download.conf download -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir download-fileid urn:neic:001-001 +./sda-cli -config testing/s3cmd-download.conf download -pubkey user_key.pub.pem -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir download-fileid urn:neic:001-001 # Check if file exists in the path -if [ ! -f "download-fileid/main/subfolder/dummy_data" ]; then +if [ ! -f "download-fileid/main/subfolder/dummy_data.c4gh" ]; then echo "Downloaded file by using the file id not found" exit 1 fi +C4GH_PASSWORD="" ./sda-cli decrypt -key user_key.sec.pem download-fileid/main/subfolder/dummy_data.c4gh # Check the first line of the file first_line_id=$(head -n 1 download-fileid/main/subfolder/dummy_data) if [[ $first_line_id != *"THIS FILE IS JUST DUMMY DATA"* ]]; then @@ -103,10 +106,10 @@ rm -r download-fileid # Download the file paths content of a text file echo "Downloading content of a text file" -./sda-cli -config testing/s3cmd-download.conf download -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir download-from-file --from-file testing/file-list.txt +./sda-cli -config testing/s3cmd-download.conf download -pubkey user_key.pub.pem -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir download-from-file --from-file testing/file-list.txt # Check if the content of the text file has been downloaded -content_paths="download-from-file/main/subfolder/dummy_data download-from-file/main/subfolder2/dummy_data2" +content_paths="download-from-file/main/subfolder/dummy_data.c4gh download-from-file/main/subfolder2/dummy_data2.c4gh" for content_path in $content_paths; do if [ ! 
-f "$content_path" ]; then @@ -115,6 +118,7 @@ for content_path in $content_paths; do fi done +C4GH_PASSWORD="" ./sda-cli decrypt -key user_key.sec.pem download-from-file/main/subfolder/dummy_data.c4gh # Check the first line of the file first_line_file=$(head -n 1 download-from-file/main/subfolder/dummy_data) if [[ $first_line_file != *"THIS FILE IS JUST DUMMY DATA"* ]]; then @@ -122,7 +126,14 @@ if [[ $first_line_file != *"THIS FILE IS JUST DUMMY DATA"* ]]; then exit 1 fi +# Make sure files cannot be downloaded without giving a public key +if ./sda-cli -config testing/s3cmd-download.conf download -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir test-download main/subfolder/dummy_data.c4gh; then + echo "Downloaded a file without using a public key" + exit 1 +fi + rm -r download-from-file rm -r test-download + echo "Integration tests for sda-cli download finished successfully" \ No newline at end of file From a43e865ef546bef6eef38c879d803efec848fe0a Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Tue, 18 Feb 2025 13:55:00 +0100 Subject: [PATCH 7/9] tests: test getfileidurl with file id --- download/download_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/download/download_test.go b/download/download_test.go index b49eea78..a8f48614 100644 --- a/download/download_test.go +++ b/download/download_test.go @@ -170,10 +170,12 @@ func (suite *TestSuite) TestDownloadUrl() { //----------------------------------------------- // Test using a nonempty public key - // Test with valid base_url, token, dataset, and filename + // Test with valid base_url, token, dataset, and fileid + filepath = "path/to/file1.c4gh" + fileid := "file1id" expectedURL = baseURL + "/s3/" + datasetID + "/" + filepath pubKey := "test-public-key" - url, _, err = getFileIDURL(baseURL, token, pubKey, datasetID, filepath) + url, _, err = getFileIDURL(baseURL, token, pubKey, datasetID, fileid) assert.NoError(suite.T(), err) 
assert.Equal(suite.T(), expectedURL, url) From 38380aa9011097814b85d0ae40043084b13ac6ae Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Thu, 20 Feb 2025 15:34:07 +0100 Subject: [PATCH 8/9] tests: clarify output --- .github/integration/tests/40_download.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/integration/tests/40_download.sh b/.github/integration/tests/40_download.sh index 54668681..d6511dd6 100755 --- a/.github/integration/tests/40_download.sh +++ b/.github/integration/tests/40_download.sh @@ -128,8 +128,10 @@ fi # Make sure files cannot be downloaded without giving a public key if ./sda-cli -config testing/s3cmd-download.conf download -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir test-download main/subfolder/dummy_data.c4gh; then - echo "Downloaded a file without using a public key" - exit 1 + echo "Downloaded a file without using a public key" + exit 1 +else + echo "Error expected, continue." fi rm -r download-from-file From 43e6ae2ac1076cc7596b5ff643ee4f5f39e18023 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Thu, 20 Feb 2025 15:36:38 +0100 Subject: [PATCH 9/9] refactor: move helper functions to helper module --- download/download.go | 56 ++++++++------------------------------------ helpers/helpers.go | 33 ++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 46 deletions(-) diff --git a/download/download.go b/download/download.go index b91d83ad..727056fa 100644 --- a/download/download.go +++ b/download/download.go @@ -2,14 +2,12 @@ package download import ( "bufio" - "encoding/base64" "encoding/json" "errors" "flag" "fmt" "io" "net/http" - "net/mail" "net/url" "os" "path/filepath" @@ -78,6 +76,8 @@ var recursiveDownload = Args.Bool("recursive", false, "Download content of the f var fromFile = Args.Bool("from-file", false, "Download files from file list.") +var pubKeyBase64 string + // necessary for mocking in testing var getResponseBody = getBody @@ -149,6 
+149,10 @@ func Download(args []string, configPath string) error { if err != nil { return err } + pubKeyBase64, err = helpers.GetPublicKey64(pubKeyPath) + if err != nil { + return err + } switch { // Case where the user is setting the -dataset flag @@ -189,14 +193,10 @@ func datasetCase(token string) error { if err != nil { return err } - pubKeyBase64, err := getPublicKey64() - if err != nil { - return err - } // Loop through the files and download them for _, file := range files { // Download URL for the file - fileName := AnonymizeFilepath(file.FilePath) + fileName := helpers.AnonymizeFilepath(file.FilePath) fileURL := *URL + "/s3/" + file.DatasetID + "/" + fileName if err != nil { return err @@ -227,10 +227,6 @@ func recursiveCase(token string) error { } dirPaths = append(dirPaths, path) } - pubKeyBase64, err := getPublicKey64() - if err != nil { - return err - } var missingPaths []string // Loop over all the files of the dataset and // check if the provided path is part of their filepath. @@ -240,7 +236,7 @@ func recursiveCase(token string) error { for _, file := range files { if strings.Contains(file.FilePath, dirPath) { pathExists = true - fileName := AnonymizeFilepath(file.FilePath) + fileName := helpers.AnonymizeFilepath(file.FilePath) fileURL := *URL + "/s3/" + file.DatasetID + "/" + fileName err = downloadFile(fileURL, token, pubKeyBase64, file.FilePath) if err != nil { @@ -283,11 +279,6 @@ func fileCase(token string, fileList bool) error { files = append(files, Args.Args()...) 
} - pubKeyBase64, err := getPublicKey64() - if err != nil { - return err - } - // Loop through the files and download them for _, filePath := range files { fileIDURL, apiFilePath, err := getFileIDURL(*URL, token, pubKeyBase64, *datasetID, filePath) @@ -308,7 +299,7 @@ func fileCase(token string, fileList bool) error { func downloadFile(uri, token, pubKeyBase64, filePath string) error { // Check if the file path contains a userID and if it does, // do not keep it in the file path - filePath = AnonymizeFilepath(filePath) + filePath = helpers.AnonymizeFilepath(filePath) outFilename := filePath if *outDir != "" { @@ -398,7 +389,7 @@ func getFileIDURL(baseURL, token, pubKeyBase64, dataset, filename string) (strin return "", "", fmt.Errorf("File not found in dataset %s", filename) } - fileName := AnonymizeFilepath(datasetFiles[idx].FilePath) + fileName := helpers.AnonymizeFilepath(datasetFiles[idx].FilePath) url := baseURL + "/s3/" + dataset + "/" + fileName return url, fileName, nil @@ -507,30 +498,3 @@ func GetURLsFile(urlsFilePath string) (urlsList []string, err error) { return urlsList, scanner.Err() } - -func AnonymizeFilepath(filePath string) string { - filePathSplit := strings.Split(filePath, "/") - if strings.Contains(filePathSplit[0], "_") { - _, err := mail.ParseAddress(strings.ReplaceAll(filePathSplit[0], "_", "@")) - if err == nil { - filePath = strings.Join(filePathSplit[1:], "/") - } - } - - return filePath -} - -func getPublicKey64() (string, error) { - *pubKeyPath = strings.TrimSpace(*pubKeyPath) - var pubKeyBase64 string - if *pubKeyPath != "" { - // Read the public key - pubKey, err := os.ReadFile(*pubKeyPath) - if err != nil { - return "", fmt.Errorf("failed to read public key, reason: %v", err) - } - pubKeyBase64 = base64.StdEncoding.EncodeToString(pubKey) - } - - return pubKeyBase64, nil -} diff --git a/helpers/helpers.go b/helpers/helpers.go index 0e446ab2..86678185 100644 --- a/helpers/helpers.go +++ b/helpers/helpers.go @@ -1,12 +1,14 @@ package 
helpers import ( + "encoding/base64" "encoding/json" "encoding/xml" "errors" "flag" "fmt" "io" + "net/mail" "os" "path/filepath" "regexp" @@ -473,3 +475,34 @@ func CheckValidChars(filename string) error { return nil } + +// AnonymizeFilepath checks if the filepath has a prefixed user ID, +// strips that, and then returns the filepath +func AnonymizeFilepath(filePath string) string { + filePathSplit := strings.Split(filePath, "/") + if strings.Contains(filePathSplit[0], "_") { + // prefixed user IDs are email addresses with '@' replaced by '_' + _, err := mail.ParseAddress(strings.ReplaceAll(filePathSplit[0], "_", "@")) + if err == nil { + filePath = strings.Join(filePathSplit[1:], "/") + } + } + + return filePath +} + +// GetPublicKey64 reads the public key and encodes it in base64 +func GetPublicKey64(pubKeyPath *string) (string, error) { + *pubKeyPath = strings.TrimSpace(*pubKeyPath) + var pubKeyBase64 string + if *pubKeyPath != "" { + // Read the public key + pubKey, err := os.ReadFile(*pubKeyPath) + if err != nil { + return "", fmt.Errorf("failed to read public key, reason: %v", err) + } + pubKeyBase64 = base64.StdEncoding.EncodeToString(pubKey) + } + + return pubKeyBase64, nil +}