diff --git a/.github/integration/setup/setup.sh b/.github/integration/setup/setup.sh index 9af5f2d0..e39ce90a 100755 --- a/.github/integration/setup/setup.sh +++ b/.github/integration/setup/setup.sh @@ -16,9 +16,29 @@ if [ ! -f "dummy.ega.nbis.se.pem" ]; then chmod 644 keys/dummy.ega.nbis.se.pub dummy.ega.nbis.se.pem fi +cp s3cmd-template.conf s3cmd.conf output=$(python sign_jwt.py) echo "access_token=$output" >> s3cmd.conf +# Create crypt4gh keys for testing the download service +cat << EOF > c4gh.pub.pem +-----BEGIN CRYPT4GH PUBLIC KEY----- +avFAerx0ZWuJE6fTI8S/0wv3yMo1n3SuNTV6zvKdxQc= +-----END CRYPT4GH PUBLIC KEY----- +EOF + +chmod 444 c4gh.pub.pem + +cat << EOF > c4gh.sec.pem +-----BEGIN CRYPT4GH ENCRYPTED PRIVATE KEY----- +YzRnaC12MQAGc2NyeXB0ABQAAAAAwAs5mVkXda50vqeYv6tbkQARY2hhY2hhMjBf +cG9seTEzMDUAPAd46aTuoVWAe+fMGl3VocCKCCWmgFUsFIHejJoWxNwy62c1L/Vc +R9haQsAPfJMLJSvUXStJ04cyZnDHSw== +-----END CRYPT4GH ENCRYPTED PRIVATE KEY----- +EOF + +chmod 444 c4gh.sec.pem + # get latest image tag for s3inbox latest_tag=$(curl -s https://api.github.com/repos/neicnordic/sensitive-data-archive/tags | jq -r '.[0].name') @@ -66,4 +86,77 @@ do echo "waiting for buckets to be created" sleep 10 done +# Populate database with for testing the download service +# Insert entry in sda.files +file_id=$(docker run --rm --name client --network testing_default \ + neicnordic/pg-client:latest \ + postgresql://postgres:rootpasswd@postgres:5432/sda \ + -t -q -c "INSERT INTO sda.files (stable_id, submission_user, \ + submission_file_path, submission_file_size, archive_file_path, \ + archive_file_size, decrypted_file_size, backup_path, header, \ + encryption_method) VALUES ('urn:neic:001-002', 'integration-test', '5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8_elixir-europe.org/main/subfolder/dummy_data.c4gh', \ + 1048729, '4293c9a7-dc50-46db-b79a-27ddc0dad1c6', 1049081, 1048605, \ + '', 
'637279707434676801000000010000006c000000000000006af1407abc74656b8913a7d323c4bfd30bf7c8ca359f74ae35357acef29dc5073799e207ec5d022b2601340585ff082565e55fbff5b6cdbbbe6b12a0d0a19ef325a219f8b62344325e22c8d26a8e82e45f053f4dcee10c0ec4bb9e466d5253f139dcd4be', 'CRYPT4GH') RETURNING id;" | xargs) + +if [ -z "$file_id" ]; then + echo "Failed to insert file entry into database" + exit 1 +fi + +# Insert entry in sda.file_event_log +docker run --rm --name client --network testing_default \ + neicnordic/pg-client:latest \ + postgresql://postgres:rootpasswd@postgres:5432/sda \ + -t -q -c "INSERT INTO sda.file_event_log (file_id, event) \ + VALUES ('$file_id', 'ready');" + +# Insert entries in sda.checksums +docker run --rm --name client --network testing_default \ + neicnordic/pg-client:latest \ + postgresql://postgres:rootpasswd@postgres:5432/sda \ + -t -q -c "INSERT INTO sda.checksums (file_id, checksum, type, source) \ + VALUES ('$file_id', '06bb0a514b26497b4b41b30c547ad51d059d57fb7523eb3763cfc82fdb4d8fb7', 'SHA256', 'UNENCRYPTED');" + +docker run --rm --name client --network testing_default \ + neicnordic/pg-client:latest \ + postgresql://postgres:rootpasswd@postgres:5432/sda \ + -t -q -c "INSERT INTO sda.checksums (file_id, checksum, type, source) \ + VALUES ('$file_id', '5e9c767958cc3f6e8d16512b8b8dcab855ad1e04e05798b86f50ef600e137578', 'SHA256', 'UPLOADED');" + +docker run --rm --name client --network testing_default \ + neicnordic/pg-client:latest \ + postgresql://postgres:rootpasswd@postgres:5432/sda \ + -t -q -c "INSERT INTO sda.checksums (file_id, checksum, type, source) \ + VALUES ('$file_id', '74820dbcf9d30f8ccd1ea59c17d5ec8a714aabc065ae04e46ad82fcf300a731e', 'SHA256', 'ARCHIVED');" + +# Insert dataset in sda.datasets +dataset_id=$(docker run --rm --name client --network testing_default \ + neicnordic/pg-client:latest \ + postgresql://postgres:rootpasswd@postgres:5432/sda \ + -t -q -c "INSERT INTO sda.datasets (stable_id) VALUES 
('https://doi.example/ty009.sfrrss/600.45asasga') \ + ON CONFLICT (stable_id) DO UPDATE \ + SET stable_id=excluded.stable_id RETURNING id;" | xargs) + +if [ -z "$dataset_id" ]; then + echo "Failed to insert dataset entry into database" + exit 1 +fi + +# Add file to dataset +docker run --rm --name client --network testing_default \ + neicnordic/pg-client:latest \ + postgresql://postgres:rootpasswd@postgres:5432/sda \ + -t -q -c "INSERT INTO sda.file_dataset (file_id, dataset_id) \ + VALUES ('$file_id', $dataset_id);" + +# Add file to archive +s3cmd -c directS3 put archive_data/4293c9a7-dc50-46db-b79a-27ddc0dad1c6 s3://archive/4293c9a7-dc50-46db-b79a-27ddc0dad1c6 + +# Get the first token from mockoidc (-s keeps curl's progress meter out of the log) +token=$(curl -s "http://localhost:8002/tokens" | jq -r '.[0]') + +# Create s3cmd-download.conf file for download +cp s3cmd-template.conf s3cmd-download.conf +echo "access_token=$token" >> s3cmd-download.conf + docker ps diff --git a/.github/integration/tests/tests.sh b/.github/integration/tests/tests.sh index 92b97138..bac829e4 100755 --- a/.github/integration/tests/tests.sh +++ b/.github/integration/tests/tests.sh @@ -321,4 +321,22 @@ fi rm -r downloads +# Download file by using the sda download service +./sda-cli sda-download -config testing/s3cmd-download.conf -dataset https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir test-download main/subfolder/dummy_data.c4gh + +# check if file exists in the path +if [ ! 
-f "test-download/main/subfolder/dummy_data" ]; then + echo "Downloaded file not found" + exit 1 +fi + +# check the first line of that file +first_line=$(head -n 1 test-download/main/subfolder/dummy_data) +if [[ $first_line != *"THIS FILE IS JUST DUMMY DATA"* ]]; then + echo "First line does not contain the expected string" + exit 1 +fi + +rm -r test-download + echo "Integration test finished successfully" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 01999d3c..5c5d072e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -28,7 +28,7 @@ jobs: fi - name: Run GoReleaser - uses: goreleaser/goreleaser-action@v5.1.0 + uses: goreleaser/goreleaser-action@v6.0.0 with: version: latest - args: release --rm-dist + args: release --clean # GoReleaser v2 removed --rm-dist; --clean is its replacement diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1646d80a..1aae81d8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,7 +32,7 @@ jobs: run: go test -v -coverprofile=coverage -covermode=atomic ./... - name: Codecov - uses: codecov/codecov-action@v4.4.1 + uses: codecov/codecov-action@v4.5.0 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage diff --git a/README.md b/README.md index fc6db4ed..267bd46e 100644 --- a/README.md +++ b/README.md @@ -174,9 +174,14 @@ If no config is given by the user, the tool will look for a previous login from ## Download -The SDA/BP archive enables for downloading files and datasets in a secure manner. That can be achieved using the `sda-cli` tool and the process consists of the following two steps +The SDA/BP archive enables downloading files and datasets in a secure manner. That can be achieved using the `sda-cli` tool, and it can be done in two ways: +- by downloading from an S3 bucket (`./sda-cli download`) +- by using the download API (`./sda-cli sda-download`) -### Create keys +### Download from S3 bucket +This process consists of the following two steps: creating the keys and downloading the file. 
These steps are explained in the following sections. + +#### Create keys In order to make sure that the files are downloaded from the archive in a secure manner, the user is supposed to create the key pair that the files will be encrypted with. The key pair can be created using the following command: ```bash @@ -186,7 +191,7 @@ where `` is the base name of the key files. This command will crea **NOTE:** Make sure to keep these keys safe. Losing the keys could lead to sensitive data leaks. -### Download file +#### Download file The `sda-cli` tool allows for downloading file(s)/datasets. The URLs of the respective dataset files that are available for downloading are stored in a file named `urls_list.txt`. `sda-cli` allows to download files only by using such a file or the URL where it is stored. There are three different ways to pass the location of the file to the tool, similar to the [dataset size section](#get-dataset-size): 1. a direct URL to `urls_list.txt` or a file with a different name but containing the locations of the dataset files @@ -204,6 +209,19 @@ The tool also allows for selecting a folder where the files will be downloaded, ``` **Note**: If needed, the user can download a selection of files from an available dataset by providing a customized `urls_list.txt` file. +### Download using the download API + +The download API allows downloading files from the archive. It requires the user to have access to the dataset; therefore, a [configuration file](#download-the-configuration-file) needs to be downloaded before the files can be downloaded. +To download files, the user also needs to know the download service URL, the dataset ID and the path of the file. Given those four arguments, files can be downloaded using the following command: +```bash +./sda-cli sda-download -config <configuration_file> -dataset <datasetID> -url <download-service-URL> <filepath> ... 
+``` +where `<configuration_file>` is the file downloaded in the [previous step](#download-the-configuration-file), `<datasetID>` is the ID of the dataset, `<download-service-URL>` is the URL of the download service and `<filepath>` is the path of the file in the dataset. +The tool also allows downloading multiple files at once by listing their filepaths separated by spaces, and selecting the folder where the files will be downloaded using the `outdir` argument: +```bash +./sda-cli sda-download -config <configuration_file> -dataset <datasetID> -url <download-service-URL> -outdir <outdir> <filepath_1> <filepath_2> ... +``` + ## Decrypt file Given that the instructions in the [download section](#download) have been followed, the key pair and the data files should be stored in some location. The last step is to decrypt the files in order to access their content. That can be achieved using the following command: diff --git a/go.mod b/go.mod index e026a784..0f15d373 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/NBISweden/sda-cli go 1.22.3 require ( - github.com/aws/aws-sdk-go v1.53.10 + github.com/aws/aws-sdk-go v1.54.6 github.com/manifoldco/promptui v0.9.0 github.com/neicnordic/crypt4gh v1.12.0 github.com/sirupsen/logrus v1.9.3 diff --git a/go.sum b/go.sum index b08cf6ea..ba652801 100644 --- a/go.sum +++ b/go.sum @@ -71,8 +71,6 @@ golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= -golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20190308174544-00c44ba9c14f/go.mod h1:25r3+/G6/xytQM8iWZKq3Hn0kr0rgFKPUNVEL/dr3z4= golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= diff --git a/main.go b/main.go index dd80a10e..cf89267e 100644 --- 
a/main.go +++ b/main.go @@ -14,6 +14,7 @@ import ( "github.com/NBISweden/sda-cli/htsget" "github.com/NBISweden/sda-cli/list" "github.com/NBISweden/sda-cli/login" + sdaDownload "github.com/NBISweden/sda-cli/sda_download" "github.com/NBISweden/sda-cli/upload" "github.com/NBISweden/sda-cli/version" log "github.com/sirupsen/logrus" @@ -44,6 +45,7 @@ var Commands = map[string]commandInfo{ "list": {list.Args, list.Usage, list.ArgHelp}, "htsget": {htsget.Args, htsget.Usage, htsget.ArgHelp}, "login": {login.Args, login.Usage, login.ArgHelp}, + "sda-download": {sdaDownload.Args, sdaDownload.Usage, sdaDownload.ArgHelp}, "version": {version.Args, version.Usage, version.ArgHelp}, } @@ -74,6 +76,8 @@ func main() { err = htsget.Htsget(args) case "login": err = login.NewLogin(args) + case "sda-download": + err = sdaDownload.SdaDownload(args) case "version": err = version.Version(Version) default: diff --git a/sda_download/sda_download.go b/sda_download/sda_download.go new file mode 100644 index 00000000..19dc29f3 --- /dev/null +++ b/sda_download/sda_download.go @@ -0,0 +1,262 @@ +package sdadownload + +import ( + "encoding/json" + "errors" + "flag" + "fmt" + "io" + "net/http" + "net/mail" + "net/url" + "os" + "path/filepath" + "slices" + "strings" + + "github.com/NBISweden/sda-cli/helpers" + "github.com/vbauerster/mpb/v8" + "github.com/vbauerster/mpb/v8/decor" +) + +// Help text and command line flags. + +// Usage text that will be displayed as command line help text when using the +// `help download` command +var Usage = ` +USAGE: %s sda-download -config -dataset -url (-outdir ) [filepath(s)] + +sda-download: + Downloads files from the Sensitive Data Archive (SDA) by using APIs from the given url. The user + must have been granted access to the datasets (visas) that are to be downloaded. + The files will be downloaded in the current directory, if outdir is not defined. 
+` + +// ArgHelp is the suffix text that will be displayed after the argument list in +// the module help +var ArgHelp = ` + [dataset] + The ID of the dataset that the file is part of. + [uri] + All flagless arguments will be used as sda-download uri. + [filepath(s)] + The filepath of the file to download.` + +// Args is a flagset that needs to be exported so that it can be written to the +// main program help +var Args = flag.NewFlagSet("sda-download", flag.ExitOnError) + +var configPath = Args.String("config", "", "S3 config file to use for downloading.") + +var datasetID = Args.String("dataset", "", "Dataset ID for the file to download") + +var URL = Args.String("url", "", "The url of the sda-download server") + +var outDir = Args.String("outdir", "", "Directory for downloaded files.") + +// necessary for mocking in testing +var getResponseBody = getBody + +// File struct represents the file metadata +type File struct { + FileID string `json:"fileId"` + DatasetID string `json:"datasetId"` + DisplayFileName string `json:"displayFileName"` + FilePath string `json:"filePath"` + FileName string `json:"fileName"` + FileSize int `json:"fileSize"` + DecryptedFileSize int `json:"decryptedFileSize"` + DecryptedFileChecksum string `json:"decryptedFileChecksum"` + DecryptedFileChecksumType string `json:"decryptedFileChecksumType"` + FileStatus string `json:"fileStatus"` + CreatedAt string `json:"createdAt"` + LastModified string `json:"lastModified"` +} + +// SdaDownload function downloads files from the SDA by using the +// download's service APIs +func SdaDownload(args []string) error { + var files []string + // Call ParseArgs to take care of all the flag parsing + err := helpers.ParseArgs(args, Args) + if err != nil { + return fmt.Errorf("failed parsing arguments, reason: %v", err) + } + + if *datasetID == "" || *URL == "" || *configPath == "" { + return fmt.Errorf("missing required arguments, dataset, config and url are required") + } + + // Check that input 
file/folder list is not empty + if len(Args.Args()) == 0 { + return errors.New("no files to download") + } + + files = append(files, Args.Args()...) + + // Get the configuration file or the .sda-cli-session + config, err := helpers.GetAuth(*configPath) + if err != nil { + return err + } + + // Check if the token has expired + err = helpers.CheckTokenExpiration(config.AccessToken) + if err != nil { + return err + } + + // Loop through the files and download them + for _, filePath := range files { + fileIDURL, err := getFileIDURL(*URL, config.AccessToken, *datasetID, filePath) + if err != nil { + return err + } + + // Check if the file path contains a userID and if it does, + // do not keep it in the file path + filePathSplit := strings.Split(filePath, "/") + if strings.Contains(filePathSplit[0], "_") { + _, err := mail.ParseAddress(strings.ReplaceAll(filePathSplit[0], "_", "@")) + if err == nil { + filePath = strings.Join(filePathSplit[1:], "/") + } + } + + outFilename := filePath + if *outDir != "" { + outFilename = *outDir + "/" + filePath + } + + err = downloadFile(fileIDURL, config.AccessToken, outFilename) + if err != nil { + return err + } + } + + return nil +} + +// downloadFile downloads the file by using the download URL +func downloadFile(uri, token, filePath string) error { + filePath = strings.TrimSuffix(filePath, ".c4gh") + // Get the file body + body, err := getResponseBody(uri, token) + if err != nil { + return fmt.Errorf("failed to get file for download, reason: %v", err) + } + + // Create the directory if it does not exist + fileDir := filepath.Dir(filePath) + err = os.MkdirAll(fileDir, os.ModePerm) + if err != nil { + return fmt.Errorf("failed to create directory, reason: %v", err) + } + + outfile, err := os.Create(filePath) + if err != nil { + return fmt.Errorf("failed to create file, reason: %v", err) + } + defer outfile.Close() + + // Create a new progress container + p := mpb.New() + + // Create a new progress bar with the length of the body + 
bar := p.AddBar(int64(len(body)), + mpb.PrependDecorators( + decor.CountersKibiByte("% .2f / % .2f"), + ), + ) + + // Create a proxy reader + reader := strings.NewReader(string(body)) + proxyReader := bar.ProxyReader(reader) + + fmt.Printf("Downloading file to %s\n", filePath) + // Copy from the proxy reader (which updates the progress bar) to the file + _, err = io.Copy(outfile, proxyReader) + if err != nil { + return fmt.Errorf("failed to write file, reason: %v", err) + } + + // Wait for the progress bar to finish + p.Wait() + + return nil +} + +// getFileIDURL lists the dataset's files, picks the first entry whose file path (when filename +// contains a "/") or file ID (otherwise) contains filename, and returns the download URL for it +func getFileIDURL(baseURL, token, dataset, filename string) (string, error) { + // Validate the base URL; a path-only URL parses without error, so Join keeps the returned error non-nil + u, err := url.ParseRequestURI(baseURL) + if err != nil || u.Scheme == "" { + return "", errors.Join(fmt.Errorf("invalid base URL %q, an absolute URL with a scheme is required", baseURL), err) + } + + // Make the url for listing files + filesURL := baseURL + "/metadata/datasets/" + dataset + "/files" + + // Get the response body from the files API + body, err := getResponseBody(filesURL, token) + if err != nil { + return "", fmt.Errorf("failed to get files, reason: %v", err) + } + + // Parse the JSON response + var files []File + err = json.Unmarshal(body, &files) + if err != nil { + return "", fmt.Errorf("failed to parse file list JSON, reason: %v", err) + } + + // Get the file ID for the filename + var idx int + switch { + case strings.Contains(filename, "/"): + idx = slices.IndexFunc(files, func(f File) bool { return strings.Contains(f.FilePath, filename) }) + default: + idx = slices.IndexFunc(files, func(f File) bool { return strings.Contains(f.FileID, filename) }) + } + + if idx == -1 { + return "", fmt.Errorf("file %s not found in dataset %s", filename, dataset) + } + + return baseURL + "/files/" + files[idx].FileID, nil +} + +// getBody gets the body of the response from the URL +func getBody(url, token string) ([]byte, error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { 
+ return nil, fmt.Errorf("failed to create request, reason: %v", err) + } + + // Add headers + req.Header.Add("Authorization", "Bearer "+token) + req.Header.Add("Content-Type", "application/json") + + // Send the request + client := &http.Client{} + res, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to get response, reason: %v", err) + } + // Close the body on every return path, including the non-200 one below + defer res.Body.Close() + + // Check the status code + if res.StatusCode != http.StatusOK { + return nil, fmt.Errorf("server returned status %d", res.StatusCode) + } + + // Read the response body + resBody, err := io.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body, reason: %v", err) + } + + return resBody, nil +} diff --git a/sda_download/sda_download_test.go b/sda_download/sda_download_test.go new file mode 100644 index 00000000..b2645c26 --- /dev/null +++ b/sda_download/sda_download_test.go @@ -0,0 +1,183 @@ +package sdadownload + +import ( + "fmt" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + log "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" +) + +type TestSuite struct { + suite.Suite + accessToken string +} + +func createConfigFile(fileName, token string) os.File { + // Create conf file for sda-cli + var confFile = fmt.Sprintf(` + access_token = %[1]s + host_base = inbox.dummy.org + encoding = UTF-8 + host_bucket = inbox.dummy.org + multipart_chunk_size_mb = 50 + secret_key = dummy + access_key = dummy + use_https = False + check_ssl_certificate = False + check_ssl_hostname = False + socket_timeout = 30 + human_readable_sizes = True + guess_mime_type = True + encrypt = False + `, token) + + // Create config file + configPath, err := os.CreateTemp(os.TempDir(), fileName) + if err != nil { + log.Panic(err) + } + + // Write config file + err = os.WriteFile(configPath.Name(), []byte(confFile), 0600) + if err != nil { + 
log.Printf("failed to write temp config file, %v", err) + } + + return *configPath +} + +func TestConfigTestSuite(t *testing.T) { + suite.Run(t, new(TestSuite)) +} + +func (suite *TestSuite) SetupTest() { + suite.accessToken = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6ImtleXN0b3JlLUNIQU5HRS1NRSJ9.eyJqdGkiOiJWTWpfNjhhcEMxR2FJbXRZdFExQ0ciLCJzdWIiOiJkdW1teSIsImlzcyI6Imh0dHA6Ly9vaWRjOjkwOTAiLCJpYXQiOjE3MDc3NjMyODksImV4cCI6MTg2NTU0NzkxOSwic2NvcGUiOiJvcGVuaWQgZ2E0Z2hfcGFzc3BvcnRfdjEgcHJvZmlsZSBlbWFpbCIsImF1ZCI6IlhDNTZFTDExeHgifQ.ZFfIAOGeM2I5cvqr1qJV74qU65appYjpNJVWevGHjGA5Xk_qoRMFJXmG6AiQnYdMKnJ58sYGNjWgs2_RGyw5NyM3-pgP7EKHdWU4PrDOU84Kosg4IPMSFxbBRAEjR5X04YX_CLYW2MFk_OyM9TIln522_JBVT_jA5WTTHSmBRHntVArYYHvQdF-oFRiqL8JXWlsUBh3tqQ33sZdqd9g64YhTk9a5lEC42gn5Hg9Hm_qvkl5orzEqIg7x9z5706IBE4Zypco5ohrAKsEbA8EKbEBb0jigGgCslQNde2owUyKIkvZYmxHA78X5xpymMp9K--PgbkyMS9GtA-YwOHPs-w" +} + +func (suite *TestSuite) TestNoFiles() { + + confPath := createConfigFile("s3cmd-download.conf", suite.accessToken) + + os.Args = []string{"sda-download", "-dataset", "TES01", "-config", confPath.Name(), "-url", "https://some/url"} + + err := SdaDownload(os.Args) + assert.EqualError(suite.T(), err, "no files to download") +} + +func (suite *TestSuite) TestInvalidUrl() { + + confPath := createConfigFile("s3cmd.conf", suite.accessToken) + + os.Args = []string{"sda-download", "-dataset", "TES01", "-config", confPath.Name(), "-url", "https://some/url", "file1", "file2"} + + err := SdaDownload(os.Args) + assert.Contains(suite.T(), err.Error(), "failed to get files, reason: failed to get response, reason: Get \"https://some/url/metadata/datasets/TES01/files\": dial tcp: lookup some") +} + +func (suite *TestSuite) TestGetBody() { + // Create a test server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + // Set the response status code + w.WriteHeader(http.StatusOK) + // Set the response body + fmt.Fprint(w, "test response") + })) + defer server.Close() + + // 
Make a request to the test server + body, err := getBody(server.URL, "test-token") + if err != nil { + suite.T().Errorf("getBody returned an error: %v", err) + } + + // Check the response body + expectedBody := "test response" + if string(body) != expectedBody { + suite.T().Errorf("getBody returned incorrect response body, got: %s, want: %s", string(body), expectedBody) + } +} + +func (suite *TestSuite) TestDownloadUrl() { + // Mock getBody function + defer func() { getResponseBody = getBody }() + getResponseBody = func(_, _ string) ([]byte, error) { + return []byte(`[ + { + "fileId": "file1id", + "datasetId": "TES01", + "displayName": "file1", + "filePath": "path/to/file1", + "fileName": "4293c9a7-re60-46ac-b79a-40ddc0ddd1c6" + } + ]`), nil + } + + baseURL := "https://some/url" + token := suite.accessToken + dataset := "test-dataset" + filepath := "path/to/file1" + expectedURL := "https://some/url/files/file1id" + + // Test with valid base_url, token, dataset, and filename + url, err := getFileIDURL(baseURL, token, dataset, filepath) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), expectedURL, url) + + // Test with url as dataset (capture the new result so the assertion checks this call, not the previous one) + dataset = "https://doi.example/another/url/001" + url, err = getFileIDURL(baseURL, token, dataset, filepath) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), expectedURL, url) + + // Test with filename not in response + filepath = "path/to/file2" + _, err = getFileIDURL(baseURL, token, dataset, filepath) + assert.Error(suite.T(), err) + + // Test with fileID + filepath = "file1id" + _, err = getFileIDURL(baseURL, token, dataset, filepath) + assert.NoError(suite.T(), err) + + // Test with bad URL + _, err = getFileIDURL("some/url", token, dataset, filepath) + assert.Error(suite.T(), err) +} + +func (suite *TestSuite) TestDownloadFile() { + // Create a temporary directory for testing + tempDir := suite.T().TempDir() + + // Create a temporary file for testing + tempFile := filepath.Join(tempDir, "dummy-file.txt") + err 
:= os.WriteFile(tempFile, []byte("test content"), 0600) + require.NoError(suite.T(), err) + + // Create a test server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + // Set the response status code + w.WriteHeader(http.StatusOK) + // Set the response body + fmt.Fprint(w, "dummy response") + })) + defer server.Close() + + // Call the downloadFile function + err = downloadFile(server.URL, "test-token", tempFile) + require.NoError(suite.T(), err) + + // Read the downloaded file + downloadedContent, err := os.ReadFile(tempFile) + require.NoError(suite.T(), err) + + // Check if the downloaded content matches the expected content + expectedContent := "dummy response" + assert.Equal(suite.T(), expectedContent, string(downloadedContent)) +} diff --git a/testing/archive_data/4293c9a7-dc50-46db-b79a-27ddc0dad1c6 b/testing/archive_data/4293c9a7-dc50-46db-b79a-27ddc0dad1c6 new file mode 100755 index 00000000..7a882b0a Binary files /dev/null and b/testing/archive_data/4293c9a7-dc50-46db-b79a-27ddc0dad1c6 differ diff --git a/testing/docker-compose.yml b/testing/docker-compose.yml index 5b65cc27..e304e6e8 100644 --- a/testing/docker-compose.yml +++ b/testing/docker-compose.yml @@ -28,6 +28,7 @@ services: /usr/bin/mc rm -r --force s3/test; /usr/bin/mc mb s3/test; /usr/bin/mc mb s3/download; + /usr/bin/mc mb s3/archive; /usr/bin/mc policy set public s3/download; " rabbitmq: @@ -156,6 +157,70 @@ services: - shared:/shared ports: - 8080:8080 + # mock oidc server for testing the download endpoint + mockauth: + command: + - /bin/sh + - -c + - | + pip install --upgrade pip + pip install aiohttp Authlib + python -u /mockoidc.py + container_name: mockauth + image: python:3.10-slim + volumes: + - ./mockoidc/mockoidc.py:/mockoidc.py + mem_limit: 256m + ports: + - "8002:8000" + restart: always + download: + command: sda-download + container_name: download + depends_on: + postgres: + condition: service_healthy + s3_backend: + condition: 
service_healthy + mockauth: + condition: service_started + environment: + - ARCHIVE_URL=http://s3 + - ARCHIVE_TYPE=s3 + - ARCHIVE_PORT=9000 + - ARCHIVE_ACCESSKEY=ElixirID + - ARCHIVE_SECRETKEY=987654321 + - ARCHIVE_BUCKET=archive + - DB_HOST=postgres + - DB_PORT=5432 + - DB_USER=postgres + - DB_PASSWORD=rootpasswd + - DB_DATABASE=sda + - DB_SSLMODE=disable + - OIDC_CONFIGURATION_URL=http://mockauth:8000/.well-known/openid-configuration + - GRPC_PORT=50051 + - GRPC_HOST=reencrypt + - APP_SERVEUNENCRYPTEDDATA=true + image: "ghcr.io/neicnordic/sensitive-data-archive:${TAG}-download" + volumes: + - ./archive_data/4293c9a7-dc50-46db-b79a-27ddc0dad1c6:/tmp/4293c9a7-dc50-46db-b79a-27ddc0dad1c6 + mem_limit: 256m + ports: + - "8080:8080" + restart: always + reencrypt: + image: ghcr.io/neicnordic/sensitive-data-archive:${TAG} + command: [ sda-reencrypt ] + container_name: reencrypt + environment: + - LOG_LEVEL=debug + - C4GH_PASSPHRASE=oaagCP1YgAZeEyl2eJAkHv9lkcWXWFgm + - C4GH_FILEPATH=/dev_utils/c4gh.sec.pem + ports: + - "50051:50051" + restart: always + volumes: + - ./:/dev_utils/ volumes: data: dbdata: diff --git a/testing/mockoidc/mockoidc.py b/testing/mockoidc/mockoidc.py new file mode 100644 index 00000000..e0c8f833 --- /dev/null +++ b/testing/mockoidc/mockoidc.py @@ -0,0 +1,303 @@ +"""Mock OAUTH2 aiohttp.web server.""" + +from aiohttp import web +from authlib.jose import jwt, RSAKey +from typing import Tuple, Union +import ssl +from pathlib import Path + +HTTP_PROTOCOL = "http" + + +def _set_ssl() -> Union[ssl.SSLContext, None]: + global HTTP_PROTOCOL + here = Path(__file__) + ssl_cert = here.parent / "certs" / "mockauth.pem" + ssl_key = here.parent / "certs" / "mockauth-key.pem" + ssl_context: Union[ssl.SSLContext, None] + if ssl_key.is_file() and ssl_cert.is_file(): + ssl_context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) + ssl_context.load_cert_chain(str(ssl_cert), str(ssl_key)) + ssl_context.check_hostname = False + HTTP_PROTOCOL = "https" + else: + 
ssl_context = None + + return ssl_context + + +def _generate_token() -> Tuple: + """Generate RSA Key pair to be used to sign token and the JWT Token itself.""" + global HTTP_PROTOCOL + + key = RSAKey.generate_key(is_private=True) + # Only a minimal set of claims is set; `exp` is far in the future so the mock tokens do not expire + # See available claims here: https://www.iana.org/assignments/jwt/jwt.xhtml + # Dataset access is carried by the `ga4gh_visa_v1` claim of the passports crafted below + header = { + "jku": f"{HTTP_PROTOCOL}://mockauth:8000/idp/profile/oidc/keyset", + "kid": "rsa1", + "alg": "RS256", + "typ": "JWT", + } + trusted_payload = { + "sub": "requester@demo.org", + "aud": ["aud2", "aud3"], + "azp": "azp", + "scope": "openid ga4gh_passport_v1", + "iss": "https://demo.example", + "exp": 9999999999, + "iat": 1561621913, + "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102", + } + untrusted_payload = { + "sub": "requester@demo.org", + "aud": ["aud2", "aud3"], + "azp": "azp", + "scope": "openid ga4gh_passport_v1", + "iss": "https://demo2.example", + "exp": 9999999999, + "iat": 1561621913, + "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102", + } + empty_payload = { + "sub": "requester@demo.org", + "iss": "https://demo.example", + "exp": 99999999999, + "iat": 1547794655, + "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102", + } + # Craft passports + passport_terms = { + "iss": "https://demo1.example", + "sub": "requester@demo.org", + "ga4gh_visa_v1": { + "type": "AcceptedTermsAndPolicies", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "dac", + "asserted": 1568699331, + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "bed0aff9-29b1-452c-b776-a6f2200b6db1", + } + # passport for dataset permissions 1 + passport_dataset1 = { + "iss": "https://demo.example", + "sub": "requester@demo.org", + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://doi.example/ty009.sfrrss/600.45asasga", + "source": "https://doi.example/no_org", + "by": 
"self", + "asserted": 1568699331, + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "d1d7b521-bd6b-433d-b2d5-3d874aab9d55", + } + # passport for the same dataset, but with a different issuer (demo2) + passport_dataset2 = { + "iss": "https://demo2.example", + "sub": "requester@demo.org", + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://doi.example/ty009.sfrrss/600.45asasga", + "source": "https://doi.example/no_org", + "by": "self", + "asserted": 1568699331, + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "d1d7b521-bd6b-433d-b2d5-3d874aab9d55", + } + + public_jwk = key.as_dict(is_private=False) + private_jwk = dict(key) + + # token that contains demo dataset and trusted visas + trusted_token = jwt.encode(header, trusted_payload, private_jwk).decode("utf-8") + + # token that contains demo dataset and untrusted visas + untrusted_token = jwt.encode(header, untrusted_payload, private_jwk).decode("utf-8") + + # empty token + empty_userinfo = jwt.encode(header, empty_payload, private_jwk).decode("utf-8") + + # general terms that illustrates another visatype: AcceptedTermsAndPolicies + visa_terms_encoded = jwt.encode(header, passport_terms, private_jwk).decode("utf-8") + + # visa that contains demo dataset + visa_dataset1_encoded = jwt.encode(header, passport_dataset1, private_jwk).decode( + "utf-8" + ) + + # visa that contains demo dataset but an issuer that is not trusted + visa_dataset2_encoded = jwt.encode(header, passport_dataset2, private_jwk).decode( + "utf-8" + ) + return ( + public_jwk, + trusted_token, + empty_userinfo, + untrusted_token, + visa_terms_encoded, + visa_dataset1_encoded, + visa_dataset2_encoded, + ) + + +async def fixed_response(request: web.Request) -> web.Response: + global HTTP_PROTOCOL + WELL_KNOWN = { + "issuer": f"{HTTP_PROTOCOL}://mockauth:8000", + "authorization_endpoint": f"{HTTP_PROTOCOL}://mockauth:8000/idp/profile/oidc/authorize", + "registration_endpoint": f"{HTTP_PROTOCOL}://mockauth:8000/idp/profile/oidc/register", + 
"token_endpoint": f"{HTTP_PROTOCOL}://mockauth:8000/idp/profile/oidc/token", + "userinfo_endpoint": f"{HTTP_PROTOCOL}://mockauth:8000/idp/profile/oidc/userinfo", + "jwks_uri": f"{HTTP_PROTOCOL}://mockauth:8000/idp/profile/oidc/keyset", + "response_types_supported": [ + "code", + "id_token", + "token id_token", + "code id_token", + "code token", + "code token id_token", + ], + "subject_types_supported": ["public", "pairwise"], + "grant_types_supported": [ + "authorization_code", + "implicit", + "refresh_token", + "urn:ietf:params:oauth:grant-type:device_code", + ], + "id_token_encryption_alg_values_supported": [ + "RSA1_5", + "RSA-OAEP", + "RSA-OAEP-256", + "A128KW", + "A192KW", + "A256KW", + "A128GCMKW", + "A192GCMKW", + "A256GCMKW", + ], + "id_token_encryption_enc_values_supported": ["A128CBC-HS256"], + "id_token_signing_alg_values_supported": [ + "RS256", + "RS384", + "RS512", + "HS256", + "HS384", + "HS512", + "ES256", + ], + "userinfo_encryption_alg_values_supported": [ + "RSA1_5", + "RSA-OAEP", + "RSA-OAEP-256", + "A128KW", + "A192KW", + "A256KW", + "A128GCMKW", + "A192GCMKW", + "A256GCMKW", + ], + "userinfo_encryption_enc_values_supported": ["A128CBC-HS256"], + "userinfo_signing_alg_values_supported": [ + "RS256", + "RS384", + "RS512", + "HS256", + "HS384", + "HS512", + "ES256", + ], + "request_object_signing_alg_values_supported": [ + "none", + "RS256", + "RS384", + "RS512", + "HS256", + "HS384", + "HS512", + "ES256", + "ES384", + "ES512", + ], + "token_endpoint_auth_methods_supported": [ + "client_secret_basic", + "client_secret_post", + "client_secret_jwt", + "private_key_jwt", + ], + "claims_parameter_supported": True, + "request_parameter_supported": True, + "request_uri_parameter_supported": True, + "require_request_uri_registration": True, + "display_values_supported": ["page"], + "scopes_supported": ["openid"], + "response_modes_supported": ["query", "fragment", "form_post"], + "claims_supported": [ + "aud", + "iss", + "sub", + "iat", + "exp", + 
"acr", + "auth_time", + "ga4gh_passport_v1", + "remoteUserIdentifier", + ], + } + return web.json_response(WELL_KNOWN) + + +async def jwk_response(request: web.Request) -> web.Response: + """Mock JSON Web Key server.""" + keys = [DATA[0]] + keys[0]["kid"] = "rsa1" + data = {"keys": keys} + return web.json_response(data) + + +async def tokens_response(request: web.Request) -> web.Response: + """Serve generated tokens.""" + # trusted visas, empty token, untrusted visas + data = [DATA[1], DATA[2], DATA[3]] + return web.json_response(data) + + +async def userinfo(request: web.Request) -> web.Response: + """Mock ELIXIR AAI userinfo: GA4GH visas for a known bearer token, 401 otherwise.""" + _bearer = request.headers.get("Authorization", "").partition(" ")[2] + if _bearer == DATA[2]: + print("empty token requested") + return web.json_response({}) + if _bearer == DATA[1]: + print("ga4gh token requested, trusted") + return web.json_response({"ga4gh_passport_v1": [DATA[4], DATA[5]]}) + if _bearer == DATA[3]: + print("ga4gh token requested, untrusted") + return web.json_response({"ga4gh_passport_v1": [DATA[4], DATA[6]]}) + # Falling through would return None instead of a response; reject a + # missing or unknown bearer token explicitly with a 401. + raise web.HTTPUnauthorized(reason="missing or unknown bearer token") + + +def init() -> web.Application: + """Start server.""" + + app = web.Application() + app.router.add_get("/idp/profile/oidc/keyset", jwk_response) + app.router.add_get("/tokens", tokens_response) + app.router.add_get("/idp/profile/oidc/userinfo", userinfo) + app.router.add_get("/.well-known/openid-configuration", fixed_response) + return app + + +if __name__ == "__main__": + ssl_context = _set_ssl() + DATA = _generate_token() + web.run_app(init(), port=8000, ssl_context=ssl_context) diff --git a/testing/s3cmd.conf b/testing/s3cmd-template.conf similarity index 100% rename from testing/s3cmd.conf rename to testing/s3cmd-template.conf