Skip to content

Commit

Permalink
Merge branch 'main' into feature/htsget
Browse files Browse the repository at this point in the history
  • Loading branch information
pahatz authored Jun 27, 2024
2 parents f9021e9 + 360b5d8 commit ede3255
Show file tree
Hide file tree
Showing 14 changed files with 952 additions and 8 deletions.
93 changes: 93 additions & 0 deletions .github/integration/setup/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,29 @@ if [ ! -f "dummy.ega.nbis.se.pem" ]; then
chmod 644 keys/dummy.ega.nbis.se.pub dummy.ega.nbis.se.pem
fi

# Build s3cmd.conf from the template, then append the token printed by
# sign_jwt.py as the access_token entry
cp s3cmd-template.conf s3cmd.conf
output="$(python sign_jwt.py)"
printf 'access_token=%s\n' "${output}" >> s3cmd.conf

# Create crypt4gh keys for testing the download service.
# Both key files are made read-only below, so remove any copies left over from
# an earlier run first; otherwise the redirections fail with "Permission denied"
# when the script is re-run as a non-root user.
rm -f c4gh.pub.pem c4gh.sec.pem

# The heredoc delimiter is quoted so the key material is written literally,
# without any parameter or command expansion.
cat << 'EOF' > c4gh.pub.pem
-----BEGIN CRYPT4GH PUBLIC KEY-----
avFAerx0ZWuJE6fTI8S/0wv3yMo1n3SuNTV6zvKdxQc=
-----END CRYPT4GH PUBLIC KEY-----
EOF

chmod 444 c4gh.pub.pem

cat << 'EOF' > c4gh.sec.pem
-----BEGIN CRYPT4GH ENCRYPTED PRIVATE KEY-----
YzRnaC12MQAGc2NyeXB0ABQAAAAAwAs5mVkXda50vqeYv6tbkQARY2hhY2hhMjBf
cG9seTEzMDUAPAd46aTuoVWAe+fMGl3VocCKCCWmgFUsFIHejJoWxNwy62c1L/Vc
R9haQsAPfJMLJSvUXStJ04cyZnDHSw==
-----END CRYPT4GH ENCRYPTED PRIVATE KEY-----
EOF

chmod 444 c4gh.sec.pem

# Get the latest image tag for s3inbox: ask the GitHub API for the tags of
# neicnordic/sensitive-data-archive and keep the name of the first entry
# (assumes the API lists the newest tag first — TODO confirm).
latest_tag=$(curl -s https://api.github.com/repos/neicnordic/sensitive-data-archive/tags | jq -r '.[0].name')

Expand Down Expand Up @@ -66,4 +86,77 @@ do echo "waiting for buckets to be created"
sleep 10
done

# Populate the database with the entries needed for testing the download service
# Insert entry in sda.files.
# The statement runs in a throwaway pg-client container on the testing_default
# network (presumably the image's entrypoint is psql — TODO confirm).
# RETURNING id makes the statement output the id of the new row; xargs trims the
# surrounding whitespace before the value is stored in file_id.
file_id=$(docker run --rm --name client --network testing_default \
neicnordic/pg-client:latest \
postgresql://postgres:rootpasswd@postgres:5432/sda \
-t -q -c "INSERT INTO sda.files (stable_id, submission_user, \
submission_file_path, submission_file_size, archive_file_path, \
archive_file_size, decrypted_file_size, backup_path, header, \
encryption_method) VALUES ('urn:neic:001-002', 'integration-test', '5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8_elixir-europe.org/main/subfolder/dummy_data.c4gh', \
1048729, '4293c9a7-dc50-46db-b79a-27ddc0dad1c6', 1049081, 1048605, \
'', '637279707434676801000000010000006c000000000000006af1407abc74656b8913a7d323c4bfd30bf7c8ca359f74ae35357acef29dc5073799e207ec5d022b2601340585ff082565e55fbff5b6cdbbbe6b12a0d0a19ef325a219f8b62344325e22c8d26a8e82e45f053f4dcee10c0ec4bb9e466d5253f139dcd4be', 'CRYPT4GH') RETURNING id;" | xargs)

# An empty file_id means no id came back from the insert; the later inserts
# all reference it, so stop here.
if [ -z "$file_id" ]; then
echo "Failed to insert file entry into database"
exit 1
fi

# Record a 'ready' event for the file in sda.file_event_log
docker run --rm --name client --network testing_default \
    neicnordic/pg-client:latest \
    postgresql://postgres:rootpasswd@postgres:5432/sda \
    -t -q -c "INSERT INTO sda.file_event_log (file_id, event) VALUES ('$file_id', 'ready');"

# Register the SHA256 checksums of the file in sda.checksums, one row per
# source. Each list entry has the form "<checksum>:<source>".
for checksum_entry in \
    "06bb0a514b26497b4b41b30c547ad51d059d57fb7523eb3763cfc82fdb4d8fb7:UNENCRYPTED" \
    "5e9c767958cc3f6e8d16512b8b8dcab855ad1e04e05798b86f50ef600e137578:UPLOADED" \
    "74820dbcf9d30f8ccd1ea59c17d5ec8a714aabc065ae04e46ad82fcf300a731e:ARCHIVED"
do
    docker run --rm --name client --network testing_default \
        neicnordic/pg-client:latest \
        postgresql://postgres:rootpasswd@postgres:5432/sda \
        -t -q -c "INSERT INTO sda.checksums (file_id, checksum, type, source) VALUES ('$file_id', '${checksum_entry%%:*}', 'SHA256', '${checksum_entry##*:}');"
done

# Insert dataset in sda.datasets.
# ON CONFLICT ... DO UPDATE turns the insert into an upsert, so RETURNING id
# yields the dataset id even when the dataset already exists.
# The output is passed through xargs, as is done for file_id, so the id is
# stored without the whitespace padding of the client output before it is
# interpolated into the next statement.
dataset_id=$(docker run --rm --name client --network testing_default \
neicnordic/pg-client:latest \
postgresql://postgres:rootpasswd@postgres:5432/sda \
-t -q -c "INSERT INTO sda.datasets (stable_id) VALUES ('https://doi.example/ty009.sfrrss/600.45asasga') \
ON CONFLICT (stable_id) DO UPDATE \
SET stable_id=excluded.stable_id RETURNING id;" | xargs)

# An empty dataset_id means no id came back from the upsert
if [ -z "$dataset_id" ]; then
    echo "Failed to insert dataset entry into database"
    exit 1
fi

# Link the file to the dataset in sda.file_dataset
docker run --rm --name client --network testing_default \
    neicnordic/pg-client:latest \
    postgresql://postgres:rootpasswd@postgres:5432/sda \
    -t -q -c "INSERT INTO sda.file_dataset (file_id, dataset_id) VALUES ('$file_id', $dataset_id);"

# Upload the file to the archive bucket; the object name is the same value
# that was stored as archive_file_path in the sda.files entry
archive_object="4293c9a7-dc50-46db-b79a-27ddc0dad1c6"
s3cmd -c directS3 put "archive_data/${archive_object}" "s3://archive/${archive_object}"

# Get the correct token from mockoidc: the first element of the JSON array
# served at /tokens (-sS hides the progress meter but still prints errors)
token=$(curl -sS "http://localhost:8002/tokens" | jq -r '.[0]')

# jq prints "null" when the response has no first element, and the value is
# empty when the request itself fails; in both cases stop instead of writing
# an unusable access_token into the configuration file
if [ -z "$token" ] || [ "$token" = "null" ]; then
    echo "Failed to get token from mockoidc"
    exit 1
fi

# Create s3cmd-download.conf file for download
cp s3cmd-template.conf s3cmd-download.conf
echo "access_token=$token" >> s3cmd-download.conf

# List the running containers in the job log
docker ps
18 changes: 18 additions & 0 deletions .github/integration/tests/tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -321,4 +321,22 @@ fi

rm -r downloads

# Download file by using the sda download service
./sda-cli sda-download \
    -config testing/s3cmd-download.conf \
    -dataset https://doi.example/ty009.sfrrss/600.45asasga \
    -url http://localhost:8080 \
    -outdir test-download \
    main/subfolder/dummy_data.c4gh

# the file is expected under the output directory, without the .c4gh suffix
downloaded_file="test-download/main/subfolder/dummy_data"
[ -f "$downloaded_file" ] || {
    echo "Downloaded file not found"
    exit 1
}

# the first line of the file must contain the dummy data marker
first_line=$(head -n 1 "$downloaded_file")
case "$first_line" in
    *"THIS FILE IS JUST DUMMY DATA"*)
        ;;
    *)
        echo "First line does not contain the expected string"
        exit 1
        ;;
esac

rm -r test-download

echo "Integration test finished successfully"
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
fi
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v5.1.0
uses: goreleaser/goreleaser-action@v6.0.0
with:
version: latest
args: release --rm-dist
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
run: go test -v -coverprofile=coverage -covermode=atomic ./...

- name: Codecov
uses: codecov/codecov-action@v4.4.1
uses: codecov/codecov-action@v4.5.0
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage
Expand Down
24 changes: 21 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,14 @@ If no config is given by the user, the tool will look for a previous login from

## Download

The SDA/BP archive enables for downloading files and datasets in a secure manner. That can be achieved using the `sda-cli` tool and the process consists of the following two steps
The SDA/BP archive allows downloading files and datasets in a secure manner. That can be achieved using the `sda-cli` tool, and it can be done in two ways:
- by downloading from a S3 bucket (`./sda-cli download`)
- by using the download API (`./sda-cli sda-download`)

### Create keys
### Download from S3 bucket
This process consists of the following two steps: creating the keys and downloading the file. These steps are explained in the following sections.

#### Create keys

In order to make sure that the files are downloaded from the archive in a secure manner, the user is supposed to create the key pair that the files will be encrypted with. The key pair can be created using the following command:
```bash
Expand All @@ -186,7 +191,7 @@ where `<keypair_name>` is the base name of the key files. This command will crea

**NOTE:** Make sure to keep these keys safe. Losing the keys could lead to sensitive data leaks.

### Download file
#### Download file

The `sda-cli` tool allows for downloading file(s)/datasets. The URLs of the respective dataset files that are available for downloading are stored in a file named `urls_list.txt`. `sda-cli` allows to download files only by using such a file or the URL where it is stored. There are three different ways to pass the location of the file to the tool, similar to the [dataset size section](#get-dataset-size):
1. a direct URL to `urls_list.txt` or a file with a different name but containing the locations of the dataset files
Expand All @@ -204,6 +209,19 @@ The tool also allows for selecting a folder where the files will be downloaded,
```
**Note**: If needed, the user can download a selection of files from an available dataset by providing a customized `urls_list.txt` file.

### Download using the download API

The download API allows for downloading files from the archive and it requires the user to have access to the dataset, therefore a [configuration file](#download-the-configuration-file) needs to be downloaded before starting the downloading of the files.
For downloading files, the user also needs to know the download service URL, the dataset ID and the path of the file. Given those four arguments, files can be downloaded using the following command:
```bash
./sda-cli sda-download -config <configuration_file> -dataset <datasetID> -url <download-service-URL> <filepath_1_to_download> <filepath_2_to_download> ...
```
where `<configuration_file>` is the file downloaded in the [previous step](#download-the-configuration-file), `<datasetID>` is the ID of the dataset and `<filepath>` is the path of the file in the dataset.
The tool also allows for downloading multiple files at once, by listing their filepaths separated by spaces. It also allows for selecting a folder where the files will be downloaded, using the `outdir` argument:
```bash
./sda-cli sda-download -config <configuration_file> -dataset <datasetID> -url <download-service-url> -outdir <outdir> <filepath_1_to_download> <filepath_2_to_download> ...
```

## Decrypt file

Given that the instructions in the [download section](#download) have been followed, the key pair and the data files should be stored in some location. The last step is to decrypt the files in order to access their content. That can be achieved using the following command:
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/NBISweden/sda-cli
go 1.22.3

require (
github.com/aws/aws-sdk-go v1.53.10
github.com/aws/aws-sdk-go v1.54.6
github.com/manifoldco/promptui v0.9.0
github.com/neicnordic/crypt4gh v1.12.0
github.com/sirupsen/logrus v1.9.3
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,6 @@ golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw=
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20190308174544-00c44ba9c14f/go.mod h1:25r3+/G6/xytQM8iWZKq3Hn0kr0rgFKPUNVEL/dr3z4=
golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
Expand Down
4 changes: 4 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/NBISweden/sda-cli/htsget"
"github.com/NBISweden/sda-cli/list"
"github.com/NBISweden/sda-cli/login"
sdaDownload "github.com/NBISweden/sda-cli/sda_download"
"github.com/NBISweden/sda-cli/upload"
"github.com/NBISweden/sda-cli/version"
log "github.com/sirupsen/logrus"
Expand Down Expand Up @@ -44,6 +45,7 @@ var Commands = map[string]commandInfo{
"list": {list.Args, list.Usage, list.ArgHelp},
"htsget": {htsget.Args, htsget.Usage, htsget.ArgHelp},
"login": {login.Args, login.Usage, login.ArgHelp},
"sda-download": {sdaDownload.Args, sdaDownload.Usage, sdaDownload.ArgHelp},
"version": {version.Args, version.Usage, version.ArgHelp},
}

Expand Down Expand Up @@ -74,6 +76,8 @@ func main() {
err = htsget.Htsget(args)
case "login":
err = login.NewLogin(args)
case "sda-download":
err = sdaDownload.SdaDownload(args)
case "version":
err = version.Version(Version)
default:
Expand Down
Loading

0 comments on commit ede3255

Please sign in to comment.