diff --git a/cmd/aws/main.go b/cmd/aws/main.go index fc2ad215..b29e2fb8 100644 --- a/cmd/aws/main.go +++ b/cmd/aws/main.go @@ -31,9 +31,9 @@ import ( tesseract "github.com/transparency-dev/static-ct" "github.com/transparency-dev/static-ct/storage" "github.com/transparency-dev/static-ct/storage/aws" - "github.com/transparency-dev/static-ct/storage/bbolt" tessera "github.com/transparency-dev/trillian-tessera" taws "github.com/transparency-dev/trillian-tessera/storage/aws" + aws_as "github.com/transparency-dev/trillian-tessera/storage/aws/antispam" "golang.org/x/mod/sumdb/note" "k8s.io/klog/v2" ) @@ -54,13 +54,14 @@ var ( origin = flag.String("origin", "", "Origin of the log, for checkpoints and the monitoring prefix.") bucket = flag.String("bucket", "", "Name of the bucket to store the log in.") dbName = flag.String("db_name", "", "AuroraDB name") + antispamDBName = flag.String("antispam_db_name", "", "AuroraDB antispam name") dbHost = flag.String("db_host", "", "AuroraDB host") dbPort = flag.Int("db_port", 3306, "AuroraDB port") dbUser = flag.String("db_user", "", "AuroraDB user") dbPassword = flag.String("db_password", "", "AuroraDB password") dbMaxConns = flag.Int("db_max_conns", 0, "Maximum connections to the database, defaults to 0, i.e unlimited") dbMaxIdle = flag.Int("db_max_idle_conns", 2, "Maximum idle database connections in the connection pool, defaults to 2") - dedupPath = flag.String("dedup_path", "", "Path to the deduplication database.") + inMemoryAntispamCacheSize = flag.Uint("inmemory_antispam_cache_size", 256<<10, "Maximum number of entries to keep in the in-memory antispam cache.") rootsPemFile = flag.String("roots_pem_file", "", "Path to the file containing root certificates that are acceptable to the log. The certs are served through get-roots endpoint.") rejectExpired = flag.Bool("reject_expired", false, "If true then the certificate validity period will be checked against the current time during the validation of submissions. This will cause expired certificates to be rejected.") rejectUnexpired = flag.Bool("reject_unexpired", false, "If true then CTFE rejects certificates that are either currently valid or not yet valid.") @@ -147,9 +148,19 @@ func newAWSStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, if err != nil { return nil, fmt.Errorf("failed to initialize AWS Tessera storage driver: %v", err) } - appender, _, _, err := tessera.NewAppender(ctx, driver, tessera.NewAppendOptions(). + + var antispam tessera.Antispam + if *antispamDBName != "" { + antispam, err = aws_as.NewAntispam(ctx, antispamMySQLConfig().FormatDSN(), aws_as.AntispamOpts{}) + if err != nil { + klog.Exitf("Failed to create new AWS antispam storage: %v", err) + } + } + + appender, _, reader, err := tessera.NewAppender(ctx, driver, tessera.NewAppendOptions(). WithCheckpointSigner(signer). - WithCTLayout()) + WithCTLayout(). 
+ WithAntispam(*inMemoryAntispamCacheSize, antispam)) if err != nil { return nil, fmt.Errorf("failed to initialize AWS Tessera storage: %v", err) } @@ -159,12 +170,7 @@ func newAWSStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, return nil, fmt.Errorf("failed to initialize AWS issuer storage: %v", err) } - beDedupStorage, err := bbolt.NewStorage(*dedupPath) - if err != nil { - return nil, fmt.Errorf("failed to initialize BBolt deduplication database: %v", err) - } - - return storage.NewCTStorage(appender, issuerStorage, beDedupStorage) + return storage.NewCTStorage(ctx, appender, issuerStorage, reader) } type timestampFlag struct { @@ -208,7 +214,7 @@ func storageConfigFromFlags() taws.Config { if *dbUser == "" { klog.Exit("--db_user must be set") } - // Empty passord isn't an option with AuroraDB MySQL. + // Empty password isn't an option with AuroraDB MySQL. if *dbPassword == "" { klog.Exit("--db_password must be set") } @@ -230,3 +236,32 @@ func storageConfigFromFlags() taws.Config { MaxIdleConns: *dbMaxIdle, } } + +func antispamMySQLConfig() *mysql.Config { + if *antispamDBName == "" { + klog.Exit("--antispam_db_name must be set") + } + if *dbHost == "" { + klog.Exit("--db_host must be set") + } + if *dbPort == 0 { + klog.Exit("--db_port must be set") + } + if *dbUser == "" { + klog.Exit("--db_user must be set") + } + // Empty password isn't an option with AuroraDB MySQL. + if *dbPassword == "" { + klog.Exit("--db_password must be set") + } + + return &mysql.Config{ + User: *dbUser, + Passwd: *dbPassword, + Net: "tcp", + Addr: fmt.Sprintf("%s:%d", *dbHost, *dbPort), + DBName: *antispamDBName, + AllowCleartextPasswords: true, + AllowNativePasswords: true, + } +} diff --git a/cmd/gcp/main.go b/cmd/gcp/main.go index 8dc1c674..f53cb7c2 100644 --- a/cmd/gcp/main.go +++ b/cmd/gcp/main.go @@ -33,6 +33,7 @@ import ( "github.com/transparency-dev/static-ct/storage/gcp" tessera "github.com/transparency-dev/trillian-tessera" tgcp "github.com/transparency-dev/trillian-tessera/storage/gcp" + gcp_as "github.com/transparency-dev/trillian-tessera/storage/gcp/antispam" "golang.org/x/mod/sumdb/note" "k8s.io/klog/v2" ) @@ -53,7 +54,8 @@ var ( origin = flag.String("origin", "", "Origin of the log, for checkpoints and the monitoring prefix.") bucket = flag.String("bucket", "", "Name of the bucket to store the log in.") spannerDB = flag.String("spanner_db_path", "", "Spanner database path: projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") - spannerDedupDB = flag.String("spanner_dedup_db_path", "", "Spanner deduplication database path: projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") + spannerAntispamDB = flag.String("spanner_antispam_db_path", "", "Spanner antispam deduplication database path: projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") + inMemoryAntispamCacheSize = flag.Uint("inmemory_antispam_cache_size", 256<<10, "Maximum number of entries to keep in the in-memory antispam cache.") rootsPemFile = flag.String("roots_pem_file", "", "Path to the file containing root certificates that are acceptable to the log. The certs are served through get-roots endpoint.") rejectExpired = flag.Bool("reject_expired", false, "If true then the certificate validity period will be checked against the current time during the validation of submissions. 
This will cause expired certificates to be rejected.") rejectUnexpired = flag.Bool("reject_unexpired", false, "If true then CTFE rejects certificates that are either currently valid or not yet valid.") @@ -157,13 +159,22 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, return nil, fmt.Errorf("failed to initialize GCP Tessera storage driver: %v", err) } + var antispam tessera.Antispam + if *spannerAntispamDB != "" { + antispam, err = gcp_as.NewAntispam(ctx, *spannerAntispamDB, gcp_as.AntispamOpts{}) + if err != nil { + klog.Exitf("Failed to create new GCP antispam storage: %v", err) + } + } + opts := tessera.NewAppendOptions(). WithCheckpointSigner(signer). - WithCTLayout() + WithCTLayout(). + WithAntispam(*inMemoryAntispamCacheSize, antispam) // TODO(phbnf): figure out the best way to thread the `shutdown` func NewAppends returns back out to main so we can cleanly close Tessera down // when it's time to exit. - appender, _, _, err := tessera.NewAppender(ctx, driver, opts) + appender, _, reader, err := tessera.NewAppender(ctx, driver, opts) if err != nil { return nil, fmt.Errorf("failed to initialize GCP Tessera appender: %v", err) } @@ -173,12 +184,7 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, return nil, fmt.Errorf("failed to initialize GCP issuer storage: %v", err) } - beDedupStorage, err := gcp.NewDedupeStorage(ctx, *spannerDedupDB) - if err != nil { - return nil, fmt.Errorf("failed to initialize GCP Spanner deduplication database: %v", err) - } - - return storage.NewCTStorage(appender, issuerStorage, beDedupStorage) + return storage.NewCTStorage(ctx, appender, issuerStorage, reader) } type timestampFlag struct { diff --git a/deployment/live/aws/conformance/ci/.terraform.lock.hcl b/deployment/live/aws/conformance/ci/.terraform.lock.hcl new file mode 100644 index 00000000..a9e266b5 --- /dev/null +++ b/deployment/live/aws/conformance/ci/.terraform.lock.hcl @@ -0,0 +1,66 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. 
+ +provider "registry.terraform.io/hashicorp/aws" { + version = "5.92.0" + constraints = "5.92.0" + hashes = [ + "h1:ZnpTxMfg5PThZc5WZCsZELinsR0gPhdTpNmXjVcf7aE=", + "zh:1d3a0b40831360e8e988aee74a9ff3d69d95cb541c2eae5cb843c64303a091ba", + "zh:3d29cbced6c708be2041a708d25c7c0fc22d09e4d0b174360ed113bfae786137", + "zh:4341a203cf5820a0ca18bb514ae10a6c113bc6a728fb432acbf817d232e8eff4", + "zh:4a49e2d91e4d92b6b93ccbcbdcfa2d67935ce62e33b939656766bb81b3fd9a2c", + "zh:54c7189358b37fd895dedbabf84e509c1980a8c404a1ee5b29b06e40497b8655", + "zh:5d8bb1ff089c37cb65c83b4647f1981fded993e87d8132915d92d79f29e2fcd8", + "zh:618f2eb87cd65b245aefba03991ad714a51ff3b841016ef68e2da2b85d0b2325", + "zh:7bce07bc542d0588ca42bac5098dd4f8af715417cd30166b4fb97cedd44ab109", + "zh:81419eab2d8810beb114b1ff5cbb592d21edc21b809dc12bb066e4b88fdd184a", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:9dea39d4748eeeebe2e76ca59bca4ccd161c2687050878c47289a98407a23372", + "zh:d692fc33b67ac89e916c8f9233d39eacab8c438fe10172990ee9d94fba5ca372", + "zh:d9075c7da48947c029ba47d5985e1e8e3bf92367bfee8ca1ff0e747765e779a1", + "zh:e81c62db317f3b640b2e04eba0ada8aa606bcbae0152c09f6242e86b86ef5889", + "zh:f68562e073722c378d2f3529eb80ad463f12c44aa5523d558ae3b69f4de5ca1f", + ] +} + +provider "registry.terraform.io/hashicorp/tls" { + version = "4.0.6" + hashes = [ + "h1:dYSb3V94K5dDMtrBRLPzBpkMTPn+3cXZ/kIJdtFL+2M=", + "zh:10de0d8af02f2e578101688fd334da3849f56ea91b0d9bd5b1f7a243417fdda8", + "zh:37fc01f8b2bc9d5b055dc3e78bfd1beb7c42cfb776a4c81106e19c8911366297", + "zh:4578ca03d1dd0b7f572d96bd03f744be24c726bfd282173d54b100fd221608bb", + "zh:6c475491d1250050765a91a493ef330adc24689e8837a0f07da5a0e1269e11c1", + "zh:81bde94d53cdababa5b376bbc6947668be4c45ab655de7aa2e8e4736dfd52509", + "zh:abdce260840b7b050c4e401d4f75c7a199fafe58a8b213947a258f75ac18b3e8", + "zh:b754cebfc5184873840f16a642a7c9ef78c34dc246a8ae29e056c79939963c7a", + "zh:c928b66086078f9917aef0eec15982f2e337914c5c4dbc31dd4741403db7eb18", + "zh:cded27bee5f24de6f2ee0cfd1df46a7f88e84aaffc2ecbf3ff7094160f193d50", + "zh:d65eb3867e8f69aaf1b8bb53bd637c99c6b649ba3db16ded50fa9a01076d1a27", + "zh:ecb0c8b528c7a619fa71852bb3fb5c151d47576c5aab2bf3af4db52588722eeb", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} + +provider "registry.terraform.io/petoju/mysql" { + version = "3.0.71" + constraints = "3.0.71" + hashes = [ + "h1:Xwam63rfo59cWPri28xmAbuqrFsxHwAE1P7ogpMUdSU=", + "zh:15200cd22d5a03fafb41cc2ffc654ab58cbcea80920e280ce08b46b3a8378813", + "zh:16e2cc6d38d5be31c59412ced68b451129f3d048d4debb3afaf5603befdfbf50", + "zh:31e9f6cc515378f3bec1fca57892b7e3224442801ac95c18ba83148e793dbc8e", + "zh:3c456ab07b34c8b472b050c3ef333b469164a85bb79c0e5a87ac0dc7aa45eb22", + "zh:538ffa7821989fabd763227fb85aebbd85df57dba9fffc2e060306761b320110", + "zh:69cdda5e80dfb4ca71ef44d719abdee9c9250153ac2269b3a3d17a679f3fb858", + "zh:87eb21da93c9ac51f74c5e8cf53690e51fc0561cfbe14d953c342eb583f31d22", + "zh:8db301a9ae9fc6374fb5cc42bbf81c0f0ea1e7f46f447de56dd752dcecb3c43f", + "zh:a1878a45070ec92659a39e265848fcbdd0f1423dc8eb8cc764c690f73c4430a5", + "zh:a9c9029784f7273d9c153d0a4291b493d68049193068bef4f0374dcaf379e67c", + "zh:ac91bc2538d87b4cf703d2bdae2ceab8567235a9d29c0de8b74bcde0eb828f75", + "zh:b95a21d4857fe84a7e8edc3ff5a828c37a9a99eb1e116045603a0bf00b1dcb89", + "zh:bf38af7e3cde5a717fa67e1e7e936550cb6d3197774e575f13ef974ec92061d2", + "zh:e3d481f0c7a47744c40dabe731082f1730d7ae55417257f4f49404b1a93701b8", + ] +} diff --git a/deployment/live/aws/test/.terraform.lock.hcl 
b/deployment/live/aws/test/.terraform.lock.hcl index fa046975..a9e266b5 100644 --- a/deployment/live/aws/test/.terraform.lock.hcl +++ b/deployment/live/aws/test/.terraform.lock.hcl @@ -41,4 +41,26 @@ provider "registry.terraform.io/hashicorp/tls" { "zh:ecb0c8b528c7a619fa71852bb3fb5c151d47576c5aab2bf3af4db52588722eeb", "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", ] -} \ No newline at end of file +} + +provider "registry.terraform.io/petoju/mysql" { + version = "3.0.71" + constraints = "3.0.71" + hashes = [ + "h1:Xwam63rfo59cWPri28xmAbuqrFsxHwAE1P7ogpMUdSU=", + "zh:15200cd22d5a03fafb41cc2ffc654ab58cbcea80920e280ce08b46b3a8378813", + "zh:16e2cc6d38d5be31c59412ced68b451129f3d048d4debb3afaf5603befdfbf50", + "zh:31e9f6cc515378f3bec1fca57892b7e3224442801ac95c18ba83148e793dbc8e", + "zh:3c456ab07b34c8b472b050c3ef333b469164a85bb79c0e5a87ac0dc7aa45eb22", + "zh:538ffa7821989fabd763227fb85aebbd85df57dba9fffc2e060306761b320110", + "zh:69cdda5e80dfb4ca71ef44d719abdee9c9250153ac2269b3a3d17a679f3fb858", + "zh:87eb21da93c9ac51f74c5e8cf53690e51fc0561cfbe14d953c342eb583f31d22", + "zh:8db301a9ae9fc6374fb5cc42bbf81c0f0ea1e7f46f447de56dd752dcecb3c43f", + "zh:a1878a45070ec92659a39e265848fcbdd0f1423dc8eb8cc764c690f73c4430a5", + "zh:a9c9029784f7273d9c153d0a4291b493d68049193068bef4f0374dcaf379e67c", + "zh:ac91bc2538d87b4cf703d2bdae2ceab8567235a9d29c0de8b74bcde0eb828f75", + "zh:b95a21d4857fe84a7e8edc3ff5a828c37a9a99eb1e116045603a0bf00b1dcb89", + "zh:bf38af7e3cde5a717fa67e1e7e936550cb6d3197774e575f13ef974ec92061d2", + "zh:e3d481f0c7a47744c40dabe731082f1730d7ae55417257f4f49404b1a93701b8", + ] +} diff --git a/deployment/live/aws/test/README.md b/deployment/live/aws/test/README.md index 0ea6ab90..8f2e9fba 100644 --- a/deployment/live/aws/test/README.md +++ b/deployment/live/aws/test/README.md @@ -66,14 +66,18 @@ export AWS_PROFILE=AdministratorAccess- Terraforming the account can be done by: 1. `cd` to [/deployment/live/aws/test/](/deployment/live/aws/test/) to deploy/change. - 2. Run `terragrunt apply`. - + 2. Run `terragrunt apply`. If this fails to create the antispam database, + connect the RDS instance to your VM using the instructions below, and run + `terragrunt apply` again. + Store the Aurora RDS database and S3 bucket information into the environment variables: ```sh export TESSERACT_DB_HOST=$(terragrunt output -raw rds_aurora_cluster_endpoint) export TESSERACT_DB_PASSWORD=$(aws secretsmanager get-secret-value --secret-id $(terragrunt output -json rds_aurora_cluster_master_user_secret | jq --raw-output .[0].secret_arn) --query SecretString --output text | jq --raw-output .password) export TESSERACT_BUCKET_NAME=$(terragrunt output -raw s3_bucket_name) +export SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID=$(terragrunt output -raw ecdsa_p256_public_key_id) +export SCTFE_SIGNER_ECDSA_P256_PRIVATE_KEY_ID=$(terragrunt output -raw ecdsa_p256_private_key_id) ``` Connect the VM and Aurora database following [these instructions](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/tutorial-ec2-rds-option1.html#option1-task3-connect-ec2-instance-to-rds-database), it takes a few clicks in the UI. 
@@ -95,7 +99,9 @@ go run ./cmd/aws \ --db_port=3306 \ --db_user=tesseract \ --db_password=${TESSERACT_DB_PASSWORD} \ - --dedup_path=test-static-ct + --antispam_db_name=antispam_db \ + --signer_public_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID} \ + --signer_private_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PRIVATE_KEY_ID} ``` In a different terminal you can either mint and submit certificates manually, or @@ -187,7 +193,9 @@ go run ./cmd/aws \ --db_port=3306 \ --db_user=tesseract \ --db_password=${TESSERACT_DB_PASSWORD} \ - --dedup_path=test-static-ct + --antispam_db_name=antispam_db \ + --signer_public_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID} \ + --signer_private_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PRIVATE_KEY_ID} -v=3 ``` diff --git a/deployment/live/gcp/static-ct/logs/ci/README.md b/deployment/live/gcp/static-ct/logs/ci/README.md index acf5a4e5..86306162 100644 --- a/deployment/live/gcp/static-ct/logs/ci/README.md +++ b/deployment/live/gcp/static-ct/logs/ci/README.md @@ -8,7 +8,7 @@ define a CI environment to run the SCTFE on Cloud Run, backed by Trillian Tesser At a high level, this environment consists of: - One Spanner instance with two databases: - one for Tessera - - one for deduplication + - one for antispam - A GCS Bucket - Secret Manager - Cloud Run diff --git a/deployment/live/gcp/test/README.md b/deployment/live/gcp/test/README.md index d330cbb0..2dc8e255 100644 --- a/deployment/live/gcp/test/README.md +++ b/deployment/live/gcp/test/README.md @@ -14,7 +14,7 @@ define a test environment to run the SCTFE, backed by Trillian Tessera. At a high level, this environment consists of: - One Spanner instance with two databases: - one for Tessera - - one for deduplication + - one for antispam - A GCS Bucket - Secret Manager @@ -56,7 +56,7 @@ On the VM, run the following command to bring up the SCTFE: go run ./cmd/gcp/ \ --bucket=${GOOGLE_PROJECT}-${TESSERA_BASE_NAME}-bucket \ --spanner_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-db \ - --spanner_dedup_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-dedup-db \ + --spanner_antispam_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-antispam-db \ --roots_pem_file=./internal/testdata/fake-ca.cert \ --origin=${TESSERA_BASE_NAME} \ --signer_public_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID} \ @@ -147,7 +147,7 @@ go run ./cmd/gcp/ \ --spanner_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-db \ --roots_pem_file=/tmp/hammercfg/roots.pem \ --origin=${TESSERA_BASE_NAME} \ - --spanner_dedup_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-dedup-db \ + --spanner_antispam_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-antispam-db \ --signer_public_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID} \ --signer_private_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PRIVATE_KEY_ID} \ -v=3 diff --git a/deployment/modules/aws/storage/main.tf b/deployment/modules/aws/storage/main.tf index 3e5af201..d585a5f2 100644 --- a/deployment/modules/aws/storage/main.tf +++ b/deployment/modules/aws/storage/main.tf @@ -4,6 +4,10 @@ terraform { source = "hashicorp/aws" version = "5.92.0" } + mysql = { + source = "petoju/mysql" + version = "3.0.71" + } } } @@ -50,3 +54,20 @@ data "aws_secretsmanager_secret_version" "db_credentials" { 
aws_rds_cluster_instance.cluster_instances ] } + +# Configure the MySQL provider based on the outcome of +# creating the aws_db_instance. +# This requires that the machine running terraform has access +# to the DB instance created above. This is _NOT_ the case when +# GitHub actions are applying the terraform. +provider "mysql" { + endpoint = aws_rds_cluster_instance.cluster_instances[0].endpoint + username = aws_rds_cluster.log_rds_cluster.master_username + password = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["password"] +} + +# Create a second database for antispam. +resource "mysql_database" "antispam_db" { + name = "antispam_db" + count = var.create_antispam_db ? 1 : 0 +} diff --git a/deployment/modules/aws/storage/outputs.tf b/deployment/modules/aws/storage/outputs.tf index 1b05d0de..b9c13ddc 100644 --- a/deployment/modules/aws/storage/outputs.tf +++ b/deployment/modules/aws/storage/outputs.tf @@ -33,3 +33,7 @@ output "rds_aurora_cluster_master_user_secret_unsafe" { value = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["password"] sensitive = true # Mark as sensitive, but it can still be exposed } + +output "antispam_database_name" { + value = mysql_database.antispam_db[0].name +} diff --git a/deployment/modules/aws/storage/variables.tf b/deployment/modules/aws/storage/variables.tf index 530c1312..cb59fccf 100644 --- a/deployment/modules/aws/storage/variables.tf +++ b/deployment/modules/aws/storage/variables.tf @@ -13,6 +13,12 @@ variable "region" { type = string } +variable "create_antispam_db" { + description = "Set to true to create another database to be used by the antispam implementation." + type = bool + default = false +} + variable "ephemeral" { description = "Set to true if this is a throwaway/temporary log instance. Will set attributes on created resources to allow them to be disabled/deleted more easily." type = bool diff --git a/deployment/modules/aws/tesseract/conformance/main.tf b/deployment/modules/aws/tesseract/conformance/main.tf index 2a95de6b..57f5cff3 100644 --- a/deployment/modules/aws/tesseract/conformance/main.tf +++ b/deployment/modules/aws/tesseract/conformance/main.tf @@ -16,10 +16,11 @@ locals { module "storage" { source = "../../storage" - prefix_name = var.prefix_name - base_name = var.base_name - region = var.region - ephemeral = var.ephemeral + prefix_name = var.prefix_name + base_name = var.base_name + region = var.region + ephemeral = var.ephemeral + create_antispam_db = true } module "secretsmanager" { @@ -168,9 +169,10 @@ resource "aws_ecs_task_definition" "conformance" { "--db_name=tesseract", "--db_host=${module.storage.rds_aurora_cluster_endpoint}", "--db_port=3306", - "--dedup_path=ci-static-ct", "--signer_public_key_secret_name=${module.secretsmanager.ecdsa_p256_public_key_id}", "--signer_private_key_secret_name=${module.secretsmanager.ecdsa_p256_private_key_id}", + "--antispam_db_name=${module.storage.antispam_database_name}", + "--inmemory_antispam_cache_size=25000000", # About 1GB of memory. 
"-v=2" ], "logConfiguration" : { diff --git a/deployment/modules/aws/tesseract/test/main.tf b/deployment/modules/aws/tesseract/test/main.tf index a479898a..4c1748e9 100644 --- a/deployment/modules/aws/tesseract/test/main.tf +++ b/deployment/modules/aws/tesseract/test/main.tf @@ -5,10 +5,11 @@ terraform { module "storage" { source = "../../storage" - prefix_name = var.prefix_name - base_name = var.base_name - region = var.region - ephemeral = var.ephemeral + prefix_name = var.prefix_name + base_name = var.base_name + region = var.region + ephemeral = var.ephemeral + create_antispam_db = true } module "secretsmanager" { diff --git a/deployment/modules/gcp/cloudrun/main.tf b/deployment/modules/gcp/cloudrun/main.tf index 1c0d2bed..4b16a1a5 100644 --- a/deployment/modules/gcp/cloudrun/main.tf +++ b/deployment/modules/gcp/cloudrun/main.tf @@ -12,7 +12,7 @@ terraform { locals { cloudrun_service_account_id = var.env == "" ? "cloudrun-sa" : "cloudrun-${var.env}-sa" spanner_log_db_path = "projects/${var.project_id}/instances/${var.log_spanner_instance}/databases/${var.log_spanner_db}" - spanner_dedup_db_path = "projects/${var.project_id}/instances/${var.log_spanner_instance}/databases/${var.dedup_spanner_db}" + spanner_antispam_db_path = "projects/${var.project_id}/instances/${var.log_spanner_instance}/databases/${var.antispam_spanner_db}" } resource "google_project_service" "cloudrun_api" { @@ -44,11 +44,12 @@ resource "google_cloud_run_v2_service" "default" { "--http_endpoint=:6962", "--bucket=${var.bucket}", "--spanner_db_path=${local.spanner_log_db_path}", - "--spanner_dedup_db_path=${local.spanner_dedup_db_path}", + "--spanner_antispam_db_path=${local.spanner_antispam_db_path}", "--roots_pem_file=/bin/test_root_ca_cert.pem", "--origin=${var.base_name}${var.origin_suffix}", "--signer_public_key_secret_name=${var.signer_public_key_secret_name}", "--signer_private_key_secret_name=${var.signer_private_key_secret_name}", + "--inmemory_antispam_cache_size=25000000", # About 1GB of memory. 
] ports { container_port = 6962 diff --git a/deployment/modules/gcp/cloudrun/variables.tf b/deployment/modules/gcp/cloudrun/variables.tf index f77b7812..3d8ae310 100644 --- a/deployment/modules/gcp/cloudrun/variables.tf +++ b/deployment/modules/gcp/cloudrun/variables.tf @@ -43,8 +43,8 @@ variable "log_spanner_db" { type = string } -variable "dedup_spanner_db" { - description = "Dedup Spanner database" +variable "antispam_spanner_db" { + description = "Antispam Spanner database" type = string } diff --git a/deployment/modules/gcp/storage/main.tf b/deployment/modules/gcp/storage/main.tf index 6cf259dd..5a9fb644 100644 --- a/deployment/modules/gcp/storage/main.tf +++ b/deployment/modules/gcp/storage/main.tf @@ -64,11 +64,12 @@ resource "google_spanner_database" "log_db" { deletion_protection = !var.ephemeral } -resource "google_spanner_database" "dedup_db" { +resource "google_spanner_database" "antispam_db" { instance = google_spanner_instance.log_spanner.name - name = "${var.base_name}-dedup-db" + name = "${var.base_name}-antispam-db" ddl = [ - "CREATE TABLE IDSeq (id INT64 NOT NULL, h BYTES(MAX) NOT NULL, idx INT64 NOT NULL, timestamp INT64 NOT NULL,) PRIMARY KEY (id, h)", + "CREATE TABLE IF NOT EXISTS FollowCoord (id INT64 NOT NULL, nextIdx INT64 NOT NULL) PRIMARY KEY (id)", + "CREATE TABLE IF NOT EXISTS IDSeq (h BYTES(32) NOT NULL, idx INT64 NOT NULL) PRIMARY KEY (h)", ] deletion_protection = !var.ephemeral diff --git a/deployment/modules/gcp/storage/outputs.tf b/deployment/modules/gcp/storage/outputs.tf index 578b9ffa..bbbd74cc 100644 --- a/deployment/modules/gcp/storage/outputs.tf +++ b/deployment/modules/gcp/storage/outputs.tf @@ -13,7 +13,7 @@ output "log_spanner_db" { value = google_spanner_database.log_db } -output "dedup_spanner_db" { - description = "Dedup Spanner database" - value = google_spanner_database.dedup_db +output "antispam_spanner_db" { + description = "Antispam Spanner database" + value = google_spanner_database.antispam_db } diff --git a/deployment/modules/gcp/tesseract/cloudrun/main.tf b/deployment/modules/gcp/tesseract/cloudrun/main.tf index 42344e3a..786a537f 100644 --- a/deployment/modules/gcp/tesseract/cloudrun/main.tf +++ b/deployment/modules/gcp/tesseract/cloudrun/main.tf @@ -30,7 +30,7 @@ module "cloudrun" { bucket = module.storage.log_bucket.id log_spanner_instance = module.storage.log_spanner_instance.name log_spanner_db = module.storage.log_spanner_db.name - dedup_spanner_db = module.storage.dedup_spanner_db.name + antispam_spanner_db = module.storage.antispam_spanner_db.name signer_public_key_secret_name = module.secretmanager.ecdsa_p256_public_key_id signer_private_key_secret_name = module.secretmanager.ecdsa_p256_private_key_id diff --git a/go.mod b/go.mod index 08a5de5f..8af928a2 100644 --- a/go.mod +++ b/go.mod @@ -67,6 +67,9 @@ require ( github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 // indirect + github.com/dgraph-io/badger/v4 v4.7.0 // indirect + github.com/dgraph-io/ristretto/v2 v2.2.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -76,11 +79,13 @@ require ( github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect + 
github.com/google/flatbuffers v25.2.10+incompatible // indirect github.com/google/s2a-go v0.1.9 // indirect github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect github.com/googleapis/gax-go/v2 v2.14.1 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/klauspost/compress v1.18.0 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/mattn/go-runewidth v0.0.16 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect diff --git a/go.sum b/go.sum index 790527ed..13d32870 100644 --- a/go.sum +++ b/go.sum @@ -724,8 +724,14 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgraph-io/badger/v4 v4.7.0 h1:Q+J8HApYAY7UMpL8d9owqiB+odzEc0zn/aqOD9jhc6Y= +github.com/dgraph-io/badger/v4 v4.7.0/go.mod h1:He7TzG3YBy3j4f5baj5B7Zl2XyfNe5bl4Udl0aPemVA= +github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM= +github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI= github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -831,6 +837,8 @@ github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= +github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -920,6 +928,8 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:C github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod 
h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= diff --git a/internal/ct/ctlog.go b/internal/ct/ctlog.go index c516a22b..51f85f90 100644 --- a/internal/ct/ctlog.go +++ b/internal/ct/ctlog.go @@ -9,9 +9,7 @@ import ( "fmt" "github.com/transparency-dev/static-ct/internal/types/rfc6962" - "github.com/transparency-dev/static-ct/modules/dedup" "github.com/transparency-dev/static-ct/storage" - tessera "github.com/transparency-dev/trillian-tessera" "github.com/transparency-dev/trillian-tessera/ctonly" "k8s.io/klog/v2" ) @@ -36,13 +34,9 @@ type signSCT func(leaf *rfc6962.MerkleTreeLeaf) (*rfc6962.SignedCertificateTimes // Storage provides functions to store certificates in a static-ct-api log. type Storage interface { // Add assigns an index to the provided Entry, stages the entry for integration, and returns a future for the assigned index. - Add(context.Context, *ctonly.Entry) tessera.IndexFuture + Add(context.Context, *ctonly.Entry) (idx uint64, timestamp uint64, err error) // AddIssuerChain stores every the chain certificate in a content-addressable store under their sha256 hash. AddIssuerChain(context.Context, []*x509.Certificate) error - // AddCertDedupInfo stores the SCTDedupInfo of certificate in a log under its hash. - AddCertDedupInfo(context.Context, *x509.Certificate, dedup.SCTDedupInfo) error - // GetCertDedupInfo gets the SCTDedupInfo of certificate in a log from its hash. - GetCertDedupInfo(context.Context, *x509.Certificate) (dedup.SCTDedupInfo, bool, error) } // ChainValidator provides functions to validate incoming chains. diff --git a/internal/ct/handlers.go b/internal/ct/handlers.go index 07534789..95766f8a 100644 --- a/internal/ct/handlers.go +++ b/internal/ct/handlers.go @@ -30,7 +30,6 @@ import ( "github.com/transparency-dev/static-ct/internal/types/rfc6962" "github.com/transparency-dev/static-ct/internal/types/tls" "github.com/transparency-dev/static-ct/internal/x509util" - "github.com/transparency-dev/static-ct/modules/dedup" tessera "github.com/transparency-dev/trillian-tessera" "go.opentelemetry.io/otel/metric" "k8s.io/klog/v2" @@ -69,7 +68,9 @@ var ( // setupMetrics initializes all the exported metrics. func setupMetrics() { - // TODO(phboneff): add metrics for deduplication and chain storage. + // TODO(phboneff): add metrics for chain storage. + // TODO(phboneff): add metrics for deduplication. + // TODO(phboneff): break down metrics by whether or not the response has been deduped. 
knownLogs = mustCreate(meter.Int64Gauge("tesseract.known_logs", metric.WithDescription("Set to 1 for known logs"))) @@ -120,11 +121,11 @@ func (a appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { originAttr := originKey.String(a.log.origin) operationAttr := operationKey.String(a.name) reqCounter.Add(r.Context(), 1, metric.WithAttributes(originAttr, operationAttr)) - startTime := a.opts.TimeSource.Now() + startTime := time.Now() logCtx := a.opts.RequestLog.start(r.Context()) a.opts.RequestLog.origin(logCtx, a.log.origin) defer func() { - latency := a.opts.TimeSource.Now().Sub(startTime).Seconds() + latency := time.Since(startTime).Seconds() reqDuration.Record(r.Context(), latency, metric.WithAttributes(originAttr, operationAttr, codeKey.Int(statusCode))) }() @@ -148,7 +149,8 @@ func (a appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } // impose a deadline on this onward request. - ctx, cancel := context.WithDeadline(logCtx, deadlineTime(a.opts)) + // TODO(phbnf): fine tune together with deduplication + ctx, cancel := context.WithTimeout(logCtx, a.opts.Deadline) defer cancel() var err error @@ -180,6 +182,7 @@ type HandlerOptions struct { // or returned to the user containing the full error message. MaskInternalErrors bool // TimeSource indicated the system time and can be injfected for testing. + // TODO(phbnf): hide inside the log TimeSource TimeSource } @@ -271,47 +274,24 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt return http.StatusBadRequest, fmt.Errorf("failed to build MerkleTreeLeaf: %s", err) } - klog.V(2).Infof("%s: %s => storage.GetCertIndex", log.origin, method) - sctDedupInfo, isDup, err := log.storage.GetCertDedupInfo(ctx, chain[0]) - idx := sctDedupInfo.Idx - if err != nil { - return http.StatusInternalServerError, fmt.Errorf("couldn't deduplicate the request: %s", err) + if err := log.storage.AddIssuerChain(ctx, chain[1:]); err != nil { + return http.StatusInternalServerError, fmt.Errorf("failed to store issuer chain: %s", err) } - if isDup { - klog.V(3).Infof("%s: %s - found duplicate entry at index %d", log.origin, method, idx) - entry.Timestamp = sctDedupInfo.Timestamp - } else { - if err := log.storage.AddIssuerChain(ctx, chain[1:]); err != nil { - return http.StatusInternalServerError, fmt.Errorf("failed to store issuer chain: %s", err) - } - - klog.V(2).Infof("%s: %s => storage.Add", log.origin, method) - index, err := log.storage.Add(ctx, entry)() - if err != nil { - if errors.Is(err, tessera.ErrPushback) { - w.Header().Add("Retry-After", "1") - return http.StatusServiceUnavailable, fmt.Errorf("received pushback from Tessera sequencer: %v", err) - } - return http.StatusInternalServerError, fmt.Errorf("couldn't store the leaf: %v", err) - } - // TODO(phbnf): figure out whether to use Tessera's index.IsDup() or a separate "external" antispam impl. - idx = index.Index - - // We store the index for this certificate in the deduplication storage immediately. - // It might be stored again later, if a local deduplication storage is synced, potentially - // with a smaller value. 
- klog.V(2).Infof("%s: %s => storage.AddCertIndex", log.origin, method) - err = log.storage.AddCertDedupInfo(ctx, chain[0], dedup.SCTDedupInfo{Idx: idx, Timestamp: entry.Timestamp}) - // TODO: block log writes if deduplication breaks - if err != nil { - klog.Warningf("AddCertIndex(): failed to store certificate index: %v", err) + klog.V(2).Infof("%s: %s => storage.Add", log.origin, method) + index, dedupedTimeMillis, err := log.storage.Add(ctx, entry) + if err != nil { + if errors.Is(err, tessera.ErrPushback) { + w.Header().Add("Retry-After", "1") + return http.StatusServiceUnavailable, fmt.Errorf("received pushback from Tessera sequencer: %v", err) } + return http.StatusInternalServerError, fmt.Errorf("couldn't store the leaf: %v", err) } + entry.Timestamp = dedupedTimeMillis // Always use the returned leaf as the basis for an SCT. var loggedLeaf rfc6962.MerkleTreeLeaf - leafValue := entry.MerkleTreeLeaf(idx) + leafValue := entry.MerkleTreeLeaf(index) if rest, err := tls.Unmarshal(leafValue, &loggedLeaf); err != nil { return http.StatusInternalServerError, fmt.Errorf("failed to reconstruct MerkleTreeLeaf: %s", err) } else if len(rest) > 0 { @@ -338,7 +318,7 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt klog.V(3).Infof("%s: %s <= SCT", log.origin, method) if sct.Timestamp == timeMillis { lastSCTTimestamp.Record(ctx, otel.Clamp64(sct.Timestamp), metric.WithAttributes(originKey.String(log.origin))) - lastSCTIndex.Record(ctx, otel.Clamp64(idx), metric.WithAttributes(originKey.String(log.origin))) + lastSCTIndex.Record(ctx, otel.Clamp64(index), metric.WithAttributes(originKey.String(log.origin))) } return http.StatusOK, nil @@ -381,11 +361,6 @@ func getRoots(ctx context.Context, opts *HandlerOptions, log *log, w http.Respon return http.StatusOK, nil } -// deadlineTime calculates the future time a request should expire based on our config. -func deadlineTime(opts *HandlerOptions) time.Time { - return opts.TimeSource.Now().Add(opts.Deadline) -} - // marshalAndWriteAddChainResponse is used by add-chain and add-pre-chain to create and write // the JSON response to the client func marshalAndWriteAddChainResponse(sct *rfc6962.SignedCertificateTimestamp, w http.ResponseWriter) error { diff --git a/internal/ct/handlers_test.go b/internal/ct/handlers_test.go index 4b979ae0..fb823e0e 100644 --- a/internal/ct/handlers_test.go +++ b/internal/ct/handlers_test.go @@ -41,11 +41,11 @@ import ( "github.com/transparency-dev/static-ct/internal/types/staticct" "github.com/transparency-dev/static-ct/internal/x509util" "github.com/transparency-dev/static-ct/storage" - "github.com/transparency-dev/static-ct/storage/bbolt" tessera "github.com/transparency-dev/trillian-tessera" "github.com/transparency-dev/trillian-tessera/api/layout" "github.com/transparency-dev/trillian-tessera/ctonly" posixTessera "github.com/transparency-dev/trillian-tessera/storage/posix" + badger_as "github.com/transparency-dev/trillian-tessera/storage/posix/antispam" "golang.org/x/mod/sumdb/note" "k8s.io/klog/v2" ) @@ -55,8 +55,10 @@ var ( testRootPath = "../testdata/test_root_ca_cert.pem" // Arbitrary time for use in tests - fakeTime = time.Date(2016, 7, 22, 11, 01, 13, 0, time.UTC) - fakeTimeMillis = uint64(fakeTime.UnixNano() / nanosPerMilli) + fakeTimeStart = time.Date(2016, 7, 22, 11, 01, 13, 0, time.UTC) + // TODO(phbnf): this doesn't need to be gloabal, but it easier until + // TimeSource belongs to hOpts. 
+ timeSource = newFakeTimeSource(fakeTimeStart) // Arbitrary origin for tests origin = "example.com" @@ -64,10 +66,10 @@ var ( // Default handler options for tests hOpts = HandlerOptions{ - Deadline: time.Millisecond * 500, + Deadline: time.Millisecond * 2000, RequestLog: &DefaultRequestLog{}, MaskInternalErrors: false, - TimeSource: newFixedTimeSource(fakeTime), + TimeSource: timeSource, } // POSIX subdirectories @@ -79,16 +81,26 @@ type fixedTimeSource struct { fakeTime time.Time } -// newFixedTimeSource creates a fixedTimeSource instance -func newFixedTimeSource(t time.Time) *fixedTimeSource { +// newFakeTimeSource creates a fixedTimeSource instance +func newFakeTimeSource(t time.Time) *fixedTimeSource { return &fixedTimeSource{fakeTime: t} } +// Reset reinitializes the fake time source +func (f *fixedTimeSource) Reset() { + f.fakeTime = fakeTimeStart +} + // Now returns the time value this instance contains func (f *fixedTimeSource) Now() time.Time { return f.fakeTime } +// Add1m increments time by 1 minute +func (f *fixedTimeSource) Add1m() { + f.fakeTime = f.fakeTime.Add(time.Minute) +} + // setupTestLog creates a test TesseraCT log using a POSIX backend. // // It returns the log and the path to the storage directory. @@ -112,7 +124,7 @@ func setupTestLog(t *testing.T) (*log, string) { rejectUnexpired: false, } - log, err := NewLog(t.Context(), origin, sctSigner.signer, cv, newPOSIXStorageFunc(t, storageDir), newFixedTimeSource(fakeTime)) + log, err := NewLog(t.Context(), origin, sctSigner.signer, cv, newPOSIXStorageFunc(t, storageDir), timeSource) if err != nil { t.Fatalf("newLog(): %v", err) } @@ -135,8 +147,8 @@ func setupTestServer(t *testing.T, log *log, path string) *httptest.Server { // newPOSIXStorageFunc returns a function to create a new storage.CTStorage instance with: // - a POSIX Tessera storage driver +// - a Badger Tessera antispam database // - a POSIX issuer storage system -// - a BBolt deduplication database // // It also prepares directories to host the log and the deduplication database. func newPOSIXStorageFunc(t *testing.T, root string) storage.CreateStorage { @@ -148,13 +160,22 @@ func newPOSIXStorageFunc(t *testing.T, root string) storage.CreateStorage { klog.Fatalf("Failed to initialize POSIX Tessera storage driver: %v", err) } + asOpts := badger_as.AntispamOpts{ + MaxBatchSize: 5000, + PushbackThreshold: 1024, + } + antispam, err := badger_as.NewAntispam(ctx, path.Join(root, "dedup.db"), asOpts) + if err != nil { + klog.Exitf("Failed to create new Badger antispam storage: %v", err) + } + opts := tessera.NewAppendOptions(). WithCheckpointSigner(signer). - WithCTLayout() - // TODO(phboneff): add other options like MaxBatchSize of 1 when implementing - // additional tests + WithCTLayout(). + WithAntispam(256, antispam). 
+ WithCheckpointInterval(time.Second) - appender, _, _, err := tessera.NewAppender(ctx, driver, opts) + appender, _, reader, err := tessera.NewAppender(ctx, driver, opts) if err != nil { klog.Fatalf("Failed to initialize POSIX Tessera appender: %v", err) } @@ -164,12 +185,7 @@ func newPOSIXStorageFunc(t *testing.T, root string) storage.CreateStorage { klog.Fatalf("failed to initialize InMemory issuer storage: %v", err) } - beDedupStorage, err := bbolt.NewStorage(path.Join(root, "dedup.db")) - if err != nil { - klog.Fatalf("Failed to initialize BBolt deduplication database: %v", err) - } - - s, err := storage.NewCTStorage(appender, issuerStorage, beDedupStorage) + s, err := storage.NewCTStorage(t.Context(), appender, issuerStorage, reader) if err != nil { klog.Fatalf("Failed to initialize CTStorage: %v", err) } @@ -315,7 +331,7 @@ func TestNewPathHandlers(t *testing.T) { }) } -func parseChain(t *testing.T, isPrecert bool, pemChain []string, root *x509.Certificate) (*ctonly.Entry, []*x509.Certificate) { +func parseChain(t *testing.T, isPrecert bool, pemChain []string, root *x509.Certificate, timestamp time.Time) (*ctonly.Entry, []*x509.Certificate) { t.Helper() pool := loadCertsIntoPoolOrDie(t, pemChain) leafChain := pool.RawCertificates() @@ -326,7 +342,7 @@ func parseChain(t *testing.T, isPrecert bool, pemChain []string, root *x509.Cert fullChain[len(leafChain)] = root leafChain = fullChain } - entry, err := x509util.EntryFromChain(leafChain, isPrecert, fakeTimeMillis) + entry, err := x509util.EntryFromChain(leafChain, isPrecert, uint64(timestamp.UnixMilli())) if err != nil { t.Fatalf("Failed to create entry") } @@ -444,12 +460,13 @@ func TestAddChainWhitespace(t *testing.T) { func TestAddChain(t *testing.T) { var tests = []struct { - descr string - chain []string - want int - wantIdx uint64 - wantLogSize uint64 - err error + descr string + chain []string + want int + wantIdx uint64 + wantLogSize uint64 + wantTimestamp time.Time + err error }{ { descr: "leaf-only", @@ -462,45 +479,53 @@ func TestAddChain(t *testing.T) { want: http.StatusBadRequest, }, { - descr: "success", - chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, - wantIdx: 0, - wantLogSize: 1, - want: http.StatusOK, + descr: "success", + chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, + wantIdx: 0, + wantLogSize: 1, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), + want: http.StatusOK, }, { - descr: "success-duplicate", - chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, - wantIdx: 0, - wantLogSize: 1, - want: http.StatusOK, + descr: "success-duplicate", + chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, + wantIdx: 0, + wantLogSize: 1, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), + want: http.StatusOK, }, { - descr: "success-not-duplicate", - chain: []string{testdata.TestCertPEM, testdata.CACertPEM}, - wantIdx: 1, - wantLogSize: 2, - want: http.StatusOK, + descr: "success-not-duplicate", + chain: []string{testdata.TestCertPEM, testdata.CACertPEM}, + wantIdx: 1, + wantLogSize: 2, + wantTimestamp: fakeTimeStart.Add(5 * time.Minute), + want: http.StatusOK, }, { - descr: "success-without-root", - chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot}, - wantIdx: 0, - wantLogSize: 2, - want: http.StatusOK, + descr: "success-without-root-duplicate", + chain: []string{testdata.CertFromIntermediate, 
testdata.IntermediateFromRoot}, + wantIdx: 0, + wantLogSize: 2, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), + want: http.StatusOK, }, } log, dir := setupTestLog(t) server := setupTestServer(t, log, path.Join(prefix, rfc6962.AddChainPath)) defer server.Close() + defer timeSource.Reset() for _, test := range tests { + // Increment time to make it unique for each test case. + timeSource.Add1m() t.Run(test.descr, func(t *testing.T) { pool := loadCertsIntoPoolOrDie(t, test.chain) chain := createJSONChain(t, *pool) resp, err := http.Post(server.URL+rfc6962.AddChainPath, "application/json", chain) + if err != nil { t.Fatalf("http.Post(%s)=(_,%q); want (_,nil)", rfc6962.AddChainPath, err) } @@ -508,7 +533,7 @@ func TestAddChain(t *testing.T) { t.Errorf("http.Post(%s)=(%d,nil); want (%d,nil)", rfc6962.AddChainPath, got, want) } if test.want == http.StatusOK { - unseqEntry, wantIssChain := parseChain(t, false, test.chain, log.chainValidator.Roots()[0]) + unseqEntry, wantIssChain := parseChain(t, false, test.chain, log.chainValidator.Roots()[0], test.wantTimestamp) var gotRsp rfc6962.AddChainResponse if err := json.NewDecoder(resp.Body).Decode(&gotRsp); err != nil { @@ -520,7 +545,7 @@ func TestAddChain(t *testing.T) { if got, want := gotRsp.ID, demoLogID[:]; !bytes.Equal(got, want) { t.Errorf("resp.ID=%v; want %v", got, want) } - if got, want := gotRsp.Timestamp, fakeTimeMillis; got != want { + if got, want := gotRsp.Timestamp, uint64(test.wantTimestamp.UnixMilli()); got != want { t.Errorf("resp.Timestamp=%d; want %d", got, want) } if got, want := hex.EncodeToString(gotRsp.Signature), "040300067369676e6564"; got != want { @@ -583,12 +608,13 @@ func TestAddChain(t *testing.T) { func TestAddPreChain(t *testing.T) { var tests = []struct { - descr string - chain []string - want int - wantIdx uint64 - wantLogSize uint64 - err error + descr string + chain []string + want int + wantIdx uint64 + wantLogSize uint64 + wantTimestamp time.Time + err error }{ { descr: "leaf-signed-by-different", @@ -601,40 +627,47 @@ func TestAddPreChain(t *testing.T) { want: http.StatusBadRequest, }, { - descr: "success", - chain: []string{testdata.PrecertPEMValid, testdata.CACertPEM}, - want: http.StatusOK, - wantIdx: 0, - wantLogSize: 1, + descr: "success", + chain: []string{testdata.PrecertPEMValid, testdata.CACertPEM}, + want: http.StatusOK, + wantIdx: 0, + wantLogSize: 1, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), }, { - descr: "success-duplicate", - chain: []string{testdata.PrecertPEMValid, testdata.CACertPEM}, - want: http.StatusOK, - wantIdx: 0, - wantLogSize: 1, + descr: "success-duplicate", + chain: []string{testdata.PrecertPEMValid, testdata.CACertPEM}, + want: http.StatusOK, + wantIdx: 0, + wantLogSize: 1, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), }, { - descr: "success-with-intermediate", - chain: []string{testdata.PreCertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, - want: http.StatusOK, - wantIdx: 1, - wantLogSize: 2, + descr: "success-with-intermediate", + chain: []string{testdata.PreCertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, + want: http.StatusOK, + wantIdx: 1, + wantLogSize: 2, + wantTimestamp: fakeTimeStart.Add(5 * time.Minute), }, { - descr: "success-without-root", - chain: []string{testdata.PrecertPEMValid}, - want: http.StatusOK, - wantIdx: 0, - wantLogSize: 2, + descr: "success-without-root-duplicate", + chain: []string{testdata.PrecertPEMValid}, + want: http.StatusOK, + wantIdx: 0, + wantLogSize: 2, + wantTimestamp: 
fakeTimeStart.Add(3 * time.Minute), }, } log, dir := setupTestLog(t) server := setupTestServer(t, log, path.Join(prefix, rfc6962.AddPreChainPath)) defer server.Close() + defer timeSource.Reset() for _, test := range tests { + // Increment time to make it unique for each test case. + timeSource.Add1m() t.Run(test.descr, func(t *testing.T) { pool := loadCertsIntoPoolOrDie(t, test.chain) chain := createJSONChain(t, *pool) @@ -647,7 +680,7 @@ func TestAddPreChain(t *testing.T) { t.Errorf("http.Post(%s)=(%d,nil); want (%d,nil)", rfc6962.AddPreChainPath, got, want) } if test.want == http.StatusOK { - unseqEntry, wantIssChain := parseChain(t, true, test.chain, log.chainValidator.Roots()[0]) + unseqEntry, wantIssChain := parseChain(t, true, test.chain, log.chainValidator.Roots()[0], test.wantTimestamp) var gotRsp rfc6962.AddChainResponse if err := json.NewDecoder(resp.Body).Decode(&gotRsp); err != nil { @@ -659,7 +692,7 @@ func TestAddPreChain(t *testing.T) { if got, want := gotRsp.ID, demoLogID[:]; !bytes.Equal(got, want) { t.Errorf("resp.ID=%v; want %v", got, want) } - if got, want := gotRsp.Timestamp, fakeTimeMillis; got != want { + if got, want := gotRsp.Timestamp, uint64(test.wantTimestamp.UnixMilli()); got != want { t.Errorf("resp.Timestamp=%d; want %d", got, want) } if got, want := hex.EncodeToString(gotRsp.Signature), "040300067369676e6564"; got != want { diff --git a/internal/types/staticct/staticct.go b/internal/types/staticct/staticct.go index 5e78a897..cb08a1ad 100644 --- a/internal/types/staticct/staticct.go +++ b/internal/types/staticct/staticct.go @@ -37,6 +37,8 @@ type EntryBundle struct { // UnmarshalText implements encoding/TextUnmarshaler and reads EntryBundles // which are encoded using the Static CT API spec. +// TODO(phbnf): we can probably parse every individual leaf directly, since most callers +// of this method tend to do so. func (t *EntryBundle) UnmarshalText(raw []byte) error { entries := make([][]byte, 0, layout.EntryBundleWidth) s := cryptobyte.String(raw) @@ -164,6 +166,21 @@ type Entry struct { LeafIndex uint64 } +// UnmarshalTimestamp extracts the timestamp from the beginning of a serialized entry +// encoded using the Static CT API spec. +func UnmarshalTimestamp(raw []byte) (uint64, error) { + s := cryptobyte.String(raw) + var t uint64 + + if !s.ReadUint64(&t) { + return 0, fmt.Errorf("invalid data tile: timestamp can't be extracted") + } + if t > math.MaxInt64 { + return 0, fmt.Errorf("invalid data tile: timestamp %d > math.MaxInt64", t) + } + return t, nil +} + // UnmarshalText implements encoding/TextUnmarshaler and reads EntryBundles // which are encoded using the Static CT API spec. func (t *Entry) UnmarshalText(raw []byte) error { diff --git a/modules/README.md b/modules/README.md deleted file mode 100644 index c73e698c..00000000 --- a/modules/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Modules - -This directory contains modules that Tessera Personality can link to get extra functionalities. - -TODO: move out of this directory once we've sorted out repo structure for personalities \ No newline at end of file diff --git a/modules/dedup/dedup.go b/modules/dedup/dedup.go deleted file mode 100644 index 03a73053..00000000 --- a/modules/dedup/dedup.go +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright 2024 The Tessera authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package dedup limits the number of duplicate entries a personality allows in a Tessera log. -package dedup - -import ( - "bytes" - "context" - "crypto/sha256" - "errors" - "fmt" - "math" - "os" - "strconv" - "time" - - "github.com/transparency-dev/trillian-tessera/api/layout" - "github.com/transparency-dev/trillian-tessera/client" - "golang.org/x/crypto/cryptobyte" - "k8s.io/klog/v2" -) - -// LeafDedupInfo enables building deduplicated add-pre-chain/add-chain responses. -type LeafDedupInfo struct { - LeafID []byte - SCTDedupInfo -} - -// SCTDedupInfo contains data to build idempotent SCTs. -type SCTDedupInfo struct { - Idx uint64 - Timestamp uint64 -} - -type BEDedupStorage interface { - Add(ctx context.Context, lidxs []LeafDedupInfo) error - Get(ctx context.Context, leafID []byte) (SCTDedupInfo, bool, error) -} - -// TODO: re-architecture to prevent creating a LocaLBEDedupStorage without calling UpdateFromLog -type LocalBEDedupStorage interface { - Add(ctx context.Context, lidxs []LeafDedupInfo) error - Get(ctx context.Context, leafID []byte) (SCTDedupInfo, bool, error) - LogSize() (uint64, error) -} - -type ParseBundleFunc func([]byte, uint64) ([]LeafDedupInfo, error) - -// UpdateFromLog synchronises a local best effort deduplication storage with a log. -func UpdateFromLog(ctx context.Context, lds LocalBEDedupStorage, t time.Duration, fcp client.CheckpointFetcherFunc, fb client.EntryBundleFetcherFunc, pb ParseBundleFunc) { - tck := time.NewTicker(t) - defer tck.Stop() - for { - select { - case <-ctx.Done(): - return - case <-tck.C: - if err := sync(ctx, lds, pb, fcp, fb); err != nil { - klog.Warningf("error updating deduplication data: %v", err) - } - } - } -} - -// sync synchronises a deduplication storage with the corresponding log content. 
-func sync(ctx context.Context, lds LocalBEDedupStorage, pb ParseBundleFunc, fcp client.CheckpointFetcherFunc, fb client.EntryBundleFetcherFunc) error { - cpRaw, err := fcp(ctx) - if err != nil { - return fmt.Errorf("error fetching checkpoint: %v", err) - } - // A https://c2sp.org/static-ct-api logsize is on the second line - l := bytes.SplitN(cpRaw, []byte("\n"), 3) - if len(l) < 2 { - return errors.New("invalid checkpoint - no size") - } - ckptSize, err := strconv.ParseUint(string(l[1]), 10, 64) - if err != nil { - return fmt.Errorf("invalid checkpoint - can't extract size: %v", err) - } - oldSize, err := lds.LogSize() - if err != nil { - return fmt.Errorf("OldSize(): %v", err) - } - - // TODO(phboneff): add parallelism - // Greatly inspired by - // https://github.com/transparency-dev/trillian-tessera/blob/main/client/client.go - if ckptSize > oldSize { - klog.V(2).Infof("LocalBEDEdup.sync(): log at size %d, dedup database at size %d, startig to sync", ckptSize, oldSize) - for i := oldSize / 256; i <= ckptSize/256; i++ { - eRaw, err := fb(ctx, i, layout.PartialTileSize(0, i, ckptSize)) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - return fmt.Errorf("leaf bundle at index %d not found: %v", i, err) - } - return fmt.Errorf("failed to fetch leaf bundle at index %d: %v", i, err) - } - ldis, err := pb(eRaw, i) - if err != nil { - return fmt.Errorf("parseBundle(): %v", err) - } - - if err := lds.Add(ctx, ldis); err != nil { - return fmt.Errorf("error storing deduplication data for tile %d: %v", i, err) - } - klog.V(3).Infof("LocalBEDEdup.sync(): stored dedup data for entry bundle %d, %d more bundles to go", i, ckptSize/256-i) - } - } - klog.V(3).Infof("LocalBEDEdup.sync(): dedup data synced to logsize %d", ckptSize) - return nil -} - -// DedupFromBundle converts a bundle into an array of LeafDedupInfo. -// -// The index of a leaf is computed from its position in the log, instead of parsing SCTs. 
-// Greatly inspired by https://github.com/FiloSottile/sunlight/blob/main/tile.go -// TODO(phboneff): move this somewhere else, and only leave crypto in this file -func DedupFromBundle(bundle []byte, bundleIdx uint64) ([]LeafDedupInfo, error) { - kvs := []LeafDedupInfo{} - s := cryptobyte.String(bundle) - - for i := bundleIdx * 256; len(s) > 0; i++ { - var timestamp uint64 - var entryType uint16 - var extensions, fingerprints cryptobyte.String - if !s.ReadUint64(&timestamp) || !s.ReadUint16(&entryType) || timestamp > math.MaxInt64 { - return nil, fmt.Errorf("invalid data tile") - } - crt := []byte{} - switch entryType { - case 0: // x509_entry - if !s.ReadUint24LengthPrefixed((*cryptobyte.String)(&crt)) || - !s.ReadUint16LengthPrefixed(&extensions) || - !s.ReadUint16LengthPrefixed(&fingerprints) { - return nil, fmt.Errorf("invalid data tile x509_entry") - } - case 1: // precert_entry - IssuerKeyHash := [32]byte{} - var defangedCrt, extensions cryptobyte.String - if !s.CopyBytes(IssuerKeyHash[:]) || - !s.ReadUint24LengthPrefixed(&defangedCrt) || - !s.ReadUint16LengthPrefixed(&extensions) || - !s.ReadUint24LengthPrefixed((*cryptobyte.String)(&crt)) || - !s.ReadUint16LengthPrefixed(&fingerprints) { - return nil, fmt.Errorf("invalid data tile precert_entry") - } - default: - return nil, fmt.Errorf("invalid data tile: unknown type %d", entryType) - } - k := sha256.Sum256(crt) - sctDedupInfo := SCTDedupInfo{Idx: uint64(i), Timestamp: timestamp} - kvs = append(kvs, LeafDedupInfo{LeafID: k[:], SCTDedupInfo: sctDedupInfo}) - } - return kvs, nil -} diff --git a/storage/bbolt/dedup.go b/storage/bbolt/dedup.go deleted file mode 100644 index 20d998e2..00000000 --- a/storage/bbolt/dedup.go +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2024 The Tessera authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package bbolt implements modules/dedup using BBolt. -// -// It contains two buckets: -// - The dedup bucket stores <leafID, idx::timestamp> pairs. Entries can either be added after -// sequencing, by the server that received the request, or later when synchronising the dedup -// storage with the log state. -// - The size bucket has a single entry: <"size", X>, where X is the largest contiguous index -// from 0 that has been inserted in the dedup bucket. This allows to know at what index -// deduplication synchronisation should start in order to have the full picture of a log. -// -// Calls to Add will update idx to a smaller value, if possible. -package bbolt - -import ( - "context" - "encoding/binary" - "fmt" - - "github.com/transparency-dev/static-ct/modules/dedup" - - bolt "go.etcd.io/bbolt" - "k8s.io/klog/v2" -) - -var ( - dedupBucket = "leafIdx" - sizeBucket = "logSize" ) - -type Storage struct { - db *bolt.DB -} - -// NewStorage returns a new BBolt storage instance with a dedup and size bucket. -// -// The dedup bucket stores <leafID, idx::timestamp> pairs, where idx::timestamp is the -// concatenation of two uint64 8 bytes BigEndian representation.
-// The size bucket has a single entry: <"size", X>, where X is the largest contiguous index from 0 -// that has been inserted in the dedup bucket. -// -// If a database already exists at the provided path, NewStorage will load it. -func NewStorage(path string) (*Storage, error) { - db, err := bolt.Open(path, 0600, nil) - if err != nil { - return nil, fmt.Errorf("bolt.Open(): %v", err) - } - s := &Storage{db: db} - - err = db.Update(func(tx *bolt.Tx) error { - dedupB := tx.Bucket([]byte(dedupBucket)) - sizeB := tx.Bucket([]byte(sizeBucket)) - if dedupB == nil && sizeB == nil { - klog.V(2).Infof("NewStorage: no pre-existing buckets, will create %q and %q.", dedupBucket, sizeBucket) - _, err := tx.CreateBucket([]byte(dedupBucket)) - if err != nil { - return fmt.Errorf("create %q bucket: %v", dedupBucket, err) - } - sb, err := tx.CreateBucket([]byte(sizeBucket)) - if err != nil { - return fmt.Errorf("create %q bucket: %v", sizeBucket, err) - } - klog.V(2).Infof("NewStorage: initializing %q with size 0.", sizeBucket) - err = sb.Put([]byte("size"), itob(0)) - if err != nil { - return fmt.Errorf("error reading logsize: %v", err) - } - } else if dedupB == nil && sizeB != nil { - return fmt.Errorf("inconsistent deduplication storage state %q is nil but %q it not nil", dedupBucket, sizeBucket) - } else if dedupB != nil && sizeB == nil { - return fmt.Errorf("inconsistent deduplication storage state, %q is not nil but %q is nil", dedupBucket, sizeBucket) - } else { - klog.V(2).Infof("NewStorage: found pre-existing %q and %q buckets.", dedupBucket, sizeBucket) - } - return nil - }) - - if err != nil { - return nil, fmt.Errorf("error initializing buckets: %v", err) - } - - return s, nil -} - -// Add inserts entries in the dedup bucket and updates the size bucket if need be. -// -// If an entry already exists under a key, Add only updates the value if the new idx is smaller. -// The context is here for consistency with interfaces, but isn't used by BBolt. -func (s *Storage) Add(_ context.Context, ldis []dedup.LeafDedupInfo) error { - for _, ldi := range ldis { - err := s.db.Update(func(tx *bolt.Tx) error { - db := tx.Bucket([]byte(dedupBucket)) - sb := tx.Bucket([]byte(sizeBucket)) - - sizeB := sb.Get([]byte("size")) - if sizeB == nil { - return fmt.Errorf("can't find log size in bucket %q", sizeBucket) - } - size := btoi(sizeB) - vB, err := vtob(ldi.Idx, ldi.Timestamp) - if err != nil { - return fmt.Errorf("vtob(): %v", err) - } - - // old should always be 16 bytes long, but double check - if old := db.Get(ldi.LeafID); len(old) == 16 && btoi(old[:8]) <= ldi.Idx { - klog.V(3).Infof("Add(): bucket %q already contains a smaller index %d < %d for entry \"%x\", not updating", dedupBucket, btoi(old[:8]), ldi.Idx, ldi.LeafID) - } else if err := db.Put(ldi.LeafID, vB); err != nil { - return err - } - // size is a length, ldi.Idx an index, so if they're equal, - // ldi is a new entry. - if size == ldi.Idx { - klog.V(3).Infof("Add(): updating deduped size to %d", size+1) - if err := sb.Put([]byte("size"), itob(size+1)); err != nil { - return err - } - } - return nil - }) - if err != nil { - return fmt.Errorf("db.Update(): error writing leaf index %d: err", ldi.Idx) - } - } - return nil -} - -// Get reads entries from the dedup bucket. -// -// If the requested entry is missing from the bucket, returns false ("comma ok" idiom). -// The context is here for consistency with interfaces, but isn't used by BBolt. 
-func (s *Storage) Get(_ context.Context, leafID []byte) (dedup.SCTDedupInfo, bool, error) { - var v []byte - _ = s.db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte(dedupBucket)) - if vv := b.Get(leafID); vv != nil { - v = make([]byte, len(vv)) - copy(v, vv) - } - return nil - }) - if v == nil { - return dedup.SCTDedupInfo{}, false, nil - } - idx, t, err := btov(v) - if err != nil { - return dedup.SCTDedupInfo{}, false, fmt.Errorf("btov(): %v", err) - } - return dedup.SCTDedupInfo{Idx: idx, Timestamp: t}, true, nil -} - -// LogSize reads the latest entry from the size bucket. -func (s *Storage) LogSize() (uint64, error) { - var size []byte - err := s.db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte(sizeBucket)) - v := b.Get([]byte("size")) - if v != nil { - size = make([]byte, 8) - copy(size, v) - } - return nil - }) - if err != nil { - return 0, fmt.Errorf("error reading from %q: %v", sizeBucket, err) - } - if size == nil { - return 0, fmt.Errorf("can't find log size in bucket %q", sizeBucket) - } - return btoi(size), nil -} - -// itob returns an 8-byte big endian representation of idx. -func itob(idx uint64) []byte { - return binary.BigEndian.AppendUint64(nil, idx) -} - -// btoi converts a byte array to a uint64 -func btoi(b []byte) uint64 { - return binary.BigEndian.Uint64(b) -} - -// vtob concatenates an index and timestamp values into a byte array. -func vtob(idx uint64, timestamp uint64) ([]byte, error) { - b := make([]byte, 0, 16) - var err error - - b, err = binary.Append(b, binary.BigEndian, idx) - if err != nil { - return nil, fmt.Errorf("binary.Append() could not encode idx: %v", err) - } - b, err = binary.Append(b, binary.BigEndian, timestamp) - if err != nil { - return nil, fmt.Errorf("binary.Append() could not encode timestamp: %v", err) - } - - return b, nil -} - -// btov parses a byte array into an index and timestamp values. -func btov(b []byte) (uint64, uint64, error) { - var idx, timestamp uint64 - if l := len(b); l != 16 { - return 0, 0, fmt.Errorf("input value is %d bytes long, expected %d", l, 16) - } - n, err := binary.Decode(b, binary.BigEndian, &idx) - if err != nil { - return 0, 0, fmt.Errorf("binary.Decode() could not decode idx: %v", err) - } - _, err = binary.Decode(b[n:], binary.BigEndian, &timestamp) - if err != nil { - return 0, 0, fmt.Errorf("binary.Decode() could not decode timestamp: %v", err) - } - return idx, timestamp, nil -} diff --git a/storage/gcp/dedup.go b/storage/gcp/dedup.go deleted file mode 100644 index 36408a73..00000000 --- a/storage/gcp/dedup.go +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2024 The Tessera authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gcp - -import ( - "context" - "fmt" - - "cloud.google.com/go/spanner" - "cloud.google.com/go/spanner/apiv1/spannerpb" - "github.com/transparency-dev/static-ct/modules/dedup" - "google.golang.org/grpc/codes" -) - -// NewDedupeStorage returns a struct which can be used to store identity -> index mappings backed -// by Spanner.
-// -// Note that updates to this dedup storage is logically entriely separate from any updates -// happening to the log storage. -func NewDedupeStorage(ctx context.Context, spannerDB string) (*DedupStorage, error) { - /* - Schema for reference: - - CREATE TABLE IDSeq ( - id INT64 NOT NULL, - h BYTES(MAX) NOT NULL, - idx INT64 NOT NULL, - timestamp INT64 NOT NULL, - ) PRIMARY KEY (id, h); - */ - dedupDB, err := spanner.NewClient(ctx, spannerDB) - if err != nil { - return nil, fmt.Errorf("failed to connect to Spanner: %v", err) - } - - return &DedupStorage{ - dbPool: dedupDB, - }, nil -} - -// DedupStorage is a GCP Spanner based dedup storage implementation for TesseraCT. -type DedupStorage struct { - dbPool *spanner.Client -} - -var _ dedup.BEDedupStorage = &DedupStorage{} - -// Get looks up the stored index, if any, for the given identity. -func (d *DedupStorage) Get(ctx context.Context, i []byte) (dedup.SCTDedupInfo, bool, error) { - var idx, timestamp int64 - if row, err := d.dbPool.Single().ReadRow(ctx, "IDSeq", spanner.Key{0, i}, []string{"idx", "timestamp"}); err != nil { - if c := spanner.ErrCode(err); c == codes.NotFound { - return dedup.SCTDedupInfo{}, false, nil - } - return dedup.SCTDedupInfo{}, false, err - } else { - if err := row.Columns(&idx, &timestamp); err != nil { - return dedup.SCTDedupInfo{}, false, fmt.Errorf("failed to read dedup index: %v", err) - } - idx := uint64(idx) - t := uint64(timestamp) - return dedup.SCTDedupInfo{Idx: idx, Timestamp: t}, true, nil - } -} - -// Add stores associations between the passed-in identities and their indices. -func (d *DedupStorage) Add(ctx context.Context, entries []dedup.LeafDedupInfo) error { - m := make([]*spanner.MutationGroup, 0, len(entries)) - for _, e := range entries { - m = append(m, &spanner.MutationGroup{ - Mutations: []*spanner.Mutation{ - spanner.Insert("IDSeq", []string{"id", "h", "idx", "timestamp"}, - []any{0, e.LeafID, int64(e.Idx), int64(e.Timestamp)})}, - }) - } - - i := d.dbPool.BatchWrite(ctx, m) - return i.Do(func(r *spannerpb.BatchWriteResponse) error { - s := r.GetStatus() - if c := codes.Code(s.Code); c != codes.OK && c != codes.AlreadyExists { - return fmt.Errorf("failed to write dedup record: %v (%v)", s.GetMessage(), c) - } - return nil - }) -} diff --git a/storage/storage.go b/storage/storage.go index 42a620f4..c37bc883 100644 --- a/storage/storage.go +++ b/storage/storage.go @@ -15,15 +15,21 @@ package storage import ( + "bytes" "context" "crypto/sha256" "crypto/x509" "encoding/hex" + "errors" "fmt" + "os" + "strconv" "sync" + "time" - "github.com/transparency-dev/static-ct/modules/dedup" + "github.com/transparency-dev/static-ct/internal/types/staticct" tessera "github.com/transparency-dev/trillian-tessera" + "github.com/transparency-dev/trillian-tessera/api/layout" "github.com/transparency-dev/trillian-tessera/ctonly" "golang.org/x/mod/sumdb/note" "k8s.io/klog/v2" @@ -49,30 +55,90 @@ type IssuerStorage interface { AddIssuersIfNotExist(ctx context.Context, kv []KV) error } -// CTStorage implements ct.Storage. +// CTStorage implements ct.Storage and tessera.LogReader. type CTStorage struct { storeData func(context.Context, *ctonly.Entry) tessera.IndexFuture storeIssuers func(context.Context, []KV) error - dedupStorage dedup.BEDedupStorage + reader tessera.LogReader + awaiter *tessera.PublicationAwaiter } // NewCTStorage instantiates a CTStorage object.
-func NewCTStorage(logStorage *tessera.Appender, issuerStorage IssuerStorage, dedupStorage dedup.BEDedupStorage) (*CTStorage, error) { +func NewCTStorage(ctx context.Context, logStorage *tessera.Appender, issuerStorage IssuerStorage, reader tessera.LogReader) (*CTStorage, error) { + awaiter := tessera.NewPublicationAwaiter(ctx, reader.ReadCheckpoint, 200*time.Millisecond) ctStorage := &CTStorage{ storeData: tessera.NewCertificateTransparencyAppender(logStorage), storeIssuers: cachedStoreIssuers(issuerStorage), - dedupStorage: dedupStorage, + reader: reader, + awaiter: awaiter, } return ctStorage, nil } +func (cts *CTStorage) ReadCheckpoint(ctx context.Context) ([]byte, error) { + return cts.reader.ReadCheckpoint(ctx) +} + +// TODO(phbnf): cache timestamps (or more) to avoid reparsing the entire leaf bundle +func (cts *CTStorage) dedupFuture(ctx context.Context, f tessera.IndexFuture) (index, timestamp uint64, err error) { + ctx, span := tracer.Start(ctx, "tesseract.storage.dedupFuture") + defer span.End() + + idx, cpRaw, err := cts.awaiter.Await(ctx, f) + if err != nil { + return 0, 0, fmt.Errorf("error waiting for Tessera future and its integration: %v", err) + } + + // A https://c2sp.org/static-ct-api log size is on the second line of the checkpoint + l := bytes.SplitN(cpRaw, []byte("\n"), 3) + if len(l) < 2 { + return 0, 0, errors.New("invalid checkpoint - no size") + } + ckptSize, err := strconv.ParseUint(string(l[1]), 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("invalid checkpoint - can't extract size: %v", err) + } + + eBIdx := idx.Index / layout.EntryBundleWidth + eBRaw, err := cts.reader.ReadEntryBundle(ctx, eBIdx, layout.PartialTileSize(0, eBIdx, ckptSize)) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return 0, 0, fmt.Errorf("leaf bundle at index %d not found: %v", eBIdx, err) + } + return 0, 0, fmt.Errorf("failed to fetch entry bundle at index %d: %v", eBIdx, err) + } + eb := staticct.EntryBundle{} + if err := eb.UnmarshalText(eBRaw); err != nil { + return 0, 0, fmt.Errorf("failed to unmarshal entry bundle at index %d: %v", eBIdx, err) + } + + eIdx := idx.Index % layout.EntryBundleWidth + if uint64(len(eb.Entries)) <= eIdx { + return 0, 0, fmt.Errorf("entry bundle at index %d has only %d entries, but wanted at least %d", eBIdx, len(eb.Entries), eIdx+1) + } + t, err := staticct.UnmarshalTimestamp([]byte(eb.Entries[eIdx])) + if err != nil { + return 0, 0, fmt.Errorf("failed to extract timestamp from entry %d in entry bundle %d: %v", eIdx, eBIdx, err) + } + + return idx.Index, t, nil +} + // Add stores CT entries. -func (cts *CTStorage) Add(ctx context.Context, entry *ctonly.Entry) tessera.IndexFuture { +func (cts *CTStorage) Add(ctx context.Context, entry *ctonly.Entry) (uint64, uint64, error) { ctx, span := tracer.Start(ctx, "tesseract.storage.Add") defer span.End() - // TODO(phboneff): add deduplication and chain storage - return cts.storeData(ctx, entry) + future := cts.storeData(ctx, entry) + idx, err := future() + if err != nil { + return 0, 0, fmt.Errorf("error waiting for Tessera future: %v", err) + } + if idx.IsDup { + return cts.dedupFuture(ctx, future) + } + return idx.Index, entry.Timestamp, nil + } // AddIssuerChain stores every chain certificate under its sha256. @@ -128,28 +194,3 @@ func cachedStoreIssuers(s IssuerStorage) func(context.Context, []KV) error { return nil } } - -// AddCertDedupInfo stores in the deduplication storage.
-func (cts CTStorage) AddCertDedupInfo(ctx context.Context, c *x509.Certificate, sctDedupInfo dedup.SCTDedupInfo) error { - ctx, span := tracer.Start(ctx, "tesseract.storage.AddCertDedupInfo") - defer span.End() - - key := sha256.Sum256(c.Raw) - if err := cts.dedupStorage.Add(ctx, []dedup.LeafDedupInfo{{LeafID: key[:], SCTDedupInfo: sctDedupInfo}}); err != nil { - return fmt.Errorf("error storing SCTDedupInfo %+v of \"%x\": %v", sctDedupInfo, key, err) - } - return nil -} - -// GetCertDedupInfo fetches the SCTDedupInfo of a given certificate from the deduplication storage. -func (cts CTStorage) GetCertDedupInfo(ctx context.Context, c *x509.Certificate) (dedup.SCTDedupInfo, bool, error) { - ctx, span := tracer.Start(ctx, "tesseract.storageGetCertDedupInfo") - defer span.End() - - key := sha256.Sum256(c.Raw) - sctC, ok, err := cts.dedupStorage.Get(ctx, key[:]) - if err != nil { - return dedup.SCTDedupInfo{}, false, fmt.Errorf("error fetching index of \"%x\": %v", key, err) - } - return sctC, ok, nil -}
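Reviewer note: below is a minimal sketch (not part of the patch) of how a personality consumes the new CTStorage.Add contract from storage/storage.go above. The issueSCT helper and the example package name are hypothetical; the storage API it calls is the one added by this change, where Add blocks until the entry is sequenced and, for duplicates, returns the original entry's index and timestamp recovered from the published entry bundle, so resubmissions yield idempotent SCTs without a separate dedup database.

// Sketch only, not part of this change.
package example

import (
	"context"
	"fmt"

	"github.com/transparency-dev/static-ct/storage"
	"github.com/transparency-dev/trillian-tessera/ctonly"
)

// issueSCT is a hypothetical helper: Add returns the assigned (index, timestamp)
// pair for both fresh submissions and duplicates, which is all a handler needs
// to build an idempotent SCT.
func issueSCT(ctx context.Context, cts *storage.CTStorage, entry *ctonly.Entry) error {
	idx, ts, err := cts.Add(ctx, entry)
	if err != nil {
		return fmt.Errorf("failed to add entry: %v", err)
	}
	// Build and sign the SCT from idx and ts here (omitted).
	fmt.Printf("entry sequenced at index %d with timestamp %d\n", idx, ts)
	return nil
}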