From 468acc13761c41878a9b4318be497cfc0f9e7ed0 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Mon, 14 Apr 2025 18:55:16 +0000 Subject: [PATCH 01/12] enable new dedup # Conflicts: # cmd/gcp/main.go --- cmd/gcp/main.go | 18 +++++++++++++++++- deployment/modules/gcp/cloudrun/main.tf | 2 ++ deployment/modules/gcp/cloudrun/variables.tf | 5 +++++ deployment/modules/gcp/storage/main.tf | 11 +++++++++++ deployment/modules/gcp/storage/outputs.tf | 5 +++++ .../modules/gcp/tesseract/cloudrun/main.tf | 1 + 6 files changed, 41 insertions(+), 1 deletion(-) diff --git a/cmd/gcp/main.go b/cmd/gcp/main.go index 8dc1c674..b32a2a10 100644 --- a/cmd/gcp/main.go +++ b/cmd/gcp/main.go @@ -33,6 +33,7 @@ import ( "github.com/transparency-dev/static-ct/storage/gcp" tessera "github.com/transparency-dev/trillian-tessera" tgcp "github.com/transparency-dev/trillian-tessera/storage/gcp" + gcp_as "github.com/transparency-dev/trillian-tessera/storage/gcp/antispam" "golang.org/x/mod/sumdb/note" "k8s.io/klog/v2" ) @@ -54,6 +55,7 @@ var ( bucket = flag.String("bucket", "", "Name of the bucket to store the log in.") spannerDB = flag.String("spanner_db_path", "", "Spanner database path: projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") spannerDedupDB = flag.String("spanner_dedup_db_path", "", "Spanner deduplication database path: projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") + spannerAntispamDB = flag.String("spanner_antispam_db_path", "", "EXPERIMENTAL: Spanner antispam deduplication database path projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") rootsPemFile = flag.String("roots_pem_file", "", "Path to the file containing root certificates that are acceptable to the log. The certs are served through get-roots endpoint.") rejectExpired = flag.Bool("reject_expired", false, "If true then the certificate validity period will be checked against the current time during the validation of submissions. This will cause expired certificates to be rejected.") rejectUnexpired = flag.Bool("reject_unexpired", false, "If true then CTFE rejects certificates that are either currently valid or not yet valid.") @@ -157,9 +159,23 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, return nil, fmt.Errorf("failed to initialize GCP Tessera storage driver: %v", err) } + var antispam tessera.Antispam + // Persistent antispam is currently experimental, so there's no terraform or documentation yet! + if *spannerAntispamDB != "" { + asOpts := gcp_as.AntispamOpts{ + MaxBatchSize: 5000, + PushbackThreshold: 1024, + } + antispam, err = gcp_as.NewAntispam(ctx, *spannerAntispamDB, asOpts) + if err != nil { + klog.Exitf("Failed to create new GCP antispam storage: %v", err) + } + } + opts := tessera.NewAppendOptions(). WithCheckpointSigner(signer). - WithCTLayout() + WithCTLayout(). + WithAntispam(256, antispam) // TODO(phbnf): figure out the best way to thread the `shutdown` func NewAppends returns back out to main so we can cleanly close Tessera down // when it's time to exit. diff --git a/deployment/modules/gcp/cloudrun/main.tf b/deployment/modules/gcp/cloudrun/main.tf index 1c0d2bed..c6652be1 100644 --- a/deployment/modules/gcp/cloudrun/main.tf +++ b/deployment/modules/gcp/cloudrun/main.tf @@ -13,6 +13,7 @@ locals { cloudrun_service_account_id = var.env == "" ? "cloudrun-sa" : "cloudrun-${var.env}-sa" spanner_log_db_path = "projects/${var.project_id}/instances/${var.log_spanner_instance}/databases/${var.log_spanner_db}" spanner_dedup_db_path = "projects/${var.project_id}/instances/${var.log_spanner_instance}/databases/${var.dedup_spanner_db}" + spanner_antispam_db_path = "projects/${var.project_id}/instances/${var.log_spanner_instance}/databases/${var.antispam_spanner_db}" } resource "google_project_service" "cloudrun_api" { @@ -45,6 +46,7 @@ resource "google_cloud_run_v2_service" "default" { "--bucket=${var.bucket}", "--spanner_db_path=${local.spanner_log_db_path}", "--spanner_dedup_db_path=${local.spanner_dedup_db_path}", + "--spanner_antispam_db_path=${local.spanner_antispam_db_path}", "--roots_pem_file=/bin/test_root_ca_cert.pem", "--origin=${var.base_name}${var.origin_suffix}", "--signer_public_key_secret_name=${var.signer_public_key_secret_name}", diff --git a/deployment/modules/gcp/cloudrun/variables.tf b/deployment/modules/gcp/cloudrun/variables.tf index f77b7812..73cde184 100644 --- a/deployment/modules/gcp/cloudrun/variables.tf +++ b/deployment/modules/gcp/cloudrun/variables.tf @@ -48,6 +48,11 @@ variable "dedup_spanner_db" { type = string } +variable "antispam_spanner_db" { + description = "Antispam Spanner database" + type = string +} + variable "signer_public_key_secret_name" { description = "Public key secret name for checkpoints and SCTs signer. Format: projects/{projectId}/secrets/{secretName}/versions/{secretVersion}." type = string diff --git a/deployment/modules/gcp/storage/main.tf b/deployment/modules/gcp/storage/main.tf index 6cf259dd..582df7e0 100644 --- a/deployment/modules/gcp/storage/main.tf +++ b/deployment/modules/gcp/storage/main.tf @@ -73,3 +73,14 @@ resource "google_spanner_database" "dedup_db" { deletion_protection = !var.ephemeral } + +resource "google_spanner_database" "antispam_db" { + instance = google_spanner_instance.log_spanner.name + name = "${var.base_name}-antispam-db" + ddl = [ + "CREATE TABLE IF NOT EXISTS FollowCoord (id INT64 NOT NULL, nextIdx INT64 NOT NULL) PRIMARY KEY (id)", + "CREATE TABLE IF NOT EXISTS IDSeq (h BYTES(32) NOT NULL, idx INT64 NOT NULL) PRIMARY KEY (h)", + ] + + deletion_protection = !var.ephemeral +} diff --git a/deployment/modules/gcp/storage/outputs.tf b/deployment/modules/gcp/storage/outputs.tf index 578b9ffa..387009fa 100644 --- a/deployment/modules/gcp/storage/outputs.tf +++ b/deployment/modules/gcp/storage/outputs.tf @@ -17,3 +17,8 @@ output "dedup_spanner_db" { description = "Dedup Spanner database" value = google_spanner_database.dedup_db } + +output "antispam_spanner_db" { + description = "Antispam Spanner database" + value = google_spanner_database.antispam_db +} diff --git a/deployment/modules/gcp/tesseract/cloudrun/main.tf b/deployment/modules/gcp/tesseract/cloudrun/main.tf index 42344e3a..747ac50a 100644 --- a/deployment/modules/gcp/tesseract/cloudrun/main.tf +++ b/deployment/modules/gcp/tesseract/cloudrun/main.tf @@ -31,6 +31,7 @@ module "cloudrun" { log_spanner_instance = module.storage.log_spanner_instance.name log_spanner_db = module.storage.log_spanner_db.name dedup_spanner_db = module.storage.dedup_spanner_db.name + antispam_spanner_db = module.storage.antispam_spanner_db.name signer_public_key_secret_name = module.secretmanager.ecdsa_p256_public_key_id signer_private_key_secret_name = module.secretmanager.ecdsa_p256_private_key_id From 5de64b4facf35e99e94dd6dfcefc6a59a8d76365 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Wed, 16 Apr 2025 11:20:04 +0000 Subject: [PATCH 02/12] don't do old deduplication --- internal/ct/handlers.go | 67 ++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/internal/ct/handlers.go b/internal/ct/handlers.go index 07534789..06005d0f 100644 --- a/internal/ct/handlers.go +++ b/internal/ct/handlers.go @@ -30,7 +30,6 @@ import ( "github.com/transparency-dev/static-ct/internal/types/rfc6962" "github.com/transparency-dev/static-ct/internal/types/tls" "github.com/transparency-dev/static-ct/internal/x509util" - "github.com/transparency-dev/static-ct/modules/dedup" tessera "github.com/transparency-dev/trillian-tessera" "go.opentelemetry.io/otel/metric" "k8s.io/klog/v2" @@ -271,43 +270,43 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt return http.StatusBadRequest, fmt.Errorf("failed to build MerkleTreeLeaf: %s", err) } - klog.V(2).Infof("%s: %s => storage.GetCertIndex", log.origin, method) - sctDedupInfo, isDup, err := log.storage.GetCertDedupInfo(ctx, chain[0]) - idx := sctDedupInfo.Idx - if err != nil { - return http.StatusInternalServerError, fmt.Errorf("couldn't deduplicate the request: %s", err) + //klog.V(2).Infof("%s: %s => storage.GetCertIndex", log.origin, method) + //sctDedupInfo, isDup, err := log.storage.GetCertDedupInfo(ctx, chain[0]) + //idx := sctDedupInfo.Idx + //if err != nil { + // return http.StatusInternalServerError, fmt.Errorf("couldn't deduplicate the request: %s", err) + //} + + //if isDup { + // klog.V(3).Infof("%s: %s - found duplicate entry at index %d", log.origin, method, idx) + // entry.Timestamp = sctDedupInfo.Timestamp + //} else { + if err := log.storage.AddIssuerChain(ctx, chain[1:]); err != nil { + return http.StatusInternalServerError, fmt.Errorf("failed to store issuer chain: %s", err) } - if isDup { - klog.V(3).Infof("%s: %s - found duplicate entry at index %d", log.origin, method, idx) - entry.Timestamp = sctDedupInfo.Timestamp - } else { - if err := log.storage.AddIssuerChain(ctx, chain[1:]); err != nil { - return http.StatusInternalServerError, fmt.Errorf("failed to store issuer chain: %s", err) - } - - klog.V(2).Infof("%s: %s => storage.Add", log.origin, method) - index, err := log.storage.Add(ctx, entry)() - if err != nil { - if errors.Is(err, tessera.ErrPushback) { - w.Header().Add("Retry-After", "1") - return http.StatusServiceUnavailable, fmt.Errorf("received pushback from Tessera sequencer: %v", err) - } - return http.StatusInternalServerError, fmt.Errorf("couldn't store the leaf: %v", err) - } - // TODO(phbnf): figure out whether to use Tessera's index.IsDup() or a separate "external" antispam impl. - idx = index.Index - - // We store the index for this certificate in the deduplication storage immediately. - // It might be stored again later, if a local deduplication storage is synced, potentially - // with a smaller value. - klog.V(2).Infof("%s: %s => storage.AddCertIndex", log.origin, method) - err = log.storage.AddCertDedupInfo(ctx, chain[0], dedup.SCTDedupInfo{Idx: idx, Timestamp: entry.Timestamp}) - // TODO: block log writes if deduplication breaks - if err != nil { - klog.Warningf("AddCertIndex(): failed to store certificate index: %v", err) + klog.V(2).Infof("%s: %s => storage.Add", log.origin, method) + index, err := log.storage.Add(ctx, entry)() + if err != nil { + if errors.Is(err, tessera.ErrPushback) { + w.Header().Add("Retry-After", "1") + return http.StatusServiceUnavailable, fmt.Errorf("received pushback from Tessera sequencer: %v", err) } + return http.StatusInternalServerError, fmt.Errorf("couldn't store the leaf: %v", err) } + // TODO(phbnf): figure out whether to use Tessera's index.IsDup() or a separate "external" antispam impl. + idx := index.Index + + // We store the index for this certificate in the deduplication storage immediately. + // It might be stored again later, if a local deduplication storage is synced, potentially + // with a smaller value. + //klog.V(2).Infof("%s: %s => storage.AddCertIndex", log.origin, method) + //err = log.storage.AddCertDedupInfo(ctx, chain[0], dedup.SCTDedupInfo{Idx: idx, Timestamp: entry.Timestamp}) + //// TODO: block log writes if deduplication breaks + //if err != nil { + // klog.Warningf("AddCertIndex(): failed to store certificate index: %v", err) + //} + // } // Always use the returned leaf as the basis for an SCT. var loggedLeaf rfc6962.MerkleTreeLeaf From b98391dd40fdbd87b2165737d52ba0eb779619fd Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Thu, 17 Apr 2025 15:08:26 +0000 Subject: [PATCH 03/12] dedupeverywhere # Conflicts: # go.mod # go.sum # Conflicts: # go.sum # Conflicts: # go.sum --- cmd/aws/main.go | 51 +++- cmd/gcp/main.go | 16 +- deployment/live/aws/test/.terraform.lock.hcl | 24 +- deployment/live/aws/test/README.md | 10 +- deployment/modules/aws/storage/main.tf | 21 ++ deployment/modules/aws/storage/outputs.tf | 4 + deployment/modules/aws/storage/variables.tf | 6 + .../modules/aws/tesseract/conformance/main.tf | 10 +- deployment/modules/aws/tesseract/test/main.tf | 9 +- go.mod | 5 + go.sum | 10 + internal/ct/ctlog.go | 5 - internal/ct/handlers.go | 28 +-- internal/ct/handlers_test.go | 25 +- modules/README.md | 5 - modules/dedup/dedup.go | 167 ------------- storage/bbolt/dedup.go | 228 ------------------ storage/gcp/dedup.go | 97 -------- storage/storage.go | 30 +-- 19 files changed, 149 insertions(+), 602 deletions(-) delete mode 100644 modules/README.md delete mode 100644 modules/dedup/dedup.go delete mode 100644 storage/bbolt/dedup.go delete mode 100644 storage/gcp/dedup.go diff --git a/cmd/aws/main.go b/cmd/aws/main.go index fc2ad215..a5500c3f 100644 --- a/cmd/aws/main.go +++ b/cmd/aws/main.go @@ -31,9 +31,9 @@ import ( tesseract "github.com/transparency-dev/static-ct" "github.com/transparency-dev/static-ct/storage" "github.com/transparency-dev/static-ct/storage/aws" - "github.com/transparency-dev/static-ct/storage/bbolt" tessera "github.com/transparency-dev/trillian-tessera" taws "github.com/transparency-dev/trillian-tessera/storage/aws" + aws_as "github.com/transparency-dev/trillian-tessera/storage/aws/antispam" "golang.org/x/mod/sumdb/note" "k8s.io/klog/v2" ) @@ -54,6 +54,7 @@ var ( origin = flag.String("origin", "", "Origin of the log, for checkpoints and the monitoring prefix.") bucket = flag.String("bucket", "", "Name of the bucket to store the log in.") dbName = flag.String("db_name", "", "AuroraDB name") + antispamDBName = flag.String("antispam_db_name", "", "AuroraDB antispam name") dbHost = flag.String("db_host", "", "AuroraDB host") dbPort = flag.Int("db_port", 3306, "AuroraDB port") dbUser = flag.String("db_user", "", "AuroraDB user") @@ -147,9 +148,19 @@ func newAWSStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, if err != nil { return nil, fmt.Errorf("failed to initialize AWS Tessera storage driver: %v", err) } + + var antispam tessera.Antispam + if *antispamDBName != "" { + antispam, err = aws_as.NewAntispam(ctx, antispamMysqlConfig().FormatDSN(), aws_as.AntispamOpts{}) + if err != nil { + klog.Exitf("Failed to create new GCP antispam storage: %v", err) + } + } + appender, _, _, err := tessera.NewAppender(ctx, driver, tessera.NewAppendOptions(). WithCheckpointSigner(signer). - WithCTLayout()) + WithCTLayout(). + WithAntispam(2<<18, antispam)) // TODO(phbnf): do the math to see what fits in memory if err != nil { return nil, fmt.Errorf("failed to initialize AWS Tessera storage: %v", err) } @@ -159,12 +170,7 @@ func newAWSStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, return nil, fmt.Errorf("failed to initialize AWS issuer storage: %v", err) } - beDedupStorage, err := bbolt.NewStorage(*dedupPath) - if err != nil { - return nil, fmt.Errorf("failed to initialize BBolt deduplication database: %v", err) - } - - return storage.NewCTStorage(appender, issuerStorage, beDedupStorage) + return storage.NewCTStorage(appender, issuerStorage) } type timestampFlag struct { @@ -230,3 +236,32 @@ func storageConfigFromFlags() taws.Config { MaxIdleConns: *dbMaxIdle, } } + +func antispamMysqlConfig() *mysql.Config { + if *antispamDBName == "" { + klog.Exit("--antispam_db_name must be set") + } + if *dbHost == "" { + klog.Exit("--db_host must be set") + } + if *dbPort == 0 { + klog.Exit("--db_port must be set") + } + if *dbUser == "" { + klog.Exit("--db_user must be set") + } + // Empty passord isn't an option with AuroraDB MySQL. + if *dbPassword == "" { + klog.Exit("--db_password must be set") + } + + return &mysql.Config{ + User: *dbUser, + Passwd: *dbPassword, + Net: "tcp", + Addr: fmt.Sprintf("%s:%d", *dbHost, *dbPort), + DBName: *antispamDBName, + AllowCleartextPasswords: true, + AllowNativePasswords: true, + } +} diff --git a/cmd/gcp/main.go b/cmd/gcp/main.go index b32a2a10..e02e506a 100644 --- a/cmd/gcp/main.go +++ b/cmd/gcp/main.go @@ -54,7 +54,6 @@ var ( origin = flag.String("origin", "", "Origin of the log, for checkpoints and the monitoring prefix.") bucket = flag.String("bucket", "", "Name of the bucket to store the log in.") spannerDB = flag.String("spanner_db_path", "", "Spanner database path: projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") - spannerDedupDB = flag.String("spanner_dedup_db_path", "", "Spanner deduplication database path: projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") spannerAntispamDB = flag.String("spanner_antispam_db_path", "", "EXPERIMENTAL: Spanner antispam deduplication database path projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") rootsPemFile = flag.String("roots_pem_file", "", "Path to the file containing root certificates that are acceptable to the log. The certs are served through get-roots endpoint.") rejectExpired = flag.Bool("reject_expired", false, "If true then the certificate validity period will be checked against the current time during the validation of submissions. This will cause expired certificates to be rejected.") @@ -162,11 +161,7 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, var antispam tessera.Antispam // Persistent antispam is currently experimental, so there's no terraform or documentation yet! if *spannerAntispamDB != "" { - asOpts := gcp_as.AntispamOpts{ - MaxBatchSize: 5000, - PushbackThreshold: 1024, - } - antispam, err = gcp_as.NewAntispam(ctx, *spannerAntispamDB, asOpts) + antispam, err = gcp_as.NewAntispam(ctx, *spannerAntispamDB, gcp_as.AntispamOpts{}) if err != nil { klog.Exitf("Failed to create new GCP antispam storage: %v", err) } @@ -175,7 +170,7 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, opts := tessera.NewAppendOptions(). WithCheckpointSigner(signer). WithCTLayout(). - WithAntispam(256, antispam) + WithAntispam(2<<18, antispam) // TODO(phbnf): do the math to see what fits in memory // TODO(phbnf): figure out the best way to thread the `shutdown` func NewAppends returns back out to main so we can cleanly close Tessera down // when it's time to exit. @@ -189,12 +184,7 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, return nil, fmt.Errorf("failed to initialize GCP issuer storage: %v", err) } - beDedupStorage, err := gcp.NewDedupeStorage(ctx, *spannerDedupDB) - if err != nil { - return nil, fmt.Errorf("failed to initialize GCP Spanner deduplication database: %v", err) - } - - return storage.NewCTStorage(appender, issuerStorage, beDedupStorage) + return storage.NewCTStorage(appender, issuerStorage) } type timestampFlag struct { diff --git a/deployment/live/aws/test/.terraform.lock.hcl b/deployment/live/aws/test/.terraform.lock.hcl index fa046975..a9e266b5 100644 --- a/deployment/live/aws/test/.terraform.lock.hcl +++ b/deployment/live/aws/test/.terraform.lock.hcl @@ -41,4 +41,26 @@ provider "registry.terraform.io/hashicorp/tls" { "zh:ecb0c8b528c7a619fa71852bb3fb5c151d47576c5aab2bf3af4db52588722eeb", "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", ] -} \ No newline at end of file +} + +provider "registry.terraform.io/petoju/mysql" { + version = "3.0.71" + constraints = "3.0.71" + hashes = [ + "h1:Xwam63rfo59cWPri28xmAbuqrFsxHwAE1P7ogpMUdSU=", + "zh:15200cd22d5a03fafb41cc2ffc654ab58cbcea80920e280ce08b46b3a8378813", + "zh:16e2cc6d38d5be31c59412ced68b451129f3d048d4debb3afaf5603befdfbf50", + "zh:31e9f6cc515378f3bec1fca57892b7e3224442801ac95c18ba83148e793dbc8e", + "zh:3c456ab07b34c8b472b050c3ef333b469164a85bb79c0e5a87ac0dc7aa45eb22", + "zh:538ffa7821989fabd763227fb85aebbd85df57dba9fffc2e060306761b320110", + "zh:69cdda5e80dfb4ca71ef44d719abdee9c9250153ac2269b3a3d17a679f3fb858", + "zh:87eb21da93c9ac51f74c5e8cf53690e51fc0561cfbe14d953c342eb583f31d22", + "zh:8db301a9ae9fc6374fb5cc42bbf81c0f0ea1e7f46f447de56dd752dcecb3c43f", + "zh:a1878a45070ec92659a39e265848fcbdd0f1423dc8eb8cc764c690f73c4430a5", + "zh:a9c9029784f7273d9c153d0a4291b493d68049193068bef4f0374dcaf379e67c", + "zh:ac91bc2538d87b4cf703d2bdae2ceab8567235a9d29c0de8b74bcde0eb828f75", + "zh:b95a21d4857fe84a7e8edc3ff5a828c37a9a99eb1e116045603a0bf00b1dcb89", + "zh:bf38af7e3cde5a717fa67e1e7e936550cb6d3197774e575f13ef974ec92061d2", + "zh:e3d481f0c7a47744c40dabe731082f1730d7ae55417257f4f49404b1a93701b8", + ] +} diff --git a/deployment/live/aws/test/README.md b/deployment/live/aws/test/README.md index 0ea6ab90..dff11157 100644 --- a/deployment/live/aws/test/README.md +++ b/deployment/live/aws/test/README.md @@ -66,8 +66,10 @@ export AWS_PROFILE=AdministratorAccess- Terraforming the account can be done by: 1. `cd` to [/deployment/live/aws/test/](/deployment/live/aws/test/) to deploy/change. - 2. Run `terragrunt apply`. - + 2. Run `terragrunt apply`. If this fails to create the antispam database, + connect the RDS instance to your VM using the instrunctions bellow, and run + `terragrunt apply` again. + Store the Aurora RDS database and S3 bucket information into the environment variables: ```sh @@ -95,7 +97,7 @@ go run ./cmd/aws \ --db_port=3306 \ --db_user=tesseract \ --db_password=${TESSERACT_DB_PASSWORD} \ - --dedup_path=test-static-ct + --antispam_db_name=antispam_db ``` In a different terminal you can either mint and submit certificates manually, or @@ -187,7 +189,7 @@ go run ./cmd/aws \ --db_port=3306 \ --db_user=tesseract \ --db_password=${TESSERACT_DB_PASSWORD} \ - --dedup_path=test-static-ct + --antispam_db_name=antispam_db -v=3 ``` diff --git a/deployment/modules/aws/storage/main.tf b/deployment/modules/aws/storage/main.tf index 3e5af201..ddf555ab 100644 --- a/deployment/modules/aws/storage/main.tf +++ b/deployment/modules/aws/storage/main.tf @@ -4,6 +4,10 @@ terraform { source = "hashicorp/aws" version = "5.92.0" } + mysql = { + source = "petoju/mysql" + version = "3.0.71" + } } } @@ -50,3 +54,20 @@ data "aws_secretsmanager_secret_version" "db_credentials" { aws_rds_cluster_instance.cluster_instances ] } + +# Configure the MySQL provider based on the outcome of +# creating the aws_db_instance. +# This requires that the machine running terraform has access +# to the DB instance created above. This is _NOT_ the case when +# github actions are applying the terraform. +provider "mysql" { + endpoint = aws_rds_cluster_instance.cluster_instances[0].endpoint + username = aws_rds_cluster.log_rds_cluster.master_username + password = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["password"] +} + +# Create a second database for antispam. +resource "mysql_database" "antispam_db" { + name = "antispam_db" + count = var.create_antispam_db ? 1 : 0 +} diff --git a/deployment/modules/aws/storage/outputs.tf b/deployment/modules/aws/storage/outputs.tf index 1b05d0de..b9c13ddc 100644 --- a/deployment/modules/aws/storage/outputs.tf +++ b/deployment/modules/aws/storage/outputs.tf @@ -33,3 +33,7 @@ output "rds_aurora_cluster_master_user_secret_unsafe" { value = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["password"] sensitive = true # Mark as sensitive, but it can still be exposed } + +output "antispam_database_name" { + value = mysql_database.antispam_db[0].name +} diff --git a/deployment/modules/aws/storage/variables.tf b/deployment/modules/aws/storage/variables.tf index 530c1312..cb59fccf 100644 --- a/deployment/modules/aws/storage/variables.tf +++ b/deployment/modules/aws/storage/variables.tf @@ -13,6 +13,12 @@ variable "region" { type = string } +variable "create_antispam_db" { + description = "Set to true to create another database to be used by the antispam implementation." + type = bool + default = false +} + variable "ephemeral" { description = "Set to true if this is a throwaway/temporary log instance. Will set attributes on created resources to allow them to be disabled/deleted more easily." type = bool diff --git a/deployment/modules/aws/tesseract/conformance/main.tf b/deployment/modules/aws/tesseract/conformance/main.tf index 2a95de6b..84afed14 100644 --- a/deployment/modules/aws/tesseract/conformance/main.tf +++ b/deployment/modules/aws/tesseract/conformance/main.tf @@ -16,10 +16,11 @@ locals { module "storage" { source = "../../storage" - prefix_name = var.prefix_name - base_name = var.base_name - region = var.region - ephemeral = var.ephemeral + prefix_name = var.prefix_name + base_name = var.base_name + region = var.region + ephemeral = var.ephemeral + create_antispam_db = true } module "secretsmanager" { @@ -171,6 +172,7 @@ resource "aws_ecs_task_definition" "conformance" { "--dedup_path=ci-static-ct", "--signer_public_key_secret_name=${module.secretsmanager.ecdsa_p256_public_key_id}", "--signer_private_key_secret_name=${module.secretsmanager.ecdsa_p256_private_key_id}", + "--antispam_db_name=${module.storage.antispam_database_name}", "-v=2" ], "logConfiguration" : { diff --git a/deployment/modules/aws/tesseract/test/main.tf b/deployment/modules/aws/tesseract/test/main.tf index a479898a..4c1748e9 100644 --- a/deployment/modules/aws/tesseract/test/main.tf +++ b/deployment/modules/aws/tesseract/test/main.tf @@ -5,10 +5,11 @@ terraform { module "storage" { source = "../../storage" - prefix_name = var.prefix_name - base_name = var.base_name - region = var.region - ephemeral = var.ephemeral + prefix_name = var.prefix_name + base_name = var.base_name + region = var.region + ephemeral = var.ephemeral + create_antispam_db = true } module "secretsmanager" { diff --git a/go.mod b/go.mod index 08a5de5f..8af928a2 100644 --- a/go.mod +++ b/go.mod @@ -67,6 +67,9 @@ require ( github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 // indirect + github.com/dgraph-io/badger/v4 v4.7.0 // indirect + github.com/dgraph-io/ristretto/v2 v2.2.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -76,11 +79,13 @@ require ( github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect + github.com/google/flatbuffers v25.2.10+incompatible // indirect github.com/google/s2a-go v0.1.9 // indirect github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect github.com/googleapis/gax-go/v2 v2.14.1 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/klauspost/compress v1.18.0 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/mattn/go-runewidth v0.0.16 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect diff --git a/go.sum b/go.sum index 790527ed..13d32870 100644 --- a/go.sum +++ b/go.sum @@ -724,8 +724,14 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgraph-io/badger/v4 v4.7.0 h1:Q+J8HApYAY7UMpL8d9owqiB+odzEc0zn/aqOD9jhc6Y= +github.com/dgraph-io/badger/v4 v4.7.0/go.mod h1:He7TzG3YBy3j4f5baj5B7Zl2XyfNe5bl4Udl0aPemVA= +github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM= +github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI= github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -831,6 +837,8 @@ github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= +github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -920,6 +928,8 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:C github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= diff --git a/internal/ct/ctlog.go b/internal/ct/ctlog.go index c516a22b..0126df16 100644 --- a/internal/ct/ctlog.go +++ b/internal/ct/ctlog.go @@ -9,7 +9,6 @@ import ( "fmt" "github.com/transparency-dev/static-ct/internal/types/rfc6962" - "github.com/transparency-dev/static-ct/modules/dedup" "github.com/transparency-dev/static-ct/storage" tessera "github.com/transparency-dev/trillian-tessera" "github.com/transparency-dev/trillian-tessera/ctonly" @@ -39,10 +38,6 @@ type Storage interface { Add(context.Context, *ctonly.Entry) tessera.IndexFuture // AddIssuerChain stores every the chain certificate in a content-addressable store under their sha256 hash. AddIssuerChain(context.Context, []*x509.Certificate) error - // AddCertDedupInfo stores the SCTDedupInfo of certificate in a log under its hash. - AddCertDedupInfo(context.Context, *x509.Certificate, dedup.SCTDedupInfo) error - // GetCertDedupInfo gets the SCTDedupInfo of certificate in a log from its hash. - GetCertDedupInfo(context.Context, *x509.Certificate) (dedup.SCTDedupInfo, bool, error) } // ChainValidator provides functions to validate incoming chains. diff --git a/internal/ct/handlers.go b/internal/ct/handlers.go index 06005d0f..6729230b 100644 --- a/internal/ct/handlers.go +++ b/internal/ct/handlers.go @@ -270,17 +270,6 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt return http.StatusBadRequest, fmt.Errorf("failed to build MerkleTreeLeaf: %s", err) } - //klog.V(2).Infof("%s: %s => storage.GetCertIndex", log.origin, method) - //sctDedupInfo, isDup, err := log.storage.GetCertDedupInfo(ctx, chain[0]) - //idx := sctDedupInfo.Idx - //if err != nil { - // return http.StatusInternalServerError, fmt.Errorf("couldn't deduplicate the request: %s", err) - //} - - //if isDup { - // klog.V(3).Infof("%s: %s - found duplicate entry at index %d", log.origin, method, idx) - // entry.Timestamp = sctDedupInfo.Timestamp - //} else { if err := log.storage.AddIssuerChain(ctx, chain[1:]); err != nil { return http.StatusInternalServerError, fmt.Errorf("failed to store issuer chain: %s", err) } @@ -294,23 +283,10 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt } return http.StatusInternalServerError, fmt.Errorf("couldn't store the leaf: %v", err) } - // TODO(phbnf): figure out whether to use Tessera's index.IsDup() or a separate "external" antispam impl. - idx := index.Index - - // We store the index for this certificate in the deduplication storage immediately. - // It might be stored again later, if a local deduplication storage is synced, potentially - // with a smaller value. - //klog.V(2).Infof("%s: %s => storage.AddCertIndex", log.origin, method) - //err = log.storage.AddCertDedupInfo(ctx, chain[0], dedup.SCTDedupInfo{Idx: idx, Timestamp: entry.Timestamp}) - //// TODO: block log writes if deduplication breaks - //if err != nil { - // klog.Warningf("AddCertIndex(): failed to store certificate index: %v", err) - //} - // } // Always use the returned leaf as the basis for an SCT. var loggedLeaf rfc6962.MerkleTreeLeaf - leafValue := entry.MerkleTreeLeaf(idx) + leafValue := entry.MerkleTreeLeaf(index.Index) if rest, err := tls.Unmarshal(leafValue, &loggedLeaf); err != nil { return http.StatusInternalServerError, fmt.Errorf("failed to reconstruct MerkleTreeLeaf: %s", err) } else if len(rest) > 0 { @@ -337,7 +313,7 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt klog.V(3).Infof("%s: %s <= SCT", log.origin, method) if sct.Timestamp == timeMillis { lastSCTTimestamp.Record(ctx, otel.Clamp64(sct.Timestamp), metric.WithAttributes(originKey.String(log.origin))) - lastSCTIndex.Record(ctx, otel.Clamp64(idx), metric.WithAttributes(originKey.String(log.origin))) + lastSCTIndex.Record(ctx, otel.Clamp64(index.Index), metric.WithAttributes(originKey.String(log.origin))) } return http.StatusOK, nil diff --git a/internal/ct/handlers_test.go b/internal/ct/handlers_test.go index 4b979ae0..e4fc01b7 100644 --- a/internal/ct/handlers_test.go +++ b/internal/ct/handlers_test.go @@ -41,11 +41,11 @@ import ( "github.com/transparency-dev/static-ct/internal/types/staticct" "github.com/transparency-dev/static-ct/internal/x509util" "github.com/transparency-dev/static-ct/storage" - "github.com/transparency-dev/static-ct/storage/bbolt" tessera "github.com/transparency-dev/trillian-tessera" "github.com/transparency-dev/trillian-tessera/api/layout" "github.com/transparency-dev/trillian-tessera/ctonly" posixTessera "github.com/transparency-dev/trillian-tessera/storage/posix" + badger_as "github.com/transparency-dev/trillian-tessera/storage/posix/antispam" "golang.org/x/mod/sumdb/note" "k8s.io/klog/v2" ) @@ -135,8 +135,8 @@ func setupTestServer(t *testing.T, log *log, path string) *httptest.Server { // newPOSIXStorageFunc returns a function to create a new storage.CTStorage instance with: // - a POSIX Tessera storage driver +// - a Badger Tessera antispam database // - a POSIX issuer storage system -// - a BBolt deduplication database // // It also prepares directories to host the log and the deduplication database. func newPOSIXStorageFunc(t *testing.T, root string) storage.CreateStorage { @@ -148,11 +148,19 @@ func newPOSIXStorageFunc(t *testing.T, root string) storage.CreateStorage { klog.Fatalf("Failed to initialize POSIX Tessera storage driver: %v", err) } + asOpts := badger_as.AntispamOpts{ + MaxBatchSize: 5000, + PushbackThreshold: 1024, + } + antispam, err := badger_as.NewAntispam(ctx, path.Join(root, "dedup.db"), asOpts) + if err != nil { + klog.Exitf("Failed to create new GCP antispam storage: %v", err) + } + opts := tessera.NewAppendOptions(). WithCheckpointSigner(signer). - WithCTLayout() - // TODO(phboneff): add other options like MaxBatchSize of 1 when implementing - // additional tests + WithCTLayout(). + WithAntispam(256, antispam) appender, _, _, err := tessera.NewAppender(ctx, driver, opts) if err != nil { @@ -164,12 +172,7 @@ func newPOSIXStorageFunc(t *testing.T, root string) storage.CreateStorage { klog.Fatalf("failed to initialize InMemory issuer storage: %v", err) } - beDedupStorage, err := bbolt.NewStorage(path.Join(root, "dedup.db")) - if err != nil { - klog.Fatalf("Failed to initialize BBolt deduplication database: %v", err) - } - - s, err := storage.NewCTStorage(appender, issuerStorage, beDedupStorage) + s, err := storage.NewCTStorage(appender, issuerStorage) if err != nil { klog.Fatalf("Failed to initialize CTStorage: %v", err) } diff --git a/modules/README.md b/modules/README.md deleted file mode 100644 index c73e698c..00000000 --- a/modules/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Modules - -This directory contains modules that Tessera Personality can link to get extra functionalities. - -TODO: move out of this directory once we've sorted out repo structure for personalities \ No newline at end of file diff --git a/modules/dedup/dedup.go b/modules/dedup/dedup.go deleted file mode 100644 index 03a73053..00000000 --- a/modules/dedup/dedup.go +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright 2024 The Tessera authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package dedup limits the number of duplicate entries a personality allows in a Tessera log. -package dedup - -import ( - "bytes" - "context" - "crypto/sha256" - "errors" - "fmt" - "math" - "os" - "strconv" - "time" - - "github.com/transparency-dev/trillian-tessera/api/layout" - "github.com/transparency-dev/trillian-tessera/client" - "golang.org/x/crypto/cryptobyte" - "k8s.io/klog/v2" -) - -// LeafDedupInfo enables building deduplicated add-pre-chain/add-chain responses. -type LeafDedupInfo struct { - LeafID []byte - SCTDedupInfo -} - -// SCTDedupInfo contains data to build idempotent SCTs. -type SCTDedupInfo struct { - Idx uint64 - Timestamp uint64 -} - -type BEDedupStorage interface { - Add(ctx context.Context, lidxs []LeafDedupInfo) error - Get(ctx context.Context, leafID []byte) (SCTDedupInfo, bool, error) -} - -// TODO: re-architecture to prevent creating a LocaLBEDedupStorage without calling UpdateFromLog -type LocalBEDedupStorage interface { - Add(ctx context.Context, lidxs []LeafDedupInfo) error - Get(ctx context.Context, leafID []byte) (SCTDedupInfo, bool, error) - LogSize() (uint64, error) -} - -type ParseBundleFunc func([]byte, uint64) ([]LeafDedupInfo, error) - -// UpdateFromLog synchronises a local best effort deduplication storage with a log. -func UpdateFromLog(ctx context.Context, lds LocalBEDedupStorage, t time.Duration, fcp client.CheckpointFetcherFunc, fb client.EntryBundleFetcherFunc, pb ParseBundleFunc) { - tck := time.NewTicker(t) - defer tck.Stop() - for { - select { - case <-ctx.Done(): - return - case <-tck.C: - if err := sync(ctx, lds, pb, fcp, fb); err != nil { - klog.Warningf("error updating deduplication data: %v", err) - } - } - } -} - -// sync synchronises a deduplication storage with the corresponding log content. -func sync(ctx context.Context, lds LocalBEDedupStorage, pb ParseBundleFunc, fcp client.CheckpointFetcherFunc, fb client.EntryBundleFetcherFunc) error { - cpRaw, err := fcp(ctx) - if err != nil { - return fmt.Errorf("error fetching checkpoint: %v", err) - } - // A https://c2sp.org/static-ct-api logsize is on the second line - l := bytes.SplitN(cpRaw, []byte("\n"), 3) - if len(l) < 2 { - return errors.New("invalid checkpoint - no size") - } - ckptSize, err := strconv.ParseUint(string(l[1]), 10, 64) - if err != nil { - return fmt.Errorf("invalid checkpoint - can't extract size: %v", err) - } - oldSize, err := lds.LogSize() - if err != nil { - return fmt.Errorf("OldSize(): %v", err) - } - - // TODO(phboneff): add parallelism - // Greatly inspired by - // https://github.com/transparency-dev/trillian-tessera/blob/main/client/client.go - if ckptSize > oldSize { - klog.V(2).Infof("LocalBEDEdup.sync(): log at size %d, dedup database at size %d, startig to sync", ckptSize, oldSize) - for i := oldSize / 256; i <= ckptSize/256; i++ { - eRaw, err := fb(ctx, i, layout.PartialTileSize(0, i, ckptSize)) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - return fmt.Errorf("leaf bundle at index %d not found: %v", i, err) - } - return fmt.Errorf("failed to fetch leaf bundle at index %d: %v", i, err) - } - ldis, err := pb(eRaw, i) - if err != nil { - return fmt.Errorf("parseBundle(): %v", err) - } - - if err := lds.Add(ctx, ldis); err != nil { - return fmt.Errorf("error storing deduplication data for tile %d: %v", i, err) - } - klog.V(3).Infof("LocalBEDEdup.sync(): stored dedup data for entry bundle %d, %d more bundles to go", i, ckptSize/256-i) - } - } - klog.V(3).Infof("LocalBEDEdup.sync(): dedup data synced to logsize %d", ckptSize) - return nil -} - -// DedupFromBundle converts a bundle into an array of LeafDedupInfo. -// -// The index of a leaf is computed from its position in the log, instead of parsing SCTs. -// Greatly inspired by https://github.com/FiloSottile/sunlight/blob/main/tile.go -// TODO(phboneff): move this somewhere else, and only leave crypto in this file -func DedupFromBundle(bundle []byte, bundleIdx uint64) ([]LeafDedupInfo, error) { - kvs := []LeafDedupInfo{} - s := cryptobyte.String(bundle) - - for i := bundleIdx * 256; len(s) > 0; i++ { - var timestamp uint64 - var entryType uint16 - var extensions, fingerprints cryptobyte.String - if !s.ReadUint64(×tamp) || !s.ReadUint16(&entryType) || timestamp > math.MaxInt64 { - return nil, fmt.Errorf("invalid data tile") - } - crt := []byte{} - switch entryType { - case 0: // x509_entry - if !s.ReadUint24LengthPrefixed((*cryptobyte.String)(&crt)) || - !s.ReadUint16LengthPrefixed(&extensions) || - !s.ReadUint16LengthPrefixed(&fingerprints) { - return nil, fmt.Errorf("invalid data tile x509_entry") - } - case 1: // precert_entry - IssuerKeyHash := [32]byte{} - var defangedCrt, extensions cryptobyte.String - if !s.CopyBytes(IssuerKeyHash[:]) || - !s.ReadUint24LengthPrefixed(&defangedCrt) || - !s.ReadUint16LengthPrefixed(&extensions) || - !s.ReadUint24LengthPrefixed((*cryptobyte.String)(&crt)) || - !s.ReadUint16LengthPrefixed(&fingerprints) { - return nil, fmt.Errorf("invalid data tile precert_entry") - } - default: - return nil, fmt.Errorf("invalid data tile: unknown type %d", entryType) - } - k := sha256.Sum256(crt) - sctDedupInfo := SCTDedupInfo{Idx: uint64(i), Timestamp: timestamp} - kvs = append(kvs, LeafDedupInfo{LeafID: k[:], SCTDedupInfo: sctDedupInfo}) - } - return kvs, nil -} diff --git a/storage/bbolt/dedup.go b/storage/bbolt/dedup.go deleted file mode 100644 index 20d998e2..00000000 --- a/storage/bbolt/dedup.go +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2024 The Tessera authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package bbolt implements modules/dedup using BBolt. -// -// It contains two buckets: -// - The dedup bucket stores pairs. Entries can either be added after -// sequencing, by the server that received the request, or later when synchronising the dedup -// storage with the log state. -// - The size bucket has a single entry: <"size", X>, where X is the largest contiguous index -// from 0 that has been inserted in the dedup bucket. This allows to know at what index -// deduplication synchronisation should start in order to have the full picture of a log. -// -// Calls to Add will update idx to a smaller value, if possible. -package bbolt - -import ( - "context" - "encoding/binary" - "fmt" - - "github.com/transparency-dev/static-ct/modules/dedup" - - bolt "go.etcd.io/bbolt" - "k8s.io/klog/v2" -) - -var ( - dedupBucket = "leafIdx" - sizeBucket = "logSize" -) - -type Storage struct { - db *bolt.DB -} - -// NewStorage returns a new BBolt storage instance with a dedup and size bucket. -// -// The dedup bucket stores pairs, where idx::timestamp is the -// concatenation of two uint64 8 bytes BigEndian representation. -// The size bucket has a single entry: <"size", X>, where X is the largest contiguous index from 0 -// that has been inserted in the dedup bucket. -// -// If a database already exists at the provided path, NewStorage will load it. -func NewStorage(path string) (*Storage, error) { - db, err := bolt.Open(path, 0600, nil) - if err != nil { - return nil, fmt.Errorf("bolt.Open(): %v", err) - } - s := &Storage{db: db} - - err = db.Update(func(tx *bolt.Tx) error { - dedupB := tx.Bucket([]byte(dedupBucket)) - sizeB := tx.Bucket([]byte(sizeBucket)) - if dedupB == nil && sizeB == nil { - klog.V(2).Infof("NewStorage: no pre-existing buckets, will create %q and %q.", dedupBucket, sizeBucket) - _, err := tx.CreateBucket([]byte(dedupBucket)) - if err != nil { - return fmt.Errorf("create %q bucket: %v", dedupBucket, err) - } - sb, err := tx.CreateBucket([]byte(sizeBucket)) - if err != nil { - return fmt.Errorf("create %q bucket: %v", sizeBucket, err) - } - klog.V(2).Infof("NewStorage: initializing %q with size 0.", sizeBucket) - err = sb.Put([]byte("size"), itob(0)) - if err != nil { - return fmt.Errorf("error reading logsize: %v", err) - } - } else if dedupB == nil && sizeB != nil { - return fmt.Errorf("inconsistent deduplication storage state %q is nil but %q it not nil", dedupBucket, sizeBucket) - } else if dedupB != nil && sizeB == nil { - return fmt.Errorf("inconsistent deduplication storage state, %q is not nil but %q is nil", dedupBucket, sizeBucket) - } else { - klog.V(2).Infof("NewStorage: found pre-existing %q and %q buckets.", dedupBucket, sizeBucket) - } - return nil - }) - - if err != nil { - return nil, fmt.Errorf("error initializing buckets: %v", err) - } - - return s, nil -} - -// Add inserts entries in the dedup bucket and updates the size bucket if need be. -// -// If an entry already exists under a key, Add only updates the value if the new idx is smaller. -// The context is here for consistency with interfaces, but isn't used by BBolt. -func (s *Storage) Add(_ context.Context, ldis []dedup.LeafDedupInfo) error { - for _, ldi := range ldis { - err := s.db.Update(func(tx *bolt.Tx) error { - db := tx.Bucket([]byte(dedupBucket)) - sb := tx.Bucket([]byte(sizeBucket)) - - sizeB := sb.Get([]byte("size")) - if sizeB == nil { - return fmt.Errorf("can't find log size in bucket %q", sizeBucket) - } - size := btoi(sizeB) - vB, err := vtob(ldi.Idx, ldi.Timestamp) - if err != nil { - return fmt.Errorf("vtob(): %v", err) - } - - // old should always be 16 bytes long, but double check - if old := db.Get(ldi.LeafID); len(old) == 16 && btoi(old[:8]) <= ldi.Idx { - klog.V(3).Infof("Add(): bucket %q already contains a smaller index %d < %d for entry \"%x\", not updating", dedupBucket, btoi(old[:8]), ldi.Idx, ldi.LeafID) - } else if err := db.Put(ldi.LeafID, vB); err != nil { - return err - } - // size is a length, ldi.Idx an index, so if they're equal, - // ldi is a new entry. - if size == ldi.Idx { - klog.V(3).Infof("Add(): updating deduped size to %d", size+1) - if err := sb.Put([]byte("size"), itob(size+1)); err != nil { - return err - } - } - return nil - }) - if err != nil { - return fmt.Errorf("db.Update(): error writing leaf index %d: err", ldi.Idx) - } - } - return nil -} - -// Get reads entries from the dedup bucket. -// -// If the requested entry is missing from the bucket, returns false ("comma ok" idiom). -// The context is here for consistency with interfaces, but isn't used by BBolt. -func (s *Storage) Get(_ context.Context, leafID []byte) (dedup.SCTDedupInfo, bool, error) { - var v []byte - _ = s.db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte(dedupBucket)) - if vv := b.Get(leafID); vv != nil { - v = make([]byte, len(vv)) - copy(v, vv) - } - return nil - }) - if v == nil { - return dedup.SCTDedupInfo{}, false, nil - } - idx, t, err := btov(v) - if err != nil { - return dedup.SCTDedupInfo{}, false, fmt.Errorf("btov(): %v", err) - } - return dedup.SCTDedupInfo{Idx: idx, Timestamp: t}, true, nil -} - -// LogSize reads the latest entry from the size bucket. -func (s *Storage) LogSize() (uint64, error) { - var size []byte - err := s.db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte(sizeBucket)) - v := b.Get([]byte("size")) - if v != nil { - size = make([]byte, 8) - copy(size, v) - } - return nil - }) - if err != nil { - return 0, fmt.Errorf("error reading from %q: %v", sizeBucket, err) - } - if size == nil { - return 0, fmt.Errorf("can't find log size in bucket %q", sizeBucket) - } - return btoi(size), nil -} - -// itob returns an 8-byte big endian representation of idx. -func itob(idx uint64) []byte { - return binary.BigEndian.AppendUint64(nil, idx) -} - -// btoi converts a byte array to a uint64 -func btoi(b []byte) uint64 { - return binary.BigEndian.Uint64(b) -} - -// vtob concatenates an index and timestamp values into a byte array. -func vtob(idx uint64, timestamp uint64) ([]byte, error) { - b := make([]byte, 0, 16) - var err error - - b, err = binary.Append(b, binary.BigEndian, idx) - if err != nil { - return nil, fmt.Errorf("binary.Append() could not encode idx: %v", err) - } - b, err = binary.Append(b, binary.BigEndian, timestamp) - if err != nil { - return nil, fmt.Errorf("binary.Append() could not encode timestamp: %v", err) - } - - return b, nil -} - -// btov parses a byte array into an index and timestamp values. -func btov(b []byte) (uint64, uint64, error) { - var idx, timestamp uint64 - if l := len(b); l != 16 { - return 0, 0, fmt.Errorf("input value is %d bytes long, expected %d", l, 16) - } - n, err := binary.Decode(b, binary.BigEndian, &idx) - if err != nil { - return 0, 0, fmt.Errorf("binary.Decode() could not decode idx: %v", err) - } - _, err = binary.Decode(b[n:], binary.BigEndian, ×tamp) - if err != nil { - return 0, 0, fmt.Errorf("binary.Decode() could not decode timestamp: %v", err) - } - return idx, timestamp, nil -} diff --git a/storage/gcp/dedup.go b/storage/gcp/dedup.go deleted file mode 100644 index 36408a73..00000000 --- a/storage/gcp/dedup.go +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2024 The Tessera authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gcp - -import ( - "context" - "fmt" - - "cloud.google.com/go/spanner" - "cloud.google.com/go/spanner/apiv1/spannerpb" - "github.com/transparency-dev/static-ct/modules/dedup" - "google.golang.org/grpc/codes" -) - -// NewDedupeStorage returns a struct which can be used to store identity -> index mappings backed -// by Spanner. -// -// Note that updates to this dedup storage is logically entriely separate from any updates -// happening to the log storage. -func NewDedupeStorage(ctx context.Context, spannerDB string) (*DedupStorage, error) { - /* - Schema for reference: - - CREATE TABLE IDSeq ( - id INT64 NOT NULL, - h BYTES(MAX) NOT NULL, - idx INT64 NOT NULL, - timestamp INT64 NOT NULL, - ) PRIMARY KEY (id, h); - */ - dedupDB, err := spanner.NewClient(ctx, spannerDB) - if err != nil { - return nil, fmt.Errorf("failed to connect to Spanner: %v", err) - } - - return &DedupStorage{ - dbPool: dedupDB, - }, nil -} - -// DedupStorage is a GCP Spanner based dedup storage implementation for TesseraCT. -type DedupStorage struct { - dbPool *spanner.Client -} - -var _ dedup.BEDedupStorage = &DedupStorage{} - -// Get looks up the stored index, if any, for the given identity. -func (d *DedupStorage) Get(ctx context.Context, i []byte) (dedup.SCTDedupInfo, bool, error) { - var idx, timestamp int64 - if row, err := d.dbPool.Single().ReadRow(ctx, "IDSeq", spanner.Key{0, i}, []string{"idx", "timestamp"}); err != nil { - if c := spanner.ErrCode(err); c == codes.NotFound { - return dedup.SCTDedupInfo{}, false, nil - } - return dedup.SCTDedupInfo{}, false, err - } else { - if err := row.Columns(&idx, ×tamp); err != nil { - return dedup.SCTDedupInfo{}, false, fmt.Errorf("failed to read dedup index: %v", err) - } - idx := uint64(idx) - t := uint64(timestamp) - return dedup.SCTDedupInfo{Idx: idx, Timestamp: t}, true, nil - } -} - -// Add stores associations between the passed-in identities and their indices. -func (d *DedupStorage) Add(ctx context.Context, entries []dedup.LeafDedupInfo) error { - m := make([]*spanner.MutationGroup, 0, len(entries)) - for _, e := range entries { - m = append(m, &spanner.MutationGroup{ - Mutations: []*spanner.Mutation{ - spanner.Insert("IDSeq", []string{"id", "h", "idx", "timestamp"}, - []any{0, e.LeafID, int64(e.Idx), int64(e.Timestamp)})}, - }) - } - - i := d.dbPool.BatchWrite(ctx, m) - return i.Do(func(r *spannerpb.BatchWriteResponse) error { - s := r.GetStatus() - if c := codes.Code(s.Code); c != codes.OK && c != codes.AlreadyExists { - return fmt.Errorf("failed to write dedup record: %v (%v)", s.GetMessage(), c) - } - return nil - }) -} diff --git a/storage/storage.go b/storage/storage.go index 42a620f4..0efd8b5e 100644 --- a/storage/storage.go +++ b/storage/storage.go @@ -22,7 +22,6 @@ import ( "fmt" "sync" - "github.com/transparency-dev/static-ct/modules/dedup" tessera "github.com/transparency-dev/trillian-tessera" "github.com/transparency-dev/trillian-tessera/ctonly" "golang.org/x/mod/sumdb/note" @@ -53,15 +52,13 @@ type IssuerStorage interface { type CTStorage struct { storeData func(context.Context, *ctonly.Entry) tessera.IndexFuture storeIssuers func(context.Context, []KV) error - dedupStorage dedup.BEDedupStorage } // NewCTStorage instantiates a CTStorage object. -func NewCTStorage(logStorage *tessera.Appender, issuerStorage IssuerStorage, dedupStorage dedup.BEDedupStorage) (*CTStorage, error) { +func NewCTStorage(logStorage *tessera.Appender, issuerStorage IssuerStorage) (*CTStorage, error) { ctStorage := &CTStorage{ storeData: tessera.NewCertificateTransparencyAppender(logStorage), storeIssuers: cachedStoreIssuers(issuerStorage), - dedupStorage: dedupStorage, } return ctStorage, nil } @@ -128,28 +125,3 @@ func cachedStoreIssuers(s IssuerStorage) func(context.Context, []KV) error { return nil } } - -// AddCertDedupInfo stores in the deduplication storage. -func (cts CTStorage) AddCertDedupInfo(ctx context.Context, c *x509.Certificate, sctDedupInfo dedup.SCTDedupInfo) error { - ctx, span := tracer.Start(ctx, "tesseract.storage.AddCertDedupInfo") - defer span.End() - - key := sha256.Sum256(c.Raw) - if err := cts.dedupStorage.Add(ctx, []dedup.LeafDedupInfo{{LeafID: key[:], SCTDedupInfo: sctDedupInfo}}); err != nil { - return fmt.Errorf("error storing SCTDedupInfo %+v of \"%x\": %v", sctDedupInfo, key, err) - } - return nil -} - -// GetCertDedupInfo fetches the SCTDedupInfo of a given certificate from the deduplication storage. -func (cts CTStorage) GetCertDedupInfo(ctx context.Context, c *x509.Certificate) (dedup.SCTDedupInfo, bool, error) { - ctx, span := tracer.Start(ctx, "tesseract.storageGetCertDedupInfo") - defer span.End() - - key := sha256.Sum256(c.Raw) - sctC, ok, err := cts.dedupStorage.Get(ctx, key[:]) - if err != nil { - return dedup.SCTDedupInfo{}, false, fmt.Errorf("error fetching index of \"%x\": %v", key, err) - } - return sctC, ok, nil -} From 48e9424385b1fff0bea8af54b8213d5568a895ec Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Thu, 8 May 2025 07:58:58 +0000 Subject: [PATCH 04/12] use flag for antispam size # Conflicts: # deployment/modules/aws/tesseract/conformance/main.tf --- cmd/aws/main.go | 4 +- cmd/gcp/main.go | 3 +- .../aws/conformance/ci/.terraform.lock.hcl | 66 +++++++++++++++++++ deployment/live/aws/test/README.md | 6 ++ .../modules/aws/tesseract/conformance/main.tf | 1 + deployment/modules/gcp/cloudrun/main.tf | 1 + 6 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 deployment/live/aws/conformance/ci/.terraform.lock.hcl diff --git a/cmd/aws/main.go b/cmd/aws/main.go index a5500c3f..bddff03c 100644 --- a/cmd/aws/main.go +++ b/cmd/aws/main.go @@ -61,7 +61,7 @@ var ( dbPassword = flag.String("db_password", "", "AuroraDB password") dbMaxConns = flag.Int("db_max_conns", 0, "Maximum connections to the database, defaults to 0, i.e unlimited") dbMaxIdle = flag.Int("db_max_idle_conns", 2, "Maximum idle database connections in the connection pool, defaults to 2") - dedupPath = flag.String("dedup_path", "", "Path to the deduplication database.") + inMemoryAntispamCacheSize = flag.Uint("inmemory_antispam_cache_size", 2<<10, "Maximum number of entries to keep in the in-memory antispam cache.") rootsPemFile = flag.String("roots_pem_file", "", "Path to the file containing root certificates that are acceptable to the log. The certs are served through get-roots endpoint.") rejectExpired = flag.Bool("reject_expired", false, "If true then the certificate validity period will be checked against the current time during the validation of submissions. This will cause expired certificates to be rejected.") rejectUnexpired = flag.Bool("reject_unexpired", false, "If true then CTFE rejects certificates that are either currently valid or not yet valid.") @@ -160,7 +160,7 @@ func newAWSStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, appender, _, _, err := tessera.NewAppender(ctx, driver, tessera.NewAppendOptions(). WithCheckpointSigner(signer). WithCTLayout(). - WithAntispam(2<<18, antispam)) // TODO(phbnf): do the math to see what fits in memory + WithAntispam(*inMemoryAntispamCacheSize, antispam)) if err != nil { return nil, fmt.Errorf("failed to initialize AWS Tessera storage: %v", err) } diff --git a/cmd/gcp/main.go b/cmd/gcp/main.go index e02e506a..81efdf1e 100644 --- a/cmd/gcp/main.go +++ b/cmd/gcp/main.go @@ -55,6 +55,7 @@ var ( bucket = flag.String("bucket", "", "Name of the bucket to store the log in.") spannerDB = flag.String("spanner_db_path", "", "Spanner database path: projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") spannerAntispamDB = flag.String("spanner_antispam_db_path", "", "EXPERIMENTAL: Spanner antispam deduplication database path projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") + inMemoryAntispamCacheSize = flag.Uint("inmemory_antispam_cache_size", 2<<10, "Maximum number of entries to keep in the in-memory antispam cache.") rootsPemFile = flag.String("roots_pem_file", "", "Path to the file containing root certificates that are acceptable to the log. The certs are served through get-roots endpoint.") rejectExpired = flag.Bool("reject_expired", false, "If true then the certificate validity period will be checked against the current time during the validation of submissions. This will cause expired certificates to be rejected.") rejectUnexpired = flag.Bool("reject_unexpired", false, "If true then CTFE rejects certificates that are either currently valid or not yet valid.") @@ -170,7 +171,7 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, opts := tessera.NewAppendOptions(). WithCheckpointSigner(signer). WithCTLayout(). - WithAntispam(2<<18, antispam) // TODO(phbnf): do the math to see what fits in memory + WithAntispam(*inMemoryAntispamCacheSize, antispam) // TODO(phbnf): figure out the best way to thread the `shutdown` func NewAppends returns back out to main so we can cleanly close Tessera down // when it's time to exit. diff --git a/deployment/live/aws/conformance/ci/.terraform.lock.hcl b/deployment/live/aws/conformance/ci/.terraform.lock.hcl new file mode 100644 index 00000000..a9e266b5 --- /dev/null +++ b/deployment/live/aws/conformance/ci/.terraform.lock.hcl @@ -0,0 +1,66 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/aws" { + version = "5.92.0" + constraints = "5.92.0" + hashes = [ + "h1:ZnpTxMfg5PThZc5WZCsZELinsR0gPhdTpNmXjVcf7aE=", + "zh:1d3a0b40831360e8e988aee74a9ff3d69d95cb541c2eae5cb843c64303a091ba", + "zh:3d29cbced6c708be2041a708d25c7c0fc22d09e4d0b174360ed113bfae786137", + "zh:4341a203cf5820a0ca18bb514ae10a6c113bc6a728fb432acbf817d232e8eff4", + "zh:4a49e2d91e4d92b6b93ccbcbdcfa2d67935ce62e33b939656766bb81b3fd9a2c", + "zh:54c7189358b37fd895dedbabf84e509c1980a8c404a1ee5b29b06e40497b8655", + "zh:5d8bb1ff089c37cb65c83b4647f1981fded993e87d8132915d92d79f29e2fcd8", + "zh:618f2eb87cd65b245aefba03991ad714a51ff3b841016ef68e2da2b85d0b2325", + "zh:7bce07bc542d0588ca42bac5098dd4f8af715417cd30166b4fb97cedd44ab109", + "zh:81419eab2d8810beb114b1ff5cbb592d21edc21b809dc12bb066e4b88fdd184a", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:9dea39d4748eeeebe2e76ca59bca4ccd161c2687050878c47289a98407a23372", + "zh:d692fc33b67ac89e916c8f9233d39eacab8c438fe10172990ee9d94fba5ca372", + "zh:d9075c7da48947c029ba47d5985e1e8e3bf92367bfee8ca1ff0e747765e779a1", + "zh:e81c62db317f3b640b2e04eba0ada8aa606bcbae0152c09f6242e86b86ef5889", + "zh:f68562e073722c378d2f3529eb80ad463f12c44aa5523d558ae3b69f4de5ca1f", + ] +} + +provider "registry.terraform.io/hashicorp/tls" { + version = "4.0.6" + hashes = [ + "h1:dYSb3V94K5dDMtrBRLPzBpkMTPn+3cXZ/kIJdtFL+2M=", + "zh:10de0d8af02f2e578101688fd334da3849f56ea91b0d9bd5b1f7a243417fdda8", + "zh:37fc01f8b2bc9d5b055dc3e78bfd1beb7c42cfb776a4c81106e19c8911366297", + "zh:4578ca03d1dd0b7f572d96bd03f744be24c726bfd282173d54b100fd221608bb", + "zh:6c475491d1250050765a91a493ef330adc24689e8837a0f07da5a0e1269e11c1", + "zh:81bde94d53cdababa5b376bbc6947668be4c45ab655de7aa2e8e4736dfd52509", + "zh:abdce260840b7b050c4e401d4f75c7a199fafe58a8b213947a258f75ac18b3e8", + "zh:b754cebfc5184873840f16a642a7c9ef78c34dc246a8ae29e056c79939963c7a", + "zh:c928b66086078f9917aef0eec15982f2e337914c5c4dbc31dd4741403db7eb18", + "zh:cded27bee5f24de6f2ee0cfd1df46a7f88e84aaffc2ecbf3ff7094160f193d50", + "zh:d65eb3867e8f69aaf1b8bb53bd637c99c6b649ba3db16ded50fa9a01076d1a27", + "zh:ecb0c8b528c7a619fa71852bb3fb5c151d47576c5aab2bf3af4db52588722eeb", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} + +provider "registry.terraform.io/petoju/mysql" { + version = "3.0.71" + constraints = "3.0.71" + hashes = [ + "h1:Xwam63rfo59cWPri28xmAbuqrFsxHwAE1P7ogpMUdSU=", + "zh:15200cd22d5a03fafb41cc2ffc654ab58cbcea80920e280ce08b46b3a8378813", + "zh:16e2cc6d38d5be31c59412ced68b451129f3d048d4debb3afaf5603befdfbf50", + "zh:31e9f6cc515378f3bec1fca57892b7e3224442801ac95c18ba83148e793dbc8e", + "zh:3c456ab07b34c8b472b050c3ef333b469164a85bb79c0e5a87ac0dc7aa45eb22", + "zh:538ffa7821989fabd763227fb85aebbd85df57dba9fffc2e060306761b320110", + "zh:69cdda5e80dfb4ca71ef44d719abdee9c9250153ac2269b3a3d17a679f3fb858", + "zh:87eb21da93c9ac51f74c5e8cf53690e51fc0561cfbe14d953c342eb583f31d22", + "zh:8db301a9ae9fc6374fb5cc42bbf81c0f0ea1e7f46f447de56dd752dcecb3c43f", + "zh:a1878a45070ec92659a39e265848fcbdd0f1423dc8eb8cc764c690f73c4430a5", + "zh:a9c9029784f7273d9c153d0a4291b493d68049193068bef4f0374dcaf379e67c", + "zh:ac91bc2538d87b4cf703d2bdae2ceab8567235a9d29c0de8b74bcde0eb828f75", + "zh:b95a21d4857fe84a7e8edc3ff5a828c37a9a99eb1e116045603a0bf00b1dcb89", + "zh:bf38af7e3cde5a717fa67e1e7e936550cb6d3197774e575f13ef974ec92061d2", + "zh:e3d481f0c7a47744c40dabe731082f1730d7ae55417257f4f49404b1a93701b8", + ] +} diff --git a/deployment/live/aws/test/README.md b/deployment/live/aws/test/README.md index dff11157..854dd7de 100644 --- a/deployment/live/aws/test/README.md +++ b/deployment/live/aws/test/README.md @@ -76,6 +76,8 @@ Store the Aurora RDS database and S3 bucket information into the environment var export TESSERACT_DB_HOST=$(terragrunt output -raw rds_aurora_cluster_endpoint) export TESSERACT_DB_PASSWORD=$(aws secretsmanager get-secret-value --secret-id $(terragrunt output -json rds_aurora_cluster_master_user_secret | jq --raw-output .[0].secret_arn) --query SecretString --output text | jq --raw-output .password) export TESSERACT_BUCKET_NAME=$(terragrunt output -raw s3_bucket_name) +export SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID=$(terragrunt output -raw ecdsa_p256_public_key_id) +export SCTFE_SIGNER_ECDSA_P256_PRIVATE_KEY_ID=$(terragrunt output -raw ecdsa_p256_private_key_id) ``` Connect the VM and Aurora database following [these instructions](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/tutorial-ec2-rds-option1.html#option1-task3-connect-ec2-instance-to-rds-database), it takes a few clicks in the UI. @@ -98,6 +100,8 @@ go run ./cmd/aws \ --db_user=tesseract \ --db_password=${TESSERACT_DB_PASSWORD} \ --antispam_db_name=antispam_db + --signer_public_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID} \ + --signer_private_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PRIVATE_KEY_ID} ``` In a different terminal you can either mint and submit certificates manually, or @@ -190,6 +194,8 @@ go run ./cmd/aws \ --db_user=tesseract \ --db_password=${TESSERACT_DB_PASSWORD} \ --antispam_db_name=antispam_db + --signer_public_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID} \ + --signer_private_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PRIVATE_KEY_ID} -v=3 ``` diff --git a/deployment/modules/aws/tesseract/conformance/main.tf b/deployment/modules/aws/tesseract/conformance/main.tf index 84afed14..3bd5057c 100644 --- a/deployment/modules/aws/tesseract/conformance/main.tf +++ b/deployment/modules/aws/tesseract/conformance/main.tf @@ -173,6 +173,7 @@ resource "aws_ecs_task_definition" "conformance" { "--signer_public_key_secret_name=${module.secretsmanager.ecdsa_p256_public_key_id}", "--signer_private_key_secret_name=${module.secretsmanager.ecdsa_p256_private_key_id}", "--antispam_db_name=${module.storage.antispam_database_name}", + "--inmemory_antispam_cache_size=25000000", # About 1GB of memory. "-v=2" ], "logConfiguration" : { diff --git a/deployment/modules/gcp/cloudrun/main.tf b/deployment/modules/gcp/cloudrun/main.tf index c6652be1..33e8db25 100644 --- a/deployment/modules/gcp/cloudrun/main.tf +++ b/deployment/modules/gcp/cloudrun/main.tf @@ -51,6 +51,7 @@ resource "google_cloud_run_v2_service" "default" { "--origin=${var.base_name}${var.origin_suffix}", "--signer_public_key_secret_name=${var.signer_public_key_secret_name}", "--signer_private_key_secret_name=${var.signer_private_key_secret_name}", + "--inmemory_antispam_cache_size=25000000", # About 1GB of memory. ] ports { container_port = 6962 From 2762ee9aa335c455d437e979f4bb39b873e97697 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Thu, 8 May 2025 17:52:21 +0000 Subject: [PATCH 05/12] address comments --- cmd/aws/main.go | 8 ++++---- deployment/live/aws/test/README.md | 6 +++--- deployment/modules/aws/storage/main.tf | 2 +- deployment/modules/gcp/storage/main.tf | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cmd/aws/main.go b/cmd/aws/main.go index bddff03c..d33b5504 100644 --- a/cmd/aws/main.go +++ b/cmd/aws/main.go @@ -151,9 +151,9 @@ func newAWSStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, var antispam tessera.Antispam if *antispamDBName != "" { - antispam, err = aws_as.NewAntispam(ctx, antispamMysqlConfig().FormatDSN(), aws_as.AntispamOpts{}) + antispam, err = aws_as.NewAntispam(ctx, antispamMySQLConfig().FormatDSN(), aws_as.AntispamOpts{}) if err != nil { - klog.Exitf("Failed to create new GCP antispam storage: %v", err) + klog.Exitf("Failed to create new AWS antispam storage: %v", err) } } @@ -214,7 +214,7 @@ func storageConfigFromFlags() taws.Config { if *dbUser == "" { klog.Exit("--db_user must be set") } - // Empty passord isn't an option with AuroraDB MySQL. + // Empty password isn't an option with AuroraDB MySQL. if *dbPassword == "" { klog.Exit("--db_password must be set") } @@ -237,7 +237,7 @@ func storageConfigFromFlags() taws.Config { } } -func antispamMysqlConfig() *mysql.Config { +func antispamMySQLConfig() *mysql.Config { if *antispamDBName == "" { klog.Exit("--antispam_db_name must be set") } diff --git a/deployment/live/aws/test/README.md b/deployment/live/aws/test/README.md index 854dd7de..8f2e9fba 100644 --- a/deployment/live/aws/test/README.md +++ b/deployment/live/aws/test/README.md @@ -67,7 +67,7 @@ export AWS_PROFILE=AdministratorAccess- Terraforming the account can be done by: 1. `cd` to [/deployment/live/aws/test/](/deployment/live/aws/test/) to deploy/change. 2. Run `terragrunt apply`. If this fails to create the antispam database, - connect the RDS instance to your VM using the instrunctions bellow, and run + connect the RDS instance to your VM using the instructions below, and run `terragrunt apply` again. Store the Aurora RDS database and S3 bucket information into the environment variables: @@ -99,7 +99,7 @@ go run ./cmd/aws \ --db_port=3306 \ --db_user=tesseract \ --db_password=${TESSERACT_DB_PASSWORD} \ - --antispam_db_name=antispam_db + --antispam_db_name=antispam_db \ --signer_public_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID} \ --signer_private_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PRIVATE_KEY_ID} ``` @@ -193,7 +193,7 @@ go run ./cmd/aws \ --db_port=3306 \ --db_user=tesseract \ --db_password=${TESSERACT_DB_PASSWORD} \ - --antispam_db_name=antispam_db + --antispam_db_name=antispam_db \ --signer_public_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID} \ --signer_private_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PRIVATE_KEY_ID} -v=3 diff --git a/deployment/modules/aws/storage/main.tf b/deployment/modules/aws/storage/main.tf index ddf555ab..d585a5f2 100644 --- a/deployment/modules/aws/storage/main.tf +++ b/deployment/modules/aws/storage/main.tf @@ -59,7 +59,7 @@ data "aws_secretsmanager_secret_version" "db_credentials" { # creating the aws_db_instance. # This requires that the machine running terraform has access # to the DB instance created above. This is _NOT_ the case when -# github actions are applying the terraform. +# GitHub actions are applying the terraform. provider "mysql" { endpoint = aws_rds_cluster_instance.cluster_instances[0].endpoint username = aws_rds_cluster.log_rds_cluster.master_username diff --git a/deployment/modules/gcp/storage/main.tf b/deployment/modules/gcp/storage/main.tf index 582df7e0..8b022214 100644 --- a/deployment/modules/gcp/storage/main.tf +++ b/deployment/modules/gcp/storage/main.tf @@ -78,8 +78,8 @@ resource "google_spanner_database" "antispam_db" { instance = google_spanner_instance.log_spanner.name name = "${var.base_name}-antispam-db" ddl = [ - "CREATE TABLE IF NOT EXISTS FollowCoord (id INT64 NOT NULL, nextIdx INT64 NOT NULL) PRIMARY KEY (id)", - "CREATE TABLE IF NOT EXISTS IDSeq (h BYTES(32) NOT NULL, idx INT64 NOT NULL) PRIMARY KEY (h)", + "CREATE TABLE IF NOT EXISTS FollowCoord (id INT64 NOT NULL, nextIdx INT64 NOT NULL) PRIMARY KEY (id)", + "CREATE TABLE IF NOT EXISTS IDSeq (h BYTES(32) NOT NULL, idx INT64 NOT NULL) PRIMARY KEY (h)", ] deletion_protection = !var.ephemeral From 4dd173e12b8707007e6886538fa61ecc18322dd6 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Thu, 8 May 2025 18:00:25 +0000 Subject: [PATCH 06/12] remove old dedup terraform and readme --- deployment/live/gcp/static-ct/logs/ci/README.md | 2 +- deployment/live/gcp/test/README.md | 6 +++--- deployment/modules/aws/tesseract/conformance/main.tf | 1 - deployment/modules/gcp/cloudrun/main.tf | 2 -- deployment/modules/gcp/cloudrun/variables.tf | 5 ----- deployment/modules/gcp/storage/main.tf | 10 ---------- deployment/modules/gcp/storage/outputs.tf | 5 ----- deployment/modules/gcp/tesseract/cloudrun/main.tf | 1 - 8 files changed, 4 insertions(+), 28 deletions(-) diff --git a/deployment/live/gcp/static-ct/logs/ci/README.md b/deployment/live/gcp/static-ct/logs/ci/README.md index acf5a4e5..86306162 100644 --- a/deployment/live/gcp/static-ct/logs/ci/README.md +++ b/deployment/live/gcp/static-ct/logs/ci/README.md @@ -8,7 +8,7 @@ define a CI environment to run the SCTFE on Cloud Run, backed by Trillian Tesser At a high level, this environment consists of: - One Spanner instance with two databases: - one for Tessera - - one for deduplication + - one for antispam - A GCS Bucket - Secret Manager - Cloud Run diff --git a/deployment/live/gcp/test/README.md b/deployment/live/gcp/test/README.md index d330cbb0..2dc8e255 100644 --- a/deployment/live/gcp/test/README.md +++ b/deployment/live/gcp/test/README.md @@ -14,7 +14,7 @@ define a test environment to run the SCTFE, backed by Trillian Tessera. At a high level, this environment consists of: - One Spanner instance with two databases: - one for Tessera - - one for deduplication + - one for antispam - A GCS Bucket - Secret Manager @@ -56,7 +56,7 @@ On the VM, run the following command to bring up the SCTFE: go run ./cmd/gcp/ \ --bucket=${GOOGLE_PROJECT}-${TESSERA_BASE_NAME}-bucket \ --spanner_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-db \ - --spanner_dedup_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-dedup-db \ + --spanner_antispam_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-antispam-db \ --roots_pem_file=./internal/testdata/fake-ca.cert \ --origin=${TESSERA_BASE_NAME} \ --signer_public_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID} \ @@ -147,7 +147,7 @@ go run ./cmd/gcp/ \ --spanner_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-db \ --roots_pem_file=/tmp/hammercfg/roots.pem \ --origin=${TESSERA_BASE_NAME} \ - --spanner_dedup_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-dedup-db \ + --spanner_antispam_db_path=projects/${GOOGLE_PROJECT}/instances/${TESSERA_BASE_NAME}/databases/${TESSERA_BASE_NAME}-antispam-db \ --signer_public_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PUBLIC_KEY_ID} \ --signer_private_key_secret_name=${SCTFE_SIGNER_ECDSA_P256_PRIVATE_KEY_ID} \ -v=3 diff --git a/deployment/modules/aws/tesseract/conformance/main.tf b/deployment/modules/aws/tesseract/conformance/main.tf index 3bd5057c..57f5cff3 100644 --- a/deployment/modules/aws/tesseract/conformance/main.tf +++ b/deployment/modules/aws/tesseract/conformance/main.tf @@ -169,7 +169,6 @@ resource "aws_ecs_task_definition" "conformance" { "--db_name=tesseract", "--db_host=${module.storage.rds_aurora_cluster_endpoint}", "--db_port=3306", - "--dedup_path=ci-static-ct", "--signer_public_key_secret_name=${module.secretsmanager.ecdsa_p256_public_key_id}", "--signer_private_key_secret_name=${module.secretsmanager.ecdsa_p256_private_key_id}", "--antispam_db_name=${module.storage.antispam_database_name}", diff --git a/deployment/modules/gcp/cloudrun/main.tf b/deployment/modules/gcp/cloudrun/main.tf index 33e8db25..4b16a1a5 100644 --- a/deployment/modules/gcp/cloudrun/main.tf +++ b/deployment/modules/gcp/cloudrun/main.tf @@ -12,7 +12,6 @@ terraform { locals { cloudrun_service_account_id = var.env == "" ? "cloudrun-sa" : "cloudrun-${var.env}-sa" spanner_log_db_path = "projects/${var.project_id}/instances/${var.log_spanner_instance}/databases/${var.log_spanner_db}" - spanner_dedup_db_path = "projects/${var.project_id}/instances/${var.log_spanner_instance}/databases/${var.dedup_spanner_db}" spanner_antispam_db_path = "projects/${var.project_id}/instances/${var.log_spanner_instance}/databases/${var.antispam_spanner_db}" } @@ -45,7 +44,6 @@ resource "google_cloud_run_v2_service" "default" { "--http_endpoint=:6962", "--bucket=${var.bucket}", "--spanner_db_path=${local.spanner_log_db_path}", - "--spanner_dedup_db_path=${local.spanner_dedup_db_path}", "--spanner_antispam_db_path=${local.spanner_antispam_db_path}", "--roots_pem_file=/bin/test_root_ca_cert.pem", "--origin=${var.base_name}${var.origin_suffix}", diff --git a/deployment/modules/gcp/cloudrun/variables.tf b/deployment/modules/gcp/cloudrun/variables.tf index 73cde184..3d8ae310 100644 --- a/deployment/modules/gcp/cloudrun/variables.tf +++ b/deployment/modules/gcp/cloudrun/variables.tf @@ -43,11 +43,6 @@ variable "log_spanner_db" { type = string } -variable "dedup_spanner_db" { - description = "Dedup Spanner database" - type = string -} - variable "antispam_spanner_db" { description = "Antispam Spanner database" type = string diff --git a/deployment/modules/gcp/storage/main.tf b/deployment/modules/gcp/storage/main.tf index 8b022214..5a9fb644 100644 --- a/deployment/modules/gcp/storage/main.tf +++ b/deployment/modules/gcp/storage/main.tf @@ -64,16 +64,6 @@ resource "google_spanner_database" "log_db" { deletion_protection = !var.ephemeral } -resource "google_spanner_database" "dedup_db" { - instance = google_spanner_instance.log_spanner.name - name = "${var.base_name}-dedup-db" - ddl = [ - "CREATE TABLE IDSeq (id INT64 NOT NULL, h BYTES(MAX) NOT NULL, idx INT64 NOT NULL, timestamp INT64 NOT NULL,) PRIMARY KEY (id, h)", - ] - - deletion_protection = !var.ephemeral -} - resource "google_spanner_database" "antispam_db" { instance = google_spanner_instance.log_spanner.name name = "${var.base_name}-antispam-db" diff --git a/deployment/modules/gcp/storage/outputs.tf b/deployment/modules/gcp/storage/outputs.tf index 387009fa..bbbd74cc 100644 --- a/deployment/modules/gcp/storage/outputs.tf +++ b/deployment/modules/gcp/storage/outputs.tf @@ -13,11 +13,6 @@ output "log_spanner_db" { value = google_spanner_database.log_db } -output "dedup_spanner_db" { - description = "Dedup Spanner database" - value = google_spanner_database.dedup_db -} - output "antispam_spanner_db" { description = "Antispam Spanner database" value = google_spanner_database.antispam_db diff --git a/deployment/modules/gcp/tesseract/cloudrun/main.tf b/deployment/modules/gcp/tesseract/cloudrun/main.tf index 747ac50a..786a537f 100644 --- a/deployment/modules/gcp/tesseract/cloudrun/main.tf +++ b/deployment/modules/gcp/tesseract/cloudrun/main.tf @@ -30,7 +30,6 @@ module "cloudrun" { bucket = module.storage.log_bucket.id log_spanner_instance = module.storage.log_spanner_instance.name log_spanner_db = module.storage.log_spanner_db.name - dedup_spanner_db = module.storage.dedup_spanner_db.name antispam_spanner_db = module.storage.antispam_spanner_db.name signer_public_key_secret_name = module.secretmanager.ecdsa_p256_public_key_id signer_private_key_secret_name = module.secretmanager.ecdsa_p256_private_key_id From 6823b7b3f6830fceb9e0031a029f6cfad7b91ec1 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Thu, 8 May 2025 18:00:45 +0000 Subject: [PATCH 07/12] remove dedup todo --- internal/ct/handlers.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/ct/handlers.go b/internal/ct/handlers.go index 6729230b..55e84c7a 100644 --- a/internal/ct/handlers.go +++ b/internal/ct/handlers.go @@ -68,7 +68,6 @@ var ( // setupMetrics initializes all the exported metrics. func setupMetrics() { - // TODO(phboneff): add metrics for deduplication and chain storage. knownLogs = mustCreate(meter.Int64Gauge("tesseract.known_logs", metric.WithDescription("Set to 1 for known logs"))) From 19c2c1d5c459623e09886920296c7f2524d92d75 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Fri, 9 May 2025 14:47:12 +0000 Subject: [PATCH 08/12] address comments --- cmd/aws/main.go | 6 +- cmd/gcp/main.go | 9 +- internal/ct/ctlog.go | 3 +- internal/ct/handlers.go | 19 ++-- internal/ct/handlers_test.go | 166 ++++++++++++++++------------ internal/types/staticct/staticct.go | 2 + storage/storage.go | 77 ++++++++++++- 7 files changed, 188 insertions(+), 94 deletions(-) diff --git a/cmd/aws/main.go b/cmd/aws/main.go index d33b5504..0574ef62 100644 --- a/cmd/aws/main.go +++ b/cmd/aws/main.go @@ -61,7 +61,7 @@ var ( dbPassword = flag.String("db_password", "", "AuroraDB password") dbMaxConns = flag.Int("db_max_conns", 0, "Maximum connections to the database, defaults to 0, i.e unlimited") dbMaxIdle = flag.Int("db_max_idle_conns", 2, "Maximum idle database connections in the connection pool, defaults to 2") - inMemoryAntispamCacheSize = flag.Uint("inmemory_antispam_cache_size", 2<<10, "Maximum number of entries to keep in the in-memory antispam cache.") + inMemoryAntispamCacheSize = flag.Uint("inmemory_antispam_cache_size", 256<<10, "Maximum number of entries to keep in the in-memory antispam cache.") rootsPemFile = flag.String("roots_pem_file", "", "Path to the file containing root certificates that are acceptable to the log. The certs are served through get-roots endpoint.") rejectExpired = flag.Bool("reject_expired", false, "If true then the certificate validity period will be checked against the current time during the validation of submissions. This will cause expired certificates to be rejected.") rejectUnexpired = flag.Bool("reject_unexpired", false, "If true then CTFE rejects certificates that are either currently valid or not yet valid.") @@ -157,7 +157,7 @@ func newAWSStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, } } - appender, _, _, err := tessera.NewAppender(ctx, driver, tessera.NewAppendOptions(). + appender, _, reader, err := tessera.NewAppender(ctx, driver, tessera.NewAppendOptions(). WithCheckpointSigner(signer). WithCTLayout(). WithAntispam(*inMemoryAntispamCacheSize, antispam)) @@ -170,7 +170,7 @@ func newAWSStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, return nil, fmt.Errorf("failed to initialize AWS issuer storage: %v", err) } - return storage.NewCTStorage(appender, issuerStorage) + return storage.NewCTStorage(appender, issuerStorage, reader) } type timestampFlag struct { diff --git a/cmd/gcp/main.go b/cmd/gcp/main.go index 81efdf1e..56785250 100644 --- a/cmd/gcp/main.go +++ b/cmd/gcp/main.go @@ -54,8 +54,8 @@ var ( origin = flag.String("origin", "", "Origin of the log, for checkpoints and the monitoring prefix.") bucket = flag.String("bucket", "", "Name of the bucket to store the log in.") spannerDB = flag.String("spanner_db_path", "", "Spanner database path: projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") - spannerAntispamDB = flag.String("spanner_antispam_db_path", "", "EXPERIMENTAL: Spanner antispam deduplication database path projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") - inMemoryAntispamCacheSize = flag.Uint("inmemory_antispam_cache_size", 2<<10, "Maximum number of entries to keep in the in-memory antispam cache.") + spannerAntispamDB = flag.String("spanner_antispam_db_path", "", "Spanner antispam deduplication database path projects/{projectId}/instances/{instanceId}/databases/{databaseId}.") + inMemoryAntispamCacheSize = flag.Uint("inmemory_antispam_cache_size", 256<<10, "Maximum number of entries to keep in the in-memory antispam cache.") rootsPemFile = flag.String("roots_pem_file", "", "Path to the file containing root certificates that are acceptable to the log. The certs are served through get-roots endpoint.") rejectExpired = flag.Bool("reject_expired", false, "If true then the certificate validity period will be checked against the current time during the validation of submissions. This will cause expired certificates to be rejected.") rejectUnexpired = flag.Bool("reject_unexpired", false, "If true then CTFE rejects certificates that are either currently valid or not yet valid.") @@ -160,7 +160,6 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, } var antispam tessera.Antispam - // Persistent antispam is currently experimental, so there's no terraform or documentation yet! if *spannerAntispamDB != "" { antispam, err = gcp_as.NewAntispam(ctx, *spannerAntispamDB, gcp_as.AntispamOpts{}) if err != nil { @@ -175,7 +174,7 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, // TODO(phbnf): figure out the best way to thread the `shutdown` func NewAppends returns back out to main so we can cleanly close Tessera down // when it's time to exit. - appender, _, _, err := tessera.NewAppender(ctx, driver, opts) + appender, _, reader, err := tessera.NewAppender(ctx, driver, opts) if err != nil { return nil, fmt.Errorf("failed to initialize GCP Tessera appender: %v", err) } @@ -185,7 +184,7 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, return nil, fmt.Errorf("failed to initialize GCP issuer storage: %v", err) } - return storage.NewCTStorage(appender, issuerStorage) + return storage.NewCTStorage(appender, issuerStorage, reader) } type timestampFlag struct { diff --git a/internal/ct/ctlog.go b/internal/ct/ctlog.go index 0126df16..51f85f90 100644 --- a/internal/ct/ctlog.go +++ b/internal/ct/ctlog.go @@ -10,7 +10,6 @@ import ( "github.com/transparency-dev/static-ct/internal/types/rfc6962" "github.com/transparency-dev/static-ct/storage" - tessera "github.com/transparency-dev/trillian-tessera" "github.com/transparency-dev/trillian-tessera/ctonly" "k8s.io/klog/v2" ) @@ -35,7 +34,7 @@ type signSCT func(leaf *rfc6962.MerkleTreeLeaf) (*rfc6962.SignedCertificateTimes // Storage provides functions to store certificates in a static-ct-api log. type Storage interface { // Add assigns an index to the provided Entry, stages the entry for integration, and returns a future for the assigned index. - Add(context.Context, *ctonly.Entry) tessera.IndexFuture + Add(context.Context, *ctonly.Entry) (idx uint64, timestamp uint64, err error) // AddIssuerChain stores every the chain certificate in a content-addressable store under their sha256 hash. AddIssuerChain(context.Context, []*x509.Certificate) error } diff --git a/internal/ct/handlers.go b/internal/ct/handlers.go index 55e84c7a..d86270f2 100644 --- a/internal/ct/handlers.go +++ b/internal/ct/handlers.go @@ -118,11 +118,11 @@ func (a appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { originAttr := originKey.String(a.log.origin) operationAttr := operationKey.String(a.name) reqCounter.Add(r.Context(), 1, metric.WithAttributes(originAttr, operationAttr)) - startTime := a.opts.TimeSource.Now() + startTime := time.Now() logCtx := a.opts.RequestLog.start(r.Context()) a.opts.RequestLog.origin(logCtx, a.log.origin) defer func() { - latency := a.opts.TimeSource.Now().Sub(startTime).Seconds() + latency := time.Since(startTime).Seconds() reqDuration.Record(r.Context(), latency, metric.WithAttributes(originAttr, operationAttr, codeKey.Int(statusCode))) }() @@ -146,7 +146,7 @@ func (a appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } // impose a deadline on this onward request. - ctx, cancel := context.WithDeadline(logCtx, deadlineTime(a.opts)) + ctx, cancel := context.WithTimeout(logCtx, a.opts.Deadline) defer cancel() var err error @@ -178,6 +178,7 @@ type HandlerOptions struct { // or returned to the user containing the full error message. MaskInternalErrors bool // TimeSource indicated the system time and can be injfected for testing. + // TODO(phbnf): hide inside the log TimeSource TimeSource } @@ -274,7 +275,7 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt } klog.V(2).Infof("%s: %s => storage.Add", log.origin, method) - index, err := log.storage.Add(ctx, entry)() + index, dedupedTimeMillis, err := log.storage.Add(ctx, entry) if err != nil { if errors.Is(err, tessera.ErrPushback) { w.Header().Add("Retry-After", "1") @@ -282,10 +283,11 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt } return http.StatusInternalServerError, fmt.Errorf("couldn't store the leaf: %v", err) } + entry.Timestamp = dedupedTimeMillis // Always use the returned leaf as the basis for an SCT. var loggedLeaf rfc6962.MerkleTreeLeaf - leafValue := entry.MerkleTreeLeaf(index.Index) + leafValue := entry.MerkleTreeLeaf(index) if rest, err := tls.Unmarshal(leafValue, &loggedLeaf); err != nil { return http.StatusInternalServerError, fmt.Errorf("failed to reconstruct MerkleTreeLeaf: %s", err) } else if len(rest) > 0 { @@ -312,7 +314,7 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt klog.V(3).Infof("%s: %s <= SCT", log.origin, method) if sct.Timestamp == timeMillis { lastSCTTimestamp.Record(ctx, otel.Clamp64(sct.Timestamp), metric.WithAttributes(originKey.String(log.origin))) - lastSCTIndex.Record(ctx, otel.Clamp64(index.Index), metric.WithAttributes(originKey.String(log.origin))) + lastSCTIndex.Record(ctx, otel.Clamp64(index), metric.WithAttributes(originKey.String(log.origin))) } return http.StatusOK, nil @@ -355,11 +357,6 @@ func getRoots(ctx context.Context, opts *HandlerOptions, log *log, w http.Respon return http.StatusOK, nil } -// deadlineTime calculates the future time a request should expire based on our config. -func deadlineTime(opts *HandlerOptions) time.Time { - return opts.TimeSource.Now().Add(opts.Deadline) -} - // marshalAndWriteAddChainResponse is used by add-chain and add-pre-chain to create and write // the JSON response to the client func marshalAndWriteAddChainResponse(sct *rfc6962.SignedCertificateTimestamp, w http.ResponseWriter) error { diff --git a/internal/ct/handlers_test.go b/internal/ct/handlers_test.go index e4fc01b7..1d166099 100644 --- a/internal/ct/handlers_test.go +++ b/internal/ct/handlers_test.go @@ -55,8 +55,10 @@ var ( testRootPath = "../testdata/test_root_ca_cert.pem" // Arbitrary time for use in tests - fakeTime = time.Date(2016, 7, 22, 11, 01, 13, 0, time.UTC) - fakeTimeMillis = uint64(fakeTime.UnixNano() / nanosPerMilli) + fakeTimeStart = time.Date(2016, 7, 22, 11, 01, 13, 0, time.UTC) + // TODO(phbnf): this doesn't need to be gloabal, but it easier until + // TimeSource belongs to hOpts. + timeSource = newFakeTimeSource(fakeTimeStart) // Arbitrary origin for tests origin = "example.com" @@ -64,10 +66,10 @@ var ( // Default handler options for tests hOpts = HandlerOptions{ - Deadline: time.Millisecond * 500, + Deadline: time.Millisecond * 2000, RequestLog: &DefaultRequestLog{}, MaskInternalErrors: false, - TimeSource: newFixedTimeSource(fakeTime), + TimeSource: timeSource, } // POSIX subdirectories @@ -79,16 +81,26 @@ type fixedTimeSource struct { fakeTime time.Time } -// newFixedTimeSource creates a fixedTimeSource instance -func newFixedTimeSource(t time.Time) *fixedTimeSource { +// newFakeTimeSource creates a fixedTimeSource instance +func newFakeTimeSource(t time.Time) *fixedTimeSource { return &fixedTimeSource{fakeTime: t} } +// Reset reinitializes the fake time source +func (f *fixedTimeSource) Reset() { + f.fakeTime = fakeTimeStart +} + // Now returns the time value this instance contains func (f *fixedTimeSource) Now() time.Time { return f.fakeTime } +// Add1m increments time by 1 minute +func (f *fixedTimeSource) Add1m() { + f.fakeTime = f.fakeTime.Add(time.Minute) +} + // setupTestLog creates a test TesseraCT log using a POSIX backend. // // It returns the log and the path to the storage directory. @@ -112,7 +124,7 @@ func setupTestLog(t *testing.T) (*log, string) { rejectUnexpired: false, } - log, err := NewLog(t.Context(), origin, sctSigner.signer, cv, newPOSIXStorageFunc(t, storageDir), newFixedTimeSource(fakeTime)) + log, err := NewLog(t.Context(), origin, sctSigner.signer, cv, newPOSIXStorageFunc(t, storageDir), timeSource) if err != nil { t.Fatalf("newLog(): %v", err) } @@ -160,9 +172,10 @@ func newPOSIXStorageFunc(t *testing.T, root string) storage.CreateStorage { opts := tessera.NewAppendOptions(). WithCheckpointSigner(signer). WithCTLayout(). - WithAntispam(256, antispam) + WithAntispam(256, antispam). + WithCheckpointInterval(time.Second) - appender, _, _, err := tessera.NewAppender(ctx, driver, opts) + appender, _, reader, err := tessera.NewAppender(ctx, driver, opts) if err != nil { klog.Fatalf("Failed to initialize POSIX Tessera appender: %v", err) } @@ -172,7 +185,7 @@ func newPOSIXStorageFunc(t *testing.T, root string) storage.CreateStorage { klog.Fatalf("failed to initialize InMemory issuer storage: %v", err) } - s, err := storage.NewCTStorage(appender, issuerStorage) + s, err := storage.NewCTStorage(appender, issuerStorage, reader) if err != nil { klog.Fatalf("Failed to initialize CTStorage: %v", err) } @@ -318,7 +331,7 @@ func TestNewPathHandlers(t *testing.T) { }) } -func parseChain(t *testing.T, isPrecert bool, pemChain []string, root *x509.Certificate) (*ctonly.Entry, []*x509.Certificate) { +func parseChain(t *testing.T, isPrecert bool, pemChain []string, root *x509.Certificate, timestamp time.Time) (*ctonly.Entry, []*x509.Certificate) { t.Helper() pool := loadCertsIntoPoolOrDie(t, pemChain) leafChain := pool.RawCertificates() @@ -329,7 +342,7 @@ func parseChain(t *testing.T, isPrecert bool, pemChain []string, root *x509.Cert fullChain[len(leafChain)] = root leafChain = fullChain } - entry, err := x509util.EntryFromChain(leafChain, isPrecert, fakeTimeMillis) + entry, err := x509util.EntryFromChain(leafChain, isPrecert, uint64(timestamp.UnixMilli())) if err != nil { t.Fatalf("Failed to create entry") } @@ -447,12 +460,13 @@ func TestAddChainWhitespace(t *testing.T) { func TestAddChain(t *testing.T) { var tests = []struct { - descr string - chain []string - want int - wantIdx uint64 - wantLogSize uint64 - err error + descr string + chain []string + want int + wantIdx uint64 + wantLogSize uint64 + wantTimestamp time.Time + err error }{ { descr: "leaf-only", @@ -465,45 +479,53 @@ func TestAddChain(t *testing.T) { want: http.StatusBadRequest, }, { - descr: "success", - chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, - wantIdx: 0, - wantLogSize: 1, - want: http.StatusOK, + descr: "success", + chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, + wantIdx: 0, + wantLogSize: 1, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), + want: http.StatusOK, }, { - descr: "success-duplicate", - chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, - wantIdx: 0, - wantLogSize: 1, - want: http.StatusOK, + descr: "success-duplicate", + chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, + wantIdx: 0, + wantLogSize: 1, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), + want: http.StatusOK, }, { - descr: "success-not-duplicate", - chain: []string{testdata.TestCertPEM, testdata.CACertPEM}, - wantIdx: 1, - wantLogSize: 2, - want: http.StatusOK, + descr: "success-not-duplicate", + chain: []string{testdata.TestCertPEM, testdata.CACertPEM}, + wantIdx: 1, + wantLogSize: 2, + wantTimestamp: fakeTimeStart.Add(5 * time.Minute), + want: http.StatusOK, }, { - descr: "success-without-root", - chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot}, - wantIdx: 0, - wantLogSize: 2, - want: http.StatusOK, + descr: "success-without-root-duplicate", + chain: []string{testdata.CertFromIntermediate, testdata.IntermediateFromRoot}, + wantIdx: 0, + wantLogSize: 2, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), + want: http.StatusOK, }, } log, dir := setupTestLog(t) server := setupTestServer(t, log, path.Join(prefix, rfc6962.AddChainPath)) defer server.Close() + defer timeSource.Reset() for _, test := range tests { + // Increment time to make it unique for each test case. + timeSource.Add1m() t.Run(test.descr, func(t *testing.T) { pool := loadCertsIntoPoolOrDie(t, test.chain) chain := createJSONChain(t, *pool) resp, err := http.Post(server.URL+rfc6962.AddChainPath, "application/json", chain) + if err != nil { t.Fatalf("http.Post(%s)=(_,%q); want (_,nil)", rfc6962.AddChainPath, err) } @@ -511,7 +533,7 @@ func TestAddChain(t *testing.T) { t.Errorf("http.Post(%s)=(%d,nil); want (%d,nil)", rfc6962.AddChainPath, got, want) } if test.want == http.StatusOK { - unseqEntry, wantIssChain := parseChain(t, false, test.chain, log.chainValidator.Roots()[0]) + unseqEntry, wantIssChain := parseChain(t, false, test.chain, log.chainValidator.Roots()[0], test.wantTimestamp) var gotRsp rfc6962.AddChainResponse if err := json.NewDecoder(resp.Body).Decode(&gotRsp); err != nil { @@ -523,7 +545,7 @@ func TestAddChain(t *testing.T) { if got, want := gotRsp.ID, demoLogID[:]; !bytes.Equal(got, want) { t.Errorf("resp.ID=%v; want %v", got, want) } - if got, want := gotRsp.Timestamp, fakeTimeMillis; got != want { + if got, want := gotRsp.Timestamp, uint64(test.wantTimestamp.UnixMilli()); got != want { t.Errorf("resp.Timestamp=%d; want %d", got, want) } if got, want := hex.EncodeToString(gotRsp.Signature), "040300067369676e6564"; got != want { @@ -586,12 +608,13 @@ func TestAddChain(t *testing.T) { func TestAddPreChain(t *testing.T) { var tests = []struct { - descr string - chain []string - want int - wantIdx uint64 - wantLogSize uint64 - err error + descr string + chain []string + want int + wantIdx uint64 + wantLogSize uint64 + wantTimestamp time.Time + err error }{ { descr: "leaf-signed-by-different", @@ -604,40 +627,47 @@ func TestAddPreChain(t *testing.T) { want: http.StatusBadRequest, }, { - descr: "success", - chain: []string{testdata.PrecertPEMValid, testdata.CACertPEM}, - want: http.StatusOK, - wantIdx: 0, - wantLogSize: 1, + descr: "success", + chain: []string{testdata.PrecertPEMValid, testdata.CACertPEM}, + want: http.StatusOK, + wantIdx: 0, + wantLogSize: 1, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), }, { - descr: "success-duplicate", - chain: []string{testdata.PrecertPEMValid, testdata.CACertPEM}, - want: http.StatusOK, - wantIdx: 0, - wantLogSize: 1, + descr: "success-duplicate", + chain: []string{testdata.PrecertPEMValid, testdata.CACertPEM}, + want: http.StatusOK, + wantIdx: 0, + wantLogSize: 1, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), }, { - descr: "success-with-intermediate", - chain: []string{testdata.PreCertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, - want: http.StatusOK, - wantIdx: 1, - wantLogSize: 2, + descr: "success-with-intermediate", + chain: []string{testdata.PreCertFromIntermediate, testdata.IntermediateFromRoot, testdata.CACertPEM}, + want: http.StatusOK, + wantIdx: 1, + wantLogSize: 2, + wantTimestamp: fakeTimeStart.Add(5 * time.Minute), }, { - descr: "success-without-root", - chain: []string{testdata.PrecertPEMValid}, - want: http.StatusOK, - wantIdx: 0, - wantLogSize: 2, + descr: "success-without-root-duplicate", + chain: []string{testdata.PrecertPEMValid}, + want: http.StatusOK, + wantIdx: 0, + wantLogSize: 2, + wantTimestamp: fakeTimeStart.Add(3 * time.Minute), }, } log, dir := setupTestLog(t) server := setupTestServer(t, log, path.Join(prefix, rfc6962.AddPreChainPath)) defer server.Close() + defer timeSource.Reset() for _, test := range tests { + // Increment time to make it unique for each test case. + timeSource.Add1m() t.Run(test.descr, func(t *testing.T) { pool := loadCertsIntoPoolOrDie(t, test.chain) chain := createJSONChain(t, *pool) @@ -650,7 +680,7 @@ func TestAddPreChain(t *testing.T) { t.Errorf("http.Post(%s)=(%d,nil); want (%d,nil)", rfc6962.AddPreChainPath, got, want) } if test.want == http.StatusOK { - unseqEntry, wantIssChain := parseChain(t, true, test.chain, log.chainValidator.Roots()[0]) + unseqEntry, wantIssChain := parseChain(t, true, test.chain, log.chainValidator.Roots()[0], test.wantTimestamp) var gotRsp rfc6962.AddChainResponse if err := json.NewDecoder(resp.Body).Decode(&gotRsp); err != nil { @@ -662,7 +692,7 @@ func TestAddPreChain(t *testing.T) { if got, want := gotRsp.ID, demoLogID[:]; !bytes.Equal(got, want) { t.Errorf("resp.ID=%v; want %v", got, want) } - if got, want := gotRsp.Timestamp, fakeTimeMillis; got != want { + if got, want := gotRsp.Timestamp, uint64(test.wantTimestamp.UnixMilli()); got != want { t.Errorf("resp.Timestamp=%d; want %d", got, want) } if got, want := hex.EncodeToString(gotRsp.Signature), "040300067369676e6564"; got != want { diff --git a/internal/types/staticct/staticct.go b/internal/types/staticct/staticct.go index 5e78a897..7324065c 100644 --- a/internal/types/staticct/staticct.go +++ b/internal/types/staticct/staticct.go @@ -37,6 +37,8 @@ type EntryBundle struct { // UnmarshalText implements encoding/TextUnmarshaler and reads EntryBundles // which are encoded using the Static CT API spec. +// TODO(phbnf): we can probably parse every individual leaf directly, since most callers +// of this method tend to do so. func (t *EntryBundle) UnmarshalText(raw []byte) error { entries := make([][]byte, 0, layout.EntryBundleWidth) s := cryptobyte.String(raw) diff --git a/storage/storage.go b/storage/storage.go index 0efd8b5e..7d915035 100644 --- a/storage/storage.go +++ b/storage/storage.go @@ -15,14 +15,21 @@ package storage import ( + "bytes" "context" "crypto/sha256" "crypto/x509" "encoding/hex" + "errors" "fmt" + "os" + "strconv" "sync" + "time" + "github.com/transparency-dev/static-ct/internal/types/staticct" tessera "github.com/transparency-dev/trillian-tessera" + "github.com/transparency-dev/trillian-tessera/api/layout" "github.com/transparency-dev/trillian-tessera/ctonly" "golang.org/x/mod/sumdb/note" "k8s.io/klog/v2" @@ -48,28 +55,88 @@ type IssuerStorage interface { AddIssuersIfNotExist(ctx context.Context, kv []KV) error } -// CTStorage implements ct.Storage. +// CTStorage implements ct.Storage and tessera.LogReader. type CTStorage struct { storeData func(context.Context, *ctonly.Entry) tessera.IndexFuture storeIssuers func(context.Context, []KV) error + reader tessera.LogReader } // NewCTStorage instantiates a CTStorage object. -func NewCTStorage(logStorage *tessera.Appender, issuerStorage IssuerStorage) (*CTStorage, error) { +func NewCTStorage(logStorage *tessera.Appender, issuerStorage IssuerStorage, reader tessera.LogReader) (*CTStorage, error) { ctStorage := &CTStorage{ storeData: tessera.NewCertificateTransparencyAppender(logStorage), storeIssuers: cachedStoreIssuers(issuerStorage), + reader: reader, } return ctStorage, nil } +func (cts *CTStorage) ReadCheckpoint(ctx context.Context) ([]byte, error) { + return cts.reader.ReadCheckpoint(ctx) +} + +// TODO(phbnf): cache timestamps (or more) to avoid reparsing the entire leaf bundle +func (cts *CTStorage) dedupFuture(ctx context.Context, f tessera.IndexFuture) (index, timestamp uint64, err error) { + ctx, span := tracer.Start(ctx, "tesseract.storage.dedupFuture") + defer span.End() + + awaiter := tessera.NewPublicationAwaiter(ctx, cts.reader.ReadCheckpoint, 10*time.Millisecond) + idx, cpRaw, err := awaiter.Await(ctx, f) + if err != nil { + return 0, 0, fmt.Errorf("error waiting for Tessera future and its integration: %v", err) + } + + // A https://c2sp.org/static-ct-api logsize is on the second line + l := bytes.SplitN(cpRaw, []byte("\n"), 3) + if len(l) < 2 { + return 0, 0, errors.New("invalid checkpoint - no size") + } + ckptSize, err := strconv.ParseUint(string(l[1]), 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("invalid checkpoint - can't extract size: %v", err) + } + + eBIdx := idx.Index / layout.EntryBundleWidth + eBRaw, err := cts.reader.ReadEntryBundle(ctx, idx.Index/layout.EntryBundleWidth, layout.PartialTileSize(0, idx.Index, ckptSize)) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return 0, 0, fmt.Errorf("leaf bundle at index %d not found: %v", eBIdx, err) + } + return 0, 0, fmt.Errorf("failed to fetch entry bundle at index %d: %v", eBIdx, err) + } + eb := staticct.EntryBundle{} + if err := eb.UnmarshalText(eBRaw); err != nil { + return 0, 0, fmt.Errorf("failed to unmarshal entry bundle at index %d: %v", eBIdx, err) + } + + eIdx := idx.Index % layout.EntryBundleWidth + if uint64(len(eb.Entries)) < eIdx { + return 0, 0, fmt.Errorf("failed to read entry %d in entry bundle %d", eIdx, eBIdx) + } + e := staticct.Entry{} + if err := e.UnmarshalText([]byte(eb.Entries[eIdx])); err != nil { + return 0, 0, fmt.Errorf("failed to unmarshal entry %d in entry bundle %d: %v", eIdx, eBIdx, e) + } + + return idx.Index, e.Timestamp, nil +} + // Add stores CT entries. -func (cts *CTStorage) Add(ctx context.Context, entry *ctonly.Entry) tessera.IndexFuture { +func (cts *CTStorage) Add(ctx context.Context, entry *ctonly.Entry) (uint64, uint64, error) { ctx, span := tracer.Start(ctx, "tesseract.storage.Add") defer span.End() - // TODO(phboneff): add deduplication and chain storage - return cts.storeData(ctx, entry) + future := cts.storeData(ctx, entry) + idx, err := future() + if err != nil { + return 0, 0, fmt.Errorf("error waiting for Tessera future: %v", err) + } + if idx.IsDup { + return cts.dedupFuture(ctx, future) + } + return idx.Index, entry.Timestamp, nil + } // AddIssuerChain stores every chain certificate under its sha256. From 80b9ff1f37729a50c06fca0140f90534923d2f6d Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Mon, 12 May 2025 12:34:21 +0000 Subject: [PATCH 09/12] add TODOs --- internal/ct/handlers.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/ct/handlers.go b/internal/ct/handlers.go index d86270f2..95766f8a 100644 --- a/internal/ct/handlers.go +++ b/internal/ct/handlers.go @@ -68,6 +68,9 @@ var ( // setupMetrics initializes all the exported metrics. func setupMetrics() { + // TODO(phboneff): add metrics for chain storage. + // TODO(phboneff): add metrics for deduplication. + // TODO(phboneff): break down metrics by whether or not the response has been deduped. knownLogs = mustCreate(meter.Int64Gauge("tesseract.known_logs", metric.WithDescription("Set to 1 for known logs"))) @@ -146,6 +149,7 @@ func (a appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } // impose a deadline on this onward request. + // TODO(phbnf): fine tune together with deduplication ctx, cancel := context.WithTimeout(logCtx, a.opts.Deadline) defer cancel() From 2d6da67aca654155d285c74898f59deddef033a8 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Mon, 12 May 2025 13:57:44 +0000 Subject: [PATCH 10/12] address comments --- cmd/aws/main.go | 2 +- cmd/gcp/main.go | 2 +- internal/ct/handlers_test.go | 2 +- internal/types/staticct/staticct.go | 15 +++++++++++++++ storage/storage.go | 17 ++++++++++------- 5 files changed, 28 insertions(+), 10 deletions(-) diff --git a/cmd/aws/main.go b/cmd/aws/main.go index 0574ef62..b29e2fb8 100644 --- a/cmd/aws/main.go +++ b/cmd/aws/main.go @@ -170,7 +170,7 @@ func newAWSStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, return nil, fmt.Errorf("failed to initialize AWS issuer storage: %v", err) } - return storage.NewCTStorage(appender, issuerStorage, reader) + return storage.NewCTStorage(ctx, appender, issuerStorage, reader) } type timestampFlag struct { diff --git a/cmd/gcp/main.go b/cmd/gcp/main.go index 56785250..f53cb7c2 100644 --- a/cmd/gcp/main.go +++ b/cmd/gcp/main.go @@ -184,7 +184,7 @@ func newGCPStorage(ctx context.Context, signer note.Signer) (*storage.CTStorage, return nil, fmt.Errorf("failed to initialize GCP issuer storage: %v", err) } - return storage.NewCTStorage(appender, issuerStorage, reader) + return storage.NewCTStorage(ctx, appender, issuerStorage, reader) } type timestampFlag struct { diff --git a/internal/ct/handlers_test.go b/internal/ct/handlers_test.go index 1d166099..fb823e0e 100644 --- a/internal/ct/handlers_test.go +++ b/internal/ct/handlers_test.go @@ -185,7 +185,7 @@ func newPOSIXStorageFunc(t *testing.T, root string) storage.CreateStorage { klog.Fatalf("failed to initialize InMemory issuer storage: %v", err) } - s, err := storage.NewCTStorage(appender, issuerStorage, reader) + s, err := storage.NewCTStorage(t.Context(), appender, issuerStorage, reader) if err != nil { klog.Fatalf("Failed to initialize CTStorage: %v", err) } diff --git a/internal/types/staticct/staticct.go b/internal/types/staticct/staticct.go index 7324065c..f55a5e3b 100644 --- a/internal/types/staticct/staticct.go +++ b/internal/types/staticct/staticct.go @@ -166,6 +166,21 @@ type Entry struct { LeafIndex uint64 } +// UnmarshalText implements encoding/TextUnmarshaler and reads EntryBundles +// which are encoded using the Static CT API spec. +func UnmarshalTimestamp(raw []byte) (uint64, error) { + s := cryptobyte.String(raw) + var t uint64 + + if !s.ReadUint64(&t) || t > math.MaxInt64 { + return 0, fmt.Errorf("invalid data tile: timestamp can't be extracted") + } + if t > math.MaxInt64 { + return 0, fmt.Errorf("invalid data tile: timestamp %d > math.MaxInt64", t) + } + return t, nil +} + // UnmarshalText implements encoding/TextUnmarshaler and reads EntryBundles // which are encoded using the Static CT API spec. func (t *Entry) UnmarshalText(raw []byte) error { diff --git a/storage/storage.go b/storage/storage.go index 7d915035..a3fba224 100644 --- a/storage/storage.go +++ b/storage/storage.go @@ -60,14 +60,17 @@ type CTStorage struct { storeData func(context.Context, *ctonly.Entry) tessera.IndexFuture storeIssuers func(context.Context, []KV) error reader tessera.LogReader + awaiter *tessera.PublicationAwaiter } // NewCTStorage instantiates a CTStorage object. -func NewCTStorage(logStorage *tessera.Appender, issuerStorage IssuerStorage, reader tessera.LogReader) (*CTStorage, error) { +func NewCTStorage(ctx context.Context, logStorage *tessera.Appender, issuerStorage IssuerStorage, reader tessera.LogReader) (*CTStorage, error) { + awaiter := tessera.NewPublicationAwaiter(ctx, reader.ReadCheckpoint, 200*time.Millisecond) ctStorage := &CTStorage{ storeData: tessera.NewCertificateTransparencyAppender(logStorage), storeIssuers: cachedStoreIssuers(issuerStorage), reader: reader, + awaiter: awaiter, } return ctStorage, nil } @@ -81,8 +84,7 @@ func (cts *CTStorage) dedupFuture(ctx context.Context, f tessera.IndexFuture) (i ctx, span := tracer.Start(ctx, "tesseract.storage.dedupFuture") defer span.End() - awaiter := tessera.NewPublicationAwaiter(ctx, cts.reader.ReadCheckpoint, 10*time.Millisecond) - idx, cpRaw, err := awaiter.Await(ctx, f) + idx, cpRaw, err := cts.awaiter.Await(ctx, f) if err != nil { return 0, 0, fmt.Errorf("error waiting for Tessera future and its integration: %v", err) } @@ -112,14 +114,15 @@ func (cts *CTStorage) dedupFuture(ctx context.Context, f tessera.IndexFuture) (i eIdx := idx.Index % layout.EntryBundleWidth if uint64(len(eb.Entries)) < eIdx { - return 0, 0, fmt.Errorf("failed to read entry %d in entry bundle %d", eIdx, eBIdx) + return 0, 0, fmt.Errorf("entry bundle at index %d has only %d entries, but wanted at least %d", eBIdx, eIdx, eBIdx) } e := staticct.Entry{} - if err := e.UnmarshalText([]byte(eb.Entries[eIdx])); err != nil { - return 0, 0, fmt.Errorf("failed to unmarshal entry %d in entry bundle %d: %v", eIdx, eBIdx, e) + t, err := staticct.UnmarshalTimestamp([]byte(eb.Entries[eIdx])) + if err != nil { + return 0, 0, fmt.Errorf("failed to extract timestamp from entry %d in entry bundle %d: %v", eIdx, eBIdx, e) } - return idx.Index, e.Timestamp, nil + return idx.Index, t, nil } // Add stores CT entries. From 5b8df524330187c6654d8a0806f6b646cddd0907 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Mon, 12 May 2025 15:08:29 +0000 Subject: [PATCH 11/12] remove duplicate check --- internal/types/staticct/staticct.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/types/staticct/staticct.go b/internal/types/staticct/staticct.go index f55a5e3b..cb08a1ad 100644 --- a/internal/types/staticct/staticct.go +++ b/internal/types/staticct/staticct.go @@ -172,7 +172,7 @@ func UnmarshalTimestamp(raw []byte) (uint64, error) { s := cryptobyte.String(raw) var t uint64 - if !s.ReadUint64(&t) || t > math.MaxInt64 { + if !s.ReadUint64(&t) { return 0, fmt.Errorf("invalid data tile: timestamp can't be extracted") } if t > math.MaxInt64 { From bfcb7a5c169d239ed2fd1ff5c21797ced82df890 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Mon, 12 May 2025 16:21:24 +0000 Subject: [PATCH 12/12] double bug fix --- storage/storage.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/storage.go b/storage/storage.go index a3fba224..c37bc883 100644 --- a/storage/storage.go +++ b/storage/storage.go @@ -100,7 +100,7 @@ func (cts *CTStorage) dedupFuture(ctx context.Context, f tessera.IndexFuture) (i } eBIdx := idx.Index / layout.EntryBundleWidth - eBRaw, err := cts.reader.ReadEntryBundle(ctx, idx.Index/layout.EntryBundleWidth, layout.PartialTileSize(0, idx.Index, ckptSize)) + eBRaw, err := cts.reader.ReadEntryBundle(ctx, eBIdx, layout.PartialTileSize(0, eBIdx, ckptSize)) if err != nil { if errors.Is(err, os.ErrNotExist) { return 0, 0, fmt.Errorf("leaf bundle at index %d not found: %v", eBIdx, err) @@ -113,7 +113,7 @@ func (cts *CTStorage) dedupFuture(ctx context.Context, f tessera.IndexFuture) (i } eIdx := idx.Index % layout.EntryBundleWidth - if uint64(len(eb.Entries)) < eIdx { + if uint64(len(eb.Entries)) <= eIdx { return 0, 0, fmt.Errorf("entry bundle at index %d has only %d entries, but wanted at least %d", eBIdx, eIdx, eBIdx) } e := staticct.Entry{}