
Commit 56c734e

Garbage collection
1 parent 29298ee commit 56c734e

4 files changed: 327 additions & 20 deletions

append_lifecycle.go

Lines changed: 26 additions & 7 deletions
@@ -47,6 +47,8 @@ const (
 	DefaultCheckpointInterval = 10 * time.Second
 	// DefaultPushbackMaxOutstanding is used by storage implementations if no WithPushback option is provided when instantiating it.
 	DefaultPushbackMaxOutstanding = 4096
+	// DefaultGarbageCollectionInterval is the default value used if no WithGarbageCollectionInterval option is provided.
+	DefaultGarbageCollectionInterval = time.Minute
 )

 var (
@@ -480,13 +482,14 @@ func (o *AppendOptions) WithAntispam(inMemEntries uint, as Antispam) *AppendOpti

 func NewAppendOptions() *AppendOptions {
 	return &AppendOptions{
-		batchMaxSize:           DefaultBatchMaxSize,
-		batchMaxAge:            DefaultBatchMaxAge,
-		entriesPath:            layout.EntriesPath,
-		bundleIDHasher:         defaultIDHasher,
-		checkpointInterval:     DefaultCheckpointInterval,
-		addDecorators:          make([]func(AddFn) AddFn, 0),
-		pushbackMaxOutstanding: DefaultPushbackMaxOutstanding,
+		batchMaxSize:              DefaultBatchMaxSize,
+		batchMaxAge:               DefaultBatchMaxAge,
+		entriesPath:               layout.EntriesPath,
+		bundleIDHasher:            defaultIDHasher,
+		checkpointInterval:        DefaultCheckpointInterval,
+		addDecorators:             make([]func(AddFn) AddFn, 0),
+		pushbackMaxOutstanding:    DefaultPushbackMaxOutstanding,
+		garbageCollectionInterval: DefaultGarbageCollectionInterval,
 	}
 }

@@ -511,6 +514,9 @@ type AppendOptions struct {

 	addDecorators []func(AddFn) AddFn
 	followers     []stream.Follower
+
+	// garbageCollectionInterval of zero should be interpreted as requesting garbage collection to be disabled.
+	garbageCollectionInterval time.Duration
 }

 // valid returns an error if an invalid combination of options has been set, or nil otherwise.
@@ -572,6 +578,10 @@ func (o AppendOptions) CheckpointInterval() time.Duration {
 	return o.checkpointInterval
 }

+func (o AppendOptions) GarbageCollectionInterval() time.Duration {
+	return o.garbageCollectionInterval
+}
+
 // WithCheckpointSigner is an option for setting the note signer and verifier to use when creating and parsing checkpoints.
 // This option is mandatory for creating logs where the checkpoint is signed locally, e.g. in
 // the Appender mode. This does not need to be provided where the storage will be used to mirror
@@ -696,3 +706,12 @@ type WitnessOptions struct {
 	// and will be disabled and/or removed in the future.
 	FailOpen bool
 }
+
+// WithGarbageCollectionInterval sets the interval between scans which remove obsolete partial
+// tiles and entry bundles.
+//
+// Setting to zero disables garbage collection.
+func (o *AppendOptions) WithGarbageCollectionInterval(interval time.Duration) *AppendOptions {
+	o.garbageCollectionInterval = interval
+	return o
+}
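
For orientation, a minimal sketch (not part of this commit) of how a personality might set the new option when building its AppendOptions. The *gcp.Storage driver is assumed to have been constructed elsewhere, the 30 minute interval is an arbitrary example, and the other mandatory options (e.g. WithCheckpointSigner) are elided:

package personality

import (
	"context"
	"time"

	"github.com/transparency-dev/tessera"
	"github.com/transparency-dev/tessera/storage/gcp"
)

// newAppender sketches configuring the garbage collection interval added above.
// Passing 0 disables collection entirely; leaving the option unset falls back to
// DefaultGarbageCollectionInterval (one minute).
func newAppender(ctx context.Context, driver *gcp.Storage) (*tessera.Appender, tessera.LogReader, error) {
	opts := tessera.NewAppendOptions().WithGarbageCollectionInterval(30 * time.Minute)
	return driver.Appender(ctx, opts)
}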

storage/gcp/README.md

Lines changed: 4 additions & 0 deletions
@@ -38,6 +38,10 @@ This table is used to coordinate integration of sequenced batches in the `Seq` t
 This table is used to coordinate publication of new checkpoints, ensuring that checkpoints are not published
 more frequently than configured.

+### `GCCoord`
+This table is used to coordinate garbage collection of partial tiles and entry bundles which have been
+made obsolete by the continued growth of the log.
+
 ## Life of a leaf

 1. Leaves are submitted by the binary built using Tessera via a call to the storage's `Add` func.

storage/gcp/gcp.go

Lines changed: 155 additions & 13 deletions
@@ -55,6 +55,7 @@ import (
 	"github.com/transparency-dev/tessera/api/layout"
 	"github.com/transparency-dev/tessera/internal/migrate"
 	"github.com/transparency-dev/tessera/internal/otel"
+	"github.com/transparency-dev/tessera/internal/parse"
 	"github.com/transparency-dev/tessera/internal/stream"
 	storage "github.com/transparency-dev/tessera/storage/internal"
 	"golang.org/x/sync/errgroup"
@@ -112,6 +113,8 @@ type sequencer interface {
 	nextIndex(ctx context.Context) (uint64, error)
 	// publishTree coordinates the publication of new checkpoints based on the current integrated tree.
 	publishTree(ctx context.Context, minAge time.Duration, f func(ctx context.Context, size uint64, root []byte) error) error
+	// garbageCollect coordinates the removal of unneeded partial tiles/entry bundles for the provided tree size, up to a maximum number of deletes per invocation.
+	garbageCollect(ctx context.Context, treeSize uint64, maxDeletes uint, removePrefix func(ctx context.Context, prefix string) error) error
 }

 // consumeFunc is the signature of a function which can consume entries from the sequencer and integrate
@@ -196,29 +199,41 @@ func (lr *LogReader) StreamEntries(ctx context.Context, startEntry, N uint64) it
 	return stream.EntryBundles(ctx, numWorkers, lr.integratedSize, lr.lrs.getEntryBundle, startEntry, N)
 }

+// Appender creates a new tessera.Appender lifecycle object.
 func (s *Storage) Appender(ctx context.Context, opts *tessera.AppendOptions) (*tessera.Appender, tessera.LogReader, error) {
-
-	if opts.CheckpointInterval() < minCheckpointInterval {
-		return nil, nil, fmt.Errorf("requested CheckpointInterval (%v) is less than minimum permitted %v", opts.CheckpointInterval(), minCheckpointInterval)
-	}
-
 	c, err := gcs.NewClient(ctx, gcs.WithJSONReads())
 	if err != nil {
 		return nil, nil, fmt.Errorf("failed to create GCS client: %v", err)
 	}
+	gs := &gcsStorage{
+		gcsClient:    c,
+		bucket:       s.cfg.Bucket,
+		bucketPrefix: s.cfg.BucketPrefix,
+	}

 	seq, err := newSpannerCoordinator(ctx, s.cfg.Spanner, uint64(opts.PushbackMaxOutstanding()))
 	if err != nil {
 		return nil, nil, fmt.Errorf("failed to create Spanner coordinator: %v", err)
 	}

+	a, lr, err := s.newAppender(ctx, gs, seq, opts)
+	if err != nil {
+		return nil, nil, err
+	}
+	return &tessera.Appender{
+		Add: a.Add,
+	}, lr, nil
+}
+
+// newAppender creates and initialises a tessera.Appender struct with the provided underlying storage implementations.
+func (s *Storage) newAppender(ctx context.Context, o objStore, seq *spannerCoordinator, opts *tessera.AppendOptions) (*Appender, tessera.LogReader, error) {
+	if opts.CheckpointInterval() < minCheckpointInterval {
+		return nil, nil, fmt.Errorf("requested CheckpointInterval (%v) is less than minimum permitted %v", opts.CheckpointInterval(), minCheckpointInterval)
+	}
+
 	a := &Appender{
 		logStore: &logResourceStore{
-			objStore: &gcsStorage{
-				gcsClient:    c,
-				bucket:       s.cfg.Bucket,
-				bucketPrefix: s.cfg.BucketPrefix,
-			},
+			objStore:    o,
 			entriesPath: opts.EntriesPath(),
 		},
 		sequencer: seq,
@@ -244,10 +259,11 @@ func (s *Storage) Appender(ctx context.Context, opts *tessera.AppendOptions) (*t

 	go a.sequencerJob(ctx)
 	go a.publisherJob(ctx, opts.CheckpointInterval())
+	if i := opts.GarbageCollectionInterval(); i > 0 {
+		go a.garbageCollectorJob(ctx, i)
+	}

-	return &tessera.Appender{
-		Add: a.Add,
-	}, reader, nil
+	return a, reader, nil
 }

 // Appender is an implementation of the Tessera appender lifecycle contract.
@@ -324,6 +340,46 @@ func (a *Appender) publisherJob(ctx context.Context, i time.Duration) {
 	}
 }

+// garbageCollectorJob is a long-running function which handles the removal of obsolete partial tiles
+// and entry bundles.
+// Blocks until ctx is done.
+func (a *Appender) garbageCollectorJob(ctx context.Context, i time.Duration) {
+	t := time.NewTicker(i)
+	defer t.Stop()
+
+	// Entirely arbitrary number.
+	maxDeletesPerRun := uint(1024)
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-t.C:
+		}
+		func() {
+			ctx, span := tracer.Start(ctx, "tessera.storage.gcp.garbageCollectTask")
+			defer span.End()
+
+			// Figure out the size of the latest published checkpoint - we can't be removing partial tiles implied by
+			// that checkpoint just because we've done an integration and know about a larger (but as yet unpublished)
+			// checkpoint!
+			cp, err := a.logStore.getCheckpoint(ctx)
+			if err != nil {
+				klog.Warningf("Failed to get published checkpoint: %v", err)
+			}
+			_, pubSize, _, err := parse.CheckpointUnsafe(cp)
+			if err != nil {
+				klog.Warningf("Failed to parse published checkpoint: %v", err)
+			}
+
+			if err := a.sequencer.garbageCollect(ctx, pubSize, maxDeletesPerRun, a.logStore.objStore.deleteObjectsWithPrefix); err != nil {
+				klog.Warningf("GarbageCollect failed: %v", err)
+			}
+		}()
+	}
+
+}
+
 // init ensures that the storage represents a log in a valid state.
 func (a *Appender) init(ctx context.Context) error {
 	if _, err := a.logStore.getCheckpoint(ctx); err != nil {
@@ -372,6 +428,7 @@ func (a *Appender) publishCheckpoint(ctx context.Context, size uint64, root []by
 type objStore interface {
 	getObject(ctx context.Context, obj string) ([]byte, int64, error)
 	setObject(ctx context.Context, obj string, data []byte, cond *gcs.Conditions, contType string, cacheCtl string) error
+	deleteObjectsWithPrefix(ctx context.Context, prefix string) error
 }

 // logResourceStore knows how to read and write entries which represent a tiles log inside an objStore.
@@ -665,12 +722,14 @@ func (s *spannerCoordinator) initDB(ctx context.Context, spannerDB string) error
 			"CREATE TABLE IF NOT EXISTS Seq (id INT64 NOT NULL, seq INT64 NOT NULL, v BYTES(MAX),) PRIMARY KEY (id, seq)",
 			"CREATE TABLE IF NOT EXISTS IntCoord (id INT64 NOT NULL, seq INT64 NOT NULL, rootHash BYTES(32)) PRIMARY KEY (id)",
 			"CREATE TABLE IF NOT EXISTS PubCoord (id INT64 NOT NULL, publishedAt TIMESTAMP NOT NULL) PRIMARY KEY (id)",
+			"CREATE TABLE IF NOT EXISTS GCCoord (id INT64 NOT NULL, fromSize INT64 NOT NULL) PRIMARY KEY (id)",
 		},
 		[][]*spanner.Mutation{
 			{spanner.Insert("Tessera", []string{"id", "compatibilityVersion"}, []any{0, SchemaCompatibilityVersion})},
 			{spanner.Insert("SeqCoord", []string{"id", "next"}, []any{0, 0})},
 			{spanner.Insert("IntCoord", []string{"id", "seq", "rootHash"}, []any{0, 0, rfc6962.DefaultHasher.EmptyRoot()})},
 			{spanner.Insert("PubCoord", []string{"id", "publishedAt"}, []any{0, time.Unix(0, 0)})},
+			{spanner.Insert("GCCoord", []string{"id", "fromSize"}, []any{0, 0})},
 		},
 	)
 }
@@ -949,6 +1008,58 @@ func (s *spannerCoordinator) publishTree(ctx context.Context, minAge time.Durati
 	return nil
 }

+// garbageCollect identifies unneeded partial tiles/entry bundles below the provided tree size, and calls the provided function to remove them.
+//
+// Uses the `GCCoord` table to ensure that only one binary is actively garbage collecting at any given time, and to track progress so that we don't
+// needlessly attempt to GC over regions which have already been cleaned.
+//
+// At most maxDeletes removals are issued per invocation; progress is persisted in GCCoord so that subsequent invocations carry on from where this one stopped.
+func (s *spannerCoordinator) garbageCollect(ctx context.Context, treeSize uint64, maxDeletes uint, deleteWithPrefix func(ctx context.Context, prefix string) error) error {
+	_, err := s.dbPool.ReadWriteTransaction(ctx, func(ctx context.Context, txn *spanner.ReadWriteTransaction) error {
+		row, err := txn.ReadRowWithOptions(ctx, "GCCoord", spanner.Key{0}, []string{"fromSize"}, &spanner.ReadOptions{LockHint: spannerpb.ReadRequest_LOCK_HINT_EXCLUSIVE})
+		if err != nil {
+			return fmt.Errorf("failed to read GCCoord: %w", err)
+		}
+		var fs int64
+		if err := row.Columns(&fs); err != nil {
+			return fmt.Errorf("failed to parse row contents: %v", err)
+		}
+		fromSize := uint64(fs)
+
+		if fromSize == treeSize {
+			return nil
+		}
+
+		d := uint(0)
+		eg := errgroup.Group{}
+	done:
+		for l, f, x := uint64(0), fromSize, treeSize; x > 0; l, f, x = l+1, f>>layout.TileHeight, x>>layout.TileHeight {
+			for ri := range layout.Range(f, x-f, x) {
+				if ri.Partial != 0 || d > maxDeletes {
+					break done
+				}
+				if l == 0 {
+					eg.Go(func() error { return deleteWithPrefix(ctx, layout.EntriesPath(ri.Index, 0)+".p/") })
+					d++
+					fromSize += uint64(ri.N)
+				}
+				eg.Go(func() error { return deleteWithPrefix(ctx, layout.TilePath(l, ri.Index, 0)+".p/") })
+				d++
+			}
+		}
+		if err := eg.Wait(); err != nil {
+			return fmt.Errorf("failed to delete one or more objects: %v", err)
+		}
+
+		if err := txn.BufferWrite([]*spanner.Mutation{spanner.Update("GCCoord", []string{"id", "fromSize"}, []any{0, int64(fromSize)})}); err != nil {
+			return err
+		}
+
+		return nil
+	})
+	return err
+}
+
 // gcsStorage knows how to store and retrieve objects from GCS.
 type gcsStorage struct {
 	bucket       string
@@ -1038,6 +1149,37 @@ func (s *gcsStorage) setObject(ctx context.Context, objName string, data []byte,
 	return nil
 }

+// deleteObjectsWithPrefix removes any objects with the provided prefix from GCS.
+func (s *gcsStorage) deleteObjectsWithPrefix(ctx context.Context, objPrefix string) error {
+	ctx, span := tracer.Start(ctx, "tessera.storage.gcp.deleteObject")
+	defer span.End()
+
+	if s.bucketPrefix != "" {
+		objPrefix = filepath.Join(s.bucketPrefix, objPrefix)
+	}
+	span.SetAttributes(objectPathKey.String(objPrefix))
+
+	bkt := s.gcsClient.Bucket(s.bucket)
+
+	errs := []error(nil)
+	it := bkt.Objects(ctx, &gcs.Query{Prefix: objPrefix})
+	for {
+		attr, err := it.Next()
+		if err != nil {
+			if err == iterator.Done {
+				break
+			}
+			return err
+		}
+		klog.V(2).Infof("Deleting object %s", attr.Name)
+		if err := bkt.Object(attr.Name).Delete(ctx); err != nil {
+			errs = append(errs, err)
+		}
+	}
+
+	return errors.Join(errs...)
+}
+
 // MigrationWriter creates a new GCP storage for the MigrationTarget lifecycle mode.
 func (s *Storage) MigrationWriter(ctx context.Context, opts *tessera.MigrationOptions) (migrate.MigrationWriter, tessera.LogReader, error) {
 	c, err := gcs.NewClient(ctx, gcs.WithJSONReads())
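
As an aside (not part of the commit), the relationship between GCCoord's fromSize and the objects which end up being deleted can be illustrated with a small sketch which mirrors the level-by-level walk in garbageCollect above, minus the maxDeletes cap and the Spanner bookkeeping. It reuses the same layout helpers the commit relies on; the function name is hypothetical.

package personality

import "github.com/transparency-dev/tessera/api/layout"

// collectablePrefixes returns the ".p/" prefixes which a log published at treeSize
// makes obsolete, starting from the previously garbage-collected size fromSize.
// At each level only resources which are now full are considered; the walk stops at
// the first still-partial resource, since its partial versions are still being served.
func collectablePrefixes(fromSize, treeSize uint64) []string {
	var prefixes []string
	for l, f, x := uint64(0), fromSize, treeSize; x > 0; l, f, x = l+1, f>>layout.TileHeight, x>>layout.TileHeight {
		for ri := range layout.Range(f, x-f, x) {
			if ri.Partial != 0 {
				return prefixes
			}
			if l == 0 {
				// A full entry bundle makes its partial counterparts obsolete.
				prefixes = append(prefixes, layout.EntriesPath(ri.Index, 0)+".p/")
			}
			// Likewise for a full tile at any level.
			prefixes = append(prefixes, layout.TilePath(l, ri.Index, 0)+".p/")
		}
	}
	return prefixes
}

With the default tile height of 8, collectablePrefixes(0, 512) would report the partial prefixes for entry bundles 0 and 1 and for level-0 tiles 0 and 1, but nothing at level 1, since the level-1 tile covering those two subtrees is itself still partial.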
