Skip to content

Commit 41d63a9

Browse files
authored
Merge pull request #186 from estebanreyl/fix-build-race-condition
[Bugfix] userspace convertor: Fix race condition on error
2 parents e2f41d7 + 4036c75 commit 41d63a9

File tree

1 file changed

+45
-63
lines changed

1 file changed

+45
-63
lines changed

cmd/convertor/builder/builder.go

Lines changed: 45 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,16 @@ package builder
1818

1919
import (
2020
"context"
21+
"fmt"
2122
"strings"
22-
"sync"
2323

2424
"github.com/containerd/accelerated-container-image/cmd/convertor/database"
2525
"github.com/containerd/containerd/reference"
2626
"github.com/containerd/containerd/remotes/docker"
2727
v1 "github.com/opencontainers/image-spec/specs-go/v1"
2828
"github.com/pkg/errors"
2929
"github.com/sirupsen/logrus"
30+
"golang.org/x/sync/errgroup"
3031
)
3132

3233
type Builder interface {
@@ -95,112 +96,93 @@ func (b *overlaybdBuilder) Build(ctx context.Context) error {
9596
alreadyConverted := make([]chan *v1.Descriptor, b.layers)
9697
downloaded := make([]chan error, b.layers)
9798
converted := make([]chan error, b.layers)
98-
var uploaded sync.WaitGroup
99-
100-
errCh := make(chan error)
101-
defer close(errCh)
102-
ctx, cancel := context.WithCancel(ctx)
103-
defer cancel()
104-
105-
// collect error and kill all builder goroutines
106-
var retErr error
107-
retErr = nil
108-
go func() {
109-
select {
110-
case <-ctx.Done():
111-
case retErr = <-errCh:
112-
}
113-
if retErr != nil {
114-
cancel()
115-
}
116-
}()
99+
// Errgroups will close the context after wait returns so the operations need their own
100+
// derived context.
101+
g, rctx := errgroup.WithContext(ctx)
117102

118103
for i := 0; i < b.layers; i++ {
119-
downloaded[i] = make(chan error)
120-
converted[i] = make(chan error)
121-
alreadyConverted[i] = make(chan *v1.Descriptor)
104+
idx := i
105+
downloaded[idx] = make(chan error)
106+
converted[idx] = make(chan error)
107+
alreadyConverted[idx] = make(chan *v1.Descriptor)
122108

123109
// deduplication Goroutine
124-
go func(idx int) {
110+
g.Go(func() error {
125111
defer close(alreadyConverted[idx])
126112
// try to find chainID -> converted digest conversion if available
127-
desc, err := b.engine.CheckForConvertedLayer(ctx, idx)
113+
desc, err := b.engine.CheckForConvertedLayer(rctx, idx)
128114
if err != nil {
129115
// in the event of failure fallback to regular process
130-
return
116+
return nil
131117
}
132118
alreadyConverted[idx] <- &desc
133-
}(i)
119+
return nil
120+
})
134121

135122
// download goroutine
136-
go func(idx int) {
123+
g.Go(func() error {
137124
var cachedLayer *v1.Descriptor
138125
select {
139-
case <-ctx.Done():
126+
case <-rctx.Done():
140127
case cachedLayer = <-alreadyConverted[idx]:
141128
}
142129

143130
defer close(downloaded[idx])
144131
if cachedLayer != nil {
145132
// download the converted layer
146-
err := b.engine.DownloadConvertedLayer(ctx, idx, *cachedLayer)
133+
err := b.engine.DownloadConvertedLayer(rctx, idx, *cachedLayer)
147134
if err == nil {
148135
logrus.Infof("downloaded cached layer %d", idx)
149-
sendToChannel(ctx, downloaded[idx], nil)
150-
return
136+
return err
151137
}
152138
logrus.Infof("failed to download cached layer %d falling back to conversion : %s", idx, err)
153139
}
154140

155-
if err := b.engine.DownloadLayer(ctx, idx); err != nil {
156-
sendToChannel(ctx, errCh, errors.Wrapf(err, "failed to download layer %d", idx))
157-
return
141+
if err := b.engine.DownloadLayer(rctx, idx); err != nil {
142+
return err
158143
}
159144
logrus.Infof("downloaded layer %d", idx)
160-
sendToChannel(ctx, downloaded[idx], nil)
161-
}(i)
145+
sendToChannel(rctx, downloaded[idx], nil)
146+
return nil
147+
})
162148

163149
// convert goroutine
164-
go func(idx int) {
150+
g.Go(func() error {
165151
defer close(converted[idx])
166-
if waitForChannel(ctx, downloaded[idx]); ctx.Err() != nil {
167-
return
152+
if waitForChannel(rctx, downloaded[idx]); rctx.Err() != nil {
153+
return rctx.Err()
168154
}
169155
if idx > 0 {
170-
if waitForChannel(ctx, converted[idx-1]); ctx.Err() != nil {
171-
return
156+
if waitForChannel(rctx, converted[idx-1]); rctx.Err() != nil {
157+
return rctx.Err()
172158
}
173159
}
174-
if err := b.engine.BuildLayer(ctx, idx); err != nil {
175-
sendToChannel(ctx, errCh, errors.Wrapf(err, "failed to convert layer %d", idx))
176-
return
160+
if err := b.engine.BuildLayer(rctx, idx); err != nil {
161+
return fmt.Errorf("failed to convert layer %d: %w", idx, err)
177162
}
178163
logrus.Infof("layer %d converted", idx)
179164
// send to upload(idx) and convert(idx+1) once each
180-
sendToChannel(ctx, converted[idx], nil)
165+
sendToChannel(rctx, converted[idx], nil)
181166
if idx+1 < b.layers {
182-
sendToChannel(ctx, converted[idx], nil)
167+
sendToChannel(rctx, converted[idx], nil)
183168
}
184-
}(i)
185-
186-
// upload goroutine
187-
uploaded.Add(1)
188-
go func(idx int) {
189-
defer uploaded.Done()
190-
if waitForChannel(ctx, converted[idx]); ctx.Err() != nil {
191-
return
169+
return nil
170+
})
171+
172+
g.Go(func() error {
173+
if waitForChannel(rctx, converted[idx]); rctx.Err() != nil {
174+
return rctx.Err()
192175
}
193-
if err := b.engine.UploadLayer(ctx, idx); err != nil {
194-
sendToChannel(ctx, errCh, errors.Wrapf(err, "failed to upload layer %d", idx))
195-
return
176+
if err := b.engine.UploadLayer(rctx, idx); err != nil {
177+
return fmt.Errorf("failed to upload layer %d: %w", idx, err)
196178
}
197-
b.engine.StoreConvertedLayerDetails(ctx, idx)
179+
b.engine.StoreConvertedLayerDetails(rctx, idx)
198180
logrus.Infof("layer %d uploaded", idx)
199-
}(i)
181+
return nil
182+
})
200183
}
201-
uploaded.Wait()
202-
if retErr != nil {
203-
return retErr
184+
if err := g.Wait(); err != nil {
185+
return err
204186
}
205187

206188
if err := b.engine.UploadImage(ctx); err != nil {

0 commit comments

Comments
 (0)