Skip to content

Commit 9e2eb7f

Browse files
committed
Fix few bugs
1 parent 3ad83b9 commit 9e2eb7f

File tree

7 files changed

+52
-24
lines changed

7 files changed

+52
-24
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ With 128 workers we get avg sync speed around 2k obj/sec (small objects 1-20 kb)
2222
* Each object is loaded into RAM. So you need `<avg object size> * <workers count>` RAM.
2323
If you don't have enough RAM, you can use swap. A large (32-64 Gb) swap on SSD does not affect the tool performance.
2424
This is because the tool was designed to synchronize billions of small files and is optimized for this workload.
25+
To avoid this you can use streaming storage drivers (now available only for S3 and FS). They use less RAM, but are slower on small objects.
2526

2627
## Usage
2728
```

cli/cli.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ type args struct {
9797
DisableHTTP2 bool `arg:"--disable-http2" help:"Disable HTTP2 for http client"`
9898
ListBuffer uint `arg:"--list-buffer" help:"Size of list buffer" default:"1000"`
9999
SkipSSLVerify bool `arg:"--skip-ssl-verify" help:"Disable SSL verification for S3"`
100+
ServerGzip bool `arg:"--server-gzip" help:"Workaround for S3 servers with enabled gzip compression for all files."`
100101
Profiler bool `arg:"--profiler" help:"Enable profiler on :8080"`
101102
// Rate Limit
102103
RateLimitObjPerSec uint `arg:"--ratelimit-objects" help:"Rate limit objects per second"`

cli/setup.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ func setupStorages(ctx context.Context, syncGroup *pipeline.Group, cli *argsPars
2121
switch cli.Source.Type {
2222
case storage.TypeS3:
2323
sourceStorage = s3.NewS3Storage(cli.SourceNoSign, cli.SourceKey, cli.SourceSecret, cli.SourceToken, cli.SourceRegion, cli.SourceEndpoint,
24-
cli.Source.Bucket, cli.Source.Path, cli.S3KeysPerReq, cli.S3Retry, cli.S3RetryInterval, cli.SkipSSLVerify,
24+
cli.Source.Bucket, cli.Source.Path, cli.S3KeysPerReq, cli.S3Retry, cli.S3RetryInterval, cli.SkipSSLVerify, cli.ServerGzip,
2525
)
2626
case storage.TypeS3Stream:
2727
sourceStorage = s3stream.NewS3StreamStorage(cli.SourceNoSign, cli.SourceKey, cli.SourceSecret, cli.SourceToken, cli.SourceRegion, cli.SourceEndpoint,
@@ -39,7 +39,7 @@ func setupStorages(ctx context.Context, syncGroup *pipeline.Group, cli *argsPars
3939
switch cli.Target.Type {
4040
case storage.TypeS3:
4141
targetStorage = s3.NewS3Storage(cli.TargetNoSign, cli.TargetKey, cli.TargetSecret, cli.TargetToken, cli.TargetRegion, cli.TargetEndpoint,
42-
cli.Target.Bucket, cli.Target.Path, cli.S3KeysPerReq, cli.S3Retry, cli.S3RetryInterval, cli.SkipSSLVerify,
42+
cli.Target.Bucket, cli.Target.Path, cli.S3KeysPerReq, cli.S3Retry, cli.S3RetryInterval, cli.SkipSSLVerify, cli.ServerGzip,
4343
)
4444
case storage.TypeS3Stream:
4545
targetStorage = s3stream.NewS3StreamStorage(cli.TargetNoSign, cli.TargetKey, cli.TargetSecret, cli.TargetToken, cli.TargetRegion, cli.TargetEndpoint,

pipeline/collection/misc.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@ package collection
22

33
import (
44
"github.com/larrabee/ratelimit"
5+
"github.com/sirupsen/logrus"
6+
57
"github.com/larrabee/s3sync/pipeline"
68
"github.com/larrabee/s3sync/storage"
7-
"github.com/sirupsen/logrus"
89
)
910

1011
// Terminator like a /dev/null
@@ -27,9 +28,10 @@ var Logger pipeline.StepFn = func(group *pipeline.Group, stepNum int, input <-ch
2728
for obj := range input {
2829
if ok {
2930
cfg.WithFields(logrus.Fields{
30-
"key": *obj.Key,
31-
"size": *obj.ContentLength,
32-
"Content-Type": *obj.ContentType,
31+
"key": storage.ToValue(obj.Key),
32+
"size": storage.ToValue(obj.ContentLength),
33+
"Content-Type": storage.ToValue(obj.ContentType),
34+
"Content-Encoding": storage.ToValue(obj.ContentEncoding),
3335
}).Infof("Sync file")
3436
output <- obj
3537
}

storage/s3/opts.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package s3
2+
3+
import (
4+
"github.com/aws/aws-sdk-go/aws/request"
5+
)
6+
7+
// withAcceptEncoding returns a request.Option that adds an "Accept-Encoding"
// header with the value e to the request's underlying HTTP request.
func withAcceptEncoding(e string) request.Option {
8+
return func(r *request.Request) {
9+
r.HTTPRequest.Header.Add("Accept-Encoding", e)
10+
}
11+
}

storage/s3/s3.go

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,14 @@ import (
55
"context"
66
"crypto/tls"
77
"errors"
8-
"github.com/aws/aws-sdk-go/aws/request"
98
"io"
109
"net/http"
1110
"net/url"
1211
"strings"
1312
"time"
1413

14+
"github.com/aws/aws-sdk-go/aws/request"
15+
1516
"github.com/aws/aws-sdk-go/aws"
1617
"github.com/aws/aws-sdk-go/aws/credentials"
1718
"github.com/aws/aws-sdk-go/aws/defaults"
@@ -34,12 +35,13 @@ type S3Storage struct {
3435
ctx context.Context
3536
listMarker *string
3637
rlBucket ratelimit.Bucket
38+
serverGzip bool
3739
}
3840

3941
// NewS3Storage return new configured S3 storage.
4042
//
4143
// You should always create new storage with this constructor.
42-
func NewS3Storage(awsNoSign bool, awsAccessKey, awsSecretKey, awsToken, awsRegion, endpoint, bucketName, prefix string, keysPerReq int64, retryCnt uint, retryDelay time.Duration, skipSSLVerify bool) *S3Storage {
44+
func NewS3Storage(awsNoSign bool, awsAccessKey, awsSecretKey, awsToken, awsRegion, endpoint, bucketName, prefix string, keysPerReq int64, retryCnt uint, retryDelay time.Duration, skipSSLVerify bool, serverGzip bool) *S3Storage {
4345
sess := session.Must(session.NewSessionWithOptions(session.Options{
4446
SharedConfigState: session.SharedConfigEnable,
4547
}))
@@ -83,6 +85,7 @@ func NewS3Storage(awsNoSign bool, awsAccessKey, awsSecretKey, awsToken, awsRegio
8385
retryInterval: retryDelay,
8486
ctx: context.TODO(),
8587
rlBucket: ratelimit.NewFakeBucket(),
88+
serverGzip: serverGzip,
8689
}
8790

8891
return &st
@@ -105,7 +108,7 @@ func (st *S3Storage) WithRateLimit(limit int) error {
105108

106109
// List S3 bucket and send found objects to chan.
107110
func (st *S3Storage) List(output chan<- *storage.Object) error {
108-
listObjectsFn := func(p *s3.ListObjectsOutput, lastPage bool) bool {
111+
listObjectsFn := func(p *s3.ListObjectsV2Output, lastPage bool) bool {
109112
for _, o := range p.Contents {
110113
key, _ := url.QueryUnescape(aws.StringValue(o.Key))
111114
key = strings.Replace(key, st.prefix, "", 1)
@@ -117,19 +120,19 @@ func (st *S3Storage) List(output chan<- *storage.Object) error {
117120
IsLatest: aws.Bool(true),
118121
}
119122
}
120-
st.listMarker = p.Marker
123+
st.listMarker = p.NextContinuationToken
121124
return !lastPage // continue paging
122125
}
123126

124-
input := &s3.ListObjectsInput{
125-
Bucket: st.awsBucket,
126-
Prefix: aws.String(st.prefix),
127-
MaxKeys: aws.Int64(st.keysPerReq),
128-
EncodingType: aws.String(s3.EncodingTypeUrl),
129-
Marker: st.listMarker,
127+
input := &s3.ListObjectsV2Input{
128+
Bucket: st.awsBucket,
129+
Prefix: aws.String(st.prefix),
130+
MaxKeys: aws.Int64(st.keysPerReq),
131+
EncodingType: aws.String(s3.EncodingTypeUrl),
132+
ContinuationToken: st.listMarker,
130133
}
131134

132-
if err := st.awsSvc.ListObjectsPagesWithContext(st.ctx, input, listObjectsFn); err != nil {
135+
if err := st.awsSvc.ListObjectsV2PagesWithContext(st.ctx, input, listObjectsFn); err != nil {
133136
return err
134137
}
135138
storage.Log.Debugf("Listing bucket finished")
@@ -191,12 +194,6 @@ func (st *S3Storage) PutObject(obj *storage.Object) error {
191194
return nil
192195
}
193196

194-
func withAcceptEncoding(e string) request.Option {
195-
return func(r *request.Request) {
196-
r.HTTPRequest.Header.Add("Accept-Encoding", e)
197-
}
198-
}
199-
200197
// GetObjectContent read object content and metadata from S3.
201198
func (st *S3Storage) GetObjectContent(obj *storage.Object) error {
202199
input := &s3.GetObjectInput{
@@ -205,7 +202,12 @@ func (st *S3Storage) GetObjectContent(obj *storage.Object) error {
205202
VersionId: obj.VersionId,
206203
}
207204

208-
result, err := st.awsSvc.GetObjectWithContext(st.ctx, input, withAcceptEncoding("gzip"))
205+
opts := make([]request.Option, 0, 1)
206+
if !st.serverGzip {
207+
opts = append(opts, withAcceptEncoding("gzip"))
208+
}
209+
210+
result, err := st.awsSvc.GetObjectWithContext(st.ctx, input, opts...)
209211
if err != nil {
210212
return err
211213
}

storage/utils.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,14 @@ func GetInsecureRandString(n int) string {
5858

5959
return sb.String()
6060
}
61+
62+
// ToPtr returns a pointer to val. Because val is passed by value, the
// pointer refers to the function's own copy, not to the caller's variable.
func ToPtr[K any](val K) *K {
63+
return &val
64+
}
65+
66+
// ToValue dereferences val and returns the value it points to.
// A nil val yields the zero value of K instead of panicking.
func ToValue[K any](val *K) K {
67+
if val == nil {
68+
// *new(K) is the zero value of K.
return *new(K)
69+
}
70+
return *val
71+
}

0 commit comments

Comments
 (0)