From 4d419d788812c7bf08413de9246a38f23b051b9c Mon Sep 17 00:00:00 2001 From: Adam Faulkner Date: Tue, 5 Mar 2024 21:50:33 -0800 Subject: [PATCH] Fix handling of ranges beyond end of object (#5) * Fix handling of ranges beyond end of object * add error case; fix bad bug * add xml error formatting --- services/s3/itest/s3_test.go | 16 +++++++++++--- services/s3/s3.go | 43 +++++++++++++++++++++++++++++++----- services/s3/types.go | 10 +++++++++ 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/services/s3/itest/s3_test.go b/services/s3/itest/s3_test.go index bbacd62..e0e2cdb 100644 --- a/services/s3/itest/s3_test.go +++ b/services/s3/itest/s3_test.go @@ -198,6 +198,7 @@ type RangeTestCase struct { Name string Range string Body string + Error bool } func TestRangeQuery(t *testing.T) { @@ -216,7 +217,7 @@ func TestRangeQuery(t *testing.T) { id := upload.UploadId var parts []types.CompletedPart - for i, s := range []string{"hello", " world ", "hi"} { + for i, s := range []string{"hello", " world ", "hi", " things are fun"} { output, err := client.UploadPart(ctx, &s3.UploadPartInput{ PartNumber: int32(i), Bucket: &bucket, @@ -246,10 +247,12 @@ func TestRangeQuery(t *testing.T) { } testCases := []RangeTestCase{ - {Name: "entire range", Range: "bytes=0-13", Body: "hello world hi"}, + {Name: "entire range", Range: "bytes=0-28", Body: "hello world hi things are fun"}, {Name: "Skip entire first part and half of second part", Range: "bytes=8-13", Body: "rld hi"}, {Name: "Prefix", Range: "bytes=0-8", Body: "hello wor"}, - {Name: "Suffix", Range: "bytes=-4", Body: "d hi"}, + {Name: "Suffix", Range: "bytes=-4", Body: " fun"}, + {Name: "Ending beyond the end of the object", Range: "bytes=0-100", Body: "hello world hi things are fun"}, + {Name: "Starting beyond the end of the object", Range: "bytes=100-", Body: "", Error: true}, } for _, testCase := range testCases { @@ -260,6 +263,13 @@ func TestRangeQuery(t *testing.T) { Key: &key, Range: &testCase.Range, }) + if testCase.Error { + if err == nil { + t.Fatal("expected error") + } + return + } + if err != nil { t.Fatal(err) } diff --git a/services/s3/s3.go b/services/s3/s3.go index 0bf5355..c653caf 100644 --- a/services/s3/s3.go +++ b/services/s3/s3.go @@ -4,6 +4,7 @@ import ( "crypto/md5" "encoding/base64" "encoding/hex" + "encoding/xml" "fmt" "io" "log/slog" @@ -242,7 +243,14 @@ func parseRangeHeader(rangeHeader string, o *Object) ([]ByteRange, *awserrors.Er return result, nil } -func (s *S3) readerForRange(object *Object, br ByteRange) (io.Reader, *awserrors.Error) { +// Returns a reader, a content length, and an error. +// +// When given a range that stretches beyond the length of an object, S3 will return the piece of +// the object that exists, with an appropriate content length. +// +// When given a range that is entirely not within the object, S3 will return a 416 error. This is +// currently not implemented. +func (s *S3) readerForRange(object *Object, br ByteRange) (io.Reader, int64, *awserrors.Error) { var parts []Part if len(object.Parts) == 0 { parts = []Part{{MD5: object.MD5, Size: object.ContentLength}} @@ -255,6 +263,7 @@ func (s *S3) readerForRange(object *Object, br ByteRange) (io.Reader, *awserrors bytesUntilStart := br.startByte bytesUntilEnd := br.endByte var readers []io.Reader + readLength := int64(0) // Loop over parts in order, updating bytesUntilStart and bytesUntilEnd. If the chunk lies // within the range that must be returned, use a section reader to capture the appropriate range. for _, part := range parts { @@ -274,7 +283,7 @@ func (s *S3) readerForRange(object *Object, br ByteRange) (io.Reader, *awserrors // Otherwise, open a reader for this chunk. f, err := os.Open(s.filepath(part.MD5)) if err != nil { - return nil, awserrors.XXX_TODO(err.Error()) + return nil, 0, awserrors.XXX_TODO(err.Error()) } var bytesToReadFromThisChunk int64 if bytesUntilEnd >= size { @@ -286,10 +295,32 @@ func (s *S3) readerForRange(object *Object, br ByteRange) (io.Reader, *awserrors } readers = append(readers, io.NewSectionReader(f, bytesUntilStart, bytesToReadFromThisChunk)) + bytesUntilEnd -= (bytesUntilStart + bytesToReadFromThisChunk) bytesUntilStart = 0 - bytesUntilEnd -= int64(bytesToReadFromThisChunk) + readLength += bytesToReadFromThisChunk + } + if bytesUntilStart > 0 { + // For whatever reason, the AWS sdk requires that this error message is formatted correctly. + errorObj := InvalidRangeError{ + Code: "InvalidRange", + Message: "The requested range is not satisfiable", + RangeRequested: fmt.Sprintf("bytes=%d-%d", br.startByte, br.endByte-1), + ActualObjectSize: object.ContentLength, + } + serializedError, err := xml.Marshal(errorObj) + if err != nil { + return nil, 0, awserrors.XXX_TODO(err.Error()) + } + + return nil, 0, &awserrors.Error{ + Code: 416, + Body: awserrors.ErrorBody{ + Type: "InvalidRange", + Message: string(serializedError), + }, + } } - return io.MultiReader(readers...), nil + return io.MultiReader(readers...), readLength, nil } func (s *S3) getObject(input GetObjectInput, includeBody bool) (*GetObjectOutput, *awserrors.Error) { @@ -338,8 +369,8 @@ func (s *S3) getObject(input GetObjectInput, includeBody bool) (*GetObjectOutput var readers []io.Reader totalLength := int64(0) for _, rangeItem := range ranges { - totalLength += (rangeItem.endByte - rangeItem.startByte) - readerForRange, err := s.readerForRange(object, rangeItem) + readerForRange, readLength, err := s.readerForRange(object, rangeItem) + totalLength += readLength if err != nil { return nil, err } diff --git a/services/s3/types.go b/services/s3/types.go index 069872b..44546af 100644 --- a/services/s3/types.go +++ b/services/s3/types.go @@ -316,3 +316,13 @@ type ListObjectsV2Output struct { Prefix *string StartAfter *string } + +type InvalidRangeError struct { + XMLName xml.Name `xml:"Error"` + Code string + Message string + RangeRequested string + ActualObjectSize int64 + RequestId string + HostId string +}