Skip to content

Commit

Permalink
Multipart copy method added
Browse files Browse the repository at this point in the history
  • Loading branch information
nmuntyanov committed Oct 17, 2023
1 parent 21806bb commit 261722d
Show file tree
Hide file tree
Showing 7 changed files with 358 additions and 1 deletion.
3 changes: 3 additions & 0 deletions docs/integration/simple-s3.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ $resource = \fopen('/path/to/cat/image.jpg', 'r');
$s3->upload('my-image-bucket', 'photos/cat_2.jpg', $resource);
$s3->upload('my-image-bucket', 'photos/cat_2.txt', 'I like this cat');

// Copy objects between buckets
$s3->copy('source-bucket', 'source-key', 'destination-bucket', 'destination-key');

// Check if a file exists
$s3->has('my-image-bucket', 'photos/cat_2.jpg'); // true

Expand Down
3 changes: 2 additions & 1 deletion manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,8 @@
"PutObject",
"PutObjectAcl",
"PutObjectTagging",
"UploadPart"
"UploadPart",
"UploadPartCopy"
]
},
"Scheduler": {
Expand Down
90 changes: 90 additions & 0 deletions src/Integration/Aws/SimpleS3/src/SimpleS3Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
use AsyncAws\Core\Stream\FixedSizeStream;
use AsyncAws\Core\Stream\ResultStream;
use AsyncAws\Core\Stream\StreamFactory;
use AsyncAws\S3\Input\CompleteMultipartUploadRequest;
use AsyncAws\S3\Input\CopyObjectRequest;
use AsyncAws\S3\Input\CreateMultipartUploadRequest;
use AsyncAws\S3\Input\GetObjectRequest;
use AsyncAws\S3\Input\UploadPartCopyRequest;
use AsyncAws\S3\S3Client;
use AsyncAws\S3\ValueObject\CompletedMultipartUpload;
use AsyncAws\S3\ValueObject\CompletedPart;
Expand Down Expand Up @@ -47,6 +51,70 @@ public function has(string $bucket, string $key): bool
return $this->objectExists(['Bucket' => $bucket, 'Key' => $key])->isSuccess();
}

/**
 * Copies an object, transparently switching to a multipart UploadPartCopy
 * flow when the source object is 5 GB or larger (the atomic CopyObject
 * operation is limited to objects below that size).
 *
 * @param array{
 *   ACL?: \AsyncAws\S3\Enum\ObjectCannedACL::*,
 *   CacheControl?: string,
 *   ContentLength?: int,
 *   ContentType?: string,
 *   Metadata?: array<string, string>,
 *   PartSize?: int,
 * } $options ContentLength (bytes) skips the HeadObject lookup; PartSize is in megabytes.
 */
public function copy(string $srcBucket, string $srcKey, string $destBucket, string $destKey, array $options = []): void
{
    $megabyte = 1024 * 1024;

    // Use the caller-provided length when available to avoid an extra HeadObject round trip.
    if (!empty($options['ContentLength'])) {
        $contentLength = (int) $options['ContentLength'];
        unset($options['ContentLength']);
    } else {
        $contentLength = (int) $this->headObject(['Bucket' => $srcBucket, 'Key' => $srcKey])->getContentLength();
    }

    /*
     * S3 allows at most 10,000 parts per multipart upload. We default to
     * 64 MB per part, which supports copying files up to 64 MB * 10,000 = 640 GB.
     * If you are copying larger files, set PartSize to a higher number of
     * megabytes, e.g. 128, 256 or 512 (S3 caps a single part at 5 GB).
     */
    $partSize = ($options['PartSize'] ?? 64) * $megabyte;
    unset($options['PartSize']);

    // Below 5 GB we can use the single-request, atomic CopyObject operation.
    // NOTE(review): CopySource is sent un-encoded; keys containing characters
    // that require URL-encoding may need rawurlencode per path segment — confirm
    // against the S3 CopySource header requirements.
    if ($contentLength < 5120 * $megabyte) {
        $this->copyObject(
            CopyObjectRequest::create(
                array_merge($options, ['Bucket' => $destBucket, 'Key' => $destKey, 'CopySource' => "{$srcBucket}/{$srcKey}"])
            )
        );

        return;
    }

    $uploadId = $this->createMultipartUpload(
        CreateMultipartUploadRequest::create(
            array_merge($options, ['Bucket' => $destBucket, 'Key' => $destKey])
        )
    )->getUploadId();

    // Copy the source in sequential byte ranges; each range becomes one part.
    $parts = [];
    for ($partNumber = 1, $bytePosition = 0; $bytePosition < $contentLength; ++$partNumber, $bytePosition += $partSize) {
        $startByte = $bytePosition;
        // Byte ranges are inclusive; the last part may be shorter than $partSize.
        $endByte = min($bytePosition + $partSize, $contentLength) - 1;
        $parts[] = $this->doMultipartCopy($destBucket, $destKey, $uploadId, $partNumber, "{$srcBucket}/{$srcKey}", $startByte, $endByte);
    }

    $this->completeMultipartUpload(
        CompleteMultipartUploadRequest::create([
            'Bucket' => $destBucket,
            'Key' => $destKey,
            'UploadId' => $uploadId,
            'MultipartUpload' => new CompletedMultipartUpload(['Parts' => $parts]),
        ])
    );
}

/**
* @param string|resource|(callable(int): string)|iterable<string> $object
* @param array{
Expand Down Expand Up @@ -195,4 +263,26 @@ private function doSmallFileUpload(array $options, string $bucket, string $key,
'Body' => $object,
]));
}

/**
 * Copies one byte range of the source object as a single part of an ongoing
 * multipart upload. On any failure the whole multipart upload is aborted so
 * that no orphaned parts keep accruing storage costs, then the error is rethrown.
 *
 * @param string $copySource "source-bucket/source-key" identifier of the object to copy from
 * @param int    $startByte  first byte of the inclusive range to copy
 * @param int    $endByte    last byte of the inclusive range to copy
 *
 * @throws \RuntimeException when S3 does not return an ETag for the copied part
 */
private function doMultipartCopy(string $bucket, string $key, string $uploadId, int $partNumber, string $copySource, int $startByte, int $endByte): CompletedPart
{
    try {
        $response = $this->uploadPartCopy(
            UploadPartCopyRequest::create([
                'Bucket' => $bucket,
                'Key' => $key,
                'UploadId' => $uploadId,
                'CopySource' => $copySource,
                'CopySourceRange' => "bytes={$startByte}-{$endByte}",
                'PartNumber' => $partNumber,
            ])
        );

        // Avoid the PHP 8-only nullsafe operator (the surrounding client code
        // does not use it) and fail fast instead of silently registering a part
        // with a null ETag, which would only surface later as a confusing
        // CompleteMultipartUpload error. Throwing here is caught below and
        // triggers the abort before being rethrown.
        $copyPartResult = $response->getCopyPartResult();
        if (null === $copyPartResult || null === $copyPartResult->getEtag()) {
            throw new \RuntimeException(sprintf('Missing ETag for part %d of "%s".', $partNumber, $key));
        }

        return new CompletedPart(['ETag' => $copyPartResult->getEtag(), 'PartNumber' => $partNumber]);
    } catch (\Throwable $e) {
        $this->abortMultipartUpload(['Bucket' => $bucket, 'Key' => $key, 'UploadId' => $uploadId]);

        throw $e;
    }
}
}
71 changes: 71 additions & 0 deletions src/Integration/Aws/SimpleS3/tests/Unit/SimpleS3ClientTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@

use AsyncAws\Core\Credentials\NullProvider;
use AsyncAws\Core\Test\ResultMockFactory;
use AsyncAws\S3\Input\CompleteMultipartUploadRequest;
use AsyncAws\S3\Result\CreateMultipartUploadOutput;
use AsyncAws\S3\Result\HeadObjectOutput;
use AsyncAws\S3\Result\UploadPartCopyOutput;
use AsyncAws\S3\ValueObject\CopyPartResult;
use AsyncAws\SimpleS3\SimpleS3Client;
use PHPUnit\Framework\TestCase;
use Symfony\Component\HttpClient\MockHttpClient;
Expand Down Expand Up @@ -137,6 +141,73 @@ public function testUploadSmallFileEmptyClosure()
});
}

public function testCopySmallFileWithProvidedLength()
{
    $megabyte = 1024 * 1024;

    $s3 = $this->getMockBuilder(SimpleS3Client::class)
        ->disableOriginalConstructor()
        ->onlyMethods(['createMultipartUpload', 'abortMultipartUpload', 'copyObject', 'completeMultipartUpload'])
        ->getMock();

    // A 5 MB object is far below the 5 GB threshold: expect a single atomic
    // CopyObject call and no multipart activity at all.
    $s3->expects(self::once())->method('copyObject');
    $s3->expects(self::never())->method('createMultipartUpload');
    $s3->expects(self::never())->method('completeMultipartUpload');
    $s3->expects(self::never())->method('abortMultipartUpload');

    $s3->copy('bucket', 'robots.txt', 'bucket', 'copy-robots.txt', ['ContentLength' => 5 * $megabyte]);
}

public function testCopySmallFileWithoutProvidedLength()
{
    $megabyte = 1024 * 1024;

    $s3 = $this->getMockBuilder(SimpleS3Client::class)
        ->disableOriginalConstructor()
        ->onlyMethods(['createMultipartUpload', 'abortMultipartUpload', 'copyObject', 'completeMultipartUpload', 'headObject'])
        ->getMock();

    // With no ContentLength option the client must look the size up itself.
    $s3->expects(self::once())->method('headObject')
        ->willReturn(ResultMockFactory::create(HeadObjectOutput::class, ['ContentLength' => 50 * $megabyte]));

    // 50 MB is still below the 5 GB threshold: single atomic copy expected.
    $s3->expects(self::once())->method('copyObject');
    $s3->expects(self::never())->method('createMultipartUpload');
    $s3->expects(self::never())->method('completeMultipartUpload');
    $s3->expects(self::never())->method('abortMultipartUpload');

    $s3->copy('bucket', 'robots.txt', 'bucket', 'copy-robots.txt');
}

public function testCopyLargeFile()
{
    $megabyte = 1024 * 1024;
    // 70 MB is BELOW the 5 GB multipart threshold and would take the atomic
    // CopyObject path, so use 6 GB here. With the default 64 MB part size,
    // 6 GB must be copied in exactly 6144 / 64 = 96 parts.
    $contentLength = 6 * 1024 * $megabyte;
    $expectedParts = 96;
    $uploadedParts = 0;
    $completedParts = 0;

    $s3 = $this->getMockBuilder(SimpleS3Client::class)
        ->disableOriginalConstructor()
        ->onlyMethods(['createMultipartUpload', 'abortMultipartUpload', 'copyObject', 'completeMultipartUpload', 'uploadPartCopy'])
        ->getMock();

    $s3->expects(self::once())->method('createMultipartUpload')
        ->willReturn(ResultMockFactory::create(CreateMultipartUploadOutput::class, ['UploadId' => '4711']));
    $s3->expects(self::never())->method('abortMultipartUpload');
    $s3->expects(self::never())->method('copyObject');
    // Pin the exact number of part-copy calls instead of self::any(), which
    // would also pass if no parts were copied at all.
    $s3->expects(self::exactly($expectedParts))->method('uploadPartCopy')
        ->with(self::callback(function () use (&$uploadedParts) {
            ++$uploadedParts;

            return true;
        }))
        ->willReturn(ResultMockFactory::create(UploadPartCopyOutput::class, ['copyPartResult' => new CopyPartResult(['ETag' => 'etag-4711'])]));
    $s3->expects(self::once())->method('completeMultipartUpload')->with(self::callback(function (CompleteMultipartUploadRequest $request) use (&$completedParts) {
        $completedParts = \count($request->getMultipartUpload()->getParts());

        return true;
    }));

    $s3->copy('bucket', 'robots.txt', 'bucket', 'copy-robots.txt', ['ContentLength' => $contentLength]);

    self::assertSame($expectedParts, $uploadedParts);
    self::assertSame($expectedParts, $completedParts);
}

private function assertSmallFileUpload(\Closure $callback, string $bucket, string $file, $object): void
{
$s3 = $this->getMockBuilder(SimpleS3Client::class)
Expand Down
132 changes: 132 additions & 0 deletions src/Service/S3/src/S3Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
use AsyncAws\S3\Input\PutObjectAclRequest;
use AsyncAws\S3\Input\PutObjectRequest;
use AsyncAws\S3\Input\PutObjectTaggingRequest;
use AsyncAws\S3\Input\UploadPartCopyRequest;
use AsyncAws\S3\Input\UploadPartRequest;
use AsyncAws\S3\Result\AbortMultipartUploadOutput;
use AsyncAws\S3\Result\BucketExistsWaiter;
Expand All @@ -82,6 +83,7 @@
use AsyncAws\S3\Result\PutObjectAclOutput;
use AsyncAws\S3\Result\PutObjectOutput;
use AsyncAws\S3\Result\PutObjectTaggingOutput;
use AsyncAws\S3\Result\UploadPartCopyOutput;
use AsyncAws\S3\Result\UploadPartOutput;
use AsyncAws\S3\Signer\SignerV4ForS3;
use AsyncAws\S3\ValueObject\AccessControlPolicy;
Expand Down Expand Up @@ -2432,6 +2434,136 @@ public function uploadPart($input): UploadPartOutput
return new UploadPartOutput($response);
}

/**
 * Uploads a part by copying data from an existing object as data source. You specify the data source by adding the
 * request header `x-amz-copy-source` in your request and a byte range by adding the request header
 * `x-amz-copy-source-range` in your request.
 *
 * For information about maximum and minimum part sizes and other multipart upload specifications, see Multipart upload
 * limits [^1] in the *Amazon S3 User Guide*.
 *
 * > Instead of using an existing object as part data, you might use the UploadPart [^2] action and provide data in your
 * > request.
 *
 * You must initiate a multipart upload before you can upload any part. In response to your initiate request, Amazon S3
 * returns a unique identifier, the upload ID, that you must include in your upload part request.
 *
 * For more information about using the `UploadPartCopy` operation, see the following:
 *
 * - For conceptual information about multipart uploads, see Uploading Objects Using Multipart Upload [^3] in the
 *   *Amazon S3 User Guide*.
 * - For information about permissions required to use the multipart upload API, see Multipart Upload and Permissions
 *   [^4] in the *Amazon S3 User Guide*.
 * - For information about copying objects using a single atomic action vs. a multipart upload, see Operations on
 *   Objects [^5] in the *Amazon S3 User Guide*.
 * - For information about using server-side encryption with customer-provided encryption keys with the `UploadPartCopy`
 *   operation, see CopyObject [^6] and UploadPart [^7].
 *
 * Note the following additional considerations about the request headers `x-amz-copy-source-if-match`,
 * `x-amz-copy-source-if-none-match`, `x-amz-copy-source-if-unmodified-since`, and
 * `x-amz-copy-source-if-modified-since`:
 *
 * - **Consideration 1** - If both of the `x-amz-copy-source-if-match` and `x-amz-copy-source-if-unmodified-since`
 *   headers are present in the request as follows:
 *
 *   `x-amz-copy-source-if-match` condition evaluates to `true`, and;
 *
 *   `x-amz-copy-source-if-unmodified-since` condition evaluates to `false`;
 *
 *   Amazon S3 returns `200 OK` and copies the data.
 * - **Consideration 2** - If both of the `x-amz-copy-source-if-none-match` and `x-amz-copy-source-if-modified-since`
 *   headers are present in the request as follows:
 *
 *   `x-amz-copy-source-if-none-match` condition evaluates to `false`, and;
 *
 *   `x-amz-copy-source-if-modified-since` condition evaluates to `true`;
 *
 *   Amazon S3 returns `412 Precondition Failed` response code.
 *
 * - `Versioning`:
 *
 *   If your bucket has versioning enabled, you could have multiple versions of the same object. By default,
 *   `x-amz-copy-source` identifies the current version of the object to copy. If the current version is a delete marker
 *   and you don't specify a versionId in the `x-amz-copy-source`, Amazon S3 returns a 404 error, because the object
 *   does not exist. If you specify versionId in the `x-amz-copy-source` and the versionId is a delete marker, Amazon S3
 *   returns an HTTP 400 error, because you are not allowed to specify a delete marker as a version for the
 *   `x-amz-copy-source`.
 *
 *   You can optionally specify a specific version of the source object to copy by adding the `versionId` subresource as
 *   shown in the following example:
 *
 *   `x-amz-copy-source: /bucket/object?versionId=version id`
 * - `Special errors`:
 *
 *   - *Code: NoSuchUpload*
 *   - *Cause: The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload
 *     might have been aborted or completed.*
 *   - *HTTP Status Code: 404 Not Found*
 *
 *   - *Code: InvalidRequest*
 *   - *Cause: The specified copy source is not supported as a byte-range copy source.*
 *   - *HTTP Status Code: 400 Bad Request*
 *
 * The following operations are related to `UploadPartCopy`:
 *
 * - CreateMultipartUpload [^8]
 * - UploadPart [^9]
 * - CompleteMultipartUpload [^10]
 * - AbortMultipartUpload [^11]
 * - ListParts [^12]
 * - ListMultipartUploads [^13]
 *
 * [^1]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
 * [^2]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html
 * [^3]: https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html
 * [^4]: https://docs.aws.amazon.com/AmazonS3/latest/dev/mpuAndPermissions.html
 * [^5]: https://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectOperations.html
 * [^6]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html
 * [^7]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html
 * [^8]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html
 * [^9]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html
 * [^10]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html
 * [^11]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_AbortMultipartUpload.html
 * [^12]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListParts.html
 * [^13]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListMultipartUploads.html
 *
 * @see http://docs.amazonwebservices.com/AmazonS3/latest/API/mpUploadUploadPartCopy.html
 * @see https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPartCopy.html
 * @see https://docs.aws.amazon.com/aws-sdk-php/v3/api/api-s3-2006-03-01.html#uploadpartcopy
 *
 * @param array{
 *   Bucket: string,
 *   CopySource: string,
 *   CopySourceIfMatch?: null|string,
 *   CopySourceIfModifiedSince?: null|\DateTimeImmutable|string,
 *   CopySourceIfNoneMatch?: null|string,
 *   CopySourceIfUnmodifiedSince?: null|\DateTimeImmutable|string,
 *   CopySourceRange?: null|string,
 *   Key: string,
 *   PartNumber: int,
 *   UploadId: string,
 *   SSECustomerAlgorithm?: null|string,
 *   SSECustomerKey?: null|string,
 *   SSECustomerKeyMD5?: null|string,
 *   CopySourceSSECustomerAlgorithm?: null|string,
 *   CopySourceSSECustomerKey?: null|string,
 *   CopySourceSSECustomerKeyMD5?: null|string,
 *   RequestPayer?: null|RequestPayer::*,
 *   ExpectedBucketOwner?: null|string,
 *   ExpectedSourceBucketOwner?: null|string,
 *   '@region'?: string|null,
 * }|UploadPartCopyRequest $input
 */
public function uploadPartCopy($input): UploadPartCopyOutput
{
    $input = UploadPartCopyRequest::create($input);
    $response = $this->getResponse($input->request(), new RequestContext(['operation' => 'UploadPartCopy', 'region' => $input->getRegion()]));

    return new UploadPartCopyOutput($response);
}

protected function getAwsErrorFactory(): AwsErrorFactoryInterface
{
return new XmlAwsErrorFactory();
Expand Down
Loading

0 comments on commit 261722d

Please sign in to comment.