From cda979739d9bbc74ff3c72d4aa4bd61017ed0905 Mon Sep 17 00:00:00 2001
From: Mihir Patel
Date: Wed, 21 Feb 2024 16:38:41 -0500
Subject: [PATCH] Add min size to OCI download (#3044)

* add min size
* logs
* log info
* swap
* min 1
* remove log
---
 composer/utils/object_store/oci_object_store.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py
index d36b13e03b..72898464cc 100644
--- a/composer/utils/object_store/oci_object_store.py
+++ b/composer/utils/object_store/oci_object_store.py
@@ -137,6 +137,7 @@ def download_object(
         filename: Union[str, pathlib.Path],
         overwrite: bool = False,
         callback: Optional[Callable[[int, int], None]] = None,
+        min_part_size: int = 128000000,
         num_parts: int = 10,
     ):
         del callback
@@ -151,11 +152,14 @@ def download_object(
         object_size = 0
         try:
             head_object_response = self.client.head_object(self.namespace, self.bucket, object_name)
-            object_size = head_object_response.headers['content-length']  # pyright: ignore[reportOptionalMemberAccess]
+            object_size = int(head_object_response.headers['content-length'])  # pyright: ignore[reportOptionalMemberAccess]
         except Exception as e:
             _reraise_oci_errors(self.get_uri(object_name), e)
+
         # Calculate the part sizes
-        base_part_size, remainder = divmod(int(object_size), num_parts)
+        num_parts_from_size = max(object_size // min_part_size, 1)
+        num_parts = min(num_parts, num_parts_from_size)
+        base_part_size, remainder = divmod(object_size, num_parts)
         part_sizes = [base_part_size] * num_parts
         for i in range(remainder):
             part_sizes[i] += 1