From 1776776bcb77f5da806785ebbb533ba99fbbe30c Mon Sep 17 00:00:00 2001 From: Matthew Broadway Date: Fri, 22 Mar 2024 12:37:38 +0000 Subject: [PATCH 1/6] add notes on running tests in docker --- CONTRIBUTING.md | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b5908af497..df4f9d5dea 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -72,9 +72,9 @@ Composer uses [pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks) To test your changes locally, run: -1. `make test` # run CPU tests -1. `make test-gpu` # run GPU tests -1. `cd docs && make doctest` # run doctests +* `make test` # run CPU tests +* `make test-gpu` # run GPU tests +* `cd docs && make doctest` # run doctests Some of our checks test distributed training as well. To test these, run: @@ -87,6 +87,17 @@ See the [Makefile](/Makefile) for more information. If you want to run pre-commit hooks manually, which check for code formatting and type annotations, run `pre-commit run --all-files` +### Docker +To run the tests in the provided docker containers: + +* `docker pull mosaicml/composer` (or an alternative image like `mosaicml/composer:latest_cpu`) +* `docker run --rm -v .:/composer -it mosaicml/composer` +* from inside the container + * `cd /composer` + * `pip install -e .` + * `pytest ` or `make ` to run the desired tests + + ## Code Style & Typing See the [Composer Style Guide](/STYLE_GUIDE.md) for guidelines on how to structure and format your code. From d0fce4d91301d6031217b0b90c528c9cfd65fc6f Mon Sep 17 00:00:00 2001 From: Matthew Broadway Date: Fri, 22 Mar 2024 12:38:54 +0000 Subject: [PATCH 2/6] improve error handling of CliCompressor --- composer/utils/compression.py | 11 +++++++++-- tests/utils/test_compression.py | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/composer/utils/compression.py b/composer/utils/compression.py index f5206887d3..fdee7da039 100644 --- a/composer/utils/compression.py +++ b/composer/utils/compression.py @@ -32,6 +32,9 @@ def __init__(self, extension: str, cmd: Optional[str] = None) -> None: self.extension = extension self.cmd = cmd if cmd is not None else extension + def __repr__(self) -> str: + return f'CliCompressor({self.extension!r}, {self.cmd!r})' + @property def exists(self) -> bool: return shutil.which(self.cmd) is not None @@ -55,7 +58,9 @@ def compress(self, filename: str) -> Iterator[IO[bytes]]: assert proc.stdin is not None yield proc.stdin proc.stdin.close() - proc.wait() + returncode = proc.wait() + if returncode != 0: + raise IOError(f'failed to compress "{filename}" using {self!r}') def _decompress_cmd(self, filename: str) -> List[str]: return [self.cmd, '-dc', filename] @@ -69,7 +74,9 @@ def decompress(self, in_filename: str) -> Iterator[IO[bytes]]: ) assert proc.stdout is not None yield proc.stdout - proc.wait() + returncode = proc.wait() + if returncode != 0: + raise IOError(f'failed to decompress "{in_filename}" using {self!r}') def get_compressor(filename: str) -> CliCompressor: diff --git a/tests/utils/test_compression.py b/tests/utils/test_compression.py index 2af427274e..b748735d05 100644 --- a/tests/utils/test_compression.py +++ b/tests/utils/test_compression.py @@ -54,4 +54,5 @@ def test_compressor(tmp_path: Path, compressor: CliCompressor) -> None: assert test_file.read_bytes() != test_data with compressor.decompress(str(test_file)) as f: - assert f.read() == test_data + decompressed = f.read() + assert decompressed == test_data From 9d241fdbb2d151276c039dc9323e69c0fa7385c4 Mon Sep 17 00:00:00 2001 From: Matthew Broadway Date: Fri, 22 Mar 2024 12:39:25 +0000 Subject: [PATCH 3/6] remove problematic lzma package from docker image The version of lzma provided by the lzma package has a bug: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=700681 where it refuses to decompress if the file extension does not match, causing the test to fail. This could be worked around but the `xz-utils` package already provides a version of `lzma` that does not have this issue, so that can be used instead. --- docker/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 504ad78469..1cb86418cf 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -128,7 +128,6 @@ RUN apt-get update && \ bzip2 \ gzip \ lz4 \ - lzma \ lzop \ xz-utils \ zstd \ From 2cd79a8eccf7617d87714b830384af8acb5a76d2 Mon Sep 17 00:00:00 2001 From: Matthew Broadway Date: Fri, 22 Mar 2024 12:51:49 +0000 Subject: [PATCH 4/6] run as current user in docker container --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index df4f9d5dea..a2dc1e6220 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -91,7 +91,7 @@ If you want to run pre-commit hooks manually, which check for code formatting an To run the tests in the provided docker containers: * `docker pull mosaicml/composer` (or an alternative image like `mosaicml/composer:latest_cpu`) -* `docker run --rm -v .:/composer -it mosaicml/composer` +* `docker run --rm -v ./:/composer --user $(id -u):$(id -g) -it mosaicml/composer` * from inside the container * `cd /composer` * `pip install -e .` From e0331327ef76f3b6f68b66e829feecd4372cc423 Mon Sep 17 00:00:00 2001 From: Matthew Broadway Date: Fri, 22 Mar 2024 13:58:44 +0000 Subject: [PATCH 5/6] use compressor extension for test file --- tests/utils/test_compression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_compression.py b/tests/utils/test_compression.py index b748735d05..7518ccab28 100644 --- a/tests/utils/test_compression.py +++ b/tests/utils/test_compression.py @@ -45,7 +45,7 @@ def test_compressor(tmp_path: Path, compressor: CliCompressor) -> None: if not compressor.exists: pytest.skip(reason=f'compressor {compressor.cmd} not found') - test_file = tmp_path / 'my_file' + test_file = tmp_path / f'my_file.{compressor.extension}' test_data = b'foo foo foo' with compressor.compress(str(test_file)) as f: From ae7657d291e83d6500023a9bc9e0ab75df79e913 Mon Sep 17 00:00:00 2001 From: Matthew Broadway Date: Fri, 22 Mar 2024 14:06:53 +0000 Subject: [PATCH 6/6] improved documentation and error messages of CliCompressor --- composer/utils/compression.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/composer/utils/compression.py b/composer/utils/compression.py index fdee7da039..4ebd746820 100644 --- a/composer/utils/compression.py +++ b/composer/utils/compression.py @@ -26,7 +26,27 @@ def is_compressed_pt(filename: str) -> bool: class CliCompressor: - """Base class for data compression CLI tools.""" + """Base class for data compression CLI tools. + + This class handles compression and decompression of data by piping it through + CLI compressor tools installed on the system. e.g. the `gzip` command for producing `.gz` files. + + Example: + .. code-block:: python + + compressor = CliCompressor('gz', 'gzip') + + with compressor.compress('myfile.txt.gz') as f: + f.write('foo') + + with compressor.decompress('myfile.txt.gz') as f: + assert f.read() == 'foo' + + Args: + extension (str): The suffix used to identify files that the compressor supports (without a leading `.`). + cmd (str, optional): The name of the CLI tool that this compressor uses. Defaults to `None`, in which case + it is assumed that the tool name is the same as the extension. + """ def __init__(self, extension: str, cmd: Optional[str] = None) -> None: self.extension = extension @@ -37,6 +57,7 @@ def __repr__(self) -> str: @property def exists(self) -> bool: + """Whether the CLI tool used by this compressor can be found.""" return shutil.which(self.cmd) is not None def check_exists(self) -> None: @@ -47,9 +68,10 @@ def _compress_cmd(self) -> List[str]: return [self.cmd] @contextmanager - def compress(self, filename: str) -> Iterator[IO[bytes]]: + def compress(self, out_filename: str) -> Iterator[IO[bytes]]: + """Compress some data, saving to the given file.""" self.check_exists() - with open(filename, 'wb') as f: + with open(out_filename, 'wb') as f: proc = subprocess.Popen( self._compress_cmd(), stdin=subprocess.PIPE, @@ -60,13 +82,14 @@ def compress(self, filename: str) -> Iterator[IO[bytes]]: proc.stdin.close() returncode = proc.wait() if returncode != 0: - raise IOError(f'failed to compress "{filename}" using {self!r}') + raise IOError(f'failed to compress to "{out_filename}" using {self!r} (return code {returncode})') def _decompress_cmd(self, filename: str) -> List[str]: return [self.cmd, '-dc', filename] @contextmanager def decompress(self, in_filename: str) -> Iterator[IO[bytes]]: + """Decompress the content of the given file, providing the output as a file-like object.""" self.check_exists() proc = subprocess.Popen( self._decompress_cmd(in_filename), @@ -76,7 +99,7 @@ def decompress(self, in_filename: str) -> Iterator[IO[bytes]]: yield proc.stdout returncode = proc.wait() if returncode != 0: - raise IOError(f'failed to decompress "{in_filename}" using {self!r}') + raise IOError(f'failed to decompress "{in_filename}" using {self!r} (return code {returncode})') def get_compressor(filename: str) -> CliCompressor: