Skip to content

Commit

Permalink
Merge branch 'dev' into composer_retry
Browse files Browse the repository at this point in the history
  • Loading branch information
bigning authored Mar 22, 2024
2 parents 812797b + 00a07fa commit 34929c0
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 11 deletions.
17 changes: 14 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ Composer uses [pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks)

To test your changes locally, run:

1. `make test` # run CPU tests
1. `make test-gpu` # run GPU tests
1. `cd docs && make doctest` # run doctests
* `make test` # run CPU tests
* `make test-gpu` # run GPU tests
* `cd docs && make doctest` # run doctests

Some of our checks test distributed training as well. To test these, run:

Expand All @@ -87,6 +87,17 @@ See the [Makefile](/Makefile) for more information.

If you want to run the pre-commit hooks manually (they check code formatting and type annotations), run `pre-commit run --all-files`.

### Docker
To run the tests in the provided docker containers:

* `docker pull mosaicml/composer` (or an alternative image like `mosaicml/composer:latest_cpu`)
* `docker run --rm -v ./:/composer --user $(id -u):$(id -g) -it mosaicml/composer`
* From inside the container:
* `cd /composer`
* `pip install -e .`
* `pytest <args>` or `make <args>` to run the desired tests


## Code Style & Typing

See the [Composer Style Guide](/STYLE_GUIDE.md) for guidelines on how to structure and format your code.
Expand Down
40 changes: 35 additions & 5 deletions composer/utils/compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,38 @@ def is_compressed_pt(filename: str) -> bool:


class CliCompressor:
"""Base class for data compression CLI tools."""
"""Base class for data compression CLI tools.
This class handles compression and decompression of data by piping it through
CLI compressor tools installed on the system (e.g. the `gzip` command for producing `.gz` files).
Example:
.. code-block:: python
compressor = CliCompressor('gz', 'gzip')
with compressor.compress('myfile.txt.gz') as f:
f.write(b'foo')
with compressor.decompress('myfile.txt.gz') as f:
assert f.read() == b'foo'
Args:
extension (str): The suffix used to identify files that the compressor supports (without a leading `.`).
cmd (str, optional): The name of the CLI tool that this compressor uses. Defaults to `None`, in which case
it is assumed that the tool name is the same as the extension.
"""

def __init__(self, extension: str, cmd: Optional[str] = None) -> None:
self.extension = extension
self.cmd = cmd if cmd is not None else extension

def __repr__(self) -> str:
return f'CliCompressor({self.extension!r}, {self.cmd!r})'

@property
def exists(self) -> bool:
"""Whether the CLI tool used by this compressor can be found."""
return shutil.which(self.cmd) is not None

def check_exists(self) -> None:
Expand All @@ -44,9 +68,10 @@ def _compress_cmd(self) -> List[str]:
return [self.cmd]

@contextmanager
def compress(self, filename: str) -> Iterator[IO[bytes]]:
def compress(self, out_filename: str) -> Iterator[IO[bytes]]:
"""Compress some data, saving to the given file."""
self.check_exists()
with open(filename, 'wb') as f:
with open(out_filename, 'wb') as f:
proc = subprocess.Popen(
self._compress_cmd(),
stdin=subprocess.PIPE,
Expand All @@ -55,21 +80,26 @@ def compress(self, filename: str) -> Iterator[IO[bytes]]:
assert proc.stdin is not None
yield proc.stdin
proc.stdin.close()
proc.wait()
returncode = proc.wait()
if returncode != 0:
raise IOError(f'failed to compress to "{out_filename}" using {self!r} (return code {returncode})')

def _decompress_cmd(self, filename: str) -> List[str]:
return [self.cmd, '-dc', filename]

@contextmanager
def decompress(self, in_filename: str) -> Iterator[IO[bytes]]:
"""Decompress the content of the given file, providing the output as a file-like object."""
self.check_exists()
proc = subprocess.Popen(
self._decompress_cmd(in_filename),
stdout=subprocess.PIPE,
)
assert proc.stdout is not None
yield proc.stdout
proc.wait()
returncode = proc.wait()
if returncode != 0:
raise IOError(f'failed to decompress "{in_filename}" using {self!r} (return code {returncode})')


def get_compressor(filename: str) -> CliCompressor:
Expand Down
1 change: 0 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ RUN apt-get update && \
bzip2 \
gzip \
lz4 \
lzma \
lzop \
xz-utils \
zstd \
Expand Down
5 changes: 3 additions & 2 deletions tests/utils/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_compressor(tmp_path: Path, compressor: CliCompressor) -> None:
if not compressor.exists:
pytest.skip(reason=f'compressor {compressor.cmd} not found')

test_file = tmp_path / 'my_file'
test_file = tmp_path / f'my_file.{compressor.extension}'
test_data = b'foo foo foo'

with compressor.compress(str(test_file)) as f:
Expand All @@ -54,4 +54,5 @@ def test_compressor(tmp_path: Path, compressor: CliCompressor) -> None:
assert test_file.read_bytes() != test_data

with compressor.decompress(str(test_file)) as f:
assert f.read() == test_data
decompressed = f.read()
assert decompressed == test_data

0 comments on commit 34929c0

Please sign in to comment.