From f2bb5500d5becba8d9aa8a92a4fc1b3424911cf8 Mon Sep 17 00:00:00 2001 From: willgraf <7930703+willgraf@users.noreply.github.com> Date: Thu, 15 Jul 2021 15:40:15 -0700 Subject: [PATCH] Compress `.trks` by using gzip compression when writing the tarfile. (#64) * Write tarfiles with gzip compression. * Use tmpdir test fixture for temporary directories. --- deepcell_tracking/tracking.py | 2 +- deepcell_tracking/tracking_test.py | 74 +++++++++++++----------------- deepcell_tracking/utils.py | 2 +- deepcell_tracking/utils_test.py | 41 ++++++----------- 4 files changed, 49 insertions(+), 70 deletions(-) diff --git a/deepcell_tracking/tracking.py b/deepcell_tracking/tracking.py index 5d069e8..8f5bd2d 100644 --- a/deepcell_tracking/tracking.py +++ b/deepcell_tracking/tracking.py @@ -747,7 +747,7 @@ def dump(self, filename, track_review_dict=None): filename = str(filename) - with tarfile.open(filename, 'w') as trks: + with tarfile.open(filename, 'w:gz') as trks: # disable auto deletion and close/delete manually # to resolve double-opening issue on Windows. with tempfile.NamedTemporaryFile('w', delete=False) as lineage: diff --git a/deepcell_tracking/tracking_test.py b/deepcell_tracking/tracking_test.py index 3b810fd..5002307 100644 --- a/deepcell_tracking/tracking_test.py +++ b/deepcell_tracking/tracking_test.py @@ -29,10 +29,7 @@ from __future__ import division from __future__ import print_function -import errno import os -import shutil -import tempfile import numpy as np import pandas as pd @@ -123,7 +120,7 @@ def test_simple(self): neighborhood_encoder=encoder, data_format='invalid') - def test_track_cells(self): + def test_track_cells(self, tmpdir): frames = 10 track_length = 3 labels_per_frame = 3 @@ -172,41 +169,34 @@ def test_track_cells(self): with pytest.raises(ValueError): tracker.dataframe(bad_value=-1) - try: - # test tracker.postprocess - tempdir = tempfile.mkdtemp() # create dir - path = os.path.join(tempdir, 'postprocess.xyz') - tracker.postprocess(filename=path) - post_saved_path = os.path.join(tempdir, 'postprocess.trk') - assert os.path.isfile(post_saved_path) - - # test tracker.dump - path = os.path.join(tempdir, 'test.xyz') - tracker.dump(path) - dump_saved_path = os.path.join(tempdir, 'test.trk') - assert os.path.isfile(dump_saved_path) - - # utility tests for loading trk files - # TODO: move utility tests into utils_test.py - - # test trk_folder_to_trks - utils.trk_folder_to_trks(tempdir, os.path.join(tempdir, 'all.trks')) - assert os.path.isfile(os.path.join(tempdir, 'all.trks')) - - # test load_trks - data = utils.load_trks(post_saved_path) - assert isinstance(data['lineages'], list) - assert all(isinstance(d, dict) for d in data['lineages']) - np.testing.assert_equal(data['X'], tracker.X) - np.testing.assert_equal(data['y'], tracker.y_tracked) - # load trks instead of trk - data = utils.load_trks(os.path.join(tempdir, 'all.trks')) - - # test trks_stats - utils.trks_stats(os.path.join(tempdir, 'test.trk')) - finally: - try: - shutil.rmtree(tempdir) # delete directory - except OSError as exc: - if exc.errno != errno.ENOENT: # no such file or directory - raise # re-raise exception + # test tracker.postprocess + tempdir = str(tmpdir) + path = os.path.join(tempdir, 'postprocess.xyz') + tracker.postprocess(filename=path) + post_saved_path = os.path.join(tempdir, 'postprocess.trk') + assert os.path.isfile(post_saved_path) + + # test tracker.dump + path = os.path.join(tempdir, 'test.xyz') + tracker.dump(path) + dump_saved_path = os.path.join(tempdir, 'test.trk') + assert os.path.isfile(dump_saved_path) + + # utility tests for loading trk files + # TODO: move utility tests into utils_test.py + + # test trk_folder_to_trks + utils.trk_folder_to_trks(tempdir, os.path.join(tempdir, 'all.trks')) + assert os.path.isfile(os.path.join(tempdir, 'all.trks')) + + # test load_trks + data = utils.load_trks(post_saved_path) + assert isinstance(data['lineages'], list) + assert all(isinstance(d, dict) for d in data['lineages']) + np.testing.assert_equal(data['X'], tracker.X) + np.testing.assert_equal(data['y'], tracker.y_tracked) + # load trks instead of trk + data = utils.load_trks(os.path.join(tempdir, 'all.trks')) + + # test trks_stats + utils.trks_stats(os.path.join(tempdir, 'test.trk')) diff --git a/deepcell_tracking/utils.py b/deepcell_tracking/utils.py index 0ea4fdd..6f4f2bd 100644 --- a/deepcell_tracking/utils.py +++ b/deepcell_tracking/utils.py @@ -214,7 +214,7 @@ def save_trks(filename, lineages, raw, tracked): if not str(filename).lower().endswith('.trks'): raise ValueError('filename must end with `.trks`. Found %s' % filename) - with tarfile.open(filename, 'w') as trks: + with tarfile.open(filename, 'w:gz') as trks: with tempfile.NamedTemporaryFile('w', delete=False) as lineages_file: json.dump(lineages, lineages_file, indent=4) lineages_file.flush() diff --git a/deepcell_tracking/utils_test.py b/deepcell_tracking/utils_test.py index 6ebccb9..4742ca2 100644 --- a/deepcell_tracking/utils_test.py +++ b/deepcell_tracking/utils_test.py @@ -29,10 +29,7 @@ from __future__ import print_function import copy -import errno import os -import shutil -import tempfile import numpy as np import skimage as sk @@ -144,33 +141,25 @@ def test_count_pairs(self): y, same_probability=prob, data_format='channels_first') assert pairs == expected - def test_save_trks(self): + def test_save_trks(self, tmpdir): X = get_image(30, 30) y = np.random.randint(low=0, high=10, size=X.shape) lineage = [dict()] - try: - tempdir = tempfile.mkdtemp() # create dir - with pytest.raises(ValueError): - badfilename = os.path.join(tempdir, 'x.trk') - utils.save_trks(badfilename, lineage, X, y) - - filename = os.path.join(tempdir, 'x.trks') - utils.save_trks(filename, lineage, X, y) - assert os.path.isfile(filename) - - # test saved tracks can be loaded - loaded = utils.load_trks(filename) - assert loaded['lineages'] == lineage - np.testing.assert_array_equal(X, loaded['X']) - np.testing.assert_array_equal(y, loaded['y']) - - finally: - try: - shutil.rmtree(tempdir) # delete directory - except OSError as exc: - if exc.errno != errno.ENOENT: # no such file or directory - raise # re-raise exception + tempdir = str(tmpdir) + with pytest.raises(ValueError): + badfilename = os.path.join(tempdir, 'x.trk') + utils.save_trks(badfilename, lineage, X, y) + + filename = os.path.join(tempdir, 'x.trks') + utils.save_trks(filename, lineage, X, y) + assert os.path.isfile(filename) + + # test saved tracks can be loaded + loaded = utils.load_trks(filename) + assert loaded['lineages'] == lineage + np.testing.assert_array_equal(X, loaded['X']) + np.testing.assert_array_equal(y, loaded['y']) def test_normalize_adj_matrix(self): frames = 3