diff --git a/HISTORY.md b/HISTORY.md index abd36d2..e80269f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,10 @@ # History -## 0.15.0 (2024-01-06) +## 0.15.1 (2025-01-23) + +* Fix potential memory leaks by confining use of `pyvips` to subprocesses + +## 0.15.0 (2025-01-06) * Removed support for Python 3.9 diff --git a/panimg/image_builders/tiff.py b/panimg/image_builders/tiff.py index d62f10e..81c22dd 100644 --- a/panimg/image_builders/tiff.py +++ b/panimg/image_builders/tiff.py @@ -2,6 +2,7 @@ import re from collections import defaultdict from collections.abc import Callable, Iterator +from concurrent.futures import ProcessPoolExecutor from dataclasses import dataclass, field from pathlib import Path from tempfile import TemporaryDirectory @@ -17,17 +18,6 @@ from panimg.image_builders.dicom import get_dicom_headers_by_study from panimg.models import MAXIMUM_SEGMENTS_LENGTH, ColorSpace, TIFFImage -try: - import openslide -except (OSError, ModuleNotFoundError): - openslide = False - -try: - import pyvips -except OSError: - pyvips = False - - DICOM_WSI_STORAGE_ID = "1.2.840.10008.5.1.4.1.1.77.1.6" @@ -263,6 +253,8 @@ def _load_with_tiff( def _load_with_openslide( *, gc_file: GrandChallengeTiffFile ) -> GrandChallengeTiffFile: + import openslide + open_slide_file = openslide.open_slide(str(gc_file.path.absolute())) gc_file = _extract_openslide_properties( gc_file=gc_file, image=open_slide_file @@ -320,7 +312,6 @@ def _convert( *, files: list[Path], associated_files_getter: Callable[[Path], list[Path]] | None, - converter, output_directory: Path, file_errors: dict[Path, list[str]], ) -> list[GrandChallengeTiffFile]: @@ -334,12 +325,13 @@ def _convert( if associated_files_getter: associated_files = associated_files_getter(gc_file.path) - tiff_file = _convert_to_tiff( - path=file, - pk=gc_file.pk, - converter=converter, - output_directory=output_directory, - ) + with ProcessPoolExecutor(max_workers=1) as executor: + tiff_file = executor.submit( + _convert_to_tiff, + path=file, + pk=gc_file.pk, + output_directory=output_directory, + ).result() except Exception as e: file_errors[file].append( format_error( @@ -356,30 +348,24 @@ def _convert( return converted_files -def _convert_to_tiff( - *, path: Path, pk: UUID, converter, output_directory: Path -) -> Path: +def _convert_to_tiff(*, path: Path, pk: UUID, output_directory: Path) -> Path: + import pyvips + + major = pyvips.base.version(0) + minor = pyvips.base.version(1) + + if not (major > 8 or (major == 8 and minor >= 10)): + raise RuntimeError( + f"libvips {major}.{minor} is too old - requires >= 8.10" + ) + new_file_name = output_directory / path.name / f"{pk}.tif" new_file_name.parent.mkdir() - image = converter.Image.new_from_file( + image = pyvips.Image.new_from_file( str(path.absolute()), access="sequential" ) - using_old_vips = ( - pyvips.base.version(0) == 8 and pyvips.base.version(1) < 10 - ) - if ( - using_old_vips - and image.get("xres") == 1 - and "openslide.mpp-x" in image.get_fields() - ): - # correct xres and yres if they have default value of 1 - # due to a bug that is resolved in pyvips 8.10 - x_res = 1000.0 / float(image.get("openslide.mpp-x")) - y_res = 1000.0 / float(image.get("openslide.mpp-y")) - image = image.copy(xres=x_res, yres=y_res) - image.write_to_file( str(new_file_name.absolute()), tile=True, @@ -470,7 +456,6 @@ def associated_files(file_path: Path): def _load_gc_files( *, files: set[Path], - converter, output_directory: Path, file_errors: DefaultDict[Path, list[str]], ) -> list[GrandChallengeTiffFile]: @@ -503,7 +488,6 @@ def _load_gc_files( converted_files = _convert( files=complex_files, associated_files_getter=handler, - converter=converter, output_directory=output_directory, file_errors=file_errors, ) @@ -525,26 +509,11 @@ def _load_gc_files( def image_builder_tiff( # noqa: C901 *, files: set[Path] ) -> Iterator[TIFFImage]: - if openslide is False: - raise ImportError( - f"Could not import openslide, which is required for the " - f"{__name__} image builder. Either ensure that libopenslide-dev " - f"is installed or remove {__name__} from your list of builders." - ) - - if pyvips is False: - raise ImportError( - f"Could not import pyvips, which is required for the " - f"{__name__} image builder. Either ensure that libvips-dev " - f"is installed or remove {__name__} from your list of builders." - ) - file_errors: DefaultDict[Path, list[str]] = defaultdict(list) with TemporaryDirectory() as output_directory: loaded_files = _load_gc_files( files=files, - converter=pyvips, output_directory=Path(output_directory), file_errors=file_errors, ) diff --git a/panimg/post_processors/tiff_to_dzi.py b/panimg/post_processors/tiff_to_dzi.py index 121f937..fe5fdd9 100644 --- a/panimg/post_processors/tiff_to_dzi.py +++ b/panimg/post_processors/tiff_to_dzi.py @@ -1,31 +1,22 @@ import logging +from concurrent.futures import ProcessPoolExecutor from panimg.models import ImageType, PanImgFile, PostProcessorResult from panimg.settings import DZI_TILE_SIZE -try: - import pyvips -except OSError: - pyvips = False - logger = logging.getLogger(__name__) def tiff_to_dzi(*, image_files: set[PanImgFile]) -> PostProcessorResult: - if pyvips is False: - raise ImportError( - f"Could not import pyvips, which is required for the " - f"{__name__} post processor. Either ensure that libvips-dev " - f"is installed or remove {__name__} from your list of post " - f"processors." - ) - new_image_files: set[PanImgFile] = set() for file in image_files: if file.image_type == ImageType.TIFF: try: - result = _create_dzi_image(tiff_file=file) + with ProcessPoolExecutor(max_workers=1) as executor: + result = executor.submit( + _create_dzi_image, tiff_file=file + ).result() except Exception as e: logger.warning(f"Could not create DZI for {file}: {e}") continue @@ -36,6 +27,8 @@ def tiff_to_dzi(*, image_files: set[PanImgFile]) -> PostProcessorResult: def _create_dzi_image(*, tiff_file: PanImgFile) -> PostProcessorResult: + import pyvips + # Creates a dzi file and corresponding tiles in directory {pk}_files dzi_output = tiff_file.file.parent / str(tiff_file.image_id) diff --git a/pyproject.toml b/pyproject.toml index 6b39792..9f85850 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "panimg" -version = "0.15.0" +version = "0.15.1" description = "Conversion of medical images to MHA and TIFF." license = "Apache-2.0" authors = ["James Meakin "] diff --git a/tests/test_tiff.py b/tests/test_tiff.py index f6847bc..f681df7 100644 --- a/tests/test_tiff.py +++ b/tests/test_tiff.py @@ -1,12 +1,9 @@ import os import shutil -from collections import defaultdict from pathlib import Path -from unittest.mock import MagicMock, Mock from uuid import uuid4 import pytest -import pyvips import tifffile as tiff_lib from pytest import approx from tifffile import tifffile @@ -14,10 +11,8 @@ from panimg.exceptions import ValidationError from panimg.image_builders.tiff import ( GrandChallengeTiffFile, - _convert, _extract_tags, _get_color_space, - _get_mrxs_files, _load_with_openslide, _load_with_tiff, image_builder_tiff, @@ -302,41 +297,6 @@ def test_image_builder_tiff(tmpdir_factory): assert len(image_builder_result.new_image_files) == 3 -def test_handle_complex_files(tmpdir_factory): - # Copy resource files to writable temp directory - temp_dir = Path(tmpdir_factory.mktemp("temp") / "resources") - shutil.copytree(RESOURCE_PATH / "complex_tiff", temp_dir) - files = [Path(d[0]).joinpath(f) for d in os.walk(temp_dir) for f in d[2]] - - # set up mock object to mock pyvips - properties = { - "xres": 1, - "yres": 1, - "openslide.mpp-x": 0.2525, - "openslide.mpp-y": 0.2525, - } - - mock_converter = MagicMock(pyvips) - mock_image = mock_converter.Image.new_from_file.return_value - mock_image.get = Mock(return_value=1) - mock_image.get_fields = Mock(return_value=properties) - - _convert( - files=files, - associated_files_getter=_get_mrxs_files, - converter=mock_converter, - output_directory=Path(tmpdir_factory.mktemp("output")), - file_errors=defaultdict(list), - ) - - if pyvips.base.version(0) == 8 and pyvips.base.version(1) < 10: - # work-around calculation of xres and yres in _convert_to_tiff function - mock_image.copy.assert_called() - assert "xres" in mock_image.copy.call_args[1] - else: - mock_image.copy.assert_not_called() - - @pytest.mark.xfail( reason="skip for now as we don't want to upload a large testset" ) @@ -381,4 +341,94 @@ def test_error_handling(tmpdir_factory): output_directory=Path(tmpdir_factory.mktemp("output")), ) - assert len(image_builder_result.file_errors) == 14 + output = {k.name: v for k, v in image_builder_result.file_errors.items()} + + assert output == { + "Mirax2-Fluorescence-1.mrxs": [ + "TIFF image builder: Could not convert file to TIFF: " + "Mirax2-Fluorescence-1.mrxs, " + "error:unable to call VipsForeignLoadOpenslideFile\n " + "openslide2vips: opening slide: " + "Index.dat doesn't have expected version\n", + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Could not open file with OpenSlide.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "CMU-1-40x - 2010-01-12 13.24.05.vms": [ + "TIFF image builder: Could not convert file to TIFF: " + "CMU-1-40x - 2010-01-12 13.24.05.vms, " + "error:unable to call VipsForeignLoadOpenslideFile\n " + "openslide2vips: opening slide: Can't validate JPEG 0: " + "No restart markers\n", + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Could not open file with OpenSlide.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "Data0001.dat": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Could not open file with OpenSlide.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "Index.dat": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Could not open file with OpenSlide.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "CMU-1-40x - 2010-01-12 13.24.05(0,1).jpg": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "CMU-1-40x - 2010-01-12 13.24.05_map2.jpg": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "Slidedat.ini": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Could not open file with OpenSlide.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "CMU-1-40x - 2010-01-12 13.24.05_macro.jpg": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "CMU-1-40x - 2010-01-12 13.24.05.opt": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "CMU-1-40x - 2010-01-12 13.24.05(1,0).jpg": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "Data0000.dat": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Could not open file with OpenSlide.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "CMU-1-40x - 2010-01-12 13.24.05.jpg": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "CMU-1-40x - 2010-01-12 13.24.05(1,1).jpg": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + "Data0002.dat": [ + "TIFF image builder: Could not open file with tifffile.", + "TIFF image builder: Could not open file with OpenSlide.", + "TIFF image builder: Validation error: " + "Not a valid tif: Image width could not be determined.", + ], + }