Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix potential memory leaks by confining use of pyvips to subprocesses #117

Merged
merged 4 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# History

## 0.15.0 (2024-01-06)
## 0.15.1 (2025-01-23)

* Fix potential memory leaks by confining use of `pyvips` to subprocesses

## 0.15.0 (2025-01-06)

* Removed support for Python 3.9

Expand Down
75 changes: 22 additions & 53 deletions panimg/image_builders/tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
from collections import defaultdict
from collections.abc import Callable, Iterator
from concurrent.futures import ProcessPoolExecutor
from dataclasses import dataclass, field
from pathlib import Path
from tempfile import TemporaryDirectory
Expand All @@ -17,17 +18,6 @@
from panimg.image_builders.dicom import get_dicom_headers_by_study
from panimg.models import MAXIMUM_SEGMENTS_LENGTH, ColorSpace, TIFFImage

try:
import openslide
except (OSError, ModuleNotFoundError):
openslide = False

try:
import pyvips
except OSError:
pyvips = False


DICOM_WSI_STORAGE_ID = "1.2.840.10008.5.1.4.1.1.77.1.6"


Expand Down Expand Up @@ -263,6 +253,8 @@ def _load_with_tiff(
def _load_with_openslide(
*, gc_file: GrandChallengeTiffFile
) -> GrandChallengeTiffFile:
import openslide

open_slide_file = openslide.open_slide(str(gc_file.path.absolute()))
gc_file = _extract_openslide_properties(
gc_file=gc_file, image=open_slide_file
Expand Down Expand Up @@ -320,7 +312,6 @@ def _convert(
*,
files: list[Path],
associated_files_getter: Callable[[Path], list[Path]] | None,
converter,
output_directory: Path,
file_errors: dict[Path, list[str]],
) -> list[GrandChallengeTiffFile]:
Expand All @@ -334,12 +325,13 @@ def _convert(
if associated_files_getter:
associated_files = associated_files_getter(gc_file.path)

tiff_file = _convert_to_tiff(
path=file,
pk=gc_file.pk,
converter=converter,
output_directory=output_directory,
)
with ProcessPoolExecutor(max_workers=1) as executor:
tiff_file = executor.submit(
_convert_to_tiff,
path=file,
pk=gc_file.pk,
output_directory=output_directory,
).result()
except Exception as e:
file_errors[file].append(
format_error(
Expand All @@ -356,30 +348,24 @@ def _convert(
return converted_files


def _convert_to_tiff(
*, path: Path, pk: UUID, converter, output_directory: Path
) -> Path:
def _convert_to_tiff(*, path: Path, pk: UUID, output_directory: Path) -> Path:
import pyvips

major = pyvips.base.version(0)
minor = pyvips.base.version(1)

if not (major > 8 or (major == 8 and minor >= 10)):
raise RuntimeError(
f"libvips {major}.{minor} is too old - requires >= 8.10"
)

new_file_name = output_directory / path.name / f"{pk}.tif"
new_file_name.parent.mkdir()

image = converter.Image.new_from_file(
image = pyvips.Image.new_from_file(
str(path.absolute()), access="sequential"
)

using_old_vips = (
pyvips.base.version(0) == 8 and pyvips.base.version(1) < 10
)
if (
using_old_vips
and image.get("xres") == 1
and "openslide.mpp-x" in image.get_fields()
):
# correct xres and yres if they have default value of 1
# due to a bug that is resolved in pyvips 8.10
x_res = 1000.0 / float(image.get("openslide.mpp-x"))
y_res = 1000.0 / float(image.get("openslide.mpp-y"))
image = image.copy(xres=x_res, yres=y_res)

image.write_to_file(
str(new_file_name.absolute()),
tile=True,
Expand Down Expand Up @@ -470,7 +456,6 @@ def associated_files(file_path: Path):
def _load_gc_files(
*,
files: set[Path],
converter,
output_directory: Path,
file_errors: DefaultDict[Path, list[str]],
) -> list[GrandChallengeTiffFile]:
Expand Down Expand Up @@ -503,7 +488,6 @@ def _load_gc_files(
converted_files = _convert(
files=complex_files,
associated_files_getter=handler,
converter=converter,
output_directory=output_directory,
file_errors=file_errors,
)
Expand All @@ -525,26 +509,11 @@ def _load_gc_files(
def image_builder_tiff( # noqa: C901
*, files: set[Path]
) -> Iterator[TIFFImage]:
if openslide is False:
raise ImportError(
f"Could not import openslide, which is required for the "
f"{__name__} image builder. Either ensure that libopenslide-dev "
f"is installed or remove {__name__} from your list of builders."
)

if pyvips is False:
raise ImportError(
f"Could not import pyvips, which is required for the "
f"{__name__} image builder. Either ensure that libvips-dev "
f"is installed or remove {__name__} from your list of builders."
)

file_errors: DefaultDict[Path, list[str]] = defaultdict(list)

with TemporaryDirectory() as output_directory:
loaded_files = _load_gc_files(
files=files,
converter=pyvips,
output_directory=Path(output_directory),
file_errors=file_errors,
)
Expand Down
21 changes: 7 additions & 14 deletions panimg/post_processors/tiff_to_dzi.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,22 @@
import logging
from concurrent.futures import ProcessPoolExecutor

from panimg.models import ImageType, PanImgFile, PostProcessorResult
from panimg.settings import DZI_TILE_SIZE

try:
import pyvips
except OSError:
pyvips = False

logger = logging.getLogger(__name__)


def tiff_to_dzi(*, image_files: set[PanImgFile]) -> PostProcessorResult:
if pyvips is False:
raise ImportError(
f"Could not import pyvips, which is required for the "
f"{__name__} post processor. Either ensure that libvips-dev "
f"is installed or remove {__name__} from your list of post "
f"processors."
)

new_image_files: set[PanImgFile] = set()

for file in image_files:
if file.image_type == ImageType.TIFF:
try:
result = _create_dzi_image(tiff_file=file)
with ProcessPoolExecutor(max_workers=1) as executor:
result = executor.submit(
_create_dzi_image, tiff_file=file
).result()
except Exception as e:
logger.warning(f"Could not create DZI for {file}: {e}")
continue
Expand All @@ -36,6 +27,8 @@ def tiff_to_dzi(*, image_files: set[PanImgFile]) -> PostProcessorResult:


def _create_dzi_image(*, tiff_file: PanImgFile) -> PostProcessorResult:
import pyvips

# Creates a dzi file and corresponding tiles in directory {pk}_files
dzi_output = tiff_file.file.parent / str(tiff_file.image_id)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "panimg"
version = "0.15.0"
version = "0.15.1"
description = "Conversion of medical images to MHA and TIFF."
license = "Apache-2.0"
authors = ["James Meakin <[email protected]>"]
Expand Down
132 changes: 91 additions & 41 deletions tests/test_tiff.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
import os
import shutil
from collections import defaultdict
from pathlib import Path
from unittest.mock import MagicMock, Mock
from uuid import uuid4

import pytest
import pyvips
import tifffile as tiff_lib
from pytest import approx
from tifffile import tifffile

from panimg.exceptions import ValidationError
from panimg.image_builders.tiff import (
GrandChallengeTiffFile,
_convert,
_extract_tags,
_get_color_space,
_get_mrxs_files,
_load_with_openslide,
_load_with_tiff,
image_builder_tiff,
Expand Down Expand Up @@ -302,41 +297,6 @@ def test_image_builder_tiff(tmpdir_factory):
assert len(image_builder_result.new_image_files) == 3


def test_handle_complex_files(tmpdir_factory):
# Copy resource files to writable temp directory
temp_dir = Path(tmpdir_factory.mktemp("temp") / "resources")
shutil.copytree(RESOURCE_PATH / "complex_tiff", temp_dir)
files = [Path(d[0]).joinpath(f) for d in os.walk(temp_dir) for f in d[2]]

# set up mock object to mock pyvips
properties = {
"xres": 1,
"yres": 1,
"openslide.mpp-x": 0.2525,
"openslide.mpp-y": 0.2525,
}

mock_converter = MagicMock(pyvips)
mock_image = mock_converter.Image.new_from_file.return_value
mock_image.get = Mock(return_value=1)
mock_image.get_fields = Mock(return_value=properties)

_convert(
files=files,
associated_files_getter=_get_mrxs_files,
converter=mock_converter,
output_directory=Path(tmpdir_factory.mktemp("output")),
file_errors=defaultdict(list),
)

if pyvips.base.version(0) == 8 and pyvips.base.version(1) < 10:
# work-around calculation of xres and yres in _convert_to_tiff function
mock_image.copy.assert_called()
assert "xres" in mock_image.copy.call_args[1]
else:
mock_image.copy.assert_not_called()


@pytest.mark.xfail(
reason="skip for now as we don't want to upload a large testset"
)
Expand Down Expand Up @@ -381,4 +341,94 @@ def test_error_handling(tmpdir_factory):
output_directory=Path(tmpdir_factory.mktemp("output")),
)

assert len(image_builder_result.file_errors) == 14
output = {k.name: v for k, v in image_builder_result.file_errors.items()}

assert output == {
"Mirax2-Fluorescence-1.mrxs": [
"TIFF image builder: Could not convert file to TIFF: "
"Mirax2-Fluorescence-1.mrxs, "
"error:unable to call VipsForeignLoadOpenslideFile\n "
"openslide2vips: opening slide: "
"Index.dat doesn't have expected version\n",
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05.vms": [
"TIFF image builder: Could not convert file to TIFF: "
"CMU-1-40x - 2010-01-12 13.24.05.vms, "
"error:unable to call VipsForeignLoadOpenslideFile\n "
"openslide2vips: opening slide: Can't validate JPEG 0: "
"No restart markers\n",
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"Data0001.dat": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"Index.dat": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05(0,1).jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05_map2.jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"Slidedat.ini": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05_macro.jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05.opt": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05(1,0).jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"Data0000.dat": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05.jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"CMU-1-40x - 2010-01-12 13.24.05(1,1).jpg": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
"Data0002.dat": [
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: "
"Not a valid tif: Image width could not be determined.",
],
}
Loading