-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathwsi.py
308 lines (260 loc) · 10 KB
/
wsi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
from __future__ import annotations
import logging
from fractions import Fraction
from pathlib import Path
from typing import Protocol
import tifffile
from PIL import Image
from .errors import BackendNotAvailable
from .errors import CannotReadSpacing
from .errors import DuplicateFilePrefixesFound
from .errors import NoBackendException
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Name of the slide backend currently selected; reassigned by `set_backend`.
_BACKEND: str = "tiffslide"
# The only backend names that `set_backend` and `get_wsi_cls` accept.
_allowed_backends = {"openslide", "tiffslide"}
# Probe for OpenSlide. Any failure only marks the backend as unavailable; it is
# logged at debug level and never propagated.
try:
    import openslide

    # Test that OpenSlide object exists. If it doesn't, an error will be thrown and
    # caught. For some reason, it is possible that openslide-python can be installed
    # but the OpenSlide object (and other openslide things) are not available.
    openslide.OpenSlide  # noqa: B018
except Exception as err:
    HAS_OPENSLIDE = False
    logger.debug(f"Unable to import openslide due to error: {err}")
else:
    HAS_OPENSLIDE = True
    logger.debug("Imported openslide")

# Probe for TiffSlide in the same way.
try:
    import tiffslide
except Exception as err:
    HAS_TIFFSLIDE = False
    logger.debug(f"Unable to import tiffslide due to error: {err}")
else:
    HAS_TIFFSLIDE = True
    logger.debug("Imported tiffslide")

# At least one backend is required for this module to be importable.
if not (HAS_TIFFSLIDE or HAS_OPENSLIDE):
    raise NoBackendException(
        "No backend is available. Please install openslide or tiffslide."
    )
def set_backend(name: str) -> None:
    """Select the slide backend used by this module.

    Parameters
    ----------
    name : str
        Name of the backend, either "openslide" or "tiffslide".

    Raises
    ------
    ValueError if `name` is not one of the allowed backend names.
    BackendNotAvailable if the requested backend could not be imported.
    """
    global _BACKEND

    if name not in _allowed_backends:
        raise ValueError(f"Unknown backend: '{name}'")

    # Refuse to switch to a backend whose import failed at module load.
    if name == "openslide":
        if not HAS_OPENSLIDE:
            raise BackendNotAvailable(
                "OpenSlide is not available. Please install the OpenSlide compiled"
                " library and the Python package 'openslide-python'."
                " See https://openslide.org/ for more information."
            )
    elif name == "tiffslide" and not HAS_TIFFSLIDE:
        raise BackendNotAvailable(
            "TiffSlide is not available. Please install 'tiffslide'."
        )

    logger.debug(f"Set backend to {name}")
    _BACKEND = name
def get_wsi_cls() -> type[openslide.OpenSlide] | type[tiffslide.TiffSlide]:
    """Return the slide class that belongs to the currently selected backend.

    Returns
    -------
    `tiffslide.TiffSlide` or `openslide.OpenSlide`, depending on `_BACKEND`.

    Raises
    ------
    ValueError if `_BACKEND` holds a name that is not a known backend.
    """
    if _BACKEND not in _allowed_backends:
        raise ValueError(
            f"Unknown backend: '{_BACKEND}'. Please contact the developer!"
        )
    if _BACKEND == "tiffslide":
        return tiffslide.TiffSlide
    if _BACKEND == "openslide":
        return openslide.OpenSlide  # type: ignore
    # Only reachable if a name is allowed above but has no class mapped here.
    raise ValueError("Contact the developer, slide backend not known")
# Choose the default slide backend from whatever could be imported.
# TiffSlide takes priority when both backends are installed.
if not (HAS_TIFFSLIDE or HAS_OPENSLIDE):
    raise NoBackendException("No backend found! Please install openslide or tiffslide")
set_backend("tiffslide" if HAS_TIFFSLIDE else "openslide")
# Structural type for any object that exposes a `read_region` method.
class CanReadRegion(Protocol):
    """Protocol matched by objects that provide a compatible `read_region`."""

    def read_region(
        self,
        location: tuple[int, int],
        level: int,
        size: tuple[int, int],
    ) -> Image.Image:
        """Declare the signature only; this body does nothing."""
        ...
def _get_mpp_openslide(slide_path: str | Path) -> tuple[float, float]:
    """Read MPP using OpenSlide.

    The OpenSlide MPP properties are used when both are present. Otherwise the
    TIFF resolution tags exposed by OpenSlide are used, if all three exist.

    Parameters
    ----------
    slide_path : str or Path
        The path to the whole slide image.

    Returns
    -------
    mppx, mppy
        Two floats representing the micrometers per pixel in x and y dimensions.

    Raises
    ------
    CannotReadSpacing if spacing cannot be read from the whole slide image.
    """
    logger.debug("Attempting to read MPP using OpenSlide")
    if not HAS_OPENSLIDE:
        logger.critical(
            "Cannot read MPP with OpenSlide because OpenSlide is not available"
        )
        raise CannotReadSpacing()

    # Micrometers per TIFF resolution unit, keyed by the lower-cased unit name.
    micrometers_per_unit = {
        "inch": 25400.0,
        "centimeter": 10000.0,
        "cm": 10000.0,
        "millimeter": 1000.0,
    }

    # Open the slide as a context manager so the handle is closed on every
    # exit path (return or raise) instead of being leaked.
    with openslide.OpenSlide(slide_path) as slide:
        properties = slide.properties
        if (
            openslide.PROPERTY_NAME_MPP_X in properties
            and openslide.PROPERTY_NAME_MPP_Y in properties
        ):
            logger.debug(
                "Properties of the OpenSlide object contains keys"
                f" {openslide.PROPERTY_NAME_MPP_X} and {openslide.PROPERTY_NAME_MPP_Y}"
            )
            raw_mppx = properties[openslide.PROPERTY_NAME_MPP_X]
            raw_mppy = properties[openslide.PROPERTY_NAME_MPP_Y]
            logger.debug(
                f"Value of {openslide.PROPERTY_NAME_MPP_X} is {raw_mppx} and value"
                f" of {openslide.PROPERTY_NAME_MPP_Y} is {raw_mppy}"
            )
            if raw_mppx is not None and raw_mppy is not None:
                try:
                    logger.debug("Attempting to convert these MPP strings to floats")
                    return float(raw_mppx), float(raw_mppy)
                except (TypeError, ValueError) as err:
                    # Fall through to the final CannotReadSpacing below.
                    logger.debug(f"Exception caught while converting to float: {err}")
        elif (
            "tiff.ResolutionUnit" in properties
            and "tiff.XResolution" in properties
            and "tiff.YResolution" in properties
        ):
            logger.debug("Attempting to read spacing using openslide and tiff tags")
            resunit = properties["tiff.ResolutionUnit"].lower()
            scale = micrometers_per_unit.get(resunit)
            if scale is None:
                raise CannotReadSpacing(f"unknown resolution unit: '{resunit}'")
            try:
                x_resolution = float(properties["tiff.XResolution"])
                y_resolution = float(properties["tiff.YResolution"])
            except (TypeError, ValueError) as err:
                # Unparsable tags are reported as CannotReadSpacing so that
                # callers' fallbacks still run.
                raise CannotReadSpacing(
                    "cannot convert tiff.XResolution or tiff.YResolution to float"
                ) from err
            try:
                return scale / x_resolution, scale / y_resolution
            except ArithmeticError as err:
                raise CannotReadSpacing(
                    f"error in math {scale} / {x_resolution}"
                    f" or {scale} / {y_resolution}"
                ) from err
        else:
            logger.debug(
                "Properties of the OpenSlide object does not contain keys"
                f" {openslide.PROPERTY_NAME_MPP_X} and {openslide.PROPERTY_NAME_MPP_Y}"
            )
    raise CannotReadSpacing()
def _get_mpp_tiffslide(
    slide_path: str | Path,
) -> tuple[float, float]:
    """Read MPP using TiffSlide.

    Parameters
    ----------
    slide_path : str or Path
        The path to the whole slide image.

    Returns
    -------
    mppx, mppy
        Two floats representing the micrometers per pixel in x and y dimensions.

    Raises
    ------
    CannotReadSpacing if TiffSlide is unavailable, if either MPP property is
    missing or None, or if a value cannot be converted to float.
    """
    logger.debug("Attempting to read MPP using TiffSlide")
    if not HAS_TIFFSLIDE:
        logger.critical(
            "Cannot read MPP with TiffSlide because TiffSlide is not available"
        )
        raise CannotReadSpacing()

    # Open the slide as a context manager so the handle is closed on every
    # exit path (return or raise) instead of being leaked.
    with tiffslide.TiffSlide(slide_path) as slide:
        properties = slide.properties
        if (
            tiffslide.PROPERTY_NAME_MPP_X in properties
            and tiffslide.PROPERTY_NAME_MPP_Y in properties
        ):
            raw_mppx = properties[tiffslide.PROPERTY_NAME_MPP_X]
            raw_mppy = properties[tiffslide.PROPERTY_NAME_MPP_Y]
            if raw_mppx is None or raw_mppy is None:
                raise CannotReadSpacing()
            try:
                return float(raw_mppx), float(raw_mppy)
            except (TypeError, ValueError) as err:
                raise CannotReadSpacing() from err
    raise CannotReadSpacing()
# Modified from
# https://github.com/bayer-science-for-a-better-life/tiffslide/blob/8bea5a4c8e1429071ade6d4c40169ce153786d19/tiffslide/tiffslide.py#L712-L745
def _get_mpp_tifffile(slide_path: str | Path) -> tuple[float, float]:
    """Read MPP using Tifffile.

    The spacing is derived from the ResolutionUnit, XResolution and YResolution
    tags of the first page of the first series.

    Parameters
    ----------
    slide_path : str or Path
        The path to the whole slide image.

    Returns
    -------
    mppx, mppy
        Two floats representing the micrometers per pixel in x and y dimensions.

    Raises
    ------
    CannotReadSpacing if the tags are missing or malformed, if the resolution
    unit has no known scale, or if a resolution is zero.
    """
    logger.debug("Attempting to read MPP using tifffile")
    with tifffile.TiffFile(slide_path) as tif:
        try:
            page0 = tif.series[0][0]
        except IndexError as err:
            # No series or no page to read tags from.
            raise CannotReadSpacing("no TIFF page available") from err
        if not isinstance(page0, tifffile.TiffPage):
            raise CannotReadSpacing("not a tifffile.TiffPage instance")
        try:
            resolution_unit = page0.tags["ResolutionUnit"].value
            x_resolution = Fraction(*page0.tags["XResolution"].value)
            y_resolution = Fraction(*page0.tags["YResolution"].value)
        except (KeyError, TypeError, ValueError, ZeroDivisionError) as err:
            # Fraction raises ZeroDivisionError for a zero denominator; treat
            # that and other malformed tag values like a missing tag.
            raise CannotReadSpacing() from err

    # The tag values are already read, so the file no longer needs to be open.
    RESUNIT = tifffile.TIFF.RESUNIT
    # Micrometers per resolution unit; None means the unit gives no scale.
    scale = {
        RESUNIT.INCH: 25400.0,
        RESUNIT.CENTIMETER: 10000.0,
        RESUNIT.MILLIMETER: 1000.0,
        RESUNIT.MICROMETER: 1.0,
        RESUNIT.NONE: None,
    }.get(resolution_unit, None)
    if scale is None:
        raise CannotReadSpacing()
    try:
        mpp_x = float(scale / x_resolution)
        mpp_y = float(scale / y_resolution)
    except ArithmeticError as err:
        # A resolution of zero makes the spacing undefined.
        raise CannotReadSpacing() from err
    return mpp_x, mpp_y
def get_avg_mpp(slide_path: Path | str) -> float:
    """Return the average MPP of a whole slide image.

    The value is in units of micrometers per pixel and is
    the average of the X and Y dimensions. The reader of the selected backend
    is tried first, and tifffile is tried if that fails.

    Raises
    ------
    CannotReadSpacing if the spacing cannot be read.
    """
    # Reader that belongs to each backend name.
    backend_readers = {
        "openslide": _get_mpp_openslide,
        "tiffslide": _get_mpp_tiffslide,
    }
    read_with_backend = backend_readers.get(_BACKEND)
    if read_with_backend is not None:
        try:
            x_spacing, y_spacing = read_with_backend(slide_path)
        except CannotReadSpacing:
            pass
        else:
            return (x_spacing + y_spacing) / 2

    logger.debug(f"Failed to read MPP using {_BACKEND}.")
    logger.debug("Trying to read MPP with tifffile as last resort.")

    # If tiffslide/openslide don't work, try tifffile.
    try:
        x_spacing, y_spacing = _get_mpp_tifffile(slide_path)
    except CannotReadSpacing:
        pass
    else:
        return (x_spacing + y_spacing) / 2

    raise CannotReadSpacing(slide_path)
def _validate_wsi_directory(wsi_dir: str | Path) -> None:
    """Check that the files directly inside `wsi_dir` have unique stems.

    Only regular files are considered; other directory entries are ignored.

    Raises
    ------
    DuplicateFilePrefixesFound if two files share a stem (like slide.svs and
    slide.tif).
    """
    seen_stems: set[str] = set()
    for entry in Path(wsi_dir).iterdir():
        if not entry.is_file():
            continue
        if entry.stem in seen_stems:
            raise DuplicateFilePrefixesFound(
                "A slide with the same prefix but different extensions has been found"
                " (like slide.svs and slide.tif). Slides must have unique prefixes."
            )
        seen_stems.add(entry.stem)