Skip to content

Commit

Permalink
Fixes and test for ImageFileReader
Browse files Browse the repository at this point in the history
  • Loading branch information
CPBridge committed Jan 9, 2025
1 parent 8c2c1e0 commit f60e013
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 11 deletions.
44 changes: 33 additions & 11 deletions src/highdicom/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
read_partial
)
from pydicom.tag import TupleTag, ItemTag, SequenceDelimiterTag
from pydicom.uid import UID
from pydicom.uid import UID, DeflatedExplicitVRLittleEndian

from highdicom.frame import decode_frame
from highdicom.color import ColorManager
Expand Down Expand Up @@ -159,8 +159,13 @@ def _build_bot(fp: DicomFileLike, number_of_frames: int) -> List[int]:
"""
initial_position = fp.tell()
offset_values = []
current_offset = 0

# We will keep two lists, one of all fragment boundaries (regardless of
# whether or not they are frame boundaries) and the other of just those
# fragment boundaries that are known to be frame boundaries (as identified
# by JPEG start markers).
frame_offset_values = []
fragment_offset_values = []
i = 0
while True:
frame_position = fp.tell()
Expand All @@ -187,26 +192,33 @@ def _build_bot(fp: DicomFileLike, number_of_frames: int) -> List[int]:
f'Length of Frame item #{i} is zero.'
)

first_two_bytes = fp.read(2)
if not fp.is_little_endian:
first_two_bytes = first_two_bytes[::-1]
current_offset = frame_position - initial_position
fragment_offset_values.append(current_offset)

# In case of fragmentation, we only want to get the offsets to the
# first fragment of a given frame. We can identify those based on the
# JPEG and JPEG 2000 markers that should be found at the beginning and
# end of the compressed byte stream.
first_two_bytes = fp.read(2)
if not fp.is_little_endian:
first_two_bytes = first_two_bytes[::-1]

if first_two_bytes in _START_MARKERS:
current_offset = frame_position - initial_position
offset_values.append(current_offset)
frame_offset_values.append(current_offset)

i += 1
fp.seek(length - 2, 1) # minus the first two bytes

if len(offset_values) != number_of_frames:
if len(frame_offset_values) == number_of_frames:
basic_offset_table = frame_offset_values
elif len(fragment_offset_values) == number_of_frames:
# This covers RLE and others that have no frame markers but have a
# single fragment per frame
basic_offset_table = fragment_offset_values
else:
raise ValueError(
'Number of frame items does not match specified Number of Frames.'
)
else:
basic_offset_table = offset_values

fp.seek(initial_position, 0)
return basic_offset_table
Expand Down Expand Up @@ -426,6 +438,16 @@ def _read_metadata(self) -> None:
self._metadata = Dataset(metadata)

self._pixel_data_offset = self._fp.tell()

if self.transfer_syntax_uid == DeflatedExplicitVRLittleEndian:
# The entire file is compressed with DEFLATE. Such files cannot be used
# because the entire file must be decompressed to read or build the
# basic/extended offset table
raise ValueError(
'Deflated transfer syntaxes cannot be used with the '
'ImageFileReader.'
)

# Determine whether dataset contains a Pixel Data element
try:
tag = TupleTag(self._fp.read_tag())
Expand Down
74 changes: 74 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from pydicom import dcmread
from pydicom.data import get_testdata_file
from pydicom.filebase import DicomBytesIO, DicomFileLike
import pytest

from highdicom.io import ImageFileReader
from tests.utils import find_readable_images


class TestImageFileReader(unittest.TestCase):
Expand Down Expand Up @@ -212,3 +214,75 @@ def test_read_single_frame_ct_image_dicom_file_like_opened(self):
reader.metadata.Columns,
)
np.testing.assert_array_equal(frame, pixel_array)

def test_read_rle_no_bot(self):
    # This RLE compressed image has no BOT, so the reader has to search
    # through the pixel data for delimiter tags to locate each frame
    rle_path = Path(get_testdata_file('rtdose_rle.dcm'))

    # Reference pixels as decoded by pydicom
    expected_pixels = dcmread(rle_path).pixel_array

    with ImageFileReader(rle_path) as reader:
        assert reader.number_of_frames == 15
        for index in range(reader.number_of_frames):
            frame = reader.read_frame(index, correct_color=False)
            expected_shape = (
                reader.metadata.Rows,
                reader.metadata.Columns,
            )
            assert isinstance(frame, np.ndarray)
            assert frame.ndim == 2
            assert frame.dtype == np.uint32
            assert frame.shape == expected_shape
            np.testing.assert_array_equal(frame, expected_pixels[index])

def test_disallow_deflated_dataset(self):
    # Files with a deflated transfer syntax should be rejected with a
    # ValueError carrying the message below
    deflated_file = get_testdata_file('image_dfl.dcm')
    expected_message = (
        'Deflated transfer syntaxes cannot be used with the '
        'ImageFileReader.'
    )

    with pytest.raises(ValueError, match=expected_message):
        with ImageFileReader(deflated_file) as reader:
            reader.read_frame(1)


@pytest.mark.parametrize(
    'filename',
    find_readable_images(),
)
def test_all_images(filename):
    """Check that frames read one at a time match pydicom's pixel array."""
    reference = dcmread(filename)
    reference_pixels = reference.pixel_array

    has_color = reference.SamplesPerPixel == 3
    frame_count = reference.get('NumberOfFrames', 1)
    multiframe = frame_count > 1

    # Color frames carry a trailing axis holding the three samples per pixel
    expected_shape = (reference.Rows, reference.Columns)
    if has_color:
        expected_shape = (*expected_shape, 3)
    expected_ndim = 3 if has_color else 2

    with ImageFileReader(filename) as reader:
        assert reader.number_of_frames == frame_count
        for index in range(reader.number_of_frames):
            frame = reader.read_frame(index, correct_color=False)
            assert isinstance(frame, np.ndarray)
            assert frame.ndim == expected_ndim
            assert frame.dtype == reference_pixels.dtype
            assert frame.shape == expected_shape
            # A single-frame pixel array has no leading frame axis to index
            if multiframe:
                expected_frame = reference_pixels[index]
            else:
                expected_frame = reference_pixels
            np.testing.assert_array_equal(frame, expected_frame)
np.testing.assert_array_equal(frame, expected_frame)
68 changes: 68 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
from io import BytesIO

from pathlib import Path
from pydicom.data import get_testdata_files
from pydicom.dataset import Dataset, FileMetaDataset
from pydicom.filereader import dcmread


from highdicom._module_utils import (
does_iod_have_pixel_data,
)


def write_and_read_dataset(dataset: Dataset):
"""Write DICOM dataset to buffer and read it back from buffer."""
clone = Dataset(dataset)
Expand All @@ -21,3 +28,64 @@ def write_and_read_dataset(dataset: Dataset):
little_endian=little_endian,
)
return dcmread(fp, force=True)


def find_readable_images() -> list[str]:
    """Get a list of all images in highdicom and pydicom test data that should
    be expected to work with image reading routines.

    Returns
    -------
    list[str]
        Paths of test files that can be opened, declare a SOP Class UID whose
        IOD has pixel data, use a little endian transfer syntax, and are not
        in the list of known problematic files.

    """
    # All pydicom test files
    all_files = get_testdata_files()

    # Add highdicom test files
    file_path = Path(__file__)
    data_dir = file_path.parent.parent.joinpath('data/test_files')
    all_files.extend(str(f) for f in data_dir.glob("*.dcm"))

    # Various files are not expected to work and should be excluded
    exclusions = (
        "badVR.dcm",  # cannot be read due to bad VR
        "MR_truncated.dcm",  # pixel data is truncated
        "liver_1frame.dcm",  # missing number of frames
        "JPEG2000-embedded-sequence-delimiter.dcm",  # pydicom cannot decode pixels
        "image_dfl.dcm",  # deflated transfer syntax cannot be read lazily
        "JPEG-lossy.dcm",  # pydicom cannot decode pixels
        "TINY_ALPHA",  # no pixels
        "SC_rgb_jpeg.dcm",  # messed up transfer syntax
    )

    files_to_use = []

    for f in all_files:
        # Exclusions are matched as substrings of the path; checking them
        # first avoids reading files that will be dropped anyway
        if any(exc in f for exc in exclusions):
            continue

        try:
            # Skip image files that can't even be opened (the test files
            # include some deliberately corrupted files)
            dcm = dcmread(f)
        except Exception:
            # Deliberately broad (files may be corrupted in many ways), but
            # KeyboardInterrupt/SystemExit are no longer swallowed
            continue

        if 'SOPClassUID' not in dcm:
            # Some are missing this...
            continue
        if not does_iod_have_pixel_data(dcm.SOPClassUID):
            # Exclude non images
            continue
        if not dcm.file_meta.TransferSyntaxUID.is_little_endian:
            # We don't support big endian
            continue

        files_to_use.append(f)

    return files_to_use

0 comments on commit f60e013

Please # to comment.