Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle sample numbers > 2**31 in annotation files #328

Merged
merged 8 commits into from
Mar 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added sample-data/huge.qrs
Binary file not shown.
36 changes: 35 additions & 1 deletion tests/test_annotation.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os
import re
import unittest

import numpy as np

import wfdb

class test_annotation():

class TestAnnotation(unittest.TestCase):
"""
Testing read and write of WFDB annotations, including Physionet
streaming.
Expand Down Expand Up @@ -183,3 +186,34 @@ def test_3(self):
assert (comp == [True] * 6)
assert annotation.__eq__(pn_annotation)
assert annotation.__eq__(write_annotation)

def test_4(self):
    """
    Round-trip an annotation whose sample number does not fit in 32 bits.

    The fixture 'sample-data/huge.qrs' was created by:
        echo "xxxxxxxxx 10000000000 N 0 0 0" | wrann -r huge -a qrs
    """
    loaded = wfdb.rdann('sample-data/huge', 'qrs')

    # The single annotation sits at sample 10**10, well beyond 2**31.
    self.assertEqual(loaded.sample[0], 10000000000)

    # Write the annotation back out; the copy is read again below by its
    # bare record name ('huge') and compared against the fixture.
    loaded.wrann()

    reference = wfdb.rdann('sample-data/huge', 'qrs')
    rewritten = wfdb.rdann('huge', 'qrs')
    self.assertEqual(reference, rewritten)

@classmethod
def tearDownClass(cls):
    """Delete the annotation files listed below if they exist."""
    written_names = (
        '100.atr',
        '1003.atr',
        '12726.anI',
        'huge.qrs',
    )
    # Only existing regular files are removed, so a test that never wrote
    # its file does not make the clean-up fail.
    for name in filter(os.path.isfile, written_names):
        os.remove(name)


# Run the unittest runner when this module is executed as a script.
if __name__ == '__main__':
unittest.main()
6 changes: 3 additions & 3 deletions tests/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,9 +521,9 @@ def test_header_with_non_utf8(self):
@classmethod
def tearDownClass(cls):
"Clean up written files"
writefiles = ['03700181.dat','03700181.hea','100.atr','100.dat',
'100.hea','1003.atr','100_3chan.dat','100_3chan.hea',
'12726.anI','a103l.hea','a103l.mat','s0010_re.dat',
writefiles = ['03700181.dat','03700181.hea','100.dat',
'100.hea','100_3chan.dat','100_3chan.hea',
'a103l.hea','a103l.mat','s0010_re.dat',
's0010_re.hea','s0010_re.xyz','test01_00s.dat',
'test01_00s.hea','test01_00s_skewframe.hea',
'n8_evoked_raw_95_F1_R9.dat', 'n8_evoked_raw_95_F1_R9.hea']
Expand Down
89 changes: 49 additions & 40 deletions wfdb/io/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,8 +466,6 @@ def check_field(self, field):
raise ValueError("The 'sample' field must only contain non-negative integers")
if min(sampdiffs) < 0 :
raise ValueError("The 'sample' field must contain monotonically increasing sample numbers")
if max(sampdiffs) > 2147483648:
raise ValueError('WFDB annotation files cannot store sample differences greater than 2**31')

elif field == 'label_store':
if min(item) < 1 or max(item) > 49:
Expand Down Expand Up @@ -1370,19 +1368,30 @@ def field2bytes(field, value):
# sample difference
sd = value[0]

# Add SKIP element if value is too large for single byte
if sd>1023:
# 8 bytes in total:
# - [0, 59>>2] indicates SKIP
# - Next 4 gives sample difference
# - Final 2 give 0 and sym
data_bytes = [0, 236, (sd&16711680)>>16, (sd&4278190080)>>24, sd&255, (sd&65280)>>8, 0, 4*typecode]
# Just need samp and sym
else:
# - First byte stores low 8 bits of samp
# - Second byte stores high 2 bits of samp
# and sym
data_bytes = [sd & 255, ((sd & 768) >> 8) + 4*typecode]
data_bytes = []

# Add SKIP element(s) if the sample difference is too large to
# be stored in the annotation type word.
#
# Each SKIP element consists of three words (6 bytes):
# - Bytes 0-1 contain the SKIP indicator (59 << 10)
# - Bytes 2-3 contain the high 16 bits of the sample difference
# - Bytes 4-5 contain the low 16 bits of the sample difference
# If the total difference exceeds 2**31 - 1, multiple skips must
# be used.
while sd > 1023:
n = min(sd, 0x7fffffff)
data_bytes += [0, 59 << 2,
(n >> 16) & 255,
(n >> 24) & 255,
(n >> 0) & 255,
(n >> 8) & 255]
sd -= n

# Annotation type itself is stored as a single word:
# - bits 0 to 9 store the sample difference (0 to 1023)
# - bits 10 to 15 store the type code
data_bytes += [sd & 255, ((sd & 768) >> 8) + 4 * typecode]

elif field == 'num':
# First byte stores num
Expand Down Expand Up @@ -1653,8 +1662,11 @@ def rdann(record_name, extension, sampfrom=0, sampto=None, shift_samps=False,
subtype, chan, num, aux_note)

# Convert lists to numpy arrays dtype='int'
(sample, label_store, subtype,
chan, num) = lists_to_int_arrays(sample, label_store, subtype, chan, num)
(label_store, subtype,
chan, num) = lists_to_int_arrays(label_store, subtype, chan, num)

# Convert sample numbers to a numpy array of 'int64'
sample = np.array(sample, dtype='int64')

# Try to get fs from the header file if it is not contained in the
# annotation file
Expand Down Expand Up @@ -1748,8 +1760,8 @@ def load_byte_pairs(record_name, extension, pn_dir):

Returns
-------
filebytes : str
The input filestream converted to bytes.
filebytes : ndarray
The input filestream converted to an Nx2 array of unsigned bytes.

"""
# local file
Expand All @@ -1769,8 +1781,8 @@ def proc_ann_bytes(filebytes, sampto):

Parameters
----------
filebytes : str
The input filestream converted to bytes.
filebytes : ndarray
The input filestream converted to an Nx2 array of unsigned bytes.
sampto : int
The maximum sample number for annotations to be returned.

Expand Down Expand Up @@ -1852,8 +1864,8 @@ def proc_core_fields(filebytes, bpi):

Parameters
----------
filebytes : str
The input filestream converted to bytes.
filebytes : ndarray
The input filestream converted to an Nx2 array of unsigned bytes.
bpi : int
The index to start the conversion.

Expand All @@ -1869,31 +1881,28 @@ def proc_core_fields(filebytes, bpi):
The index to start the conversion.

"""
label_store = filebytes[bpi, 1] >> 2
sample_diff = 0

# The current byte pair will contain either the actual d_sample + annotation store value,
# or 0 + SKIP.

# Not a skip - it is the actual sample number + annotation type store value
if label_store != 59:
sample_diff = filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3)
bpi = bpi + 1
# Skip. Note: Could there be another skip after the first?
else:
while filebytes[bpi, 1] >> 2 == 59:
# 4 bytes storing dt
sample_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \
+ filebytes[bpi + 2,0] + 256 * filebytes[bpi + 2,1]
skip_diff = ((int(filebytes[bpi + 1, 0]) << 16)
+ (int(filebytes[bpi + 1, 1]) << 24)
+ (int(filebytes[bpi + 2, 0]) << 0)
+ (int(filebytes[bpi + 2, 1]) << 8))

# Data type is long integer (stored in two's complement). Range -2**31 to 2**31 - 1
if sample_diff > 2147483647:
sample_diff = sample_diff - 4294967296
if skip_diff > 2147483647:
skip_diff = skip_diff - 4294967296

# After the 4 bytes, the next pair's samp is also added
sample_diff = sample_diff + filebytes[bpi + 3, 0] + 256 * (filebytes[bpi + 3, 1] & 3)
sample_diff += skip_diff
bpi = bpi + 3

# The label is stored after the 4 bytes. Samples here should be 0.
label_store = filebytes[bpi + 3, 1] >> 2
bpi = bpi + 4
# Not a skip - it is the actual sample number + annotation type store value
label_store = filebytes[bpi, 1] >> 2
sample_diff += int(filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3))
bpi = bpi + 1

return sample_diff, label_store, bpi

Expand Down