Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GCS refactor #1332

Merged
merged 2 commits into from
Jul 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 1 addition & 22 deletions mimic-iv/concepts/measurement/gcs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -105,26 +105,6 @@ with base as
, EndoTrachFlag
from gcs gs
)
-- priority is:
-- (i) complete data, (ii) non-sedated GCS, (iii) lowest GCS, (iv) charttime
, gcs_priority as
(
select
subject_id
, stay_id
, charttime
, gcs
, gcsmotor
, gcsverbal
, gcseyes
, EndoTrachFlag
, ROW_NUMBER() over
(
PARTITION BY stay_id, charttime
ORDER BY components_measured DESC, endotrachflag, gcs, charttime DESC
) as rn
from gcs_stg
)
select
gs.subject_id
, gs.stay_id
Expand All @@ -134,6 +114,5 @@ select
, GCSVerbal AS gcs_verbal
, GCSEyes AS gcs_eyes
, EndoTrachFlag AS gcs_unable
from gcs_priority gs
where rn = 1
from gcs_stg gs
;
61 changes: 61 additions & 0 deletions mimic-iv/tests/test_measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,64 @@ def test_common_bg_exist(dataset, project_id):
missing_observations[c] = df.loc[0, c]

assert len(missing_observations) == 0, f'columns in bg missing data'


def test_gcs_score_calculated_correctly(dataset, project_id):
    """Verify how the gcs concept reports verbal scores charted as 'No Response-ETT'.

    Rows of ``{dataset}.gcs`` for two hand-picked stays are joined to the raw
    verbal chartevents (itemid 223900) recorded at the same charttime. Each
    row is then paired with the raw verbal value of the preceding row of the
    same stay, provided that row is at most 6 hours older. Two cases are
    asserted:

    1. current row is 'No Response-ETT' and a previous, non-ETT verbal value
       is available within 6 hours -> ``gcs_verbal`` must be > 0;
    2. current row is 'No Response-ETT' and no previous verbal value is
       available within 6 hours -> ``gcs_verbal`` must be 0.

    Args:
        dataset: BigQuery dataset holding the derived ``gcs`` table.
        project_id: project passed to ``gbq.read_gbq`` for the query.
    """
    # has verbal prev of 1 -> 11365767, 30015010, 2154-07-25T20:00:00
    # has verbal prev of 0 -> 13182319, 30159144, 2161-06-19T00:16:00
    query = f"""
    SELECT g.stay_id
    , g.charttime
    , g.gcs
    , g.gcs_motor
    , g.gcs_verbal
    , g.gcs_eyes
    , g.gcs_unable
    , gcs_v.valuenum AS gcs_verbal_numeric
    , gcs_v.value AS gcs_verbal_text
    FROM {dataset}.gcs g
    LEFT JOIN (
        SELECT stay_id, charttime, value, valuenum
        FROM `physionet-data.mimiciv_icu.chartevents`
        WHERE itemid = 223900 AND stay_id IN (30015010, 30159144)
    ) gcs_v
    ON g.stay_id = gcs_v.stay_id
    AND g.charttime = gcs_v.charttime
    WHERE g.stay_id IN
    (
        30015010, -- subject_id: 11365767
        30159144 -- subject_id: 13182319
    )
    """
    df = gbq.read_gbq(query, project_id=project_id, dialect="standard")

    # coerce to datetime *before* sorting so the ordering is chronological
    # regardless of the dtype returned by the query
    df['charttime'] = pd.to_datetime(df['charttime'])
    df = df.sort_values(['stay_id', 'charttime'])

    # pair every row with the preceding row of the same stay
    by_stay = df.groupby('stay_id')
    df['charttime_lag'] = by_stay['charttime'].shift(1)
    df['gcs_verbal_text_lag'] = by_stay['gcs_verbal_text'].shift(1)
    df['gcs_verbal_numeric_lag'] = by_stay['gcs_verbal_numeric'].shift(1)

    # Compare timedeltas directly: casting to 'timedelta64[h]' is rejected by
    # pandas >= 2.0 and, on older versions, floors to whole hours (so a gap of
    # 6h59m would wrongly count as "within 6 hours"). The first row of each
    # stay has a NaT gap, which compares False and is treated as out of window.
    idxTime = (df['charttime'] - df['charttime_lag']) <= pd.Timedelta(hours=6)
    # remove the previous verbal value if it was charted more than 6 hr earlier
    df.loc[~idxTime, 'gcs_verbal_text_lag'] = None
    df.loc[~idxTime, 'gcs_verbal_numeric_lag'] = None

    # NOTE(review): the original comments described the intent as
    #   (1) verbal score is correctly carried forward if 0
    #   (2) verbal score is imputed at 5 if nothing to carry forward
    # which does not literally match the assertions below (case 2 checks for
    # 0, not 5) -- confirm the intended behaviour against gcs.sql.

    # verbal score for this row is "unable"
    is_ett = df['gcs_verbal_text'] == 'No Response-ETT'
    prev_text = df['gcs_verbal_text_lag']

    # case 1: a previous verbal value is available and it was not "unable".
    # The explicit notnull() matters: `None != 'No Response-ETT'` is True, so
    # without it this mask would also contain every case-2 row and the two
    # assertions (> 0 and == 0) would contradict each other.
    idxETT = is_ett & prev_text.notnull() & (prev_text != 'No Response-ETT')

    assert idxETT.sum() > 0, 'expected rows with gcs imputed, check stay_id/subject_id data'
    assert (df.loc[idxETT, 'gcs_verbal'] > 0).all(), 'expected no rows with verbal of 0 when prev val available'

    # case 2: no previous verbal value is available within 6 hours
    idxETT = is_ett & prev_text.isnull()

    assert idxETT.sum() > 0, 'expected rows without prior GCS in 6 hours'
    assert (df.loc[idxETT, 'gcs_verbal'] == 0).all(), 'found rows without imputed verbal score'