Skip to content

Commit

Permalink
Merge pull request #53 from Fyaushev/got-rid-of-mdai
Browse files Browse the repository at this point in the history
got rid of the mdai library
  • Loading branch information
maxme1 authored Aug 4, 2023
2 parents 6372a65 + 5ca16f3 commit 8259083
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 5 deletions.
2 changes: 1 addition & 1 deletion amid/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.12.4'
__version__ = '0.12.5'
108 changes: 105 additions & 3 deletions amid/midrc.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import json
import os.path
import warnings
from functools import lru_cache
from pathlib import Path
from typing import Tuple

import mdai
import numpy as np
import pandas as pd
import pydicom
Expand Down Expand Up @@ -106,8 +106,7 @@ def ids(_joined):

def _annotation(_base):
json_path = 'MIDRC-RICORD-1a_annotations_labelgroup_all_2020-Dec-8.json'
# TODO: do we really need a whole lib to parse one json??? it also generates annoying pandas warning
return mdai.common_utils.json_to_dataframe(_base / json_path)['annotations']
return json_to_dataframe(_base / json_path)['annotations']

def _series(i, _base, _joined):
sub = _joined[_joined.SeriesInstanceUID == i]
Expand Down Expand Up @@ -179,3 +178,106 @@ def mask(i, image_meta: Output, _annotation, _pathologies):
ys, xs = np.array(row['data']['vertices']).T
mask[(pathology_index, *polygon(ys, xs, shape[:2]), slice_index)] = True
return mask


# TODO: simplify
def json_to_dataframe(json_file, datasets=None):
if datasets is None:
datasets = []
with open(json_file, 'r', encoding='utf-8') as f:
data = json.load(f)

a = pd.DataFrame([])
studies = pd.DataFrame([])
labels = None

# Gets annotations for all datasets
for d in data['datasets']:
if d['id'] in datasets or len(datasets) == 0:
study = pd.DataFrame(d['studies'])
study['dataset'] = d['name']
study['datasetId'] = d['id']
studies = pd.concat([studies, study], ignore_index=True, sort=False)

annots = pd.DataFrame(d['annotations'])
annots['dataset'] = d['name']
a = pd.concat([a, annots], ignore_index=True, sort=False)

if len(studies) > 0:
studies = studies[['StudyInstanceUID', 'dataset', 'datasetId', 'number']]
g = pd.DataFrame(data['labelGroups'])
# unpack arrays
result = pd.DataFrame([(d, tup.id, tup.name) for tup in g.itertuples() for d in tup.labels])
if len(result) > 0:
result.columns = ['labels', 'labelGroupId', 'labelGroupName']

def unpack_dictionary(df, column):
ret = None
ret = pd.concat([df, pd.DataFrame((d for idx, d in df[column].items()))], axis=1, sort=False)
del ret[column]
return ret

labels = unpack_dictionary(result, 'labels')
if 'parentId' in labels.columns:
labels = labels[
[
'labelGroupId',
'labelGroupName',
'annotationMode',
'color',
'description',
'id',
'name',
'radlexTagIds',
'scope',
'parentId',
]
]
labels.columns = [
'labelGroupId',
'labelGroupName',
'annotationMode',
'color',
'description',
'labelId',
'labelName',
'radlexTagIdsLabel',
'scope',
'parentLabelId',
]
else:
labels = labels[
[
'labelGroupId',
'labelGroupName',
'annotationMode',
'color',
'description',
'id',
'name',
'radlexTagIds',
'scope',
]
]
labels.columns = [
'labelGroupId',
'labelGroupName',
'annotationMode',
'color',
'description',
'labelId',
'labelName',
'radlexTagIdsLabel',
'scope',
]

if len(a) > 0:
a = a.merge(labels, on=['labelId'], sort=False)
if len(studies) > 0 and len(a) > 0:
a = a.merge(studies, on=['StudyInstanceUID', 'dataset'], sort=False)
# Format data
studies.number = studies.number.astype(int)
a.number = a.number.astype(int)
a.loc.createdAt = pd.to_datetime(a.createdAt)
a.loc.updatedAt = pd.to_datetime(a.updatedAt)
return {'annotations': a, 'studies': studies, 'labels': labels}
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ pandas
pylidc
pyyaml
requests
mdai
joblib
deli<1.0.0
typer<1.0.0
Expand Down

0 comments on commit 8259083

Please sign in to comment.