Skip to content

Commit

Permalink
Merge pull request #53 from Fyaushev/got-rid-of-mdai
Browse files Browse the repository at this point in the history
got rid of the mdai library
  • Loading branch information
maxme1 authored Aug 4, 2023
2 parents 6372a65 + 5ca16f3 commit 8259083
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 5 deletions.
2 changes: 1 addition & 1 deletion amid/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.12.4'
__version__ = '0.12.5'
108 changes: 105 additions & 3 deletions amid/midrc.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import json
import os.path
import warnings
from functools import lru_cache
from pathlib import Path
from typing import Tuple

import mdai
import numpy as np
import pandas as pd
import pydicom
Expand Down Expand Up @@ -106,8 +106,7 @@ def ids(_joined):

def _annotation(_base):
json_path = 'MIDRC-RICORD-1a_annotations_labelgroup_all_2020-Dec-8.json'
# TODO: do we really need a whole lib to parse one json??? it also generates annoying pandas warning
return mdai.common_utils.json_to_dataframe(_base / json_path)['annotations']
return json_to_dataframe(_base / json_path)['annotations']

def _series(i, _base, _joined):
sub = _joined[_joined.SeriesInstanceUID == i]
Expand Down Expand Up @@ -179,3 +178,106 @@ def mask(i, image_meta: Output, _annotation, _pathologies):
ys, xs = np.array(row['data']['vertices']).T
mask[(pathology_index, *polygon(ys, xs, shape[:2]), slice_index)] = True
return mask


# TODO: simplify
def json_to_dataframe(json_file, datasets=None):
if datasets is None:
datasets = []
with open(json_file, 'r', encoding='utf-8') as f:
data = json.load(f)

a = pd.DataFrame([])
studies = pd.DataFrame([])
labels = None

# Gets annotations for all datasets
for d in data['datasets']:
if d['id'] in datasets or len(datasets) == 0:
study = pd.DataFrame(d['studies'])
study['dataset'] = d['name']
study['datasetId'] = d['id']
studies = pd.concat([studies, study], ignore_index=True, sort=False)

annots = pd.DataFrame(d['annotations'])
annots['dataset'] = d['name']
a = pd.concat([a, annots], ignore_index=True, sort=False)

if len(studies) > 0:
studies = studies[['StudyInstanceUID', 'dataset', 'datasetId', 'number']]
g = pd.DataFrame(data['labelGroups'])
# unpack arrays
result = pd.DataFrame([(d, tup.id, tup.name) for tup in g.itertuples() for d in tup.labels])
if len(result) > 0:
result.columns = ['labels', 'labelGroupId', 'labelGroupName']

def unpack_dictionary(df, column):
ret = None
ret = pd.concat([df, pd.DataFrame((d for idx, d in df[column].items()))], axis=1, sort=False)
del ret[column]
return ret

labels = unpack_dictionary(result, 'labels')
if 'parentId' in labels.columns:
labels = labels[
[
'labelGroupId',
'labelGroupName',
'annotationMode',
'color',
'description',
'id',
'name',
'radlexTagIds',
'scope',
'parentId',
]
]
labels.columns = [
'labelGroupId',
'labelGroupName',
'annotationMode',
'color',
'description',
'labelId',
'labelName',
'radlexTagIdsLabel',
'scope',
'parentLabelId',
]
else:
labels = labels[
[
'labelGroupId',
'labelGroupName',
'annotationMode',
'color',
'description',
'id',
'name',
'radlexTagIds',
'scope',
]
]
labels.columns = [
'labelGroupId',
'labelGroupName',
'annotationMode',
'color',
'description',
'labelId',
'labelName',
'radlexTagIdsLabel',
'scope',
]

if len(a) > 0:
a = a.merge(labels, on=['labelId'], sort=False)
if len(studies) > 0 and len(a) > 0:
a = a.merge(studies, on=['StudyInstanceUID', 'dataset'], sort=False)
# Format data
studies.number = studies.number.astype(int)
a.number = a.number.astype(int)
a.loc.createdAt = pd.to_datetime(a.createdAt)
a.loc.updatedAt = pd.to_datetime(a.updatedAt)
return {'annotations': a, 'studies': studies, 'labels': labels}
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ pandas
pylidc
pyyaml
requests
mdai
joblib
deli<1.0.0
typer<1.0.0
Expand Down

0 comments on commit 8259083

Please sign in to comment.