ME-ICA · oesteban · Nov 11, 2020 · Nov 23, 2021 · Nov 23, 2021 · Nov 23, 2021
diff --git a/aroma/aroma.py b/aroma/aroma.py
@@ -9,7 +9,7 @@
 import numpy as np
 import pandas as pd
 
-from aroma import _version, features, utils
+from aroma import _version, classification, features, io, utils
 
 LGR = logging.getLogger(__name__)
 
@@ -77,25 +77,22 @@ def aroma_workflow(
         )
         return
     elif op.isdir(out_dir) and overwrite:
-        LGR.warning(
-            "Output directory {} exists and will be overwritten."
-            "\n".format(out_dir)
-        )
+        LGR.warning("Output directory {} exists and will be overwritten.\n".format(out_dir))
         shutil.rmtree(out_dir)
         os.makedirs(out_dir)
     else:
         os.makedirs(out_dir)
 
     # Create logfile name
-    basename = 'aroma_'
-    extension = 'tsv'
-    isotime = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S')
-    logname = os.path.join(out_dir, (basename + isotime + '.' + extension))
+    basename = "aroma_"
+    extension = "tsv"
+    isotime = datetime.datetime.now().strftime("%Y-%m-%dT%H%M%S")
+    logname = os.path.join(out_dir, (basename + isotime + "." + extension))
 
     # Set logging format
     log_formatter = logging.Formatter(
-        '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
-        datefmt='%Y-%m-%dT%H:%M:%S')
+        "%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s", datefmt="%Y-%m-%dT%H:%M:%S"
+    )
 
     # Set up logging file and open it for writing
     log_handler = logging.FileHandler(logname)
@@ -104,20 +101,22 @@ def aroma_workflow(
 
     # add logger mode options
     if quiet:
-        logging.basicConfig(level=logging.WARNING,
-                            handlers=[log_handler, sh],
-                            format='%(levelname)-10s %(message)s')
+        logging.basicConfig(
+            level=logging.WARNING,
+            handlers=[log_handler, sh],
+            format="%(levelname)-10s %(message)s",
+        )
     elif debug:
-        logging.basicConfig(level=logging.DEBUG,
-                            handlers=[log_handler, sh],
-                            format='%(levelname)-10s %(message)s')
+        logging.basicConfig(
+            level=logging.DEBUG, handlers=[log_handler, sh], format="%(levelname)-10s %(message)s"
+        )
     else:
-        logging.basicConfig(level=logging.INFO,
-                            handlers=[log_handler, sh],
-                            format='%(levelname)-10s %(message)s')
+        logging.basicConfig(
+            level=logging.INFO, handlers=[log_handler, sh], format="%(levelname)-10s %(message)s"
+        )
 
-    version_number = _version.get_versions()['version']
-    LGR.info(f'Currently running ICA-AROMA version {version_number}')
+    version_number = _version.get_versions()["version"]
+    LGR.info(f"Currently running ICA-AROMA version {version_number}")
 
     # Check if the type of denoising is correctly specified, when specified
     if den_type not in ("nonaggr", "aggr", "both", "no"):
@@ -135,11 +134,7 @@ def aroma_workflow(
 
     # Check TR
     if TR == 1:
-        LGR.warning(
-            "Please check whether the determined TR (of "
-            + str(TR)
-            + "s) is correct!\n"
-        )
+        LGR.warning("Please check whether the determined TR (of " + str(TR) + "s) is correct!\n")
     elif TR == 0:
         raise Exception(
             "TR is zero. ICA-AROMA requires a valid TR and will therefore "
@@ -170,7 +165,7 @@ def aroma_workflow(
     (
         features_df["edge_fract"],
         features_df["csf_fract"],
-        metric_metadata
+        metric_metadata,
     ) = features.feature_spatial(component_maps, metric_metadata)
 
     LGR.info("  - extracting the Maximum RP correlation feature")
@@ -190,17 +185,19 @@ def aroma_workflow(
     )
 
     LGR.info("  - classification")
-    features_df, metric_metadata = utils.classification(features_df, metric_metadata)
-    motion_ICs = utils.write_metrics(features_df, out_dir, metric_metadata)
+    classification_labels = classification.predict(features_df, metric_metadata=metric_metadata)
+    features_df["classification"] = classification_labels
+    motion_ICs = io.write_metrics(features_df, out_dir, metric_metadata)
 
     if generate_plots:
-        from . import plotting
-        plotting.classification_plot(
-            op.join(out_dir, "desc-AROMA_metrics.tsv"), out_dir
-        )
+        from aroma import plotting
+
+        plotting.classification_plot(op.join(out_dir, "desc-AROMA_metrics.tsv"), out_dir)
 
     if den_type != "no":
         LGR.info("Step 3) Data denoising")
-        utils.denoising(in_file, out_dir, mixing, den_type, motion_ICs)
+        # Index of the components that were classified as "rejected"
+        rejected_components = np.where(classification_labels == "rejected")[0]
+        utils.denoising(in_file, out_dir, mixing, den_type, rejected_components)
 
     LGR.info("Finished")
diff --git a/aroma/classification.py b/aroma/classification.py
@@ -0,0 +1,110 @@
+# CHANGES
+# -------
+# Log of changes as mandated by the original Apache 2.0 License of ICA-AROMA
+#
+#   * Drop ``runICA`` and ``register2MNI`` functions
+#   * Base ``classifier`` on Pandas, and revise signature (``predict(X)``)
+#     to make it more similar to scikit-learn
+#   * Return classification labels directly from ``predict``
+#
+"""Classification functions for ICA-AROMA."""
+import logging
+
+import numpy as np
+
+LGR = logging.getLogger(__name__)
+
+# Define criteria needed for classification (thresholds and
+# hyperplane-parameters)
+THR_CSF = 0.10
+THR_HFC = 0.35
+HYPERPLANE = np.array([-19.9751070082159, 9.95127547670627, 24.8333160239175])
+
+
+def hfc_criteria(x, thr_hfc=THR_HFC):
-def hfc_criteria(x, thr_hfc=THR_HFC):
+def hfc_criterion(x, thr_hfc=THR_HFC):
-def hfc_criteria(x, thr_hfc=THR_HFC):
+def hfc_criterion(x, thr_hfc=THR_HFC):
+    """
+    Compute the HFC criteria for classification.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Projection of HFC feature scores to new 1D space.
+
+    Returns
+    -------
+    numpy.ndarray
+        Classification (``True`` if the component is a motion one).
+    """
+    return x > thr_hfc
+
+
+def csf_criteria(x, thr_csf=THR_CSF):
+    """
+    Compute the CSF criteria for classification.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Projection of CSF-fraction feature scores to new 1D space.
+
+    Returns
+    -------
+    numpy.ndarray
+        Classification (``True`` if the component is a CSF one).
+    """
+    return x > thr_csf
+
+
+def hplane_criteria(x, hplane=HYPERPLANE):
+    """
+    Compute the hyperplane criteria for classification.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Projection of edge & max_RP_corr feature scores to new 1D space.
+
+    Returns
+    -------
+    :obj:`numpy.ndarray`
+        Classification (``True`` if the component is a motion one).
+
+    """
+    return (hplane[0] + np.dot(x, hplane[1:])) > 0
+
+
+def predict(X, thr_csf=THR_CSF, thr_hfc=THR_HFC, hplane=HYPERPLANE, metric_metadata=None):
+    """
+    Classify components as motion or non-motion based on four features.
+
+    The four features used for classification are: maximum RP correlation,
+    high-frequency content, edge-fraction, and CSF-fraction.
+
+    Parameters
+    ----------
+    X : :obj:`pandas.DataFrame`
+        Features table (C x 4), must contain the following columns:
+        "edge_fract", "csf_fract", "max_RP_corr", and "HFC".
+
+    Returns
+    -------
+    y : :obj:`list` of :obj:`str`
+        Label per component: "rejected" if classified as motion, "accepted" otherwise.
+    """
+    # Project edge & max_RP_corr feature scores to new 1D space
+    proj = hplane_criteria(X[["max_RP_corr", "edge_fract"]].values, hplane=hplane)
+
+    # Compute the CSF criteria
+    csf = csf_criteria(X["csf_fract"].values, thr_csf=thr_csf)
+
+    # Compute the HFC criteria
+    hfc = hfc_criteria(X["HFC"].values, thr_hfc=thr_hfc)
+
+    # Combine the criteria
+    classification = csf | hfc | proj
+
+    # Turn classification into a list of string labels: "rejected" if true, "accepted" if false
+    classification = ["rejected" if c else "accepted" for c in classification]
+
+    # Return the per-component labels
+    return classification
diff --git a/aroma/io.py b/aroma/io.py
@@ -0,0 +1,45 @@
+"""Input/output functions for the Aroma project."""
+import json
+import os.path as op
+
+
+def write_metrics(features_df, out_dir, metric_metadata=None):
+    """Write out feature/classification information and metadata.
+
+    Parameters
+    ----------
+    features_df : (C x 5) :obj:`pandas.DataFrame`
+        DataFrame with metric values and classifications.
+        Must have the following columns: "edge_fract", "csf_fract", "max_RP_corr", "HFC", and
+        "classification".
+    out_dir : :obj:`str`
+        Output directory.
+    metric_metadata : :obj:`dict` or None, optional
+        Metric metadata in a dictionary.
+
+    Returns
+    -------
+    motion_ICs : array_like
+        Array containing the indices of the components identified as motion components.
+
+    Output
+    ------
+    AROMAnoiseICs.csv : A text file containing the indices of the
+                        components identified as motion components
+    desc-AROMA_metrics.tsv
+    desc-AROMA_metrics.json
+    """
+    # Put the index labels of motion-classified ICs in a text file (written as-is, no offset)
+    motion_ICs = features_df["classification"][features_df["classification"] == "rejected"].index
+    motion_ICs = motion_ICs.values
+
+    with open(op.join(out_dir, "AROMAnoiseICs.csv"), "w") as fo:
+        out_str = ",".join(motion_ICs.astype(str))
+        fo.write(out_str)
+
+    # Create a summary overview of the classification
+    out_file = op.join(out_dir, "desc-AROMA_metrics.tsv")
+    features_df.to_csv(out_file, sep="\t", index_label="IC")
+
+    if isinstance(metric_metadata, dict):
+        with open(op.join(out_dir, "desc-AROMA_metrics.json"), "w") as fo:
+            json.dump(metric_metadata, fo, sort_keys=True, indent=4)
+
+    return motion_ICs
diff --git a/aroma/tests/test_classification.py b/aroma/tests/test_classification.py
@@ -0,0 +1,12 @@
+"""Tests for aroma.classification."""
+import pandas as pd
+from aroma import classification
+
+
+def test_classification(classification_overview):
+    """Test aroma.classification.predict and ensure classifications come out the same."""
+    clf_overview_df = pd.read_table(classification_overview)
+    test_df = clf_overview_df[["edge_fract", "csf_fract", "max_RP_corr", "HFC"]]
+    test_classifications = classification.predict(test_df, metric_metadata={})
+    true_classifications = clf_overview_df["classification"].tolist()
+    assert true_classifications == test_classifications
diff --git a/aroma/tests/test_utils.py b/aroma/tests/test_utils.py
@@ -1,22 +1,10 @@
 """Tests for aroma.utils."""
-import os
-
 import numpy as np
 import pandas as pd
 import pytest
 from aroma import utils
 
 
-def test_classification(classification_overview):
-    """Test aroma.utils.classification and ensure classifications come out the same."""
-    clf_overview_df = pd.read_table(classification_overview)
-    test_df = clf_overview_df[["edge_fract", "csf_fract", "max_RP_corr", "HFC"]]
-    test_df, metadata = utils.classification(test_df, {})
-    true_classifications = clf_overview_df["classification"].tolist()
-    test_classifications = test_df["classification"].tolist()
-    assert true_classifications == test_classifications
-
-
 def test_load_motpars_manual(motion_parameters):
     """Test aroma.utils.load_motpars with manual source determination."""
     fsl = utils.load_motpars(motion_parameters["FSL"], source="fsl")