diff --git a/README.md b/README.md index a6630f4..ab59589 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,15 @@ # Breast cancer [![Actions Status](https://github.com/pasmopy/breast_cancer/workflows/Tests/badge.svg)](https://github.com/pasmopy/breast_cancer/actions) -Workflow for classifying breast cancer subtypes based on intracellular signaling dynamics. +This repository contains analysis code for the following paper: + + ## Requirements | Language | Dependent packages | | ------------- | -------------------------------------------------------------- | | Python >= 3.7 | See [`requirements.txt`](requirements.txt) | -| Julia >= 1.5 | [BioMASS.jl](https://github.com/biomass-dev/BioMASS.jl) | +| Julia >= 1.5 | [`BioMASS.jl==0.5.0`](https://github.com/biomass-dev/BioMASS.jl) | | R >= 4.0 | TCGAbiolinks, sva, biomaRt, edgeR, ComplexHeatmap, viridisLite | ## Table of contents @@ -171,11 +173,11 @@ Workflow for classifying breast cancer subtypes based on intracellular signaling breast_cancer_models = [] path_to_models = os.path.join("models", "breast") - for f in os.listdir(path_to_models): - if os.path.isdir(os.path.join(path_to_models, f)) and ( - f.startswith("TCGA_") or f.endswith("_BREAST") + for model in os.listdir(path_to_models): + if os.path.isdir(os.path.join(path_to_models, model)) and ( + model.startswith("TCGA_") or model.endswith("_BREAST") ): - breast_cancer_models.append(f) + breast_cancer_models.append(model) # Set optimized parameters for model in breast_cancer_models: shutil.copytree( @@ -241,11 +243,14 @@ Workflow for classifying breast cancer subtypes based on intracellular signaling - Calculate sensitivity coefficients by varying the amount of each nonzero species ```python + import os + from pasmopy import PatientModelAnalyses import models.breast + - with open ("selected_tnbc.txt" mode="r") as f: + with open (os.path.join("models", "breast", "selected_tnbc.txt"), mode="r") as f: TNBC_ID = f.read().splitlines() analyses = PatientModelAnalyses( models.breast.__package__, diff --git a/drug_response/drug/database.py b/drug_response/drug/database.py index 4a1aad4..083f80b 100644 --- a/drug_response/drug/database.py +++ b/drug_response/drug/database.py @@ -115,14 +115,60 @@ def _get_drug_response( ) return drug_response_info - def _check_args(self, drug: str, save_format: str) -> Optional[NoReturn]: + def _check_args(self, drug: str) -> Optional[NoReturn]: if drug not in set(self.drug_response_data["Compound"]): raise ValueError( f"{drug} doesn't exist in the CCLE drug response data.\n" f"Should be one of {', '.join(list(set(self.drug_response_data['Compound'])))}" ) - if save_format not in ["pdf", "png"]: - raise ValueError("save_format must be either 'pdf' of 'png'.") + + @staticmethod + def _plot_dose_response_curve( + x: List[float], + population: List[DrugResponse], + color: str, + label: str, + show_individual: bool, + error :str, + ): + if show_individual: + for i, _ in enumerate(population): + plt.plot( + x, + np.interp(x, population[i].doses, population[i].activity_data) + 100, + "o", + color=color, + ) + y_mean = np.mean( + [ + (np.interp(x, population[i].doses, population[i].activity_data) + 100) + for i, _ in enumerate(population) + ], + axis=0 + ) + y_err = np.std( + [ + (np.interp(x, population[i].doses, population[i].activity_data) + 100) + for i, _ in enumerate(population) + ], + axis=0, + ddof=1 + ) / (1 if error == "std" else np.sqrt(len(population))) + plt.plot( + x, + y_mean, + "-", + color=color, + label=label, + ) + plt.fill_between( + x, + y_mean - y_err, + y_mean + y_err, + lw=0, + color=color, + alpha=0.1, + ) def save_dose_response_curve( self, @@ -132,17 +178,13 @@ def save_dose_response_curve( config: Optional[dict] = None, show_individual: bool = False, error: str = "std", - save_format: str = "pdf", ) -> None: - self._check_args(drug, save_format) + self._check_args(drug) if error not in ["std", "sem"]: raise ValueError("error must be either 'std' or 'sem'.") os.makedirs( - os.path.join( - "dose_response", - f"{self._drug2target(drug)}", - ), + os.path.join("dose_response", f"{self._drug2target(drug)}"), exist_ok=True ) @@ -154,107 +196,36 @@ def save_dose_response_curve( elif level == "low": bottom30.append(erbb_expression_ratio.index[i]) - egfr_high = self._get_drug_response( - top30, - [drug] * len(top30), - ) - egfr_low = self._get_drug_response( - bottom30, - [drug] * len(bottom30), - ) + egfr_high = self._get_drug_response(top30, [drug] * len(top30)) + egfr_low = self._get_drug_response(bottom30, [drug] * len(bottom30)) if config is None: config = {} - config.setdefault('figure.figsize', (7, 5)) - config.setdefault('font.size', 24) - config.setdefault('font.family', 'Arial') - config.setdefault('axes.linewidth', 2.4) - config.setdefault('xtick.major.width', 2.4) - config.setdefault('ytick.major.width', 2.4) - config.setdefault('lines.linewidth', 3) + config.setdefault("figure.figsize", (7, 5)) + config.setdefault("font.size", 24) + config.setdefault("font.family", "Arial") + config.setdefault("axes.linewidth", 2.4) + config.setdefault("xtick.major.width", 2.4) + config.setdefault("ytick.major.width", 2.4) + config.setdefault("lines.linewidth", 3) config.setdefault("lines.markersize", 1) + config.setdefault("savefig.bbox", "tight") + config.setdefault("savefig.format", "pdf") + plt.rcParams.update(config) + plt.gca().spines["right"].set_visible(False) plt.gca().spines["top"].set_visible(False) DOSE = [2.50e-03, 8.00e-03, 2.50e-02, 8.00e-02, 2.50e-01, 8.00e-01, 2.53e+00, 8.00e+00] - if show_individual: - for i, _ in enumerate(egfr_high): - plt.plot( - DOSE, - np.interp(DOSE, egfr_high[i].doses, egfr_high[i].activity_data) + 100, - "o", - color="darkmagenta", - ) - y_mean = np.mean( - [ - (np.interp(DOSE, egfr_high[i].doses, egfr_high[i].activity_data) + 100) - for i, _ in enumerate(egfr_high) - ], - axis=0 + self._plot_dose_response_curve( + DOSE, egfr_high, color="darkmagenta", label="EGFR high", + show_individual=show_individual, error=error ) - y_err = np.std( - [ - (np.interp(DOSE, egfr_high[i].doses, egfr_high[i].activity_data) + 100) - for i, _ in enumerate(egfr_high) - ], - axis=0, - ddof=1 - ) / (1 if error == "std" else np.sqrt(len(egfr_high))) - plt.plot( - DOSE, - y_mean, - "-", - color="darkmagenta", - label="EGFR high", - ) - plt.fill_between( - DOSE, - y_mean - y_err, - y_mean + y_err, - lw=0, - color="darkmagenta", - alpha=0.1, - ) - - if show_individual: - for i, _ in enumerate(egfr_low): - plt.plot( - DOSE, - np.interp(DOSE, egfr_low[i].doses, egfr_low[i].activity_data) + 100, - "o", - color="goldenrod", - ) - y_mean = np.mean( - [ - (np.interp(DOSE, egfr_low[i].doses, egfr_low[i].activity_data) + 100) - for i, _ in enumerate(egfr_low) - ], - axis=0 - ) - y_err = np.std( - [ - (np.interp(DOSE, egfr_low[i].doses, egfr_low[i].activity_data) + 100) - for i, _ in enumerate(egfr_low) - ], - axis=0, - ddof=1 - ) / (1 if error == "std" else np.sqrt(len(egfr_high))) - plt.plot( - DOSE, - y_mean, - "-", - color="goldenrod", - label="EGFR low", - ) - plt.fill_between( - DOSE, - y_mean - y_err, - y_mean + y_err, - lw=0, - color="goldenrod", - alpha=0.1, + self._plot_dose_response_curve( + DOSE, egfr_low, color="goldenrod", label="EGFR low", + show_individual=show_individual, error=error ) plt.xscale("log") @@ -270,10 +241,8 @@ def save_dose_response_curve( os.path.join( "dose_response", f"{self._drug2target(drug)}", - f"{self._convert_drug_name(drug)}.{save_format}", + f"{self._convert_drug_name(drug)}", ), - dpi=1200 if save_format == "png" else None, - bbox_inches="tight", ) plt.close() @@ -284,9 +253,8 @@ def save_activity_area( drug: str, *, config: Optional[dict] = None, - save_format: str = "pdf", ) -> None: - self._check_args(drug, save_format) + self._check_args(drug) os.makedirs( os.path.join( @@ -304,33 +272,30 @@ def save_activity_area( elif level == "low": bottom30.append(erbb_expression_ratio.index[i]) - egfr_high = self._get_drug_response( - top30, - [drug]*len(top30), - ) - egfr_low = self._get_drug_response( - bottom30, - [drug]*len(bottom30), - ) - + egfr_high = self._get_drug_response(top30, [drug] * len(top30)) + egfr_low = self._get_drug_response(bottom30, [drug] * len(bottom30)) + activity_area = np.array( [ [egfr_high[i].act_area for i, _ in enumerate(egfr_high)], [egfr_low[i].act_area for i, _ in enumerate(egfr_low)], ], + dtype=object, ) if config is None: config = {} - config.setdefault('figure.figsize', (3, 5)) - config.setdefault('font.family', 'Arial') - config.setdefault('mathtext.it', 'Arial:italic') - config.setdefault('font.size', 24) - config.setdefault('axes.linewidth', 2.4) - config.setdefault('xtick.major.width', 2.4) - config.setdefault('ytick.major.width', 2.4) - config.setdefault('lines.linewidth', 1.8) - config.setdefault('lines.markersize', 11) + config.setdefault("figure.figsize", (3, 5)) + config.setdefault("font.family", "Arial") + config.setdefault("mathtext.it", "Arial:italic") + config.setdefault("font.size", 24) + config.setdefault("axes.linewidth", 2.4) + config.setdefault("xtick.major.width", 2.4) + config.setdefault("ytick.major.width", 2.4) + config.setdefault("lines.linewidth", 1.8) + config.setdefault("lines.markersize", 11) + config.setdefault("savefig.bbox", "tight") + config.setdefault("savefig.format", "pdf") plt.rcParams.update(config) @@ -357,10 +322,8 @@ def save_activity_area( os.path.join( "activity_area", f"{self._drug2target(drug)}", - f"{self._convert_drug_name(drug)}.{save_format}", + f"{self._convert_drug_name(drug)}", ), - dpi=1200 if save_format == "png" else None, - bbox_inches="tight", ) plt.close() @@ -371,17 +334,14 @@ def save_all( *, config_dose_response_curve: Optional[dict] = None, config_activity_area: Optional[dict] = None, - save_format: str = "pdf", ) -> None: self.save_dose_response_curve( erbb_expression_ratio, drug, config=config_dose_response_curve, - save_format=save_format, ) self.save_activity_area( erbb_expression_ratio, drug, config=config_activity_area, - save_format=save_format, ) diff --git a/pasmopy/patient_model.py b/pasmopy/patient_model.py index 02b3099..66c1123 100644 --- a/pasmopy/patient_model.py +++ b/pasmopy/patient_model.py @@ -3,7 +3,7 @@ import os import platform from dataclasses import dataclass, field -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional, Union import numpy as np import pandas as pd @@ -78,9 +78,19 @@ class PatientModelSimulations(InSilico): ---------- biomass_kws : dict, optional Keyword arguments to pass to biomass.run_simulation. + response_characteristics : dict[str, Callable[[1d-array], int ot float]] + A dictionary containing functions to extract dynamic response characteristics + from time-course simulations. """ biomass_kws: Optional[dict] = field(default=None) + response_characteristics: Dict[str, Callable[[np.ndarray], Union[int, float]]] = field( + default_factory=lambda: dict( + max=np.max, + AUC=simps, + ), + init=False, + ) def _run_single_patient(self, patient: str) -> None: """ @@ -109,24 +119,6 @@ def run(self, n_proc: Optional[int] = None) -> None: n_proc = multiprocessing.cpu_count() - 1 self.parallel_execute(self._run_single_patient, n_proc) - @staticmethod - def _calc_response_characteristics( - time_course_data: np.ndarray, - metric: str, - ) -> str: - if metric.lower() == "max": - response_characteristics = np.max(time_course_data) - elif metric.lower() == "auc": - response_characteristics = simps(time_course_data) - elif metric.lower() == "droprate": - response_characteristics = (np.max(time_course_data) - time_course_data[-1]) / ( - len(time_course_data) - np.argmax(time_course_data) - ) - else: - raise ValueError("Available metrics are: 'max', 'AUC', 'droprate'.") - - return str(response_characteristics) - def _extract( self, dynamic_characteristics: Dict[str, Dict[str, List[str]]], @@ -177,9 +169,10 @@ def _extract( for h in header[1:]: condition, metric = h.split("_") patient_specific_characteristics.append( - self._calc_response_characteristics( - data[:, patient_specific.problem.conditions.index(condition)], - metric, + str( + self.response_characteristics[metric]( + data[:, patient_specific.problem.conditions.index(condition)], + ) ) ) writer = csv.writer(f, lineterminator="\n") @@ -214,6 +207,8 @@ def subtyping( Examples -------- + Subtype classification + >>> with open ("models/breast/sample_names.txt", mode="r") as f: ... TCGA_ID = f.read().splitlines() >>> from pasmopy import PatientModelSimulations @@ -228,6 +223,11 @@ def subtyping( ... clustermap_kws={"figsize": (9, 12)} ... ) + Add new characteristics + + >>> def droprate(time_course: np.ndarray) -> float: + ... return - (time_course[-1] - np.max(time_course)) / (len(time_course) - np.argmax(time_course)) + >>> simulations.response_characteristics["droprate"] = droprate """ # seaborn clustermap if clustermap_kws is None: diff --git a/tests/test_patient_specific_modeling.py b/tests/test_patient_specific_modeling.py index 13552c8..9b8e8d5 100644 --- a/tests/test_patient_specific_modeling.py +++ b/tests/test_patient_specific_modeling.py @@ -3,8 +3,9 @@ import shutil import sys import time -from typing import List +from typing import Callable, List +import numpy as np from pasmopy import PatientModelSimulations try: @@ -57,13 +58,19 @@ def test_patient_model_simulations(): assert simulations.run() is None elapsed = time.time() - start print(f"Computation time for simulating 10 patients: {elapsed/60:.1f} [min]") + # Add new response characteristics + _droprate: Callable[[np.ndarray], float] = lambda time_course: -( + time_course[-1] - np.max(time_course) + ) / (len(time_course) - np.argmax(time_course)) + simulations.response_characteristics["droprate"] = _droprate + simulations.response_characteristics["argmax"] = np.argmax # Extract response characteristics and visualize patient classification simulations.subtyping( "subtype_classification.pdf", { - "Phosphorylated_Akt": {"EGF": ["max"], "HRG": ["max"]}, - "Phosphorylated_ERK": {"EGF": ["max"], "HRG": ["max"]}, - "Phosphorylated_c-Myc": {"EGF": ["max"], "HRG": ["max"]}, + "Phosphorylated_Akt": {"EGF": ["max"], "HRG": ["AUC"]}, + "Phosphorylated_ERK": {"EGF": ["droprate"], "HRG": ["droprate"]}, + "Phosphorylated_c-Myc": {"EGF": ["argmax"], "HRG": ["argmax"]}, }, ) observables = ["Phosphorylated_Akt", "Phosphorylated_ERK", "Phosphorylated_c-Myc"] diff --git a/training/README.md b/training/README.md index 2b24296..2458eeb 100644 --- a/training/README.md +++ b/training/README.md @@ -6,7 +6,7 @@ Using experimental datasets obtained from breast cancer cell lines to identify m | Language | Package | Desctiption | | ----------------------------------- | --------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | -| [Julia 1.5+](https://julialang.org) | [`BioMASS.jl`](https://github.com/biomass-dev/BioMASS.jl) | This module provides a Julia interface to the [biomass](https://github.com/biomass-dev/biomass) parameter estimation. | +| [Julia 1.5+](https://julialang.org) | [`BioMASS.jl==0.5.0`](https://github.com/biomass-dev/BioMASS.jl) | This module provides a Julia interface to the [biomass](https://github.com/biomass-dev/biomass) parameter estimation. | ### Run optimization diff --git a/training/main.jl b/training/main.jl index 37d0454..bb7abb3 100755 --- a/training/main.jl +++ b/training/main.jl @@ -1,6 +1,6 @@ using BioMASS -const model = load_model("erbb_network_jl") +const model = Model("erbb_network_jl") if abspath(PROGRAM_FILE) == @__FILE__ optimize(