From 40cd369757b398170e2b0083df0855bbe7d39fa5 Mon Sep 17 00:00:00 2001
From: Shruthi42 <13177030+Shruthi42@users.noreply.github.com>
Date: Fri, 30 Apr 2021 13:19:00 +0100
Subject: [PATCH 1/5] Initial

---
 InnerEye/ML/model_config_base.py |  7 ++-----
 InnerEye/ML/model_testing.py     | 17 +++++++++++++++--
 InnerEye/ML/run_ml.py            |  5 +----
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/InnerEye/ML/model_config_base.py b/InnerEye/ML/model_config_base.py
index 745d248ba..47c660fc2 100644
--- a/InnerEye/ML/model_config_base.py
+++ b/InnerEye/ML/model_config_base.py
@@ -13,6 +13,7 @@
 from pandas import DataFrame
 
 from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY
+from InnerEye.Common.common_util import ModelProcessing
 from InnerEye.Common.metrics_constants import TrackedMetrics
 from InnerEye.ML.common import DATASET_CSV_FILE_NAME, ModelExecutionMode, STORED_CSV_FILE_NAMES
 from InnerEye.ML.deep_learning_config import DeepLearningConfig
@@ -248,16 +249,12 @@ def set_derived_model_properties(self, model: Any) -> None:
         """
         pass
 
-    def generate_custom_report(self, report_dir: Path, train_metrics: Path, val_metrics: Path,
-                               test_metrics: Path) -> Path:
+    def generate_custom_report(self, report_dir: Path, model_proc: ModelProcessing) -> Path:
         """
         Enables creating a custom results report, given the metrics files written during model training and inference.
         By default, this method is a no-op.
 
         :param report_dir: The output directory where the generated report should be saved.
-        :param train_metrics: The CSV file with training metrics.
-        :param val_metrics: The CSV file with validation metrics.
-        :param test_metrics: The CSV file with test metrics.
         :return: The path to the generated report file.
         """
         pass
diff --git a/InnerEye/ML/model_testing.py b/InnerEye/ML/model_testing.py
index 7b7421f4b..f5b5f1223 100644
--- a/InnerEye/ML/model_testing.py
+++ b/InnerEye/ML/model_testing.py
@@ -17,7 +17,7 @@
 from InnerEye.Common.common_util import BEST_EPOCH_FOLDER_NAME, METRICS_AGGREGATES_FILE, ModelProcessing, \
     SUBJECT_METRICS_FILE_NAME, get_best_epoch_results_path, is_linux, logging_section
 from InnerEye.Common.fixed_paths import DEFAULT_RESULT_IMAGE_NAME
-from InnerEye.Common.metrics_constants import MetricType, MetricsFileColumns
+from InnerEye.Common.metrics_constants import MetricType, MetricsFileColumns, LoggingColumns
 from InnerEye.ML import metrics, plotting
 from InnerEye.ML.common import ModelExecutionMode, STORED_CSV_FILE_NAMES
 from InnerEye.ML.config import DATASET_ID_FILE, GROUND_TRUTH_IDS_FILE, IMAGE_CHANNEL_IDS_FILE, SegmentationModelBase
@@ -43,6 +43,7 @@ BOXPLOT_FILE = "metrics_boxplot.png"
 THUMBNAILS_FOLDER = "thumbnails"
 
+MODEL_OUTPUT_CSV = "model_outputs.csv"
 
 def model_test(config: ModelConfigBase,
                data_split: ModelExecutionMode,
                checkpoint_handler: CheckpointHandler,
@@ -428,12 +429,24 @@ def test_epoch(checkpoint_paths: List[Path]) -> Optional[MetricsDict]:
 
         logging.info(f"Starting to evaluate model on {data_split.value} set.")
         metrics_dict = create_metrics_dict_for_scalar_models(config)
+
+        results_folder = config.outputs_folder / get_best_epoch_results_path(data_split, model_proc)
+        csv_file = results_folder / MODEL_OUTPUT_CSV
+        os.makedirs(str(results_folder), exist_ok=True)
+        with open(csv_file, "w") as f:
+            f.write(f"{LoggingColumns.Patient.value},{LoggingColumns.Hue.value},{LoggingColumns.Label.value},"
+                    f"{LoggingColumns.ModelOutput.value},{LoggingColumns.CrossValidationSplitIndex.value}\n")
+
         for sample in ds:
             result = pipeline.predict(sample)
             model_output = result.posteriors
             label = result.labels.to(device=model_output.device)
             label = posthoc_label_transform(label)
             sample_id = result.subject_ids[0]
+            with open(csv_file, "a") as f:
+                for i in range(len(config.target_names)):
+                    f.write(f"{sample_id},{config.target_names[i]},{label[0][i].item()},{model_output[0][i].item()},"
+                            f"{cross_val_split_index}\n")
             compute_scalar_metrics(metrics_dict,
                                    subject_ids=[sample_id],
                                    model_output=model_output,
@@ -465,7 +478,7 @@ def test_epoch(checkpoint_paths: List[Path]) -> Optional[MetricsDict]:
         # during train time. If this is not the case, or we are running on the test set, create the metrics
         # file.
         if not csv_file.exists():
-            os.makedirs(str(results_folder), exist_ok=False)
+            os.makedirs(str(results_folder), exist_ok=True)
             df_logger = DataframeLogger(csv_file)
             # For test if ensemble split should be default, else record which fold produced this prediction
             cv_index = DEFAULT_CROSS_VALIDATION_SPLIT_INDEX if model_proc == ModelProcessing.ENSEMBLE_CREATION \
                 else cross_val_split_index
diff --git a/InnerEye/ML/run_ml.py b/InnerEye/ML/run_ml.py
index 48ee4131c..efe747244 100644
--- a/InnerEye/ML/run_ml.py
+++ b/InnerEye/ML/run_ml.py
@@ -889,10 +889,7 @@ def get_epoch_path(mode: ModelExecutionMode) -> Path:
             else:
                 logging.info(f"Cannot create report for config of type {type(config)}.")
 
-            config.generate_custom_report(report_dir=reports_dir,
-                                          train_metrics=path_to_best_epoch_train,
-                                          val_metrics=path_to_best_epoch_val,
-                                          test_metrics=path_to_best_epoch_test)
+            config.generate_custom_report(report_dir=reports_dir, model_proc=model_proc)
         except Exception as ex:
             print_exception(ex, "Failed to generated reporting notebook.")
             raise

From 35e3a918e1259bb7a0a0a59a0fcac66bfd9908e6 Mon Sep 17 00:00:00 2001
From: Shruthi42 <13177030+Shruthi42@users.noreply.github.com>
Date: Tue, 4 May 2021 12:47:55 +0100
Subject: [PATCH 2/5] Refactor

---
 InnerEye/ML/model_config_base.py |   1 +
 InnerEye/ML/model_testing.py     | 151 +++++++++++++++----------------
 2 files changed, 74 insertions(+), 78 deletions(-)

diff --git a/InnerEye/ML/model_config_base.py b/InnerEye/ML/model_config_base.py
index 47c660fc2..ed483444e 100644
--- a/InnerEye/ML/model_config_base.py
+++ b/InnerEye/ML/model_config_base.py
@@ -255,6 +255,7 @@ def generate_custom_report(self, report_dir: Path, model_proc: ModelProcessing)
         By default, this method is a no-op.
 
         :param report_dir: The output directory where the generated report should be saved.
+        :param model_proc: The type of model that is registered (single or ensemble).
         :return: The path to the generated report file.
""" pass diff --git a/InnerEye/ML/model_testing.py b/InnerEye/ML/model_testing.py index f5b5f1223..e043f6fe9 100644 --- a/InnerEye/ML/model_testing.py +++ b/InnerEye/ML/model_testing.py @@ -42,9 +42,9 @@ BOXPLOT_FILE = "metrics_boxplot.png" THUMBNAILS_FOLDER = "thumbnails" - MODEL_OUTPUT_CSV = "model_outputs.csv" + def model_test(config: ModelConfigBase, data_split: ModelExecutionMode, checkpoint_handler: CheckpointHandler, @@ -410,84 +410,79 @@ def classification_model_test(config: ScalarModelBase, """ posthoc_label_transform = config.get_posthoc_label_transform() - def test_epoch(checkpoint_paths: List[Path]) -> Optional[MetricsDict]: - pipeline = create_inference_pipeline(config=config, - checkpoint_paths=checkpoint_paths) - - if pipeline is None: - return None - - # for mypy - assert isinstance(pipeline, ScalarInferencePipelineBase) - - ml_util.set_random_seed(config.get_effective_random_seed(), "Model Testing") - ds = config.get_torch_dataset_for_inference(data_split).as_data_loader( - shuffle=False, - batch_size=1, - num_dataload_workers=0 - ) - - logging.info(f"Starting to evaluate model on {data_split.value} set.") - metrics_dict = create_metrics_dict_for_scalar_models(config) - - results_folder = config.outputs_folder / get_best_epoch_results_path(data_split, model_proc) - csv_file = results_folder / MODEL_OUTPUT_CSV - os.makedirs(str(results_folder), exist_ok=True) - with open(csv_file, "w") as f: - f.write(f"{LoggingColumns.Patient.value},{LoggingColumns.Hue.value},{LoggingColumns.Label.value}," - f"{LoggingColumns.ModelOutput.value},{LoggingColumns.CrossValidationSplitIndex.value}\n") - - for sample in ds: - result = pipeline.predict(sample) - model_output = result.posteriors - label = result.labels.to(device=model_output.device) - label = posthoc_label_transform(label) - sample_id = result.subject_ids[0] - with open(csv_file, "a") as f: - for i in range(len(config.target_names)): - f.write(f"{sample_id},{config.target_names[i]},{label[0][i].item()},{model_output[0][i].item()}," - f"{cross_val_split_index}\n") - compute_scalar_metrics(metrics_dict, - subject_ids=[sample_id], - model_output=model_output, - labels=label, - loss_type=config.loss_type) - logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}") - - average = metrics_dict.average(across_hues=False) - logging.info(average.to_string()) - - return metrics_dict - - checkpoints_to_test = checkpoint_handler.get_checkpoints_to_test() - - if not checkpoints_to_test: + checkpoint_paths = checkpoint_handler.get_checkpoints_to_test() + if not checkpoint_paths: raise ValueError("There were no checkpoints available for model testing.") - result = test_epoch(checkpoint_paths=checkpoints_to_test) - if result is None: + pipeline = create_inference_pipeline(config=config, + checkpoint_paths=checkpoint_paths) + if pipeline is None: raise ValueError("There was no single checkpoint file available for model testing.") + + # for mypy + assert isinstance(pipeline, ScalarInferencePipelineBase) + + ml_util.set_random_seed(config.get_effective_random_seed(), "Model Testing") + ds = config.get_torch_dataset_for_inference(data_split).as_data_loader( + shuffle=False, + batch_size=1, + num_dataload_workers=0 + ) + + logging.info(f"Starting to evaluate model on {data_split.value} set.") + results_folder = config.outputs_folder / get_best_epoch_results_path(data_split, model_proc) + os.makedirs(str(results_folder), exist_ok=True) + metrics_dict = create_metrics_dict_for_scalar_models(config) + if not isinstance(config, SequenceModelBase): 
+        output_logger: Optional[DataframeLogger] = DataframeLogger(csv_path=results_folder / MODEL_OUTPUT_CSV)
     else:
-        if isinstance(result, ScalarMetricsDict):
-            results_folder = config.outputs_folder / get_best_epoch_results_path(data_split, model_proc)
-            csv_file = results_folder / SUBJECT_METRICS_FILE_NAME
-
-            logging.info(f"Writing {data_split.value} metrics to file {str(csv_file)}")
-
-            # If we are running inference after a training run, the validation set metrics may have been written
-            # during train time. If this is not the case, or we are running on the test set, create the metrics
-            # file.
-            if not csv_file.exists():
-                os.makedirs(str(results_folder), exist_ok=True)
-                df_logger = DataframeLogger(csv_file)
-                # For test if ensemble split should be default, else record which fold produced this prediction
-                cv_index = DEFAULT_CROSS_VALIDATION_SPLIT_INDEX if model_proc == ModelProcessing.ENSEMBLE_CREATION \
-                    else cross_val_split_index
-                result.store_metrics_per_subject(df_logger=df_logger,
-                                                 mode=data_split,
-                                                 cross_validation_split_index=cv_index,
-                                                 epoch=BEST_EPOCH_FOLDER_NAME)
-                # write to disk
-                df_logger.flush()
-
-        return InferenceMetricsForClassification(metrics=result)
+        output_logger = None
+
+    for sample in ds:
+        result = pipeline.predict(sample)
+        model_output = result.posteriors
+        label = result.labels.to(device=model_output.device)
+        label = posthoc_label_transform(label)
+        sample_id = result.subject_ids[0]
+        if output_logger:
+            for i in range(len(config.target_names)):
+                output_logger.add_record({LoggingColumns.Patient.value: sample_id,
+                                          LoggingColumns.Hue.value: config.target_names[i],
+                                          LoggingColumns.Label.value: label[0][i].item(),
+                                          LoggingColumns.ModelOutput.value: model_output[0][i].item(),
+                                          LoggingColumns.CrossValidationSplitIndex.value: cross_val_split_index})
+
+        compute_scalar_metrics(metrics_dict,
+                               subject_ids=[sample_id],
+                               model_output=model_output,
+                               labels=label,
+                               loss_type=config.loss_type)
+        logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}")
+
+    average = metrics_dict.average(across_hues=False)
+    logging.info(average.to_string())
+
+    if isinstance(metrics_dict, ScalarMetricsDict):
+        csv_file = results_folder / SUBJECT_METRICS_FILE_NAME
+
+        logging.info(f"Writing {data_split.value} metrics to file {str(csv_file)}")
+
+        # If we are running inference after a training run, the validation set metrics may have been written
+        # during train time. If this is not the case, or we are running on the test set, create the metrics
+        # file.
+        if not csv_file.exists():
+            df_logger = DataframeLogger(csv_file)
+            # For test if ensemble split should be default, else record which fold produced this prediction
+            cv_index = DEFAULT_CROSS_VALIDATION_SPLIT_INDEX if model_proc == ModelProcessing.ENSEMBLE_CREATION \
+                else cross_val_split_index
+            metrics_dict.store_metrics_per_subject(df_logger=df_logger,
+                                                   mode=data_split,
+                                                   cross_validation_split_index=cv_index,
+                                                   epoch=BEST_EPOCH_FOLDER_NAME)
+            # write to disk
+            df_logger.flush()
+
+    if output_logger:
+        output_logger.flush()
+
+    return InferenceMetricsForClassification(metrics=metrics_dict)

From 7f41dffebdf3657db0d0535b35a53ea9bce65db2 Mon Sep 17 00:00:00 2001
From: Shruthi42 <13177030+Shruthi42@users.noreply.github.com>
Date: Tue, 4 May 2021 13:23:26 +0100
Subject: [PATCH 3/5] Add test

---
 Tests/ML/models/test_scalar_model.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Tests/ML/models/test_scalar_model.py b/Tests/ML/models/test_scalar_model.py
index 2f709cd84..6194aa7ad 100644
--- a/Tests/ML/models/test_scalar_model.py
+++ b/Tests/ML/models/test_scalar_model.py
@@ -139,7 +139,7 @@ def test_train_classification_model(class_name: str, test_output_dirs: OutputFol
     3,S4,{class_name},0.521128,0,Train,-1
     """
     check_log_file(metrics_path, metrics_expected, ignore_columns=[])
-    # Check log METRICS_FILE_NAME inside of the folder epoch_004/Train, which is written when we run model_test.
+    # Check log METRICS_FILE_NAME inside of the folder best_validation_epoch/Train, which is written when we run model_test.
     # Normally, we would run it on the Test and Val splits, but for convenience we test on the train split here.
     inference_metrics_path = config.outputs_folder / get_best_epoch_results_path(ModelExecutionMode.TRAIN) / \
                              SUBJECT_METRICS_FILE_NAME
@@ -150,6 +150,14 @@
     """
     check_log_file(inference_metrics_path, inference_metrics_expected, ignore_columns=[])
 
+    inference_model_output_path = config.outputs_folder / get_best_epoch_results_path(ModelExecutionMode.TRAIN) / \
+                                  model_testing.MODEL_OUTPUT_CSV
+    inference_model_output_expected = \
+        f"""subject,prediction_target,label,model_output,cross_validation_split_index
+S2,{class_name},1.000000,0.529399,-1
+S4,{class_name},0.000000,0.521128,-1"""
+    check_log_file(inference_model_output_path, inference_model_output_expected, ignore_columns=[])
+
 
 @pytest.mark.skipif(common_util.is_windows(), reason="Has OOM issues on windows build")
 @pytest.mark.cpu_and_gpu

From 2d09a4c78e0e57619bae4205f7a77c7b05039f6e Mon Sep 17 00:00:00 2001
From: Shruthi42 <13177030+Shruthi42@users.noreply.github.com>
Date: Tue, 4 May 2021 13:26:53 +0100
Subject: [PATCH 4/5] Update CHANGELOG.md

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1e6807bd4..000215458 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -61,6 +61,8 @@ with only minimum code changes required. See [the MD documentation](docs/bring_y
 - ([#445](https://github.com/microsoft/InnerEye-DeepLearning/pull/445)) Adding test coverage for the `HelloContainer`
   model with multiple GPUs
 - ([#450](https://github.com/microsoft/InnerEye-DeepLearning/pull/450)) Adds the metric "Accuracy at threshold 0.5" to the classification report (`classification_crossval_report.ipynb`).
+- ([#451](https://github.com/microsoft/InnerEye-DeepLearning/pull/451)) Write a file `model_outputs.csv` with columns
+  `subject`, `prediction_target`, `label`, `model_output` and `cross_validation_split_index`. This file is not written out for sequence models.
 
 ### Changed
 

From 27a16ee046a15a365bf56f34cfa7a3f4ffe006a0 Mon Sep 17 00:00:00 2001
From: Shruthi42 <13177030+Shruthi42@users.noreply.github.com>
Date: Tue, 4 May 2021 13:49:26 +0100
Subject: [PATCH 5/5] Address PR comments

---
 CHANGELOG.md                 | 2 ++
 InnerEye/ML/model_testing.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 000215458..d97088508 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -79,6 +79,8 @@ with only minimum code changes required. See [the MD documentation](docs/bring_y
 - ([#437])(https://github.com/microsoft/InnerEye-DeepLearning/pull/437)) Upgrade to PyTorch-Lightning 1.2.8.
 - ([#439](https://github.com/microsoft/InnerEye-DeepLearning/pull/439)) Recovery checkpoints are now named
   `recovery_epoch=x.ckpt` instead of `recovery.ckpt` or `recovery-v0.ckpt`.
+- ([#451](https://github.com/microsoft/InnerEye-DeepLearning/pull/451)) Change the signature for function `generate_custom_report`
+  in `ModelConfigBase` to take only the path to the reports folder and a `ModelProcessing` object.
 
 ### Fixed
 
diff --git a/InnerEye/ML/model_testing.py b/InnerEye/ML/model_testing.py
index e043f6fe9..f9f709321 100644
--- a/InnerEye/ML/model_testing.py
+++ b/InnerEye/ML/model_testing.py
@@ -417,7 +417,7 @@ def classification_model_test(config: ScalarModelBase,
     pipeline = create_inference_pipeline(config=config,
                                          checkpoint_paths=checkpoint_paths)
     if pipeline is None:
-        raise ValueError("There was no single checkpoint file available for model testing.")
+        raise ValueError("Inference pipeline could not be created.")
 
     # for mypy
     assert isinstance(pipeline, ScalarInferencePipelineBase)
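
Usage sketch (illustrative; not part of the patch series above): the new
`model_outputs.csv` written by `classification_model_test` contains one row per
subject and prediction target, with the columns pinned down by the test in
PATCH 3/5: `subject`, `prediction_target`, `label`, `model_output` and
`cross_validation_split_index`. Below is a minimal sketch of how such a file
could be consumed downstream, assuming pandas is installed; the function name,
the file path, and the 0.5 threshold (chosen to mirror the "Accuracy at
threshold 0.5" metric mentioned in the CHANGELOG) are hypothetical, not part
of the PR:

    import pandas as pd

    def accuracy_per_target(csv_path: str, threshold: float = 0.5) -> pd.Series:
        # Columns as written by model_testing: subject, prediction_target,
        # label, model_output, cross_validation_split_index.
        df = pd.read_csv(csv_path)
        # Binarize the posterior at the threshold and compare with the label.
        correct = (df["model_output"] >= threshold) == (df["label"] >= 0.5)
        # Average the per-row correctness within each prediction target.
        return correct.groupby(df["prediction_target"]).mean()

    # Path layout follows the test in PATCH 3/5 (best-epoch folder for the
    # TRAIN split); adjust to the actual outputs folder of the run.
    print(accuracy_per_target("outputs/best_validation_epoch/Train/model_outputs.csv"))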