This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Write classification model output to a file for later use in reports. #451

Merged · 6 commits · May 4, 2021
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -61,6 +61,8 @@ with only minimum code changes required. See [the MD documentation](docs/bring_y
- ([#445](https://github.com/microsoft/InnerEye-DeepLearning/pull/445)) Adding test coverage for the `HelloContainer`
model with multiple GPUs
- ([#450](https://github.com/microsoft/InnerEye-DeepLearning/pull/450)) Adds the metric "Accuracy at threshold 0.5" to the classification report (`classification_crossval_report.ipynb`).
- ([#451](https://github.com/microsoft/InnerEye-DeepLearning/pull/451)) Write a file `model_outputs.csv` with columns
`subject`, `prediction_target`, `label`, `model_output` and `cross_validation_split_index`. This file is not written out for sequence models.

### Changed

@@ -77,6 +79,8 @@ with only minimum code changes required. See [the MD documentation](docs/bring_y
- ([#437](https://github.com/microsoft/InnerEye-DeepLearning/pull/437)) Upgrade to PyTorch-Lightning 1.2.8.
- ([#439](https://github.com/microsoft/InnerEye-DeepLearning/pull/439)) Recovery checkpoints are now
named `recovery_epoch=x.ckpt` instead of `recovery.ckpt` or `recovery-v0.ckpt`.
- ([#451](https://github.com/microsoft/InnerEye-DeepLearning/pull/451)) Change the signature for function `generate_custom_report`
in `ModelConfigBase` to take only the path to the reports folder and a `ModelProcessing` object.

### Fixed

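The changelog entry above introduces a per-subject `model_outputs.csv` for later use in reports. Below is a minimal sketch of how such a file could be consumed, assuming only the column names listed in the entry; the file path and the per-target AUC summary are illustrative and not part of this PR.

```python
from pathlib import Path

import pandas as pd
from sklearn.metrics import roc_auc_score

# Hypothetical location of the per-subject outputs written during model testing.
model_outputs_path = Path("outputs/best_validation_epoch/Test/model_outputs.csv")

df = pd.read_csv(model_outputs_path)

# One row per subject and prediction target; summarise each target separately.
for target, per_target in df.groupby("prediction_target"):
    auc = roc_auc_score(per_target["label"], per_target["model_output"])
    print(f"{target}: n={len(per_target)}, AUC={auc:.3f}")
```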
8 changes: 3 additions & 5 deletions InnerEye/ML/model_config_base.py
@@ -13,6 +13,7 @@
from pandas import DataFrame

from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY
from InnerEye.Common.common_util import ModelProcessing
from InnerEye.Common.metrics_constants import TrackedMetrics
from InnerEye.ML.common import DATASET_CSV_FILE_NAME, ModelExecutionMode, STORED_CSV_FILE_NAMES
from InnerEye.ML.deep_learning_config import DeepLearningConfig
@@ -248,16 +249,13 @@ def set_derived_model_properties(self, model: Any) -> None:
"""
pass

def generate_custom_report(self, report_dir: Path, train_metrics: Path, val_metrics: Path,
test_metrics: Path) -> Path:
def generate_custom_report(self, report_dir: Path, model_proc: ModelProcessing) -> Path:
"""
Enables creating a custom results report, given the metrics files written during model training and inference.
By default, this method is a no-op.

:param report_dir: The output directory where the generated report should be saved.
:param train_metrics: The CSV file with training metrics.
:param val_metrics: The CSV file with validation metrics.
:param test_metrics: The CSV file with test metrics.
    :param model_proc: The type of model that is registered (single or ensemble).
:return: The path to the generated report file.
"""
pass
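With the signature change above, a model configuration that wants a custom report now receives only the report directory and a `ModelProcessing` value. The following is a rough sketch of what an override could look like under that assumption; `MyClassificationConfig`, the report file name, and the `ScalarModelBase` import path are illustrative and not taken from this PR.

```python
from pathlib import Path

from InnerEye.Common.common_util import ModelProcessing
from InnerEye.ML.scalar_config import ScalarModelBase


class MyClassificationConfig(ScalarModelBase):
    """Hypothetical config that writes a trivial text report instead of a notebook."""

    def generate_custom_report(self, report_dir: Path, model_proc: ModelProcessing) -> Path:
        report_file = report_dir / "custom_report.txt"
        # A real override would typically read metrics files from the outputs folder and
        # render a notebook; here we only record which kind of model was processed.
        report_file.write_text(f"Custom report for {type(self).__name__}, model processing: {model_proc}\n")
        return report_file
```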
142 changes: 75 additions & 67 deletions InnerEye/ML/model_testing.py
@@ -17,7 +17,7 @@
from InnerEye.Common.common_util import BEST_EPOCH_FOLDER_NAME, METRICS_AGGREGATES_FILE, ModelProcessing, \
SUBJECT_METRICS_FILE_NAME, get_best_epoch_results_path, is_linux, logging_section
from InnerEye.Common.fixed_paths import DEFAULT_RESULT_IMAGE_NAME
from InnerEye.Common.metrics_constants import MetricType, MetricsFileColumns
from InnerEye.Common.metrics_constants import MetricType, MetricsFileColumns, LoggingColumns
from InnerEye.ML import metrics, plotting
from InnerEye.ML.common import ModelExecutionMode, STORED_CSV_FILE_NAMES
from InnerEye.ML.config import DATASET_ID_FILE, GROUND_TRUTH_IDS_FILE, IMAGE_CHANNEL_IDS_FILE, SegmentationModelBase
@@ -42,6 +42,7 @@

BOXPLOT_FILE = "metrics_boxplot.png"
THUMBNAILS_FOLDER = "thumbnails"
MODEL_OUTPUT_CSV = "model_outputs.csv"


def model_test(config: ModelConfigBase,
@@ -409,72 +410,79 @@ def classification_model_test(config: ScalarModelBase,
"""
posthoc_label_transform = config.get_posthoc_label_transform()

def test_epoch(checkpoint_paths: List[Path]) -> Optional[MetricsDict]:
pipeline = create_inference_pipeline(config=config,
checkpoint_paths=checkpoint_paths)

if pipeline is None:
return None

# for mypy
assert isinstance(pipeline, ScalarInferencePipelineBase)

ml_util.set_random_seed(config.get_effective_random_seed(), "Model Testing")
ds = config.get_torch_dataset_for_inference(data_split).as_data_loader(
shuffle=False,
batch_size=1,
num_dataload_workers=0
)

logging.info(f"Starting to evaluate model on {data_split.value} set.")
metrics_dict = create_metrics_dict_for_scalar_models(config)
for sample in ds:
result = pipeline.predict(sample)
model_output = result.posteriors
label = result.labels.to(device=model_output.device)
label = posthoc_label_transform(label)
sample_id = result.subject_ids[0]
compute_scalar_metrics(metrics_dict,
subject_ids=[sample_id],
model_output=model_output,
labels=label,
loss_type=config.loss_type)
logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}")

average = metrics_dict.average(across_hues=False)
logging.info(average.to_string())

return metrics_dict

checkpoints_to_test = checkpoint_handler.get_checkpoints_to_test()

if not checkpoints_to_test:
checkpoint_paths = checkpoint_handler.get_checkpoints_to_test()
if not checkpoint_paths:
raise ValueError("There were no checkpoints available for model testing.")

result = test_epoch(checkpoint_paths=checkpoints_to_test)
if result is None:
raise ValueError("There was no single checkpoint file available for model testing.")
pipeline = create_inference_pipeline(config=config,
checkpoint_paths=checkpoint_paths)
if pipeline is None:
raise ValueError("Inference pipeline could not be created.")

# for mypy
assert isinstance(pipeline, ScalarInferencePipelineBase)

ml_util.set_random_seed(config.get_effective_random_seed(), "Model Testing")
ds = config.get_torch_dataset_for_inference(data_split).as_data_loader(
shuffle=False,
batch_size=1,
Contributor review comment on the `batch_size=1` line above: I think the batch size was set to 1 in the past and it is not related to this PR. However, if you think that inference metric computation takes long, we could do the inference, metric computation, and logging in batches as well.

num_dataload_workers=0
)

logging.info(f"Starting to evaluate model on {data_split.value} set.")
results_folder = config.outputs_folder / get_best_epoch_results_path(data_split, model_proc)
os.makedirs(str(results_folder), exist_ok=True)
metrics_dict = create_metrics_dict_for_scalar_models(config)
if not isinstance(config, SequenceModelBase):
output_logger: Optional[DataframeLogger] = DataframeLogger(csv_path=results_folder / MODEL_OUTPUT_CSV)
else:
if isinstance(result, ScalarMetricsDict):
results_folder = config.outputs_folder / get_best_epoch_results_path(data_split, model_proc)
csv_file = results_folder / SUBJECT_METRICS_FILE_NAME

logging.info(f"Writing {data_split.value} metrics to file {str(csv_file)}")

# If we are running inference after a training run, the validation set metrics may have been written
# during train time. If this is not the case, or we are running on the test set, create the metrics
# file.
if not csv_file.exists():
os.makedirs(str(results_folder), exist_ok=False)
df_logger = DataframeLogger(csv_file)
            # For an ensemble model, use the default split index; otherwise record which fold produced this prediction
cv_index = DEFAULT_CROSS_VALIDATION_SPLIT_INDEX if model_proc == ModelProcessing.ENSEMBLE_CREATION \
else cross_val_split_index
result.store_metrics_per_subject(df_logger=df_logger,
mode=data_split,
cross_validation_split_index=cv_index,
epoch=BEST_EPOCH_FOLDER_NAME)
# write to disk
df_logger.flush()

return InferenceMetricsForClassification(metrics=result)
output_logger = None

for sample in ds:
result = pipeline.predict(sample)
model_output = result.posteriors
label = result.labels.to(device=model_output.device)
label = posthoc_label_transform(label)
sample_id = result.subject_ids[0]
if output_logger:
for i in range(len(config.target_names)):
output_logger.add_record({LoggingColumns.Patient.value: sample_id,
LoggingColumns.Hue.value: config.target_names[i],
LoggingColumns.Label.value: label[0][i].item(),
LoggingColumns.ModelOutput.value: model_output[0][i].item(),
LoggingColumns.CrossValidationSplitIndex.value: cross_val_split_index})

compute_scalar_metrics(metrics_dict,
subject_ids=[sample_id],
model_output=model_output,
labels=label,
loss_type=config.loss_type)
logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}")

average = metrics_dict.average(across_hues=False)
logging.info(average.to_string())

if isinstance(metrics_dict, ScalarMetricsDict):
csv_file = results_folder / SUBJECT_METRICS_FILE_NAME

logging.info(f"Writing {data_split.value} metrics to file {str(csv_file)}")

# If we are running inference after a training run, the validation set metrics may have been written
# during train time. If this is not the case, or we are running on the test set, create the metrics
# file.
if not csv_file.exists():
df_logger = DataframeLogger(csv_file)
            # For an ensemble model, use the default split index; otherwise record which fold produced this prediction
cv_index = DEFAULT_CROSS_VALIDATION_SPLIT_INDEX if model_proc == ModelProcessing.ENSEMBLE_CREATION \
else cross_val_split_index
metrics_dict.store_metrics_per_subject(df_logger=df_logger,
mode=data_split,
cross_validation_split_index=cv_index,
epoch=BEST_EPOCH_FOLDER_NAME)
# write to disk
df_logger.flush()

if output_logger:
output_logger.flush()

return InferenceMetricsForClassification(metrics=metrics_dict)
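The loop above adds one record per subject and per prediction target to a `DataframeLogger` and writes everything to `model_outputs.csv` in a single `flush()` at the end. The following is a simplified, self-contained sketch of that accumulate-then-flush pattern; `SimpleDataframeLogger` is a stand-in written for illustration, not the repository's `DataframeLogger`, and the subject IDs and values loosely mirror the test expectations further down, with an illustrative prediction target name.

```python
from pathlib import Path
from typing import Any, Dict, List

import pandas as pd


class SimpleDataframeLogger:
    """Illustrative stand-in for DataframeLogger: collect dict records, write one CSV on flush."""

    def __init__(self, csv_path: Path) -> None:
        self.csv_path = csv_path
        self.records: List[Dict[str, Any]] = []

    def add_record(self, record: Dict[str, Any]) -> None:
        self.records.append(record)

    def flush(self) -> None:
        self.csv_path.parent.mkdir(parents=True, exist_ok=True)
        pd.DataFrame(self.records).to_csv(self.csv_path, index=False)


# Hypothetical usage mirroring the inference loop: one row per subject and prediction target.
logger = SimpleDataframeLogger(Path("outputs/model_outputs.csv"))
for subject, target, label, model_output in [("S2", "my_target", 1.0, 0.529399),
                                             ("S4", "my_target", 0.0, 0.521128)]:
    logger.add_record({"subject": subject,
                       "prediction_target": target,
                       "label": label,
                       "model_output": model_output,
                       "cross_validation_split_index": -1})
logger.flush()
```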
5 changes: 1 addition & 4 deletions InnerEye/ML/run_ml.py
@@ -889,10 +889,7 @@ def get_epoch_path(mode: ModelExecutionMode) -> Path:
else:
logging.info(f"Cannot create report for config of type {type(config)}.")

config.generate_custom_report(report_dir=reports_dir,
train_metrics=path_to_best_epoch_train,
val_metrics=path_to_best_epoch_val,
test_metrics=path_to_best_epoch_test)
config.generate_custom_report(report_dir=reports_dir, model_proc=model_proc)
except Exception as ex:
print_exception(ex, "Failed to generate reporting notebook.")
raise
10 changes: 9 additions & 1 deletion Tests/ML/models/test_scalar_model.py
@@ -139,7 +139,7 @@ def test_train_classification_model(class_name: str, test_output_dirs: OutputFol
3,S4,{class_name},0.521128,0,Train,-1
"""
check_log_file(metrics_path, metrics_expected, ignore_columns=[])
# Check log METRICS_FILE_NAME inside of the folder epoch_004/Train, which is written when we run model_test.
# Check log METRICS_FILE_NAME inside of the folder best_validation_epoch/Train, which is written when we run model_test.
# Normally, we would run it on the Test and Val splits, but for convenience we test on the train split here.
inference_metrics_path = config.outputs_folder / get_best_epoch_results_path(ModelExecutionMode.TRAIN) / \
SUBJECT_METRICS_FILE_NAME
@@ -150,6 +150,14 @@ def test_train_classification_model(class_name: str, test_output_dirs: OutputFol
"""
check_log_file(inference_metrics_path, inference_metrics_expected, ignore_columns=[])

inference_model_output_path = config.outputs_folder / get_best_epoch_results_path(ModelExecutionMode.TRAIN) / \
model_testing.MODEL_OUTPUT_CSV
inference_model_output_expected = \
f"""subject,prediction_target,label,model_output,cross_validation_split_index
S2,{class_name},1.000000,0.529399,-1
S4,{class_name},0.000000,0.521128,-1"""
check_log_file(inference_model_output_path, inference_model_output_expected, ignore_columns=[])
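
The implementation of `check_log_file` is not part of this diff. As a rough illustration of the kind of comparison performed here, the helper below (a sketch, not the repository's implementation) parses both the written file and the expected text with pandas and compares them, rounding floats so that formatting differences do not fail the check.

```python
from io import StringIO
from pathlib import Path

import pandas as pd


def assert_csv_matches(csv_path: Path, expected_csv: str) -> None:
    # Illustrative helper, not check_log_file itself: compare the written CSV against an
    # expected CSV given as a string, tolerating dtype and float formatting differences.
    actual = pd.read_csv(csv_path).round(6)
    expected = pd.read_csv(StringIO(expected_csv)).round(6)
    pd.testing.assert_frame_equal(actual, expected, check_dtype=False)
```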


@pytest.mark.skipif(common_util.is_windows(), reason="Has OOM issues on windows build")
@pytest.mark.cpu_and_gpu