Bug fix for regression test (#496)
- The regression test code in #492 missed files stored in AzureML, because the wrong function was called in the main workflow.
- Cleaner folder structure for regression test results (see the layout sketch below).
ant0nsc authored Jun 21, 2021
1 parent 01c31ed commit be36e39
Showing 49 changed files with 86 additions and 108 deletions.
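
For orientation only (this sketch is not part of the commit), the folder layout implied by the new constants in InnerEye/ML/baselines_util.py is roughly the following; the file under OUTPUT is a hypothetical example:

    <regression_test_folder>/
        OUTPUT/                   # compared against the run's own outputs folder
            foo/bar/contents.txt  # hypothetical example file
        AZUREML_OUTPUT/           # compared against files stored in the AzureML run context
        AZUREML_PARENT_OUTPUT/    # compared against the parent (cross-validation) run context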
2 changes: 2 additions & 0 deletions .amlignore
@@ -3,6 +3,8 @@
.pytest_cache
.mypy_cache
__pycache__
azure-pipelines
.github
datasets
modelweights
outputs
2 changes: 1 addition & 1 deletion .idea/runConfigurations/Template__Run_ML_on_AzureML.xml

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -17,6 +17,7 @@ created.
jobs that run in AzureML.

### Changed
- ([#496](https://github.com/microsoft/InnerEye-DeepLearning/pull/496)) All plots are now saved as PNG, rather than JPG.

### Fixed

2 changes: 1 addition & 1 deletion InnerEye/Common/Statistics/wilcoxon_signed_rank_test.py
@@ -309,7 +309,7 @@ def main() -> None:
for line in lines:
print(line)
for basename, fig in plots.items():
fig.savefig(f"{basename}.jpg")
fig.savefig(f"{basename}.png")


if __name__ == "__main__":
68 changes: 33 additions & 35 deletions InnerEye/ML/baselines_util.py
@@ -29,10 +29,11 @@
from InnerEye.ML.visualizers.metrics_scatterplot import write_to_scatterplot_directory
from InnerEye.ML.visualizers.plot_cross_validation import convert_rows_for_comparisons, may_write_lines_to_file

REGRESSION_TEST_OUTPUT_FOLDER = "OUTPUT"
REGRESSION_TEST_AZUREML_FOLDER = "AZUREML_OUTPUT"
REGRESSION_TEST_AZUREML_PARENT_FOLDER = "AZUREML_PARENT_OUTPUT"
CONTENTS_MISMATCH = "Contents mismatch"
MISSING_FILE = "Missing file"
MISSING_FILE = "Missing"
TEXT_FILE_SUFFIXES = [".txt", ".csv", ".json", ".html", ".md"]


@@ -222,56 +223,46 @@ def print_lines(prefix: str, lines: List[str]) -> None:

def compare_folder_contents(expected_folder: Path,
actual_folder: Optional[Path] = None,
run: Optional[Run] = None) -> None:
run: Optional[Run] = None) -> List[str]:
"""
Compares a set of files in a folder, against files in either the other folder or files stored in the given
AzureML run. Each file that is present in the "expected" folder must be also present in the "actual" folder
(or the AzureML run), with exactly the same contents, in the same folder structure.
For example, if there is a file "<expected>/foo/bar/contents.txt", then there must also be a file
"<actual>/foo/bar/contents.txt"
If a file is missing, or does not have the expected contents, an exception is raised.
:param expected_folder: A folder with files that are expected to be present.
:param actual_folder: The output folder with the actually produced files.
:param run: An AzureML run
:return: A list of human readable error messages, with message and file path. If no errors are found, the list is
empty.
"""
logging.debug(f"Checking job output against expected files in folder {expected_folder}")
logging.debug(f"Current working directory: {Path.cwd()}")
messages = []
if not expected_folder.is_dir():
raise ValueError(f"Folder with expected files does not exist: {expected_folder}")
if run and is_offline_run_context(run):
logging.warning("Skipping file comparison because the given run context is an AzureML offline run.")
return
return []
files_in_run: List[str] = run.get_file_names() if run else []
temp_folder = Path(tempfile.mkdtemp()) if run else None
for file in expected_folder.rglob("*"):
# rglob also returns folders, skip those
if file.is_dir():
continue
logging.debug(f"Checking file {file}")
# All files stored in AzureML runs use Linux-style path
file_relative = file.relative_to(expected_folder).as_posix()
if str(file_relative).startswith(REGRESSION_TEST_AZUREML_FOLDER) or \
str(file_relative).startswith(REGRESSION_TEST_AZUREML_PARENT_FOLDER):
continue
actual_file: Optional[Path] = None
if actual_folder:
actual_file = actual_folder / file_relative
if not actual_file.is_file():
actual_file = None
elif temp_folder is not None and run is not None:
actual_file = temp_folder / file_relative
if file_relative in files_in_run:
actual_file = temp_folder / file_relative
run.download_file(name=str(file_relative), output_file_path=str(actual_file))
message = compare_files(expected=file, actual=actual_file) if actual_file else "Missing file"
else:
raise ValueError("One of the two arguments run, actual_folder must be provided.")
message = compare_files(expected=file, actual=actual_file) if actual_file.exists() else MISSING_FILE
if message:
logging.debug(f"Error: {message}")
messages.append(f"{message}: {file_relative}")
logging.info(f"File {file_relative}: {message or 'OK'}")
if temp_folder:
shutil.rmtree(temp_folder)
if messages:
raise ValueError(f"Some expected files were missing or did not have the expected contents:{os.linesep}"
f"{os.linesep.join(messages)}")
return messages
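
As a rough usage sketch (not part of the diff), a caller could consume the new return value like this; the folder paths are hypothetical:

    import os
    from pathlib import Path
    from InnerEye.ML.baselines_util import compare_folder_contents

    # Hypothetical locations of the expected and actual result folders
    expected = Path("RegressionTestResults/OUTPUT")
    actual = Path("outputs")
    messages = compare_folder_contents(expected, actual_folder=actual)
    if messages:
        # Mirrors how compare_folders_and_run_outputs reports the collected problems
        raise ValueError(os.linesep.join(messages))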


def compare_folders_and_run_outputs(expected: Path, actual: Path) -> None:
@@ -286,17 +277,24 @@ def compare_folders_and_run_outputs(expected: Path, actual: Path) -> None:
"""
if not expected.is_dir():
raise ValueError(f"Folder with expected files does not exist: {expected}")
# First compare the normal output files that the run produces
compare_folder_contents(expected, actual)
# Compare the set of files in the magic folder with the outputs stored in the run context
azureml_folder = expected / REGRESSION_TEST_AZUREML_FOLDER
if azureml_folder.is_dir():
compare_folder_contents(azureml_folder, run=RUN_CONTEXT)
# Compare the set of files in the magic folder with the outputs stored in the run context of the parent run
azureml_parent_folder = expected / REGRESSION_TEST_AZUREML_PARENT_FOLDER
if azureml_parent_folder.is_dir():
if PARENT_RUN_CONTEXT is None:
raise ValueError(f"The set of expected test results in {expected} contains a folder "
f"{REGRESSION_TEST_AZUREML_PARENT_FOLDER}, but the present run is not a cross-validation "
"child run")
compare_folder_contents(azureml_parent_folder, run=PARENT_RUN_CONTEXT)
logging.debug(f"Current working directory: {Path.cwd()}")
messages = []
for (subfolder, message_prefix, actual_folder, run_to_compare) in \
[(REGRESSION_TEST_OUTPUT_FOLDER, "run output files", actual, None),
(REGRESSION_TEST_AZUREML_FOLDER, "AzureML outputs in present run", None, RUN_CONTEXT),
(REGRESSION_TEST_AZUREML_PARENT_FOLDER, "AzureML outputs in parent run", None, PARENT_RUN_CONTEXT)]:
folder = expected / subfolder
if folder.is_dir():
logging.info(f"Comparing results in {folder} against {message_prefix}:")
if actual_folder is None and run_to_compare is None:
raise ValueError(f"The set of expected test results in {expected} contains a folder "
f"{subfolder}, but there is no (parent) run to compare against.")
new_messages = compare_folder_contents(folder, actual_folder=actual_folder, run=run_to_compare)
if new_messages:
messages.append(f"Issues in {message_prefix}:")
messages.extend(new_messages)
else:
logging.info(f"Folder {subfolder} not found, skipping comparison against {message_prefix}.")
if messages:
raise ValueError(f"Some expected files were missing or did not have the expected contents:{os.linesep}"
f"{os.linesep.join(messages)}")
10 changes: 6 additions & 4 deletions InnerEye/ML/run_ml.py
@@ -34,7 +34,7 @@
change_working_directory, get_best_epoch_results_path, is_windows, logging_section, logging_to_file, \
print_exception, remove_file_or_directory
from InnerEye.Common.fixed_paths import INNEREYE_PACKAGE_NAME, LOG_FILE_NAME, PYTHON_ENVIRONMENT_NAME
from InnerEye.ML.baselines_util import compare_folder_contents
from InnerEye.ML.baselines_util import compare_folders_and_run_outputs
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.deep_learning_config import CHECKPOINT_FOLDER, DeepLearningConfig, FINAL_ENSEMBLE_MODEL_FOLDER, \
@@ -411,8 +411,8 @@ def run(self) -> None:
# run context.
logging.info("Comparing the current results against stored results")
if self.is_normal_run_or_crossval_child_0():
compare_folder_contents(expected_folder=self.container.regression_test_folder,
actual_folder=self.container.outputs_folder)
compare_folders_and_run_outputs(expected=self.container.regression_test_folder,
actual=self.container.outputs_folder)
else:
logging.info("Skipping because this is not cross-validation child run 0.")

@@ -712,9 +712,11 @@ def copy_file(source: Path, destination_file: str) -> None:
else:
raise ValueError(f"Expected an absolute path to a checkpoint file, but got: {checkpoint}")
model_folder.mkdir(parents=True, exist_ok=True)
# For reproducibility of the files used in regression tests, checkpoint paths need to be sorted.
checkpoints_sorted = sorted(relative_checkpoint_paths)
model_inference_config = ModelInferenceConfig(model_name=self.container.model_name,
model_configs_namespace=self.config_namespace,
checkpoint_paths=relative_checkpoint_paths)
checkpoint_paths=checkpoints_sorted)
# Inference configuration must live in the root folder of the registered model
full_path_to_config = model_folder / fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME
full_path_to_config.write_text(model_inference_config.to_json(), encoding='utf-8') # type: ignore
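
A minimal sketch (not from the commit) of why the sorting matters: sorted() produces the same ordering regardless of how the checkpoints were discovered. The two paths are the ones visible in the expected-results JSON further down:

    relative_checkpoint_paths = ["checkpoints/best_checkpoint.ckpt",
                                 "checkpoints/OTHER_RUNS/1/best_checkpoint.ckpt"]
    # Lexicographic sort: uppercase "OTHER_RUNS" sorts before lowercase "best_checkpoint.ckpt"
    print(sorted(relative_checkpoint_paths))
    # ['checkpoints/OTHER_RUNS/1/best_checkpoint.ckpt', 'checkpoints/best_checkpoint.ckpt']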
4 changes: 2 additions & 2 deletions InnerEye/ML/visualizers/metrics_scatterplot.py
@@ -206,7 +206,7 @@ def to_dict(data: pd.DataFrame) -> Dict[str, Dict[str, float]]:

def write_to_scatterplot_directory(root_folder: Path, plots: Dict[str, plt.Figure]) -> None:
"""
Writes a file root_folder/scatterplots/basename.jpg for every plot in plots with key "basename".
Writes a file root_folder/scatterplots/basename.png for every plot in plots with key "basename".
:param root_folder: path to a folder
:param plots: dictionary from plot basenames to plots (plt.Figure objects)
"""
@@ -217,7 +217,7 @@ def write_to_scatterplot_directory(root_folder: Path, plots: Dict[str, plt.Figure]) -> None:
scatterplot_dir.mkdir(parents=True, exist_ok=True)
logging.info(f"There are {len(plots)} plots to write to {scatterplot_dir}")
for basename, fig in plots.items():
fig.savefig(scatterplot_dir / f"{basename}.jpg")
fig.savefig(scatterplot_dir / f"{basename}.png")


def main() -> None:
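
For illustration only (not part of the diff), writing a single hypothetical plot now produces a PNG file; the plot name is made up:

    from pathlib import Path
    import matplotlib.pyplot as plt
    from InnerEye.ML.visualizers.metrics_scatterplot import write_to_scatterplot_directory

    fig = plt.figure()  # hypothetical figure, normally produced by the cross-validation analysis
    write_to_scatterplot_directory(root_folder=Path("outputs"), plots={"dice_comparison": fig})
    # Expected output file: outputs/scatterplots/dice_comparison.png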
2 changes: 1 addition & 1 deletion InnerEye/ML/visualizers/plot_cross_validation.py
@@ -647,7 +647,7 @@ def plot_metrics(config: PlotCrossValidationConfig,

# save plot
suffix = f"_{sub_df_index}" if len(df_list) > 1 else ""
plot_dst = root / f"{metric_type}_{mode.value}_splits{suffix}.jpg"
plot_dst = root / f"{metric_type}_{mode.value}_splits{suffix}.png"
fig.savefig(plot_dst, bbox_inches='tight')
logging.info("Saved box-plots to: {}".format(plot_dst))


5 files were deleted (file names not rendered in this view).

@@ -1 +1 @@
{"model_name": "BasicModel2Epochs", "checkpoint_paths": ["checkpoints/best_checkpoint.ckpt", "checkpoints/OTHER_RUNS/1/best_checkpoint.ckpt"], "model_configs_namespace": "InnerEye.ML.configs.segmentation.BasicModel2Epochs"}
{"model_name": "BasicModel2Epochs", "checkpoint_paths": ["checkpoints/OTHER_RUNS/1/best_checkpoint.ckpt", "checkpoints/best_checkpoint.ckpt"], "model_configs_namespace": "InnerEye.ML.configs.segmentation.BasicModel2Epochs"}