From d1331e5f36c352cc81c86679037a221269a493b0 Mon Sep 17 00:00:00 2001
From: Juan Nunez-Iglesias <jni@fastmail.com>
Date: Tue, 12 Nov 2024 17:23:03 -0600
Subject: [PATCH] Make autoloading more robust to bad yaml files (#114)

Having an incorrect yaml file in the default paths (root dir and config dir)
can cause the plugin to crash completely. I ran into this in #104, which was
particularly bad because the plugin wrote the inconsistent file in the first
place. However, there's many reasons why bad yaml might exist in the
default directories, including:

- user incorrectly edits a file
- the schema changes
- valid action IDs change
- the version of the plugin changes (schema and paths aren't pinned to
  versions)

Non-yaml files are already ignored, so I don't think it's a big difference if
we also ignore invalid yaml files. Until we provide utilities to clean up the
directories, I think robustness to bad input is very important.

In this PR, if we encounter any error while loading all models from given
directories, we ignore the error and move on to the next file.
---
 .../models/_tests/test_utils.py               | 27 +++++++++++++
 midi_app_controller/models/utils.py           | 39 ++++++++++++-------
 2 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/midi_app_controller/models/_tests/test_utils.py b/midi_app_controller/models/_tests/test_utils.py
index ff35970..368f54b 100644
--- a/midi_app_controller/models/_tests/test_utils.py
+++ b/midi_app_controller/models/_tests/test_utils.py
@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 from typing import Optional
 
@@ -24,6 +25,11 @@ def yaml_data() -> dict:
     return {"key2": "value2", "key1": "value1", "key3": ["a", "b"], "key4": None}
 
 
+@pytest.fixture
+def other_yaml_data() -> dict:
+    return {"other_key": "hello"}
+
+
 @pytest.fixture
 def yaml_str(yaml_data) -> str:
     dumped = yaml.safe_dump(yaml_data, default_flow_style=False, sort_keys=False)
@@ -51,6 +57,27 @@ def test_load_from_when_invalid_data(yaml_file, yaml_data):
         OtherTempYamlModel.load_from(yaml_file)
 
 
+def test_load_all_from_robustness(tmp_path, yaml_data, other_yaml_data):
+    d1 = tmp_path / "1"
+    os.mkdir(d1)
+    d2 = tmp_path / "2"
+    os.mkdir(d2)
+    non_existent_dir = tmp_path / "none"
+    with open(d1 / "m1.yaml", "w") as f:
+        yaml.safe_dump(yaml_data, f)
+    with open(d1 / "m2.yaml", "w") as f:
+        yaml.safe_dump(other_yaml_data, f)
+    with open(d2 / "m1.yml", "w") as f:
+        yaml.safe_dump(yaml_data, f)
+    with open(d2 / "distractor.txt", "w") as f:
+        f.write("not relevant\n")
+    with pytest.warns(UserWarning, match="Unable to load model"):
+        models = TempYamlModel.load_all_from([d1, d2, non_existent_dir])
+    assert len(models) == 2
+    assert models[0][1] == d1 / "m1.yaml"
+    assert models[1][1] == d2 / "m1.yml"
+
+
 def test_save_to(yaml_data, yaml_str, tmp_path):
     model = TempYamlModel(**yaml_data)
     yaml_file = tmp_path / "saved.yaml"
diff --git a/midi_app_controller/models/utils.py b/midi_app_controller/models/utils.py
index 6960bab..57187be 100644
--- a/midi_app_controller/models/utils.py
+++ b/midi_app_controller/models/utils.py
@@ -1,10 +1,12 @@
+import itertools
 import os
 import uuid
 from pathlib import Path
 from typing import Any, Optional
+from warnings import warn
 
 import yaml
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationError
 
 from midi_app_controller.config import Config
 from midi_app_controller.gui.utils import is_subpath
@@ -19,6 +21,11 @@ def _path_representer(dumper, data):
 yaml.SafeDumper.add_multi_representer(Path, _path_representer)
 
 
+def _abs_listdir(d: Path) -> list[Path]:
+    """List the contents of directory as absolute paths."""
+    return [d / p for p in os.listdir(d)]
+
+
 class YamlBaseModel(BaseModel):
     @classmethod
     def load_from(cls, path: Path) -> "YamlBaseModel":
@@ -43,8 +50,9 @@ def load_from(cls, path: Path) -> "YamlBaseModel":
     def load_all_from(
         cls, directories: list[Path]
     ) -> list[tuple["YamlBaseModel", Path]]:
-        """Creates models initialized with data from all YAML files in
-        multiple directories.
+        """Return models with data from all YAML files in multiple directories.
+
+        If a yaml file fails to load, it is skipped and a warning is emitted.
 
         Parameters
         ----------
@@ -56,16 +64,21 @@ def load_all_from(
         list[tuple[cls, Path]]
             List of created models with paths to corresponding YAML files.
         """
-        return [
-            (
-                cls.load_from(directory / filename),
-                directory / filename,
-            )
-            for directory in directories
-            if directory.exists()
-            for filename in os.listdir(directory)
-            if filename.lower().endswith((".yaml", ".yml"))
-        ]
+        all_models = []
+        real_directories = filter(os.path.exists, directories)
+        fns = itertools.chain(*map(_abs_listdir, real_directories))
+        yamls = (fn for fn in fns if fn.suffix in {".yaml", ".yml"})
+        for fn in yamls:
+            try:
+                model = cls.load_from(fn)
+                all_models.append((model, fn))
+            except (ValidationError, Exception) as e:
+                warn(
+                    f"Unable to load model from file {fn}; got error:\n"
+                    f"{e.__class__}: {e}",
+                    stacklevel=2,
+                )
+        return all_models
 
     def save_to(self, path: Path) -> None:
         """Saves the model's data to a YAML file.