diff --git a/odml/doc.py b/odml/doc.py index 29f366a0..fa8592bf 100644 --- a/odml/doc.py +++ b/odml/doc.py @@ -8,6 +8,7 @@ from . import dtypes from . import format as fmt from . import terminology +from . import validation from .tools.doc_inherit import inherit_docstring, allow_inherit_docstring @@ -152,6 +153,14 @@ def finalize(self): if sec._include is not None: sec.include = sec._include + def validate(self): + """ + Runs a validation on itself and returns the Validation object. + + :return: odml.Validation + """ + return validation.Validation(self) + @inherit_docstring def get_terminology_equivalent(self): if self.repository is None: diff --git a/odml/property.py b/odml/property.py index 54d267fa..82537bda 100644 --- a/odml/property.py +++ b/odml/property.py @@ -213,6 +213,11 @@ def name(self, new_name): if self.name == new_name: return + # Make sure name cannot be set to None or empty + if not new_name: + self._name = self._id + return + curr_parent = self.parent if hasattr(curr_parent, "properties") and new_name in curr_parent.properties: @@ -569,11 +574,12 @@ def _values_cardinality_validation(self): is respected and prints a warning message otherwise. """ valid = validation.Validation(self) + val_id = validation.IssueID.property_values_cardinality # Make sure to display only warnings of the current property - res = [curr for curr in valid.errors if self.id == curr.obj.id] - for err in res: - print("%s: %s" % (err.rank.capitalize(), err.msg)) + for curr in valid.errors: + if curr.validation_id == val_id and self.id == curr.obj.id: + print("%s: %s" % (curr.rank.capitalize(), curr.msg)) def set_values_cardinality(self, min_val=None, max_val=None): """ diff --git a/odml/section.py b/odml/section.py index 536a56ef..3a015978 100644 --- a/odml/section.py +++ b/odml/section.py @@ -171,6 +171,11 @@ def name(self, new_value): if self.name == new_value: return + # Make sure name cannot be set to None or empty + if not new_value: + self._name = self._id + return + curr_parent = self.parent if hasattr(curr_parent, "sections") and new_value in curr_parent.sections: raise KeyError("Object with the same name already exists!") @@ -416,10 +421,12 @@ def _sections_cardinality_validation(self): is respected and prints a warning message otherwise. """ valid = validation.Validation(self) + val_id = validation.IssueID.section_sections_cardinality + # Make sure to display only warnings of the current section - res = [curr for curr in valid.errors if self.id == curr.obj.id] - for err in res: - print("%s: %s" % (err.rank.capitalize(), err.msg)) + for curr in valid.errors: + if curr.validation_id == val_id and self.id == curr.obj.id: + print("%s: %s" % (curr.rank.capitalize(), curr.msg)) @property def prop_cardinality(self): @@ -469,10 +476,12 @@ def _properties_cardinality_validation(self): is respected and prints a warning message otherwise. """ valid = validation.Validation(self) + val_id = validation.IssueID.section_properties_cardinality + # Make sure to display only warnings of the current section - res = [curr for curr in valid.errors if self.id == curr.obj.id] - for err in res: - print("%s: %s" % (err.rank.capitalize(), err.msg)) + for curr in valid.errors: + if curr.validation_id == val_id and self.id == curr.obj.id: + print("%s: %s" % (curr.rank.capitalize(), curr.msg)) @inherit_docstring def get_terminology_equivalent(self): diff --git a/odml/tools/odmlparser.py b/odml/tools/odmlparser.py index 8a25a61e..1e3f8f6a 100644 --- a/odml/tools/odmlparser.py +++ b/odml/tools/odmlparser.py @@ -7,6 +7,7 @@ import datetime import json import sys +import warnings from os.path import basename @@ -61,11 +62,18 @@ def write_file(self, odml_document, filename): msg = "" for err in validation.errors: if err.is_error: - msg += "\n\t- %s %s: %s" % (err.obj, err.rank, err.msg) + # msg += "\n\t- %s %s: %s" % (err.obj, err.rank, err.msg) + msg += "\n- %s" % err if msg != "": msg = "Resolve document validation errors before saving %s" % msg raise ParserException(msg) + report = validation.report() + if report: + msg += "The saved Document contains unresolved issues." + msg += " Run the Documents 'validate' method to access them.\n%s" % report + warnings.warn(msg) + with open(filename, 'w') as file: # Add XML header to support odML stylesheets. if self.parser == 'XML': @@ -153,6 +161,13 @@ def __init__(self, parser='XML', show_warnings=True): self.show_warnings = show_warnings self.warnings = [] + def _validation_warning(self): + report = Validation(self.doc).report() + if report: + msg = "The loaded Document contains unresolved issues." + msg += " Run the Documents 'validate' method to access them.\n%s" % report + warnings.warn(msg) + def from_file(self, file, doc_format=None): """ Loads an odML document from a file. The ODMLReader.parser specifies the @@ -171,6 +186,11 @@ def from_file(self, file, doc_format=None): show_warnings=self.show_warnings) self.warnings = par.warnings self.doc = par.from_file(file) + + # Print validation warnings after parsing + if self.show_warnings: + self._validation_warning() + return self.doc if self.parser == 'YAML': @@ -188,6 +208,11 @@ def from_file(self, file, doc_format=None): self.doc = par.to_odml(self.parsed_doc) # Provide original file name via the in memory document self.doc.origin_file_name = basename(file) + + # Print validation warnings after parsing + if self.show_warnings: + self._validation_warning() + return self.doc if self.parser == 'JSON': @@ -202,13 +227,27 @@ def from_file(self, file, doc_format=None): self.doc = par.to_odml(self.parsed_doc) # Provide original file name via the in memory document self.doc.origin_file_name = basename(file) + + # Print validation warnings after parsing + if self.show_warnings: + self._validation_warning() + return self.doc if self.parser == 'RDF': if not doc_format: raise ValueError("Format of the rdf file was not specified") + # Importing from an RDF graph can return multiple documents self.doc = RDFReader().from_file(file, doc_format) + + for doc in self.doc: + report = Validation(doc).report() + if report: + msg = "The loaded Document contains unresolved issues." + msg += " Run the Documents 'validate' method to access them.\n%s" % report + warnings.warn(msg) + return self.doc def from_string(self, string, doc_format=None): @@ -227,6 +266,11 @@ def from_string(self, string, doc_format=None): if self.parser == 'XML': self.doc = xmlparser.XMLReader().from_string(string) + + # Print validation warnings after parsing + if self.show_warnings: + self._validation_warning() + return self.doc if self.parser == 'YAML': @@ -237,6 +281,11 @@ def from_string(self, string, doc_format=None): return self.doc = DictReader().to_odml(self.parsed_doc) + + # Print validation warnings after parsing + if self.show_warnings: + self._validation_warning() + return self.doc if self.parser == 'JSON': @@ -247,13 +296,27 @@ def from_string(self, string, doc_format=None): return self.doc = DictReader().to_odml(self.parsed_doc) + + # Print validation warnings after parsing + if self.show_warnings: + self._validation_warning() + return self.doc if self.parser == 'RDF': if not doc_format: raise ValueError("Format of the rdf file was not specified") + # Importing from an RDF graph can return multiple documents self.doc = RDFReader().from_string(string, doc_format) + + for doc in self.doc: + report = Validation(doc).report() + if report: + msg = "The loaded Document contains unresolved issues." + msg += " Run the Documents 'validate' method to access them.\n%s" % report + warnings.warn(msg) + return self.doc diff --git a/odml/validation.py b/odml/validation.py index 2107e5ac..800fc793 100644 --- a/odml/validation.py +++ b/odml/validation.py @@ -4,12 +4,54 @@ """ import re + +from enum import Enum + from . import dtypes +try: + unicode = unicode +except NameError: + unicode = str + LABEL_ERROR = 'error' LABEL_WARNING = 'warning' +class IssueID(Enum): + """ + IDs identifying registered validation handlers. + """ + unspecified = 1 + + # Required attributes validations + object_required_attributes = 101 + section_type_must_be_defined = 102 + + # Unique id, name and type validations + section_unique_ids = 200 + property_unique_ids = 201 + section_unique_name_type = 202 + property_unique_name = 203 + + # Good form validations + object_name_readable = 300 + + # Property specific validations + property_terminology_check = 400 + property_dependency_check = 401 + property_values_check = 402 + property_values_string_check = 403 + + # Cardinality validations + section_properties_cardinality = 500 + section_sections_cardinality = 501 + property_values_cardinality = 502 + + # Optional validations + section_repository_present = 600 + + class ValidationError(object): """ Represents an error found in the validation process. @@ -18,10 +60,11 @@ class ValidationError(object): a message and a rank which may be one of: 'error', 'warning'. """ - def __init__(self, obj, msg, rank=LABEL_ERROR): + def __init__(self, obj, msg, rank=LABEL_ERROR, validation_id=None): self.obj = obj self.msg = msg self.rank = rank + self.validation_id = validation_id @property def is_warning(self): @@ -45,9 +88,15 @@ def path(self): return self.obj.get_path() def __repr__(self): - return "" % (self.rank, - self.obj, - self.msg) + # Cleanup the odml object print strings + print_str = unicode(self.obj).split()[0].split("[")[0].split(":")[0] + # Document has no name attribute and should not print id or name info + if hasattr(self.obj, "name"): + if self.obj.name and self.obj.name != self.obj.id: + print_str = "%s[%s]" % (print_str, self.obj.name) + else: + print_str = "%s[%s]" % (print_str, self.obj.id) + return "Validation%s: %s '%s'" % (self.rank.capitalize(), print_str, self.msg) class Validation(object): @@ -124,6 +173,36 @@ def run_validation(self): for prop in sec.properties: self.validate(prop) + def report(self): + """ + Validates the registered object and returns a results report. + """ + self.run_validation() + + err_count = 0 + reduce = set() + sec_count = 0 + prop_count = 0 + + for i in self.errors: + if i.is_error: + err_count += 1 + + if i.obj not in reduce and 'section' in str(i.obj).lower(): + sec_count += 1 + elif i.obj not in reduce and 'property' in str(i.obj).lower(): + prop_count += 1 + + reduce.add(i.obj) + + warn_count = len(self.errors) - err_count + msg = "" + if err_count or warn_count: + msg = "Validation found %s errors and %s warnings" % (err_count, warn_count) + msg += " in %s Sections and %s Properties." % (sec_count, prop_count) + + return msg + def register_custom_handler(self, klass, handler): """ Adds a validation handler for an odml class. The handler is called in the @@ -160,17 +239,18 @@ def object_required_attributes(obj): :param obj: document, section or property. """ + validation_id = IssueID.object_required_attributes args = obj.format().arguments for arg in args: if arg[1] == 1: msg = "Missing required attribute '%s'" % (arg[0]) if not hasattr(obj, arg[0]): - yield ValidationError(obj, msg, LABEL_ERROR) + yield ValidationError(obj, msg, LABEL_ERROR, validation_id) continue obj_arg = getattr(obj, arg[0]) if not obj_arg and not isinstance(obj_arg, bool): - yield ValidationError(obj, msg, LABEL_ERROR) + yield ValidationError(obj, msg, LABEL_ERROR, validation_id) Validation.register_handler('odML', object_required_attributes) @@ -184,37 +264,40 @@ def section_type_must_be_defined(sec): :param sec: odml.Section. """ + validation_id = IssueID.section_type_must_be_defined + if sec.type and sec.type == "n.s.": - yield ValidationError(sec, "Section type not specified", LABEL_WARNING) + yield ValidationError(sec, "Section type not specified", LABEL_WARNING, validation_id) Validation.register_handler('section', section_type_must_be_defined) +# The Section repository present is no longer part of the default validation +# and should be added on demand. def section_repository_present(sec): """ 1. warn, if a section has no repository or 2. the section type is not present in the repository """ + validation_id = IssueID.section_repository_present + repo = sec.get_repository() if repo is None: msg = "A section should have an associated repository" - yield ValidationError(sec, msg, LABEL_WARNING) + yield ValidationError(sec, msg, LABEL_WARNING, validation_id) return try: tsec = sec.get_terminology_equivalent() except Exception as exc: msg = "Could not load terminology: %s" % exc - yield ValidationError(sec, msg, LABEL_WARNING) + yield ValidationError(sec, msg, LABEL_WARNING, validation_id) return if tsec is None: msg = "Section type '%s' not found in terminology" % sec.type - yield ValidationError(sec, msg, LABEL_WARNING) - - -Validation.register_handler('section', section_repository_present) + yield ValidationError(sec, msg, LABEL_WARNING, validation_id) def document_unique_ids(doc): @@ -244,6 +327,8 @@ def section_unique_ids(parent, id_map=None): :param parent: odML Document or Section :param id_map: "id":"odML object / path" dictionary """ + validation_id = IssueID.section_unique_ids + if not id_map: id_map = {} @@ -253,7 +338,7 @@ def section_unique_ids(parent, id_map=None): if sec.id in id_map: msg = "Duplicate id in Section '%s' and %s" % (sec.get_path(), id_map[sec.id]) - yield ValidationError(sec, msg) + yield ValidationError(sec, msg, validation_id=validation_id) else: id_map[sec.id] = "Section '%s'" % sec.get_path() @@ -273,6 +358,8 @@ def property_unique_ids(section, id_map=None): :param section: odML Section :param id_map: "id":"odML object / path" dictionary """ + validation_id = IssueID.property_unique_ids + if not id_map: id_map = {} @@ -280,7 +367,7 @@ def property_unique_ids(section, id_map=None): if prop.id in id_map: msg = "Duplicate id in Property '%s' and %s" % (prop.get_path(), id_map[prop.id]) - yield ValidationError(prop, msg) + yield ValidationError(prop, msg, validation_id=validation_id) else: id_map[prop.id] = "Property '%s'" % prop.get_path() @@ -288,24 +375,26 @@ def property_unique_ids(section, id_map=None): Validation.register_handler('odML', document_unique_ids) -def object_unique_names(obj, children, attr=lambda x: x.name, +def object_unique_names(obj, validation_id, children, attr=lambda x: x.name, msg="Object names must be unique"): """ - Tests that object names within one Section are unique. + Tests that object names within a Section are unique. :param obj: odml class instance the validation is applied on. + :param validation_id: id of the :param children: a function that returns the children to be considered. - This is to be able to use the same function for sections and properties. - :param attr: a function that returns the item that needs to be unique - :param msg: error message that will be registered upon a ValidationError. + Required when handling Sections. + :param attr: a function that returns the attribute that needs to be unique. + :param msg: error message that will be registered with a ValidationError. """ names = set(map(attr, children(obj))) if len(names) == len(children(obj)): - return # quick exit + return + names = set() for i in children(obj): if attr(i) in names: - yield ValidationError(i, msg, LABEL_ERROR) + yield ValidationError(i, msg, LABEL_ERROR, validation_id) names.add(attr(i)) @@ -317,6 +406,7 @@ def section_unique_name_type(obj): """ for i in object_unique_names( obj, + validation_id=IssueID.section_unique_name_type, attr=lambda x: (x.name, x.type), children=lambda x: x.sections, msg="name/type combination must be unique"): @@ -329,7 +419,9 @@ def property_unique_names(obj): :param obj: odml class instance the validation is applied on. """ - for i in object_unique_names(obj, lambda x: x.properties): + for i in object_unique_names(obj, + validation_id=IssueID.property_unique_name, + children=lambda x: x.properties): yield i @@ -344,8 +436,10 @@ def object_name_readable(obj): :param obj: odml.Section or odml.Property. """ + validation_id = IssueID.object_name_readable + if obj.name == obj.id: - yield ValidationError(obj, 'Name should be readable', LABEL_WARNING) + yield ValidationError(obj, "Name not assigned", LABEL_WARNING, validation_id) Validation.register_handler('section', object_name_readable) @@ -358,6 +452,8 @@ def property_terminology_check(prop): 2. warn, if there are multiple values with different units or the unit does not match the one in the terminology. """ + validation_id = IssueID.property_terminology_check + if not prop.parent: return @@ -368,7 +464,7 @@ def property_terminology_check(prop): tsec.properties[prop.name] except KeyError: msg = "Property '%s' not found in terminology" % prop.name - yield ValidationError(prop, msg, LABEL_WARNING) + yield ValidationError(prop, msg, LABEL_WARNING, validation_id) Validation.register_handler('property', property_terminology_check) @@ -379,6 +475,8 @@ def property_dependency_check(prop): Produces a warning if the dependency attribute refers to a non-existent attribute or the dependency_value does not match. """ + validation_id = IssueID.property_dependency_check + if not prop.parent: return @@ -390,12 +488,12 @@ def property_dependency_check(prop): dep_obj = prop.parent[dep] except KeyError: msg = "Property refers to a non-existent dependency object" - yield ValidationError(prop, msg, LABEL_WARNING) + yield ValidationError(prop, msg, LABEL_WARNING, validation_id) return if prop.dependency_value not in dep_obj.values[0]: msg = "Dependency-value is not equal to value of the property's dependency" - yield ValidationError(prop, msg, LABEL_WARNING) + yield ValidationError(prop, msg, LABEL_WARNING, validation_id) Validation.register_handler('property', property_dependency_check) @@ -408,6 +506,7 @@ def property_values_check(prop): :param prop: property the validation is applied on. """ + validation_id = IssueID.property_values_check if prop.dtype is not None and prop.dtype != "": dtype = prop.dtype @@ -421,13 +520,13 @@ def property_values_check(prop): tuple_len = int(dtype[:-6]) if len(val) != tuple_len: msg = "Tuple of length %s not consistent with dtype %s!" % (len(val), dtype) - yield ValidationError(prop, msg, LABEL_WARNING) + yield ValidationError(prop, msg, LABEL_WARNING, validation_id) else: try: dtypes.get(val, dtype) except ValueError: msg = "Property values not of consistent dtype!" - yield ValidationError(prop, msg, LABEL_WARNING) + yield ValidationError(prop, msg, LABEL_WARNING, validation_id) Validation.register_handler('property', property_values_check) @@ -441,6 +540,7 @@ def property_values_string_check(prop): :param prop: property the validation is applied on. """ + validation_id = IssueID.property_values_string_check if prop.dtype != "string" or not prop.values: return @@ -479,14 +579,15 @@ def property_values_string_check(prop): res_dtype = "string" if res_dtype != "string": - msg = 'Dtype of property "%s" currently is "string", but might fit dtype "%s"!' % (prop.name, res_dtype) - yield ValidationError(prop, msg, LABEL_WARNING) + msg = 'Dtype of property "%s" currently is "string", but might fit dtype "%s"!' % \ + (prop.name, res_dtype) + yield ValidationError(prop, msg, LABEL_WARNING, validation_id) Validation.register_handler('property', property_values_string_check) -def _cardinality_validation(obj, cardinality, card_target_attr, validation_rank): +def _cardinality_validation(obj, cardinality, card_target_attr, validation_rank, validation_id): """ Helper function that validates the cardinality of an odml object attribute. Valid object-attribute combinations are Section-sections, Section-properties and @@ -498,6 +599,7 @@ def _cardinality_validation(obj, cardinality, card_target_attr, validation_rank) applied against. Supported values are: 'sections', 'properties' or 'values' :param validation_rank: Rank of the yielded ValidationError. + :param validation_id: string containing the id of the parent validation. :return: Returns a ValidationError, if a set cardinality is not met or None. """ @@ -521,7 +623,7 @@ def _cardinality_validation(obj, cardinality, card_target_attr, validation_rank) msg = "%s %s cardinality violated" % (obj_name, card_target_attr) msg += " (%s values, %s found)" % (invalid_cause, val_len) - err = ValidationError(obj, msg, validation_rank) + err = ValidationError(obj, msg, validation_rank, validation_id) return err @@ -534,7 +636,10 @@ def section_properties_cardinality(obj): :return: Yields a ValidationError warning, if a set cardinality is not met. """ - err = _cardinality_validation(obj, obj.prop_cardinality, 'properties', LABEL_WARNING) + validation_id = IssueID.section_properties_cardinality + + err = _cardinality_validation(obj, obj.prop_cardinality, 'properties', + LABEL_WARNING, validation_id) if err: yield err @@ -550,7 +655,10 @@ def section_sections_cardinality(obj): :return: Yields a ValidationError warning, if a set cardinality is not met. """ - err = _cardinality_validation(obj, obj.sec_cardinality, 'sections', LABEL_WARNING) + validation_id = IssueID.section_sections_cardinality + + err = _cardinality_validation(obj, obj.sec_cardinality, 'sections', + LABEL_WARNING, validation_id) if err: yield err @@ -566,7 +674,10 @@ def property_values_cardinality(obj): :return: Yields a ValidationError warning, if a set cardinality is not met. """ - err = _cardinality_validation(obj, obj.val_cardinality, 'values', LABEL_WARNING) + validation_id = IssueID.property_values_cardinality + + err = _cardinality_validation(obj, obj.val_cardinality, 'values', + LABEL_WARNING, validation_id) if err: yield err diff --git a/test/test_validation.py b/test/test_validation.py index d5aded91..a03b1725 100644 --- a/test/test_validation.py +++ b/test/test_validation.py @@ -195,21 +195,6 @@ def test_section_sections_cardinality(self): self.assertTrue(found) - def test_section_in_terminology(self): - doc = samplefile.parse("""s1[T1]""") - res = Validate(doc) - self.assertError(res, "A section should have an associated repository", - filter_rep=False) - - odml.terminology.terminologies['map'] = samplefile.parse(""" - s0[t0] - - S1[T1] - """) - doc.sections[0].repository = 'map' - res = Validate(doc) - # self.assertEqual(list(self.filter_mapping_errors(res.errors)), []) - self.assertEqual(res.errors, []) - def test_uniques(self): self.assertRaises(KeyError, samplefile.parse, """ s1[t1] @@ -291,7 +276,7 @@ def test_section_name_readable(self): sec = odml.Section("sec", parent=doc) sec.name = sec.id res = Validate(doc) - self.assertError(res, "Name should be readable") + self.assertError(res, "Name not assigned") def test_property_name_readable(self): """ @@ -302,7 +287,7 @@ def test_property_name_readable(self): prop = odml.Property("prop", parent=sec) prop.name = prop.id res = Validate(doc) - self.assertError(res, "Name should be readable") + self.assertError(res, "Name not assigned") def test_standalone_section(self): """ @@ -482,3 +467,27 @@ def test_load_dtypes_yaml(self): path = os.path.join(RES_DIR, "validation_dtypes.yaml") doc = odml.load(path, "YAML") self.load_dtypes_validation(doc) + + def test_section_in_terminology(self): + """ + Test optional section in terminology validation. + """ + doc = samplefile.parse("""s1[T1]""") + + # Set up custom validation and add section_repository_present handler + res = odml.validation.Validation(doc, validate=False, reset=True) + handler = odml.validation.section_repository_present + res.register_custom_handler('section', handler) + + res.run_validation() + self.assertError(res, "A section should have an associated repository", + filter_rep=False) + + odml.terminology.terminologies['map'] = samplefile.parse(""" + s0[t0] + - S1[T1] + """) + doc.sections[0].repository = 'map' + res = Validate(doc) + # self.assertEqual(list(self.filter_mapping_errors(res.errors)), []) + self.assertEqual(res.errors, [])