Improve validation of modules.yaml (#9878)

This PR improves the validation of `modules.yaml` by introducing a custom validator that checks if an attribute listed in `properties` or `patternProperties` is a valid spec. This new check applied to the test case in #9857 gives:

```console
$ spack install szip
==> Error: /home/mculpo/.spack/linux/modules.yaml:5: "^python@2.7@" is an invalid spec [Invalid version specifier]
```

Details: 
* Moved the set-up of a custom validator class to spack.schema
  * In Spack we use `jsonschema` to validate configuration files 
    against a schema. We also need custom validators to enforce
    writing default values within "properties" or "patternProperties"
    attributes.

  * Currently, validators were customized at the place of use and with the
    recent introduction of environments that meant we were setting-up and
    using 2 different validator classes in two different modules.

  * This commit moves the set-up of a custom validator class in the
    `spack.schema` module and refactors the code in `spack.config` and
    `spack.environments` to use it.

* Added a custom validator to check if an attribute is a valid spec
  * Added a custom validator that can be used on objects, which yields an
    error if the attribute is not a valid spec.

* Updated the schema for modules.yaml

* Updated modules.yaml to fix a few inconsistencies:
  - a few attributes were not tested properly using 'anyOf'
  - suffixes has been updated to also check that the attribute is a spec
  - hierarchical_scheme has been updated to hierarchy

* Removed $ref from every schema
  * $ref is not composable or particularly legible
  * Use python dicts and regular old variables instead.
This commit is contained in:
Massimiliano Culpo 2019-01-01 09:11:49 +01:00 committed by Todd Gamblin
parent e6b2f0c179
commit 3b8b13809e
8 changed files with 269 additions and 169 deletions

View File

@ -562,6 +562,9 @@ def __getitem__(self, name):
def __contains__(self, element): def __contains__(self, element):
return element in self.instance return element in self.instance
def __call__(self, *args, **kwargs):
return self.instance(*args, **kwargs)
def __iter__(self): def __iter__(self):
return iter(self.instance) return iter(self.instance)

View File

@ -32,7 +32,6 @@
import copy import copy
import os import os
import re
import sys import sys
import multiprocessing import multiprocessing
from contextlib import contextmanager from contextlib import contextmanager
@ -49,6 +48,7 @@
import spack.paths import spack.paths
import spack.architecture import spack.architecture
import spack.schema
import spack.schema.compilers import spack.schema.compilers
import spack.schema.mirrors import spack.schema.mirrors
import spack.schema.repos import spack.schema.repos
@ -115,47 +115,6 @@ def first_existing(dictionary, keys):
raise KeyError("None of %s is in dict!" % keys) raise KeyError("None of %s is in dict!" % keys)
def _extend_with_default(validator_class):
"""Add support for the 'default' attr for properties and patternProperties.
jsonschema does not handle this out of the box -- it only
validates. This allows us to set default values for configs
where certain fields are `None` b/c they're deleted or
commented out.
"""
import jsonschema
validate_properties = validator_class.VALIDATORS["properties"]
validate_pattern_properties = validator_class.VALIDATORS[
"patternProperties"]
def set_defaults(validator, properties, instance, schema):
for property, subschema in iteritems(properties):
if "default" in subschema:
instance.setdefault(
property, copy.deepcopy(subschema["default"]))
for err in validate_properties(
validator, properties, instance, schema):
yield err
def set_pp_defaults(validator, properties, instance, schema):
for property, subschema in iteritems(properties):
if "default" in subschema:
if isinstance(instance, dict):
for key, val in iteritems(instance):
if re.match(property, key) and val is None:
instance[key] = copy.deepcopy(subschema["default"])
for err in validate_pattern_properties(
validator, properties, instance, schema):
yield err
return jsonschema.validators.extend(validator_class, {
"properties": set_defaults,
"patternProperties": set_pp_defaults
})
class ConfigScope(object): class ConfigScope(object):
"""This class represents a configuration scope. """This class represents a configuration scope.
@ -697,17 +656,10 @@ def _validate(data, schema, set_defaults=True):
This leverages the line information (start_mark, end_mark) stored This leverages the line information (start_mark, end_mark) stored
on Spack YAML structures. on Spack YAML structures.
""" """
import jsonschema import jsonschema
if not hasattr(_validate, 'validator'):
default_setting_validator = _extend_with_default(
jsonschema.Draft4Validator)
_validate.validator = default_setting_validator
try: try:
_validate.validator(schema).validate(data) spack.schema.Validator(schema).validate(data)
except jsonschema.ValidationError as e: except jsonschema.ValidationError as e:
raise ConfigFormatError(e, data) raise ConfigFormatError(e, data)

View File

@ -8,7 +8,6 @@
import sys import sys
import shutil import shutil
import jsonschema
import ruamel.yaml import ruamel.yaml
import llnl.util.filesystem as fs import llnl.util.filesystem as fs
@ -67,9 +66,6 @@
#: legal first keys in the spack.yaml manifest file #: legal first keys in the spack.yaml manifest file
env_schema_keys = ('spack', 'env') env_schema_keys = ('spack', 'env')
#: jsonschema validator for environments
_validator = None
def valid_env_name(name): def valid_env_name(name):
return re.match(valid_environment_name_re, name) return re.match(valid_environment_name_re, name)
@ -306,11 +302,9 @@ def all_environments():
def validate(data, filename=None): def validate(data, filename=None):
global _validator import jsonschema
if _validator is None:
_validator = jsonschema.Draft4Validator(spack.schema.env.schema)
try: try:
_validator.validate(data) spack.schema.Validator(spack.schema.env.schema).validate(data)
except jsonschema.ValidationError as e: except jsonschema.ValidationError as e:
raise spack.config.ConfigFormatError( raise spack.config.ConfigFormatError(
e, data, filename, e.instance.lc.line + 1) e, data, filename, e.instance.lc.line + 1)

View File

@ -4,3 +4,81 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""This module contains jsonschema files for all of Spack's YAML formats.""" """This module contains jsonschema files for all of Spack's YAML formats."""
import copy
import re
import six
import llnl.util.lang
import spack.spec
# jsonschema is imported lazily as it is heavy to import
# and increases the start-up time
def _make_validator():
import jsonschema
_validate_properties = jsonschema.Draft4Validator.VALIDATORS["properties"]
_validate_pattern_properties = jsonschema.Draft4Validator.VALIDATORS[
"patternProperties"
]
def _set_defaults(validator, properties, instance, schema):
"""Adds support for the 'default' attribute in 'properties'.
``jsonschema`` does not handle this out of the box -- it only
validates. This allows us to set default values for configs
where certain fields are `None` b/c they're deleted or
commented out.
"""
for property, subschema in six.iteritems(properties):
if "default" in subschema:
instance.setdefault(
property, copy.deepcopy(subschema["default"]))
for err in _validate_properties(
validator, properties, instance, schema):
yield err
def _set_pp_defaults(validator, properties, instance, schema):
"""Adds support for the 'default' attribute in 'patternProperties'.
``jsonschema`` does not handle this out of the box -- it only
validates. This allows us to set default values for configs
where certain fields are `None` b/c they're deleted or
commented out.
"""
for property, subschema in six.iteritems(properties):
if "default" in subschema:
if isinstance(instance, dict):
for key, val in six.iteritems(instance):
if re.match(property, key) and val is None:
instance[key] = copy.deepcopy(subschema["default"])
for err in _validate_pattern_properties(
validator, properties, instance, schema):
yield err
def _validate_spec(validator, is_spec, instance, schema):
"""Check if the attributes on instance are valid specs."""
import jsonschema
if not validator.is_type(instance, "object"):
return
for spec_str in instance:
try:
spack.spec.parse(spec_str)
except spack.spec.SpecParseError as e:
yield jsonschema.ValidationError(
'"{0}" is an invalid spec [{1}]'.format(spec_str, str(e))
)
return jsonschema.validators.extend(
jsonschema.Draft4Validator, {
"validate_spec": _validate_spec,
"properties": _set_defaults,
"patternProperties": _set_pp_defaults
}
)
Validator = llnl.util.lang.Singleton(_make_validator)

View File

@ -11,13 +11,11 @@
from llnl.util.lang import union_dicts from llnl.util.lang import union_dicts
import spack.schema.merged import spack.schema.merged
import spack.schema.modules
schema = { schema = {
'$schema': 'http://json-schema.org/schema#', '$schema': 'http://json-schema.org/schema#',
'title': 'Spack environment file schema', 'title': 'Spack environment file schema',
'definitions': spack.schema.modules.definitions,
'type': 'object', 'type': 'object',
'additionalProperties': False, 'additionalProperties': False,
'patternProperties': { 'patternProperties': {

View File

@ -6,7 +6,7 @@
"""Schema for configuration merged into one file. """Schema for configuration merged into one file.
.. literalinclude:: ../spack/schema/merged.py .. literalinclude:: ../spack/schema/merged.py
:lines: 40- :lines: 39-
""" """
from llnl.util.lang import union_dicts from llnl.util.lang import union_dicts
@ -33,7 +33,6 @@
schema = { schema = {
'$schema': 'http://json-schema.org/schema#', '$schema': 'http://json-schema.org/schema#',
'title': 'Spack merged configuration file schema', 'title': 'Spack merged configuration file schema',
'definitions': spack.schema.modules.definitions,
'type': 'object', 'type': 'object',
'additionalProperties': False, 'additionalProperties': False,
'properties': properties, 'properties': properties,

View File

@ -9,110 +9,110 @@
:lines: 13- :lines: 13-
""" """
#: Matches a spec or a multi-valued variant but not another
#: valid keyword.
#:
#: THIS NEEDS TO BE UPDATED FOR EVERY NEW KEYWORD THAT
#: IS ADDED IMMEDIATELY BELOW THE MODULE TYPE ATTRIBUTE
spec_regex = r'(?!hierarchy|verbose|hash_length|whitelist|' \
r'blacklist|naming_scheme|core_compilers|all)(^\w[\w-]*)'
#: Matches an anonymous spec, i.e. a spec without a root name
anonymous_spec_regex = r'^[\^@%+~]'
#: Definitions for parts of module schema #: Definitions for parts of module schema
definitions = { array_of_strings = {
'array_of_strings': { 'type': 'array', 'default': [], 'items': {'type': 'string'}
'type': 'array', }
'default': [],
'items': { dictionary_of_strings = {
'type': 'string' 'type': 'object', 'patternProperties': {r'\w[\w-]*': {'type': 'string'}}
} }
},
'dictionary_of_strings': { dependency_selection = {'type': 'string', 'enum': ['none', 'direct', 'all']}
'type': 'object',
'patternProperties': { module_file_configuration = {
r'\w[\w-]*': { # key 'type': 'object',
'type': 'string' 'default': {},
} 'additionalProperties': False,
} 'properties': {
}, 'filter': {
'dependency_selection': { 'type': 'object',
'type': 'string', 'default': {},
'enum': ['none', 'direct', 'all'] 'additionalProperties': False,
}, 'properties': {
'module_file_configuration': { 'environment_blacklist': {
'type': 'object', 'type': 'array',
'default': {}, 'default': [],
'additionalProperties': False, 'items': {
'properties': { 'type': 'string'
'filter': {
'type': 'object',
'default': {},
'additionalProperties': False,
'properties': {
'environment_blacklist': {
'type': 'array',
'default': [],
'items': {
'type': 'string'
}
} }
} }
}, }
'template': { },
'type': 'string' 'template': {
}, 'type': 'string'
'autoload': { },
'$ref': '#/definitions/dependency_selection'}, 'autoload': dependency_selection,
'prerequisites': { 'prerequisites': dependency_selection,
'$ref': '#/definitions/dependency_selection'}, 'conflict': array_of_strings,
'conflict': { 'load': array_of_strings,
'$ref': '#/definitions/array_of_strings'}, 'suffixes': {
'load': { 'type': 'object',
'$ref': '#/definitions/array_of_strings'}, 'validate_spec': True,
'suffixes': { 'patternProperties': {
'$ref': '#/definitions/dictionary_of_strings'}, r'\w[\w-]*': { # key
'environment': { 'type': 'string'
'type': 'object',
'default': {},
'additionalProperties': False,
'properties': {
'set': {
'$ref': '#/definitions/dictionary_of_strings'},
'unset': {
'$ref': '#/definitions/array_of_strings'},
'prepend_path': {
'$ref': '#/definitions/dictionary_of_strings'},
'append_path': {
'$ref': '#/definitions/dictionary_of_strings'}
} }
} }
},
'environment': {
'type': 'object',
'default': {},
'additionalProperties': False,
'properties': {
'set': dictionary_of_strings,
'unset': array_of_strings,
'prepend_path': dictionary_of_strings,
'append_path': dictionary_of_strings
}
} }
},
'module_type_configuration': {
'type': 'object',
'default': {},
'anyOf': [
{'properties': {
'verbose': {
'type': 'boolean',
'default': False
},
'hash_length': {
'type': 'integer',
'minimum': 0,
'default': 7
},
'whitelist': {
'$ref': '#/definitions/array_of_strings'},
'blacklist': {
'$ref': '#/definitions/array_of_strings'},
'blacklist_implicits': {
'type': 'boolean',
'default': False
},
'naming_scheme': {
'type': 'string' # Can we be more specific here?
}
}},
{'patternProperties': {
r'\w[\w-]*': {
'$ref': '#/definitions/module_file_configuration'
}
}}
]
} }
},
module_type_configuration = {
'type': 'object',
'default': {},
'allOf': [
{'properties': {
'verbose': {
'type': 'boolean',
'default': False
},
'hash_length': {
'type': 'integer',
'minimum': 0,
'default': 7
},
'whitelist': array_of_strings,
'blacklist': array_of_strings,
'blacklist_implicits': {
'type': 'boolean',
'default': False
},
'naming_scheme': {
'type': 'string' # Can we be more specific here?
},
'all': module_file_configuration,
}
},
{'validate_spec': True,
'patternProperties': {
spec_regex: module_file_configuration,
anonymous_spec_regex: module_file_configuration,
}
}
]
} }
@ -127,8 +127,9 @@
'type': 'object', 'type': 'object',
'patternProperties': { 'patternProperties': {
# prefix-relative path to be inspected for existence # prefix-relative path to be inspected for existence
r'\w[\w-]*': { r'\w[\w-]*': array_of_strings
'$ref': '#/definitions/array_of_strings'}}}, }
},
'enable': { 'enable': {
'type': 'array', 'type': 'array',
'default': [], 'default': [],
@ -138,28 +139,27 @@
'lmod': { 'lmod': {
'allOf': [ 'allOf': [
# Base configuration # Base configuration
{'$ref': '#/definitions/module_type_configuration'}, module_type_configuration,
{ {
'core_compilers': { 'type': 'object',
'$ref': '#/definitions/array_of_strings' 'properties': {
'core_compilers': array_of_strings,
'hierarchy': array_of_strings
}, },
'hierarchical_scheme': {
'$ref': '#/definitions/array_of_strings'
}
} # Specific lmod extensions } # Specific lmod extensions
] ]
}, },
'tcl': { 'tcl': {
'allOf': [ 'allOf': [
# Base configuration # Base configuration
{'$ref': '#/definitions/module_type_configuration'}, module_type_configuration,
{} # Specific tcl extensions {} # Specific tcl extensions
] ]
}, },
'dotkit': { 'dotkit': {
'allOf': [ 'allOf': [
# Base configuration # Base configuration
{'$ref': '#/definitions/module_type_configuration'}, module_type_configuration,
{} # Specific dotkit extensions {} # Specific dotkit extensions
] ]
}, },
@ -172,7 +172,6 @@
schema = { schema = {
'$schema': 'http://json-schema.org/schema#', '$schema': 'http://json-schema.org/schema#',
'title': 'Spack module file configuration file schema', 'title': 'Spack module file configuration file schema',
'definitions': definitions,
'type': 'object', 'type': 'object',
'additionalProperties': False, 'additionalProperties': False,
'properties': properties, 'properties': properties,

View File

@ -0,0 +1,77 @@
# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import jsonschema
import pytest
import spack.schema
@pytest.fixture()
def validate_spec_schema():
return {
'type': 'object',
'validate_spec': True,
'patternProperties': {
r'\w[\w-]*': {
'type': 'string'
}
}
}
@pytest.fixture()
def module_suffixes_schema():
return {
'type': 'object',
'properties': {
'tcl': {
'type': 'object',
'patternProperties': {
r'\w[\w-]*': {
'type': 'object',
'properties': {
'suffixes': {
'validate_spec': True,
'patternProperties': {
r'\w[\w-]*': {
'type': 'string',
}
}
}
}
}
}
}
}
}
@pytest.mark.regression('9857')
def test_validate_spec(validate_spec_schema):
v = spack.schema.Validator(validate_spec_schema)
data = {'foo@3.7': 'bar'}
# Validate good data (the key is a spec)
v.validate(data)
# Check that invalid data throws
data['^python@3.7@'] = 'baz'
with pytest.raises(jsonschema.ValidationError) as exc_err:
v.validate(data)
assert 'is an invalid spec' in str(exc_err.value)
@pytest.mark.regression('9857')
def test_module_suffixes(module_suffixes_schema):
v = spack.schema.Validator(module_suffixes_schema)
data = {'tcl': {'all': {'suffixes': {'^python@2.7@': 'py2.7'}}}}
with pytest.raises(jsonschema.ValidationError) as exc_err:
v.validate(data)
assert 'is an invalid spec' in str(exc_err.value)