Improve validation of modules.yaml (#9878)

This PR improves the validation of `modules.yaml` by introducing a custom validator that checks if an attribute listed in `properties` or `patternProperties` is a valid spec. This new check applied to the test case in #9857 gives:

```console
$ spack install szip
==> Error: /home/mculpo/.spack/linux/modules.yaml:5: "^python@2.7@" is an invalid spec [Invalid version specifier]
```

Details: 
* Moved the set-up of a custom validator class to spack.schema
  * In Spack we use `jsonschema` to validate configuration files 
    against a schema. We also need custom validators to enforce
    writing default values within "properties" or "patternProperties"
    attributes.

  * Currently, validators were customized at the place of use and with the
    recent introduction of environments that meant we were setting-up and
    using 2 different validator classes in two different modules.

  * This commit moves the set-up of a custom validator class in the
    `spack.schema` module and refactors the code in `spack.config` and
    `spack.environments` to use it.

* Added a custom validator to check if an attribute is a valid spec
  * Added a custom validator that can be used on objects, which yields an
    error if the attribute is not a valid spec.

* Updated the schema for modules.yaml

* Updated modules.yaml to fix a few inconsistencies:
  - a few attributes were not tested properly using 'anyOf'
  - suffixes has been updated to also check that the attribute is a spec
  - hierarchical_scheme has been updated to hierarchy

* Removed $ref from every schema
  * $ref is not composable or particularly legible
  * Use python dicts and regular old variables instead.
This commit is contained in:
Massimiliano Culpo 2019-01-01 09:11:49 +01:00 committed by Todd Gamblin
parent e6b2f0c179
commit 3b8b13809e
8 changed files with 269 additions and 169 deletions

View File

@ -562,6 +562,9 @@ def __getitem__(self, name):
def __contains__(self, element):
return element in self.instance
def __call__(self, *args, **kwargs):
return self.instance(*args, **kwargs)
def __iter__(self):
return iter(self.instance)

View File

@ -32,7 +32,6 @@
import copy
import os
import re
import sys
import multiprocessing
from contextlib import contextmanager
@ -49,6 +48,7 @@
import spack.paths
import spack.architecture
import spack.schema
import spack.schema.compilers
import spack.schema.mirrors
import spack.schema.repos
@ -115,47 +115,6 @@ def first_existing(dictionary, keys):
raise KeyError("None of %s is in dict!" % keys)
def _extend_with_default(validator_class):
"""Add support for the 'default' attr for properties and patternProperties.
jsonschema does not handle this out of the box -- it only
validates. This allows us to set default values for configs
where certain fields are `None` b/c they're deleted or
commented out.
"""
import jsonschema
validate_properties = validator_class.VALIDATORS["properties"]
validate_pattern_properties = validator_class.VALIDATORS[
"patternProperties"]
def set_defaults(validator, properties, instance, schema):
for property, subschema in iteritems(properties):
if "default" in subschema:
instance.setdefault(
property, copy.deepcopy(subschema["default"]))
for err in validate_properties(
validator, properties, instance, schema):
yield err
def set_pp_defaults(validator, properties, instance, schema):
for property, subschema in iteritems(properties):
if "default" in subschema:
if isinstance(instance, dict):
for key, val in iteritems(instance):
if re.match(property, key) and val is None:
instance[key] = copy.deepcopy(subschema["default"])
for err in validate_pattern_properties(
validator, properties, instance, schema):
yield err
return jsonschema.validators.extend(validator_class, {
"properties": set_defaults,
"patternProperties": set_pp_defaults
})
class ConfigScope(object):
"""This class represents a configuration scope.
@ -697,17 +656,10 @@ def _validate(data, schema, set_defaults=True):
This leverages the line information (start_mark, end_mark) stored
on Spack YAML structures.
"""
import jsonschema
if not hasattr(_validate, 'validator'):
default_setting_validator = _extend_with_default(
jsonschema.Draft4Validator)
_validate.validator = default_setting_validator
try:
_validate.validator(schema).validate(data)
spack.schema.Validator(schema).validate(data)
except jsonschema.ValidationError as e:
raise ConfigFormatError(e, data)

View File

@ -8,7 +8,6 @@
import sys
import shutil
import jsonschema
import ruamel.yaml
import llnl.util.filesystem as fs
@ -67,9 +66,6 @@
#: legal first keys in the spack.yaml manifest file
env_schema_keys = ('spack', 'env')
#: jsonschema validator for environments
_validator = None
def valid_env_name(name):
return re.match(valid_environment_name_re, name)
@ -306,11 +302,9 @@ def all_environments():
def validate(data, filename=None):
global _validator
if _validator is None:
_validator = jsonschema.Draft4Validator(spack.schema.env.schema)
import jsonschema
try:
_validator.validate(data)
spack.schema.Validator(spack.schema.env.schema).validate(data)
except jsonschema.ValidationError as e:
raise spack.config.ConfigFormatError(
e, data, filename, e.instance.lc.line + 1)

View File

@ -4,3 +4,81 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""This module contains jsonschema files for all of Spack's YAML formats."""
import copy
import re
import six
import llnl.util.lang
import spack.spec
# jsonschema is imported lazily as it is heavy to import
# and increases the start-up time
def _make_validator():
import jsonschema
_validate_properties = jsonschema.Draft4Validator.VALIDATORS["properties"]
_validate_pattern_properties = jsonschema.Draft4Validator.VALIDATORS[
"patternProperties"
]
def _set_defaults(validator, properties, instance, schema):
"""Adds support for the 'default' attribute in 'properties'.
``jsonschema`` does not handle this out of the box -- it only
validates. This allows us to set default values for configs
where certain fields are `None` b/c they're deleted or
commented out.
"""
for property, subschema in six.iteritems(properties):
if "default" in subschema:
instance.setdefault(
property, copy.deepcopy(subschema["default"]))
for err in _validate_properties(
validator, properties, instance, schema):
yield err
def _set_pp_defaults(validator, properties, instance, schema):
"""Adds support for the 'default' attribute in 'patternProperties'.
``jsonschema`` does not handle this out of the box -- it only
validates. This allows us to set default values for configs
where certain fields are `None` b/c they're deleted or
commented out.
"""
for property, subschema in six.iteritems(properties):
if "default" in subschema:
if isinstance(instance, dict):
for key, val in six.iteritems(instance):
if re.match(property, key) and val is None:
instance[key] = copy.deepcopy(subschema["default"])
for err in _validate_pattern_properties(
validator, properties, instance, schema):
yield err
def _validate_spec(validator, is_spec, instance, schema):
"""Check if the attributes on instance are valid specs."""
import jsonschema
if not validator.is_type(instance, "object"):
return
for spec_str in instance:
try:
spack.spec.parse(spec_str)
except spack.spec.SpecParseError as e:
yield jsonschema.ValidationError(
'"{0}" is an invalid spec [{1}]'.format(spec_str, str(e))
)
return jsonschema.validators.extend(
jsonschema.Draft4Validator, {
"validate_spec": _validate_spec,
"properties": _set_defaults,
"patternProperties": _set_pp_defaults
}
)
Validator = llnl.util.lang.Singleton(_make_validator)

View File

@ -11,13 +11,11 @@
from llnl.util.lang import union_dicts
import spack.schema.merged
import spack.schema.modules
schema = {
'$schema': 'http://json-schema.org/schema#',
'title': 'Spack environment file schema',
'definitions': spack.schema.modules.definitions,
'type': 'object',
'additionalProperties': False,
'patternProperties': {

View File

@ -6,7 +6,7 @@
"""Schema for configuration merged into one file.
.. literalinclude:: ../spack/schema/merged.py
:lines: 40-
:lines: 39-
"""
from llnl.util.lang import union_dicts
@ -33,7 +33,6 @@
schema = {
'$schema': 'http://json-schema.org/schema#',
'title': 'Spack merged configuration file schema',
'definitions': spack.schema.modules.definitions,
'type': 'object',
'additionalProperties': False,
'properties': properties,

View File

@ -9,110 +9,110 @@
:lines: 13-
"""
#: Matches a spec or a multi-valued variant but not another
#: valid keyword.
#:
#: THIS NEEDS TO BE UPDATED FOR EVERY NEW KEYWORD THAT
#: IS ADDED IMMEDIATELY BELOW THE MODULE TYPE ATTRIBUTE
spec_regex = r'(?!hierarchy|verbose|hash_length|whitelist|' \
r'blacklist|naming_scheme|core_compilers|all)(^\w[\w-]*)'
#: Matches an anonymous spec, i.e. a spec without a root name
anonymous_spec_regex = r'^[\^@%+~]'
#: Definitions for parts of module schema
definitions = {
'array_of_strings': {
'type': 'array',
'default': [],
'items': {
'type': 'string'
}
},
'dictionary_of_strings': {
'type': 'object',
'patternProperties': {
r'\w[\w-]*': { # key
'type': 'string'
}
}
},
'dependency_selection': {
'type': 'string',
'enum': ['none', 'direct', 'all']
},
'module_file_configuration': {
'type': 'object',
'default': {},
'additionalProperties': False,
'properties': {
'filter': {
'type': 'object',
'default': {},
'additionalProperties': False,
'properties': {
'environment_blacklist': {
'type': 'array',
'default': [],
'items': {
'type': 'string'
}
array_of_strings = {
'type': 'array', 'default': [], 'items': {'type': 'string'}
}
dictionary_of_strings = {
'type': 'object', 'patternProperties': {r'\w[\w-]*': {'type': 'string'}}
}
dependency_selection = {'type': 'string', 'enum': ['none', 'direct', 'all']}
module_file_configuration = {
'type': 'object',
'default': {},
'additionalProperties': False,
'properties': {
'filter': {
'type': 'object',
'default': {},
'additionalProperties': False,
'properties': {
'environment_blacklist': {
'type': 'array',
'default': [],
'items': {
'type': 'string'
}
}
},
'template': {
'type': 'string'
},
'autoload': {
'$ref': '#/definitions/dependency_selection'},
'prerequisites': {
'$ref': '#/definitions/dependency_selection'},
'conflict': {
'$ref': '#/definitions/array_of_strings'},
'load': {
'$ref': '#/definitions/array_of_strings'},
'suffixes': {
'$ref': '#/definitions/dictionary_of_strings'},
'environment': {
'type': 'object',
'default': {},
'additionalProperties': False,
'properties': {
'set': {
'$ref': '#/definitions/dictionary_of_strings'},
'unset': {
'$ref': '#/definitions/array_of_strings'},
'prepend_path': {
'$ref': '#/definitions/dictionary_of_strings'},
'append_path': {
'$ref': '#/definitions/dictionary_of_strings'}
}
},
'template': {
'type': 'string'
},
'autoload': dependency_selection,
'prerequisites': dependency_selection,
'conflict': array_of_strings,
'load': array_of_strings,
'suffixes': {
'type': 'object',
'validate_spec': True,
'patternProperties': {
r'\w[\w-]*': { # key
'type': 'string'
}
}
},
'environment': {
'type': 'object',
'default': {},
'additionalProperties': False,
'properties': {
'set': dictionary_of_strings,
'unset': array_of_strings,
'prepend_path': dictionary_of_strings,
'append_path': dictionary_of_strings
}
}
},
'module_type_configuration': {
'type': 'object',
'default': {},
'anyOf': [
{'properties': {
'verbose': {
'type': 'boolean',
'default': False
},
'hash_length': {
'type': 'integer',
'minimum': 0,
'default': 7
},
'whitelist': {
'$ref': '#/definitions/array_of_strings'},
'blacklist': {
'$ref': '#/definitions/array_of_strings'},
'blacklist_implicits': {
'type': 'boolean',
'default': False
},
'naming_scheme': {
'type': 'string' # Can we be more specific here?
}
}},
{'patternProperties': {
r'\w[\w-]*': {
'$ref': '#/definitions/module_file_configuration'
}
}}
]
}
},
module_type_configuration = {
'type': 'object',
'default': {},
'allOf': [
{'properties': {
'verbose': {
'type': 'boolean',
'default': False
},
'hash_length': {
'type': 'integer',
'minimum': 0,
'default': 7
},
'whitelist': array_of_strings,
'blacklist': array_of_strings,
'blacklist_implicits': {
'type': 'boolean',
'default': False
},
'naming_scheme': {
'type': 'string' # Can we be more specific here?
},
'all': module_file_configuration,
}
},
{'validate_spec': True,
'patternProperties': {
spec_regex: module_file_configuration,
anonymous_spec_regex: module_file_configuration,
}
}
]
}
@ -127,8 +127,9 @@
'type': 'object',
'patternProperties': {
# prefix-relative path to be inspected for existence
r'\w[\w-]*': {
'$ref': '#/definitions/array_of_strings'}}},
r'\w[\w-]*': array_of_strings
}
},
'enable': {
'type': 'array',
'default': [],
@ -138,28 +139,27 @@
'lmod': {
'allOf': [
# Base configuration
{'$ref': '#/definitions/module_type_configuration'},
module_type_configuration,
{
'core_compilers': {
'$ref': '#/definitions/array_of_strings'
'type': 'object',
'properties': {
'core_compilers': array_of_strings,
'hierarchy': array_of_strings
},
'hierarchical_scheme': {
'$ref': '#/definitions/array_of_strings'
}
} # Specific lmod extensions
]
},
'tcl': {
'allOf': [
# Base configuration
{'$ref': '#/definitions/module_type_configuration'},
module_type_configuration,
{} # Specific tcl extensions
]
},
'dotkit': {
'allOf': [
# Base configuration
{'$ref': '#/definitions/module_type_configuration'},
module_type_configuration,
{} # Specific dotkit extensions
]
},
@ -172,7 +172,6 @@
schema = {
'$schema': 'http://json-schema.org/schema#',
'title': 'Spack module file configuration file schema',
'definitions': definitions,
'type': 'object',
'additionalProperties': False,
'properties': properties,

View File

@ -0,0 +1,77 @@
# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import jsonschema
import pytest
import spack.schema
@pytest.fixture()
def validate_spec_schema():
return {
'type': 'object',
'validate_spec': True,
'patternProperties': {
r'\w[\w-]*': {
'type': 'string'
}
}
}
@pytest.fixture()
def module_suffixes_schema():
return {
'type': 'object',
'properties': {
'tcl': {
'type': 'object',
'patternProperties': {
r'\w[\w-]*': {
'type': 'object',
'properties': {
'suffixes': {
'validate_spec': True,
'patternProperties': {
r'\w[\w-]*': {
'type': 'string',
}
}
}
}
}
}
}
}
}
@pytest.mark.regression('9857')
def test_validate_spec(validate_spec_schema):
v = spack.schema.Validator(validate_spec_schema)
data = {'foo@3.7': 'bar'}
# Validate good data (the key is a spec)
v.validate(data)
# Check that invalid data throws
data['^python@3.7@'] = 'baz'
with pytest.raises(jsonschema.ValidationError) as exc_err:
v.validate(data)
assert 'is an invalid spec' in str(exc_err.value)
@pytest.mark.regression('9857')
def test_module_suffixes(module_suffixes_schema):
v = spack.schema.Validator(module_suffixes_schema)
data = {'tcl': {'all': {'suffixes': {'^python@2.7@': 'py2.7'}}}}
with pytest.raises(jsonschema.ValidationError) as exc_err:
v.validate(data)
assert 'is an invalid spec' in str(exc_err.value)