Add custom YAML loader & dumper to track lines & maintain dict order.

- Configs are now parsed with `spack.util.spack_yaml.load/dump`

- Parser annotates returned data with `_start_mark` and `_end_mark`
  properties, so that we can recover what lines/files they came from.

- Parser uses `OrderedDict` instead of `dict`.  This will help
  maintain some sanity when round-tripping config files (see the short
  sketch below).
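
As a quick illustration of what the new parser returns (hypothetical
snippet; the mirror URL is just a placeholder):

    import spack.util.spack_yaml as syaml

    yaml_text = "mirrors:\n  remote: https://example.com/mirror\n"
    data = syaml.load(yaml_text)

    print(list(data.keys()))                 # keys come back in file order
    print(data['mirrors']._start_mark.line)  # 1: line of the nested mapping
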
Todd Gamblin 2015-12-27 17:51:11 -08:00
parent 4e8e298eb3
commit 39e360f93a
6 changed files with 352 additions and 44 deletions

View File

@@ -1,5 +1,5 @@
##############################################################################
# Copyright (c) 2013, Lawrence Livermore National Security, LLC.
# Copyright (c) 2013-2015, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory.
#
# This file is part of Spack.
@@ -120,8 +120,10 @@
import os
import sys
import copy
import yaml
from yaml.error import MarkedYAMLError
from ordereddict_backport import OrderedDict
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp
@@ -129,11 +131,20 @@
import spack
from spack.error import SpackError
"""List of valid config sections."""
valid_sections = ('compilers', 'mirrors', 'repos')
# Hacked yaml for configuration files preserves line numbers.
import spack.util.spack_yaml as syaml
def check_section(section):
"""Dict from section names -> function to check section YAML format."""
valid_sections = ['compilers', 'mirrors', 'repos']
"""OrderedDict of config scopes keyed by name.
Later scopes will override earlier scopes.
"""
config_scopes = OrderedDict()
def validate_section(section):
"""Raise a ValueError if the section is not a valid section."""
if section not in valid_sections:
raise ValueError("Invalid config section: '%s'. Options are %s."
@@ -146,14 +157,19 @@ class ConfigScope(object):
A scope is one directory containing named configuration files.
Each file is a config "section" (e.g., mirrors, compilers, etc).
"""
def __init__(self, name, path):
self.name = name # scope name.
self.path = path # path to directory containing configs.
self.sections = {} # sections read from config files.
# Register in a dict of all ConfigScopes
# TODO: make this cleaner. Mocking up for testing is brittle.
global config_scopes
config_scopes[name] = self
def get_section_filename(self, section):
check_section(section)
validate_section(section)
return os.path.join(self.path, "%s.yaml" % section)
@@ -161,7 +177,10 @@ def get_section(self, section):
if not section in self.sections:
path = self.get_section_filename(section)
data = _read_config_file(path)
self.sections[section] = {} if data is None else data
if data is None:
self.sections[section] = {}
else:
self.sections[section] = data
return self.sections[section]
@@ -171,7 +190,7 @@ def write_section(self, section):
try:
mkdirp(self.path)
with open(filename, 'w') as f:
yaml.dump(data, stream=f, default_flow_style=False)
syaml.dump(data, stream=f, default_flow_style=False)
except (yaml.YAMLError, IOError) as e:
raise ConfigFileError("Error writing to config file: '%s'" % str(e))
@@ -181,18 +200,11 @@ def clear(self):
self.sections = {}
"""List of config scopes by name.
Later scopes in the list will override earlier scopes.
"""
config_scopes = [
ConfigScope('site', os.path.join(spack.etc_path, 'spack')),
ConfigScope('user', os.path.expanduser('~/.spack'))]
"""List of valid scopes, for convenience."""
valid_scopes = (s.name for s in config_scopes)
ConfigScope('site', os.path.join(spack.etc_path, 'spack')),
ConfigScope('user', os.path.expanduser('~/.spack'))
def check_scope(scope):
def validate_scope(scope):
"""Ensure that scope is valid, and return a valid scope if it is None.
This should be used by routines in ``config.py`` to validate
@@ -202,16 +214,14 @@ def check_scope(scope):
"""
if scope is None:
# default to the scope with highest precedence.
return config_scopes[-1]
elif scope not in valid_scopes:
return config_scopes.values()[-1]
elif scope in config_scopes:
return config_scopes[scope]
else:
raise ValueError("Invalid config scope: '%s'. Must be one of %s."
% (scope, valid_scopes))
return scope
def get_scope(scope):
scope = check_scope(scope)
return next(s for s in config_scopes if s.name == scope)
% (scope, config_scopes.keys()))
def _read_config_file(filename):
@@ -229,7 +239,7 @@ def _read_config_file(filename):
try:
with open(filename) as f:
return yaml.load(f)
return syaml.load(f)
except MarkedYAMLError, e:
raise ConfigFileError(
@@ -243,7 +253,7 @@ def _read_config_file(filename):
def clear_config_caches():
"""Clears the caches for configuration files, which will cause them
to be re-read upon the next request"""
for scope in config_scopes:
for scope in config_scopes.values():
scope.clear()
@@ -306,10 +316,10 @@ def get_config(section):
Strips off the top-level section name from the YAML dict.
"""
check_section(section)
validate_section(section)
merged_section = {}
for scope in config_scopes:
for scope in config_scopes.values():
# read potentially cached data from the scope.
data = scope.get_section(section)
if not data or not section in data:
@@ -345,7 +355,7 @@ def expand_repo_path(path):
def get_config_filename(scope, section):
"""For some scope and section, get the name of the configuration file"""
scope = get_scope(scope)
scope = validate_scope(scope)
return scope.get_section_filename(section)
@@ -363,8 +373,8 @@ def update_config(section, update_data, scope=None):
# read in the config to ensure we've got current data
get_config(section)
check_section(section) # validate section name
scope = get_scope(scope) # get ConfigScope object from string.
validate_section(section) # validate section name
scope = validate_scope(scope) # get ConfigScope object from string.
# read only the requested section's data.
data = scope.get_section(section)
@@ -382,7 +392,7 @@ def remove_from_config(section, key_to_rm, scope=None):
get_config(section)
# check args and get the objects we need.
scope = get_scope(scope)
scope = validate_scope(scope)
data = scope.get_section(section)
filename = scope.get_section_filename(section)
@@ -411,3 +421,4 @@ def print_section(section):
class ConfigError(SpackError): pass
class ConfigFileError(ConfigError): pass
class ConfigFormatError(ConfigError): pass
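
The net effect in config.py: scopes are no longer a hard-coded list.
The ConfigScope constructor now registers each scope in the module-level
config_scopes OrderedDict, and validate_scope() falls back to the
last-registered (highest-precedence) scope when none is given. A
standalone sketch of that registration pattern (paths are placeholders,
and the real class also reads and caches its section files):

    from collections import OrderedDict  # the commit uses the vendored
                                          # ordereddict_backport instead

    config_scopes = OrderedDict()

    class ConfigScope(object):
        def __init__(self, name, path):
            self.name = name
            self.path = path
            config_scopes[name] = self   # register; later scopes win

    ConfigScope('site', '/etc/spack')    # placeholder path
    ConfigScope('user', '~/.spack')

    def validate_scope(scope):
        if scope is None:
            return list(config_scopes.values())[-1]  # highest precedence
        elif scope in config_scopes:
            return config_scopes[scope]
        raise ValueError("Invalid config scope: '%s'" % scope)

    print(list(config_scopes.keys()))    # ['site', 'user']
    print(validate_scope(None).name)     # 'user'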

View File

@@ -60,7 +60,8 @@
'unit_install',
'lock',
'database',
'namespace_trie']
'namespace_trie',
'yaml']
def list_tests():

View File

@@ -26,6 +26,7 @@
import shutil
import os
from tempfile import mkdtemp
from ordereddict_backport import OrderedDict
import spack
import spack.config
from spack.test.mock_packages_test import *
@@ -72,10 +73,9 @@ class ConfigTest(MockPackagesTest):
def setUp(self):
super(ConfigTest, self).setUp()
self.tmp_dir = mkdtemp('.tmp', 'spack-config-test-')
spack.config.config_scopes = [
spack.config.ConfigScope('test_low_priority', os.path.join(self.tmp_dir, 'low')),
spack.config.ConfigScope('test_high_priority', os.path.join(self.tmp_dir, 'high'))]
spack.config.valid_scopes = ('test_low_priority', 'test_high_priority')
spack.config.config_scopes = OrderedDict()
spack.config.ConfigScope('test_low_priority', os.path.join(self.tmp_dir, 'low'))
spack.config.ConfigScope('test_high_priority', os.path.join(self.tmp_dir, 'high'))
def tearDown(self):
super(ConfigTest, self).tearDown()

View File

@@ -24,6 +24,7 @@
##############################################################################
import sys
import unittest
from ordereddict_backport import OrderedDict
import spack
import spack.config
@@ -41,10 +42,12 @@ def initmock(self):
spack.config.clear_config_caches()
self.real_scopes = spack.config.config_scopes
self.real_valid_scopes = spack.config.valid_scopes
spack.config.config_scopes = [
spack.config.ConfigScope('site', spack.mock_site_config),
spack.config.ConfigScope('user', spack.mock_user_config)]
# TODO: Mocking this up is kind of brittle b/c ConfigScope
# TODO: constructor modifies config_scopes. Make it cleaner.
spack.config.config_scopes = OrderedDict()
spack.config.ConfigScope('site', spack.mock_site_config)
spack.config.ConfigScope('user', spack.mock_user_config)
# Store changes to the package's dependencies so we can
# restore later.
@@ -72,7 +75,6 @@ def cleanmock(self):
"""Restore the real packages path after any test."""
spack.repo.swap(self.db)
spack.config.config_scopes = self.real_scopes
spack.config.valid_scopes = self.real_valid_scopes
spack.config.clear_config_caches()
# Restore dependency changes that happened during the test

View File

@@ -0,0 +1,93 @@
##############################################################################
# Copyright (c) 2013-2015, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory.
#
# This file is part of Spack.
# Written by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
# LLNL-CODE-647188
#
# For details, see https://github.com/llnl/spack
# Please also see the LICENSE file for our notice and the LGPL.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License (as published by
# the Free Software Foundation) version 2.1 dated February 1999.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
# conditions of the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##############################################################################
"""
Test Spack's custom YAML format.
"""
import unittest
import spack.util.spack_yaml as syaml
test_file = """\
config_file:
  x86_64:
    foo: /path/to/foo
    bar: /path/to/bar
    baz: /path/to/baz
  some_list:
    - item 1
    - item 2
    - item 3
  another_list:
    [ 1, 2, 3 ]
  some_key: some_string
"""
test_data = {
'config_file' : syaml.syaml_dict([
('x86_64', syaml.syaml_dict([
('foo', '/path/to/foo'),
('bar', '/path/to/bar'),
('baz', '/path/to/baz' )])),
('some_list', [ 'item 1', 'item 2', 'item 3' ]),
('another_list', [ 1, 2, 3 ]),
('some_key', 'some_string')
])}
class YamlTest(unittest.TestCase):
def setUp(self):
self.data = syaml.load(test_file)
def test_parse(self):
self.assertEqual(test_data, self.data)
def test_dict_order(self):
self.assertEqual(
['x86_64', 'some_list', 'another_list', 'some_key'],
self.data['config_file'].keys())
self.assertEqual(
['foo', 'bar', 'baz'],
self.data['config_file']['x86_64'].keys())
def test_line_numbers(self):
def check(obj, start_line, end_line):
self.assertEqual(obj._start_mark.line, start_line)
self.assertEqual(obj._end_mark.line, end_line)
check(self.data, 0, 12)
check(self.data['config_file'], 1, 12)
check(self.data['config_file']['x86_64'], 2, 5)
check(self.data['config_file']['x86_64']['foo'], 2, 2)
check(self.data['config_file']['x86_64']['bar'], 3, 3)
check(self.data['config_file']['x86_64']['baz'], 4, 4)
check(self.data['config_file']['some_list'], 6, 9)
check(self.data['config_file']['some_list'][0], 6, 6)
check(self.data['config_file']['some_list'][1], 7, 7)
check(self.data['config_file']['some_list'][2], 8, 8)
check(self.data['config_file']['another_list'], 10, 10)
check(self.data['config_file']['some_key'], 11, 11)

View File

@@ -0,0 +1,201 @@
##############################################################################
# Copyright (c) 2013-2015, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory.
#
# This file is part of Spack.
# Written by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
# LLNL-CODE-647188
#
# For details, see https://github.com/llnl/spack
# Please also see the LICENSE file for our notice and the LGPL.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License (as published by
# the Free Software Foundation) version 2.1 dated February 1999.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
# conditions of the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##############################################################################
"""Enhanced YAML parsing for Spack.
- ``load()`` preserves YAML Marks on returned objects -- this allows
us to access file and line information later.
- Our load methods use the ``OrderedDict`` class instead of YAML's
default unordered dict.
"""
import yaml
from yaml.nodes import *
from yaml.constructor import ConstructorError
from yaml.representer import SafeRepresenter
from ordereddict_backport import OrderedDict
# Only export load and dump
__all__ = ['load', 'dump']
# Make new classes so we can add custom attributes.
# Also, use OrderedDict instead of just dict.
class syaml_dict(OrderedDict):
def __repr__(self):
mappings = ('%r: %r' % (k,v) for k,v in self.items())
return '{%s}' % ', '.join(mappings)
class syaml_list(list):
__repr__ = list.__repr__
class syaml_str(str):
__repr__ = str.__repr__
def mark(obj, node):
"""Add start and end markers to an object."""
obj._start_mark = node.start_mark
obj._end_mark = node.end_mark
class OrderedLineLoader(yaml.Loader):
"""YAML loader that preserves order and line numbers.
Mappings read in by this loader behave like an ordered dict.
Sequences, mappings, and strings also have new attributes,
``_start_mark`` and ``_end_mark``, that preserve YAML line
information in the output data.
"""
#
# Override construct_yaml_* so that they build our derived types,
# which allows us to add new attributes to them.
#
# The standard YAML constructors return empty instances and fill
# in with mappings later. We preserve this behavior.
#
def construct_yaml_str(self, node):
value = self.construct_scalar(node)
try:
value = value.encode('ascii')
except UnicodeEncodeError:
pass
value = syaml_str(value)
mark(value, node)
return value
def construct_yaml_seq(self, node):
data = syaml_list()
mark(data, node)
yield data
data.extend(self.construct_sequence(node))
def construct_yaml_map(self, node):
data = syaml_dict()
mark(data, node)
yield data
value = self.construct_mapping(node)
data.update(value)
#
# Override the ``construct_*`` routines. These fill in empty
# objects after they are yielded by the above ``construct_yaml_*`` methods.
#
def construct_sequence(self, node, deep=False):
if not isinstance(node, SequenceNode):
raise ConstructorError(None, None,
"expected a sequence node, but found %s" % node.id,
node.start_mark)
value = syaml_list(self.construct_object(child, deep=deep)
for child in node.value)
mark(value, node)
return value
def construct_mapping(self, node, deep=False):
"""Store mappings as OrderedDicts instead of as regular python
dictionaries to preserve file ordering."""
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
mapping = syaml_dict()
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
try:
hash(key)
except TypeError, exc:
raise ConstructorError("while constructing a mapping", node.start_mark,
"found unacceptable key (%s)" % exc, key_node.start_mark)
value = self.construct_object(value_node, deep=deep)
if key in mapping:
raise ConstructorError("while constructing a mapping", node.start_mark,
"found already in-use key (%s)" % key, key_node.start_mark)
mapping[key] = value
mark(mapping, node)
return mapping
# Register the new constructors above.
OrderedLineLoader.add_constructor(u'tag:yaml.org,2002:map', OrderedLineLoader.construct_yaml_map)
OrderedLineLoader.add_constructor(u'tag:yaml.org,2002:seq', OrderedLineLoader.construct_yaml_seq)
OrderedLineLoader.add_constructor(u'tag:yaml.org,2002:str', OrderedLineLoader.construct_yaml_str)
class OrderedLineDumper(yaml.Dumper):
"""Dumper that preserves ordering and formats ``syaml_*`` objects.
This dumper preserves the insertion order of ``syaml_dict`` objects
when they're written out.  It also has some custom formatters
for ``syaml_*`` objects so that they are formatted like their
regular Python equivalents, instead of ugly YAML pyobjects.
"""
def represent_mapping(self, tag, mapping, flow_style=None):
value = []
node = MappingNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
if hasattr(mapping, 'items'):
# if it's a syaml_dict, preserve OrderedDict order.
# Otherwise do the default thing.
sort = not isinstance(mapping, syaml_dict)
mapping = mapping.items()
if sort:
mapping.sort()
for item_key, item_value in mapping:
node_key = self.represent_data(item_key)
node_value = self.represent_data(item_value)
if not (isinstance(node_key, ScalarNode) and not node_key.style):
best_style = False
if not (isinstance(node_value, ScalarNode) and not node_value.style):
best_style = False
value.append((node_key, node_value))
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
# Make our special objects look like normal YAML ones.
OrderedLineDumper.add_representer(syaml_dict, OrderedLineDumper.represent_dict)
OrderedLineDumper.add_representer(syaml_list, OrderedLineDumper.represent_list)
OrderedLineDumper.add_representer(syaml_str, OrderedLineDumper.represent_str)
def load(*args, **kwargs):
"""Load but modify the loader instance so that it will add __line__
attributes to the returned object."""
kwargs['Loader'] = OrderedLineLoader
return yaml.load(*args, **kwargs)
def dump(*args, **kwargs):
kwargs['Dumper'] = OrderedLineDumper
return yaml.dump(*args, **kwargs)
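
Taken together, the new module is a drop-in replacement for
yaml.load/yaml.dump. A hypothetical round trip (the package names below
are only illustrative):

    import spack.util.spack_yaml as syaml

    text = ("packages:\n"
            "  zlib:\n"
            "    version: 1.2.8\n"
            "  mpich:\n"
            "    version: 3.1\n")

    data = syaml.load(text)

    # Keys come back in file order instead of being sorted.
    print(list(data['packages'].keys()))      # ['zlib', 'mpich']

    # Containers and strings carry YAML marks, handy for error messages.
    print(data['packages']._start_mark.line)
    print(data['packages']._end_mark.line)

    # dump() goes through OrderedLineDumper, so the order round-trips.
    print(syaml.dump(data, default_flow_style=False))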