Merge pull request #1698 from LLNL/bugfix/hash-collision

Fix hash collisions, add stable hashing
This commit is contained in:
Todd Gamblin 2016-09-02 07:29:44 -07:00 committed by GitHub
commit 417fe0ec67
6 changed files with 109 additions and 55 deletions

View File

@ -87,6 +87,7 @@
from spack.util.naming import mod_to_class
from spack.util.environment import get_path
from spack.util.multiproc import parmap
from spack.util.spack_yaml import syaml_dict
import spack.error as serr
@ -407,12 +408,13 @@ def _cmp_key(self):
return (platform, platform_os, target)
def to_dict(self):
d = {}
d['platform'] = str(self.platform) if self.platform else None
d['platform_os'] = str(self.platform_os) if self.platform_os else None
d['target'] = str(self.target) if self.target else None
return d
return syaml_dict((
('platform',
str(self.platform) if self.platform else None),
('platform_os',
str(self.platform_os) if self.platform_os else None),
('target',
str(self.target) if self.target else None)))
def _target_from_dict(target_name, plat=None):

View File

@ -42,7 +42,6 @@
import os
import socket
import yaml
from yaml.error import MarkedYAMLError, YAMLError
import llnl.util.tty as tty
@ -54,7 +53,7 @@
from spack.spec import Spec
from spack.error import SpackError
from spack.repository import UnknownPackageError
import spack.util.spack_yaml as syaml
# DB goes in this directory underneath the root
_db_dirname = '.spack-db'
@ -197,7 +196,8 @@ def _write_to_yaml(self, stream):
}
try:
return yaml.dump(database, stream=stream, default_flow_style=False)
return syaml.dump(
database, stream=stream, default_flow_style=False)
except YAMLError as e:
raise SpackYAMLError("error writing YAML database:", str(e))
@ -247,9 +247,9 @@ def _read_from_yaml(self, stream):
try:
if isinstance(stream, basestring):
with open(stream, 'r') as f:
yfile = yaml.load(f)
yfile = syaml.load(f)
else:
yfile = yaml.load(stream)
yfile = syaml.load(stream)
except MarkedYAMLError as e:
raise SpackYAMLError("error parsing YAML database:", str(e))

View File

@ -268,6 +268,13 @@ def check_installed(self, spec):
if installed_spec == spec:
return path
# DAG hashes currently do not include build dependencies.
#
# TODO: remove this when we do better concretization and don't
# ignore build-only deps in hashes.
elif installed_spec == spec.copy(deps=('link', 'run')):
return path
if spec.dag_hash() == installed_spec.dag_hash():
raise SpecHashCollisionError(spec, installed_spec)
else:

View File

@ -102,7 +102,6 @@
from StringIO import StringIO
from operator import attrgetter
import yaml
from yaml.error import MarkedYAMLError
import llnl.util.tty as tty
@ -119,6 +118,8 @@
from spack.util.naming import mod_to_class
from spack.util.prefix import Prefix
from spack.util.string import *
import spack.util.spack_yaml as syaml
from spack.util.spack_yaml import syaml_dict
from spack.version import *
from spack.provider_index import ProviderIndex
@ -266,9 +267,10 @@ def _cmp_key(self):
return (self.name, self.versions)
def to_dict(self):
d = {'name': self.name}
d = syaml_dict([('name', self.name)])
d.update(self.versions.to_dict())
return {'compiler': d}
return syaml_dict([('compiler', d)])
@staticmethod
def from_dict(d):
@ -902,7 +904,7 @@ def dag_hash(self, length=None):
return self._hash[:length]
else:
# XXX(deptype): ignore 'build' dependencies here
yaml_text = yaml.dump(
yaml_text = syaml.dump(
self.to_node_dict(), default_flow_style=True, width=sys.maxint)
sha = hashlib.sha1(yaml_text)
b32_hash = base64.b32encode(sha.digest()).lower()[:length]
@ -911,38 +913,37 @@ def dag_hash(self, length=None):
return b32_hash
def to_node_dict(self):
d = {}
params = dict((name, v.value) for name, v in self.variants.items())
params.update(dict((name, value)
for name, value in self.compiler_flags.items()))
if params:
d['parameters'] = params
if self.dependencies():
deps = self.dependencies_dict(deptype=('link', 'run'))
d['dependencies'] = dict(
(name, {
'hash': dspec.spec.dag_hash(),
'type': [str(s) for s in dspec.deptypes]})
for name, dspec in deps.items())
if self.namespace:
d['namespace'] = self.namespace
if self.architecture:
# TODO: Fix the target.to_dict to account for the tuple
# Want it to be a dict of dicts
d['arch'] = self.architecture.to_dict()
if self.compiler:
d.update(self.compiler.to_dict())
d = syaml_dict()
if self.versions:
d.update(self.versions.to_dict())
return {self.name: d}
if self.compiler:
d.update(self.compiler.to_dict())
if self.namespace:
d['namespace'] = self.namespace
params = syaml_dict(sorted(
(name, v.value) for name, v in self.variants.items()))
params.update(sorted(self.compiler_flags.items()))
if params:
d['parameters'] = params
if self.architecture:
d['arch'] = self.architecture.to_dict()
deps = self.dependencies_dict(deptype=('link', 'run'))
if deps:
d['dependencies'] = syaml_dict([
(name,
syaml_dict([
('hash', dspec.spec.dag_hash()),
('type', sorted(str(s) for s in dspec.deptypes))])
) for name, dspec in sorted(deps.items())
])
return syaml_dict([(self.name, d)])
def to_yaml(self, stream=None):
node_list = []
@ -950,8 +951,9 @@ def to_yaml(self, stream=None):
node = s.to_node_dict()
node[s.name]['hash'] = s.dag_hash()
node_list.append(node)
return yaml.dump({'spec': node_list},
stream=stream, default_flow_style=False)
return syaml.dump(
syaml_dict([('spec', node_list)]),
stream=stream, default_flow_style=False)
@staticmethod
def from_node_dict(node):
@ -1025,7 +1027,7 @@ def from_yaml(stream):
"""
try:
yfile = yaml.load(stream)
yfile = syaml.load(stream)
except MarkedYAMLError as e:
raise SpackYAMLError("error parsing YAML spec:", str(e))
@ -1911,8 +1913,9 @@ def _dup(self, other, deps=True, cleardeps=True):
self.external = other.external
self.external_module = other.external_module
self.namespace = other.namespace
self._hash = other._hash
self._cmp_key_cache = other._cmp_key_cache
self.external = other.external
self.external_module = other.external_module
# If we copy dependencies, preserve DAG structure in the new spec
if deps:
@ -1940,11 +1943,20 @@ def _dup(self, other, deps=True, cleardeps=True):
new_spec._add_dependency(
new_nodes[depspec.spec.name], depspec.deptypes)
# Since we preserved structure, we can copy _normal safely.
self._normal = other._normal
self._concrete = other._concrete
self.external = other.external
self.external_module = other.external_module
# These fields are all cached results of expensive operations.
# If we preserved the original structure, we can copy them
# safely. If not, they need to be recomputed.
if deps is True or deps == alldeps:
self._hash = other._hash
self._cmp_key_cache = other._cmp_key_cache
self._normal = other._normal
self._concrete = other._concrete
else:
self._hash = None
self._cmp_key_cache = None
self._normal = False
self._concrete = False
return changed
def copy(self, deps=True):

View File

@ -495,3 +495,31 @@ def test_deptype_traversal_run(self):
traversal = dag.traverse(deptype='run')
self.assertEqual([x.name for x in traversal], names)
def test_using_ordered_dict(self):
""" Checks that dicts are ordered
Necessary to make sure that dag_hash is stable across python
versions and processes.
"""
def descend_and_check(iterable, level=0):
from spack.util.spack_yaml import syaml_dict
from collections import Iterable, Mapping
if isinstance(iterable, Mapping):
self.assertTrue(isinstance(iterable, syaml_dict))
return descend_and_check(iterable.values(), level=level + 1)
max_level = level
for value in iterable:
if isinstance(value, Iterable) and not isinstance(value, str):
nlevel = descend_and_check(value, level=level + 1)
if nlevel > max_level:
max_level = nlevel
return max_level
specs = ['mpileaks ^zmpi', 'dttop', 'dtuse']
for spec in specs:
dag = Spec(spec)
dag.normalize()
level = descend_and_check(dag.to_node_dict())
# level just makes sure we are doing something here
self.assertTrue(level >= 5)

View File

@ -49,6 +49,7 @@
from functools import wraps
from functools_backport import total_ordering
from spack.util.spack_yaml import syaml_dict
__all__ = ['Version', 'VersionRange', 'VersionList', 'ver']
@ -593,9 +594,13 @@ def overlaps(self, other):
def to_dict(self):
"""Generate human-readable dict for YAML."""
if self.concrete:
return {'version': str(self[0])}
return syaml_dict([
('version', str(self[0]))
])
else:
return {'versions': [str(v) for v in self]}
return syaml_dict([
('versions', [str(v) for v in self])
])
@staticmethod
def from_dict(dictionary):