Merge pull request #1698 from LLNL/bugfix/hash-collision

Fix hash collisions, add stable hashing
This commit is contained in:
Todd Gamblin 2016-09-02 07:29:44 -07:00 committed by GitHub
commit 417fe0ec67
6 changed files with 109 additions and 55 deletions

View File

@ -87,6 +87,7 @@
from spack.util.naming import mod_to_class from spack.util.naming import mod_to_class
from spack.util.environment import get_path from spack.util.environment import get_path
from spack.util.multiproc import parmap from spack.util.multiproc import parmap
from spack.util.spack_yaml import syaml_dict
import spack.error as serr import spack.error as serr
@ -407,12 +408,13 @@ def _cmp_key(self):
return (platform, platform_os, target) return (platform, platform_os, target)
def to_dict(self): def to_dict(self):
d = {} return syaml_dict((
d['platform'] = str(self.platform) if self.platform else None ('platform',
d['platform_os'] = str(self.platform_os) if self.platform_os else None str(self.platform) if self.platform else None),
d['target'] = str(self.target) if self.target else None ('platform_os',
str(self.platform_os) if self.platform_os else None),
return d ('target',
str(self.target) if self.target else None)))
def _target_from_dict(target_name, plat=None): def _target_from_dict(target_name, plat=None):

View File

@ -42,7 +42,6 @@
import os import os
import socket import socket
import yaml
from yaml.error import MarkedYAMLError, YAMLError from yaml.error import MarkedYAMLError, YAMLError
import llnl.util.tty as tty import llnl.util.tty as tty
@ -54,7 +53,7 @@
from spack.spec import Spec from spack.spec import Spec
from spack.error import SpackError from spack.error import SpackError
from spack.repository import UnknownPackageError from spack.repository import UnknownPackageError
import spack.util.spack_yaml as syaml
# DB goes in this directory underneath the root # DB goes in this directory underneath the root
_db_dirname = '.spack-db' _db_dirname = '.spack-db'
@ -197,7 +196,8 @@ def _write_to_yaml(self, stream):
} }
try: try:
return yaml.dump(database, stream=stream, default_flow_style=False) return syaml.dump(
database, stream=stream, default_flow_style=False)
except YAMLError as e: except YAMLError as e:
raise SpackYAMLError("error writing YAML database:", str(e)) raise SpackYAMLError("error writing YAML database:", str(e))
@ -247,9 +247,9 @@ def _read_from_yaml(self, stream):
try: try:
if isinstance(stream, basestring): if isinstance(stream, basestring):
with open(stream, 'r') as f: with open(stream, 'r') as f:
yfile = yaml.load(f) yfile = syaml.load(f)
else: else:
yfile = yaml.load(stream) yfile = syaml.load(stream)
except MarkedYAMLError as e: except MarkedYAMLError as e:
raise SpackYAMLError("error parsing YAML database:", str(e)) raise SpackYAMLError("error parsing YAML database:", str(e))

View File

@ -268,6 +268,13 @@ def check_installed(self, spec):
if installed_spec == spec: if installed_spec == spec:
return path return path
# DAG hashes currently do not include build dependencies.
#
# TODO: remove this when we do better concretization and don't
# ignore build-only deps in hashes.
elif installed_spec == spec.copy(deps=('link', 'run')):
return path
if spec.dag_hash() == installed_spec.dag_hash(): if spec.dag_hash() == installed_spec.dag_hash():
raise SpecHashCollisionError(spec, installed_spec) raise SpecHashCollisionError(spec, installed_spec)
else: else:

View File

@ -102,7 +102,6 @@
from StringIO import StringIO from StringIO import StringIO
from operator import attrgetter from operator import attrgetter
import yaml
from yaml.error import MarkedYAMLError from yaml.error import MarkedYAMLError
import llnl.util.tty as tty import llnl.util.tty as tty
@ -119,6 +118,8 @@
from spack.util.naming import mod_to_class from spack.util.naming import mod_to_class
from spack.util.prefix import Prefix from spack.util.prefix import Prefix
from spack.util.string import * from spack.util.string import *
import spack.util.spack_yaml as syaml
from spack.util.spack_yaml import syaml_dict
from spack.version import * from spack.version import *
from spack.provider_index import ProviderIndex from spack.provider_index import ProviderIndex
@ -266,9 +267,10 @@ def _cmp_key(self):
return (self.name, self.versions) return (self.name, self.versions)
def to_dict(self): def to_dict(self):
d = {'name': self.name} d = syaml_dict([('name', self.name)])
d.update(self.versions.to_dict()) d.update(self.versions.to_dict())
return {'compiler': d}
return syaml_dict([('compiler', d)])
@staticmethod @staticmethod
def from_dict(d): def from_dict(d):
@ -902,7 +904,7 @@ def dag_hash(self, length=None):
return self._hash[:length] return self._hash[:length]
else: else:
# XXX(deptype): ignore 'build' dependencies here # XXX(deptype): ignore 'build' dependencies here
yaml_text = yaml.dump( yaml_text = syaml.dump(
self.to_node_dict(), default_flow_style=True, width=sys.maxint) self.to_node_dict(), default_flow_style=True, width=sys.maxint)
sha = hashlib.sha1(yaml_text) sha = hashlib.sha1(yaml_text)
b32_hash = base64.b32encode(sha.digest()).lower()[:length] b32_hash = base64.b32encode(sha.digest()).lower()[:length]
@ -911,38 +913,37 @@ def dag_hash(self, length=None):
return b32_hash return b32_hash
def to_node_dict(self): def to_node_dict(self):
d = {} d = syaml_dict()
params = dict((name, v.value) for name, v in self.variants.items())
params.update(dict((name, value)
for name, value in self.compiler_flags.items()))
if params:
d['parameters'] = params
if self.dependencies():
deps = self.dependencies_dict(deptype=('link', 'run'))
d['dependencies'] = dict(
(name, {
'hash': dspec.spec.dag_hash(),
'type': [str(s) for s in dspec.deptypes]})
for name, dspec in deps.items())
if self.namespace:
d['namespace'] = self.namespace
if self.architecture:
# TODO: Fix the target.to_dict to account for the tuple
# Want it to be a dict of dicts
d['arch'] = self.architecture.to_dict()
if self.compiler:
d.update(self.compiler.to_dict())
if self.versions: if self.versions:
d.update(self.versions.to_dict()) d.update(self.versions.to_dict())
return {self.name: d} if self.compiler:
d.update(self.compiler.to_dict())
if self.namespace:
d['namespace'] = self.namespace
params = syaml_dict(sorted(
(name, v.value) for name, v in self.variants.items()))
params.update(sorted(self.compiler_flags.items()))
if params:
d['parameters'] = params
if self.architecture:
d['arch'] = self.architecture.to_dict()
deps = self.dependencies_dict(deptype=('link', 'run'))
if deps:
d['dependencies'] = syaml_dict([
(name,
syaml_dict([
('hash', dspec.spec.dag_hash()),
('type', sorted(str(s) for s in dspec.deptypes))])
) for name, dspec in sorted(deps.items())
])
return syaml_dict([(self.name, d)])
def to_yaml(self, stream=None): def to_yaml(self, stream=None):
node_list = [] node_list = []
@ -950,8 +951,9 @@ def to_yaml(self, stream=None):
node = s.to_node_dict() node = s.to_node_dict()
node[s.name]['hash'] = s.dag_hash() node[s.name]['hash'] = s.dag_hash()
node_list.append(node) node_list.append(node)
return yaml.dump({'spec': node_list}, return syaml.dump(
stream=stream, default_flow_style=False) syaml_dict([('spec', node_list)]),
stream=stream, default_flow_style=False)
@staticmethod @staticmethod
def from_node_dict(node): def from_node_dict(node):
@ -1025,7 +1027,7 @@ def from_yaml(stream):
""" """
try: try:
yfile = yaml.load(stream) yfile = syaml.load(stream)
except MarkedYAMLError as e: except MarkedYAMLError as e:
raise SpackYAMLError("error parsing YAML spec:", str(e)) raise SpackYAMLError("error parsing YAML spec:", str(e))
@ -1911,8 +1913,9 @@ def _dup(self, other, deps=True, cleardeps=True):
self.external = other.external self.external = other.external
self.external_module = other.external_module self.external_module = other.external_module
self.namespace = other.namespace self.namespace = other.namespace
self._hash = other._hash
self._cmp_key_cache = other._cmp_key_cache self.external = other.external
self.external_module = other.external_module
# If we copy dependencies, preserve DAG structure in the new spec # If we copy dependencies, preserve DAG structure in the new spec
if deps: if deps:
@ -1940,11 +1943,20 @@ def _dup(self, other, deps=True, cleardeps=True):
new_spec._add_dependency( new_spec._add_dependency(
new_nodes[depspec.spec.name], depspec.deptypes) new_nodes[depspec.spec.name], depspec.deptypes)
# Since we preserved structure, we can copy _normal safely. # These fields are all cached results of expensive operations.
self._normal = other._normal # If we preserved the original structure, we can copy them
self._concrete = other._concrete # safely. If not, they need to be recomputed.
self.external = other.external if deps is True or deps == alldeps:
self.external_module = other.external_module self._hash = other._hash
self._cmp_key_cache = other._cmp_key_cache
self._normal = other._normal
self._concrete = other._concrete
else:
self._hash = None
self._cmp_key_cache = None
self._normal = False
self._concrete = False
return changed return changed
def copy(self, deps=True): def copy(self, deps=True):

View File

@ -495,3 +495,31 @@ def test_deptype_traversal_run(self):
traversal = dag.traverse(deptype='run') traversal = dag.traverse(deptype='run')
self.assertEqual([x.name for x in traversal], names) self.assertEqual([x.name for x in traversal], names)
def test_using_ordered_dict(self):
""" Checks that dicts are ordered
Necessary to make sure that dag_hash is stable across python
versions and processes.
"""
def descend_and_check(iterable, level=0):
from spack.util.spack_yaml import syaml_dict
from collections import Iterable, Mapping
if isinstance(iterable, Mapping):
self.assertTrue(isinstance(iterable, syaml_dict))
return descend_and_check(iterable.values(), level=level + 1)
max_level = level
for value in iterable:
if isinstance(value, Iterable) and not isinstance(value, str):
nlevel = descend_and_check(value, level=level + 1)
if nlevel > max_level:
max_level = nlevel
return max_level
specs = ['mpileaks ^zmpi', 'dttop', 'dtuse']
for spec in specs:
dag = Spec(spec)
dag.normalize()
level = descend_and_check(dag.to_node_dict())
# level just makes sure we are doing something here
self.assertTrue(level >= 5)

View File

@ -49,6 +49,7 @@
from functools import wraps from functools import wraps
from functools_backport import total_ordering from functools_backport import total_ordering
from spack.util.spack_yaml import syaml_dict
__all__ = ['Version', 'VersionRange', 'VersionList', 'ver'] __all__ = ['Version', 'VersionRange', 'VersionList', 'ver']
@ -593,9 +594,13 @@ def overlaps(self, other):
def to_dict(self): def to_dict(self):
"""Generate human-readable dict for YAML.""" """Generate human-readable dict for YAML."""
if self.concrete: if self.concrete:
return {'version': str(self[0])} return syaml_dict([
('version', str(self[0]))
])
else: else:
return {'versions': [str(v) for v in self]} return syaml_dict([
('versions', [str(v) for v in self])
])
@staticmethod @staticmethod
def from_dict(dictionary): def from_dict(dictionary):