Clean up specs, spec comparison, and spec hashing.

- Spec comparison is now less strict
  - compares based on sorted list of dependencies but not
    their structure
  - Makes comparison easy when a spec is not normalized.

- This makes the dep_hash consistent for specs read in from a
  directory layout.  - Can now reliably read in a spec for which the
  package has gone away, and still be able to delete its install.
  - easy switching between git branches

- Fixed latent bug in Spec.flat_dependencies() (was including root)

- added a test for the directory layout so that this code will get
  more exercise.
This commit is contained in:
Todd Gamblin 2014-08-07 19:04:04 -07:00
parent d13d32040c
commit d5c625d87d
6 changed files with 312 additions and 64 deletions

View File

@ -81,7 +81,7 @@
# stage directories.
#
from spack.directory_layout import SpecHashDirectoryLayout
install_layout = SpecHashDirectoryLayout(install_path, prefix_size=6)
install_layout = SpecHashDirectoryLayout(install_path)
#
# This controls how things are concretized in spack.

View File

@ -30,6 +30,8 @@
from contextlib import closing
from llnl.util.filesystem import join_path, mkdirp
import spack
from spack.spec import Spec
from spack.error import SpackError
@ -131,12 +133,9 @@ def __init__(self, root, **kwargs):
"""Prefix size is number of characters in the SHA-1 prefix to use
to make each hash unique.
"""
prefix_size = kwargs.get('prefix_size', 8)
spec_file = kwargs.get('spec_file', '.spec')
spec_file_name = kwargs.get('spec_file_name', '.spec')
super(SpecHashDirectoryLayout, self).__init__(root)
self.prefix_size = prefix_size
self.spec_file = spec_file
self.spec_file_name = spec_file_name
def relative_path_for_spec(self, spec):
@ -154,16 +153,31 @@ def write_spec(self, spec, path):
def read_spec(self, path):
"""Read the contents of a file and parse them as a spec"""
with closing(open(path)) as spec_file:
string = spec_file.read().replace('\n', '')
# Specs from files are assumed normal and concrete
return Spec(string, concrete=True)
spec = Spec(spec_file.read().replace('\n', ''))
# If we do not have a package on hand for this spec, we know
# it is concrete, and we *assume* that it is normal. This
# prevents us from trying to fetch a non-existing package, and
# allows best effort for commands like spack find.
if not spack.db.exists(spec.name):
spec._normal = True
spec._concrete = True
return spec
def spec_file_path(self, spec):
"""Gets full path to spec file"""
_check_concrete(spec)
return join_path(self.path_for_spec(spec), self.spec_file_name)
def make_path_for_spec(self, spec):
_check_concrete(spec)
path = self.path_for_spec(spec)
spec_file_path = join_path(path, self.spec_file)
spec_file_path = self.spec_file_path(spec)
if os.path.isdir(path):
if not os.path.isfile(spec_file_path):
@ -177,8 +191,7 @@ def make_path_for_spec(self, spec):
spec_hash = self.hash_spec(spec)
installed_hash = self.hash_spec(installed_spec)
if installed_spec == spec_hash:
raise SpecHashCollisionError(
installed_hash, spec_hash, self.prefix_size)
raise SpecHashCollisionError(installed_hash, spec_hash)
else:
raise InconsistentInstallDirectoryError(
'Spec file in %s does not match SHA-1 hash!'
@ -195,7 +208,7 @@ def all_specs(self):
for path in traverse_dirs_at_depth(self.root, 3):
arch, compiler, last_dir = path
spec_file_path = join_path(
self.root, arch, compiler, last_dir, self.spec_file)
self.root, arch, compiler, last_dir, self.spec_file_name)
if os.path.exists(spec_file_path):
spec = self.read_spec(spec_file_path)
yield spec
@ -209,10 +222,10 @@ def __init__(self, message):
class SpecHashCollisionError(DirectoryLayoutError):
"""Raised when there is a hash collision in an SpecHashDirectoryLayout."""
def __init__(self, installed_spec, new_spec, prefix_size):
def __init__(self, installed_spec, new_spec):
super(SpecHashDirectoryLayout, self).__init__(
'Specs %s and %s have the same %d character SHA-1 prefix!'
% prefix_size, installed_spec, new_spec)
'Specs %s and %s have the same SHA-1 prefix!'
% installed_spec, new_spec)
class InconsistentInstallDirectoryError(DirectoryLayoutError):

View File

@ -94,6 +94,7 @@
import itertools
import hashlib
from StringIO import StringIO
from operator import attrgetter
import llnl.util.tty as tty
from llnl.util.lang import *
@ -352,10 +353,6 @@ def __init__(self, spec_like, *dep_like, **kwargs):
self._normal = kwargs.get('normal', False)
self._concrete = kwargs.get('concrete', False)
# Specs cannot be concrete and non-normal.
if self._concrete:
self._normal = True
# This allows users to construct a spec DAG with literals.
# Note that given two specs a and b, Spec(a) copies a, but
# Spec(a, b) will copy a but just add b as a dep.
@ -489,6 +486,8 @@ def preorder_traversal(self, visited=None, d=0, **kwargs):
"""
depth = kwargs.get('depth', False)
key_fun = kwargs.get('key', id)
if isinstance(key_fun, basestring):
key_fun = attrgetter(key_fun)
yield_root = kwargs.get('root', True)
cover = kwargs.get('cover', 'nodes')
direction = kwargs.get('direction', 'children')
@ -540,13 +539,15 @@ def prefix(self):
def dep_hash(self, length=None):
"""Return a hash representing the dependencies of this spec
This will always normalize first so that the hash is consistent.
"""Return a hash representing all dependencies of this spec
(direct and indirect).
This always normalizes first so that hash is consistent.
"""
self.normalize()
sha = hashlib.sha1()
sha.update(str(self.dependencies))
sha.update(self.dep_string())
full_hash = sha.hexdigest()
return full_hash[:length]
@ -661,19 +662,15 @@ def flat_dependencies(self):
This will work even on specs that are not normalized; i.e. specs
that have two instances of the same dependency in the DAG.
This is used as the first step of normalization.
This is the first step of normalization.
"""
# This ensures that the package descriptions themselves are consistent
if not self.virtual:
self.package.validate_dependencies()
# Once that is guaranteed, we know any constraint violations are due
# to the spec -- so they're the user's fault, not Spack's.
flat_deps = DependencyMap()
try:
for spec in self.preorder_traversal():
for spec in self.preorder_traversal(root=False):
if spec.name not in flat_deps:
new_spec = spec.copy(dependencies=False)
new_spec = spec.copy(deps=False)
flat_deps[spec.name] = new_spec
else:
@ -797,9 +794,11 @@ def normalize(self, **kwargs):
# Ensure first that all packages & compilers in the DAG exist.
self.validate_names()
# Then ensure that the packages referenced are sane, that the
# provided spec is sane, and that all dependency specs are in the
# root node of the spec. flat_dependencies will do this for us.
# Ensure that the package & dep descriptions are consistent & sane
if not self.virtual:
self.package.validate_dependencies()
# Get all the dependencies into one DependencyMap
spec_deps = self.flat_dependencies()
self.dependencies.clear()
@ -1028,7 +1027,7 @@ def _dup(self, other, **kwargs):
self.compiler = other.compiler.copy()
self.dependents = DependencyMap()
copy_deps = kwargs.get('dependencies', True)
copy_deps = kwargs.get('deps', True)
if copy_deps:
self.dependencies = other.dependencies.copy()
else:
@ -1074,9 +1073,65 @@ def __contains__(self, spec):
return False
def _cmp_key(self):
def sorted_deps(self):
"""Return a list of all dependencies sorted by name."""
deps = self.flat_dependencies()
return tuple(deps[name] for name in sorted(deps))
def _eq_dag(self, other, vs, vo):
"""Test that entire dependency DAGs are equal."""
vs.add(id(self))
vo.add(id(other))
if self._cmp_node() != other._cmp_node():
return False
if len(self.dependencies) != len(other.dependencies):
return False
ssorted = [self.dependencies[name] for name in sorted(self.dependencies)]
osorted = [other.dependencies[name] for name in sorted(other.dependencies)]
for s, o in zip(ssorted, osorted):
# Check for duplicate or non-equal dependencies
if (id(s) in vs) != (id(o) in vo):
return False
# Skip visited nodes
if id(s) in vs:
continue
# Recursive check for equality
if not s._eq_dag(o, vs, vo):
return False
return True
def eq_dag(self, other):
"""True if the entire dependency DAG of this spec is equal to another."""
return self._eq_dag(other, set(), set())
def ne_dag(self, other):
"""True if the entire dependency DAG of this spec is not equal to
another."""
return not self.eq_dag(other)
def _cmp_node(self):
"""Comparison key for just *this node* and not its deps."""
return (self.name, self.versions, self.variants,
self.architecture, self.compiler, self.dependencies)
self.architecture, self.compiler)
def _cmp_key(self):
"""Comparison key for this node and all dependencies *without*
considering structure. This is the default, as
normalization will restore structure.
"""
return self._cmp_node() + (self.sorted_deps(),)
def colorized(self):
@ -1179,12 +1234,12 @@ def write(s, c):
return result
def dep_string(self):
return ''.join("^" + dep.format() for dep in self.sorted_deps())
def __str__(self):
by_name = lambda d: d.name
deps = self.preorder_traversal(key=by_name, root=False)
sorted_deps = sorted(deps, key=by_name)
dep_string = ''.join("^" + dep.format() for dep in sorted_deps)
return self.format() + dep_string
return self.format() + self.dep_string()
def tree(self, **kwargs):

View File

@ -45,7 +45,8 @@
'multimethod',
'install',
'package_sanity',
'config']
'config',
'directory_layout']
def list_tests():

View File

@ -0,0 +1,141 @@
##############################################################################
# Copyright (c) 2013, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory.
#
# This file is part of Spack.
# Written by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
# LLNL-CODE-647188
#
# For details, see https://scalability-llnl.github.io/spack
# Please also see the LICENSE file for our notice and the LGPL.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License (as published by
# the Free Software Foundation) version 2.1 dated February 1999.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
# conditions of the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##############################################################################
"""\
This test verifies that the Spack directory layout works properly.
"""
import unittest
import tempfile
import shutil
import os
from llnl.util.filesystem import *
import spack
from spack.packages import PackageDB
from spack.directory_layout import SpecHashDirectoryLayout
class DirectoryLayoutTest(unittest.TestCase):
"""Tests that a directory layout works correctly and produces a
consistent install path."""
def setUp(self):
self.tmpdir = tempfile.mkdtemp()
self.layout = SpecHashDirectoryLayout(self.tmpdir)
def tearDown(self):
shutil.rmtree(self.tmpdir, ignore_errors=True)
self.layout = None
def test_read_and_write_spec(self):
"""This goes through each package in spack and creates a directory for
it. It then ensures that the spec for the directory's
installed package can be read back in consistently, and
finally that the directory can be removed by the directory
layout.
"""
for pkg in spack.db.all_packages():
spec = pkg.spec
# If a spec fails to concretize, just skip it. If it is a
# real error, it will be caught by concretization tests.
try:
spec.concretize()
except:
continue
self.layout.make_path_for_spec(spec)
install_dir = self.layout.path_for_spec(spec)
spec_path = self.layout.spec_file_path(spec)
# Ensure directory has been created in right place.
self.assertTrue(os.path.isdir(install_dir))
self.assertTrue(install_dir.startswith(self.tmpdir))
# Ensure spec file exists when directory is created
self.assertTrue(os.path.isfile(spec_path))
self.assertTrue(spec_path.startswith(install_dir))
# Make sure spec file can be read back in to get the original spec
spec_from_file = self.layout.read_spec(spec_path)
self.assertEqual(spec, spec_from_file)
# Make sure the dep hash of the read-in spec is the same
self.assertEqual(spec.dep_hash(), spec_from_file.dep_hash())
# Ensure directories are properly removed
self.layout.remove_path_for_spec(spec)
self.assertFalse(os.path.isdir(install_dir))
self.assertFalse(os.path.exists(install_dir))
def test_handle_unknown_package(self):
"""This test ensures that spack can at least do *some*
operations with packages that are installed but that it
does not know about. This is actually not such an uncommon
scenario with spack; it can happen when you switch from a
git branch where you're working on a new package.
This test ensures that the directory layout stores enough
information about installed packages' specs to uninstall
or query them again if the package goes away.
"""
mock_db = PackageDB(spack.mock_packages_path)
not_in_mock = set(spack.db.all_package_names()).difference(
set(mock_db.all_package_names()))
# Create all the packages that are not in mock.
installed_specs = {}
for pkg_name in not_in_mock:
spec = spack.db.get(pkg_name).spec
# If a spec fails to concretize, just skip it. If it is a
# real error, it will be caught by concretization tests.
try:
spec.concretize()
except:
continue
self.layout.make_path_for_spec(spec)
installed_specs[spec] = self.layout.path_for_spec(spec)
tmp = spack.db
spack.db = mock_db
# Now check that even without the package files, we know
# enough to read a spec from the spec file.
for spec, path in installed_specs.items():
spec_from_file = self.layout.read_spec(join_path(path, '.spec'))
# To satisfy these conditions, directory layouts need to
# read in concrete specs from their install dirs somehow.
self.assertEqual(path, self.layout.path_for_spec(spec_from_file))
self.assertEqual(spec, spec_from_file)
self.assertEqual(spec.dep_hash(), spec_from_file.dep_hash())
spack.db = tmp

View File

@ -221,30 +221,53 @@ def test_invalid_dep(self):
def test_equal(self):
spec = Spec('mpileaks ^callpath ^libelf ^libdwarf')
self.assertNotEqual(spec, Spec(
'mpileaks', Spec('callpath',
# Different spec structures to test for equality
flat = Spec('mpileaks ^callpath ^libelf ^libdwarf')
flat_init = Spec(
'mpileaks', Spec('callpath'), Spec('libdwarf'), Spec('libelf'))
flip_flat = Spec(
'mpileaks', Spec('libelf'), Spec('libdwarf'), Spec('callpath'))
dag = Spec('mpileaks', Spec('callpath',
Spec('libdwarf',
Spec('libelf')))))
self.assertNotEqual(spec, Spec(
'mpileaks', Spec('callpath',
Spec('libelf'))))
flip_dag = Spec('mpileaks', Spec('callpath',
Spec('libelf',
Spec('libdwarf')))))
Spec('libdwarf'))))
self.assertEqual(spec, Spec(
'mpileaks', Spec('callpath'), Spec('libdwarf'), Spec('libelf')))
# All these are equal to each other with regular ==
specs = (flat, flat_init, flip_flat, dag, flip_dag)
for lhs, rhs in zip(specs, specs):
self.assertEqual(lhs, rhs)
self.assertEqual(str(lhs), str(rhs))
self.assertEqual(spec, Spec(
'mpileaks', Spec('libelf'), Spec('libdwarf'), Spec('callpath')))
# Same DAGs constructed different ways are equal
self.assertTrue(flat.eq_dag(flat_init))
# order at same level does not matter -- (dep on same parent)
self.assertTrue(flat.eq_dag(flip_flat))
# DAGs should be unequal if nesting is different
self.assertFalse(flat.eq_dag(dag))
self.assertFalse(flat.eq_dag(flip_dag))
self.assertFalse(flip_flat.eq_dag(dag))
self.assertFalse(flip_flat.eq_dag(flip_dag))
self.assertFalse(dag.eq_dag(flip_dag))
def test_normalize_mpileaks(self):
# Spec parsed in from a string
spec = Spec('mpileaks ^mpich ^callpath ^dyninst ^libelf@1.8.11 ^libdwarf')
# What that spec should look like after parsing
expected_flat = Spec(
'mpileaks', Spec('mpich'), Spec('callpath'), Spec('dyninst'),
Spec('libelf@1.8.11'), Spec('libdwarf'))
# What it should look like after normalization
mpich = Spec('mpich')
libelf = Spec('libelf@1.8.11')
expected_normalized = Spec(
@ -257,7 +280,10 @@ def test_normalize_mpileaks(self):
mpich),
mpich)
expected_non_unique_nodes = Spec(
# Similar to normalized spec, but now with copies of the same
# libelf node. Normalization should result in a single unique
# node for each package, so this is the wrong DAG.
non_unique_nodes = Spec(
'mpileaks',
Spec('callpath',
Spec('dyninst',
@ -267,21 +293,33 @@ def test_normalize_mpileaks(self):
mpich),
Spec('mpich'))
self.assertEqual(expected_normalized, expected_non_unique_nodes)
self.assertEqual(str(expected_normalized), str(expected_non_unique_nodes))
self.assertEqual(str(spec), str(expected_non_unique_nodes))
self.assertEqual(str(expected_normalized), str(spec))
# All specs here should be equal under regular equality
specs = (spec, expected_flat, expected_normalized, non_unique_nodes)
for lhs, rhs in zip(specs, specs):
self.assertEqual(lhs, rhs)
self.assertEqual(str(lhs), str(rhs))
# Test that equal and equal_dag are doing the right thing
self.assertEqual(spec, expected_flat)
self.assertNotEqual(spec, expected_normalized)
self.assertNotEqual(spec, expected_non_unique_nodes)
self.assertTrue(spec.eq_dag(expected_flat))
self.assertEqual(spec, expected_normalized)
self.assertFalse(spec.eq_dag(expected_normalized))
self.assertEqual(spec, non_unique_nodes)
self.assertFalse(spec.eq_dag(non_unique_nodes))
spec.normalize()
self.assertNotEqual(spec, expected_flat)
# After normalizing, spec_dag_equal should match the normalized spec.
self.assertEqual(spec, expected_flat)
self.assertFalse(spec.eq_dag(expected_flat))
self.assertEqual(spec, expected_normalized)
self.assertEqual(spec, expected_non_unique_nodes)
self.assertTrue(spec.eq_dag(expected_normalized))
self.assertEqual(spec, non_unique_nodes)
self.assertFalse(spec.eq_dag(non_unique_nodes))
def test_normalize_with_virtual_package(self):