Merge pull request #1535 from LLNL/bugfix/faster-install-db-gh1521
[WIP] Faster database loading, faster in-memory hashing
Commit f5bc0cbb65
lib/spack/spack/database.py

@@ -164,6 +164,9 @@ def __init__(self, root, db_dir=None):
         self.lock = Lock(self._lock_path)
         self._data = {}
 
+        # whether there was an error at the start of a read transaction
+        self._error = None
+
     def write_transaction(self, timeout=_db_lock_timeout):
         """Get a write lock context manager for use in a `with` block."""
         return WriteTransaction(self.lock, self._read, self._write, timeout)
@@ -198,7 +201,7 @@ def _write_to_yaml(self, stream):
         except YAMLError as e:
             raise SpackYAMLError("error writing YAML database:", str(e))
 
-    def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
+    def _read_spec_from_yaml(self, hash_key, installs):
         """Recursively construct a spec from a hash in a YAML database.
 
         Does not do any locking.
@@ -212,19 +215,27 @@ def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
 
         # Build spec from dict first.
         spec = Spec.from_node_dict(spec_dict)
+        return spec
 
+    def _assign_dependencies(self, hash_key, installs, data):
         # Add dependencies from other records in the install DB to
         # form a full spec.
+        spec = data[hash_key].spec
+        spec_dict = installs[hash_key]['spec']
+
         if 'dependencies' in spec_dict[spec.name]:
             yaml_deps = spec_dict[spec.name]['dependencies']
             for dname, dhash, dtypes in Spec.read_yaml_dep_specs(yaml_deps):
-                child = self._read_spec_from_yaml(dhash, installs, hash_key)
-                spec._add_dependency(child, dtypes)
+                if dhash not in data:
+                    tty.warn("Missing dependency not in database: ",
+                             "%s needs %s-%s" % (
+                                 spec.format('$_$#'), dname, dhash[:7]))
+                    continue
 
-        # Specs from the database need to be marked concrete because
-        # they represent actual installations.
-        spec._mark_concrete()
-        return spec
+                # defensive copy (not sure everything handles extra
+                # parent links yet)
+                child = data[dhash].spec
+                spec._add_dependency(child, dtypes)
 
     def _read_from_yaml(self, stream):
         """
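The hunk above splits spec loading into two steps: _read_spec_from_yaml now builds only a dependency-free node, and _assign_dependencies later wires children by looking up records already in `data`, so every spec in the database shares a single node per hash. A minimal standalone sketch of this two-phase pattern (the `Node` class and record layout are illustrative, not Spack's API):

# Two-phase DAG loading: build all nodes first, then connect edges by
# hash so every parent shares the same child object.
class Node(object):
    def __init__(self, name):
        self.name = name
        self.deps = {}          # name -> Node

def load(records):
    # Pass 1: one Node per hash, no edges yet.
    nodes = dict((h, Node(rec['name'])) for h, rec in records.items())

    # Pass 2: wire edges; children are looked up, never re-built,
    # so the result is a true Merkle DAG with shared nodes.
    for h, rec in records.items():
        for dep_hash in rec.get('deps', ()):
            child = nodes[dep_hash]
            nodes[h].deps[child.name] = child
    return nodes

records = {
    'aaa': {'name': 'libdwarf', 'deps': ['bbb']},
    'bbb': {'name': 'libelf', 'deps': []},
    'ccc': {'name': 'dyninst', 'deps': ['aaa', 'bbb']},
}
nodes = load(records)
# 'libelf' is one shared object, reachable through both of its parents:
assert nodes['ccc'].deps['libdwarf'].deps['libelf'] is nodes['ccc'].deps['libelf']

Because children are shared rather than re-read recursively, loading is linear in the number of records instead of re-parsing each subtree once per parent.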
@@ -248,7 +259,8 @@ def _read_from_yaml(self, stream):
 
         def check(cond, msg):
             if not cond:
-                raise CorruptDatabaseError(self._index_path, msg)
+                raise CorruptDatabaseError(
+                    "Spack database is corrupt: %s" % msg, self._index_path)
 
         check('database' in yfile, "No 'database' attribute in YAML.")
 
@@ -267,22 +279,28 @@ def check(cond, msg):
             self.reindex(spack.install_layout)
         installs = dict((k, v.to_dict()) for k, v in self._data.items())
 
-        # Iterate through database and check each record.
+        def invalid_record(hash_key, error):
+            msg = ("Invalid record in Spack database: "
+                   "hash: %s, cause: %s: %s")
+            msg %= (hash_key, type(error).__name__, str(error))
+            raise CorruptDatabaseError(msg, self._index_path)
+
+        # Build up the database in three passes:
+        #
+        # 1. Read in all specs without dependencies.
+        # 2. Hook dependencies up among specs.
+        # 3. Mark all specs concrete.
+        #
+        # The database is built up so that ALL specs in it share nodes
+        # (i.e., its specs are a true Merkle DAG, unlike most specs.)
+
+        # Pass 1: Iterate through database and build specs w/o dependencies
        data = {}
        for hash_key, rec in installs.items():
            try:
                # This constructs a spec DAG from the list of all installs
                spec = self._read_spec_from_yaml(hash_key, installs)
 
-                # Validate the spec by ensuring the stored and actual
-                # hashes are the same.
-                spec_hash = spec.dag_hash()
-                if not spec_hash == hash_key:
-                    tty.warn(
-                        "Hash mismatch in database: %s -> spec with hash %s" %
-                        (hash_key, spec_hash))
-                    continue  # TODO: is skipping the right thing to do?
-
                # Insert the brand new spec in the database.  Each
                # spec has its own copies of its dependency specs.
                # TODO: would a more immutable spec implementation simplify
@@ -290,11 +308,22 @@ def check(cond, msg):
                data[hash_key] = InstallRecord.from_dict(spec, rec)
 
            except Exception as e:
-                tty.warn("Invalid database reecord:",
-                         "file: %s" % self._index_path,
-                         "hash: %s" % hash_key,
-                         "cause: %s: %s" % (type(e).__name__, str(e)))
-                raise
+                invalid_record(hash_key, e)
+
+        # Pass 2: Assign dependencies once all specs are created.
+        for hash_key in data:
+            try:
+                self._assign_dependencies(hash_key, installs, data)
+            except Exception as e:
+                invalid_record(hash_key, e)
+
+        # Pass 3: Mark all specs concrete.  Specs representing real
+        # installations must be explicitly marked.
+        # We do this *after* all dependencies are connected because if we
+        # do it *while* we're constructing specs, it causes hashes to be
+        # cached prematurely.
+        for hash_key, rec in data.items():
+            rec.spec._mark_concrete()
 
        self._data = data
 
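Pass 3 is separate because Spec.dag_hash() caches its result on concrete specs; marking a spec concrete while its dependencies are still being attached would freeze a hash computed from an incomplete DAG. A small sketch of the hazard, using hypothetical classes rather than Spack's own:

# Why concreteness (and thus hash caching) must wait until the DAG is
# fully wired: a node marked concrete too early caches a stale hash.
import hashlib

class Node(object):
    def __init__(self, name):
        self.name = name
        self.deps = []
        self.concrete = False
        self._hash = None

    def dag_hash(self):
        # Like Spec.dag_hash(): cache only once the node is concrete.
        if self.concrete and self._hash is not None:
            return self._hash
        content = self.name + ''.join(d.dag_hash() for d in self.deps)
        h = hashlib.sha1(content.encode('utf-8')).hexdigest()
        if self.concrete:
            self._hash = h
        return h

a, b = Node('a'), Node('b')
a.concrete = True          # marked too early...
premature = a.dag_hash()   # ...so a dependency-free hash gets cached
a.deps.append(b)
assert a.dag_hash() == premature   # stale: the new edge is invisible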
@@ -304,7 +333,26 @@ def reindex(self, directory_layout):
         Locks the DB if it isn't locked already.
 
         """
-        with self.write_transaction():
+        # Special transaction to avoid recursive reindex calls and to
+        # ignore errors if we need to rebuild a corrupt database.
+        def _read_suppress_error():
+            try:
+                if os.path.isfile(self._index_path):
+                    self._read_from_yaml(self._index_path)
+            except CorruptDatabaseError as e:
+                self._error = e
+                self._data = {}
+
+        transaction = WriteTransaction(
+            self.lock, _read_suppress_error, self._write, _db_lock_timeout)
+
+        with transaction:
+            if self._error:
+                tty.warn(
+                    "Spack database was corrupt. Will rebuild. Error was:",
+                    str(self._error))
+                self._error = None
+
             old_data = self._data
             try:
                 self._data = {}
@@ -313,10 +361,15 @@ def reindex(self, directory_layout):
                 for spec in directory_layout.all_specs():
                     # Create a spec for each known package and add it.
                     path = directory_layout.path_for_spec(spec)
-                    old_info = old_data.get(spec.dag_hash())
+
+                    # Try to recover explicit value from old DB, but
+                    # default it to False if DB was corrupt.
                     explicit = False
-                    if old_info is not None:
-                        explicit = old_info.explicit
+                    if old_data is not None:
+                        old_info = old_data.get(spec.dag_hash())
+                        if old_info is not None:
+                            explicit = old_info.explicit
 
                     self._add(spec, path, directory_layout, explicit=explicit)
 
                 self._check_ref_counts()
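reindex() cannot use the normal write_transaction() here: that transaction's read step is _read, which would itself raise on a corrupt index (or recursively trigger another reindex). Instead it records the error in self._error and rebuilds inside the lock. Roughly, the acquire/release shape of such a transaction looks like the sketch below; these are hypothetical stand-ins, not the llnl.util.lock implementation, and real locking is omitted:

# Minimal model of the transaction pattern used above: run a read
# callback on entry and a write callback on exit. reindex() swaps in
# a read that suppresses corruption errors instead of raising.
class Transaction(object):
    def __init__(self, read_fn, write_fn):
        self.read_fn = read_fn
        self.write_fn = write_fn

    def __enter__(self):
        self.read_fn()       # e.g. _read_suppress_error: sets db._error
        return self          # rather than raising on a corrupt index

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is None:
            self.write_fn()  # persist the rebuilt index on success
        return False         # never swallow exceptions from the body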
@@ -601,11 +654,7 @@ def missing(self, spec):
 
 
 class CorruptDatabaseError(SpackError):
-    def __init__(self, path, msg=''):
-        super(CorruptDatabaseError, self).__init__(
-            "Spack database is corrupt: %s. %s." % (path, msg),
-            "Try running `spack reindex` to fix.")
+    """Raised when errors are found while reading the database."""
 
 
 class InvalidDatabaseVersionError(SpackError):
lib/spack/spack/spec.py

@@ -504,6 +504,7 @@ def __init__(self, spec_like, *dep_like, **kwargs):
         self.variants.spec = self
         self.namespace = other.namespace
         self._hash = other._hash
+        self._cmp_key_cache = other._cmp_key_cache
 
         # Specs are by default not assumed to be normal, but in some
         # cases we've read them from a file and want to assume normal.
@@ -858,9 +859,10 @@ def return_val(res):
             # Edge traversal yields but skips children of visited nodes
             if not (key in visited and cover == 'edges'):
                 # This code determines direction and yields the children/parents
 
                 successors = deps
                 if direction == 'parents':
-                    successors = self.dependents_dict()
+                    successors = self.dependents_dict()  # TODO: deptype?
 
                 visited.add(key)
                 for name in sorted(successors):
@@ -1278,15 +1280,15 @@ def concretize(self):
         # Mark everything in the spec as concrete, as well.
         self._mark_concrete()
 
-    def _mark_concrete(self):
+    def _mark_concrete(self, value=True):
         """Mark this spec and its dependencies as concrete.
 
         Only for internal use -- client code should use "concretize"
         unless there is a need to force a spec to be concrete.
         """
         for s in self.traverse(deptype_query=alldeps):
-            s._normal = True
-            s._concrete = True
+            s._normal = value
+            s._concrete = value
 
     def concretized(self):
         """This is a non-destructive version of concretize().  First clones,
@@ -1533,6 +1535,10 @@ def normalize(self, force=False):
         if self._normal and not force:
             return False
 
+        # avoid any assumptions about concreteness when forced
+        if force:
+            self._mark_concrete(False)
+
         # Ensure first that all packages & compilers in the DAG exist.
         self.validate_names()
         # Get all the dependencies into one DependencyMap
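With the new value parameter, _mark_concrete doubles as an "unmark" operation: a forced normalize first clears the cached _normal/_concrete flags so nothing stale survives recomputation. A hedged sketch of the call pattern (the spec string is hypothetical):

s = Spec('libelf@0.8.13')   # hypothetical spec
s._mark_concrete()          # every node: _normal = _concrete = True
s._mark_concrete(False)     # force-normalize path: reset both flags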
@@ -1865,7 +1871,7 @@ def virtual_dependencies(self):
         """Return list of any virtual deps in this spec."""
         return [spec for spec in self.traverse() if spec.virtual]
 
-    def _dup(self, other, **kwargs):
+    def _dup(self, other, deps=True, cleardeps=True):
         """Copy the spec other into self.  This is an overwriting
         copy.  It does not copy any dependents (parents), but by default
         copies dependencies.
@@ -1896,7 +1902,7 @@ def _dup(self, other, **kwargs):
         self.versions = other.versions.copy()
         self.architecture = other.architecture
         self.compiler = other.compiler.copy() if other.compiler else None
-        if kwargs.get('cleardeps', True):
+        if cleardeps:
             self._dependents = DependencyMap()
             self._dependencies = DependencyMap()
         self.compiler_flags = other.compiler_flags.copy()
@@ -1906,19 +1912,15 @@ def _dup(self, other, **kwargs):
         self.external_module = other.external_module
         self.namespace = other.namespace
         self._hash = other._hash
+        self._cmp_key_cache = other._cmp_key_cache
 
         # If we copy dependencies, preserve DAG structure in the new spec
-        if kwargs.get('deps', True):
+        if deps:
             # This copies the deps from other using _dup(deps=False)
-            # XXX(deptype): We can keep different instances of specs here iff
-            # it is only a 'build' dependency (from its parent).
-            # All other instances must be shared (due to symbol
-            # and PATH contention). These should probably search
-            # for any existing installation which can satisfy the
-            # build and latch onto that because if 3 things need
-            # the same build dependency and it is *not*
-            # available, we only want to build it once.
-            new_nodes = other.flat_dependencies(deptype_query=alldeps)
+            deptypes = alldeps
+            if isinstance(deps, (tuple, list)):
+                deptypes = deps
+            new_nodes = other.flat_dependencies(deptypes=deptypes)
             new_nodes[self.name] = self
 
             stack = [other]
@@ -1927,6 +1929,9 @@ def _dup(self, other, **kwargs):
                 new_spec = new_nodes[cur_spec.name]
 
                 for depspec in cur_spec._dependencies.values():
+                    if not any(d in deptypes for d in depspec.deptypes):
+                        continue
+
                     stack.append(depspec.spec)
 
                     # XXX(deptype): add any new deptypes that may have appeared
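The filter added above decides whether to follow an edge: an edge is copied iff any of its deptypes is in the requested set. The same predicate in isolation, with illustrative data:

# An edge is followed iff any of its deptypes is requested.
deptypes = ('link', 'run')
edges = [('mpich', ('build',)), ('libelf', ('link',))]
kept = [name for name, dts in edges
        if any(d in deptypes for d in dts)]
assert kept == ['libelf']   # the build-only edge is skipped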
@@ -1942,13 +1947,22 @@ def _dup(self, other, **kwargs):
         self.external_module = other.external_module
         return changed
 
-    def copy(self, **kwargs):
+    def copy(self, deps=True):
         """Return a copy of this spec.
-        By default, returns a deep copy. Supply dependencies=False
-        to get a shallow copy.
+
+        By default, returns a deep copy. To control how dependencies are
+        copied, supply:
+
+        deps=True: deep copy
+
+        deps=False: shallow copy (no dependencies)
+
+        deps=('link', 'build'):
+            only build and link dependencies.  Similar for other deptypes.
+
         """
         clone = Spec.__new__(Spec)
-        clone._dup(self, **kwargs)
+        clone._dup(self, deps=deps)
         return clone
 
     @property
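The new deps parameter accepts either a boolean or a tuple of deptypes, replacing the old **kwargs; the docstring above gives the contract. A short usage sketch (the spec string is hypothetical; the ('link', 'run') case is what the directory layout test below now uses):

s = Spec('mpileaks ^mpich')             # hypothetical spec
full = s.copy()                         # deps=True: deep copy, all deptypes
shallow = s.copy(deps=False)            # no dependencies at all
runtime = s.copy(deps=('link', 'run'))  # only link/run edges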
@@ -2059,10 +2073,17 @@ def _cmp_key(self):
           1. A tuple describing this node in the DAG.
           2. The hash of each of this node's dependencies' cmp_keys.
         """
-        dep_dict = self.dependencies_dict(deptype=('link', 'run'))
-        return self._cmp_node() + (
-            tuple(hash(dep_dict[name])
-                  for name in sorted(dep_dict)),)
+        if self._cmp_key_cache:
+            return self._cmp_key_cache
+
+        dep_tuple = tuple(
+            (d.spec.name, hash(d.spec), tuple(sorted(d.deptypes)))
+            for name, d in sorted(self._dependencies.items()))
+
+        key = (self._cmp_node(), dep_tuple)
+        if self._concrete:
+            self._cmp_key_cache = key
+        return key
 
     def colorized(self):
         return colorize_spec(self)
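This is the "faster in-memory hashing" half of the PR title: equality and hashing both go through _cmp_key, and caching it is safe only for concrete specs, whose DAG can no longer change. The same guard-and-cache pattern in miniature (hypothetical class, not Spec itself):

# Memoize the comparison key only once the object is immutable.
class Keyed(object):
    def __init__(self, name):
        self.name = name
        self.concrete = False
        self._cmp_key_cache = None

    def _cmp_key(self):
        if self._cmp_key_cache:
            return self._cmp_key_cache   # hit: skip recomputation
        key = (self.name,)               # stand-in for the real tuple
        if self.concrete:
            self._cmp_key_cache = key    # cache only when immutable
        return key

    def __eq__(self, other):
        return self._cmp_key() == other._cmp_key()

    def __hash__(self):
        return hash(self._cmp_key())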
@@ -2457,6 +2478,7 @@ def spec(self, name, check_valid_token=False):
             spec._dependencies = DependencyMap()
             spec.namespace = spec_namespace
             spec._hash = None
+            spec._cmp_key_cache = None
 
             spec._normal = False
             spec._concrete = False
lib/spack/spack/test/directory_layout.py

@@ -91,22 +91,33 @@ def test_read_and_write_spec(self):
 
         # Make sure spec file can be read back in to get the original spec
         spec_from_file = self.layout.read_spec(spec_path)
-        self.assertEqual(spec, spec_from_file)
-        self.assertTrue(spec.eq_dag, spec_from_file)
+
+        # currently we don't store build dependency information when
+        # we write out specs to the filesystem.
+
+        # TODO: fix this when we can concretize more loosely based on
+        # TODO: what is installed. We currently omit these to
+        # TODO: increase reuse of build dependencies.
+        stored_deptypes = ('link', 'run')
+        expected = spec.copy(deps=stored_deptypes)
+        self.assertEqual(expected, spec_from_file)
+        self.assertTrue(expected.eq_dag, spec_from_file)
         self.assertTrue(spec_from_file.concrete)
 
         # Ensure that specs that come out "normal" are really normal.
         with open(spec_path) as spec_file:
             read_separately = Spec.from_yaml(spec_file.read())
 
-        read_separately.normalize()
-        self.assertEqual(read_separately, spec_from_file)
+        # TODO: revise this when build deps are in dag_hash
+        norm = read_separately.normalized().copy(deps=stored_deptypes)
+        self.assertEqual(norm, spec_from_file)
 
-        read_separately.concretize()
-        self.assertEqual(read_separately, spec_from_file)
+        # TODO: revise this when build deps are in dag_hash
+        conc = read_separately.concretized().copy(deps=stored_deptypes)
+        self.assertEqual(conc, spec_from_file)
 
         # Make sure the hash of the read-in spec is the same
-        self.assertEqual(spec.dag_hash(), spec_from_file.dag_hash())
+        self.assertEqual(expected.dag_hash(), spec_from_file.dag_hash())
 
         # Ensure directories are properly removed
         self.layout.remove_install_directory(spec)