Faster database loading.
- use a 3-pass algorithm to load the installed package DAG.
- avoid redundant hashing/comparing on load.
This commit is contained in:
parent
a8aad95d41
commit
409e7a2e64
@ -198,7 +198,7 @@ def _write_to_yaml(self, stream):
|
||||
except YAMLError as e:
|
||||
raise SpackYAMLError("error writing YAML database:", str(e))
|
||||
|
||||
def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
|
||||
def _read_spec_from_yaml(self, hash_key, installs):
|
||||
"""Recursively construct a spec from a hash in a YAML database.
|
||||
|
||||
Does not do any locking.
|
||||
@ -212,19 +212,27 @@ def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
|
||||
|
||||
# Build spec from dict first.
|
||||
spec = Spec.from_node_dict(spec_dict)
|
||||
return spec
|
||||
|
||||
def _assign_dependencies(self, hash_key, installs, data):
|
||||
# Add dependencies from other records in the install DB to
|
||||
# form a full spec.
|
||||
spec = data[hash_key].spec
|
||||
spec_dict = installs[hash_key]['spec']
|
||||
|
||||
if 'dependencies' in spec_dict[spec.name]:
|
||||
yaml_deps = spec_dict[spec.name]['dependencies']
|
||||
for dname, dhash, dtypes in Spec.read_yaml_dep_specs(yaml_deps):
|
||||
child = self._read_spec_from_yaml(dhash, installs, hash_key)
|
||||
spec._add_dependency(child, dtypes)
|
||||
if dhash not in data:
|
||||
tty.warn("Missing dependency not in database: ",
|
||||
"%s needs %s-%s" % (
|
||||
spec.format('$_$#'), dname, dhash[:7]))
|
||||
continue
|
||||
|
||||
# Specs from the database need to be marked concrete because
|
||||
# they represent actual installations.
|
||||
spec._mark_concrete()
|
||||
return spec
|
||||
# defensive copy (not sure everything handles extra
|
||||
# parent links yet)
|
||||
child = data[dhash].spec
|
||||
spec._add_dependency(child, dtypes)
|
||||
|
||||
def _read_from_yaml(self, stream):
|
||||
"""
|
||||
@ -267,22 +275,22 @@ def check(cond, msg):
|
||||
self.reindex(spack.install_layout)
|
||||
installs = dict((k, v.to_dict()) for k, v in self._data.items())
|
||||
|
||||
# Iterate through database and check each record.
|
||||
# Build up the database in three passes:
|
||||
#
|
||||
# 1. Read in all specs without dependencies.
|
||||
# 2. Hook dependencies up among specs.
|
||||
# 3. Mark all specs concrete.
|
||||
#
|
||||
# The database is built up so that ALL specs in it share nodes
|
||||
# (i.e., its specs are a true Merkle DAG, unlike most specs.)
|
||||
|
||||
# Pass 1: Iterate through database and build specs w/o dependencies
|
||||
data = {}
|
||||
for hash_key, rec in installs.items():
|
||||
try:
|
||||
# This constructs a spec DAG from the list of all installs
|
||||
spec = self._read_spec_from_yaml(hash_key, installs)
|
||||
|
||||
# Validate the spec by ensuring the stored and actual
|
||||
# hashes are the same.
|
||||
spec_hash = spec.dag_hash()
|
||||
if not spec_hash == hash_key:
|
||||
tty.warn(
|
||||
"Hash mismatch in database: %s -> spec with hash %s" %
|
||||
(hash_key, spec_hash))
|
||||
continue # TODO: is skipping the right thing to do?
|
||||
|
||||
# Insert the brand new spec in the database. Each
|
||||
# spec has its own copies of its dependency specs.
|
||||
# TODO: would a more immutable spec implementation simplify
|
||||
@ -296,6 +304,18 @@ def check(cond, msg):
|
||||
"cause: %s: %s" % (type(e).__name__, str(e)))
|
||||
raise
|
||||
|
||||
# Pass 2: Assign dependencies once all specs are created.
|
||||
for hash_key in data:
|
||||
self._assign_dependencies(hash_key, installs, data)
|
||||
|
||||
# Pass 3: Mark all specs concrete. Specs representing real
|
||||
# installations must be explicitly marked.
|
||||
# We do this *after* all dependencies are connected because if we
|
||||
# do it *while* we're constructing specs, it causes hashes to be
|
||||
# cached prematurely.
|
||||
for hash_key, rec in data.items():
|
||||
rec.spec._mark_concrete()
|
||||
|
||||
self._data = data
|
||||
|
||||
def reindex(self, directory_layout):
|
||||
|
Loading…
Reference in New Issue
Block a user