Faster database loading.
- Use a 3-pass algorithm to load the installed package DAG.
- Avoid redundant hashing/comparing on load.
This commit is contained in:
		@@ -198,7 +198,7 @@ def _write_to_yaml(self, stream):
 | 
			
		||||
        except YAMLError as e:
 | 
			
		||||
            raise SpackYAMLError("error writing YAML database:", str(e))
 | 
			
		||||
 | 
			
		||||
    def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
 | 
			
		||||
    def _read_spec_from_yaml(self, hash_key, installs):
 | 
			
		||||
        """Recursively construct a spec from a hash in a YAML database.
 | 
			
		||||
 | 
			
		||||
        Does not do any locking.
 | 
			
		||||
@@ -212,19 +212,27 @@ def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
 | 
			
		||||
 | 
			
		||||
        # Build spec from dict first.
 | 
			
		||||
        spec = Spec.from_node_dict(spec_dict)
 | 
			
		||||
        return spec
 | 
			
		||||
 | 
			
		||||
    def _assign_dependencies(self, hash_key, installs, data):
 | 
			
		||||
        # Add dependencies from other records in the install DB to
 | 
			
		||||
        # form a full spec.
 | 
			
		||||
        spec = data[hash_key].spec
 | 
			
		||||
        spec_dict = installs[hash_key]['spec']
 | 
			
		||||
 | 
			
		||||
        if 'dependencies' in spec_dict[spec.name]:
 | 
			
		||||
            yaml_deps = spec_dict[spec.name]['dependencies']
 | 
			
		||||
            for dname, dhash, dtypes in Spec.read_yaml_dep_specs(yaml_deps):
 | 
			
		||||
                child = self._read_spec_from_yaml(dhash, installs, hash_key)
 | 
			
		||||
                spec._add_dependency(child, dtypes)
 | 
			
		||||
                if dhash not in data:
 | 
			
		||||
                    tty.warn("Missing dependency not in database: ",
 | 
			
		||||
                             "%s needs %s-%s" % (
 | 
			
		||||
                                 spec.format('$_$#'), dname, dhash[:7]))
 | 
			
		||||
                    continue
 | 
			
		||||
 | 
			
		||||
        # Specs from the database need to be marked concrete because
 | 
			
		||||
        # they represent actual installations.
 | 
			
		||||
        spec._mark_concrete()
 | 
			
		||||
        return spec
 | 
			
		||||
                # defensive copy (not sure everything handles extra
 | 
			
		||||
                # parent links yet)
 | 
			
		||||
                child = data[dhash].spec
 | 
			
		||||
                spec._add_dependency(child, dtypes)
 | 
			
		||||
 | 
			
		||||
    def _read_from_yaml(self, stream):
 | 
			
		||||
        """
 | 
			
		||||
@@ -267,22 +275,22 @@ def check(cond, msg):
 | 
			
		||||
            self.reindex(spack.install_layout)
 | 
			
		||||
            installs = dict((k, v.to_dict()) for k, v in self._data.items())
 | 
			
		||||
 | 
			
		||||
        # Iterate through database and check each record.
 | 
			
		||||
        # Build up the database in three passes:
 | 
			
		||||
        #
 | 
			
		||||
        #   1. Read in all specs without dependencies.
 | 
			
		||||
        #   2. Hook dependencies up among specs.
 | 
			
		||||
        #   3. Mark all specs concrete.
 | 
			
		||||
        #
 | 
			
		||||
        # The database is built up so that ALL specs in it share nodes
 | 
			
		||||
        # (i.e., its specs are a true Merkle DAG, unlike most specs.)
 | 
			
		||||
 | 
			
		||||
        # Pass 1: Iterate through database and build specs w/o dependencies
 | 
			
		||||
        data = {}
 | 
			
		||||
        for hash_key, rec in installs.items():
 | 
			
		||||
            try:
 | 
			
		||||
                # This constructs a spec DAG from the list of all installs
 | 
			
		||||
                spec = self._read_spec_from_yaml(hash_key, installs)
 | 
			
		||||
 | 
			
		||||
                # Validate the spec by ensuring the stored and actual
 | 
			
		||||
                # hashes are the same.
 | 
			
		||||
                spec_hash = spec.dag_hash()
 | 
			
		||||
                if not spec_hash == hash_key:
 | 
			
		||||
                    tty.warn(
 | 
			
		||||
                        "Hash mismatch in database: %s -> spec with hash %s" %
 | 
			
		||||
                        (hash_key, spec_hash))
 | 
			
		||||
                    continue  # TODO: is skipping the right thing to do?
 | 
			
		||||
 | 
			
		||||
                # Insert the brand new spec in the database.  Each
 | 
			
		||||
                # spec has its own copies of its dependency specs.
 | 
			
		||||
                # TODO: would a more immutable spec implementation simplify
 | 
			
		||||
@@ -296,6 +304,18 @@ def check(cond, msg):
 | 
			
		||||
                         "cause: %s: %s" % (type(e).__name__, str(e)))
 | 
			
		||||
                raise
 | 
			
		||||
 | 
			
		||||
        # Pass 2: Assign dependencies once all specs are created.
 | 
			
		||||
        for hash_key in data:
 | 
			
		||||
            self._assign_dependencies(hash_key, installs, data)
 | 
			
		||||
 | 
			
		||||
        # Pass 3: Mark all specs concrete.  Specs representing real
 | 
			
		||||
        # installations must be explicitly marked.
 | 
			
		||||
        # We do this *after* all dependencies are connected because if we
 | 
			
		||||
        # do it *while* we're constructing specs, it causes hashes to be
 | 
			
		||||
        # cached prematurely.
 | 
			
		||||
        for hash_key, rec in data.items():
 | 
			
		||||
            rec.spec._mark_concrete()
 | 
			
		||||
 | 
			
		||||
        self._data = data
 | 
			
		||||
 | 
			
		||||
    def reindex(self, directory_layout):
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user