Add ref counting to database. This does not handle removal properly yet.

2015-09-18 11:40:05 -07:00
parent fb73979345
commit d0e22b2240
6 changed files with 183 additions and 55 deletions
--- a/lib/spack/spack/cmd/init.py
+++ b/lib/spack/spack/cmd/init.py
@@ -125,7 +125,7 @@ def elide_list(line_list, max_num=10):
 def disambiguate_spec(spec):
    with spack.installed_db.read_lock():
-        matching_specs = spack.installed_db.get_installed(spec)
+        matching_specs = spack.installed_db.query(spec)
        if not matching_specs:
            tty.die("Spec '%s' matches no installed packages." % spec)
--- a/lib/spack/spack/cmd/find.py
+++ b/lib/spack/spack/cmd/find.py
@@ -54,6 +54,16 @@ def setup_parser(subparser):
        '-L', '--very-long', action='store_true', dest='very_long',
        help='Show dependency hashes as well as versions.')
    subparser.add_argument(
        '-u', '--unknown', action='store_true', dest='unknown',
        help='Show only specs Spack does not have a package for.')
    subparser.add_argument(
        '-m', '--missing', action='store_true', dest='missing',
        help='Show missing dependencies as well as installed specs.')
    subparser.add_argument(
        '-M', '--only-missing', action='store_true', dest='only_missing',
        help='Show only missing dependencies.')
    subparser.add_argument(
        'query_specs', nargs=argparse.REMAINDER,
        help='optional specs to filter results')
@@ -113,6 +123,7 @@ def fmt(s):
                if hashes:
                    string += gray_hash(s, hlen) + ' '
                string += s.format('$-_$@$+', color=True)
                return string
            colify(fmt(s) for s in specs)
@@ -136,15 +147,23 @@ def find(parser, args):
        if not query_specs:
            return
-    # Get all the specs the user asked for
+    # Set up query arguments.
-    if not query_specs:
+    installed, known = True, any
-        with spack.installed_db.read_lock():
+    if args.only_missing:
-            specs = set(spack.installed_db.installed_package_specs())
+        installed = False
    elif args.missing:
        installed = any
    if args.unknown:
        known = False
    q_args = { 'installed' : installed, 'known' : known }
-    else:
+    # Get all the specs the user asked for
-        with spack.installed_db.read_lock():
+    with spack.installed_db.read_lock():
-            results = [set(spack.installed_db.get_installed(qs)) for qs in query_specs]
+        if not query_specs:
-        specs = set.union(*results)
+            specs = set(spack.installed_db.query(**q_args))
        else:
            results = [set(spack.installed_db.query(qs, **q_args)) for qs in query_specs]
            specs = set.union(*results)
    if not args.mode:
        args.mode = 'short'
--- a/lib/spack/spack/cmd/module.py
+++ b/lib/spack/spack/cmd/module.py
@@ -65,7 +65,7 @@ def module_find(mtype, spec_array):
        tty.die("You can only pass one spec.")
    spec = specs[0]
-    specs = [s for s in spack.installed_db.installed_package_specs() if s.satisfies(spec)]
+    specs = spack.installed_db.query(spec)
    if len(specs) == 0:
        tty.die("No installed packages match spec %s" % spec)
@@ -86,7 +86,7 @@ def module_find(mtype, spec_array):
 def module_refresh():
    """Regenerate all module files for installed packages known to
       spack (some packages may no longer exist)."""
-    specs = [s for s in spack.installed_db.installed_known_package_specs()]
+    specs = [s for s in spack.installed_db.query(installed=True, known=True)]
    for name, cls in module_types.items():
        tty.msg("Regenerating %s module files." % name)
--- a/lib/spack/spack/cmd/uninstall.py
+++ b/lib/spack/spack/cmd/uninstall.py
@@ -60,7 +60,7 @@ def uninstall(parser, args):
        # Fail and ask user to be unambiguous if it doesn't
        pkgs = []
        for spec in specs:
-            matching_specs = spack.installed_db.get_installed(spec)
+            matching_specs = spack.installed_db.query(spec)
            if not args.all and len(matching_specs) > 1:
                tty.error("%s matches multiple packages:" % spec)
                print
--- a/lib/spack/spack/database.py
+++ b/lib/spack/spack/database.py
@@ -22,6 +22,23 @@
 # along with this program; if not, write to the Free Software Foundation,
 # Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ##############################################################################
 """Spack's installation tracking database.
 The database serves two purposes:
  1. It implements a cache on top of a potentially very large Spack
     directory hierarchy, speeding up many operations that would
     otherwise require filesystem access.
  2. It will allow us to track external installations as well as lost
     packages and their dependencies.
 Prior to the implementation of this store, a directory layout served
 as the authoritative database of packages in Spack.  This module
 provides a cache and a sanity checking mechanism for what is in the
 filesystem.
 """
 import os
 import time
 import socket
@@ -58,18 +75,37 @@ def converter(self, spec_like, *args, **kwargs):
 class InstallRecord(object):
-    """A record represents one installation in the DB."""
+    """A record represents one installation in the DB.
-    def __init__(self, spec, path):
+
    The record keeps track of the spec for the installation, its
    install path, AND whether or not it is installed.  We need the
    installed flag in case a user either:
        a) blew away a directory, or
        b) used spack uninstall -f to get rid of it
    If, in either case, the package was removed but others still
    depend on it, we still need to track its spec, so we don't
    actually remove from the database until a spec has no installed
    dependents left.
    """
    def __init__(self, spec, path, installed):
        self.spec = spec
        self.path = path
        self.installed = installed
        self.ref_count = 0
    def to_dict(self):
-        return { 'spec' : self.spec.to_node_dict(),
+        return { 'spec'      : self.spec.to_node_dict(),
-                 'path' : self.path }
+                 'path'      : self.path,
                 'installed' : self.installed,
                 'ref_count' : self.ref_count }
    @classmethod
    def from_dict(cls, d):
-        return InstallRecord(d['spec'], d['path'])
+        # TODO: check the dict more rigorously.
        return InstallRecord(d['spec'], d['path'], d['installed'], d['ref_count'])
 class Database(object):
@@ -136,9 +172,11 @@ def _write_to_yaml(self, stream):
            raise SpackYAMLError("error writing YAML database:", str(e))
-    def _read_spec_from_yaml(self, hash_key, installs):
+    def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
        """Recursively construct a spec from a hash in a YAML database."""
-        # TODO: check validity of hash_key records here.
+        if hash_key not in installs:
            parent = read_spec(installs[parent_key]['path'])
        spec_dict = installs[hash_key]['spec']
        # Build spec from dict first.
@@ -147,7 +185,8 @@ def _read_spec_from_yaml(self, hash_key, installs):
        # Add dependencies from other records in the install DB to
        # form a full spec.
        for dep_hash in spec_dict[spec.name]['dependencies'].values():
-            spec._add_dependency(self._read_spec_from_yaml(dep_hash, installs))
+            child = self._read_spec_from_yaml(dep_hash, installs, hash_key)
            spec._add_dependency(child)
        return spec
@@ -175,12 +214,12 @@ def check(cond, msg):
        check('database' in yfile, "No 'database' attribute in YAML.")
-        # High-level file checks.
+        # High-level file checks
        db = yfile['database']
        check('installs' in db, "No 'installs' in YAML DB.")
        check('version'  in db, "No 'version' in YAML DB.")
-        # TODO: better version check.
+        # TODO: better version checking semantics.
        version = Version(db['version'])
        if version != _db_version:
            raise InvalidDatabaseVersionError(_db_version, version)
@@ -190,14 +229,21 @@ def check(cond, msg):
        data = {}
        for hash_key, rec in installs.items():
            try:
                # This constructs a spec DAG from the list of all installs
                spec = self._read_spec_from_yaml(hash_key, installs)
                # Validate the spec by ensuring the stored and actual
                # hashes are the same.
                spec_hash = spec.dag_hash()
                if not spec_hash == hash_key:
                    tty.warn("Hash mismatch in database: %s -> spec with hash %s"
                             % (hash_key, spec_hash))
-                    continue
+                    continue    # TODO: is skipping the right thing to do?
-                data[hash_key] = InstallRecord(spec, rec['path'])
+                # Insert the brand new spec in the database.  Each
                # spec has its own copies of its dependency specs.
                # TODO: would a more immutable spec implementation simplify this?
                data[hash_key] = InstallRecord(spec, rec['path'], rec['installed'])
            except Exception as e:
                tty.warn("Invalid database reecord:",
@@ -213,12 +259,29 @@ def reindex(self, directory_layout):
        """Build database index from scratch based from a directory layout."""
        with self.write_lock():
            data = {}
            # Ask the directory layout to traverse the filesystem.
            for spec in directory_layout.all_specs():
                # Create a spec for each known package and add it.
                path = directory_layout.path_for_spec(spec)
                hash_key = spec.dag_hash()
-                data[hash_key] = InstallRecord(spec, path)
+                data[hash_key] = InstallRecord(spec, path, True)
                # Recursively examine dependencies and add them, even
                # if they are NOT installed.  This ensures we know
                # about missing dependencies.
                for dep in spec.traverse(root=False):
                    dep_hash = dep.dag_hash()
                    if dep_hash not in data:
                        path = directory_layout.path_for_spec(dep)
                        installed = os.path.isdir(path)
                        data[dep_hash] = InstallRecord(dep.copy(), path, installed)
                    data[dep_hash].ref_count += 1
            # Assuming everything went ok, replace this object's data.
            self._data = data
            # write out, blowing away the old version if necessary
            self.write()
@@ -274,22 +337,37 @@ def is_dirty(self):
    @_autospec
    def add(self, spec, path):
        """Read the database from the set location
-        Add the specified entry as a dict
+
-        Write the database back to memory
+        Add the specified entry as a dict, then write the database
        back to memory. This assumes that ALL dependencies are already in
        the database.  Should not be called otherwise.
        """
        # Should always already be locked
        with self.write_lock():
            self.read()
-            self._data[spec.dag_hash()] = InstallRecord(spec, path)
+            self._data[spec.dag_hash()] = InstallRecord(spec, path, True)
            # sanity check the dependencies in case something went
            # wrong during install()
            # TODO: ensure no races during distributed install.
            for dep in spec.traverse(root=False):
                assert dep.dag_hash() in self._data
            self.write()
    @_autospec
    def remove(self, spec):
-        """
+        """Removes a spec from the database.  To be called on uninstall.
-        Reads the database from the set location
+
-        Searches for and removes the specified spec
+        Reads the database, then:
-        Writes the database back to memory
+
          1. Marks the spec as not installed.
          2. Removes the spec if it has no more dependents.
          3. If removed, recursively updates dependencies' ref counts
             and removes them if they are no longer needed.
        """
        # Should always already be locked
        with self.write_lock():
@@ -300,19 +378,13 @@ def remove(self, spec):
            self.write()
    @_autospec
    def get_installed(self, spec):
        """Get installed specs that satisfy the provided spec constraint."""
        return [s for s in self.installed_package_specs() if s.satisfies(spec)]
    @_autospec
    def installed_extensions_for(self, extendee_spec):
        """
        Return the specs of all packages that extend
        the given spec
        """
-        for s in self.installed_package_specs():
+        for s in self.query():
            try:
                if s.package.extends(extendee_spec):
                    yield s.package
@@ -322,25 +394,59 @@ def installed_extensions_for(self, extendee_spec):
            # TODO: conditional way to do this instead of catching exceptions
-    def installed_package_specs(self):
+    def query(self, query_spec=any, known=any, installed=True):
        """Run a query on the database.
        ``query_spec``
            Queries iterate through specs in the database and return
            those that satisfy the supplied ``query_spec``.  If
            query_spec is `any`, this will match all specs in the
            database.  If it is a spec, we'll evaluate
            ``spec.satisfies(query_spec)``.
        The query can be constrained by two additional attributes:
        ``known``
            Possible values: True, False, any
            Specs that are "known" are those for which Spack can
            locate a ``package.py`` file -- i.e., Spack "knows" how to
            install them.  Specs that are unknown may represent
            packages that existed in a previous version of Spack, but
            have since either changed their name or been removed.
        ``installed``
            Possible values: True, False, any
            Specs for which a prefix exists are "installed". A spec
            that is NOT installed will be in the database if some
            other spec depends on it but its installation has gone
            away since Spack installed it.
        TODO: Specs are a lot like queries.  Should there be a
              wildcard spec object, and should specs have attributes
              like installed and known that can be queried?  Or are
              these really special cases that only belong here?
        """
        Read installed package names from the database
        and return their specs
        """
        # Should always already be locked
        with self.read_lock():
            self.read()
-        return sorted(rec.spec for rec in self._data.values())
+
        results = []
        for key, rec in self._data.items():
            if installed is not any and rec.installed != installed:
                continue
            if known is not any and spack.db.exists(rec.spec.name) != known:
                continue
            if query_spec is any or rec.spec.satisfies(query_spec):
                results.append(rec.spec)
        return sorted(results)
-    def installed_known_package_specs(self):
+    def missing(self, spec):
-        """
+        key =  spec.dag_hash()
-        Read installed package names from the database.
+        return key in self._data and not self._data[key].installed
        Return only the specs for which the package is known
        to this version of spack
        """
        return [s for s in self.installed_package_specs()
                if spack.db.exists(s.name)]
 class CorruptDatabaseError(SpackError):
--- a/lib/spack/spack/package.py
+++ b/lib/spack/spack/package.py
@@ -563,9 +563,12 @@ def installed(self):
    @property
    def installed_dependents(self):
        """Return a list of the specs of all installed packages that depend
-           on this one."""
+           on this one.
        TODO: move this method to database.py?
        """
        dependents = []
-        for spec in spack.installed_db.installed_package_specs():
+        for spec in spack.installed_db.query():
            if self.name == spec.name:
                continue
            for dep in spec.traverse():