From dfca65158d252c4d66dbad9c7f675726fbb71930 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Wed, 5 Feb 2025 15:51:56 +0100 Subject: [PATCH] database.py: simplify, avoid sets, improve perf --- lib/spack/spack/database.py | 86 ++++++++++++-------------------- lib/spack/spack/test/database.py | 8 ++- 2 files changed, 35 insertions(+), 59 deletions(-) diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index 6b20e09fe39..ef653a7e862 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -263,18 +263,6 @@ def from_dict(cls, spec, dictionary): return InstallRecord(spec, **d) -class ForbiddenLockError(SpackError): - """Raised when an upstream DB attempts to acquire a lock""" - - -class ForbiddenLock: - def __getattr__(self, name): - raise ForbiddenLockError(f"Cannot access attribute '{name}' of lock") - - def __reduce__(self): - return ForbiddenLock, tuple() - - class LockConfiguration(NamedTuple): """Data class to configure locks in Database objects @@ -617,16 +605,12 @@ def __init__( self.db_lock_timeout = lock_cfg.database_timeout tty.debug(f"DATABASE LOCK TIMEOUT: {str(self.db_lock_timeout)}s") - self.lock: Union[ForbiddenLock, lk.Lock] - if self.is_upstream: - self.lock = ForbiddenLock() - else: - self.lock = lk.Lock( - str(self._lock_path), - default_timeout=self.db_lock_timeout, - desc="database", - enable=lock_cfg.enable, - ) + self.lock = lk.Lock( + str(self._lock_path), + default_timeout=self.db_lock_timeout, + desc="database", + enable=not self.is_upstream and lock_cfg.enable, + ) self._data: Dict[str, InstallRecord] = {} # For every installed spec we keep track of its install prefix, so that @@ -1050,6 +1034,9 @@ def _write(self, type=None, value=None, traceback=None): This routine does no locking. """ + if self.is_upstream: + raise UpstreamDatabaseLockingError("Cannot write to an upstream database") + self._ensure_parent_directories() # Do not write if exceptions were raised @@ -1670,38 +1657,24 @@ def query( """ valid_trees = ["all", "upstream", "local", self.root] + [u.root for u in self.upstream_dbs] if install_tree not in valid_trees: - msg = "Invalid install_tree argument to Database.query()\n" - msg += f"Try one of {', '.join(valid_trees)}" - tty.error(msg) - return [] - - upstream_results = [] - upstreams = self.upstream_dbs - if install_tree not in ("all", "upstream"): - upstreams = [u for u in self.upstream_dbs if u.root == install_tree] - for upstream_db in upstreams: - # queries for upstream DBs need to *not* lock - we may not - # have permissions to do this and the upstream DBs won't know about - # us anyway (so e.g. they should never uninstall specs) - upstream_results.extend( - upstream_db._query( - query_spec, - predicate_fn=predicate_fn, - installed=installed, - explicit=explicit, - start_date=start_date, - end_date=end_date, - hashes=hashes, - in_buildcache=in_buildcache, - origin=origin, - ) - or [] + raise ValueError( + f"Invalid install_tree argument to Database.query(). Try one of {valid_trees}" ) - local_results: Set["spack.spec.Spec"] = set() - if install_tree in ("all", "local") or self.root == install_tree: - local_results = set( - self.query_local( + if install_tree == "all": + databases = [self, *self.upstream_dbs] + elif install_tree == "upstream": + databases = self.upstream_dbs + elif install_tree == "local" or self.root == install_tree: + databases = [self] + else: + databases = [u for u in self.upstream_dbs if u.root == install_tree] + + results: List[spack.spec.Spec] = [] + + for db in databases: + results.extend( + db.query_local( query_spec, predicate_fn=predicate_fn, installed=installed, @@ -1714,8 +1687,13 @@ def query( ) ) - results = list(local_results) + list(x for x in upstream_results if x not in local_results) - results.sort() # type: ignore[call-overload] + # Stable deduplication on dag hash picks local specs over upstreams. + if len(databases) > 1: + results = list(llnl.util.lang.dedupe(results, key=lambda x: x.dag_hash())) + + # reduce number of comparisons with slow default __lt__ + results.sort(key=lambda s: s.name) + results.sort() return results def query_one( diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py index 4862a04071b..fc1a4feb319 100644 --- a/lib/spack/spack/test/database.py +++ b/lib/spack/spack/test/database.py @@ -165,10 +165,8 @@ def test_installed_upstream(upstream_and_downstream_db, tmpdir): upstream_db._read() for dep in spec.traverse(root=False): - record = downstream_db.get_by_hash(dep.dag_hash()) - assert record is not None - with pytest.raises(spack.database.ForbiddenLockError): - upstream_db.get_by_hash(dep.dag_hash()) + assert downstream_db.get_by_hash(dep.dag_hash()) is not None + assert upstream_db.get_by_hash(dep.dag_hash()) is not None new_spec = spack.concretize.concretize_one("w") downstream_db.add(new_spec) @@ -258,7 +256,7 @@ def test_cannot_write_upstream(tmp_path, mock_packages, config): # Create it as an upstream db = spack.database.Database(str(tmp_path), is_upstream=True) - with pytest.raises(spack.database.ForbiddenLockError): + with pytest.raises(spack.database.UpstreamDatabaseLockingError): db.add(spack.concretize.concretize_one("pkg-a"))