Improve Database.query* methods (#47116)

* Add type hints to all query* methods
* Inline docstrings
* Change defaults from `any` to `None` so they can be type hinted in old Python
* Pre-filter on given hashes instead of iterating over all db specs
* Fix a bug where the `--origin` option of uninstall had no effect
* Fix a bug where query args were not applied when searching by concrete spec

Signed-off-by: Massimiliano Culpo <massimiliano.culpo@gmail.com>
This commit is contained in:
Massimiliano Culpo 2024-10-24 08:13:07 +02:00 committed by GitHub
parent a0173a5a94
commit f6ad1e23f8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 177 additions and 124 deletions

View File

@ -252,7 +252,7 @@ def _associate_built_specs_with_mirror(self, cache_key, mirror_url):
spec_list = [ spec_list = [
s s
for s in db.query_local(installed=any, in_buildcache=any) for s in db.query_local(installed=any)
if s.external or db.query_local_by_spec_hash(s.dag_hash()).in_buildcache if s.external or db.query_local_by_spec_hash(s.dag_hash()).in_buildcache
] ]

View File

@ -99,5 +99,5 @@ def deconcretize(parser, args):
" Use `spack deconcretize --all` to deconcretize ALL specs.", " Use `spack deconcretize --all` to deconcretize ALL specs.",
) )
specs = spack.cmd.parse_specs(args.specs) if args.specs else [any] specs = spack.cmd.parse_specs(args.specs) if args.specs else [None]
deconcretize_specs(args, specs) deconcretize_specs(args, specs)

View File

@ -178,7 +178,7 @@ def query_arguments(args):
if args.unknown: if args.unknown:
predicate_fn = lambda x: not spack.repo.PATH.exists(x.spec.name) predicate_fn = lambda x: not spack.repo.PATH.exists(x.spec.name)
explicit = any explicit = None
if args.explicit: if args.explicit:
explicit = True explicit = True
if args.implicit: if args.implicit:

View File

@ -80,8 +80,8 @@ def find_matching_specs(specs, allow_multiple_matches=False):
has_errors = True has_errors = True
# No installed package matches the query # No installed package matches the query
if len(matching) == 0 and spec is not any: if len(matching) == 0 and spec is not None:
tty.die("{0} does not match any installed packages.".format(spec)) tty.die(f"{spec} does not match any installed packages.")
specs_from_cli.extend(matching) specs_from_cli.extend(matching)
@ -116,6 +116,6 @@ def mark(parser, args):
" Use `spack mark --all` to mark ALL packages.", " Use `spack mark --all` to mark ALL packages.",
) )
# [any] here handles the --all case by forcing all specs to be returned # [None] here handles the --all case by forcing all specs to be returned
specs = spack.cmd.parse_specs(args.specs) if args.specs else [any] specs = spack.cmd.parse_specs(args.specs) if args.specs else [None]
mark_specs(args, specs) mark_specs(args, specs)

View File

@ -165,7 +165,7 @@ def test_run(args):
if args.fail_fast: if args.fail_fast:
spack.config.set("config:fail_fast", True, scope="command_line") spack.config.set("config:fail_fast", True, scope="command_line")
explicit = args.explicit or any explicit = args.explicit or None
explicit_str = "explicitly " if args.explicit else "" explicit_str = "explicitly " if args.explicit else ""
# Get specs to test # Get specs to test

View File

@ -90,6 +90,7 @@ def find_matching_specs(
env: optional active environment env: optional active environment
specs: list of specs to be matched against installed packages specs: list of specs to be matched against installed packages
allow_multiple_matches: if True multiple matches are admitted allow_multiple_matches: if True multiple matches are admitted
origin: origin of the spec
Return: Return:
list: list of specs list: list of specs
@ -98,7 +99,7 @@ def find_matching_specs(
hashes = env.all_hashes() if env else None hashes = env.all_hashes() if env else None
# List of specs that match expressions given via command line # List of specs that match expressions given via command line
specs_from_cli = [] specs_from_cli: List["spack.spec.Spec"] = []
has_errors = False has_errors = False
for spec in specs: for spec in specs:
install_query = [InstallStatuses.INSTALLED, InstallStatuses.DEPRECATED] install_query = [InstallStatuses.INSTALLED, InstallStatuses.DEPRECATED]
@ -116,7 +117,7 @@ def find_matching_specs(
has_errors = True has_errors = True
# No installed package matches the query # No installed package matches the query
if len(matching) == 0 and spec is not any: if len(matching) == 0 and spec is not None:
if env: if env:
pkg_type = "packages in environment '%s'" % env.name pkg_type = "packages in environment '%s'" % env.name
else: else:
@ -213,7 +214,7 @@ def get_uninstall_list(args, specs: List[spack.spec.Spec], env: Optional[ev.Envi
# Gets the list of installed specs that match the ones given via cli # Gets the list of installed specs that match the ones given via cli
# args.all takes care of the case where '-a' is given in the cli # args.all takes care of the case where '-a' is given in the cli
matching_specs = find_matching_specs(env, specs, args.all) matching_specs = find_matching_specs(env, specs, args.all, origin=args.origin)
dependent_specs = installed_dependents(matching_specs) dependent_specs = installed_dependents(matching_specs)
all_uninstall_specs = matching_specs + dependent_specs if args.dependents else matching_specs all_uninstall_specs = matching_specs + dependent_specs if args.dependents else matching_specs
other_dependent_envs = dependent_environments(all_uninstall_specs, current_env=env) other_dependent_envs = dependent_environments(all_uninstall_specs, current_env=env)
@ -301,6 +302,6 @@ def uninstall(parser, args):
" Use `spack uninstall --all` to uninstall ALL packages.", " Use `spack uninstall --all` to uninstall ALL packages.",
) )
# [any] here handles the --all case by forcing all specs to be returned # [None] here handles the --all case by forcing all specs to be returned
specs = spack.cmd.parse_specs(args.specs) if args.specs else [any] specs = spack.cmd.parse_specs(args.specs) if args.specs else [None]
uninstall_specs(args, specs) uninstall_specs(args, specs)

View File

@ -32,6 +32,7 @@
Container, Container,
Dict, Dict,
Generator, Generator,
Iterable,
List, List,
NamedTuple, NamedTuple,
Optional, Optional,
@ -290,52 +291,6 @@ def __reduce__(self):
return ForbiddenLock, tuple() return ForbiddenLock, tuple()
_QUERY_DOCSTRING = """
Args:
query_spec: queries iterate through specs in the database and
return those that satisfy the supplied ``query_spec``. If
query_spec is `any`, This will match all specs in the
database. If it is a spec, we'll evaluate
``spec.satisfies(query_spec)``
predicate_fn: optional predicate taking an InstallRecord as argument, and returning
whether that record is selected for the query. It can be used to craft criteria
that need some data for selection not provided by the Database itself.
installed (bool or InstallStatus or typing.Iterable or None):
if ``True``, includes only installed
specs in the search; if ``False`` only missing specs, and if
``any``, all specs in database. If an InstallStatus or iterable
of InstallStatus, returns specs whose install status
(installed, deprecated, or missing) matches (one of) the
InstallStatus. (default: True)
explicit (bool or None): A spec that was installed
following a specific user request is marked as explicit. If
instead it was pulled-in as a dependency of a user requested
spec it's considered implicit.
start_date (datetime.datetime or None): filters the query
discarding specs that have been installed before ``start_date``.
end_date (datetime.datetime or None): filters the query discarding
specs that have been installed after ``end_date``.
hashes (Container): list or set of hashes that we can use to
restrict the search
in_buildcache (bool or None): Specs that are marked in
this database as part of an associated binary cache are
``in_buildcache``. All other specs are not. This field is used
for querying mirror indices. Default is ``any``.
Returns:
list of specs that match the query
"""
class LockConfiguration(NamedTuple): class LockConfiguration(NamedTuple):
"""Data class to configure locks in Database objects """Data class to configure locks in Database objects
@ -1525,59 +1480,48 @@ def get_by_hash(self, dag_hash, default=None, installed=any):
def _query( def _query(
self, self,
query_spec=any, query_spec: Optional[Union[str, "spack.spec.Spec"]] = None,
*,
predicate_fn: Optional[SelectType] = None, predicate_fn: Optional[SelectType] = None,
installed=True, installed: Union[bool, InstallStatus, List[InstallStatus]] = True,
explicit=any, explicit: Optional[bool] = None,
start_date=None, start_date: Optional[datetime.datetime] = None,
end_date=None, end_date: Optional[datetime.datetime] = None,
hashes=None, hashes: Optional[Iterable[str]] = None,
in_buildcache=any, in_buildcache: Optional[bool] = None,
origin=None, origin: Optional[str] = None,
) -> List["spack.spec.Spec"]: ) -> List["spack.spec.Spec"]:
"""Run a query on the database."""
# TODO: Specs are a lot like queries. Should there be a # Restrict the set of records over which we iterate first
# TODO: wildcard spec object, and should specs have attributes matching_hashes = self._data
# TODO: like installed and known that can be queried? Or are if hashes is not None:
# TODO: these really special cases that only belong here? matching_hashes = {h: self._data[h] for h in hashes if h in self._data}
if query_spec is not any: if isinstance(query_spec, str):
if not isinstance(query_spec, spack.spec.Spec):
query_spec = spack.spec.Spec(query_spec) query_spec = spack.spec.Spec(query_spec)
# Just look up concrete specs with hashes; no fancy search. if query_spec is not None and query_spec.concrete:
if query_spec.concrete:
# TODO: handling of hashes restriction is not particularly elegant.
hash_key = query_spec.dag_hash() hash_key = query_spec.dag_hash()
if hash_key in self._data and (not hashes or hash_key in hashes): if hash_key not in matching_hashes:
return [self._data[hash_key].spec]
else:
return [] return []
matching_hashes = {hash_key: matching_hashes[hash_key]}
# Abstract specs require more work -- currently we test
# against everything.
results = [] results = []
start_date = start_date or datetime.datetime.min start_date = start_date or datetime.datetime.min
end_date = end_date or datetime.datetime.max end_date = end_date or datetime.datetime.max
# save specs whose name doesn't match for last, to avoid a virtual check
deferred = [] deferred = []
for rec in matching_hashes.values():
for key, rec in self._data.items():
if hashes is not None and rec.spec.dag_hash() not in hashes:
continue
if origin and not (origin == rec.origin): if origin and not (origin == rec.origin):
continue continue
if not rec.install_type_matches(installed): if not rec.install_type_matches(installed):
continue continue
if in_buildcache is not any and rec.in_buildcache != in_buildcache: if in_buildcache is not None and rec.in_buildcache != in_buildcache:
continue continue
if explicit is not any and rec.explicit != explicit: if explicit is not None and rec.explicit != explicit:
continue continue
if predicate_fn is not None and not predicate_fn(rec): if predicate_fn is not None and not predicate_fn(rec):
@ -1588,7 +1532,7 @@ def _query(
if not (start_date < inst_date < end_date): if not (start_date < inst_date < end_date):
continue continue
if query_spec is any: if query_spec is None or query_spec.concrete:
results.append(rec.spec) results.append(rec.spec)
continue continue
@ -1606,36 +1550,118 @@ def _query(
# If we did fine something, the query spec can't be virtual b/c we matched an actual # If we did fine something, the query spec can't be virtual b/c we matched an actual
# package installation, so skip the virtual check entirely. If we *didn't* find anything, # package installation, so skip the virtual check entirely. If we *didn't* find anything,
# check all the deferred specs *if* the query is virtual. # check all the deferred specs *if* the query is virtual.
if not results and query_spec is not any and deferred and query_spec.virtual: if not results and query_spec is not None and deferred and query_spec.virtual:
results = [spec for spec in deferred if spec.satisfies(query_spec)] results = [spec for spec in deferred if spec.satisfies(query_spec)]
return results return results
if _query.__doc__ is None: def query_local(
_query.__doc__ = "" self,
_query.__doc__ += _QUERY_DOCSTRING query_spec: Optional[Union[str, "spack.spec.Spec"]] = None,
*,
predicate_fn: Optional[SelectType] = None,
installed: Union[bool, InstallStatus, List[InstallStatus]] = True,
explicit: Optional[bool] = None,
start_date: Optional[datetime.datetime] = None,
end_date: Optional[datetime.datetime] = None,
hashes: Optional[List[str]] = None,
in_buildcache: Optional[bool] = None,
origin: Optional[str] = None,
) -> List["spack.spec.Spec"]:
"""Queries the local Spack database.
def query_local(self, *args, **kwargs): This function doesn't guarantee any sorting of the returned data for performance reason,
"""Query only the local Spack database. since comparing specs for __lt__ may be an expensive operation.
This function doesn't guarantee any sorting of the returned Args:
data for performance reason, since comparing specs for __lt__ query_spec: if query_spec is ``None``, match all specs in the database.
may be an expensive operation. If it is a spec, return all specs matching ``spec.satisfies(query_spec)``.
predicate_fn: optional predicate taking an InstallRecord as argument, and returning
whether that record is selected for the query. It can be used to craft criteria
that need some data for selection not provided by the Database itself.
installed: if ``True``, includes only installed specs in the search. If ``False`` only
missing specs, and if ``any``, all specs in database. If an InstallStatus or
iterable of InstallStatus, returns specs whose install status matches at least
one of the InstallStatus.
explicit: a spec that was installed following a specific user request is marked as
explicit. If instead it was pulled-in as a dependency of a user requested spec
it's considered implicit.
start_date: if set considers only specs installed from the starting date.
end_date: if set considers only specs installed until the ending date.
in_buildcache: specs that are marked in this database as part of an associated binary
cache are ``in_buildcache``. All other specs are not. This field is used for
querying mirror indices. By default, it does not check this status.
hashes: list of hashes used to restrict the search
origin: origin of the spec
""" """
with self.read_transaction(): with self.read_transaction():
return self._query(*args, **kwargs) return self._query(
query_spec,
predicate_fn=predicate_fn,
installed=installed,
explicit=explicit,
start_date=start_date,
end_date=end_date,
hashes=hashes,
in_buildcache=in_buildcache,
origin=origin,
)
if query_local.__doc__ is None: def query(
query_local.__doc__ = "" self,
query_local.__doc__ += _QUERY_DOCSTRING query_spec: Optional[Union[str, "spack.spec.Spec"]] = None,
*,
predicate_fn: Optional[SelectType] = None,
installed: Union[bool, InstallStatus, List[InstallStatus]] = True,
explicit: Optional[bool] = None,
start_date: Optional[datetime.datetime] = None,
end_date: Optional[datetime.datetime] = None,
in_buildcache: Optional[bool] = None,
hashes: Optional[List[str]] = None,
origin: Optional[str] = None,
install_tree: str = "all",
):
"""Queries the Spack database including all upstream databases.
def query(self, *args, **kwargs): Args:
"""Query the Spack database including all upstream databases. query_spec: if query_spec is ``None``, match all specs in the database.
If it is a spec, return all specs matching ``spec.satisfies(query_spec)``.
Additional Arguments: predicate_fn: optional predicate taking an InstallRecord as argument, and returning
install_tree (str): query 'all' (default), 'local', 'upstream', or upstream path whether that record is selected for the query. It can be used to craft criteria
that need some data for selection not provided by the Database itself.
installed: if ``True``, includes only installed specs in the search. If ``False`` only
missing specs, and if ``any``, all specs in database. If an InstallStatus or
iterable of InstallStatus, returns specs whose install status matches at least
one of the InstallStatus.
explicit: a spec that was installed following a specific user request is marked as
explicit. If instead it was pulled-in as a dependency of a user requested spec
it's considered implicit.
start_date: if set considers only specs installed from the starting date.
end_date: if set considers only specs installed until the ending date.
in_buildcache: specs that are marked in this database as part of an associated binary
cache are ``in_buildcache``. All other specs are not. This field is used for
querying mirror indices. By default, it does not check this status.
hashes: list of hashes used to restrict the search
install_tree: query 'all' (default), 'local', 'upstream', or upstream path
origin: origin of the spec
""" """
install_tree = kwargs.pop("install_tree", "all")
valid_trees = ["all", "upstream", "local", self.root] + [u.root for u in self.upstream_dbs] valid_trees = ["all", "upstream", "local", self.root] + [u.root for u in self.upstream_dbs]
if install_tree not in valid_trees: if install_tree not in valid_trees:
msg = "Invalid install_tree argument to Database.query()\n" msg = "Invalid install_tree argument to Database.query()\n"
@ -1651,26 +1677,52 @@ def query(self, *args, **kwargs):
# queries for upstream DBs need to *not* lock - we may not # queries for upstream DBs need to *not* lock - we may not
# have permissions to do this and the upstream DBs won't know about # have permissions to do this and the upstream DBs won't know about
# us anyway (so e.g. they should never uninstall specs) # us anyway (so e.g. they should never uninstall specs)
upstream_results.extend(upstream_db._query(*args, **kwargs) or []) upstream_results.extend(
upstream_db._query(
query_spec,
predicate_fn=predicate_fn,
installed=installed,
explicit=explicit,
start_date=start_date,
end_date=end_date,
hashes=hashes,
in_buildcache=in_buildcache,
origin=origin,
)
or []
)
local_results = [] local_results: Set["spack.spec.Spec"] = set()
if install_tree in ("all", "local") or self.root == install_tree: if install_tree in ("all", "local") or self.root == install_tree:
local_results = set(self.query_local(*args, **kwargs)) local_results = set(
self.query_local(
query_spec,
predicate_fn=predicate_fn,
installed=installed,
explicit=explicit,
start_date=start_date,
end_date=end_date,
hashes=hashes,
in_buildcache=in_buildcache,
origin=origin,
)
)
results = list(local_results) + list(x for x in upstream_results if x not in local_results) results = list(local_results) + list(x for x in upstream_results if x not in local_results)
return sorted(results) return sorted(results)
if query.__doc__ is None: def query_one(
query.__doc__ = "" self,
query.__doc__ += _QUERY_DOCSTRING query_spec: Optional[Union[str, "spack.spec.Spec"]],
predicate_fn: Optional[SelectType] = None,
def query_one(self, query_spec, predicate_fn=None, installed=True): installed: Union[bool, InstallStatus, List[InstallStatus]] = True,
) -> Optional["spack.spec.Spec"]:
"""Query for exactly one spec that matches the query spec. """Query for exactly one spec that matches the query spec.
Raises an assertion error if more than one spec matches the Returns None if no installed package matches.
query. Returns None if no installed package matches.
Raises:
AssertionError: if more than one spec matches the query.
""" """
concrete_specs = self.query(query_spec, predicate_fn=predicate_fn, installed=installed) concrete_specs = self.query(query_spec, predicate_fn=predicate_fn, installed=installed)
assert len(concrete_specs) <= 1 assert len(concrete_specs) <= 1

View File

@ -74,7 +74,7 @@ def test_query_arguments():
assert "explicit" in q_args assert "explicit" in q_args
assert q_args["installed"] == ["installed"] assert q_args["installed"] == ["installed"]
assert q_args["predicate_fn"] is None assert q_args["predicate_fn"] is None
assert q_args["explicit"] is any assert q_args["explicit"] is None
assert "start_date" in q_args assert "start_date" in q_args
assert "end_date" not in q_args assert "end_date" not in q_args
assert q_args["install_tree"] == "all" assert q_args["install_tree"] == "all"