Optimize marshaling of Repo and RepoPath (#45742)

When sending Repo and RepoPath over to a child process,
we go through a marshaling procedure with pickle. The
default behavior for these classes is highly inefficient,
as it serializes a lot of specs that can just be
reconstructed on the other end of the pipe.

Here we write optimized procedures to __reduce__ both
classes.
This commit is contained in:
Massimiliano Culpo 2024-08-14 14:34:35 +02:00 committed by GitHub
parent 03a7da1e44
commit 94961ffe0a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 62 additions and 19 deletions

View File

@ -12,7 +12,7 @@
import re
import sys
import warnings
from typing import Dict, List, Optional, Set, Tuple, Type
from typing import Dict, Iterable, List, Optional, Set, Tuple, Type
import llnl.util.filesystem
import llnl.util.lang
@ -187,7 +187,7 @@ def libraries_in_windows_paths(path_hints: Optional[List[str]] = None) -> Dict[s
return path_to_dict(search_paths)
def _group_by_prefix(paths: Set[str]) -> Dict[str, Set[str]]:
def _group_by_prefix(paths: List[str]) -> Dict[str, Set[str]]:
groups = collections.defaultdict(set)
for p in paths:
groups[os.path.dirname(p)].add(p)
@ -243,7 +243,9 @@ def detect_specs(
return []
result = []
for candidate_path, items_in_prefix in sorted(_group_by_prefix(set(paths)).items()):
for candidate_path, items_in_prefix in _group_by_prefix(
llnl.util.lang.dedupe(paths)
).items():
# TODO: multiple instances of a package can live in the same
# prefix, and a package implementation can return multiple specs
# for one prefix, but without additional details (e.g. about the
@ -299,19 +301,17 @@ def detect_specs(
return result
def find(
self, *, pkg_name: str, initial_guess: Optional[List[str]] = None
self, *, pkg_name: str, repository, initial_guess: Optional[List[str]] = None
) -> List[DetectedPackage]:
"""For a given package, returns a list of detected specs.
Args:
pkg_name: package being detected
initial_guess: initial list of paths to search from the caller
if None, default paths are searched. If this
is an empty list, nothing will be searched.
repository: repository to retrieve the package
initial_guess: initial list of paths to search from the caller if None, default paths
are searched. If this is an empty list, nothing will be searched.
"""
import spack.repo
pkg_cls = spack.repo.PATH.get_pkg_class(pkg_name)
pkg_cls = repository.get_pkg_class(pkg_name)
patterns = self.search_patterns(pkg=pkg_cls)
if not patterns:
return []
@ -382,7 +382,7 @@ def prefix_from_path(self, *, path: str) -> str:
def by_path(
packages_to_search: List[str],
packages_to_search: Iterable[str],
*,
path_hints: Optional[List[str]] = None,
max_workers: Optional[int] = None,
@ -396,19 +396,28 @@ def by_path(
path_hints: initial list of paths to be searched
max_workers: maximum number of workers to search for packages in parallel
"""
import spack.repo
# TODO: Packages should be able to define both .libraries and .executables in the future
# TODO: determine_spec_details should get all relevant libraries and executables in one call
executables_finder, libraries_finder = ExecutablesFinder(), LibrariesFinder()
detected_specs_by_package: Dict[str, Tuple[concurrent.futures.Future, ...]] = {}
result = collections.defaultdict(list)
repository = spack.repo.PATH.ensure_unwrapped()
with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
for pkg in packages_to_search:
executable_future = executor.submit(
executables_finder.find, pkg_name=pkg, initial_guess=path_hints
executables_finder.find,
pkg_name=pkg,
initial_guess=path_hints,
repository=repository,
)
library_future = executor.submit(
libraries_finder.find, pkg_name=pkg, initial_guess=path_hints
libraries_finder.find,
pkg_name=pkg,
initial_guess=path_hints,
repository=repository,
)
detected_specs_by_package[pkg] = executable_future, library_future

View File

@ -149,12 +149,12 @@ def current_repository(self, value):
@contextlib.contextmanager
def switch_repo(self, substitute: "RepoType"):
"""Switch the current repository list for the duration of the context manager."""
old = self.current_repository
old = self._repo
try:
self.current_repository = substitute
self._repo = substitute
yield
finally:
self.current_repository = old
self._repo = old
def find_spec(self, fullname, python_path, target=None):
# "target" is not None only when calling importlib.reload()
@ -683,7 +683,7 @@ class RepoPath:
def __init__(
self,
*repos: Union[str, "Repo"],
cache: "spack.caches.FileCacheType",
cache: Optional["spack.caches.FileCacheType"],
overrides: Optional[Dict[str, Any]] = None,
) -> None:
self.repos: List[Repo] = []
@ -696,6 +696,7 @@ def __init__(
for repo in repos:
try:
if isinstance(repo, str):
assert cache is not None, "cache must hold a value, when repo is a string"
repo = Repo(repo, cache=cache, overrides=overrides)
repo.finder(self)
self.put_last(repo)
@ -707,6 +708,10 @@ def __init__(
f" spack repo rm {repo}",
)
def ensure_unwrapped(self) -> "RepoPath":
"""Ensure we unwrap this object from any dynamic wrapper (like Singleton)"""
return self
def put_first(self, repo: "Repo") -> None:
"""Add repo first in the search path."""
if isinstance(repo, RepoPath):
@ -930,6 +935,16 @@ def is_virtual_safe(self, pkg_name: str) -> bool:
def __contains__(self, pkg_name):
return self.exists(pkg_name)
def marshal(self):
return (self.repos,)
@staticmethod
def unmarshal(repos):
return RepoPath(*repos, cache=None)
def __reduce__(self):
return RepoPath.unmarshal, self.marshal()
class Repo:
"""Class representing a package repository in the filesystem.
@ -1319,6 +1334,20 @@ def __repr__(self) -> str:
def __contains__(self, pkg_name: str) -> bool:
return self.exists(pkg_name)
@staticmethod
def unmarshal(root, cache, overrides):
"""Helper method to unmarshal keyword arguments"""
return Repo(root, cache=cache, overrides=overrides)
def marshal(self):
cache = self._cache
if isinstance(cache, llnl.util.lang.Singleton):
cache = cache.instance
return self.root, cache, self.overrides
def __reduce__(self):
return Repo.unmarshal, self.marshal()
RepoType = Union[Repo, RepoPath]

View File

@ -14,6 +14,7 @@
import spack.cmd.external
import spack.detection
import spack.detection.path
import spack.repo
from spack.main import SpackCommand
from spack.spec import Spec
@ -55,7 +56,9 @@ def test_find_external_two_instances_same_package(mock_executable):
search_paths = [str(cmake1.parent.parent), str(cmake2.parent.parent)]
finder = spack.detection.path.ExecutablesFinder()
detected_specs = finder.find(pkg_name="cmake", initial_guess=search_paths)
detected_specs = finder.find(
pkg_name="cmake", initial_guess=search_paths, repository=spack.repo.PATH
)
assert len(detected_specs) == 2
spec_to_path = {e.spec: e.prefix for e in detected_specs}
@ -263,7 +266,9 @@ def _determine_variants(cls, exes, version_str):
monkeypatch.setattr(gcc_cls, "determine_variants", _determine_variants)
finder = spack.detection.path.ExecutablesFinder()
detected_specs = finder.find(pkg_name="gcc", initial_guess=[str(search_dir)])
detected_specs = finder.find(
pkg_name="gcc", initial_guess=[str(search_dir)], repository=spack.repo.PATH
)
assert len(detected_specs) == 1