From c15e55c6686dc5640f36e833902b9ce3889bb13b Mon Sep 17 00:00:00 2001
From: Paul Ferrell <51765748+Paul-Ferrell@users.noreply.github.com>
Date: Wed, 20 Nov 2019 17:00:44 -0700
Subject: [PATCH 01/24] mirror bug fixes: symlinks, duplicate patch names, and
 exception handling (#13789)

* Some packages (e.g. mpfr at the time of this patch) can have patches
  with the same name but different contents (which apply to different
  versions of the package). This appends part of the patch hash to the
  cache file name to avoid conflicts.

* Some exceptions which occur during fetching are not a subclass of
  SpackError and therefore do not have a 'message' attribute. This
  updates the logic for mirroring a single spec (add_single_spec) to
  produce an appropriate error message in that case (where before it
  failed with an AttributeError).

* In various circumstances, a mirror can contain the universal storage
  path but not a cosmetic symlink; in this case the symlink was never
  created. Now "spack mirror create" will create a symlink for any
  package that doesn't have one.
---
 lib/spack/spack/caches.py | 17 ++++++++++-------
 lib/spack/spack/mirror.py |  2 +-
 lib/spack/spack/patch.py  | 10 +++++++---
 lib/spack/spack/stage.py  | 12 ++++++------
 4 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/lib/spack/spack/caches.py b/lib/spack/spack/caches.py
index bdece504214..a7910905bcb 100644
--- a/lib/spack/spack/caches.py
+++ b/lib/spack/spack/caches.py
@@ -53,20 +53,23 @@ class MirrorCache(object):
     def __init__(self, root):
         self.root = os.path.abspath(root)

-    def store(self, fetcher, relative_dest, cosmetic_path=None):
+    def store(self, fetcher, relative_dest):
+        """Fetch and relocate the fetcher's target into our mirror cache."""
+
         # Note this will archive package sources even if they would not
         # normally be cached (e.g. the current tip of an hg/git branch)
         dst = os.path.join(self.root, relative_dest)
         mkdirp(os.path.dirname(dst))
         fetcher.archive(dst)

-        # Add a symlink path that a human can read to understand what resource
-        # the archive path refers to
-        if not cosmetic_path:
-            return
-        cosmetic_path = os.path.join(self.root, cosmetic_path)
+    def symlink(self, mirror_ref):
+        """Symlink a human readable path in our mirror to the actual
+        storage location."""
+
+        cosmetic_path = os.path.join(self.root, mirror_ref.cosmetic_path)
         relative_dst = os.path.relpath(
-            dst, start=os.path.dirname(cosmetic_path))
+            mirror_ref.storage_path,
+            start=os.path.dirname(cosmetic_path))
+
         if not os.path.exists(cosmetic_path):
             mkdirp(os.path.dirname(cosmetic_path))
             os.symlink(relative_dst, cosmetic_path)

diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py
index f7e8e73ea97..db32ceb5a5b 100644
--- a/lib/spack/spack/mirror.py
+++ b/lib/spack/spack/mirror.py
@@ -519,7 +519,7 @@ def add_single_spec(spec, mirror_root, mirror_stats):
         else:
             tty.warn(
                 "Error while fetching %s" % spec.cformat('{name}{@version}'),
-                exception.message)
+                getattr(exception, 'message', exception))
             mirror_stats.error()

diff --git a/lib/spack/spack/patch.py b/lib/spack/spack/patch.py
index 79550538db5..ba8d4c5ef84 100644
--- a/lib/spack/spack/patch.py
+++ b/lib/spack/spack/patch.py
@@ -199,11 +199,15 @@ def fetch(self, stage):
         fetcher = fs.URLFetchStrategy(self.url, fetch_digest,
                                       expand=bool(self.archive_sha256))

-        per_package_ref = os.path.join(
-            self.owner.split('.')[-1], os.path.basename(self.url))
+        # The same package can have multiple patches with the same name but
+        # with different contents, therefore apply a subset of the hash.
+        name = '{0}-{1}'.format(os.path.basename(self.url), fetch_digest[:7])
+
+        per_package_ref = os.path.join(self.owner.split('.')[-1], name)
         # Reference starting with "spack." is required to avoid cyclic imports
         mirror_ref = spack.mirror.mirror_archive_paths(
-            fetcher, per_package_ref)
+            fetcher,
+            per_package_ref)

         self.stage = spack.stage.Stage(fetcher, mirror_paths=mirror_ref)
         self.stage.create()

diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py
index d2dd3e6e7ad..e46354d9633 100644
--- a/lib/spack/spack/stage.py
+++ b/lib/spack/spack/stage.py
@@ -498,13 +498,13 @@ def cache_mirror(self, stats):

         if os.path.exists(absolute_storage_path):
             stats.already_existed(absolute_storage_path)
-            return
+        else:
+            self.fetch()
+            spack.caches.mirror_cache.store(
+                self.fetcher, self.mirror_paths.storage_path)
+            stats.added(absolute_storage_path)

-        self.fetch()
-        spack.caches.mirror_cache.store(
-            self.fetcher, self.mirror_paths.storage_path,
-            self.mirror_paths.cosmetic_path)
-        stats.added(absolute_storage_path)
+        spack.caches.mirror_cache.symlink(self.mirror_paths)

     def expand_archive(self):
         """Changes to the stage directory and attempt to expand the downloaded

From 64209dda977c907da435faed6c9e030a50c520ab Mon Sep 17 00:00:00 2001
From: Peter Josef Scheibel
Date: Tue, 26 Nov 2019 18:42:21 -0800
Subject: [PATCH 02/24] Allow repeated invocations of 'mirror create'

When creating a cosmetic symlink for a resource in a mirror, remove
it if it already exists. The symlink is removed in case the logic to
create the symlink has changed.
---
 lib/spack/spack/caches.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/spack/spack/caches.py b/lib/spack/spack/caches.py
index a7910905bcb..61a47eaa6e5 100644
--- a/lib/spack/spack/caches.py
+++ b/lib/spack/spack/caches.py
@@ -70,7 +70,13 @@ def symlink(self, mirror_ref):
         relative_dst = os.path.relpath(
             mirror_ref.storage_path,
             start=os.path.dirname(cosmetic_path))
+
         if not os.path.exists(cosmetic_path):
+            if os.path.lexists(cosmetic_path):
+                # In this case the link itself exists but it is broken: remove
+                # it and recreate it (in order to fix any symlinks broken prior
+                # to https://github.com/spack/spack/pull/13908)
+                os.unlink(cosmetic_path)
             mkdirp(os.path.dirname(cosmetic_path))
             os.symlink(relative_dst, cosmetic_path)

From 98b498c671fe4cec0c3b1ecfc09fa959faa713bd Mon Sep 17 00:00:00 2001
From: Peter Josef Scheibel
Date: Tue, 26 Nov 2019 18:50:32 -0800
Subject: [PATCH 03/24] Mirrors: fix cosmetic symlink targets

The targets for the cosmetic paths in mirrors were being calculated
incorrectly as of fb3a3ba: the symlinks used relative paths as targets,
and the relative path was computed relative to the wrong directory.
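For illustration, the corrected computation (a minimal sketch using the
paths from the regression test added below; `root` is a hypothetical
mirror root):

```
import os

root = '/mirror'
cosmetic_path = os.path.join(root, 'zlib/zlib-1.2.11.tar.gz')
storage_path = os.path.join(root, '_source-cache/archive/c3/c3e5.tar.gz')

# both paths must be rooted in the mirror, and the target must be
# computed relative to the directory *containing* the link
relative_dst = os.path.relpath(
    storage_path, start=os.path.dirname(cosmetic_path))
# relative_dst == '../_source-cache/archive/c3/c3e5.tar.gz'
```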
--- lib/spack/spack/caches.py | 3 ++- lib/spack/spack/test/mirror.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/lib/spack/spack/caches.py b/lib/spack/spack/caches.py index 61a47eaa6e5..ce2f8a63717 100644 --- a/lib/spack/spack/caches.py +++ b/lib/spack/spack/caches.py @@ -67,8 +67,9 @@ def symlink(self, mirror_ref): storage location.""" cosmetic_path = os.path.join(self.root, mirror_ref.cosmetic_path) + storage_path = os.path.join(self.root, mirror_ref.storage_path) relative_dst = os.path.relpath( - mirror_ref.storage_path, + storage_path, start=os.path.dirname(cosmetic_path)) if not os.path.exists(cosmetic_path): diff --git a/lib/spack/spack/test/mirror.py b/lib/spack/spack/test/mirror.py index 9068db71937..e1b31695e37 100644 --- a/lib/spack/spack/test/mirror.py +++ b/lib/spack/spack/test/mirror.py @@ -14,6 +14,8 @@ from spack.stage import Stage from spack.util.executable import which +from llnl.util.filesystem import resolve_link_target_relative_to_the_link + pytestmark = pytest.mark.usefixtures('config', 'mutable_mock_packages') # paths in repos that shouldn't be in the mirror tarballs. @@ -192,3 +194,33 @@ def successful_apply(*args, **kwargs): 'abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234', 'abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd.gz' # NOQA: ignore=E501 ]) - files_cached_in_mirror) + + +class MockFetcher(object): + """Mock fetcher object which implements the necessary functionality for + testing MirrorCache + """ + @staticmethod + def archive(dst): + with open(dst, 'w'): + pass + + +@pytest.mark.regression('14067') +def test_mirror_cache_symlinks(tmpdir): + """Confirm that the cosmetic symlink created in the mirror cache (which may + be relative) targets the storage path correctly. + """ + cosmetic_path = 'zlib/zlib-1.2.11.tar.gz' + global_path = '_source-cache/archive/c3/c3e5.tar.gz' + cache = spack.caches.MirrorCache(str(tmpdir)) + reference = spack.mirror.MirrorReference(cosmetic_path, global_path) + + cache.store(MockFetcher(), reference.storage_path) + cache.symlink(reference) + + link_target = resolve_link_target_relative_to_the_link( + os.path.join(cache.root, reference.cosmetic_path)) + assert os.path.exists(link_target) + assert (os.path.normpath(link_target) == + os.path.join(cache.root, reference.storage_path)) From a69b3c85b01075703dc480b490e894ee14d00125 Mon Sep 17 00:00:00 2001 From: Peter Josef Scheibel Date: Tue, 26 Nov 2019 19:23:38 -0800 Subject: [PATCH 04/24] Mirrors: perform checksum of fetched sources Since cache_mirror does the fetch itself, it also needs to do the checksum itself if it wants to verify that the source stored in the mirror is valid. Note that this isn't strictly required because fetching (including from mirrors) always separately verifies the checksum. 
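For reference, the digest comparison amounts to something like the
following (a sketch using only the standard library; the path and the
expected digest are hypothetical):

```
import hashlib

def sha256_of(path, blocksize=2**20):
    # hash the file in fixed-size chunks to bound memory use
    hasher = hashlib.sha256()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(blocksize), b''):
            hasher.update(block)
    return hasher.hexdigest()

expected = 'c3e5...'  # hypothetical digest recorded for the resource
if sha256_of('/mirror/_source-cache/archive/c3/c3e5.tar.gz') != expected:
    raise ValueError('sha256 checksum failed')
```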
--- lib/spack/spack/stage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index e46354d9633..ffad242f279 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -500,6 +500,7 @@ def cache_mirror(self, stats): stats.already_existed(absolute_storage_path) else: self.fetch() + self.check() spack.caches.mirror_cache.store( self.fetcher, self.mirror_paths.storage_path) stats.added(absolute_storage_path) From d71428622bebc54db4a1903e2026067fc085cb1b Mon Sep 17 00:00:00 2001 From: Peter Josef Scheibel Date: Tue, 26 Nov 2019 19:35:58 -0800 Subject: [PATCH 05/24] Mirrors: avoid re-downloading patches When updating a mirror, Spack was re-retrieving all patches (since the fetch logic for patches is separate). This updates the patch logic to allow the mirror logic to avoid this. --- lib/spack/spack/mirror.py | 1 - lib/spack/spack/patch.py | 47 +++++++++++++++++++++++---------------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py index db32ceb5a5b..da9b20472aa 100644 --- a/lib/spack/spack/mirror.py +++ b/lib/spack/spack/mirror.py @@ -502,7 +502,6 @@ def add_single_spec(spec, mirror_root, mirror_stats): with spec.package.stage as pkg_stage: pkg_stage.cache_mirror(mirror_stats) for patch in spec.package.all_patches(): - patch.fetch(pkg_stage) if patch.cache(): patch.cache().cache_mirror(mirror_stats) patch.clean() diff --git a/lib/spack/spack/patch.py b/lib/spack/spack/patch.py index ba8d4c5ef84..9e29dcc6386 100644 --- a/lib/spack/spack/patch.py +++ b/lib/spack/spack/patch.py @@ -171,6 +171,7 @@ def __init__(self, pkg, url, level=1, working_dir='.', ordering_key=None, super(UrlPatch, self).__init__(pkg, url, level, working_dir) self.url = url + self._stage = None self.ordering_key = ordering_key @@ -191,25 +192,6 @@ def fetch(self, stage): Args: stage: stage for the package that needs to be patched """ - # use archive digest for compressed archives - fetch_digest = self.sha256 - if self.archive_sha256: - fetch_digest = self.archive_sha256 - - fetcher = fs.URLFetchStrategy(self.url, fetch_digest, - expand=bool(self.archive_sha256)) - - # The same package can have multiple patches with the same name but - # with different contents, therefore apply a subset of the hash. - name = '{0}-{1}'.format(os.path.basename(self.url), fetch_digest[:7]) - - per_package_ref = os.path.join(self.owner.split('.')[-1], name) - # Reference starting with "spack." is required to avoid cyclic imports - mirror_ref = spack.mirror.mirror_archive_paths( - fetcher, - per_package_ref) - - self.stage = spack.stage.Stage(fetcher, mirror_paths=mirror_ref) self.stage.create() self.stage.fetch() self.stage.check() @@ -243,6 +225,33 @@ def fetch(self, stage): "sha256 checksum failed for %s" % self.path, "Expected %s but got %s" % (self.sha256, checker.sum)) + @property + def stage(self): + if self._stage: + return self._stage + + # use archive digest for compressed archives + fetch_digest = self.sha256 + if self.archive_sha256: + fetch_digest = self.archive_sha256 + + fetcher = fs.URLFetchStrategy(self.url, fetch_digest, + expand=bool(self.archive_sha256)) + + # The same package can have multiple patches with the same name but + # with different contents, therefore apply a subset of the hash. + name = '{0}-{1}'.format(os.path.basename(self.url), fetch_digest[:7]) + + per_package_ref = os.path.join(self.owner.split('.')[-1], name) + # Reference starting with "spack." 
is required to avoid cyclic imports
+        mirror_ref = spack.mirror.mirror_archive_paths(
+            fetcher,
+            per_package_ref)
+
+        self._stage = spack.stage.Stage(fetcher, mirror_paths=mirror_ref)
+        self._stage.create()
+        return self._stage
+
     def cache(self):
         return self.stage

From 587c650b8891ff1a633d4ccfcfcae74e079b1c8d Mon Sep 17 00:00:00 2001
From: Peter Josef Scheibel
Date: Tue, 26 Nov 2019 19:50:06 -0800
Subject: [PATCH 06/24] Mirrors: skip attempts to fetch BundlePackages

BundlePackages use a noop fetch strategy. The mirror logic was assuming
that the fetcher had a resource to cache after performing a fetch. This
adds a special check to skip caching if the stage is associated with a
BundleFetchStrategy. Note that this should still allow caching resources
associated with BundlePackages.
---
 lib/spack/spack/stage.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py
index ffad242f279..bf65ee0b011 100644
--- a/lib/spack/spack/stage.py
+++ b/lib/spack/spack/stage.py
@@ -492,6 +492,16 @@ def cache_local(self):

     def cache_mirror(self, stats):
         """Perform a fetch if the resource is not already cached"""
+        if isinstance(self.default_fetcher, fs.BundleFetchStrategy):
+            # BundleFetchStrategy has no source to fetch. The associated
+            # fetcher does nothing but the associated stage may still exist.
+            # There is currently no method available on the fetcher to
+            # distinguish this ('cachable' refers to whether the fetcher
+            # refers to a resource with a fixed ID, which is not the same
+            # concept as whether there is anything to fetch at all) so we
+            # must examine the type of the fetcher.
+            return
+
         dst_root = spack.caches.mirror_cache.root
         absolute_storage_path = os.path.join(
             dst_root, self.mirror_paths.storage_path)

From 639156130b54a05b22b5e6338431b378ff8b5030 Mon Sep 17 00:00:00 2001
From: Peter Josef Scheibel
Date: Wed, 27 Nov 2019 14:04:45 -0800
Subject: [PATCH 07/24] Patch fetching: remove unnecessary argument
---
 lib/spack/spack/package.py    | 2 +-
 lib/spack/spack/patch.py      | 8 ++------
 lib/spack/spack/test/patch.py | 2 +-
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py
index 38631c7a0e2..26335ed2ff6 100644
--- a/lib/spack/spack/package.py
+++ b/lib/spack/spack/package.py
@@ -1086,7 +1086,7 @@ def do_fetch(self, mirror_only=False):
             self.stage.cache_local()

         for patch in self.spec.patches:
-            patch.fetch(self.stage)
+            patch.fetch()
             if patch.cache():
                 patch.cache().cache_local()

diff --git a/lib/spack/spack/patch.py b/lib/spack/spack/patch.py
index 9e29dcc6386..73fdf4df25f 100644
--- a/lib/spack/spack/patch.py
+++ b/lib/spack/spack/patch.py
@@ -64,11 +64,8 @@ def __init__(self, pkg, path_or_url, level, working_dir):
         self.level = level
         self.working_dir = working_dir

-    def fetch(self, stage):
+    def fetch(self):
         """Fetch the patch in case of a UrlPatch
-
-        Args:
-            stage: stage for the package that needs to be patched
         """

     def clean(self):
@@ -185,8 +182,7 @@ def __init__(self, pkg, url, level=1, working_dir='.', ordering_key=None,
         if not self.sha256:
             raise PatchDirectiveError("URL patches require a sha256 checksum")

-    # TODO: this function doesn't use the stage arg
-    def fetch(self, stage):
+    def fetch(self):
         """Retrieve the patch in a temporary stage and compute self.path

         Args:
             stage: stage for the package that needs to be patched

diff --git a/lib/spack/spack/test/patch.py b/lib/spack/spack/test/patch.py
index c3df33dc8b1..b705d01e426 100644
--- a/lib/spack/spack/test/patch.py
+++ b/lib/spack/spack/test/patch.py
@@ -79,7 +79,7 @@ def 
test_url_patch(mock_patch_stage, filename, sha256, archive_sha256): third line """) # apply the patch and compare files - patch.fetch(stage) + patch.fetch() patch.apply(stage) patch.clean() From 37eac1a2269fcaf1bd0e1fa51851150874676e1a Mon Sep 17 00:00:00 2001 From: Sajid Ali <30510036+s-sajid-ali@users.noreply.github.com> Date: Sat, 21 Dec 2019 02:02:28 -0600 Subject: [PATCH 08/24] use `sys.executable` instead of `python` in `_source_single_file` (#14252) --- lib/spack/spack/util/environment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/spack/spack/util/environment.py b/lib/spack/spack/util/environment.py index b85ec963e25..f7dc728e7cb 100644 --- a/lib/spack/spack/util/environment.py +++ b/lib/spack/spack/util/environment.py @@ -919,7 +919,7 @@ def _source_single_file(file_and_args, environment): source_file = ' '.join(source_file) dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))' - dump_environment = 'python -c "{0}"'.format(dump_cmd) + dump_environment = sys.executable + ' -c "{0}"'.format(dump_cmd) # Try to source the file source_file_arguments = ' '.join([ From 48befd67b565efcb218849644c9dc1f728e2f095 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Mon, 16 Dec 2019 16:56:55 -0800 Subject: [PATCH 09/24] performance: memoize spack.architecture.get_platform() `get_platform()` is pretty expensive and can be called many times in a spack invocation. - [x] memoize `get_platform()` --- lib/spack/spack/architecture.py | 1 + lib/spack/spack/test/concretize.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/lib/spack/spack/architecture.py b/lib/spack/spack/architecture.py index 0e318bbccf9..7552795cd23 100644 --- a/lib/spack/spack/architecture.py +++ b/lib/spack/spack/architecture.py @@ -436,6 +436,7 @@ def from_dict(d): return arch_for_spec(spec) +@memoized def get_platform(platform_name): """Returns a platform object that corresponds to the given name.""" platform_list = all_platforms() diff --git a/lib/spack/spack/test/concretize.py b/lib/spack/spack/test/concretize.py index e0774909f47..4b17e755ed9 100644 --- a/lib/spack/spack/test/concretize.py +++ b/lib/spack/spack/test/concretize.py @@ -94,6 +94,10 @@ def current_host(request, monkeypatch): # preferred target via packages.yaml cpu, _, is_preference = request.param.partition('-') target = llnl.util.cpu.targets[cpu] + + # this function is memoized, so clear its state for testing + spack.architecture.get_platform.cache.clear() + if not is_preference: monkeypatch.setattr(llnl.util.cpu, 'host', lambda: target) monkeypatch.setattr(spack.platforms.test.Test, 'default', cpu) @@ -104,6 +108,9 @@ def current_host(request, monkeypatch): with spack.config.override('packages:all', {'target': [cpu]}): yield target + # clear any test values fetched + spack.architecture.get_platform.cache.clear() + @pytest.mark.usefixtures('config', 'mock_packages') class TestConcretize(object): From cbf8553406c9b7d7ec529751f39ebbad331cbdc7 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Mon, 16 Dec 2019 17:37:47 -0800 Subject: [PATCH 10/24] concretization: improve performance by avoiding database locks Checks for deprecated specs were repeatedly taking out read locks on the database, which can be very slow. 
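The fix is to hold a single read transaction for the whole traversal
(the sketch below restates the hunk that follows; `deprecated` collects
the offending records):

```
deprecated = []
with spack.store.db.read_transaction():
    for x in self.traverse():
        _, rec = spack.store.db.query_by_spec_hash(x.dag_hash())
        if rec and rec.deprecated_for:
            deprecated.append(rec)
```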
- [x] put a read transaction around the deprecation check
---
 lib/spack/spack/spec.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index 050f0276795..86ac62e0d75 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -2243,10 +2243,12 @@ def concretize(self, tests=False):

         # If any spec in the DAG is deprecated, throw an error
         deprecated = []
-        for x in self.traverse():
-            _, rec = spack.store.db.query_by_spec_hash(x.dag_hash())
-            if rec and rec.deprecated_for:
-                deprecated.append(rec)
+        with spack.store.db.read_transaction():
+            for x in self.traverse():
+                _, rec = spack.store.db.query_by_spec_hash(x.dag_hash())
+                if rec and rec.deprecated_for:
+                    deprecated.append(rec)
+
         if deprecated:
             msg = "\n    The following specs have been deprecated"
             msg += " in favor of specs with the hashes shown:\n"

From a69b3c85b01075703dc480b490e894ee14d00125 Mon Sep 17 00:00:00 2001
From: Todd Gamblin
Date: Tue, 17 Dec 2019 12:50:44 -0800
Subject: [PATCH 11/24] performance: `spack spec` should use a read
 transaction with -I

`spack spec -I` queries the database for installation status and
should use a read transaction around calls to `Spec.tree()`.
---
 lib/spack/spack/cmd/spec.py | 35 ++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/lib/spack/spack/cmd/spec.py b/lib/spack/spack/cmd/spec.py
index f63ac638c4d..f10eef9c733 100644
--- a/lib/spack/spack/cmd/spec.py
+++ b/lib/spack/spack/cmd/spec.py
@@ -6,6 +6,7 @@
 from __future__ import print_function

 import argparse
+import contextlib
 import sys

 import llnl.util.tty as tty
@@ -14,6 +15,7 @@
 import spack.cmd
 import spack.cmd.common.arguments as arguments
 import spack.spec
+import spack.store
 import spack.hash_types as ht

 description = "show what would be installed, given a spec"
@@ -45,6 +47,14 @@ def setup_parser(subparser):
         'specs', nargs=argparse.REMAINDER, help="specs of packages")


+@contextlib.contextmanager
+def nullcontext():
+    """Empty context manager.
+    TODO: replace with contextlib.nullcontext() if we ever require python 3.7.
+    """
+    yield
+
+
 def spec(parser, args):
     name_fmt = '{namespace}.{name}' if args.namespaces else '{name}'
     fmt = '{@version}{%compiler}{compiler_flags}{variants}{arch=architecture}'
@@ -57,6 +67,12 @@ def spec(parser, args):
         'status_fn': install_status_fn if args.install_status else None
     }

+    # use a read transaction if we are getting install status for every
+    # spec in the DAG. This avoids repeatedly querying the DB.
+ tree_context = nullcontext + if args.install_status: + tree_context = spack.store.db.read_transaction + if not args.specs: tty.die("spack spec requires at least one spec") @@ -73,13 +89,14 @@ def spec(parser, args): print(spec.to_json(hash=ht.build_hash)) continue - kwargs['hashes'] = False # Always False for input spec - print("Input spec") - print("--------------------------------") - print(spec.tree(**kwargs)) + with tree_context(): + kwargs['hashes'] = False # Always False for input spec + print("Input spec") + print("--------------------------------") + print(spec.tree(**kwargs)) - kwargs['hashes'] = args.long or args.very_long - print("Concretized") - print("--------------------------------") - spec.concretize() - print(spec.tree(**kwargs)) + kwargs['hashes'] = args.long or args.very_long + print("Concretized") + print("--------------------------------") + spec.concretize() + print(spec.tree(**kwargs)) From 91ea90c25347a33ab4b7f7d7d390af048b4ab809 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Wed, 18 Dec 2019 14:25:21 -0800 Subject: [PATCH 12/24] performance: speed up `spack find` in environments `Environment.added_specs()` has a loop around calls to `Package.installed()`, which can result in repeated DB queries. Optimize this with a read transaction in `Environment`. --- lib/spack/spack/environment.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py index 2c7a0cf098f..4a23c526223 100644 --- a/lib/spack/spack/environment.py +++ b/lib/spack/spack/environment.py @@ -25,6 +25,7 @@ import spack.repo import spack.schema.env import spack.spec +import spack.store import spack.util.spack_json as sjson import spack.util.spack_yaml as syaml import spack.config @@ -1232,13 +1233,16 @@ def added_specs(self): Yields the user spec for non-concretized specs, and the concrete spec for already concretized but not yet installed specs. """ - concretized = dict(self.concretized_specs()) - for spec in self.user_specs: - concrete = concretized.get(spec) - if not concrete: - yield spec - elif not concrete.package.installed: - yield concrete + # use a transaction to avoid overhead of repeated calls + # to `package.installed` + with spack.store.db.read_transaction(): + concretized = dict(self.concretized_specs()) + for spec in self.user_specs: + concrete = concretized.get(spec) + if not concrete: + yield spec + elif not concrete.package.installed: + yield concrete def concretized_specs(self): """Tuples of (user spec, concrete spec) for all concrete specs.""" From a85b9070cb37103c45e0e9b58732b4d92c09288b Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Fri, 20 Dec 2019 23:38:23 -0800 Subject: [PATCH 13/24] performance: avoid repeated DB locking on view generation `ViewDescriptor.regenerate()` checks repeatedly whether packages are installed and also does a lot of DB queries. Put a read transaction around the whole thing to avoid repeatedly locking and unlocking the DB. 
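The pattern is the same one used elsewhere in this series: wrap the
whole batch of queries in one transaction (sketch; the comprehension
stands in for the many per-spec checks `regenerate()` performs):

```
with spack.store.db.read_transaction():
    installed_specs_for_view = set(
        s for s in specs_for_view if s in self and s.package.installed)
    # view.clean(), remove_specs(), and add_specs() then run under
    # the same single read lock
```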
--- lib/spack/spack/environment.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py index 4a23c526223..f7a13104598 100644 --- a/lib/spack/spack/environment.py +++ b/lib/spack/spack/environment.py @@ -516,20 +516,23 @@ def regenerate(self, all_specs, roots): if spec.concrete: # Do not link unconcretized roots specs_for_view.append(spec.copy(deps=('link', 'run'))) - installed_specs_for_view = set(s for s in specs_for_view - if s in self and s.package.installed) + # regeneration queries the database quite a bit; this read + # transaction ensures that we don't repeatedly lock/unlock. + with spack.store.db.read_transaction(): + installed_specs_for_view = set( + s for s in specs_for_view if s in self and s.package.installed) - view = self.view() + view = self.view() - view.clean() - specs_in_view = set(view.get_all_specs()) - tty.msg("Updating view at {0}".format(self.root)) + view.clean() + specs_in_view = set(view.get_all_specs()) + tty.msg("Updating view at {0}".format(self.root)) - rm_specs = specs_in_view - installed_specs_for_view - view.remove_specs(*rm_specs, with_dependents=False) + rm_specs = specs_in_view - installed_specs_for_view + view.remove_specs(*rm_specs, with_dependents=False) - add_specs = installed_specs_for_view - specs_in_view - view.add_specs(*add_specs, with_dependencies=False) + add_specs = installed_specs_for_view - specs_in_view + view.add_specs(*add_specs, with_dependencies=False) class Environment(object): From 98577e3af5cf571d5408fcb1da32de3586238ead Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 16:23:54 -0800 Subject: [PATCH 14/24] lock transactions: fix non-transactional writes Lock transactions were actually writing *after* the lock was released. The code was looking at the result of `release_write()` before writing, then writing based on whether the lock was released. This is pretty obviously wrong. - [x] Refactor `Lock` so that a release function can be passed to the `Lock` and called *only* when a lock is really released. - [x] Refactor `LockTransaction` classes to use the release function instead of checking the return value of `release_read()` / `release_write()` --- lib/spack/llnl/util/lock.py | 100 ++++--- lib/spack/spack/database.py | 7 +- lib/spack/spack/test/llnl/util/lock.py | 348 +++++++++++++++---------- lib/spack/spack/util/file_cache.py | 10 +- 4 files changed, 289 insertions(+), 176 deletions(-) diff --git a/lib/spack/llnl/util/lock.py b/lib/spack/llnl/util/lock.py index 66cb067c884..c675c7c452b 100644 --- a/lib/spack/llnl/util/lock.py +++ b/lib/spack/llnl/util/lock.py @@ -95,10 +95,6 @@ def _lock(self, op, timeout=None): The lock is implemented as a spin lock using a nonblocking call to ``lockf()``. - On acquiring an exclusive lock, the lock writes this process's - pid and host to the lock file, in case the holding process needs - to be killed later. - If the lock times out, it raises a ``LockError``. If the lock is successfully acquired, the total wait time and the number of attempts is returned. @@ -284,11 +280,19 @@ def acquire_write(self, timeout=None): self._writes += 1 return False - def release_read(self): + def release_read(self, release_fn=None): """Releases a read lock. - Returns True if the last recursive lock was released, False if - there are still outstanding locks. + Arguments: + release_fn (callable): function to call *before* the last recursive + lock (read or write) is released. 
+
+        If the last recursive lock will be released, then this will call
+        release_fn and return its result (if provided), or return True
+        (if release_fn was not provided).
+
+        Otherwise, we are still nested inside some other lock, so do not
+        call the release_fn, and return False.

         Does limited correctness checking: if a read lock is released when
         none are held, this will raise an assertion error.
@@ -300,18 +304,30 @@ def release_read(self):
             self._debug(
                 'READ LOCK: {0.path}[{0._start}:{0._length}] [Released]'
                 .format(self))
+
+            result = True
+            if release_fn is not None:
+                result = release_fn()
+
             self._unlock()      # can raise LockError.
             self._reads -= 1
-            return True
+            return result
         else:
             self._reads -= 1
             return False

-    def release_write(self):
+    def release_write(self, release_fn=None):
         """Releases a write lock.

-        Returns True if the last recursive lock was released, False if
-        there are still outstanding locks.
+        Arguments:
+            release_fn (callable): function to call before the last recursive
+                write is released.
+
+        If the last recursive *write* lock will be released, then this
+        will call release_fn and return its result (if provided), or
+        return True (if release_fn was not provided). Otherwise, we are
+        still nested inside some other write lock, so do not call the
+        release_fn, and return False.

         Does limited correctness checking: if a read lock is released when
         none are held, this will raise an assertion error.
@@ -323,9 +339,16 @@ def release_write(self):
             self._debug(
                 'WRITE LOCK: {0.path}[{0._start}:{0._length}] [Released]'
                 .format(self))
+
+            # we need to call release_fn before releasing the lock
+            result = True
+            if release_fn is not None:
+                result = release_fn()
+
             self._unlock()      # can raise LockError.
             self._writes -= 1
-            return True
+            return result
+
         else:
             self._writes -= 1
             return False
@@ -349,28 +372,36 @@ def _acquired_debug(self, lock_type, wait_time, nattempts):

 class LockTransaction(object):
     """Simple nested transaction context manager that uses a file lock.

-    This class can trigger actions when the lock is acquired for the
-    first time and released for the last.
+    Arguments:
+        lock (Lock): underlying lock for this transaction to be acquired on
+            enter and released on exit
+        acquire (callable or contextmanager): function to be called after lock
+            is acquired, or contextmanager to enter after acquire and leave
+            before release.
+        release (callable): function to be called before release. If
+            ``acquire`` is a contextmanager, this will be called *after*
+            exiting the nested context and before the lock is released.
+        timeout (float): number of seconds to set for the timeout when
+            acquiring the lock (default no timeout)

     If the ``acquire_fn`` returns a value, it is used as the return value for
     ``__enter__``, allowing it to be passed as the ``as`` argument of a
     ``with`` statement.

     If ``acquire_fn`` returns a context manager, *its* ``__enter__`` function
-    will be called in ``__enter__`` after ``acquire_fn``, and its ``__exit__``
-    funciton will be called before ``release_fn`` in ``__exit__``, allowing you
-    to nest a context manager to be used along with the lock.
+    will be called after the lock is acquired, and its ``__exit__`` function
+    will be called before ``release_fn`` in ``__exit__``, allowing you to
+    nest a context manager inside this one.

     Timeout for lock is customizable.
""" - def __init__(self, lock, acquire_fn=None, release_fn=None, - timeout=None): + def __init__(self, lock, acquire=None, release=None, timeout=None): self._lock = lock self._timeout = timeout - self._acquire_fn = acquire_fn - self._release_fn = release_fn + self._acquire_fn = acquire + self._release_fn = release self._as = None def __enter__(self): @@ -383,13 +414,18 @@ def __enter__(self): def __exit__(self, type, value, traceback): suppress = False - if self._exit(): - if self._as and hasattr(self._as, '__exit__'): - if self._as.__exit__(type, value, traceback): - suppress = True - if self._release_fn: - if self._release_fn(type, value, traceback): - suppress = True + + def release_fn(): + if self._release_fn is not None: + return self._release_fn(type, value, traceback) + + if self._as and hasattr(self._as, '__exit__'): + if self._as.__exit__(type, value, traceback): + suppress = True + + if self._exit(release_fn): + suppress = True + return suppress @@ -398,8 +434,8 @@ class ReadTransaction(LockTransaction): def _enter(self): return self._lock.acquire_read(self._timeout) - def _exit(self): - return self._lock.release_read() + def _exit(self, release_fn): + return self._lock.release_read(release_fn) class WriteTransaction(LockTransaction): @@ -407,8 +443,8 @@ class WriteTransaction(LockTransaction): def _enter(self): return self._lock.acquire_write(self._timeout) - def _exit(self): - return self._lock.release_write() + def _exit(self, release_fn): + return self._lock.release_write(release_fn) class LockError(Exception): diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index e6e82f9803d..243f1a20d53 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -332,11 +332,12 @@ def __init__(self, root, db_dir=None, upstream_dbs=None, def write_transaction(self): """Get a write lock context manager for use in a `with` block.""" - return WriteTransaction(self.lock, self._read, self._write) + return WriteTransaction( + self.lock, acquire=self._read, release=self._write) def read_transaction(self): """Get a read lock context manager for use in a `with` block.""" - return ReadTransaction(self.lock, self._read) + return ReadTransaction(self.lock, acquire=self._read) def prefix_lock(self, spec): """Get a lock on a particular spec's installation directory. @@ -624,7 +625,7 @@ def _read_suppress_error(): self._data = {} transaction = WriteTransaction( - self.lock, _read_suppress_error, self._write + self.lock, acquire=_read_suppress_error, release=self._write ) with transaction: diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index d8081d108c4..3bf8a236b12 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -42,6 +42,7 @@ actually on a shared filesystem. 
""" +import collections import os import socket import shutil @@ -776,189 +777,258 @@ def p3(barrier): multiproc_test(p1, p2, p3) -def test_transaction(lock_path): +class AssertLock(lk.Lock): + """Test lock class that marks acquire/release events.""" + def __init__(self, lock_path, vals): + super(AssertLock, self).__init__(lock_path) + self.vals = vals + + def acquire_read(self, timeout=None): + self.assert_acquire_read() + result = super(AssertLock, self).acquire_read(timeout) + self.vals['acquired_read'] = True + return result + + def acquire_write(self, timeout=None): + self.assert_acquire_write() + result = super(AssertLock, self).acquire_write(timeout) + self.vals['acquired_write'] = True + return result + + def release_read(self, release_fn=None): + self.assert_release_read() + result = super(AssertLock, self).release_read(release_fn) + self.vals['released_read'] = True + return result + + def release_write(self, release_fn=None): + self.assert_release_write() + result = super(AssertLock, self).release_write(release_fn) + self.vals['released_write'] = True + return result + + +@pytest.mark.parametrize( + "transaction,type", + [(lk.ReadTransaction, "read"), (lk.WriteTransaction, "write")] +) +def test_transaction(lock_path, transaction, type): + class MockLock(AssertLock): + def assert_acquire_read(self): + assert not vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_release_read(self): + assert vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_acquire_write(self): + assert not vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_release_write(self): + assert vals['entered_fn'] + assert not vals['exited_fn'] + def enter_fn(): - vals['entered'] = True + # assert enter_fn is called while lock is held + assert vals['acquired_%s' % type] + vals['entered_fn'] = True def exit_fn(t, v, tb): - vals['exited'] = True + # assert exit_fn is called while lock is held + assert not vals['released_%s' % type] + vals['exited_fn'] = True vals['exception'] = (t or v or tb) - lock = lk.Lock(lock_path) - vals = {'entered': False, 'exited': False, 'exception': False} - with lk.ReadTransaction(lock, enter_fn, exit_fn): - pass + vals = collections.defaultdict(lambda: False) + lock = MockLock(lock_path, vals) - assert vals['entered'] - assert vals['exited'] - assert not vals['exception'] + with transaction(lock, acquire=enter_fn, release=exit_fn): + assert vals['acquired_%s' % type] + assert not vals['released_%s' % type] - vals = {'entered': False, 'exited': False, 'exception': False} - with lk.WriteTransaction(lock, enter_fn, exit_fn): - pass - - assert vals['entered'] - assert vals['exited'] + assert vals['entered_fn'] + assert vals['exited_fn'] + assert vals['acquired_%s' % type] + assert vals['released_%s' % type] assert not vals['exception'] -def test_transaction_with_exception(lock_path): +@pytest.mark.parametrize( + "transaction,type", + [(lk.ReadTransaction, "read"), (lk.WriteTransaction, "write")] +) +def test_transaction_with_exception(lock_path, transaction, type): + class MockLock(AssertLock): + def assert_acquire_read(self): + assert not vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_release_read(self): + assert vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_acquire_write(self): + assert not vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_release_write(self): + assert vals['entered_fn'] + assert not vals['exited_fn'] + def enter_fn(): - vals['entered'] = True + assert vals['acquired_%s' % type] + 
vals['entered_fn'] = True def exit_fn(t, v, tb): - vals['exited'] = True + assert not vals['released_%s' % type] + vals['exited_fn'] = True vals['exception'] = (t or v or tb) + return exit_result - lock = lk.Lock(lock_path) + exit_result = False + vals = collections.defaultdict(lambda: False) + lock = MockLock(lock_path, vals) - def do_read_with_exception(): - with lk.ReadTransaction(lock, enter_fn, exit_fn): - raise Exception() - - def do_write_with_exception(): - with lk.WriteTransaction(lock, enter_fn, exit_fn): - raise Exception() - - vals = {'entered': False, 'exited': False, 'exception': False} with pytest.raises(Exception): - do_read_with_exception() - assert vals['entered'] - assert vals['exited'] + with transaction(lock, acquire=enter_fn, release=exit_fn): + raise Exception() + + assert vals['entered_fn'] + assert vals['exited_fn'] assert vals['exception'] - vals = {'entered': False, 'exited': False, 'exception': False} - with pytest.raises(Exception): - do_write_with_exception() - assert vals['entered'] - assert vals['exited'] + # test suppression of exceptions from exit_fn + exit_result = True + vals.clear() + + # should not raise now. + with transaction(lock, acquire=enter_fn, release=exit_fn): + raise Exception() + + assert vals['entered_fn'] + assert vals['exited_fn'] assert vals['exception'] -def test_transaction_with_context_manager(lock_path): - class TestContextManager(object): +@pytest.mark.parametrize( + "transaction,type", + [(lk.ReadTransaction, "read"), (lk.WriteTransaction, "write")] +) +def test_transaction_with_context_manager(lock_path, transaction, type): + class MockLock(AssertLock): + def assert_acquire_read(self): + assert not vals['entered_ctx'] + assert not vals['exited_ctx'] - def __enter__(self): - vals['entered'] = True + def assert_release_read(self): + assert vals['entered_ctx'] + assert vals['exited_ctx'] - def __exit__(self, t, v, tb): - vals['exited'] = True - vals['exception'] = (t or v or tb) + def assert_acquire_write(self): + assert not vals['entered_ctx'] + assert not vals['exited_ctx'] - def exit_fn(t, v, tb): - vals['exited_fn'] = True - vals['exception_fn'] = (t or v or tb) + def assert_release_write(self): + assert vals['entered_ctx'] + assert vals['exited_ctx'] - lock = lk.Lock(lock_path) - - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with lk.ReadTransaction(lock, TestContextManager, exit_fn): - pass - - assert vals['entered'] - assert vals['exited'] - assert not vals['exception'] - assert vals['exited_fn'] - assert not vals['exception_fn'] - - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with lk.ReadTransaction(lock, TestContextManager): - pass - - assert vals['entered'] - assert vals['exited'] - assert not vals['exception'] - assert not vals['exited_fn'] - assert not vals['exception_fn'] - - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with lk.WriteTransaction(lock, TestContextManager, exit_fn): - pass - - assert vals['entered'] - assert vals['exited'] - assert not vals['exception'] - assert vals['exited_fn'] - assert not vals['exception_fn'] - - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with lk.WriteTransaction(lock, TestContextManager): - pass - - assert vals['entered'] - assert vals['exited'] - assert not vals['exception'] - assert not vals['exited_fn'] - assert not 
vals['exception_fn'] - - -def test_transaction_with_context_manager_and_exception(lock_path): class TestContextManager(object): def __enter__(self): - vals['entered'] = True + vals['entered_ctx'] = True def __exit__(self, t, v, tb): - vals['exited'] = True - vals['exception'] = (t or v or tb) + assert not vals['released_%s' % type] + vals['exited_ctx'] = True + vals['exception_ctx'] = (t or v or tb) + return exit_ctx_result def exit_fn(t, v, tb): + assert not vals['released_%s' % type] vals['exited_fn'] = True vals['exception_fn'] = (t or v or tb) + return exit_fn_result - lock = lk.Lock(lock_path) + exit_fn_result, exit_ctx_result = False, False + vals = collections.defaultdict(lambda: False) + lock = MockLock(lock_path, vals) - def do_read_with_exception(exit_fn): - with lk.ReadTransaction(lock, TestContextManager, exit_fn): - raise Exception() + with transaction(lock, acquire=TestContextManager, release=exit_fn): + pass - def do_write_with_exception(exit_fn): - with lk.WriteTransaction(lock, TestContextManager, exit_fn): - raise Exception() - - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with pytest.raises(Exception): - do_read_with_exception(exit_fn) - assert vals['entered'] - assert vals['exited'] - assert vals['exception'] + assert vals['entered_ctx'] + assert vals['exited_ctx'] assert vals['exited_fn'] - assert vals['exception_fn'] - - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with pytest.raises(Exception): - do_read_with_exception(None) - assert vals['entered'] - assert vals['exited'] - assert vals['exception'] - assert not vals['exited_fn'] + assert not vals['exception_ctx'] assert not vals['exception_fn'] - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with pytest.raises(Exception): - do_write_with_exception(exit_fn) - assert vals['entered'] - assert vals['exited'] - assert vals['exception'] - assert vals['exited_fn'] - assert vals['exception_fn'] + vals.clear() + with transaction(lock, acquire=TestContextManager): + pass - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with pytest.raises(Exception): - do_write_with_exception(None) - assert vals['entered'] - assert vals['exited'] - assert vals['exception'] + assert vals['entered_ctx'] + assert vals['exited_ctx'] assert not vals['exited_fn'] + assert not vals['exception_ctx'] assert not vals['exception_fn'] + # below are tests for exceptions with and without suppression + def assert_ctx_and_fn_exception(raises=True): + vals.clear() + + if raises: + with pytest.raises(Exception): + with transaction( + lock, acquire=TestContextManager, release=exit_fn): + raise Exception() + else: + with transaction( + lock, acquire=TestContextManager, release=exit_fn): + raise Exception() + + assert vals['entered_ctx'] + assert vals['exited_ctx'] + assert vals['exited_fn'] + assert vals['exception_ctx'] + assert vals['exception_fn'] + + def assert_only_ctx_exception(raises=True): + vals.clear() + + if raises: + with pytest.raises(Exception): + with transaction(lock, acquire=TestContextManager): + raise Exception() + else: + with transaction(lock, acquire=TestContextManager): + raise Exception() + + assert vals['entered_ctx'] + assert vals['exited_ctx'] + assert not vals['exited_fn'] + assert vals['exception_ctx'] + assert not vals['exception_fn'] + + # no suppression + 
assert_ctx_and_fn_exception(raises=True) + assert_only_ctx_exception(raises=True) + + # suppress exception only in function + exit_fn_result, exit_ctx_result = True, False + assert_ctx_and_fn_exception(raises=False) + assert_only_ctx_exception(raises=True) + + # suppress exception only in context + exit_fn_result, exit_ctx_result = False, True + assert_ctx_and_fn_exception(raises=False) + assert_only_ctx_exception(raises=False) + + # suppress exception in function and context + exit_fn_result, exit_ctx_result = True, True + assert_ctx_and_fn_exception(raises=False) + assert_only_ctx_exception(raises=False) + def test_lock_debug_output(lock_path): host = socket.getfqdn() diff --git a/lib/spack/spack/util/file_cache.py b/lib/spack/spack/util/file_cache.py index d56f2b33c51..0227edf1559 100644 --- a/lib/spack/spack/util/file_cache.py +++ b/lib/spack/spack/util/file_cache.py @@ -107,7 +107,8 @@ def read_transaction(self, key): """ return ReadTransaction( - self._get_lock(key), lambda: open(self.cache_path(key))) + self._get_lock(key), acquire=lambda: open(self.cache_path(key)) + ) def write_transaction(self, key): """Get a write transaction on a file cache item. @@ -117,6 +118,10 @@ def write_transaction(self, key): moves the file into place on top of the old file atomically. """ + # TODO: this nested context manager adds a lot of complexity and + # TODO: is pretty hard to reason about in llnl.util.lock. At some + # TODO: point we should just replace it with functions and simplify + # TODO: the locking code. class WriteContextManager(object): def __enter__(cm): # noqa @@ -142,7 +147,8 @@ def __exit__(cm, type, value, traceback): # noqa else: os.rename(cm.tmp_filename, cm.orig_filename) - return WriteTransaction(self._get_lock(key), WriteContextManager) + return WriteTransaction( + self._get_lock(key), acquire=WriteContextManager) def mtime(self, key): """Return modification time of cache file, or 0 if it does not exist. From b3a5f2e3c3ce4e3e9836301504f5b5987117248e Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 16:29:53 -0800 Subject: [PATCH 15/24] lock transactions: ensure that nested write transactions write If a write transaction was nested inside a read transaction, it would not write properly on release, e.g., in a sequence like this, inside our `LockTransaction` class: ``` 1 with spack.store.db.read_transaction(): 2 with spack.store.db.write_transaction(): 3 ... 4 with spack.store.db.read_transaction(): ... ``` The WriteTransaction on line 2 had no way of knowing that its `__exit__()` call was the last *write* in the nesting, and it would skip calling its write function. The `__exit__()` call of the `ReadTransaction` on line 1 wouldn't know how to write, and the file would never be written. The DB would be correct in memory, but the `ReadTransaction` on line 4 would re-read the whole DB assuming that other processes may have modified it. Since the DB was never written, we got stale data. - [x] Make `Lock.release_write()` return `True` whenever we release the *last write* in a nest. 
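In terms of the `Lock` API, the desired semantics are (a sketch;
`write_data` is a hypothetical release function):

```
lock = Lock(path)
lock.acquire_read()
lock.acquire_write()
# releasing the last *write* must invoke the release function, even
# though an enclosing read lock is still held
lock.release_write(release_fn=write_data)  # calls write_data()
lock.release_read()
```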
--- lib/spack/llnl/util/lock.py | 8 +++- lib/spack/spack/test/llnl/util/lock.py | 57 ++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/lib/spack/llnl/util/lock.py b/lib/spack/llnl/util/lock.py index c675c7c452b..86a45e2d7cf 100644 --- a/lib/spack/llnl/util/lock.py +++ b/lib/spack/llnl/util/lock.py @@ -351,7 +351,13 @@ def release_write(self, release_fn=None): else: self._writes -= 1 - return False + + # when the last *write* is released, we call release_fn here + # instead of immediately before releasing the lock. + if self._writes == 0: + return release_fn() if release_fn is not None else True + else: + return False def _debug(self, *args): tty.debug(*args) diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 3bf8a236b12..2b0892a25e3 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -783,6 +783,12 @@ def __init__(self, lock_path, vals): super(AssertLock, self).__init__(lock_path) self.vals = vals + # assert hooks for subclasses + assert_acquire_read = lambda self: None + assert_acquire_write = lambda self: None + assert_release_read = lambda self: None + assert_release_write = lambda self: None + def acquire_read(self, timeout=None): self.assert_acquire_read() result = super(AssertLock, self).acquire_read(timeout) @@ -1030,6 +1036,57 @@ def assert_only_ctx_exception(raises=True): assert_only_ctx_exception(raises=False) +def test_nested_write_transaction(lock_path): + """Ensure that the outermost write transaction writes.""" + + def write(t, v, tb): + vals['wrote'] = True + + vals = collections.defaultdict(lambda: False) + lock = AssertLock(lock_path, vals) + + # write/write + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + assert not vals['wrote'] + assert vals['wrote'] + + # read/write + vals.clear() + with lk.ReadTransaction(lock): + assert not vals['wrote'] + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + assert vals['wrote'] + + # write/read/write + vals.clear() + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + with lk.ReadTransaction(lock): + assert not vals['wrote'] + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + assert not vals['wrote'] + assert not vals['wrote'] + assert vals['wrote'] + + # read/write/read/write + vals.clear() + with lk.ReadTransaction(lock): + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + with lk.ReadTransaction(lock): + assert not vals['wrote'] + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + assert not vals['wrote'] + assert not vals['wrote'] + assert vals['wrote'] + + def test_lock_debug_output(lock_path): host = socket.getfqdn() From d87ededddce53a8e3ae18ba8f75e206f6e395793 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 16:31:28 -0800 Subject: [PATCH 16/24] lock transactions: avoid redundant reading in write transactions Our `LockTransaction` class was reading overly aggressively. In cases like this: ``` 1 with spack.store.db.read_transaction(): 2 with spack.store.db.write_transaction(): 3 ... ``` The `ReadTransaction` on line 1 would read in the DB, but the WriteTransaction on line 2 would read in the DB *again*, even though we had a read lock the whole time. 
`WriteTransaction`s were only considering nested writes to decide when to read, but they didn't know when we already had a read lock. - [x] `Lock.acquire_write()` return `False` in cases where we already had a read lock. --- lib/spack/llnl/util/lock.py | 7 +++- lib/spack/spack/test/llnl/util/lock.py | 56 ++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/lib/spack/llnl/util/lock.py b/lib/spack/llnl/util/lock.py index 86a45e2d7cf..3a58093491b 100644 --- a/lib/spack/llnl/util/lock.py +++ b/lib/spack/llnl/util/lock.py @@ -275,7 +275,12 @@ def acquire_write(self, timeout=None): wait_time, nattempts = self._lock(fcntl.LOCK_EX, timeout=timeout) self._acquired_debug('WRITE LOCK', wait_time, nattempts) self._writes += 1 - return True + + # return True only if we weren't nested in a read lock. + # TODO: we may need to return two values: whether we got + # the write lock, and whether this is acquiring a read OR + # write lock for the first time. Now it returns the latter. + return self._reads == 0 else: self._writes += 1 return False diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 2b0892a25e3..ca879cdc0b0 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -1087,6 +1087,62 @@ def write(t, v, tb): assert vals['wrote'] +def test_nested_reads(lock_path): + """Ensure that write transactions won't re-read data.""" + + def read(): + vals['read'] += 1 + + vals = collections.defaultdict(lambda: 0) + lock = AssertLock(lock_path, vals) + + # read/read + vals.clear() + assert vals['read'] == 0 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + + # write/write + vals.clear() + assert vals['read'] == 0 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + + # read/write + vals.clear() + assert vals['read'] == 0 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + + # write/read/write + vals.clear() + assert vals['read'] == 0 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + + # read/write/read/write + vals.clear() + assert vals['read'] == 0 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + + def test_lock_debug_output(lock_path): host = socket.getfqdn() From be6d7db2a81d9013c4fd05c15b2a01288c5725f0 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 00:21:59 -0800 Subject: [PATCH 17/24] performance: add read transactions for `install_all()` and `install()` Environments need to read the DB a lot when installing all specs. 
- [x] Put a read transaction around `install_all()` and `install()` to avoid repeated locking
---
 lib/spack/spack/environment.py | 58 ++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py
index f7a13104598..2a2be004250 100644
--- a/lib/spack/spack/environment.py
+++ b/lib/spack/spack/environment.py
@@ -992,19 +992,22 @@ def install(self, user_spec, concrete_spec=None, **install_args):

         spec = Spec(user_spec)

-        if self.add(spec):
-            concrete = concrete_spec if concrete_spec else spec.concretized()
-            self._add_concrete_spec(spec, concrete)
-        else:
-            # spec might be in the user_specs, but not installed.
-            # TODO: Redo name-based comparison for old style envs
-            spec = next(s for s in self.user_specs if s.satisfies(user_spec))
-            concrete = self.specs_by_hash.get(spec.build_hash())
-            if not concrete:
-                concrete = spec.concretized()
+        with spack.store.db.read_transaction():
+            if self.add(spec):
+                concrete = concrete_spec or spec.concretized()
                 self._add_concrete_spec(spec, concrete)
+            else:
+                # spec might be in the user_specs, but not installed.
+                # TODO: Redo name-based comparison for old style envs
+                spec = next(
+                    s for s in self.user_specs if s.satisfies(user_spec)
+                )
+                concrete = self.specs_by_hash.get(spec.build_hash())
+                if not concrete:
+                    concrete = spec.concretized()
+                    self._add_concrete_spec(spec, concrete)

-        self._install(concrete, **install_args)
+            self._install(concrete, **install_args)

     def _install(self, spec, **install_args):
         spec.package.do_install(**install_args)
@@ -1177,26 +1180,27 @@ def _add_concrete_spec(self, spec, concrete, new=True):

     def install_all(self, args=None):
         """Install all concretized specs in an environment."""
-        for concretized_hash in self.concretized_order:
-            spec = self.specs_by_hash[concretized_hash]
+        with spack.store.db.read_transaction():
+            for concretized_hash in self.concretized_order:
+                spec = self.specs_by_hash[concretized_hash]

-            # Parse cli arguments and construct a dictionary
-            # that will be passed to Package.do_install API
-            kwargs = dict()
-            if args:
-                spack.cmd.install.update_kwargs_from_args(args, kwargs)
+                # Parse cli arguments and construct a dictionary
+                # that will be passed to Package.do_install API
+                kwargs = dict()
+                if args:
+                    spack.cmd.install.update_kwargs_from_args(args, kwargs)

-            self._install(spec, **kwargs)
+                self._install(spec, **kwargs)

-            if not spec.external:
-                # Link the resulting log file into logs dir
-                build_log_link = os.path.join(
-                    self.log_path, '%s-%s.log' % (spec.name, spec.dag_hash(7)))
-                if os.path.lexists(build_log_link):
-                    os.remove(build_log_link)
-                os.symlink(spec.package.build_log_path, build_log_link)
+                if not spec.external:
+                    # Link the resulting log file into logs dir
+                    log_name = '%s-%s' % (spec.name, spec.dag_hash(7))
+                    build_log_link = os.path.join(self.log_path, log_name)
+                    if os.path.lexists(build_log_link):
+                        os.remove(build_log_link)
+                    os.symlink(spec.package.build_log_path, build_log_link)

-        self.regenerate_views()
+            self.regenerate_views()

     def all_specs_by_hash(self):
         """Map of hashes to spec for all specs in this environment."""

From 79ddf6cf0daa95ffef3aa1457b893bb245fa23de Mon Sep 17 00:00:00 2001
From: Todd Gamblin
Date: Sat, 21 Dec 2019 11:34:12 -0800
Subject: [PATCH 18/24] performance: only regenerate env views once in `spack
 install`

`spack install` previously concretized, wrote the entire environment
out, regenerated views, then wrote and regenerated views again.
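The sequence this change moves `spack install` to is roughly (a sketch;
`env` and `args` stand for the active environment and the parsed CLI
arguments):

```
env.write(regenerate_views=False)  # write concrete specs, skip view regen
env.install_all(args)
env.regenerate_views()             # regenerate views exactly once
```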
Regenerating views is slow, so ensure that we only do that once.

- [x] add an option to env.write() to skip view regeneration
- [x] add a note asking whether regenerate_views() should just be a
      separate operation -- it is not clear whether we want to keep it
      as part of write() to ensure consistency, or take it out to avoid
      performance issues.
---
 lib/spack/spack/cmd/install.py |  7 ++++++-
 lib/spack/spack/environment.py | 31 +++++++++++++++++++++++--------
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/lib/spack/spack/cmd/install.py b/lib/spack/spack/cmd/install.py
index 8fd63beeded..ab012eaead0 100644
--- a/lib/spack/spack/cmd/install.py
+++ b/lib/spack/spack/cmd/install.py
@@ -229,9 +229,14 @@ def install(parser, args, **kwargs):
         if not args.only_concrete:
             concretized_specs = env.concretize()
             ev.display_specs(concretized_specs)
-            env.write()
+
+            # save view regeneration for later, so that we only do it
+            # once, as it can be slow.
+            env.write(regenerate_views=False)
+
         tty.msg("Installing environment %s" % env.name)
         env.install_all(args)
+        env.regenerate_views()
         return
     else:
         tty.die("install requires a package argument or a spack.yaml file")
diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py
index 2a2be004250..5b526bbe113 100644
--- a/lib/spack/spack/environment.py
+++ b/lib/spack/spack/environment.py
@@ -1179,7 +1179,12 @@ def _add_concrete_spec(self, spec, concrete, new=True):
         self.specs_by_hash[h] = concrete
 
     def install_all(self, args=None):
-        """Install all concretized specs in an environment."""
+        """Install all concretized specs in an environment.
+
+        Note: this does not regenerate the views for the environment;
+        that needs to be done separately with a call to write().
+
+        """
         with spack.store.db.read_transaction():
             for concretized_hash in self.concretized_order:
                 spec = self.specs_by_hash[concretized_hash]
@@ -1200,8 +1205,6 @@ def install_all(self, args=None):
                     os.remove(build_log_link)
                 os.symlink(spec.package.build_log_path, build_log_link)
 
-        self.regenerate_views()
-
     def all_specs_by_hash(self):
         """Map of hashes to spec for all specs in this environment."""
         # Note this uses dag-hashes calculated without build deps as keys,
@@ -1367,10 +1370,17 @@ def _read_lockfile_dict(self, d):
         self.concretized_order = [
             old_hash_to_new.get(h, h) for h in self.concretized_order]
 
-    def write(self):
+    def write(self, regenerate_views=True):
         """Writes an in-memory environment to its location on disk.
 
-        This will also write out package files for each newly concretized spec.
+        Write out package files for each newly concretized spec. Also
+        regenerate any views associated with the environment, if
+        regenerate_views is True.
+
+        Arguments:
+            regenerate_views (bool): if True, regenerate views as
+                well as writing the environment.
+
         """
         # ensure path in var/spack/environments
         fs.mkdirp(self.path)
@@ -1478,9 +1488,14 @@ def write(self):
         with fs.write_tmp_and_move(self.manifest_path) as f:
             _write_yaml(self.yaml, f)
 
-        # TODO: for operations that just add to the env (install etc.) this
-        # could just call update_view
-        self.regenerate_views()
+        # TODO: rethink where this needs to happen along with
+        # writing. For some of the commands (like install, which write
+        # concrete specs AND regen) this might as well be a separate
+        # call. But, having it here makes the views consistent with the
+        # concretized environment for most operations. Which is the
+        # special case?
+        if regenerate_views:
+            self.regenerate_views()
 
     def __enter__(self):
         self._previous_active = _active_environment
From f01368739741ef9b3bcc827f7dbc2ab9d5b48df4 Mon Sep 17 00:00:00 2001
From: Todd Gamblin
Date: Sat, 21 Dec 2019 16:50:15 -0800
Subject: [PATCH 19/24] performance: reduce system calls required for remove_dead_links

`os.path.exists()` will report False if the target of a symlink doesn't
exist, so we can avoid a costly call to realpath here.
---
 lib/spack/llnl/util/filesystem.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lib/spack/llnl/util/filesystem.py b/lib/spack/llnl/util/filesystem.py
index 0095e3fd215..f6c8d161d7f 100644
--- a/lib/spack/llnl/util/filesystem.py
+++ b/lib/spack/llnl/util/filesystem.py
@@ -917,10 +917,8 @@ def remove_if_dead_link(path):
     Parameters:
         path (str): The potential dead link
     """
-    if os.path.islink(path):
-        real_path = os.path.realpath(path)
-        if not os.path.exists(real_path):
-            os.unlink(path)
+    if os.path.islink(path) and not os.path.exists(path):
+        os.unlink(path)
 
 
 def remove_linked_tree(path):
From e3939b0c72b2c6bc5f41e9f33de3c04cf4785c05 Mon Sep 17 00:00:00 2001
From: Todd Gamblin
Date: Sat, 21 Dec 2019 23:45:30 -0800
Subject: [PATCH 20/24] performance: don't recompute hashes when regenerating environments

`ViewDescriptor.regenerate()` was copying specs and stripping build
dependencies, which clears `_hash` and other cached fields on concrete
specs and causes a bunch of YAML hashes to be recomputed.

- [x] Preserve the `_hash` and `_normal` fields on stripped specs, as
      these will be unchanged.
---
 lib/spack/spack/environment.py | 11 ++++++++---
 lib/spack/spack/spec.py        |  2 +-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py
index 5b526bbe113..56440fbe922 100644
--- a/lib/spack/spack/environment.py
+++ b/lib/spack/spack/environment.py
@@ -511,10 +511,15 @@ def regenerate(self, all_specs, roots):
         specs = all_specs if self.link == 'all' else roots
         for spec in specs:
             # The view does not store build deps, so if we want it to
-            # recognize environment specs (which do store build deps), then
-            # they need to be stripped
+            # recognize environment specs (which do store build deps),
+            # then they need to be stripped.
             if spec.concrete:  # Do not link unconcretized roots
-                specs_for_view.append(spec.copy(deps=('link', 'run')))
+                # We preserve _hash and _normal to avoid recomputing DAG
+                # hashes (DAG hashes don't consider build deps)
+                spec_copy = spec.copy(deps=('link', 'run'))
+                spec_copy._hash = spec._hash
+                spec_copy._normal = spec._normal
+                specs_for_view.append(spec_copy)
 
         # regeneration queries the database quite a bit; this read
         # transaction ensures that we don't repeatedly lock/unlock.
diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index 86ac62e0d75..c553da796dc 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -2999,7 +2999,7 @@ def _dup(self, other, deps=True, cleardeps=True, caches=None):
                 before possibly copying the dependencies of ``other``
                 onto ``self``
             caches (bool or None): preserve cached fields such as
-                ``_normal``, ``_concrete``, and ``_cmp_key_cache``. By
+                ``_normal``, ``_hash``, and ``_cmp_key_cache``. By
                 default this is ``False`` if DAG structure would be
                 changed by the copy, ``True`` if it's an exact copy.
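The idea in isolation, as a sketch (assuming `spec` is a concrete spec;
these lines mirror the `regenerate()` hunk above):

```python
# Strip build deps for the view, but keep the memoized fields: the DAG
# hash doesn't consider build deps, so the stripped copy's hash and
# normalization status are unchanged and need not be recomputed.
spec_copy = spec.copy(deps=('link', 'run'))
spec_copy._hash = spec._hash
spec_copy._normal = spec._normal
```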
From e22d3250dd78eac769906620da55594690fbbca7 Mon Sep 17 00:00:00 2001
From: Todd Gamblin
Date: Sat, 21 Dec 2019 23:45:47 -0800
Subject: [PATCH 21/24] performance: don't read `spec.yaml` files twice in view regeneration

`ViewDescriptor.regenerate()` calls `get_all_specs()`, which reads
`spec.yaml` files, which is slow. It's fine to do this once, but
`view.remove_specs()` *also* calls it immediately afterwards.

- [x] Pass the result of `get_all_specs()` as an optional parameter
      to `view.remove_specs()` to avoid reading `spec.yaml` files
      twice.
---
 lib/spack/spack/environment.py     | 7 +++++--
 lib/spack/spack/filesystem_view.py | 5 +++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py
index 56440fbe922..bf9af075ce5 100644
--- a/lib/spack/spack/environment.py
+++ b/lib/spack/spack/environment.py
@@ -534,9 +534,12 @@ def regenerate(self, all_specs, roots):
                 tty.msg("Updating view at {0}".format(self.root))
 
                 rm_specs = specs_in_view - installed_specs_for_view
-                view.remove_specs(*rm_specs, with_dependents=False)
-
                 add_specs = installed_specs_for_view - specs_in_view
+
+                # pass all_specs in, as it's expensive to read all the
+                # spec.yaml files twice.
+                view.remove_specs(*rm_specs, with_dependents=False,
+                                  all_specs=specs_in_view)
                 view.add_specs(*add_specs, with_dependencies=False)
 
diff --git a/lib/spack/spack/filesystem_view.py b/lib/spack/spack/filesystem_view.py
index 448254f26b3..5455ccb1077 100644
--- a/lib/spack/spack/filesystem_view.py
+++ b/lib/spack/spack/filesystem_view.py
@@ -371,6 +371,9 @@ def remove_specs(self, *specs, **kwargs):
         with_dependents = kwargs.get("with_dependents", True)
         with_dependencies = kwargs.get("with_dependencies", False)
 
+        # caller can pass this in, as get_all_specs() is expensive
+        all_specs = kwargs.get("all_specs", None) or set(self.get_all_specs())
+
         specs = set(specs)
 
         if with_dependencies:
@@ -379,8 +382,6 @@ def remove_specs(self, *specs, **kwargs):
         if kwargs.get("exclude", None):
             specs = set(filter_exclude(specs, kwargs["exclude"]))
 
-        all_specs = set(self.get_all_specs())
-
         to_deactivate = specs
         to_keep = all_specs - to_deactivate
 
From 8616a264063de99087bf4f1204d598ff2462f1d4 Mon Sep 17 00:00:00 2001
From: Massimiliano Culpo
Date: Mon, 16 Dec 2019 10:56:54 +0100
Subject: [PATCH 22/24] Travis exits at the first failing test, pin coverage at v4.5.4 (#14179)

Before this commit we used to run the entire unit test suite even in
the presence of a failure. Since the tests rely heavily on the state of
the filesystem, the end report would often show spurious failures that
were a consequence of the first failing test.
This PR makes unit tests exit at the first failing test.

Also, pin coverage at v4.5.4 (the last version supporting Python 2.6).
---
 .travis.yml                   | 2 +-
 share/spack/qa/run-unit-tests | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index f8f0778caef..c9d442ee8e4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -177,7 +177,7 @@ install:
   - pip install --upgrade pip
   - pip install --upgrade six
   - pip install --upgrade setuptools
-  - pip install --upgrade codecov
+  - pip install --upgrade codecov coverage==4.5.4
   - pip install --upgrade flake8
   - pip install --upgrade pep8-naming
   - if [[ "$TEST_SUITE" == "doc" ]]; then
diff --git a/share/spack/qa/run-unit-tests b/share/spack/qa/run-unit-tests
index 4403d53a766..11f3ac1fcb5 100755
--- a/share/spack/qa/run-unit-tests
+++ b/share/spack/qa/run-unit-tests
@@ -46,7 +46,7 @@ extra_args=""
 if [[ -n "$@" ]]; then
     extra_args="-k $@"
 fi
 
-${coverage_run} bin/spack test --verbose "$extra_args"
+${coverage_run} bin/spack test -x --verbose "$extra_args"
 
 #-----------------------------------------------------------
 # Run tests for setup-env.sh
From 231e237764e9bcf96c8ac233d05e302a16b2c02d Mon Sep 17 00:00:00 2001
From: Todd Gamblin
Date: Mon, 23 Dec 2019 23:23:22 -0800
Subject: [PATCH 23/24] version bump: 0.13.3

---
 lib/spack/spack/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/spack/spack/__init__.py b/lib/spack/spack/__init__.py
index f7caf373e3f..00744096613 100644
--- a/lib/spack/spack/__init__.py
+++ b/lib/spack/spack/__init__.py
@@ -5,7 +5,7 @@
 
 #: major, minor, patch version for Spack, in a tuple
-spack_version_info = (0, 13, 2)
+spack_version_info = (0, 13, 3)
 
 #: String containing Spack version joined with .'s
 spack_version = '.'.join(str(v) for v in spack_version_info)
From 55d5b435c8061d9fdc51feb03bf291b0b7a0f24c Mon Sep 17 00:00:00 2001
From: Todd Gamblin
Date: Mon, 23 Dec 2019 23:23:38 -0800
Subject: [PATCH 24/24] update CHANGELOG.md for 0.13.3

---
 CHANGELOG.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bd18a030cee..2b2a1cf9f01 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,17 @@
+# v0.13.3 (2019-12-23)
+
+This release contains more major performance improvements for Spack
+environments, as well as bugfixes for mirrors and a `python` issue with
+RHEL8.
+
+* mirror bugfixes: symlinks, duplicate patches, and exception handling (#13789)
+* don't try to fetch `BundlePackages` (#13908)
+* avoid re-fetching patches already added to a mirror (#13908)
+* allow repeated invocations of `spack mirror create` on the same dir (#13908)
+* bugfix for RHEL8 when `python` is unavailable (#14252)
+* improve concretization performance in environments (#14190)
+* improve installation performance in environments (#14263)
+
 # v0.13.2 (2019-12-04)
 
 This release contains major performance improvements for Spack environments, as