mirror create --all can mirror everything (#12940)

Support mirroring all packages with `spack mirror create --all`.

In this mode there is no concretization:

* Spack pulls every version of every package into the created mirror.
* It makes multiple fetch attempts for each package/version combination,
  to ride out temporary connection failures.
* It continues even when every attempt for a package fails, i.e., it makes
  a best effort to fetch everything, even if some packages cannot be
  retrieved (a sketch of this retry loop follows the list).

This also changes mirroring logic to prefer storing sources by their hash
or by a unique name derived from the source.  For example:

* Archives with checksums are named by their sha256 sum, e.g.,
  `archive/f6/f6cf3bd233f9ea6147b21c7c02cac24e5363570ce4fd6be11dab9f499ed6a7d8.tar.gz`
  vs. the previous `<package-name>-<package-version>.tar.gz`
* VCS repositories are stored by a path derived from their URL,
  e.g. `git/google/leveldb.git/master.tar.gz`.

The new mirror layout allows different packages to refer to the same
resource or source without duplicating that download in the
mirror/cache. This change is not essential to mirroring everything but is
expected to save space when mirroring packages that all use the same
resource.
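
As a rough sketch (not the exact Spack code), the two naming schemes can be
derived like this:

```python
import os

def archive_path(sha256, ext='tar.gz'):
    # archive/<first two hex digits>/<full digest>.<ext>; the two-letter
    # prefix keeps any one directory from collecting too many entries
    return os.path.join('archive', sha256[:2], sha256 + '.' + ext)

def vcs_path(vcs, repo_path, ref):
    # e.g. ('git', 'google/leveldb.git', 'master')
    #   -> git/google/leveldb.git/master.tar.gz
    return os.path.join(vcs, repo_path, ref + '.tar.gz')
```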

The new structure of the mirror is:

```
<base directory>/
  _source-cache/   <-- the _source-cache directory is new
    archive/       <-- archives/resources/patches stored by hash
      00/          <-- 2-letter sha256 prefix
        002748bdd0319d5ab82606cf92dc210fc1c05d0607a2e1d5538f60512b029056.tar.gz
      01/
        0154c25c45b5506b6d618ca8e18d0ef093dac47946ac0df464fb21e77b504118.tar.gz
        0173a74a515211997a3117a47e7b9ea43594a04b865b69da5a71c0886fa829ea.tar.gz
        ...
    git/
      OpenFAST/
        openfast.git/
          master.tar.gz     <-- repo by branch name
      PHASTA/
        phasta.git/
          11f431f2d1a53a529dab4b0f079ab8aab7ca1109.tar.gz  <-- repo by commit
      ...
    svn/      <-- each fetch strategy has its own subdirectory
      ...
  openmpi/   <-- the remaining package directories have the old format
    openmpi-1.10.1.tar.gz  <-- human-readable name is symlink to _source-cache
```

In addition to the hash-based archive names described above, `mirror create`
now also creates symlinks with the old naming format, both to help users see
which package each mirrored archive belongs to and to keep mirrors usable
with older versions of Spack. The symlinks are relative, so the mirror
directory can still itself be archived.
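
A sketch of how such a relative symlink can be created (this mirrors the
`MirrorCache.store` change in the diffs below; the names here are
illustrative):

```python
import os

def link_cosmetic_name(storage_path, cosmetic_path):
    # Point openmpi/openmpi-1.10.1.tar.gz at the hash-named file under
    # _source-cache/ using a *relative* target, so the mirror directory
    # can be tarred up and unpacked elsewhere without breaking links.
    target = os.path.relpath(storage_path,
                             start=os.path.dirname(cosmetic_path))
    if not os.path.exists(cosmetic_path):
        os.makedirs(os.path.dirname(cosmetic_path), exist_ok=True)
        os.symlink(target, cosmetic_path)
```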

Other improvements:

* `spack mirror create` will not re-download resources that are already
  present in the mirror.

* When creating a mirror, resources downloaded into it are not also stored
  in the local download cache (nothing is stored twice).
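
Both behaviors come down to one check: look for the resource at its
hash-based storage path first, and only fetch and archive into the mirror
when it is missing. A simplified sketch (compare `Stage.cache_mirror` in the
diffs below; `fetch` and `archive` are stand-ins for the real callables):

```python
import os

def cache_to_mirror(mirror_root, storage_path, fetch, archive):
    dst = os.path.join(mirror_root, storage_path)
    if os.path.exists(dst):
        return 'already present'   # counted, but not downloaded again
    fetch()        # download into a temporary stage
    archive(dst)   # archive directly into the mirror, not the local cache
    return 'added'
```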
Commit 4af448724f (parent 5582de837e), authored by Peter Scheibel on
2019-10-25 21:47:28 -07:00 and committed by Todd Gamblin.
11 changed files with 451 additions and 214 deletions. The per-file diffs
follow.

View File — `spack.caches` (`MirrorCache`)

```diff
@@ -15,7 +15,6 @@
 import spack.fetch_strategy
 import spack.util.file_cache
 import spack.util.path
-import spack.util.url as url_util


 def _misc_cache():
@@ -52,25 +51,25 @@ def _fetch_cache():
 class MirrorCache(object):
     def __init__(self, root):
-        self.root = url_util.local_file_path(root)
-        if not self.root:
-            raise spack.error.SpackError(
-                'MirrorCaches only work with file:// URLs')
-
-        self.new_resources = set()
-        self.existing_resources = set()
+        self.root = os.path.abspath(root)

-    def store(self, fetcher, relative_dest):
+    def store(self, fetcher, relative_dest, cosmetic_path=None):
         # Note this will archive package sources even if they would not
         # normally be cached (e.g. the current tip of an hg/git branch)
         dst = os.path.join(self.root, relative_dest)
-        if os.path.exists(dst):
-            self.existing_resources.add(relative_dest)
-        else:
-            self.new_resources.add(relative_dest)
-            mkdirp(os.path.dirname(dst))
-            fetcher.archive(dst)
+        mkdirp(os.path.dirname(dst))
+        fetcher.archive(dst)
+
+        # Add a symlink path that a human can read to understand what resource
+        # the archive path refers to
+        if not cosmetic_path:
+            return
+        cosmetic_path = os.path.join(self.root, cosmetic_path)
+        relative_dst = os.path.relpath(
+            dst, start=os.path.dirname(cosmetic_path))
+        if not os.path.exists(cosmetic_path):
+            mkdirp(os.path.dirname(cosmetic_path))
+            os.symlink(relative_dst, cosmetic_path)


 #: Spack's local cache for downloaded source archives
```

View File — the `spack mirror` command (`setup_parser`, `mirror_create`)

```diff
@@ -38,18 +38,25 @@ def setup_parser(subparser):
     create_parser = sp.add_parser('create', help=mirror_create.__doc__)
     create_parser.add_argument('-d', '--directory', default=None,
                                help="directory in which to create mirror")
     create_parser.add_argument(
         'specs', nargs=argparse.REMAINDER,
         help="specs of packages to put in mirror")
+    create_parser.add_argument(
+        '-a', '--all', action='store_true',
+        help="mirror all versions of all packages in Spack, or all packages"
+             " in the current environment if there is an active environment"
+             " (this requires significant time and space)")
     create_parser.add_argument(
         '-f', '--file', help="file with specs of packages to put in mirror")
     create_parser.add_argument(
         '-D', '--dependencies', action='store_true',
         help="also fetch all dependencies")
     create_parser.add_argument(
-        '-n', '--versions-per-spec', type=int,
-        default=1,
-        help="the number of versions to fetch for each spec")
+        '-n', '--versions-per-spec',
+        help="the number of versions to fetch for each spec, choose 'all' to"
+             " retrieve all versions of each package")

     # used to construct scope arguments below
     scopes = spack.config.scopes()
@@ -225,6 +232,25 @@ def _read_specs_from_file(filename):
 def mirror_create(args):
     """Create a directory to be used as a spack mirror, and fill it with
        package archives."""
+    if args.specs and args.all:
+        raise SpackError("Cannot specify specs on command line if you"
+                         " chose to mirror all specs with '--all'")
+    elif args.file and args.all:
+        raise SpackError("Cannot specify specs with a file ('-f') if you"
+                         " chose to mirror all specs with '--all'")
+
+    if not args.versions_per_spec:
+        num_versions = 1
+    elif args.versions_per_spec == 'all':
+        num_versions = 'all'
+    else:
+        try:
+            num_versions = int(args.versions_per_spec)
+        except ValueError:
+            raise SpackError(
+                "'--versions-per-spec' must be a number or 'all',"
+                " got '{0}'".format(args.versions_per_spec))
+
     # try to parse specs from the command line first.
     with spack.concretize.disable_compiler_existence_check():
         specs = spack.cmd.parse_specs(args.specs, concretize=True)
@@ -235,56 +261,67 @@ def mirror_create(args):
         tty.die("Cannot pass specs on the command line with --file.")
         specs = _read_specs_from_file(args.file)

-    # If nothing is passed, use environment or all if no active env
     if not specs:
+        # If nothing is passed, use environment or all if no active env
+        if not args.all:
+            tty.die("No packages were specified.",
+                    "To mirror all packages, use the '--all' option"
+                    " (this will require significant time and space).")
         env = ev.get_env(args, 'mirror')
         if env:
-            specs = env.specs_by_hash.values()
+            mirror_specs = env.specs_by_hash.values()
         else:
             specs = [Spec(n) for n in spack.repo.all_package_names()]
-            specs.sort(key=lambda s: s.format("{name}{@version}").lower())
-
-    # If the user asked for dependencies, traverse spec DAG get them.
-    if args.dependencies:
-        new_specs = set()
-        for spec in specs:
-            spec.concretize()
-            for s in spec.traverse():
-                new_specs.add(s)
-        specs = list(new_specs)
-
-    # Skip external specs, as they are already installed
-    external_specs = [s for s in specs if s.external]
-    specs = [s for s in specs if not s.external]
-
-    for spec in external_specs:
-        msg = 'Skipping {0} as it is an external spec.'
-        tty.msg(msg.format(spec.cshort_spec))
+            mirror_specs = spack.mirror.get_all_versions(specs)
+            mirror_specs.sort(
+                key=lambda s: (s.name, s.version))
+    else:
+        # If the user asked for dependencies, traverse spec DAG get them.
+        if args.dependencies:
+            new_specs = set()
+            for spec in specs:
+                spec.concretize()
+                for s in spec.traverse():
+                    new_specs.add(s)
+            specs = list(new_specs)
+
+        # Skip external specs, as they are already installed
+        external_specs = [s for s in specs if s.external]
+        specs = [s for s in specs if not s.external]
+
+        for spec in external_specs:
+            msg = 'Skipping {0} as it is an external spec.'
+            tty.msg(msg.format(spec.cshort_spec))
+
+        if num_versions == 'all':
+            mirror_specs = spack.mirror.get_all_versions(specs)
+        else:
+            mirror_specs = spack.mirror.get_matching_versions(
+                specs, num_versions=num_versions)

     mirror = spack.mirror.Mirror(
         args.directory or spack.config.get('config:source_cache'))

     directory = url_util.format(mirror.push_url)

-    # Make sure nothing is in the way.
     existed = web_util.url_exists(directory)

     # Actually do the work to create the mirror
-    present, mirrored, error = spack.mirror.create(
-        directory, specs, num_versions=args.versions_per_spec)
+    present, mirrored, error = spack.mirror.create(directory, mirror_specs)
     p, m, e = len(present), len(mirrored), len(error)

     verb = "updated" if existed else "created"
     tty.msg(
         "Successfully %s mirror in %s" % (verb, directory),
         "Archive stats:",
         "  %-4d already present" % p,
         "  %-4d added" % m,
         "  %-4d failed to fetch." % e)
     if error:
         tty.error("Failed downloads:")
         colify(s.cformat("{name}{@version}") for s in error)
         sys.exit(1)


 def mirror(parser, args):
```

View File — `spack.fetch_strategy` (new `mirror_id` on each fetch strategy)

```diff
@@ -166,11 +166,23 @@ def cachable(self):
     def source_id(self):
         """A unique ID for the source.

+        It is intended that a human could easily generate this themselves using
+        the information available to them in the Spack package.
+
         The returned value is added to the content which determines the full
         hash for a package using `str()`.
         """
         raise NotImplementedError

+    def mirror_id(self):
+        """This is a unique ID for a source that is intended to help identify
+        reuse of resources across packages.
+
+        It is unique like source-id, but it does not include the package name
+        and is not necessarily easy for a human to create themselves.
+        """
+        raise NotImplementedError
+
     def __str__(self):  # Should be human readable URL.
         return "FetchStrategy.__str___"
@@ -273,6 +285,15 @@ def curl(self):
     def source_id(self):
         return self.digest

+    def mirror_id(self):
+        if not self.digest:
+            return None
+        # The filename is the digest. A directory is also created based on
+        # truncating the digest to avoid creating a directory with too many
+        # entries
+        return os.path.sep.join(
+            ['archive', self.digest[:2], self.digest])
+
     @_needs_stage
     def fetch(self):
         if self.archive_file:
@@ -727,6 +748,13 @@ def cachable(self):
     def source_id(self):
         return self.commit or self.tag

+    def mirror_id(self):
+        repo_ref = self.commit or self.tag or self.branch
+        if repo_ref:
+            repo_path = url_util.parse(self.url).path
+            result = os.path.sep.join(['git', repo_path, repo_ref])
+            return result
+
     def get_source_id(self):
         if not self.branch:
             return
@@ -908,6 +936,12 @@ def get_source_id(self):
         info = xml.etree.ElementTree.fromstring(output)
         return info.find('entry/commit').get('revision')

+    def mirror_id(self):
+        if self.revision:
+            repo_path = url_util.parse(self.url).path
+            result = os.path.sep.join(['svn', repo_path, self.revision])
+            return result
+
     @_needs_stage
     def fetch(self):
         if self.stage.expanded:
@@ -1011,6 +1045,12 @@ def cachable(self):
     def source_id(self):
         return self.revision

+    def mirror_id(self):
+        if self.revision:
+            repo_path = url_util.parse(self.url).path
+            result = os.path.sep.join(['hg', repo_path, self.revision])
+            return result
+
     def get_source_id(self):
         output = self.hg('id', self.url, output=str)
         if output:
```
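
For intuition, here are hand-worked examples of the IDs these `mirror_id`
implementations produce, using the digest from the commit message above and
assuming the repository URL path parses to `google/leveldb.git`:

```python
import os

digest = 'f6cf3bd233f9ea6147b21c7c02cac24e5363570ce4fd6be11dab9f499ed6a7d8'
print(os.path.sep.join(['archive', digest[:2], digest]))
# archive/f6/f6cf3bd233f9ea6147b21c7c02cac24e5363570ce4fd6be11dab9f499ed6a7d8

print(os.path.sep.join(['git', 'google/leveldb.git', 'master']))
# git/google/leveldb.git/master
```

`mirror_archive_paths` (next file) prefixes these IDs with `_source-cache/`
and appends the archive extension to produce the storage paths shown in the
commit message.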

View File — `spack.mirror` (`MirrorReference`, `MirrorStats`, `create`)

```diff
@@ -13,6 +13,7 @@
 """
 import sys
 import os
+import traceback
 import os.path
 import operator
@@ -37,7 +38,6 @@
 import spack.util.url as url_util
 import spack.spec
 from spack.version import VersionList
-from spack.util.compression import allowed_archive
 from spack.util.spack_yaml import syaml_dict
@@ -233,27 +233,16 @@ def __len__(self):
     return len(self._mirrors)


-def mirror_archive_filename(spec, fetcher, resource_id=None):
-    """Get the name of the spec's archive in the mirror."""
-    if not spec.version.concrete:
-        raise ValueError("mirror.path requires spec with concrete version.")
-
+def _determine_extension(fetcher):
     if isinstance(fetcher, fs.URLFetchStrategy):
         if fetcher.expand_archive:
             # If we fetch with a URLFetchStrategy, use URL's archive type
             ext = url.determine_url_file_extension(fetcher.url)

-            # If the filename does not end with a normal suffix,
-            # see if the package explicitly declares the extension
-            if not ext:
-                ext = spec.package.versions[spec.package.version].get(
-                    'extension', None)
-
             if ext:
                 # Remove any leading dots
                 ext = ext.lstrip('.')
-
-            if not ext:
+            else:
                 msg = """\
 Unable to parse extension from {0}.
@@ -276,21 +265,92 @@ def mirror_archive_filename(spec, fetcher, resource_id=None):
             # Otherwise we'll make a .tar.gz ourselves
             ext = 'tar.gz'

-    if resource_id:
-        filename = "%s-%s" % (resource_id, spec.version) + ".%s" % ext
-    else:
-        filename = "%s-%s" % (spec.package.name, spec.version) + ".%s" % ext
-
-    return filename
-
-
-def mirror_archive_path(spec, fetcher, resource_id=None):
-    """Get the relative path to the spec's archive within a mirror."""
-    return os.path.join(
-        spec.name, mirror_archive_filename(spec, fetcher, resource_id))
-
-
-def get_matching_versions(specs, **kwargs):
+    return ext
+
+
+class MirrorReference(object):
+    """A ``MirrorReference`` stores the relative paths where you can store a
+    package/resource in a mirror directory.
+
+    The appropriate storage location is given by ``storage_path``. The
+    ``cosmetic_path`` property provides a reference that a human could generate
+    themselves based on reading the details of the package.
+
+    A user can iterate over a ``MirrorReference`` object to get all the
+    possible names that might be used to refer to the resource in a mirror;
+    this includes names generated by previous naming schemes that are no-longer
+    reported by ``storage_path`` or ``cosmetic_path``.
+    """
+    def __init__(self, cosmetic_path, global_path=None):
+        self.global_path = global_path
+        self.cosmetic_path = cosmetic_path
+
+    @property
+    def storage_path(self):
+        if self.global_path:
+            return self.global_path
+        else:
+            return self.cosmetic_path
+
+    def __iter__(self):
+        if self.global_path:
+            yield self.global_path
+        yield self.cosmetic_path
+
+
+def mirror_archive_paths(fetcher, per_package_ref, spec=None):
+    """Returns a ``MirrorReference`` object which keeps track of the relative
+    storage path of the resource associated with the specified ``fetcher``."""
+    ext = None
+    if spec:
+        ext = spec.package.versions[spec.package.version].get(
+            'extension', None)
+    # If the spec does not explicitly specify an extension (the default case),
+    # then try to determine it automatically. An extension can only be
+    # specified for the primary source of the package (e.g. the source code
+    # identified in the 'version' declaration). Resources/patches don't have
+    # an option to specify an extension, so it must be inferred for those.
+    ext = ext or _determine_extension(fetcher)
+
+    if ext:
+        per_package_ref += ".%s" % ext
+
+    global_ref = fetcher.mirror_id()
+    if global_ref:
+        global_ref = os.path.join('_source-cache', global_ref)
+    if global_ref and ext:
+        global_ref += ".%s" % ext
+
+    return MirrorReference(per_package_ref, global_ref)
+
+
+def get_all_versions(specs):
+    """Given a set of initial specs, return a new set of specs that includes
+    each version of each package in the original set.
+
+    Note that if any spec in the original set specifies properties other than
+    version, this information will be omitted in the new set; for example,
+    the new set of specs will not include variant settings.
+    """
+    version_specs = []
+    for spec in specs:
+        pkg = spec.package
+
+        # Skip any package that has no known versions.
+        if not pkg.versions:
+            tty.msg("No safe (checksummed) versions for package %s"
+                    % pkg.name)
+            continue
+
+        for version in pkg.versions:
+            version_spec = spack.spec.Spec(pkg.name)
+            version_spec.versions = VersionList([version])
+            version_specs.append(version_spec)
+
+    return version_specs
+
+
+def get_matching_versions(specs, num_versions=1):
     """Get a spec for EACH known version matching any spec in the list.

     For concrete specs, this retrieves the concrete version and, if more
     than one version per spec is requested, retrieves the latest versions
@@ -305,7 +365,7 @@ def get_matching_versions(specs, **kwargs):
             tty.msg("No safe (checksummed) versions for package %s" % pkg.name)
             continue

-        pkg_versions = kwargs.get('num_versions', 1)
+        pkg_versions = num_versions

         version_order = list(reversed(sorted(pkg.versions)))
         matching_spec = []
@@ -338,19 +398,7 @@ def get_matching_versions(specs, **kwargs):
     return matching


-def suggest_archive_basename(resource):
-    """Return a tentative basename for an archive.
-
-    Raises:
-        RuntimeError: if the name is not an allowed archive type.
-    """
-    basename = os.path.basename(resource.fetcher.url)
-    if not allowed_archive(basename):
-        raise RuntimeError("%s is not an allowed archive tye" % basename)
-    return basename
-
-
-def create(path, specs, **kwargs):
+def create(path, specs):
     """Create a directory to be used as a spack mirror, and fill it with
        package archives.
@@ -359,10 +407,6 @@ def create(path, specs, **kwargs):
         specs: Any package versions matching these specs will be added \
             to the mirror.

-    Keyword args:
-        num_versions: Max number of versions to fetch per spec, \
-            (default is 1 each spec)
-
     Return Value:
         Returns a tuple of lists: (present, mirrored, error)
@@ -376,69 +420,107 @@ def create(path, specs, **kwargs):
     """
     parsed = url_util.parse(path)
     mirror_root = url_util.local_file_path(parsed)
-
-    # Make sure nothing is in the way.
-    if mirror_root and os.path.isfile(mirror_root):
-        raise MirrorError("%s already exists and is a file." % mirror_root)
+    if not mirror_root:
+        raise spack.error.SpackError(
+            'MirrorCaches only work with file:// URLs')

     # automatically spec-ify anything in the specs array.
     specs = [
         s if isinstance(s, spack.spec.Spec) else spack.spec.Spec(s)
         for s in specs]

-    # Get concrete specs for each matching version of these specs.
-    version_specs = get_matching_versions(
-        specs, num_versions=kwargs.get('num_versions', 1))
-    for s in version_specs:
-        s.concretize()
-
     # Get the absolute path of the root before we start jumping around.
-    if mirror_root and not os.path.isdir(mirror_root):
+    if not os.path.isdir(mirror_root):
         try:
             mkdirp(mirror_root)
         except OSError as e:
             raise MirrorError(
                 "Cannot create directory '%s':" % mirror_root, str(e))

-    # Things to keep track of while parsing specs.
-    categories = {
-        'present': [],
-        'mirrored': [],
-        'error': []
-    }
-
-    mirror_cache = spack.caches.MirrorCache(parsed)
+    mirror_cache = spack.caches.MirrorCache(mirror_root)
+    mirror_stats = MirrorStats()
     try:
         spack.caches.mirror_cache = mirror_cache
         # Iterate through packages and download all safe tarballs for each
-        for spec in version_specs:
-            add_single_spec(spec, parsed, categories, **kwargs)
+        for spec in specs:
+            mirror_stats.next_spec(spec)
+            add_single_spec(spec, mirror_root, mirror_stats)
     finally:
         spack.caches.mirror_cache = None

-    categories['mirrored'] = list(mirror_cache.new_resources)
-    categories['present'] = list(mirror_cache.existing_resources)
-
-    return categories['present'], categories['mirrored'], categories['error']
+    return mirror_stats.stats()


-def add_single_spec(spec, mirror_root, categories, **kwargs):
+class MirrorStats(object):
+    def __init__(self):
+        self.present = {}
+        self.new = {}
+        self.errors = set()
+
+        self.current_spec = None
+        self.added_resources = set()
+        self.existing_resources = set()
+
+    def next_spec(self, spec):
+        self._tally_current_spec()
+        self.current_spec = spec
+
+    def _tally_current_spec(self):
+        if self.current_spec:
+            if self.added_resources:
+                self.new[self.current_spec] = len(self.added_resources)
+            if self.existing_resources:
+                self.present[self.current_spec] = len(
+                    self.existing_resources)
+            self.added_resources = set()
+            self.existing_resources = set()
+        self.current_spec = None
+
+    def stats(self):
+        self._tally_current_spec()
+        return list(self.present), list(self.new), list(self.errors)
+
+    def already_existed(self, resource):
+        # If an error occurred after caching a subset of a spec's
+        # resources, a secondary attempt may consider them already added
+        if resource not in self.added_resources:
+            self.existing_resources.add(resource)
+
+    def added(self, resource):
+        self.added_resources.add(resource)
+
+    def error(self):
+        self.errors.add(self.current_spec)
+
+
+def add_single_spec(spec, mirror_root, mirror_stats):
     tty.msg("Adding package {pkg} to mirror".format(
         pkg=spec.format("{name}{@version}")
     ))
-    try:
-        spec.package.do_fetch()
-        spec.package.do_clean()
+    num_retries = 3
+    while num_retries > 0:
+        try:
+            with spec.package.stage as pkg_stage:
+                pkg_stage.cache_mirror(mirror_stats)
+                for patch in spec.package.all_patches():
+                    patch.fetch(pkg_stage)
+                    if patch.cache():
+                        patch.cache().cache_mirror(mirror_stats)
+                    patch.clean()
+            exception = None
+            break
+        except Exception as e:
+            exc_tuple = sys.exc_info()
+            exception = e
+            num_retries -= 1

-    except Exception as e:
-        tty.debug(e)
+    if exception:
         if spack.config.get('config:debug'):
-            sys.excepthook(*sys.exc_info())
+            traceback.print_exception(file=sys.stderr, *exc_tuple)
         else:
             tty.warn(
                 "Error while fetching %s" % spec.cformat('{name}{@version}'),
-                e.message)
-        categories['error'].append(spec)
+                exception.message)
+        mirror_stats.error()


 class MirrorError(spack.error.SpackError):
```
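
A quick usage sketch of `MirrorReference` as defined above (paths shortened
for readability):

```python
ref = MirrorReference(
    cosmetic_path='openmpi/openmpi-1.10.1.tar.gz',
    global_path='_source-cache/archive/f6/f6cf...tar.gz')

ref.storage_path  # '_source-cache/archive/f6/f6cf...tar.gz'
list(ref)         # hash-based name first, then the human-readable one;
                  # Stage tries each of these names when searching mirrors
```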

View File — the package base class (stage/fetcher construction, `all_patches`)

```diff
@@ -663,7 +663,8 @@ def global_license_file(self):
     @property
     def version(self):
         if not self.spec.versions.concrete:
-            raise ValueError("Can only get of package with concrete version.")
+            raise ValueError("Version requested for a package that"
+                             " does not have a concrete version.")
         return self.spec.versions[0]

     @memoized
@@ -741,19 +742,23 @@ def url_for_version(self, version):
     def _make_resource_stage(self, root_stage, fetcher, resource):
         resource_stage_folder = self._resource_stage(resource)
-        resource_mirror = spack.mirror.mirror_archive_path(
-            self.spec, fetcher, resource.name)
+        mirror_paths = spack.mirror.mirror_archive_paths(
+            fetcher,
+            os.path.join(self.name, "%s-%s" % (resource.name, self.version)))
         stage = ResourceStage(resource.fetcher,
                               root=root_stage,
                               resource=resource,
                               name=resource_stage_folder,
-                              mirror_path=resource_mirror,
+                              mirror_paths=mirror_paths,
                               path=self.path)
         return stage

     def _make_root_stage(self, fetcher):
         # Construct a mirror path (TODO: get this out of package.py)
-        mp = spack.mirror.mirror_archive_path(self.spec, fetcher)
+        mirror_paths = spack.mirror.mirror_archive_paths(
+            fetcher,
+            os.path.join(self.name, "%s-%s" % (self.name, self.version)),
+            self.spec)
         # Construct a path where the stage should build..
         s = self.spec
         stage_name = "{0}{1}-{2}-{3}".format(stage_prefix, s.name, s.version,
@@ -763,8 +768,8 @@ def download_search():
             dynamic_fetcher = fs.from_list_url(self)
             return [dynamic_fetcher] if dynamic_fetcher else []

-        stage = Stage(fetcher, mirror_path=mp, name=stage_name, path=self.path,
-                      search_fn=download_search)
+        stage = Stage(fetcher, mirror_paths=mirror_paths, name=stage_name,
+                      path=self.path, search_fn=download_search)
         return stage

     def _make_stage(self):
@@ -794,8 +799,9 @@ def stage(self):
         doesn't have one yet, but it does not create the Stage directory
         on the filesystem.
         """
-        if not self.spec.concrete:
-            raise ValueError("Can only get a stage for a concrete package.")
+        if not self.spec.versions.concrete:
+            raise ValueError(
+                "Cannot retrieve stage for package without concrete version.")
         if self._stage is None:
             self._stage = self._make_stage()
         return self._stage
@@ -873,8 +879,8 @@ def _make_fetcher(self):
     @property
     def fetcher(self):
         if not self.spec.versions.concrete:
-            raise ValueError(
-                "Can only get a fetcher for a package with concrete versions.")
+            raise ValueError("Cannot retrieve fetcher for"
+                             " package without concrete version.")
         if not self._fetcher:
             self._fetcher = self._make_fetcher()
         return self._fetcher
@@ -1081,6 +1087,8 @@ def do_fetch(self, mirror_only=False):
         for patch in self.spec.patches:
             patch.fetch(self.stage)
+            if patch.cache():
+                patch.cache().cache_local()

     def do_stage(self, mirror_only=False):
         """Unpacks and expands the fetched tarball."""
@@ -1193,6 +1201,26 @@ def do_patch(self):
         else:
             touch(no_patches_file)

+    @classmethod
+    def all_patches(cls):
+        """Retrieve all patches associated with the package.
+
+        Retrieves patches on the package itself as well as patches on the
+        dependencies of the package."""
+        patches = []
+        for _, patch_list in cls.patches.items():
+            for patch in patch_list:
+                patches.append(patch)
+
+        pkg_deps = cls.dependencies
+        for dep_name in pkg_deps:
+            for _, dependency in pkg_deps[dep_name].items():
+                for _, patch_list in dependency.patches.items():
+                    for patch in patch_list:
+                        patches.append(patch)
+
+        return patches
+
     def content_hash(self, content=None):
         """Create a hash based on the sources and logic used to build the
         package. This includes the contents of all applied patches and the
@@ -1366,9 +1394,21 @@ def _if_ninja_target_execute(self, target, *args, **kwargs):
     def _get_needed_resources(self):
         resources = []
         # Select the resources that are needed for this build
-        for when_spec, resource_list in self.resources.items():
-            if when_spec in self.spec:
-                resources.extend(resource_list)
+        if self.spec.concrete:
+            for when_spec, resource_list in self.resources.items():
+                if when_spec in self.spec:
+                    resources.extend(resource_list)
+        else:
+            for when_spec, resource_list in self.resources.items():
+                # Note that variant checking is always strict for specs where
+                # the name is not specified. But with strict variant checking,
+                # only variants mentioned in 'other' are checked. Here we only
+                # want to make sure that no constraints in when_spec
+                # conflict with the spec, so we need to invoke
+                # when_spec.satisfies(self.spec) vs.
+                # self.spec.satisfies(when_spec)
+                if when_spec.satisfies(self.spec, strict=False):
+                    resources.extend(resource_list)
         # Sorts the resources by the length of the string representing their
         # destination. Since any nested resource must contain another
         # resource's name in its path, it seems that should work
```

View File — patch handling (`cache()` on patches, URL patch fetching)

```diff
@@ -16,6 +16,7 @@
 import spack.repo
 import spack.stage
 import spack.util.spack_json as sjson
+import spack
 from spack.util.compression import allowed_archive
 from spack.util.crypto import checksum, Checker
@@ -87,6 +88,9 @@ def apply(self, stage):
         apply_patch(stage, self.path, self.level, self.working_dir)

+    def cache(self):
+        return None
+
     def to_dict(self):
         """Partial dictionary -- subclasses should add to this."""
         return {
@@ -180,6 +184,7 @@ def __init__(self, pkg, url, level=1, working_dir='.', ordering_key=None,
         if not self.sha256:
             raise PatchDirectiveError("URL patches require a sha256 checksum")

+    # TODO: this function doesn't use the stage arg
     def fetch(self, stage):
         """Retrieve the patch in a temporary stage and compute self.path

@@ -191,15 +196,19 @@ def fetch(self, stage):
         if self.archive_sha256:
             fetch_digest = self.archive_sha256

-        fetcher = fs.URLFetchStrategy(self.url, fetch_digest)
-        mirror = os.path.join(os.path.dirname(stage.mirror_path),
-                              os.path.basename(self.url))
-
-        self.stage = spack.stage.Stage(fetcher, mirror_path=mirror)
+        fetcher = fs.URLFetchStrategy(self.url, fetch_digest,
+                                      expand=bool(self.archive_sha256))
+
+        per_package_ref = os.path.join(
+            self.owner.split('.')[-1], os.path.basename(self.url))
+        # Reference starting with "spack." is required to avoid cyclic imports
+        mirror_ref = spack.mirror.mirror_archive_paths(
+            fetcher, per_package_ref)
+
+        self.stage = spack.stage.Stage(fetcher, mirror_paths=mirror_ref)
         self.stage.create()
         self.stage.fetch()
         self.stage.check()
+        self.stage.cache_local()

         root = self.stage.path
         if self.archive_sha256:
@@ -230,6 +239,9 @@ def fetch(self, stage):
                 "sha256 checksum failed for %s" % self.path,
                 "Expected %s but got %s" % (self.sha256, checker.sum))

+    def cache(self):
+        return self.stage
+
     def clean(self):
         self.stage.destroy()
```

View File — `spack.stage` (`Stage.mirror_paths`, `cache_mirror`)

```diff
@@ -166,6 +166,14 @@ def get_stage_root():
     return _stage_root


+def _mirror_roots():
+    mirrors = spack.config.get('mirrors')
+    return [
+        sup.substitute_path_variables(root) if root.endswith(os.sep)
+        else sup.substitute_path_variables(root) + os.sep
+        for root in mirrors.values()]
+
+
 class Stage(object):
     """Manages a temporary stage directory for building.
@@ -216,7 +224,7 @@ class Stage(object):
     def __init__(
             self, url_or_fetch_strategy,
-            name=None, mirror_path=None, keep=False, path=None, lock=True,
+            name=None, mirror_paths=None, keep=False, path=None, lock=True,
             search_fn=None):
         """Create a stage object.

         Parameters:
@@ -230,10 +238,10 @@ def __init__(
                 stage object later).  If name is not provided, then this
                 stage will be given a unique name automatically.

-            mirror_path
+            mirror_paths
                 If provided, Stage will search Spack's mirrors for
-                this archive at the mirror_path, before using the
-                default fetch strategy.
+                this archive at each of the provided relative mirror paths
+                before using the default fetch strategy.

             keep
                 By default, when used as a context manager, the Stage
@@ -276,7 +284,7 @@ def __init__(
         self.name = name
         if name is None:
             self.name = stage_prefix + next(tempfile._get_candidate_names())
-        self.mirror_path = mirror_path
+        self.mirror_paths = mirror_paths

         # Use the provided path or construct an optionally named stage path.
         if path is not None:
@@ -350,8 +358,8 @@ def expected_archive_files(self):
         expanded = self.default_fetcher.expand_archive
         fnames.append(os.path.basename(self.default_fetcher.url))

-        if self.mirror_path:
-            fnames.append(os.path.basename(self.mirror_path))
+        if self.mirror_paths:
+            fnames.extend(os.path.basename(x) for x in self.mirror_paths)

         paths.extend(os.path.join(self.path, f) for f in fnames)
         if not expanded:
@@ -399,10 +407,14 @@ def fetch(self, mirror_only=False):
         # TODO: Or @alalazo may have some ideas about how to use a
         # TODO: CompositeFetchStrategy here.
         self.skip_checksum_for_mirror = True
-        if self.mirror_path:
-            urls = [
-                url_util.join(mirror.fetch_url, self.mirror_path)
-                for mirror in spack.mirror.MirrorCollection().values()]
+        if self.mirror_paths:
+            # Join URLs of mirror roots with mirror paths. Because
+            # urljoin() will strip everything past the final '/' in
+            # the root, so we add a '/' if it is not present.
+            urls = []
+            for mirror in spack.mirror.MirrorCollection().values():
+                for rel_path in self.mirror_paths:
+                    urls.append(url_util.join(mirror.fetch_url, rel_path))

             # If this archive is normally fetched from a tarball URL,
             # then use the same digest.  `spack mirror` ensures that
@@ -428,10 +440,11 @@ def fetch(self, mirror_only=False):
             #         url, digest, expand=expand, extension=extension))
             if self.default_fetcher.cachable:
-                fetchers.insert(
-                    0, spack.caches.fetch_cache.fetcher(
-                        self.mirror_path, digest, expand=expand,
-                        extension=extension))
+                for rel_path in reversed(list(self.mirror_paths)):
+                    cache_fetcher = spack.caches.fetch_cache.fetcher(
+                        rel_path, digest, expand=expand,
+                        extension=extension)
+                    fetchers.insert(0, cache_fetcher)

         def generate_fetchers():
             for fetcher in fetchers:
@@ -476,10 +489,24 @@ def check(self):
             self.fetcher.check()

     def cache_local(self):
-        spack.caches.fetch_cache.store(self.fetcher, self.mirror_path)
-
-        if spack.caches.mirror_cache:
-            spack.caches.mirror_cache.store(self.fetcher, self.mirror_path)
+        spack.caches.fetch_cache.store(
+            self.fetcher, self.mirror_paths.storage_path)
+
+    def cache_mirror(self, stats):
+        """Perform a fetch if the resource is not already cached"""
+        dst_root = spack.caches.mirror_cache.root
+        absolute_storage_path = os.path.join(
+            dst_root, self.mirror_paths.storage_path)
+
+        if os.path.exists(absolute_storage_path):
+            stats.already_existed(absolute_storage_path)
+            return
+
+        self.fetch()
+        spack.caches.mirror_cache.store(
+            self.fetcher, self.mirror_paths.storage_path,
+            self.mirror_paths.cosmetic_path)
+        stats.added(absolute_storage_path)

     def expand_archive(self):
         """Changes to the stage directory and attempt to expand the downloaded
@@ -591,7 +618,7 @@ def _add_to_root_stage(self):
 @pattern.composite(method_list=[
     'fetch', 'create', 'created', 'check', 'expand_archive', 'restage',
-    'destroy', 'cache_local', 'managed_by_spack'])
+    'destroy', 'cache_local', 'cache_mirror', 'managed_by_spack'])
 class StageComposite:
     """Composite for Stage type objects. The first item in this composite is
     considered to be the root package, and operations that return a value are
@@ -629,10 +656,6 @@ def path(self):
     def archive_file(self):
         return self[0].archive_file

-    @property
-    def mirror_path(self):
-        return self[0].mirror_path
-

 class DIYStage(object):
     """
```

View File — test: `spack mirror create` from an environment

```diff
@@ -37,7 +37,7 @@ def test_mirror_from_env(tmpdir, mock_packages, mock_fetch, config,
         add('git-test')
         concretize()
         with spack.config.override('config:checksum', False):
-            mirror('create', '-d', mirror_dir)
+            mirror('create', '-d', mirror_dir, '--all')

     e = ev.read(env_name)
     assert set(os.listdir(mirror_dir)) == set([s.name for s in e.user_specs])
```

View File — test: error messages for unconcretized packages

```diff
@@ -460,11 +460,11 @@ def test_unconcretized_install(install_mockery, mock_fetch, mock_packages):
     with pytest.raises(ValueError, match="only install concrete packages"):
         spec.package.do_install()

-    with pytest.raises(ValueError, match="fetch concrete packages"):
+    with pytest.raises(ValueError, match="only fetch concrete packages"):
         spec.package.do_fetch()

-    with pytest.raises(ValueError, match="stage concrete packages"):
+    with pytest.raises(ValueError, match="only stage concrete packages"):
         spec.package.do_stage()

-    with pytest.raises(ValueError, match="patch concrete packages"):
+    with pytest.raises(ValueError, match="only patch concrete packages"):
         spec.package.do_patch()
```

View File — test: mirror round-trip (`check_mirror`) and URL patches

```diff
@@ -52,46 +52,50 @@ def check_mirror():
         mirrors = {'spack-mirror-test': 'file://' + mirror_root}
         spack.config.set('mirrors', mirrors)
         with spack.config.override('config:checksum', False):
-            spack.mirror.create(mirror_root, repos)
+            specs = [Spec(x).concretized() for x in repos]
+            spack.mirror.create(mirror_root, specs)

-            # Stage directory exists
-            assert os.path.isdir(mirror_root)
+        # Stage directory exists
+        assert os.path.isdir(mirror_root)

-            # check that there are subdirs for each package
-            for name in repos:
-                subdir = os.path.join(mirror_root, name)
-                assert os.path.isdir(subdir)
-
-                files = os.listdir(subdir)
-                assert len(files) == 1
+        for spec in specs:
+            fetcher = spec.package.fetcher[0]
+            per_package_ref = os.path.join(
+                spec.name, '-'.join([spec.name, str(spec.version)]))
+            mirror_paths = spack.mirror.mirror_archive_paths(
+                fetcher,
+                per_package_ref)
+            expected_path = os.path.join(
+                mirror_root, mirror_paths.storage_path)
+            assert os.path.exists(expected_path)

-            # Now try to fetch each package.
-            for name, mock_repo in repos.items():
-                spec = Spec(name).concretized()
-                pkg = spec.package
+        # Now try to fetch each package.
+        for name, mock_repo in repos.items():
+            spec = Spec(name).concretized()
+            pkg = spec.package

-                with spack.config.override('config:checksum', False):
-                    with pkg.stage:
-                        pkg.do_stage(mirror_only=True)
+            with spack.config.override('config:checksum', False):
+                with pkg.stage:
+                    pkg.do_stage(mirror_only=True)

-                        # Compare the original repo with the expanded archive
-                        original_path = mock_repo.path
-                        if 'svn' in name:
-                            # have to check out the svn repo to compare.
-                            original_path = os.path.join(
-                                mock_repo.path, 'checked_out')
+                    # Compare the original repo with the expanded archive
+                    original_path = mock_repo.path
+                    if 'svn' in name:
+                        # have to check out the svn repo to compare.
+                        original_path = os.path.join(
+                            mock_repo.path, 'checked_out')

-                            svn = which('svn', required=True)
-                            svn('checkout', mock_repo.url, original_path)
+                        svn = which('svn', required=True)
+                        svn('checkout', mock_repo.url, original_path)

-                        dcmp = filecmp.dircmp(
-                            original_path, pkg.stage.source_path)
+                    dcmp = filecmp.dircmp(
+                        original_path, pkg.stage.source_path)

-                        # make sure there are no new files in the expanded
-                        # tarball
-                        assert not dcmp.right_only
+                    # make sure there are no new files in the expanded
+                    # tarball
+                    assert not dcmp.right_only

-                        # and that all original files are present.
-                        assert all(l in exclude for l in dcmp.left_only)
+                    # and that all original files are present.
+                    assert all(l in exclude for l in dcmp.left_only)


 def test_url_mirror(mock_archive):
@@ -148,7 +152,7 @@ def test_mirror_with_url_patches(mock_packages, config, monkeypatch):
     files_cached_in_mirror = set()

-    def record_store(_class, fetcher, relative_dst):
+    def record_store(_class, fetcher, relative_dst, cosmetic_path=None):
         files_cached_in_mirror.add(os.path.basename(relative_dst))

     def successful_fetch(_class):
@@ -178,5 +182,7 @@ def successful_apply(*args, **kwargs):
         with spack.config.override('config:checksum', False):
             spack.mirror.create(mirror_root, list(spec.traverse()))

-        assert not (set(['urlpatch.patch', 'urlpatch2.patch.gz']) -
-                    files_cached_in_mirror)
+        assert not (set([
+            'abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234',
+            'abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd.gz'  # NOQA: ignore=E501
+        ]) - files_cached_in_mirror)
```

View File — test: build cache creation

```diff
@@ -92,9 +92,7 @@ def test_buildcache(mock_archive, tmpdir):
     # Create the build cache and
     # put it directly into the mirror
     mirror_path = os.path.join(str(tmpdir), 'test-mirror')
-    spack.mirror.create(
-        mirror_path, specs=[], no_checksum=True
-    )
+    spack.mirror.create(mirror_path, specs=[])

     # register mirror with spack config
     mirrors = {'spack-mirror-test': 'file://' + mirror_path}
```