Fetching from URLs falls back to mirrors if they exist (#13881)

Users can now list mirrors of the main url in packages.

- [x] Instead of just a single `url` attribute, users can provide a list (`urls`) in the package, and these will be tried in order by the fetch strategy.

- [x] To handle one of the most common mirror cases, define a `GNUMirrorPackage` mixin to handle all the standard GNU mirrors.  GNU packages can set `gnu_mirror_path` to define the path within a mirror, and the mixin handles setting up all the requisite GNU mirror URLs.

- [x] update all GNU packages in `builtin` to use the `GNUMirrorPackage` mixin.
This commit is contained in:
Massimiliano Culpo
2019-12-20 23:32:18 +01:00
committed by Todd Gamblin
parent 1b93320848
commit 497fddfcb9
66 changed files with 321 additions and 218 deletions

View File

@@ -553,6 +553,34 @@ version. This is useful for packages that have an easy to extrapolate URL, but
keep changing their URL format every few releases. With this method, you only
need to specify the ``url`` when the URL changes.
"""""""""""""""""""""""
Mirrors of the main URL
"""""""""""""""""""""""
Spack supports listing mirrors of the main URL in a package by defining
the ``urls`` attribute:
.. code-block:: python
class Foo(Package):
urls = [
'http://example.com/foo-1.0.tar.gz',
'http://mirror.com/foo-1.0.tar.gz'
]
instead of just a single ``url``. This attribute is a list of possible URLs that
will be tried in order when fetching packages. Notice that either one of ``url``
or ``urls`` can be present in a package, but not both at the same time.
A well-known case of packages that can be fetched from multiple mirrors is that
of GNU. For that, Spack goes a step further and defines a mixin class that
takes care of all of the plumbing and requires packagers to just define a proper
``gnu_mirror_path`` attribute:
.. literalinclude:: _spack_root/var/spack/repos/builtin/packages/autoconf/package.py
:lines: 9-18
^^^^^^^^^^^^^^^^^^^^^^^^
Skipping the expand step
^^^^^^^^^^^^^^^^^^^^^^^^

View File

@@ -0,0 +1,37 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os.path
import spack.package
class GNUMirrorPackage(spack.package.PackageBase):
    """Mixin that takes care of setting url and mirrors for GNU packages."""

    #: Path of the package in a GNU mirror
    gnu_mirror_path = None

    #: List of GNU mirrors used by Spack
    base_mirrors = [
        'https://ftp.gnu.org/gnu',
        'https://ftpmirror.gnu.org/',
        # Fall back to http if https didn't work (for instance because
        # Spack is bootstrapping curl)
        'http://ftpmirror.gnu.org/'
    ]

    @property
    def urls(self):
        """One candidate URL per entry in ``base_mirrors``, built by joining
        the mirror base with ``gnu_mirror_path``."""
        self._ensure_gnu_mirror_path_is_set_or_raise()
        candidate_urls = []
        for mirror in self.base_mirrors:
            candidate_urls.append(os.path.join(mirror, self.gnu_mirror_path))
        return candidate_urls

    def _ensure_gnu_mirror_path_is_set_or_raise(self):
        """Raise an AttributeError if the package did not override
        ``gnu_mirror_path``."""
        if self.gnu_mirror_path is not None:
            return
        msg = ('{0} must define a `gnu_mirror_path` attribute'
               ' [none defined]')
        raise AttributeError(msg.format(type(self).__name__))

View File

@@ -135,7 +135,7 @@ def url_list(args):
# Gather set of URLs from all packages
for pkg in spack.repo.path.all_packages():
url = getattr(pkg.__class__, 'url', None)
url = getattr(pkg, 'url', None)
urls = url_list_parsing(args, urls, url, pkg)
for params in pkg.versions.values():
@@ -174,7 +174,7 @@ def url_summary(args):
for pkg in spack.repo.path.all_packages():
urls = set()
url = getattr(pkg.__class__, 'url', None)
url = getattr(pkg, 'url', None)
if url:
urls.add(url)

View File

@@ -22,33 +22,30 @@
* archive()
Archive a source directory, e.g. for creating a mirror.
"""
import copy
import functools
import os
import os.path
import sys
import re
import shutil
import copy
import sys
import xml.etree.ElementTree
from functools import wraps
from six import string_types, with_metaclass
import six.moves.urllib.parse as urllib_parse
import llnl.util.tty as tty
from llnl.util.filesystem import (
working_dir, mkdirp, temp_rename, temp_cwd, get_single_file)
import six
import six.moves.urllib.parse as urllib_parse
import spack.config
import spack.error
import spack.util.crypto as crypto
import spack.util.pattern as pattern
import spack.util.web as web_util
import spack.util.url as url_util
import spack.util.web as web_util
from llnl.util.filesystem import (
working_dir, mkdirp, temp_rename, temp_cwd, get_single_file)
from spack.util.compression import decompressor_for, extension
from spack.util.executable import which
from spack.util.string import comma_and, quote
from spack.version import Version, ver
from spack.util.compression import decompressor_for, extension
#: List of all fetch strategies, created by FetchStrategy metaclass.
all_strategies = []
@@ -69,7 +66,7 @@ def _needs_stage(fun):
"""Many methods on fetch strategies require a stage to be set
using set_stage(). This decorator adds a check for self.stage."""
@wraps(fun)
@functools.wraps(fun)
def wrapper(self, *args, **kwargs):
if not self.stage:
raise NoStageError(fun)
@@ -85,18 +82,14 @@ def _ensure_one_stage_entry(stage_path):
return os.path.join(stage_path, stage_entries[0])
class FSMeta(type):
"""This metaclass registers all fetch strategies in a list."""
def __init__(cls, name, bases, dict):
type.__init__(cls, name, bases, dict)
if cls.enabled:
all_strategies.append(cls)
def fetcher(cls):
"""Decorator used to register fetch strategies."""
all_strategies.append(cls)
return cls
class FetchStrategy(with_metaclass(FSMeta, object)):
class FetchStrategy(object):
"""Superclass of all fetch strategies."""
enabled = False # Non-abstract subclasses should be enabled.
#: The URL attribute must be specified either at the package class
#: level, or as a keyword argument to ``version()``. It is used to
#: distinguish fetchers for different versions in the package DSL.
@@ -113,16 +106,7 @@ def __init__(self, **kwargs):
self.stage = None
# Enable or disable caching for this strategy based on
# 'no_cache' option from version directive.
self._cache_enabled = not kwargs.pop('no_cache', False)
def set_stage(self, stage):
"""This is called by Stage before any of the fetching
methods are called on the stage."""
self.stage = stage
@property
def cache_enabled(self):
return self._cache_enabled
self.cache_enabled = not kwargs.pop('no_cache', False)
# Subclasses need to implement these methods
def fetch(self):
@@ -186,13 +170,18 @@ def mirror_id(self):
def __str__(self): # Should be human readable URL.
return "FetchStrategy.__str___"
# This method is used to match fetch strategies to version()
# arguments in packages.
@classmethod
def matches(cls, args):
"""Predicate that matches fetch strategies to arguments of
the version directive.
Args:
args: arguments of the version directive
"""
return cls.url_attr in args
@fetcher
class BundleFetchStrategy(FetchStrategy):
"""
Fetch strategy associated with bundle, or no-code, packages.
@@ -204,9 +193,6 @@ class BundleFetchStrategy(FetchStrategy):
TODO: Remove this class by refactoring resource handling and the link
between composite stages and composite fetch strategies (see #11981).
"""
#: This is a concrete fetch strategy for no-code packages.
enabled = True
#: There is no associated URL keyword in ``version()`` for no-code
#: packages but this property is required for some strategy-related
#: functions (e.g., check_pkg_attributes).
@@ -236,7 +222,6 @@ class FetchStrategyComposite(object):
Implements the GoF composite pattern.
"""
matches = FetchStrategy.matches
set_stage = FetchStrategy.set_stage
def source_id(self):
component_ids = tuple(i.source_id() for i in self)
@@ -244,13 +229,13 @@ def source_id(self):
return component_ids
@fetcher
class URLFetchStrategy(FetchStrategy):
"""URLFetchStrategy pulls source code from a URL for an archive, check the
archive against a checksum, and decompresses the archive.
The destination for the resulting file(s) is the standard stage path.
"""
    FetchStrategy that pulls source code from a URL for an archive, checks the
    archive against a checksum, and decompresses the archive. The destination
    for the resulting file(s) is the standard stage source path.
"""
enabled = True
url_attr = 'url'
# these are checksum types. The generic 'checksum' is deprecated for
@@ -262,6 +247,7 @@ def __init__(self, url=None, checksum=None, **kwargs):
# Prefer values in kwargs to the positionals.
self.url = kwargs.get('url', url)
self.mirrors = kwargs.get('mirrors', [])
# digest can be set as the first argument, or from an explicit
# kwarg by the hash name.
@@ -297,20 +283,36 @@ def mirror_id(self):
return os.path.sep.join(
['archive', self.digest[:2], self.digest])
@property
def candidate_urls(self):
return [self.url] + (self.mirrors or [])
@_needs_stage
def fetch(self):
if self.archive_file:
tty.msg("Already downloaded %s" % self.archive_file)
return
for url in self.candidate_urls:
try:
partial_file, save_file = self._fetch_from_url(url)
if save_file:
os.rename(partial_file, save_file)
break
except FetchError as e:
tty.msg(str(e))
pass
if not self.archive_file:
raise FailedDownloadError(self.url)
def _fetch_from_url(self, url):
save_file = None
partial_file = None
if self.stage.save_filename:
save_file = self.stage.save_filename
partial_file = self.stage.save_filename + '.part'
tty.msg("Fetching %s" % self.url)
tty.msg("Fetching %s" % url)
if partial_file:
save_args = ['-C',
'-', # continue partial downloads
@@ -324,7 +326,9 @@ def fetch(self):
'-D',
'-', # print out HTML headers
'-L', # resolve 3xx redirects
self.url,
# Timeout if can't establish a connection after 10 sec.
'--connect-timeout', '10',
url,
]
if not spack.config.get('config:verify_ssl'):
@@ -380,12 +384,7 @@ def fetch(self):
flags=re.IGNORECASE)
if content_types and 'text/html' in content_types[-1]:
warn_content_type_mismatch(self.archive_file or "the archive")
if save_file:
os.rename(partial_file, save_file)
if not self.archive_file:
raise FailedDownloadError(self.url)
return partial_file, save_file
@property
@_needs_stage
@@ -395,7 +394,7 @@ def archive_file(self):
@property
def cachable(self):
return self._cache_enabled and bool(self.digest)
return self.cache_enabled and bool(self.digest)
@_needs_stage
def expand(self):
@@ -522,6 +521,7 @@ def __str__(self):
return "[no url]"
@fetcher
class CacheURLFetchStrategy(URLFetchStrategy):
"""The resource associated with a cache URL may be out of date."""
@@ -597,7 +597,7 @@ def archive(self, destination, **kwargs):
patterns = kwargs.get('exclude', None)
if patterns is not None:
if isinstance(patterns, string_types):
if isinstance(patterns, six.string_types):
patterns = [patterns]
for p in patterns:
tar.add_default_arg('--exclude=%s' % p)
@@ -621,6 +621,7 @@ def __repr__(self):
return "%s<%s>" % (self.__class__, self.url)
@fetcher
class GoFetchStrategy(VCSFetchStrategy):
"""Fetch strategy that employs the `go get` infrastructure.
@@ -634,7 +635,6 @@ class GoFetchStrategy(VCSFetchStrategy):
The fetched source will be moved to the standard stage sourcepath directory
during the expand step.
"""
enabled = True
url_attr = 'go'
def __init__(self, **kwargs):
@@ -691,6 +691,7 @@ def __str__(self):
return "[go] %s" % self.url
@fetcher
class GitFetchStrategy(VCSFetchStrategy):
"""
@@ -712,7 +713,6 @@ class GitFetchStrategy(VCSFetchStrategy):
Repositories are cloned into the standard stage source path directory.
"""
enabled = True
url_attr = 'git'
optional_attrs = ['tag', 'branch', 'commit', 'submodules', 'get_full_repo']
@@ -746,7 +746,7 @@ def git(self):
@property
def cachable(self):
return self._cache_enabled and bool(self.commit or self.tag)
return self.cache_enabled and bool(self.commit or self.tag)
def source_id(self):
return self.commit or self.tag
@@ -892,6 +892,7 @@ def __str__(self):
return '[git] {0}'.format(self._repo_info())
@fetcher
class SvnFetchStrategy(VCSFetchStrategy):
"""Fetch strategy that gets source code from a subversion repository.
@@ -906,7 +907,6 @@ class SvnFetchStrategy(VCSFetchStrategy):
Repositories are checked out into the standard stage source path directory.
"""
enabled = True
url_attr = 'svn'
optional_attrs = ['revision']
@@ -929,7 +929,7 @@ def svn(self):
@property
def cachable(self):
return self._cache_enabled and bool(self.revision)
return self.cache_enabled and bool(self.revision)
def source_id(self):
return self.revision
@@ -991,6 +991,7 @@ def __str__(self):
return "[svn] %s" % self.url
@fetcher
class HgFetchStrategy(VCSFetchStrategy):
"""
@@ -1013,7 +1014,6 @@ class HgFetchStrategy(VCSFetchStrategy):
Repositories are cloned into the standard stage source path directory.
"""
enabled = True
url_attr = 'hg'
optional_attrs = ['revision']
@@ -1043,7 +1043,7 @@ def hg(self):
@property
def cachable(self):
return self._cache_enabled and bool(self.revision)
return self.cache_enabled and bool(self.revision)
def source_id(self):
return self.revision
@@ -1108,9 +1108,9 @@ def __str__(self):
return "[hg] %s" % self.url
@fetcher
class S3FetchStrategy(URLFetchStrategy):
"""FetchStrategy that pulls from an S3 bucket."""
enabled = True
url_attr = 's3'
def __init__(self, *args, **kwargs):
@@ -1244,10 +1244,15 @@ def _from_merged_attrs(fetcher, pkg, version):
"""Create a fetcher from merged package and version attributes."""
if fetcher.url_attr == 'url':
url = pkg.url_for_version(version)
# TODO: refactor this logic into its own method or function
# TODO: to avoid duplication
mirrors = [spack.url.substitute_version(u, version)
for u in getattr(pkg, 'urls', [])]
attrs = {fetcher.url_attr: url, 'mirrors': mirrors}
else:
url = getattr(pkg, fetcher.url_attr)
attrs = {fetcher.url_attr: url}
attrs = {fetcher.url_attr: url}
attrs.update(pkg.versions[version])
return fetcher(**attrs)

View File

@@ -510,8 +510,8 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)):
maintainers = []
#: List of attributes to be excluded from a package's hash.
metadata_attrs = ['homepage', 'url', 'list_url', 'extendable', 'parallel',
'make_jobs']
metadata_attrs = ['homepage', 'url', 'urls', 'list_url', 'extendable',
'parallel', 'make_jobs']
def __init__(self, spec):
# this determines how the package should be built.
@@ -524,6 +524,12 @@ def __init__(self, spec):
# a binary cache.
self.installed_from_binary_cache = False
# Ensure that only one of these two attributes are present
if getattr(self, 'url', None) and getattr(self, 'urls', None):
msg = "a package can have either a 'url' or a 'urls' attribute"
msg += " [package '{0.name}' defines both]"
raise ValueError(msg.format(self))
# Set a default list URL (place to find available versions)
if not hasattr(self, 'list_url'):
self.list_url = None
@@ -750,7 +756,9 @@ def url_for_version(self, version):
return version_urls[version]
# If no specific URL, use the default, class-level URL
default_url = getattr(self, 'url', None)
url = getattr(self, 'url', None)
urls = getattr(self, 'urls', [None])
default_url = url or urls.pop(0)
# if no exact match AND no class-level default, use the nearest URL
if not default_url:

View File

@@ -30,6 +30,7 @@
from spack.build_systems.intel import IntelPackage
from spack.build_systems.meson import MesonPackage
from spack.build_systems.sip import SIPPackage
from spack.build_systems.gnu import GNUMirrorPackage
from spack.mixins import filter_compiler_wrappers

View File

@@ -271,7 +271,7 @@ def __init__(
else:
raise ValueError(
"Can't construct Stage without url or fetch strategy")
self.fetcher.set_stage(self)
self.fetcher.stage = self
# self.fetcher can change with mirrors.
self.default_fetcher = self.fetcher
self.search_fn = search_fn
@@ -458,7 +458,7 @@ def generate_fetchers():
for fetcher in generate_fetchers():
try:
fetcher.set_stage(self)
fetcher.stage = self
self.fetcher = fetcher
self.fetcher.fetch()
break

View File

@@ -240,9 +240,6 @@ def fetcher(self, target_path, digest, **kwargs):
return MockCacheFetcher()
class MockCacheFetcher(object):
def set_stage(self, stage):
pass
def fetch(self):
raise FetchError('Mock cache always fails for tests')

View File

@@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import collections
import os
import pytest
@@ -10,8 +11,7 @@
import spack.repo
import spack.config
from spack.fetch_strategy import FailedDownloadError
from spack.fetch_strategy import from_list_url, URLFetchStrategy
import spack.fetch_strategy as fs
from spack.spec import Spec
from spack.stage import Stage
from spack.version import ver
@@ -23,10 +23,30 @@ def checksum_type(request):
return request.param
@pytest.fixture
def pkg_factory():
    """Return a factory that builds lightweight package stand-ins exposing
    only the attributes fetch strategies read: ``url_for_version``, ``urls``,
    ``url`` and ``versions``.
    """
    Pkg = collections.namedtuple(
        'Pkg', ['url_for_version', 'urls', 'url', 'versions']
    )

    def factory(url, urls):
        def fn(v):
            # NOTE(review): pop(0) intentionally mutates `urls`, mirroring
            # PackageBase.url_for_version, which consumes the first entry of
            # `urls` when no `url` attribute is set. The remaining entries
            # are what _from_merged_attrs then treats as mirrors.
            main_url = url or urls.pop(0)
            return spack.url.substitute_version(main_url, v)
        # versions defaults to empty per-version attribute dicts so that
        # fetcher construction needs no extra keyword arguments.
        return Pkg(
            url_for_version=fn, url=url, urls=urls,
            versions=collections.defaultdict(dict)
        )
    return factory
def test_urlfetchstrategy_sans_url():
"""Ensure constructor with no URL fails."""
with pytest.raises(ValueError):
with URLFetchStrategy(None):
with fs.URLFetchStrategy(None):
pass
@@ -34,8 +54,8 @@ def test_urlfetchstrategy_bad_url(tmpdir):
"""Ensure fetch with bad URL fails as expected."""
testpath = str(tmpdir)
with pytest.raises(FailedDownloadError):
fetcher = URLFetchStrategy(url='file:///does-not-exist')
with pytest.raises(fs.FailedDownloadError):
fetcher = fs.URLFetchStrategy(url='file:///does-not-exist')
assert fetcher is not None
with Stage(fetcher, path=testpath) as stage:
@@ -106,8 +126,8 @@ def test_from_list_url(mock_packages, config, spec, url, digest):
"""
specification = Spec(spec).concretized()
pkg = spack.repo.get(specification)
fetch_strategy = from_list_url(pkg)
assert isinstance(fetch_strategy, URLFetchStrategy)
fetch_strategy = fs.from_list_url(pkg)
assert isinstance(fetch_strategy, fs.URLFetchStrategy)
assert os.path.basename(fetch_strategy.url) == url
assert fetch_strategy.digest == digest
@@ -118,8 +138,8 @@ def test_from_list_url_unspecified(mock_packages, config):
spec = Spec('url-list-test @2.0.0').concretized()
pkg = spack.repo.get(spec)
fetch_strategy = from_list_url(pkg)
assert isinstance(fetch_strategy, URLFetchStrategy)
fetch_strategy = fs.from_list_url(pkg)
assert isinstance(fetch_strategy, fs.URLFetchStrategy)
assert os.path.basename(fetch_strategy.url) == 'foo-2.0.0.tar.gz'
assert fetch_strategy.digest is None
@@ -128,7 +148,7 @@ def test_nosource_from_list_url(mock_packages, config):
"""This test confirms BundlePackages do not have list url."""
pkg = spack.repo.get('nosource')
fetch_strategy = from_list_url(pkg)
fetch_strategy = fs.from_list_url(pkg)
assert fetch_strategy is None
@@ -148,9 +168,26 @@ def test_url_extra_fetch(tmpdir, mock_archive):
"""Ensure a fetch after downloading is effectively a no-op."""
testpath = str(tmpdir)
fetcher = URLFetchStrategy(mock_archive.url)
fetcher = fs.URLFetchStrategy(mock_archive.url)
with Stage(fetcher, path=testpath) as stage:
assert fetcher.archive_file is None
stage.fetch()
assert fetcher.archive_file is not None
fetcher.fetch()
@pytest.mark.parametrize('url,urls,version,expected', [
    (None,
     ['https://ftpmirror.gnu.org/autoconf/autoconf-2.69.tar.gz',
      'https://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz'],
     '2.62',
     ['https://ftpmirror.gnu.org/autoconf/autoconf-2.62.tar.gz',
      'https://ftp.gnu.org/gnu/autoconf/autoconf-2.62.tar.gz'])
])
def test_candidate_urls(pkg_factory, url, urls, version, expected):
    """Candidate urls must include every mirror, with the requested version
    substituted into each URL pattern.
    """
    fake_pkg = pkg_factory(url, urls)
    fetcher = fs._from_merged_attrs(fs.URLFetchStrategy, fake_pkg, version)
    assert fetcher.candidate_urls == expected