Fetch strategies: new global option no_cache, new git option get_full_repo.

* All fetch strategies now accept the Boolean version keyword option `no_cache` in order to allow per-version control of cache-ability.
* New git-specific version keyword option `get_full_repo` (Boolean). When true, disables the default `--depth 1` and `--single-branch` optimizations that are applied if supported by the git version and (in the former case) transport protocol.
* The try / catch blog attempting `--depth 1` and retrying on failure has been removed in favor of more accurately ascertaining when the `--depth` option should work based on git version and protocol choice. Any failure is now treated as a real problem, and the clone is only attempted once.
* Test improvements:
  * `mock_git_repository.checks[type_of_test].args['git']` is now specified as the URL (with leading `file://`) in order to avoid complaints when using `--depth`.
  * New type_of_test `tag-branch`.
  * mock_git_repository now provides `git_exe`.
  * Improved the action of the `git_version` fixture, which was previously hard-wired.
  * New tests of `--single-branch` and `--depth 1` behavior.
* Add documentation of new options to the packaging guide.
This commit is contained in:
Chris Green 2019-07-24 14:49:23 -05:00 committed by Peter Scheibel
parent 1b8e0c36cb
commit ad4309b782
4 changed files with 112 additions and 34 deletions

View File

@ -574,7 +574,11 @@ Download caching
Spack maintains a cache (described :ref:`here <caching>`) which saves files Spack maintains a cache (described :ref:`here <caching>`) which saves files
retrieved during package installations to avoid re-downloading in the case that retrieved during package installations to avoid re-downloading in the case that
a package is installed with a different specification (but the same version) or a package is installed with a different specification (but the same version) or
reinstalled on account of a change in the hashing scheme. reinstalled on account of a change in the hashing scheme. It may (rarely) be
necessary to avoid caching for a particular version by adding ``no_cache=True``
as an option to the ``version()`` directive. Example situations would be a
"snapshot"-like Version Control System (VCS) tag, a VCS branch such as
``v6-16-00-patches``, or a URL specifying a regularly updated snapshot tarball.
^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^
Version comparison Version comparison
@ -880,6 +884,11 @@ Git fetching supports the following parameters to ``version``:
* ``tag``: Name of a tag to fetch. * ``tag``: Name of a tag to fetch.
* ``commit``: SHA hash (or prefix) of a commit to fetch. * ``commit``: SHA hash (or prefix) of a commit to fetch.
* ``submodules``: Also fetch submodules recursively when checking out this repository. * ``submodules``: Also fetch submodules recursively when checking out this repository.
* ``get_full_repo``: Ensure the full git history is checked out with all remote
branch information. Normally (``get_full_repo=False``, the default), the git
option ``--depth 1`` will be used if the version of git and the specified
transport protocol support it, and ``--single-branch`` will be used if the
version of git supports it.
Only one of ``tag``, ``branch``, or ``commit`` can be used at a time. Only one of ``tag``, ``branch``, or ``commit`` can be used at a time.

View File

@ -90,17 +90,24 @@ class FetchStrategy(with_metaclass(FSMeta, object)):
#: classes have multiple ``url_attrs`` at the top-level. #: classes have multiple ``url_attrs`` at the top-level.
optional_attrs = [] # optional attributes in version() args. optional_attrs = [] # optional attributes in version() args.
def __init__(self): def __init__(self, **kwargs):
# The stage is initialized late, so that fetch strategies can be # The stage is initialized late, so that fetch strategies can be
# constructed at package construction time. This is where things # constructed at package construction time. This is where things
# will be fetched. # will be fetched.
self.stage = None self.stage = None
# Enable or disable caching for this strategy based on
# 'no_cache' option from version directive.
self._cache_enabled = not kwargs.pop('no_cache', False)
def set_stage(self, stage): def set_stage(self, stage):
"""This is called by Stage before any of the fetching """This is called by Stage before any of the fetching
methods are called on the stage.""" methods are called on the stage."""
self.stage = stage self.stage = stage
@property
def cache_enabled(self):
return self._cache_enabled
# Subclasses need to implement these methods # Subclasses need to implement these methods
def fetch(self): def fetch(self):
"""Fetch source code archive or repo. """Fetch source code archive or repo.
@ -187,7 +194,7 @@ class URLFetchStrategy(FetchStrategy):
optional_attrs = list(crypto.hashes.keys()) + ['checksum'] optional_attrs = list(crypto.hashes.keys()) + ['checksum']
def __init__(self, url=None, checksum=None, **kwargs): def __init__(self, url=None, checksum=None, **kwargs):
super(URLFetchStrategy, self).__init__() super(URLFetchStrategy, self).__init__(**kwargs)
# Prefer values in kwargs to the positionals. # Prefer values in kwargs to the positionals.
self.url = kwargs.get('url', url) self.url = kwargs.get('url', url)
@ -320,7 +327,7 @@ def archive_file(self):
@property @property
def cachable(self): def cachable(self):
return bool(self.digest) return self._cache_enabled and bool(self.digest)
@_needs_stage @_needs_stage
def expand(self): def expand(self):
@ -490,7 +497,7 @@ class VCSFetchStrategy(FetchStrategy):
""" """
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(VCSFetchStrategy, self).__init__() super(VCSFetchStrategy, self).__init__(**kwargs)
# Set a URL based on the type of fetch strategy. # Set a URL based on the type of fetch strategy.
self.url = kwargs.get(self.url_attr, None) self.url = kwargs.get(self.url_attr, None)
@ -636,7 +643,7 @@ class GitFetchStrategy(VCSFetchStrategy):
""" """
enabled = True enabled = True
url_attr = 'git' url_attr = 'git'
optional_attrs = ['tag', 'branch', 'commit', 'submodules'] optional_attrs = ['tag', 'branch', 'commit', 'submodules', 'get_full_repo']
def __init__(self, **kwargs): def __init__(self, **kwargs):
# Discards the keywords in kwargs that may conflict with the next call # Discards the keywords in kwargs that may conflict with the next call
@ -647,6 +654,7 @@ def __init__(self, **kwargs):
self._git = None self._git = None
self.submodules = kwargs.get('submodules', False) self.submodules = kwargs.get('submodules', False)
self.get_full_repo = kwargs.get('get_full_repo', False)
@property @property
def git_version(self): def git_version(self):
@ -667,7 +675,7 @@ def git(self):
@property @property
def cachable(self): def cachable(self):
return bool(self.commit or self.tag) return self._cache_enabled and bool(self.commit or self.tag)
def source_id(self): def source_id(self):
return self.commit or self.tag return self.commit or self.tag
@ -734,24 +742,22 @@ def fetch(self):
# Try to be efficient if we're using a new enough git. # Try to be efficient if we're using a new enough git.
# This checks out only one branch's history # This checks out only one branch's history
if self.git_version > ver('1.7.10'): if self.git_version >= ver('1.7.10'):
args.append('--single-branch') if self.get_full_repo:
args.append('--no-single-branch')
else:
args.append('--single-branch')
with temp_cwd(): with temp_cwd():
cloned = False # Yet more efficiency: only download a 1-commit deep
# Yet more efficiency, only download a 1-commit deep tree # tree, if the in-use git and protocol permit it.
if self.git_version >= ver('1.7.1'): if (not self.get_full_repo) and \
try: self.git_version >= ver('1.7.1') and \
git(*(args + ['--depth', '1', self.url])) self.protocol_supports_shallow_clone():
cloned = True args.extend(['--depth', '1'])
except spack.error.SpackError as e:
# This will fail with the dumb HTTP transport
# continue and try without depth, cleanup first
tty.debug(e)
if not cloned: args.extend([self.url])
args.extend([self.url]) git(*args)
git(*args)
repo_name = get_single_file('.') repo_name = get_single_file('.')
self.stage.srcdir = repo_name self.stage.srcdir = repo_name
@ -797,6 +803,13 @@ def reset(self):
self.git(*co_args) self.git(*co_args)
self.git(*clean_args) self.git(*clean_args)
def protocol_supports_shallow_clone(self):
"""Shallow clone operations (--depth #) are not supported by the basic
HTTP protocol or by no-protocol file specifications.
Use (e.g.) https:// or file:// instead."""
return not (self.url.startswith('http://') or
self.url.startswith('/'))
def __str__(self): def __str__(self):
return '[git] {0}'.format(self._repo_info()) return '[git] {0}'.format(self._repo_info())
@ -838,7 +851,7 @@ def svn(self):
@property @property
def cachable(self): def cachable(self):
return bool(self.revision) return self._cache_enabled and bool(self.revision)
def source_id(self): def source_id(self):
return self.revision return self.revision
@ -946,7 +959,7 @@ def hg(self):
@property @property
def cachable(self): def cachable(self):
return bool(self.revision) return self._cache_enabled and bool(self.revision)
def source_id(self): def source_id(self):
return self.revision return self.revision
@ -1065,7 +1078,9 @@ def _check_version_attributes(fetcher, pkg, version):
all_optionals = set(a for s in all_strategies for a in s.optional_attrs) all_optionals = set(a for s in all_strategies for a in s.optional_attrs)
args = pkg.versions[version] args = pkg.versions[version]
extra = set(args) - set(fetcher.optional_attrs) - set([fetcher.url_attr]) extra\
= set(args) - set(fetcher.optional_attrs) - \
set([fetcher.url_attr, 'no_cache'])
extra.intersection_update(all_optionals) extra.intersection_update(all_optionals)
if extra: if extra:

View File

@ -687,22 +687,28 @@ def mock_git_repository(tmpdir_factory):
checks = { checks = {
'master': Bunch( 'master': Bunch(
revision='master', file=r0_file, args={'git': str(repodir)} revision='master', file=r0_file, args={'git': url}
), ),
'branch': Bunch( 'branch': Bunch(
revision=branch, file=branch_file, args={ revision=branch, file=branch_file, args={
'git': str(repodir), 'branch': branch 'git': url, 'branch': branch
}
),
'tag-branch': Bunch(
revision=tag_branch, file=tag_file, args={
'git': url, 'branch': tag_branch
} }
), ),
'tag': Bunch( 'tag': Bunch(
revision=tag, file=tag_file, args={'git': str(repodir), 'tag': tag} revision=tag, file=tag_file, args={'git': url, 'tag': tag}
), ),
'commit': Bunch( 'commit': Bunch(
revision=r1, file=r1_file, args={'git': str(repodir), 'commit': r1} revision=r1, file=r1_file, args={'git': url, 'commit': r1}
) )
} }
t = Bunch(checks=checks, url=url, hash=rev_hash, path=str(repodir)) t = Bunch(checks=checks, url=url, hash=rev_hash,
path=str(repodir), git_exe=git)
yield t yield t

View File

@ -3,6 +3,7 @@
# #
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
import copy
import os import os
import shutil import shutil
@ -26,7 +27,8 @@
_mock_transport_error = 'Mock HTTP transport error' _mock_transport_error = 'Mock HTTP transport error'
@pytest.fixture(params=[None, '1.8.5.2', '1.8.5.1', '1.7.10', '1.7.0']) @pytest.fixture(params=[None, '1.8.5.2', '1.8.5.1',
'1.7.10', '1.7.1', '1.7.0'])
def git_version(request, monkeypatch): def git_version(request, monkeypatch):
"""Tests GitFetchStrategy behavior for different git versions. """Tests GitFetchStrategy behavior for different git versions.
@ -39,7 +41,8 @@ def git_version(request, monkeypatch):
real_git_version = ver(git('--version', output=str).lstrip('git version ')) real_git_version = ver(git('--version', output=str).lstrip('git version '))
if request.param is None: if request.param is None:
yield # don't patch; run with the real git_version method. # Don't patch; run with the real git_version method.
yield real_git_version
else: else:
test_git_version = ver(request.param) test_git_version = ver(request.param)
if test_git_version > real_git_version: if test_git_version > real_git_version:
@ -48,8 +51,8 @@ def git_version(request, monkeypatch):
# Patch the fetch strategy to think it's using a lower git version. # Patch the fetch strategy to think it's using a lower git version.
# we use this to test what we'd need to do with older git versions # we use this to test what we'd need to do with older git versions
# using a newer git installation. # using a newer git installation.
monkeypatch.setattr(GitFetchStrategy, 'git_version', ver('1.7.1')) monkeypatch.setattr(GitFetchStrategy, 'git_version', test_git_version)
yield yield test_git_version
@pytest.fixture @pytest.fixture
@ -169,3 +172,48 @@ def test_needs_stage():
matches=_mock_transport_error): matches=_mock_transport_error):
fetcher = GitFetchStrategy(git='file:///not-a-real-git-repo') fetcher = GitFetchStrategy(git='file:///not-a-real-git-repo')
fetcher.fetch() fetcher.fetch()
@pytest.mark.parametrize("get_full_repo", [True, False])
def test_get_full_repo(get_full_repo, git_version, mock_git_repository,
config, mutable_mock_packages):
"""Ensure that we can clone a full repository."""
if git_version < ver('1.7.1'):
pytest.skip('Not testing get_full_repo for older git {0}'.
format(git_version))
secure = True
type_of_test = 'tag-branch'
t = mock_git_repository.checks[type_of_test]
spec = Spec('git-test')
spec.concretize()
pkg = spack.repo.get(spec)
args = copy.copy(t.args)
args['get_full_repo'] = get_full_repo
pkg.versions[ver('git')] = args
with pkg.stage:
with spack.config.override('config:verify_ssl', secure):
pkg.do_stage()
with working_dir(pkg.stage.source_path):
branches\
= mock_git_repository.git_exe('branch', '-a',
output=str).splitlines()
nbranches = len(branches)
commits\
= mock_git_repository.\
git_exe('log', '--graph',
'--pretty=format:%h -%d %s (%ci) <%an>',
'--abbrev-commit',
output=str).splitlines()
ncommits = len(commits)
if get_full_repo:
assert(nbranches == 5)
assert(ncommits == 2)
else:
assert(nbranches == 2)
assert(ncommits == 1)