Adding ability to compare git references to spack install (#24639)

This will allow a user to (from anywhere a Spec is parsed including both name and version) refer to a git commit in lieu of 
a package version, and be able to make comparisons with releases in the history based on commits (or with other commits). We do this by way of:

 - Adding a property, is_commit, to a version, meaning I can always check if a version is a commit and then change some action.
 - Adding an attribute to the Version object which can look up commits from a git repo and find the last known version before that commit, and the distance from that version to the commit.
 - Construct new Version comparators, which are tuples. For normal versions, they are unchanged. For commits with a previous version x.y.z, d commits away, the comparator is (x, y, z, '', d). For commits with no previous version, the comparator is ('', d) where d is the distance from the first commit in the repo.
 - Metadata on git commits is cached in the misc_cache, for quick lookup later.
 - Git repos are cached as bare repos in `~/.spack/git_repos`
 - In both caches, git repo urls are turned into file paths within the cache

If a commit cannot be found in the cached git repo, we fetch from the repo. If a commit is found in the cached metadata, we do not recompare to newly downloaded tags (assuming repo structure does not change). The cached metadata may be thrown out by using the `spack clean -m` option if you know the repo structure has changed in a way that invalidates existing entries. Future work will include automatic updates.

# Finding previous versions
Spack will search the repo for any tags that match the string of a version given by the `version` directive. Spack will also search for any tags that match `v + string` for any version string. Beyond that, Spack will search for tags that match a SEMVER regex (i.e., tags of the form x.y.z) and interpret those tags as valid versions as well. Future work will increase the breadth of tags understood by Spack.

For each tag, Spack queries git to determine whether the tag is an ancestor of the commit in question or not. Spack then sorts the tags that are ancestors of the commit by commit-distance in the repo, and takes the nearest ancestor. The version represented by that tag is listed as the previous version for the commit.

Not all commits will find a previous version, depending on the package workflow. Future work may enable more tangential relationships between commits and versions to be discovered, but many commits in real world git repos require human knowledge to associate with a most recent previous version. Future work will also allow packages to specify commit/tag/version relationships manually for such situations.

# Version comparisons.
The empty string is a valid component of a Spack version tuple, and is in fact the lowest-valued component. It cannot be generated as part of any valid version. These two characteristics make it perfect for delineating previous versions from distances. For any version x.y.z, (x, y, z, '', _) will be less than any "real" version beginning x.y.z. This ensures that no distance from a release will cause the commit to be interpreted as "greater than" a version which is not an ancestor of it.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>

Co-authored-by: vsoch <vsoch@users.noreply.github.com>
Co-authored-by: Gregory Becker <becker33@llnl.gov>
Co-authored-by: Todd Gamblin <tgamblin@llnl.gov>
This commit is contained in:
Vanessasaurus 2021-09-14 23:12:34 -06:00 committed by GitHub
parent c3bc3e61aa
commit ef5ad4eb34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 638 additions and 43 deletions

View File

@ -49,10 +49,10 @@
import spack.util.pattern as pattern
import spack.util.url as url_util
import spack.util.web as web_util
import spack.version
from spack.util.compression import decompressor_for, extension
from spack.util.executable import CommandNotFoundError, which
from spack.util.string import comma_and, quote
from spack.version import Version, ver
#: List of all fetch strategies, created by FetchStrategy metaclass.
all_strategies = []
@ -750,7 +750,7 @@ def __init__(self, **kwargs):
@property
def go_version(self):
vstring = self.go('version', output=str).split(' ')[2]
return Version(vstring)
return spack.version.Version(vstring)
@property
def go(self):
@ -843,7 +843,7 @@ def version_from_git(git_exe):
"""
version_output = git_exe('--version', output=str)
m = re.search(GitFetchStrategy.git_version_re, version_output)
return Version(m.group(1))
return spack.version.Version(m.group(1))
@property
def git(self):
@ -852,7 +852,7 @@ def git(self):
# Disable advice for a quieter fetch
# https://github.com/git/git/blob/master/Documentation/RelNotes/1.7.2.txt
if self.git_version >= Version('1.7.2'):
if self.git_version >= spack.version.Version('1.7.2'):
self._git.add_default_arg('-c')
self._git.add_default_arg('advice.detachedHead=false')
@ -895,25 +895,52 @@ def fetch(self):
tty.debug('Already fetched {0}'.format(self.stage.source_path))
return
self.clone(commit=self.commit, branch=self.branch, tag=self.tag)
def clone(self, dest=None, commit=None, branch=None, tag=None, bare=False):
"""
Clone a repository to a path.
This method handles cloning from git, but does not require a stage.
Arguments:
dest (str or None): The path into which the code is cloned. If None,
requires a stage and uses the stage's source path.
commit (str or None): A commit to fetch from the remote. Only one of
commit, branch, and tag may be non-None.
branch (str or None): A branch to fetch from the remote.
tag (str or None): A tag to fetch from the remote.
bare (bool): Execute a "bare" git clone (--bare option to git)
"""
# Default to spack source path
dest = dest or self.stage.source_path
tty.debug('Cloning git repository: {0}'.format(self._repo_info()))
git = self.git
if self.commit:
debug = spack.config.get('config:debug')
if bare:
# We don't need to worry about which commit/branch/tag is checked out
clone_args = ['clone', '--bare']
if not debug:
clone_args.append('--quiet')
clone_args.extend([self.url, dest])
git(*clone_args)
elif commit:
# Need to do a regular clone and check out everything if
# they asked for a particular commit.
debug = spack.config.get('config:debug')
clone_args = ['clone', self.url]
if not debug:
clone_args.insert(1, '--quiet')
with temp_cwd():
git(*clone_args)
repo_name = get_single_file('.')
self.stage.srcdir = repo_name
shutil.move(repo_name, self.stage.source_path)
if self.stage:
self.stage.srcdir = repo_name
shutil.move(repo_name, dest)
with working_dir(self.stage.source_path):
checkout_args = ['checkout', self.commit]
with working_dir(dest):
checkout_args = ['checkout', commit]
if not debug:
checkout_args.insert(1, '--quiet')
git(*checkout_args)
@ -921,18 +948,18 @@ def fetch(self):
else:
# Can be more efficient if not checking out a specific commit.
args = ['clone']
if not spack.config.get('config:debug'):
if not debug:
args.append('--quiet')
# If we want a particular branch ask for it.
if self.branch:
args.extend(['--branch', self.branch])
elif self.tag and self.git_version >= ver('1.8.5.2'):
args.extend(['--branch', self.tag])
if branch:
args.extend(['--branch', branch])
elif tag and self.git_version >= spack.version.ver('1.8.5.2'):
args.extend(['--branch', tag])
# Try to be efficient if we're using a new enough git.
# This checks out only one branch's history
if self.git_version >= ver('1.7.10'):
if self.git_version >= spack.version.ver('1.7.10'):
if self.get_full_repo:
args.append('--no-single-branch')
else:
@ -942,7 +969,7 @@ def fetch(self):
# Yet more efficiency: only download a 1-commit deep
# tree, if the in-use git and protocol permit it.
if (not self.get_full_repo) and \
self.git_version >= ver('1.7.1') and \
self.git_version >= spack.version.ver('1.7.1') and \
self.protocol_supports_shallow_clone():
args.extend(['--depth', '1'])
@ -950,14 +977,15 @@ def fetch(self):
git(*args)
repo_name = get_single_file('.')
self.stage.srcdir = repo_name
shutil.move(repo_name, self.stage.source_path)
if self.stage:
self.stage.srcdir = repo_name
shutil.move(repo_name, dest)
with working_dir(self.stage.source_path):
with working_dir(dest):
# For tags, be conservative and check them out AFTER
# cloning. Later git versions can do this with clone
# --branch, but older ones fail.
if self.tag and self.git_version < ver('1.8.5.2'):
if tag and self.git_version < spack.version.ver('1.8.5.2'):
# pull --tags returns a "special" error code of 1 in
# older versions that we have to ignore.
# see: https://github.com/git/git/commit/19d122b
@ -971,7 +999,7 @@ def fetch(self):
git(*co_args)
if self.submodules_delete:
with working_dir(self.stage.source_path):
with working_dir(dest):
for submodule_to_delete in self.submodules_delete:
args = ['rm', submodule_to_delete]
if not spack.config.get('config:debug'):
@ -980,7 +1008,7 @@ def fetch(self):
# Init submodules if the user asked for them.
if self.submodules:
with working_dir(self.stage.source_path):
with working_dir(dest):
args = ['submodule', 'update', '--init', '--recursive']
if not spack.config.get('config:debug'):
args.insert(1, '--quiet')
@ -1502,8 +1530,15 @@ def for_package_version(pkg, version):
check_pkg_attributes(pkg)
if not isinstance(version, Version):
version = Version(version)
if not isinstance(version, spack.version.Version):
version = spack.version.Version(version)
# if it's a commit, we must use a GitFetchStrategy
if version.is_commit and hasattr(pkg, "git"):
# Populate the version with comparisons to other commits
version.generate_commit_lookup(pkg)
fetcher = GitFetchStrategy(git=pkg.git, commit=str(version))
return fetcher
# If it's not a known version, try to extrapolate one by URL
if version not in pkg.versions:

View File

@ -56,6 +56,9 @@
reports_path = os.path.join(user_config_path, "reports")
monitor_path = os.path.join(reports_path, "monitor")
# We cache repositories (git) in first, extracted metadata in second
user_repos_cache_path = os.path.join(user_config_path, 'git_repos')
opt_path = os.path.join(prefix, "opt")
etc_path = os.path.join(prefix, "etc")
system_etc_path = '/etc'

View File

@ -4699,6 +4699,15 @@ def do_parse(self):
except spack.parse.ParseError as e:
raise SpecParseError(e)
# Generate lookups for git-commit-based versions
for spec in specs:
# Cannot do lookups for versions in anonymous specs
# Only allow concrete versions using git for now
if spec.name and spec.versions.concrete and spec.version.is_commit:
pkg = spec.package
if hasattr(pkg, 'git'):
spec.version.generate_commit_lookup(pkg)
return specs
def spec_from_file(self):

View File

@ -21,6 +21,7 @@
import spack.environment as ev
import spack.hash_types as ht
import spack.package
import spack.util.executable
from spack.error import SpackError
from spack.main import SpackCommand
from spack.spec import CompilerSpec, Spec
@ -224,7 +225,7 @@ def test_install_overwrite(
def test_install_overwrite_not_installed(
mock_packages, mock_archive, mock_fetch, config, install_mockery
mock_packages, mock_archive, mock_fetch, config, install_mockery,
):
# Try to install a spec and then to reinstall it.
spec = Spec('libdwarf')
@ -236,6 +237,32 @@ def test_install_overwrite_not_installed(
assert os.path.exists(spec.prefix)
def test_install_commit(
        mock_git_version_info, install_mockery, mock_packages, monkeypatch):
    """Install a git package whose version is given as a raw commit hash.

    Checks that a commit version is associated with its package early
    enough for the version lookup to work during concretization and
    install. The lookup logic itself is covered by other tests.
    """
    repo_path, filename, commits = mock_git_version_info

    # Point every package at the mock repository for the duration of the test
    git_url = 'file://%s' % repo_path
    monkeypatch.setattr(
        spack.package.PackageBase, 'git', git_url, raising=False)

    # The fixture lists commits newest-first, so the last one is the first commit
    first_commit = commits[-1]
    spec = spack.spec.Spec('git-test-commit@%s' % first_commit)
    spec.concretize()
    spec.package.do_install()

    # Ensure first commit file contents were written
    assert filename in os.listdir(spec.prefix.bin)

    with open(spec.prefix.bin.join(filename), 'r') as installed_file:
        content = installed_file.read().strip()

    # contents are weird for another test
    assert content == '[]'
def test_install_overwrite_multiple(
mock_packages, mock_archive, mock_fetch, config, install_mockery
):

View File

@ -61,6 +61,103 @@ def last_two_git_commits(scope='session'):
yield regex.findall(git_log_out)
def write_file(filename, contents):
    """Write ``contents`` to ``filename``, replacing whatever was there."""
    with open(filename, 'w') as stream:
        stream.write(contents)
# Module-level counter shared by every instantiation of the fixture below.
# It supplies the day-of-month used in each commit's ``--date`` argument and
# is incremented after every commit.
commit_counter = 0


@pytest.fixture
def mock_git_version_info(tmpdir, scope="function"):
    """Create a mock git repo with known structure

    The structure of commits in this repo is as follows::

       | o fourth 1.x commit (1.2)
       | o third 1.x commit
       | |
       o | fourth main commit (v2.0)
       o | third main commit
       | |
       | o second 1.x commit (v1.1)
       | o first 1.x commit
       | /
       |/
       o second commit (v1.0)
       o first commit

    The repo consists of a single file, in which the Version._cmp representation
    of each commit is expressed as a string.

    Important attributes of the repo for test coverage are: multiple branches,
    version tags on multiple branches, and version order is not equal to time
    order or topological order.

    Yields:
        (tuple): ``(repo_path, filename, commits)`` where ``repo_path`` is the
        repository directory, ``filename`` is the name of the single tracked
        file, and ``commits`` is the list of commit hashes printed by
        ``git log --all --topo-order``.
    """
    # NOTE(review): ``scope`` is an ordinary parameter with a default here, not
    # an argument to ``pytest.fixture`` -- confirm whether that was intended.
    git = spack.util.executable.which('git', required=True)
    repo_path = str(tmpdir.mkdir('git_repo'))
    filename = 'file.txt'

    def commit(message):
        # Commit all tracked changes with a date derived from the global
        # counter, then advance the counter.
        # NOTE(review): the day-of-month starts at 00 and is never reset, so it
        # can exceed the length of January -- confirm git accepts such dates.
        global commit_counter
        git('commit', '--date', '2020-01-%02d 12:0:00 +0300' % commit_counter,
            '-am', message)
        commit_counter += 1

    with working_dir(repo_path):
        git("init")

        # Identity is configured locally so commits work without a global config
        git('config', 'user.name', 'Spack')
        git('config', 'user.email', 'spack@spack.io')

        # Add two commits on main branch
        write_file(filename, '[]')
        git('add', filename)
        commit('first commit')

        # Get name of default branch (differs by git version)
        main = git('rev-parse', '--abbrev-ref', 'HEAD', output=str, error=str).strip()

        # Tag second commit as v1.0
        write_file(filename, "[1, 0]")
        commit('second commit')
        git('tag', 'v1.0')

        # Add two commits and a tag on 1.x branch
        git('checkout', '-b', '1.x')
        write_file(filename, "[1, 0, '', 1]")
        commit('first 1.x commit')

        write_file(filename, "[1, 1]")
        commit('second 1.x commit')
        git('tag', 'v1.1')

        # Add two commits and a tag on main branch
        git('checkout', main)
        write_file(filename, "[1, 0, '', 1]")
        commit('third main commit')
        write_file(filename, "[2, 0]")
        commit('fourth main commit')
        git('tag', 'v2.0')

        # Add two more commits on 1.x branch to ensure we aren't cheating by using time
        git('checkout', '1.x')
        write_file(filename, "[1, 1, '', 1]")
        commit('third 1.x commit')
        write_file(filename, "[1, 2]")
        commit('fourth 1.x commit')
        git('tag', '1.2')  # test robust parsing to different syntax, no v

        # Get the commits in topo order
        log = git('log', '--all', '--pretty=format:%H', '--topo-order',
                  output=str, error=str)
        commits = [c for c in log.split('\n') if c]

    # Return the git directory to install, the filename used, and the commits
    yield repo_path, filename, commits
@pytest.fixture(autouse=True)
def clear_recorded_monkeypatches():
yield

View File

@ -212,7 +212,7 @@ def test_get_full_repo(get_full_repo, git_version, mock_git_repository,
ncommits = len(commits)
if get_full_repo:
assert(nbranches == 5)
assert(nbranches >= 5)
assert(ncommits == 2)
else:
assert(nbranches == 2)

View File

@ -7,6 +7,8 @@
import os
import os.path
import pytest
import spack.paths
import spack.util.url as url_util
@ -303,3 +305,73 @@ def test_url_join_absolute_paths():
assert(url_util.join(*args, resolve_href=False) ==
'http://example.com/path/resource')
@pytest.mark.parametrize("url,parts", [
    # ssh:// URLs, with and without user and port
    ("ssh://user@host.xz:500/path/to/repo.git/",
     ("ssh", "user", "host.xz", 500, "/path/to/repo.git")),
    ("ssh://user@host.xz/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "/path/to/repo.git")),
    ("ssh://host.xz:500/path/to/repo.git/",
     ("ssh", None, "host.xz", 500, "/path/to/repo.git")),
    ("ssh://host.xz/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "/path/to/repo.git")),
    ("ssh://user@host.xz/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "/path/to/repo.git")),
    ("ssh://host.xz/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "/path/to/repo.git")),
    ("ssh://user@host.xz/~user/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "~user/path/to/repo.git")),
    ("ssh://host.xz/~user/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "~user/path/to/repo.git")),
    ("ssh://user@host.xz/~/path/to/repo.git",
     ("ssh", "user", "host.xz", None, "~/path/to/repo.git")),
    ("ssh://host.xz/~/path/to/repo.git",
     ("ssh", None, "host.xz", None, "~/path/to/repo.git")),
    # scp-like syntax without a scheme
    ("git@github.com:spack/spack.git",
     (None, "git", "github.com", None, "spack/spack.git")),
    ("user@host.xz:/path/to/repo.git/",
     (None, "user", "host.xz", None, "/path/to/repo.git")),
    ("host.xz:/path/to/repo.git/",
     (None, None, "host.xz", None, "/path/to/repo.git")),
    ("user@host.xz:~user/path/to/repo.git/",
     (None, "user", "host.xz", None, "~user/path/to/repo.git")),
    ("host.xz:~user/path/to/repo.git/",
     (None, None, "host.xz", None, "~user/path/to/repo.git")),
    ("user@host.xz:path/to/repo.git",
     (None, "user", "host.xz", None, "path/to/repo.git")),
    ("host.xz:path/to/repo.git",
     (None, None, "host.xz", None, "path/to/repo.git")),
    # other schemes
    ("rsync://host.xz/path/to/repo.git/",
     ("rsync", None, "host.xz", None, "/path/to/repo.git")),
    ("git://host.xz/path/to/repo.git/",
     ("git", None, "host.xz", None, "/path/to/repo.git")),
    ("git://host.xz/~user/path/to/repo.git/",
     ("git", None, "host.xz", None, "~user/path/to/repo.git")),
    ("http://host.xz/path/to/repo.git/",
     ("http", None, "host.xz", None, "/path/to/repo.git")),
    ("https://host.xz/path/to/repo.git/",
     ("https", None, "host.xz", None, "/path/to/repo.git")),
    ("https://github.com/spack/spack",
     ("https", None, "github.com", None, "/spack/spack")),
    ("https://github.com/spack/spack/",
     ("https", None, "github.com", None, "/spack/spack")),
    ("file:///path/to/repo.git/",
     ("file", None, None, None, "/path/to/repo.git")),
    ("file://~/path/to/repo.git/",
     ("file", None, None, None, "~/path/to/repo.git")),
    # bad ports should give us None
    ("ssh://host.xz:port/path/to/repo.git/", None),
    # bad ports should give us None
    ("ssh://host-foo.xz:port/path/to/repo.git/", None),
    # regular file paths should give us None
    ("/path/to/repo.git/", None),
    ("path/to/repo.git/", None),
    ("~/path/to/repo.git", None),
])
def test_git_url_parse(url, parts):
    """Check ``parse_git_url`` against the expected component tuple.

    A ``parts`` of ``None`` marks a URL that must be rejected with a
    ``ValueError``.
    """
    if parts is not None:
        assert parts == url_util.parse_git_url(url)
        return

    with pytest.raises(ValueError):
        url_util.parse_git_url(url)

View File

@ -7,8 +7,15 @@
We try to maintain compatibility with RPM's version semantics
where it makes sense.
"""
import os
import pytest
from llnl.util.filesystem import working_dir
import spack.package
import spack.spec
from spack.util.executable import which
from spack.version import Version, VersionList, ver
@ -576,3 +583,22 @@ def test_invalid_versions(version_str):
"""Ensure invalid versions are rejected with a ValueError"""
with pytest.raises(ValueError):
Version(version_str)
def test_versions_from_git(mock_git_version_info, monkeypatch, mock_packages):
    """Compare each commit's computed comparator with the one stored in the repo.

    Every commit of the mock repository stores, in its single file, the
    string form of the comparator expected for that commit.
    """
    repo_path, filename, commits = mock_git_version_info
    git_url = 'file://%s' % repo_path
    monkeypatch.setattr(
        spack.package.PackageBase, 'git', git_url, raising=False)

    tracked_file = os.path.join(repo_path, filename)
    git = which('git')

    for commit in commits:
        version = spack.spec.Spec('git-test-commit@%s' % commit).version

        # Integers stay as they are; every other component is stringified
        comparator = []
        for component in version._cmp(version.commit_lookup):
            if isinstance(component, int):
                comparator.append(component)
            else:
                comparator.append(str(component))

        # Check out the commit so the file holds its expected comparator
        with working_dir(repo_path):
            git('checkout', commit)

        with open(tracked_file, 'r') as stream:
            expected = stream.read()

        assert str(comparator) == expected

View File

@ -36,7 +36,7 @@
import spack.error
import spack.util.compression as comp
from spack.version import Version
import spack.version
#
@ -621,7 +621,7 @@ def parse_version(path):
UndetectableVersionError: If the URL does not match any regexes
"""
version, start, length, i, regex = parse_version_offset(path)
return Version(version)
return spack.version.Version(version)
def parse_name_offset(path, v=None):

View File

@ -248,3 +248,56 @@ def _join(base_url, path, *extra, **kwargs):
params=params,
query=query,
fragment=None))
git_re = (
    r"^(?:([a-z]+)://)?"        # 1. optional scheme
    r"(?:([^@/]+)@)?"           # 2. optional user (may not span a '/')
    r"([^:/~]+)?"               # 3. optional hostname
    r"(?(1)(?::([^:/]+))?|:)"   # 4. :<optional port> if scheme else :
    r"(.*[^/])/?$"              # 5. path
)


def parse_git_url(url):
    """Parse git URL into components.

    This parses URLs that look like:

    * ``https://host.com:443/path/to/repo.git``, or
    * ``git@host.com:path/to/repo.git``

    Anything not matching those patterns is likely a local
    file or invalid.

    Returned components are as follows (optional values can be ``None``):

    1. ``scheme`` (optional): git, ssh, http, https
    2. ``user`` (optional): ``git@`` for github, username for http or ssh
    3. ``hostname``: domain of server (``None`` when the URL has no host
       part, e.g. ``file:///path``)
    4. ``port`` (optional): port on server, converted to ``int``
    5. ``path``: path on the server, e.g. spack/spack; a single trailing
       ``/`` is dropped

    The user component is only recognized before the first ``/``, so an
    ``@`` that appears later in the path (``https://host/repo@ref``) stays
    part of the path instead of being mistaken for a user separator.

    Returns:
        (tuple): tuple containing URL components as above

    Raises ``ValueError`` for invalid URLs.
    """
    match = re.match(git_re, url)
    if not match:
        raise ValueError("bad git URL: %s" % url)

    # initial parse
    scheme, user, hostname, port, path = match.groups()

    # special handling for ~ paths (they're never absolute)
    if path.startswith("/~"):
        path = path[1:]

    # the regex accepts any non-empty port text; only integers are valid
    if port is not None:
        try:
            port = int(port)
        except ValueError:
            raise ValueError("bad port in git url: %s" % url)

    return (scheme, user, hostname, port, path)

View File

@ -25,13 +25,21 @@
concrete
"""
import numbers
import os
import re
from bisect import bisect_left
from functools import wraps
from six import string_types
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp, working_dir
import spack.caches
import spack.error
import spack.paths
import spack.util.executable
import spack.util.spack_json as sjson
from spack.util.spack_yaml import syaml_dict
__all__ = ['Version', 'VersionRange', 'VersionList', 'ver']
@ -39,9 +47,17 @@
# Valid version characters
VALID_VERSION = re.compile(r'^[A-Za-z0-9_.-]+$')
# regex for a commit version: a full git SHA-1 is exactly 40 lowercase
# hexadecimal digits, so letters outside a-f must not be accepted
COMMIT_VERSION = re.compile(r'^[a-f0-9]{40}$')
# regex for version segments
SEGMENT_REGEX = re.compile(r'(?:(?P<num>[0-9]+)|(?P<str>[a-zA-Z]+))(?P<sep>[_.-]*)')
# regular expression for semantic versioning
# The prefix before the version is matched lazily so the ``semver`` group
# starts at the first x.y.z occurrence in the tag. A greedy prefix would
# swallow leading digits of the major version ("v10.2.3" -> "0.2.3") and
# could never match a bare "x.y.z" tag.
SEMVER_REGEX = re.compile(".*?(?P<semver>([0-9]+)[.]([0-9]+)[.]([0-9]+)"
                          "(?:-([0-9A-Za-z-]+(?:[.][0-9A-Za-z-]+)*))?"
                          "(?:[+][0-9A-Za-z-]+)?)")
# Infinity-like versions. The order in the list implies the comparison rules
infinity_versions = ['develop', 'main', 'master', 'head', 'trunk']
@ -151,7 +167,7 @@ def __gt__(self, other):
class Version(object):
"""Class to represent versions"""
__slots__ = ['version', 'separators', 'string']
__slots__ = ['version', 'separators', 'string', 'commit_lookup']
def __init__(self, string):
if not isinstance(string, str):
@ -164,13 +180,40 @@ def __init__(self, string):
if not VALID_VERSION.match(string):
raise ValueError("Bad characters in version string: %s" % string)
# Split version into alphabetical and numeric segments simultaneously
# An object that can lookup git commits to compare them to versions
self.commit_lookup = None
segments = SEGMENT_REGEX.findall(string)
self.version = tuple(
int(m[0]) if m[0] else VersionStrComponent(m[1]) for m in segments
)
self.separators = tuple(m[2] for m in segments)
def _cmp(self, other_lookups=None):
    """Return the tuple used to compare this version with others.

    For a version that is not a commit, or when no lookup is available,
    this is simply ``self.version``. For a commit with lookup data it is
    the previous version's tuple, followed by an empty string component
    and the commit distance when that distance is non-zero.

    Args:
        other_lookups: lookup object to fall back on when this version
            has no ``commit_lookup`` of its own.
    """
    lookup = self.commit_lookup or other_lookups
    if not (self.is_commit and lookup):
        return self.version

    info = lookup.get(self.string)
    if not info:
        return self.version

    prev_version, distance = info

    # Start from the previous version's components, if there is one
    base = Version(prev_version).version if prev_version else ()

    # If commit is exactly a known version, no distance suffix
    if not distance:
        return base

    # Extend previous version by empty component and distance
    return base + (VersionStrComponent(''), distance)
@property
def is_commit(self):
    """
    Whether the original version string refers to a git commit.

    Infinity versions are never commits; anything else is a commit
    when it matches ``COMMIT_VERSION``.
    """
    return (self.string not in infinity_versions and
            COMMIT_VERSION.match(self.string) is not None)
@property
def dotted(self):
"""The dotted representation of the version.
@ -276,10 +319,13 @@ def satisfies(self, other):
gcc@4.7 so that when a user asks to build with gcc@4.7, we can find
a suitable compiler.
"""
self_cmp = self._cmp(other.commit_lookup)
other_cmp = other._cmp(self.commit_lookup)
nself = len(self.version)
nother = len(other.version)
return nother <= nself and self.version[:nother] == other.version
# Do the final comparison
nself = len(self_cmp)
nother = len(other_cmp)
return nother <= nself and self_cmp[:nother] == other_cmp
def __iter__(self):
return iter(self.version)
@ -331,13 +377,22 @@ def __lt__(self, other):
if other is None:
return False
# If either is a commit and we haven't indexed yet, can't compare
if (other.is_commit or self.is_commit) and not (self.commit_lookup or
other.commit_lookup):
return False
# Use tuple comparison assisted by VersionStrComponent for performance
return self.version < other.version
return self._cmp(other.commit_lookup) < other._cmp(self.commit_lookup)
@coerced
def __eq__(self, other):
return (other is not None and
type(other) == Version and self.version == other.version)
# Cut out early if we don't have a version
if other is None or type(other) != Version:
return False
return self._cmp(other.commit_lookup) == other._cmp(self.commit_lookup)
@coerced
def __ne__(self, other):
@ -362,18 +417,23 @@ def __hash__(self):
def __contains__(self, other):
if other is None:
return False
return other.version[:len(self.version)] == self.version
self_cmp = self._cmp(other.commit_lookup)
return other._cmp(self.commit_lookup)[:len(self_cmp)] == self_cmp
def is_predecessor(self, other):
"""True if the other version is the immediate predecessor of this one.
That is, NO versions v exist such that:
That is, NO non-commit versions v exist such that:
(self < v < other and v not in self).
"""
if len(self.version) != len(other.version):
self_cmp = self._cmp(self.commit_lookup)
other_cmp = other._cmp(other.commit_lookup)
if self_cmp[:-1] != other_cmp[:-1]:
return False
sl = self.version[-1]
ol = other.version[-1]
sl = self_cmp[-1]
ol = other_cmp[-1]
return type(sl) == int and type(ol) == int and (ol - sl == 1)
def is_successor(self, other):
@ -401,6 +461,34 @@ def intersection(self, other):
else:
return VersionList()
def generate_commit_lookup(self, pkg):
    """
    Use the git fetcher to look up a version for a commit.

    Since we want to optimize the clone and lookup, we do the clone once
    and store it in the user specified git repository cache. We also need
    context of the package to get known versions, which could be tags if
    they are linked to Git Releases. If we are unable to determine the
    context of the version, we cannot continue. This implementation is
    alongside the GitFetcher because eventually the git repos cache will
    be one and the same with the source cache.

    Does nothing if a lookup is already attached to this version.

    Args:
        pkg: the package this commit belongs to; it is handed to
            ``CommitLookup`` to build the lookup object.
    """
    # A lookup has already been generated for this version
    if self.commit_lookup:
        return

    # Sanity check we have a commit
    if not self.is_commit:
        tty.die("%s is not a commit." % self)

    # Generate a commit looker-upper
    self.commit_lookup = CommitLookup(pkg)
    # Resolve this commit right away, then write the lookup data to the cache
    self.commit_lookup.get(self.string)
    self.commit_lookup.save()
class VersionRange(object):
@ -886,3 +974,164 @@ class VersionError(spack.error.SpackError):
class VersionChecksumError(VersionError):
"""Raised for version checksum errors."""
class VersionLookupError(VersionError):
    """Raised for errors looking up git commits as versions."""
    # NOTE(review): no code in the visible part of this file raises this
    # exception -- confirm where it is meant to be used.
class CommitLookup(object):
    """An object for cached lookups of git commits

    CommitLookup objects delegate to the misc_cache for locking.
    CommitLookup objects may be attached to a Version object for which
    Version.is_commit returns True to allow for comparisons between git commits
    and versions as represented by tags in the git repository.

    Lookup results are kept in ``self.data``, a dict mapping a commit string
    to the ``(previous version, distance)`` pair computed by
    ``lookup_commit``.
    """
    def __init__(self, pkg):
        """Set up the fetcher and the misc_cache entry for ``pkg.git``.

        Args:
            pkg: package object; its ``git`` attribute names the repository
                and its ``versions`` are matched against tags later on.
        """
        self.pkg = pkg

        # We require the full git repository history
        import spack.fetch_strategy  # break cycle
        fetcher = spack.fetch_strategy.GitFetchStrategy(git=pkg.git)
        fetcher.get_full_repo = True
        self.fetcher = fetcher

        # commit string -> lookup result; filled lazily by ``get``
        self.data = {}

        # Cache data in misc_cache
        key_base = 'git_metadata'
        # An absolute local path already starts with a separator
        if not self.repository_uri.startswith('/'):
            key_base += '/'
        self.cache_key = key_base + self.repository_uri
        spack.caches.misc_cache.init_entry(self.cache_key)
        self.cache_path = spack.caches.misc_cache.cache_path(self.cache_key)

    @property
    def repository_uri(self):
        """
        Identifier for git repos used within the repo and metadata caches.

        For a parseable git URL this is the non-empty URL components joined
        as a relative path; otherwise ``pkg.git`` is treated as a local path
        and its absolute form is returned.
        """
        try:
            # Drop empty components and leading slashes so that the pieces
            # join into a single relative path
            components = [str(c).lstrip('/')
                          for c in spack.util.url.parse_git_url(self.pkg.git)
                          if c]
            return os.path.join(*components)
        except ValueError:
            # If it's not a git url, it's a local path
            return os.path.abspath(self.pkg.git)

    def save(self):
        """
        Save the data to file

        Writes all of ``self.data`` to the cache entry inside a misc_cache
        write transaction.
        """
        with spack.caches.misc_cache.write_transaction(self.cache_key) as (old, new):
            sjson.dump(self.data, new)

    def load_data(self):
        """
        Load data if the path already exists.

        Replaces ``self.data`` with the content of the cache file; does
        nothing when the cache file is missing.
        """
        if os.path.isfile(self.cache_path):
            with spack.caches.misc_cache.read_transaction(self.cache_key) as cache_file:
                self.data = sjson.load(cache_file)

    def get(self, commit):
        """Return the lookup result for ``commit``, computing it if needed.

        Cached data is loaded first when nothing is held in memory. A commit
        missing from the data is looked up in the git repository and the
        result is saved to the cache immediately.
        """
        if not self.data:
            self.load_data()

        if commit not in self.data:
            self.data[commit] = self.lookup_commit(commit)
            self.save()

        return self.data[commit]

    def lookup_commit(self, commit):
        """Lookup the previous version and distance for a given commit.

        We use git to compare the known versions from package to the git tags,
        as well as any git tags that are SEMVER versions, and find the latest
        known version prior to the commit, as well as the distance from that version
        to the commit in the git repo. Those values are used to compare Version objects.

        Returns:
            (tuple): ``(prev_version, distance)``. ``prev_version`` is the
            version string of the nearest tagged ancestor, or ``None`` when
            no tagged ancestor was found; in that case ``distance`` is
            counted from the last commit listed by ``git log --all``.
        """
        dest = os.path.join(spack.paths.user_repos_cache_path, self.repository_uri)
        if dest.endswith('.git'):
            dest = dest[:-4]

        # prepare a cache for the repository
        dest_parent = os.path.dirname(dest)
        if not os.path.exists(dest_parent):
            mkdirp(dest_parent)

        # Only clone if we don't have it!
        if not os.path.exists(dest):
            self.fetcher.clone(dest, bare=True)

        # Lookup commit info
        with working_dir(dest):
            self.fetcher.git("fetch", '--tags')

            # Ensure commit is an object known to git
            # Note the brackets are literals, the commit replaces the format string
            # This will raise a ProcessError if the commit does not exist
            # We may later design a custom error to re-raise
            self.fetcher.git('cat-file', '-e', '%s^{commit}' % commit)

            # List tags (refs) by date, so last reference of a tag is newest
            tag_info = self.fetcher.git(
                "for-each-ref", "--sort=creatordate", "--format",
                "%(objectname) %(refname)", "refs/tags", output=str).split('\n')

            # Lookup of commits to spack versions
            commit_to_version = {}

            for entry in tag_info:
                if not entry:
                    continue
                tag_commit, tag = entry.split()
                tag = tag.replace('refs/tags/', '', 1)

                # For each tag, try to match to a version
                for v in [v.string for v in self.pkg.versions]:
                    if v == tag or 'v' + v == tag:
                        commit_to_version[tag_commit] = v
                        break
                else:
                    # No known package version matched this tag, so
                    # try to parse tag to compare versions spack does not know
                    match = SEMVER_REGEX.match(tag)
                    if match:
                        semver = match.groupdict()['semver']
                        commit_to_version[tag_commit] = semver

            # Collect every tagged object that is an ancestor of the commit,
            # together with its distance from the commit
            ancestor_commits = []
            for tag_commit in commit_to_version:
                # Exit code 1 is tolerated here; only a return code of 0
                # counts the tag as an ancestor below
                self.fetcher.git(
                    'merge-base', '--is-ancestor', tag_commit, commit,
                    ignore_errors=[1])
                if self.fetcher.git.returncode == 0:
                    distance = self.fetcher.git(
                        'rev-list', '%s..%s' % (tag_commit, commit), '--count',
                        output=str, error=str).strip()
                    ancestor_commits.append((tag_commit, int(distance)))

            # Get nearest ancestor that is a known version
            ancestor_commits.sort(key=lambda x: x[1])
            if ancestor_commits:
                prev_version_commit, distance = ancestor_commits[0]
                prev_version = commit_to_version[prev_version_commit]
            else:
                # Get list of all commits, this is in reverse order
                # We use this to get the first commit below
                commit_info = self.fetcher.git("log", "--all", "--pretty=format:%H",
                                               output=str)
                commits = [c for c in commit_info.split('\n') if c]

                # No previous version and distance from first commit
                prev_version = None
                distance = int(self.fetcher.git(
                    'rev-list', '%s..%s' % (commits[-1], commit), '--count',
                    output=str, error=str
                ).strip())

        return prev_version, distance

View File

@ -0,0 +1,24 @@
# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from spack import *
class GitTestCommit(Package):
    """Mock package that tests installing specific commit"""

    homepage = "http://www.git-fetch-example.com"
    # The ``git`` attribute is deliberately not set here; tests are expected
    # to supply it.
    # git='to-be-filled-in-by-test'

    # Declared versions and the repository tags they correspond to
    version('1.0', tag='v1.0')
    version('1.1', tag='v1.1')
    version('1.2', tag='1.2')  # not a typo
    version('2.0', tag='v2.0')

    def install(self, spec, prefix):
        """Check the spec's version range, then copy ``file.txt`` to ``prefix.bin``."""
        # NOTE(review): presumably holds because the installed commit has no
        # earlier tagged version and so compares below every numeric version
        # -- confirm against the version comparison rules.
        assert spec.satisfies('@:0')
        mkdir(prefix.bin)

        # This will only exist for some second commit
        install('file.txt', prefix.bin)