Add options for sparse checkout in GitFetcher (#45473)
* Add options for sparse checkout in GitFetcher Newer versions of git have a beta feature called sparse checkout that allow users to check out a portion of a large repo. This feature will be ideal for monolithic repo projects that want to model their infrastructure via spack. This PR implements an addition to the GitFetcher that allows users to add a `git_sparse_paths` attribute to package classes or versions which will then use sparse checkout on those directories/files for the package. * Style * Split git clone into multiple functions * Add sparse-checkout impl * Internalize src clone functions * Docs * Adding sparse clone test * Add test for partial clone * [@spackbot] updating style on behalf of psakievich * Small fixes * Restore default branch status * Fix attributes for package * Update lib/spack/docs/packaging_guide.rst Co-authored-by: Matthew Mosby <44072882+mdmosby@users.noreply.github.com> * Extend unit test to multiple git versions * style --------- Co-authored-by: psakievich <psakievich@users.noreply.github.com> Co-authored-by: Matthew Mosby <44072882+mdmosby@users.noreply.github.com>
This commit is contained in:
parent
55b1b0f3f0
commit
1b82779087
@ -1263,6 +1263,11 @@ Git fetching supports the following parameters to ``version``:
|
||||
option ``--depth 1`` will be used if the version of git and the specified
|
||||
transport protocol support it, and ``--single-branch`` will be used if the
|
||||
version of git supports it.
|
||||
* ``git_sparse_paths``: Use ``sparse-checkout`` to only clone these relative paths.
|
||||
This feature requires ``git`` to be version ``2.25.0`` or later but is useful for
|
||||
large repositories that have separate portions that can be built independently.
|
||||
If paths provided are directories then all the subdirectories and associated files
|
||||
will also be cloned.
|
||||
|
||||
Only one of ``tag``, ``branch``, or ``commit`` can be used at a time.
|
||||
|
||||
@ -1361,6 +1366,41 @@ Submodules
|
||||
For more information about git submodules see the manpage of git: ``man
|
||||
git-submodule``.
|
||||
|
||||
Sparse-Checkout
|
||||
You can supply ``git_sparse_paths`` at the package or version level to utilize git's
|
||||
sparse-checkout feature. This will only clone the paths that are specified in the
|
||||
``git_sparse_paths`` attribute for the package along with the files in the top level directory.
|
||||
This feature allows you to only clone what you need from a large repository.
|
||||
Note that this is a newer feature in git and requires git ``2.25.0`` or greater.
|
||||
If ``git_sparse_paths`` is supplied and the git version is too old
|
||||
then a warning will be issued and that package will use the standard cloning operations instead.
|
||||
``git_sparse_paths`` should be supplied as a list of paths, a callable function for versions,
|
||||
or a more complex package attribute using the ``@property`` decorator. The return value should be
|
||||
a list for a callable implementation of ``git_sparse_paths``.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
def sparse_path_function(package):
    """A callable that can be used inside a version to supply its sparse paths.

    Receives the package and returns the list of relative paths to check out.
    """
    # paths can be directories or files; for directories, all subdirectories
    # and files are included
    paths = ["doe", "rae", "me/file.cpp"]
    # one extra directory is included for versions newer than 1.2.0
    if package.spec.version > Version("1.2.0"):
        paths.append("fae")
    return paths
|
||||
|
||||
class MyPackage(Package):
    """Example package mixing a package-level and per-version ``git_sparse_paths``."""

    # can also be a package attribute that will be used if not specified in versions
    git_sparse_paths = ["doe", "rae"]

    # use the package attribute
    version("1.0.0")
    version("1.1.0")
    # use the function
    version("1.1.5", git_sparse_paths=sparse_path_function)
    version("1.2.0", git_sparse_paths=sparse_path_function)
    version("1.2.5", git_sparse_paths=sparse_path_function)
    version("1.3.0", git_sparse_paths=sparse_path_function)
|
||||
|
||||
.. _github-fetch:
|
||||
|
||||
^^^^^^
|
||||
|
@ -720,6 +720,7 @@ class GitFetchStrategy(VCSFetchStrategy):
|
||||
"submodules",
|
||||
"get_full_repo",
|
||||
"submodules_delete",
|
||||
"git_sparse_paths",
|
||||
]
|
||||
|
||||
git_version_re = r"git version (\S+)"
|
||||
@ -735,6 +736,7 @@ def __init__(self, **kwargs):
|
||||
self.submodules = kwargs.get("submodules", False)
|
||||
self.submodules_delete = kwargs.get("submodules_delete", False)
|
||||
self.get_full_repo = kwargs.get("get_full_repo", False)
|
||||
self.git_sparse_paths = kwargs.get("git_sparse_paths", None)
|
||||
|
||||
@property
|
||||
def git_version(self):
|
||||
@ -802,38 +804,50 @@ def fetch(self):
|
||||
tty.debug("Already fetched {0}".format(self.stage.source_path))
|
||||
return
|
||||
|
||||
self.clone(commit=self.commit, branch=self.branch, tag=self.tag)
|
||||
if self.git_sparse_paths:
|
||||
self._sparse_clone_src(commit=self.commit, branch=self.branch, tag=self.tag)
|
||||
else:
|
||||
self._clone_src(commit=self.commit, branch=self.branch, tag=self.tag)
|
||||
self.submodule_operations()
|
||||
|
||||
def clone(self, dest=None, commit=None, branch=None, tag=None, bare=False):
|
||||
def bare_clone(self, dest):
|
||||
"""
|
||||
Clone a repository to a path.
|
||||
Execute a bare clone for metadata only
|
||||
|
||||
This method handles cloning from git, but does not require a stage.
|
||||
|
||||
Arguments:
|
||||
dest (str or None): The path into which the code is cloned. If None,
|
||||
requires a stage and uses the stage's source path.
|
||||
commit (str or None): A commit to fetch from the remote. Only one of
|
||||
commit, branch, and tag may be non-None.
|
||||
branch (str or None): A branch to fetch from the remote.
|
||||
tag (str or None): A tag to fetch from the remote.
|
||||
bare (bool): Execute a "bare" git clone (--bare option to git)
|
||||
Requires a destination since bare cloning does not provide source
|
||||
and shouldn't be used for staging.
|
||||
"""
|
||||
# Default to spack source path
|
||||
dest = dest or self.stage.source_path
|
||||
tty.debug("Cloning git repository: {0}".format(self._repo_info()))
|
||||
|
||||
git = self.git
|
||||
debug = spack.config.get("config:debug")
|
||||
|
||||
if bare:
|
||||
# We don't need to worry about which commit/branch/tag is checked out
|
||||
clone_args = ["clone", "--bare"]
|
||||
if not debug:
|
||||
clone_args.append("--quiet")
|
||||
clone_args.extend([self.url, dest])
|
||||
git(*clone_args)
|
||||
elif commit:
|
||||
# We don't need to worry about which commit/branch/tag is checked out
|
||||
clone_args = ["clone", "--bare"]
|
||||
if not debug:
|
||||
clone_args.append("--quiet")
|
||||
clone_args.extend([self.url, dest])
|
||||
git(*clone_args)
|
||||
|
||||
def _clone_src(self, commit=None, branch=None, tag=None):
|
||||
"""
|
||||
Clone a repository to a path using git.
|
||||
|
||||
Arguments:
|
||||
commit (str or None): A commit to fetch from the remote. Only one of
|
||||
commit, branch, and tag may be non-None.
|
||||
branch (str or None): A branch to fetch from the remote.
|
||||
tag (str or None): A tag to fetch from the remote.
|
||||
"""
|
||||
# Default to spack source path
|
||||
dest = self.stage.source_path
|
||||
tty.debug("Cloning git repository: {0}".format(self._repo_info()))
|
||||
|
||||
git = self.git
|
||||
debug = spack.config.get("config:debug")
|
||||
|
||||
if commit:
|
||||
# Need to do a regular clone and check out everything if
|
||||
# they asked for a particular commit.
|
||||
clone_args = ["clone", self.url]
|
||||
@ -912,6 +926,85 @@ def clone(self, dest=None, commit=None, branch=None, tag=None, bare=False):
|
||||
git(*pull_args, ignore_errors=1)
|
||||
git(*co_args)
|
||||
|
||||
def _sparse_clone_src(self, commit=None, branch=None, tag=None, **kwargs):
    """
    Clone only selected paths of the repository via git's sparse-checkout.

    If the detected git version is below the threshold checked here, a warning is
    issued and the work is delegated to ``_clone_src`` instead.

    Arguments:
        commit (str or None): A commit to check out. Only one of
            commit, branch, and tag may be non-None.
        branch (str or None): A branch to clone and check out.
        tag (str or None): A tag to clone and check out.
        **kwargs: ``depth`` (defaults to 2) is used as the ``--depth`` of the clone.
    """
    dest = self.stage.source_path
    git = self.git

    if self.git_version < spack.version.Version("2.25.0.0"):
        # The package is not set on every code path, so start from the URL and only
        # add the package name when one is available.
        label = str(self.url)
        if self.package:
            label = f"{label} ({self.package.name})"
        message = (
            f"{label} is configured for git sparse-checkout "
            "but the git version is too old to support sparse cloning. "
            "Cloning the full repository instead."
        )
        tty.warn(message)
        self._clone_src(commit, branch, tag)
        return

    # default to depth=2 to allow for retention of some git properties
    depth = kwargs.get("depth", 2)
    named_ref = branch or tag
    git_ref = named_ref or commit

    assert git_ref

    # ``--branch`` is only passed for a branch or tag; a commit is selected by the
    # checkout performed after the clone.
    clone_args = ["clone", "--branch", git_ref] if named_ref else ["clone"]
    clone_args.append("--no-single-branch" if self.get_full_repo else "--single-branch")
    clone_args += [f"--depth={depth}", "--no-checkout", "--filter=blob:none", self.url]

    # the configured paths are either a ready-made iterable or a callable producing one
    sparse_paths = self.git_sparse_paths
    sparse_args = ["sparse-checkout", "set"]
    sparse_args.extend(sparse_paths() if callable(sparse_paths) else sparse_paths)
    sparse_args.append("--cone")

    checkout_args = ["checkout", git_ref]

    if not spack.config.get("config:debug"):
        # outside of debug mode, silence both the clone and the checkout
        for argv in (clone_args, checkout_args):
            argv.insert(1, "--quiet")

    # clone while inside ``temp_cwd()``, then move the entry reported by
    # ``get_single_file(".")`` to the stage's source path
    with temp_cwd():
        git(*clone_args)
        cloned_dir = get_single_file(".")
        if self.stage:
            self.stage.srcdir = cloned_dir
        shutil.move(cloned_dir, dest)

    # the clone used --no-checkout, so configure the sparse paths first and only
    # then check out the requested ref
    with working_dir(dest):
        git(*sparse_args)
        git(*checkout_args)
|
||||
|
||||
def submodule_operations(self):
|
||||
dest = self.stage.source_path
|
||||
git = self.git
|
||||
|
||||
if self.submodules_delete:
|
||||
with working_dir(dest):
|
||||
for submodule_to_delete in self.submodules_delete:
|
||||
@ -1541,8 +1634,11 @@ def _from_merged_attrs(fetcher, pkg, version):
|
||||
attrs["fetch_options"] = pkg.fetch_options
|
||||
attrs.update(pkg.versions[version])
|
||||
|
||||
if fetcher.url_attr == "git" and hasattr(pkg, "submodules"):
|
||||
attrs.setdefault("submodules", pkg.submodules)
|
||||
if fetcher.url_attr == "git":
|
||||
pkg_attr_list = ["submodules", "git_sparse_paths"]
|
||||
for pkg_attr in pkg_attr_list:
|
||||
if hasattr(pkg, pkg_attr):
|
||||
attrs.setdefault(pkg_attr, getattr(pkg, pkg_attr))
|
||||
|
||||
return fetcher(**attrs)
|
||||
|
||||
|
@ -1418,6 +1418,24 @@ def mock_git_repository(git, tmpdir_factory):
|
||||
r1 = rev_hash(branch)
|
||||
r1_file = branch_file
|
||||
|
||||
multiple_directories_branch = "many_dirs"
|
||||
num_dirs = 3
|
||||
num_files = 2
|
||||
dir_files = []
|
||||
for i in range(num_dirs):
|
||||
for j in range(num_files):
|
||||
dir_files.append(f"dir{i}/file{j}")
|
||||
|
||||
git("checkout", "-b", multiple_directories_branch)
|
||||
for f in dir_files:
|
||||
repodir.ensure(f, file=True)
|
||||
git("add", f)
|
||||
|
||||
git("-c", "commit.gpgsign=false", "commit", "-m", "many_dirs add files")
|
||||
|
||||
# restore default
|
||||
git("checkout", default_branch)
|
||||
|
||||
# Map of version -> bunch. Each bunch includes; all the args
|
||||
# that must be specified as part of a version() declaration (used to
|
||||
# manufacture a version for the 'git-test' package); the associated
|
||||
@ -1437,6 +1455,11 @@ def mock_git_repository(git, tmpdir_factory):
|
||||
"default-no-per-version-git": Bunch(
|
||||
revision=default_branch, file=r0_file, args={"branch": default_branch}
|
||||
),
|
||||
"many-directories": Bunch(
|
||||
revision=multiple_directories_branch,
|
||||
file=dir_files[0],
|
||||
args={"git": url, "branch": multiple_directories_branch},
|
||||
),
|
||||
}
|
||||
|
||||
t = Bunch(
|
||||
|
@ -390,3 +390,38 @@ def submodules_callback(package):
|
||||
assert not os.path.isfile(file_path)
|
||||
file_path = os.path.join(s.package.stage.source_path, "third_party/submodule1/r0_file_1")
|
||||
assert not os.path.isfile(file_path)
|
||||
|
||||
|
||||
@pytest.mark.disable_clean_stage_check
def test_git_sparse_paths_partial_clone(
    mock_git_repository, git_version, default_mock_concretization, mutable_mock_repo, monkeypatch
):
    """
    Stage ``git-test`` with ``git_sparse_paths`` set and check which directories exist
    """
    kept_dirs = ["dir0"]
    skipped_dirs = ["dir1", "dir2"]

    check = mock_git_repository.checks["many-directories"]
    version_args = copy.copy(check.args)
    version_args["git_sparse_paths"] = kept_dirs

    spec = default_mock_concretization("git-test")
    monkeypatch.setitem(spec.package.versions, Version("git"), version_args)
    spec.package.do_stage()

    # older versions of git should fall back to a full clone
    expect_full_clone = bool(git_version < Version("2.25.0.0"))

    with working_dir(spec.package.stage.source_path):
        # top level directory files are cloned via sparse-checkout
        assert os.path.isfile("r0_file")

        assert all(os.path.isdir(d) for d in kept_dirs)

        # the other directories are present only when a full clone was made
        assert all(os.path.isdir(d) == expect_full_clone for d in skipped_dirs)

        # fixture file is in the sparse-path expansion tree
        assert os.path.isfile(check.file)
|
||||
|
@ -259,6 +259,7 @@ def test_git_url_top_level_git_versions(version_str, tag, commit, branch):
|
||||
assert fetcher.tag == tag
|
||||
assert fetcher.commit == commit
|
||||
assert fetcher.branch == branch
|
||||
assert fetcher.url == pkg_factory("git-url-top-level").git
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("mock_packages", "config")
|
||||
@ -319,3 +320,14 @@ def test_package_deprecated_version(mock_packages, mock_fetch, mock_stage):
|
||||
|
||||
assert spack.package_base.deprecated_version(pkg_cls, "1.1.0")
|
||||
assert not spack.package_base.deprecated_version(pkg_cls, "1.0.0")
|
||||
|
||||
|
||||
def test_package_can_have_sparse_checkout_properties(mock_packages, mock_fetch, mock_stage):
    """The package-level ``git_sparse_paths`` attribute is carried over to the git fetcher."""
    spec = Spec("git-sparsepaths-pkg")
    package_class = spack.repo.PATH.get_pkg_class(spec.name)
    assert hasattr(package_class, "git_sparse_paths")

    package = package_class(spec)
    git_fetcher = spack.fetch_strategy.for_package_version(package, "1.0")
    assert isinstance(git_fetcher, spack.fetch_strategy.GitFetchStrategy)
    assert hasattr(git_fetcher, "git_sparse_paths")
    assert git_fetcher.git_sparse_paths == package_class.git_sparse_paths
|
||||
|
@ -138,7 +138,7 @@ def lookup_ref(self, ref) -> Tuple[Optional[str], int]:
|
||||
|
||||
# Only clone if we don't have it!
|
||||
if not os.path.exists(dest):
|
||||
self.fetcher.clone(dest, bare=True)
|
||||
self.fetcher.bare_clone(dest)
|
||||
|
||||
# Lookup commit info
|
||||
with working_dir(dest):
|
||||
|
@ -0,0 +1,17 @@
|
||||
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
|
||||
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
from spack.package import *
|
||||
|
||||
|
||||
class GitSparsepathsPkg(Package):
    """Mock package that declares a package-level git_sparse_paths list"""

    homepage = "http://www.git-fetch-example.com"
    git = "https://a/really.com/big/repo.git"

    # declared on the class rather than on an individual version
    git_sparse_paths = ["foo", "bar", "bing/bang"]

    version("1.0", tag="v1.0")
|
Loading…
Reference in New Issue
Block a user