binary_distribution.py: list parent dirs in binary tarball (#41773)
* Bump the build cache layout version from 1 to 2 * Version to lists parent directories of the prefix in the tarball too, which is required from some container runtimes
This commit is contained in:
		| @@ -68,7 +68,10 @@ | ||||
| 
 | ||||
| BUILD_CACHE_RELATIVE_PATH = "build_cache" | ||||
| BUILD_CACHE_KEYS_RELATIVE_PATH = "_pgp" | ||||
| CURRENT_BUILD_CACHE_LAYOUT_VERSION = 1 | ||||
| 
 | ||||
| #: The build cache layout version that this version of Spack creates. | ||||
| #: Version 2: includes parent directories of the package prefix in the tarball | ||||
| CURRENT_BUILD_CACHE_LAYOUT_VERSION = 2 | ||||
| 
 | ||||
| 
 | ||||
| class BuildCacheDatabase(spack_db.Database): | ||||
| @@ -1174,6 +1177,16 @@ def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None: | ||||
|     except OSError: | ||||
|         files_to_skip = [] | ||||
| 
 | ||||
|     # First add all directories leading up to `prefix` (Spack <= 0.21 did not do this, leading to | ||||
|     # issues when tarballs are used in runtimes like AWS lambda). Skip the file system root. | ||||
|     parent_dirs = reversed(pathlib.Path(prefix).parents) | ||||
|     next(parent_dirs)  # skip the root: slices are supported from python 3.10 | ||||
|     for parent_dir in parent_dirs: | ||||
|         dir_info = tarfile.TarInfo(_tarinfo_name(str(parent_dir))) | ||||
|         dir_info.type = tarfile.DIRTYPE | ||||
|         dir_info.mode = 0o755 | ||||
|         tar.addfile(dir_info) | ||||
| 
 | ||||
|     dir_stack = [prefix] | ||||
|     while dir_stack: | ||||
|         dir = dir_stack.pop() | ||||
| @@ -2047,11 +2060,12 @@ def _extract_inner_tarball(spec, filename, extract_to, signature_required: bool, | ||||
| 
 | ||||
| 
 | ||||
| def _tar_strip_component(tar: tarfile.TarFile, prefix: str): | ||||
|     """Strip the top-level directory `prefix` from the member names in a tarfile.""" | ||||
|     """Yield all members of tarfile that start with given prefix, and strip that prefix (including | ||||
|     symlinks)""" | ||||
|     # Including trailing /, otherwise we end up with absolute paths. | ||||
|     regex = re.compile(re.escape(prefix) + "/*") | ||||
| 
 | ||||
|     # Remove the top-level directory from the member (link)names. | ||||
|     # Only yield members in the package prefix. | ||||
|     # Note: when a tarfile is created, relative in-prefix symlinks are | ||||
|     # expanded to matching member names of tarfile entries. So, we have | ||||
|     # to ensure that those are updated too. | ||||
| @@ -2059,12 +2073,14 @@ def _tar_strip_component(tar: tarfile.TarFile, prefix: str): | ||||
|     # them. | ||||
|     for m in tar.getmembers(): | ||||
|         result = regex.match(m.name) | ||||
|         assert result is not None | ||||
|         if not result: | ||||
|             continue | ||||
|         m.name = m.name[result.end() :] | ||||
|         if m.linkname: | ||||
|             result = regex.match(m.linkname) | ||||
|             if result: | ||||
|                 m.linkname = m.linkname[result.end() :] | ||||
|         yield m | ||||
| 
 | ||||
| 
 | ||||
| def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): | ||||
| @@ -2110,7 +2126,7 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): | ||||
|             _delete_staged_downloads(download_result) | ||||
|             shutil.rmtree(tmpdir) | ||||
|             raise e | ||||
|     elif layout_version == 1: | ||||
|     elif 1 <= layout_version <= 2: | ||||
|         # Newer buildcache layout: the .spack file contains just | ||||
|         # in the install tree, the signature, if it exists, is | ||||
|         # wrapped around the spec.json at the root.  If sig verify | ||||
| @@ -2138,8 +2154,10 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): | ||||
|     try: | ||||
|         with closing(tarfile.open(tarfile_path, "r")) as tar: | ||||
|             # Remove install prefix from tarfil to extract directly into spec.prefix | ||||
|             _tar_strip_component(tar, prefix=_ensure_common_prefix(tar)) | ||||
|             tar.extractall(path=spec.prefix) | ||||
|             tar.extractall( | ||||
|                 path=spec.prefix, | ||||
|                 members=_tar_strip_component(tar, prefix=_ensure_common_prefix(tar)), | ||||
|             ) | ||||
|     except Exception: | ||||
|         shutil.rmtree(spec.prefix, ignore_errors=True) | ||||
|         _delete_staged_downloads(download_result) | ||||
| @@ -2174,20 +2192,47 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): | ||||
| 
 | ||||
| 
 | ||||
| def _ensure_common_prefix(tar: tarfile.TarFile) -> str: | ||||
|     # Get the shortest length directory. | ||||
|     common_prefix = min((e.name for e in tar.getmembers() if e.isdir()), key=len, default=None) | ||||
|     # Find the lowest `binary_distribution` file (hard-coded forward slash is on purpose). | ||||
|     binary_distribution = min( | ||||
|         ( | ||||
|             e.name | ||||
|             for e in tar.getmembers() | ||||
|             if e.isfile() and e.name.endswith(".spack/binary_distribution") | ||||
|         ), | ||||
|         key=len, | ||||
|         default=None, | ||||
|     ) | ||||
| 
 | ||||
|     if common_prefix is None: | ||||
|         raise ValueError("Tarball does not contain a common prefix") | ||||
|     if binary_distribution is None: | ||||
|         raise ValueError("Tarball is not a Spack package, missing binary_distribution file") | ||||
| 
 | ||||
|     # Validate that each file starts with the prefix | ||||
|     pkg_path = pathlib.PurePosixPath(binary_distribution).parent.parent | ||||
| 
 | ||||
|     # Even the most ancient Spack version has required to list the dir of the package itself, so | ||||
|     # guard against broken tarballs where `path.parent.parent` is empty. | ||||
|     if pkg_path == pathlib.PurePosixPath(): | ||||
|         raise ValueError("Invalid tarball, missing package prefix dir") | ||||
| 
 | ||||
|     pkg_prefix = str(pkg_path) | ||||
| 
 | ||||
|     # Ensure all tar entries are in the pkg_prefix dir, and if they're not, they should be parent | ||||
|     # dirs of it. | ||||
|     has_prefix = False | ||||
|     for member in tar.getmembers(): | ||||
|         if not member.name.startswith(common_prefix): | ||||
|             raise ValueError( | ||||
|                 f"Tarball contains file {member.name} outside of prefix {common_prefix}" | ||||
|             ) | ||||
|         stripped = member.name.rstrip("/") | ||||
|         if not ( | ||||
|             stripped.startswith(pkg_prefix) or member.isdir() and pkg_prefix.startswith(stripped) | ||||
|         ): | ||||
|             raise ValueError(f"Tarball contains file {stripped} outside of prefix {pkg_prefix}") | ||||
|         if member.isdir() and stripped == pkg_prefix: | ||||
|             has_prefix = True | ||||
| 
 | ||||
|     return common_prefix | ||||
|     # This is technically not required, but let's be defensive about the existence of the package | ||||
|     # prefix dir. | ||||
|     if not has_prefix: | ||||
|         raise ValueError(f"Tarball does not contain a common prefix {pkg_prefix}") | ||||
| 
 | ||||
|     return pkg_prefix | ||||
| 
 | ||||
| 
 | ||||
| def install_root_node(spec, unsigned=False, force=False, sha256=None): | ||||
|   | ||||
| @@ -14,7 +14,7 @@ | ||||
| import urllib.error | ||||
| import urllib.request | ||||
| import urllib.response | ||||
| from pathlib import PurePath | ||||
| from pathlib import Path, PurePath | ||||
| 
 | ||||
| import py | ||||
| import pytest | ||||
| @@ -201,6 +201,9 @@ def dummy_prefix(tmpdir): | ||||
|     with open(data, "w") as f: | ||||
|         f.write("hello world") | ||||
| 
 | ||||
|     with open(p.join(".spack", "binary_distribution"), "w") as f: | ||||
|         f.write("{}") | ||||
| 
 | ||||
|     os.symlink("app", relative_app_link) | ||||
|     os.symlink(app, absolute_app_link) | ||||
| 
 | ||||
| @@ -887,24 +890,29 @@ def urlopen(request: urllib.request.Request): | ||||
|         fetcher.conditional_fetch() | ||||
| 
 | ||||
| 
 | ||||
| def test_tarball_doesnt_include_buildinfo_twice(tmpdir): | ||||
| def _all_parents(prefix): | ||||
|     parts = [p for p in prefix.split("/")] | ||||
|     return ["/".join(parts[: i + 1]) for i in range(len(parts))] | ||||
| 
 | ||||
| 
 | ||||
| def test_tarball_doesnt_include_buildinfo_twice(tmp_path: Path): | ||||
|     """When tarballing a package that was installed from a buildcache, make | ||||
|     sure that the buildinfo file is not included twice in the tarball.""" | ||||
|     p = tmpdir.mkdir("prefix") | ||||
|     p.mkdir(".spack") | ||||
|     p = tmp_path / "prefix" | ||||
|     p.joinpath(".spack").mkdir(parents=True) | ||||
| 
 | ||||
|     # Create a binary_distribution file in the .spack folder | ||||
|     with open(p.join(".spack", "binary_distribution"), "w") as f: | ||||
|     with open(p / ".spack" / "binary_distribution", "w") as f: | ||||
|         f.write(syaml.dump({"metadata", "old"})) | ||||
| 
 | ||||
|     # Now create a tarball, which should include a new binary_distribution file | ||||
|     tarball = str(tmpdir.join("prefix.tar.gz")) | ||||
|     tarball = str(tmp_path / "prefix.tar.gz") | ||||
| 
 | ||||
|     bindist._do_create_tarball( | ||||
|         tarfile_path=tarball, binaries_dir=p.strpath, buildinfo={"metadata": "new"} | ||||
|         tarfile_path=tarball, binaries_dir=str(p), buildinfo={"metadata": "new"} | ||||
|     ) | ||||
| 
 | ||||
|     expected_prefix = p.strpath.lstrip("/") | ||||
|     expected_prefix = str(p).lstrip("/") | ||||
| 
 | ||||
|     # Verify we don't have a repeated binary_distribution file, | ||||
|     # and that the tarball contains the new one, not the old one. | ||||
| @@ -913,21 +921,20 @@ def test_tarball_doesnt_include_buildinfo_twice(tmpdir): | ||||
|             "metadata": "new" | ||||
|         } | ||||
|         assert tar.getnames() == [ | ||||
|             f"{expected_prefix}", | ||||
|             *_all_parents(expected_prefix), | ||||
|             f"{expected_prefix}/.spack", | ||||
|             f"{expected_prefix}/.spack/binary_distribution", | ||||
|         ] | ||||
| 
 | ||||
| 
 | ||||
| def test_reproducible_tarball_is_reproducible(tmpdir): | ||||
|     p = tmpdir.mkdir("prefix") | ||||
|     p.mkdir("bin") | ||||
|     p.mkdir(".spack") | ||||
| def test_reproducible_tarball_is_reproducible(tmp_path: Path): | ||||
|     p = tmp_path / "prefix" | ||||
|     p.joinpath("bin").mkdir(parents=True) | ||||
|     p.joinpath(".spack").mkdir(parents=True) | ||||
|     app = p / "bin" / "app" | ||||
| 
 | ||||
|     app = p.join("bin", "app") | ||||
| 
 | ||||
|     tarball_1 = str(tmpdir.join("prefix-1.tar.gz")) | ||||
|     tarball_2 = str(tmpdir.join("prefix-2.tar.gz")) | ||||
|     tarball_1 = str(tmp_path / "prefix-1.tar.gz") | ||||
|     tarball_2 = str(tmp_path / "prefix-2.tar.gz") | ||||
| 
 | ||||
|     with open(app, "w") as f: | ||||
|         f.write("hello world") | ||||
| @@ -936,16 +943,16 @@ def test_reproducible_tarball_is_reproducible(tmpdir): | ||||
| 
 | ||||
|     # Create a tarball with a certain mtime of bin/app | ||||
|     os.utime(app, times=(0, 0)) | ||||
|     bindist._do_create_tarball(tarball_1, binaries_dir=p.strpath, buildinfo=buildinfo) | ||||
|     bindist._do_create_tarball(tarball_1, binaries_dir=str(p), buildinfo=buildinfo) | ||||
| 
 | ||||
|     # Do it another time with different mtime of bin/app | ||||
|     os.utime(app, times=(10, 10)) | ||||
|     bindist._do_create_tarball(tarball_2, binaries_dir=p.strpath, buildinfo=buildinfo) | ||||
|     bindist._do_create_tarball(tarball_2, binaries_dir=str(p), buildinfo=buildinfo) | ||||
| 
 | ||||
|     # They should be bitwise identical: | ||||
|     assert filecmp.cmp(tarball_1, tarball_2, shallow=False) | ||||
| 
 | ||||
|     expected_prefix = p.strpath.lstrip("/") | ||||
|     expected_prefix = str(p).lstrip("/") | ||||
| 
 | ||||
|     # Sanity check for contents: | ||||
|     with tarfile.open(tarball_1, mode="r") as f: | ||||
| @@ -954,7 +961,7 @@ def test_reproducible_tarball_is_reproducible(tmpdir): | ||||
|             assert m.uname == m.gname == "" | ||||
| 
 | ||||
|         assert set(f.getnames()) == { | ||||
|             f"{expected_prefix}", | ||||
|             *_all_parents(expected_prefix), | ||||
|             f"{expected_prefix}/bin", | ||||
|             f"{expected_prefix}/bin/app", | ||||
|             f"{expected_prefix}/.spack", | ||||
| @@ -1002,8 +1009,10 @@ def test_tarball_normalized_permissions(tmpdir): | ||||
| 
 | ||||
| 
 | ||||
| def test_tarball_common_prefix(dummy_prefix, tmpdir): | ||||
|     """Tests whether Spack can figure out the package directory | ||||
|     from the tarball contents, and strip them when extracting.""" | ||||
|     """Tests whether Spack can figure out the package directory from the tarball contents, and | ||||
|     strip them when extracting. This test creates a CURRENT_BUILD_CACHE_LAYOUT_VERSION=1 type | ||||
|     tarball where the parent directories of the package prefix are missing. Spack should be able | ||||
|     to figure out the common prefix and extract the files into the correct location.""" | ||||
| 
 | ||||
|     # When creating a tarball, Python (and tar) use relative paths, | ||||
|     # Absolute paths become relative to `/`, so drop the leading `/`. | ||||
| @@ -1020,11 +1029,10 @@ def test_tarball_common_prefix(dummy_prefix, tmpdir): | ||||
|             common_prefix = bindist._ensure_common_prefix(tar) | ||||
|             assert common_prefix == expected_prefix | ||||
| 
 | ||||
|             # Strip the prefix from the tar entries | ||||
|             bindist._tar_strip_component(tar, common_prefix) | ||||
| 
 | ||||
|             # Extract into prefix2 | ||||
|             tar.extractall(path="prefix2") | ||||
|             tar.extractall( | ||||
|                 path="prefix2", members=bindist._tar_strip_component(tar, common_prefix) | ||||
|             ) | ||||
| 
 | ||||
|         # Verify files are all there at the correct level. | ||||
|         assert set(os.listdir("prefix2")) == {"bin", "share", ".spack"} | ||||
| @@ -1044,13 +1052,30 @@ def test_tarball_common_prefix(dummy_prefix, tmpdir): | ||||
|         ) | ||||
| 
 | ||||
| 
 | ||||
| def test_tarfile_missing_binary_distribution_file(tmpdir): | ||||
|     """A tarfile that does not contain a .spack/binary_distribution file cannot be | ||||
|     used to install.""" | ||||
|     with tmpdir.as_cwd(): | ||||
|         # An empty .spack dir. | ||||
|         with tarfile.open("empty.tar", mode="w") as tar: | ||||
|             tarinfo = tarfile.TarInfo(name="example/.spack") | ||||
|             tarinfo.type = tarfile.DIRTYPE | ||||
|             tar.addfile(tarinfo) | ||||
| 
 | ||||
|         with pytest.raises(ValueError, match="missing binary_distribution file"): | ||||
|             bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r")) | ||||
| 
 | ||||
| 
 | ||||
| def test_tarfile_without_common_directory_prefix_fails(tmpdir): | ||||
|     """A tarfile that only contains files without a common package directory | ||||
|     should fail to extract, as we won't know where to put the files.""" | ||||
|     with tmpdir.as_cwd(): | ||||
|         # Create a broken tarball with just a file, no directories. | ||||
|         with tarfile.open("empty.tar", mode="w") as tar: | ||||
|             tar.addfile(tarfile.TarInfo(name="example/file"), fileobj=io.BytesIO(b"hello")) | ||||
|             tar.addfile( | ||||
|                 tarfile.TarInfo(name="example/.spack/binary_distribution"), | ||||
|                 fileobj=io.BytesIO(b"hello"), | ||||
|             ) | ||||
| 
 | ||||
|         with pytest.raises(ValueError, match="Tarball does not contain a common prefix"): | ||||
|             bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r")) | ||||
| @@ -1091,7 +1116,7 @@ def test_tarfile_of_spec_prefix(tmpdir): | ||||
|         # Verify that entries are added in depth-first pre-order, files preceding dirs, | ||||
|         # entries ordered alphabetically | ||||
|         assert tar.getnames() == [ | ||||
|             f"{expected_prefix}", | ||||
|             *_all_parents(expected_prefix), | ||||
|             f"{expected_prefix}/file", | ||||
|             f"{expected_prefix}/hardlink", | ||||
|             f"{expected_prefix}/symlink", | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Harmen Stoppels
					Harmen Stoppels