binary_distribution.py: list parent dirs in binary tarball (#41773)

* Bump the build cache layout version from 1 to 2
* Version to lists parent directories of the prefix in the tarball too, which is required from some container runtimes
This commit is contained in:
Harmen Stoppels 2024-01-10 13:21:15 +01:00 committed by GitHub
parent feebd35f91
commit 8c1226ece8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 116 additions and 46 deletions

View File

@ -68,7 +68,10 @@
BUILD_CACHE_RELATIVE_PATH = "build_cache"
BUILD_CACHE_KEYS_RELATIVE_PATH = "_pgp"
CURRENT_BUILD_CACHE_LAYOUT_VERSION = 1
#: The build cache layout version that this version of Spack creates.
#: Version 2: includes parent directories of the package prefix in the tarball
CURRENT_BUILD_CACHE_LAYOUT_VERSION = 2
class BuildCacheDatabase(spack_db.Database):
@ -1174,6 +1177,16 @@ def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None:
except OSError:
files_to_skip = []
# First add all directories leading up to `prefix` (Spack <= 0.21 did not do this, leading to
# issues when tarballs are used in runtimes like AWS lambda). Skip the file system root.
parent_dirs = reversed(pathlib.Path(prefix).parents)
next(parent_dirs) # skip the root: slices are supported from python 3.10
for parent_dir in parent_dirs:
dir_info = tarfile.TarInfo(_tarinfo_name(str(parent_dir)))
dir_info.type = tarfile.DIRTYPE
dir_info.mode = 0o755
tar.addfile(dir_info)
dir_stack = [prefix]
while dir_stack:
dir = dir_stack.pop()
@ -2047,11 +2060,12 @@ def _extract_inner_tarball(spec, filename, extract_to, signature_required: bool,
def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
"""Strip the top-level directory `prefix` from the member names in a tarfile."""
"""Yield all members of tarfile that start with given prefix, and strip that prefix (including
symlinks)"""
# Including trailing /, otherwise we end up with absolute paths.
regex = re.compile(re.escape(prefix) + "/*")
# Remove the top-level directory from the member (link)names.
# Only yield members in the package prefix.
# Note: when a tarfile is created, relative in-prefix symlinks are
# expanded to matching member names of tarfile entries. So, we have
# to ensure that those are updated too.
@ -2059,12 +2073,14 @@ def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
# them.
for m in tar.getmembers():
result = regex.match(m.name)
assert result is not None
if not result:
continue
m.name = m.name[result.end() :]
if m.linkname:
result = regex.match(m.linkname)
if result:
m.linkname = m.linkname[result.end() :]
yield m
def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
@ -2110,7 +2126,7 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
_delete_staged_downloads(download_result)
shutil.rmtree(tmpdir)
raise e
elif layout_version == 1:
elif 1 <= layout_version <= 2:
# Newer buildcache layout: the .spack file contains just
# in the install tree, the signature, if it exists, is
# wrapped around the spec.json at the root. If sig verify
@ -2138,8 +2154,10 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
try:
with closing(tarfile.open(tarfile_path, "r")) as tar:
# Remove install prefix from tarfil to extract directly into spec.prefix
_tar_strip_component(tar, prefix=_ensure_common_prefix(tar))
tar.extractall(path=spec.prefix)
tar.extractall(
path=spec.prefix,
members=_tar_strip_component(tar, prefix=_ensure_common_prefix(tar)),
)
except Exception:
shutil.rmtree(spec.prefix, ignore_errors=True)
_delete_staged_downloads(download_result)
@ -2174,20 +2192,47 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
def _ensure_common_prefix(tar: tarfile.TarFile) -> str:
# Get the shortest length directory.
common_prefix = min((e.name for e in tar.getmembers() if e.isdir()), key=len, default=None)
# Find the lowest `binary_distribution` file (hard-coded forward slash is on purpose).
binary_distribution = min(
(
e.name
for e in tar.getmembers()
if e.isfile() and e.name.endswith(".spack/binary_distribution")
),
key=len,
default=None,
)
if common_prefix is None:
raise ValueError("Tarball does not contain a common prefix")
if binary_distribution is None:
raise ValueError("Tarball is not a Spack package, missing binary_distribution file")
# Validate that each file starts with the prefix
pkg_path = pathlib.PurePosixPath(binary_distribution).parent.parent
# Even the most ancient Spack version has required to list the dir of the package itself, so
# guard against broken tarballs where `path.parent.parent` is empty.
if pkg_path == pathlib.PurePosixPath():
raise ValueError("Invalid tarball, missing package prefix dir")
pkg_prefix = str(pkg_path)
# Ensure all tar entries are in the pkg_prefix dir, and if they're not, they should be parent
# dirs of it.
has_prefix = False
for member in tar.getmembers():
if not member.name.startswith(common_prefix):
raise ValueError(
f"Tarball contains file {member.name} outside of prefix {common_prefix}"
)
stripped = member.name.rstrip("/")
if not (
stripped.startswith(pkg_prefix) or member.isdir() and pkg_prefix.startswith(stripped)
):
raise ValueError(f"Tarball contains file {stripped} outside of prefix {pkg_prefix}")
if member.isdir() and stripped == pkg_prefix:
has_prefix = True
return common_prefix
# This is technically not required, but let's be defensive about the existence of the package
# prefix dir.
if not has_prefix:
raise ValueError(f"Tarball does not contain a common prefix {pkg_prefix}")
return pkg_prefix
def install_root_node(spec, unsigned=False, force=False, sha256=None):

View File

@ -14,7 +14,7 @@
import urllib.error
import urllib.request
import urllib.response
from pathlib import PurePath
from pathlib import Path, PurePath
import py
import pytest
@ -201,6 +201,9 @@ def dummy_prefix(tmpdir):
with open(data, "w") as f:
f.write("hello world")
with open(p.join(".spack", "binary_distribution"), "w") as f:
f.write("{}")
os.symlink("app", relative_app_link)
os.symlink(app, absolute_app_link)
@ -887,24 +890,29 @@ def urlopen(request: urllib.request.Request):
fetcher.conditional_fetch()
def test_tarball_doesnt_include_buildinfo_twice(tmpdir):
def _all_parents(prefix):
parts = [p for p in prefix.split("/")]
return ["/".join(parts[: i + 1]) for i in range(len(parts))]
def test_tarball_doesnt_include_buildinfo_twice(tmp_path: Path):
"""When tarballing a package that was installed from a buildcache, make
sure that the buildinfo file is not included twice in the tarball."""
p = tmpdir.mkdir("prefix")
p.mkdir(".spack")
p = tmp_path / "prefix"
p.joinpath(".spack").mkdir(parents=True)
# Create a binary_distribution file in the .spack folder
with open(p.join(".spack", "binary_distribution"), "w") as f:
with open(p / ".spack" / "binary_distribution", "w") as f:
f.write(syaml.dump({"metadata", "old"}))
# Now create a tarball, which should include a new binary_distribution file
tarball = str(tmpdir.join("prefix.tar.gz"))
tarball = str(tmp_path / "prefix.tar.gz")
bindist._do_create_tarball(
tarfile_path=tarball, binaries_dir=p.strpath, buildinfo={"metadata": "new"}
tarfile_path=tarball, binaries_dir=str(p), buildinfo={"metadata": "new"}
)
expected_prefix = p.strpath.lstrip("/")
expected_prefix = str(p).lstrip("/")
# Verify we don't have a repeated binary_distribution file,
# and that the tarball contains the new one, not the old one.
@ -913,21 +921,20 @@ def test_tarball_doesnt_include_buildinfo_twice(tmpdir):
"metadata": "new"
}
assert tar.getnames() == [
f"{expected_prefix}",
*_all_parents(expected_prefix),
f"{expected_prefix}/.spack",
f"{expected_prefix}/.spack/binary_distribution",
]
def test_reproducible_tarball_is_reproducible(tmpdir):
p = tmpdir.mkdir("prefix")
p.mkdir("bin")
p.mkdir(".spack")
def test_reproducible_tarball_is_reproducible(tmp_path: Path):
p = tmp_path / "prefix"
p.joinpath("bin").mkdir(parents=True)
p.joinpath(".spack").mkdir(parents=True)
app = p / "bin" / "app"
app = p.join("bin", "app")
tarball_1 = str(tmpdir.join("prefix-1.tar.gz"))
tarball_2 = str(tmpdir.join("prefix-2.tar.gz"))
tarball_1 = str(tmp_path / "prefix-1.tar.gz")
tarball_2 = str(tmp_path / "prefix-2.tar.gz")
with open(app, "w") as f:
f.write("hello world")
@ -936,16 +943,16 @@ def test_reproducible_tarball_is_reproducible(tmpdir):
# Create a tarball with a certain mtime of bin/app
os.utime(app, times=(0, 0))
bindist._do_create_tarball(tarball_1, binaries_dir=p.strpath, buildinfo=buildinfo)
bindist._do_create_tarball(tarball_1, binaries_dir=str(p), buildinfo=buildinfo)
# Do it another time with different mtime of bin/app
os.utime(app, times=(10, 10))
bindist._do_create_tarball(tarball_2, binaries_dir=p.strpath, buildinfo=buildinfo)
bindist._do_create_tarball(tarball_2, binaries_dir=str(p), buildinfo=buildinfo)
# They should be bitwise identical:
assert filecmp.cmp(tarball_1, tarball_2, shallow=False)
expected_prefix = p.strpath.lstrip("/")
expected_prefix = str(p).lstrip("/")
# Sanity check for contents:
with tarfile.open(tarball_1, mode="r") as f:
@ -954,7 +961,7 @@ def test_reproducible_tarball_is_reproducible(tmpdir):
assert m.uname == m.gname == ""
assert set(f.getnames()) == {
f"{expected_prefix}",
*_all_parents(expected_prefix),
f"{expected_prefix}/bin",
f"{expected_prefix}/bin/app",
f"{expected_prefix}/.spack",
@ -1002,8 +1009,10 @@ def test_tarball_normalized_permissions(tmpdir):
def test_tarball_common_prefix(dummy_prefix, tmpdir):
"""Tests whether Spack can figure out the package directory
from the tarball contents, and strip them when extracting."""
"""Tests whether Spack can figure out the package directory from the tarball contents, and
strip them when extracting. This test creates a CURRENT_BUILD_CACHE_LAYOUT_VERSION=1 type
tarball where the parent directories of the package prefix are missing. Spack should be able
to figure out the common prefix and extract the files into the correct location."""
# When creating a tarball, Python (and tar) use relative paths,
# Absolute paths become relative to `/`, so drop the leading `/`.
@ -1020,11 +1029,10 @@ def test_tarball_common_prefix(dummy_prefix, tmpdir):
common_prefix = bindist._ensure_common_prefix(tar)
assert common_prefix == expected_prefix
# Strip the prefix from the tar entries
bindist._tar_strip_component(tar, common_prefix)
# Extract into prefix2
tar.extractall(path="prefix2")
tar.extractall(
path="prefix2", members=bindist._tar_strip_component(tar, common_prefix)
)
# Verify files are all there at the correct level.
assert set(os.listdir("prefix2")) == {"bin", "share", ".spack"}
@ -1044,13 +1052,30 @@ def test_tarball_common_prefix(dummy_prefix, tmpdir):
)
def test_tarfile_missing_binary_distribution_file(tmpdir):
"""A tarfile that does not contain a .spack/binary_distribution file cannot be
used to install."""
with tmpdir.as_cwd():
# An empty .spack dir.
with tarfile.open("empty.tar", mode="w") as tar:
tarinfo = tarfile.TarInfo(name="example/.spack")
tarinfo.type = tarfile.DIRTYPE
tar.addfile(tarinfo)
with pytest.raises(ValueError, match="missing binary_distribution file"):
bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r"))
def test_tarfile_without_common_directory_prefix_fails(tmpdir):
"""A tarfile that only contains files without a common package directory
should fail to extract, as we won't know where to put the files."""
with tmpdir.as_cwd():
# Create a broken tarball with just a file, no directories.
with tarfile.open("empty.tar", mode="w") as tar:
tar.addfile(tarfile.TarInfo(name="example/file"), fileobj=io.BytesIO(b"hello"))
tar.addfile(
tarfile.TarInfo(name="example/.spack/binary_distribution"),
fileobj=io.BytesIO(b"hello"),
)
with pytest.raises(ValueError, match="Tarball does not contain a common prefix"):
bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r"))
@ -1091,7 +1116,7 @@ def test_tarfile_of_spec_prefix(tmpdir):
# Verify that entries are added in depth-first pre-order, files preceding dirs,
# entries ordered alphabetically
assert tar.getnames() == [
f"{expected_prefix}",
*_all_parents(expected_prefix),
f"{expected_prefix}/file",
f"{expected_prefix}/hardlink",
f"{expected_prefix}/symlink",