binary_distribution.py: list parent dirs in binary tarball (#41773)
* Bump the build cache layout version from 1 to 2 * Version to lists parent directories of the prefix in the tarball too, which is required from some container runtimes
This commit is contained in:
parent
feebd35f91
commit
8c1226ece8
@ -68,7 +68,10 @@
|
|||||||
|
|
||||||
BUILD_CACHE_RELATIVE_PATH = "build_cache"
|
BUILD_CACHE_RELATIVE_PATH = "build_cache"
|
||||||
BUILD_CACHE_KEYS_RELATIVE_PATH = "_pgp"
|
BUILD_CACHE_KEYS_RELATIVE_PATH = "_pgp"
|
||||||
CURRENT_BUILD_CACHE_LAYOUT_VERSION = 1
|
|
||||||
|
#: The build cache layout version that this version of Spack creates.
|
||||||
|
#: Version 2: includes parent directories of the package prefix in the tarball
|
||||||
|
CURRENT_BUILD_CACHE_LAYOUT_VERSION = 2
|
||||||
|
|
||||||
|
|
||||||
class BuildCacheDatabase(spack_db.Database):
|
class BuildCacheDatabase(spack_db.Database):
|
||||||
@ -1174,6 +1177,16 @@ def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None:
|
|||||||
except OSError:
|
except OSError:
|
||||||
files_to_skip = []
|
files_to_skip = []
|
||||||
|
|
||||||
|
# First add all directories leading up to `prefix` (Spack <= 0.21 did not do this, leading to
|
||||||
|
# issues when tarballs are used in runtimes like AWS lambda). Skip the file system root.
|
||||||
|
parent_dirs = reversed(pathlib.Path(prefix).parents)
|
||||||
|
next(parent_dirs) # skip the root: slices are supported from python 3.10
|
||||||
|
for parent_dir in parent_dirs:
|
||||||
|
dir_info = tarfile.TarInfo(_tarinfo_name(str(parent_dir)))
|
||||||
|
dir_info.type = tarfile.DIRTYPE
|
||||||
|
dir_info.mode = 0o755
|
||||||
|
tar.addfile(dir_info)
|
||||||
|
|
||||||
dir_stack = [prefix]
|
dir_stack = [prefix]
|
||||||
while dir_stack:
|
while dir_stack:
|
||||||
dir = dir_stack.pop()
|
dir = dir_stack.pop()
|
||||||
@ -2047,11 +2060,12 @@ def _extract_inner_tarball(spec, filename, extract_to, signature_required: bool,
|
|||||||
|
|
||||||
|
|
||||||
def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
|
def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
|
||||||
"""Strip the top-level directory `prefix` from the member names in a tarfile."""
|
"""Yield all members of tarfile that start with given prefix, and strip that prefix (including
|
||||||
|
symlinks)"""
|
||||||
# Including trailing /, otherwise we end up with absolute paths.
|
# Including trailing /, otherwise we end up with absolute paths.
|
||||||
regex = re.compile(re.escape(prefix) + "/*")
|
regex = re.compile(re.escape(prefix) + "/*")
|
||||||
|
|
||||||
# Remove the top-level directory from the member (link)names.
|
# Only yield members in the package prefix.
|
||||||
# Note: when a tarfile is created, relative in-prefix symlinks are
|
# Note: when a tarfile is created, relative in-prefix symlinks are
|
||||||
# expanded to matching member names of tarfile entries. So, we have
|
# expanded to matching member names of tarfile entries. So, we have
|
||||||
# to ensure that those are updated too.
|
# to ensure that those are updated too.
|
||||||
@ -2059,12 +2073,14 @@ def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
|
|||||||
# them.
|
# them.
|
||||||
for m in tar.getmembers():
|
for m in tar.getmembers():
|
||||||
result = regex.match(m.name)
|
result = regex.match(m.name)
|
||||||
assert result is not None
|
if not result:
|
||||||
|
continue
|
||||||
m.name = m.name[result.end() :]
|
m.name = m.name[result.end() :]
|
||||||
if m.linkname:
|
if m.linkname:
|
||||||
result = regex.match(m.linkname)
|
result = regex.match(m.linkname)
|
||||||
if result:
|
if result:
|
||||||
m.linkname = m.linkname[result.end() :]
|
m.linkname = m.linkname[result.end() :]
|
||||||
|
yield m
|
||||||
|
|
||||||
|
|
||||||
def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
|
def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
|
||||||
@ -2110,7 +2126,7 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
|
|||||||
_delete_staged_downloads(download_result)
|
_delete_staged_downloads(download_result)
|
||||||
shutil.rmtree(tmpdir)
|
shutil.rmtree(tmpdir)
|
||||||
raise e
|
raise e
|
||||||
elif layout_version == 1:
|
elif 1 <= layout_version <= 2:
|
||||||
# Newer buildcache layout: the .spack file contains just
|
# Newer buildcache layout: the .spack file contains just
|
||||||
# in the install tree, the signature, if it exists, is
|
# in the install tree, the signature, if it exists, is
|
||||||
# wrapped around the spec.json at the root. If sig verify
|
# wrapped around the spec.json at the root. If sig verify
|
||||||
@ -2138,8 +2154,10 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
|
|||||||
try:
|
try:
|
||||||
with closing(tarfile.open(tarfile_path, "r")) as tar:
|
with closing(tarfile.open(tarfile_path, "r")) as tar:
|
||||||
# Remove install prefix from tarfil to extract directly into spec.prefix
|
# Remove install prefix from tarfil to extract directly into spec.prefix
|
||||||
_tar_strip_component(tar, prefix=_ensure_common_prefix(tar))
|
tar.extractall(
|
||||||
tar.extractall(path=spec.prefix)
|
path=spec.prefix,
|
||||||
|
members=_tar_strip_component(tar, prefix=_ensure_common_prefix(tar)),
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
shutil.rmtree(spec.prefix, ignore_errors=True)
|
shutil.rmtree(spec.prefix, ignore_errors=True)
|
||||||
_delete_staged_downloads(download_result)
|
_delete_staged_downloads(download_result)
|
||||||
@ -2174,20 +2192,47 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
|
|||||||
|
|
||||||
|
|
||||||
def _ensure_common_prefix(tar: tarfile.TarFile) -> str:
|
def _ensure_common_prefix(tar: tarfile.TarFile) -> str:
|
||||||
# Get the shortest length directory.
|
# Find the lowest `binary_distribution` file (hard-coded forward slash is on purpose).
|
||||||
common_prefix = min((e.name for e in tar.getmembers() if e.isdir()), key=len, default=None)
|
binary_distribution = min(
|
||||||
|
(
|
||||||
|
e.name
|
||||||
|
for e in tar.getmembers()
|
||||||
|
if e.isfile() and e.name.endswith(".spack/binary_distribution")
|
||||||
|
),
|
||||||
|
key=len,
|
||||||
|
default=None,
|
||||||
|
)
|
||||||
|
|
||||||
if common_prefix is None:
|
if binary_distribution is None:
|
||||||
raise ValueError("Tarball does not contain a common prefix")
|
raise ValueError("Tarball is not a Spack package, missing binary_distribution file")
|
||||||
|
|
||||||
# Validate that each file starts with the prefix
|
pkg_path = pathlib.PurePosixPath(binary_distribution).parent.parent
|
||||||
|
|
||||||
|
# Even the most ancient Spack version has required to list the dir of the package itself, so
|
||||||
|
# guard against broken tarballs where `path.parent.parent` is empty.
|
||||||
|
if pkg_path == pathlib.PurePosixPath():
|
||||||
|
raise ValueError("Invalid tarball, missing package prefix dir")
|
||||||
|
|
||||||
|
pkg_prefix = str(pkg_path)
|
||||||
|
|
||||||
|
# Ensure all tar entries are in the pkg_prefix dir, and if they're not, they should be parent
|
||||||
|
# dirs of it.
|
||||||
|
has_prefix = False
|
||||||
for member in tar.getmembers():
|
for member in tar.getmembers():
|
||||||
if not member.name.startswith(common_prefix):
|
stripped = member.name.rstrip("/")
|
||||||
raise ValueError(
|
if not (
|
||||||
f"Tarball contains file {member.name} outside of prefix {common_prefix}"
|
stripped.startswith(pkg_prefix) or member.isdir() and pkg_prefix.startswith(stripped)
|
||||||
)
|
):
|
||||||
|
raise ValueError(f"Tarball contains file {stripped} outside of prefix {pkg_prefix}")
|
||||||
|
if member.isdir() and stripped == pkg_prefix:
|
||||||
|
has_prefix = True
|
||||||
|
|
||||||
return common_prefix
|
# This is technically not required, but let's be defensive about the existence of the package
|
||||||
|
# prefix dir.
|
||||||
|
if not has_prefix:
|
||||||
|
raise ValueError(f"Tarball does not contain a common prefix {pkg_prefix}")
|
||||||
|
|
||||||
|
return pkg_prefix
|
||||||
|
|
||||||
|
|
||||||
def install_root_node(spec, unsigned=False, force=False, sha256=None):
|
def install_root_node(spec, unsigned=False, force=False, sha256=None):
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
import urllib.error
|
import urllib.error
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import urllib.response
|
import urllib.response
|
||||||
from pathlib import PurePath
|
from pathlib import Path, PurePath
|
||||||
|
|
||||||
import py
|
import py
|
||||||
import pytest
|
import pytest
|
||||||
@ -201,6 +201,9 @@ def dummy_prefix(tmpdir):
|
|||||||
with open(data, "w") as f:
|
with open(data, "w") as f:
|
||||||
f.write("hello world")
|
f.write("hello world")
|
||||||
|
|
||||||
|
with open(p.join(".spack", "binary_distribution"), "w") as f:
|
||||||
|
f.write("{}")
|
||||||
|
|
||||||
os.symlink("app", relative_app_link)
|
os.symlink("app", relative_app_link)
|
||||||
os.symlink(app, absolute_app_link)
|
os.symlink(app, absolute_app_link)
|
||||||
|
|
||||||
@ -887,24 +890,29 @@ def urlopen(request: urllib.request.Request):
|
|||||||
fetcher.conditional_fetch()
|
fetcher.conditional_fetch()
|
||||||
|
|
||||||
|
|
||||||
def test_tarball_doesnt_include_buildinfo_twice(tmpdir):
|
def _all_parents(prefix):
|
||||||
|
parts = [p for p in prefix.split("/")]
|
||||||
|
return ["/".join(parts[: i + 1]) for i in range(len(parts))]
|
||||||
|
|
||||||
|
|
||||||
|
def test_tarball_doesnt_include_buildinfo_twice(tmp_path: Path):
|
||||||
"""When tarballing a package that was installed from a buildcache, make
|
"""When tarballing a package that was installed from a buildcache, make
|
||||||
sure that the buildinfo file is not included twice in the tarball."""
|
sure that the buildinfo file is not included twice in the tarball."""
|
||||||
p = tmpdir.mkdir("prefix")
|
p = tmp_path / "prefix"
|
||||||
p.mkdir(".spack")
|
p.joinpath(".spack").mkdir(parents=True)
|
||||||
|
|
||||||
# Create a binary_distribution file in the .spack folder
|
# Create a binary_distribution file in the .spack folder
|
||||||
with open(p.join(".spack", "binary_distribution"), "w") as f:
|
with open(p / ".spack" / "binary_distribution", "w") as f:
|
||||||
f.write(syaml.dump({"metadata", "old"}))
|
f.write(syaml.dump({"metadata", "old"}))
|
||||||
|
|
||||||
# Now create a tarball, which should include a new binary_distribution file
|
# Now create a tarball, which should include a new binary_distribution file
|
||||||
tarball = str(tmpdir.join("prefix.tar.gz"))
|
tarball = str(tmp_path / "prefix.tar.gz")
|
||||||
|
|
||||||
bindist._do_create_tarball(
|
bindist._do_create_tarball(
|
||||||
tarfile_path=tarball, binaries_dir=p.strpath, buildinfo={"metadata": "new"}
|
tarfile_path=tarball, binaries_dir=str(p), buildinfo={"metadata": "new"}
|
||||||
)
|
)
|
||||||
|
|
||||||
expected_prefix = p.strpath.lstrip("/")
|
expected_prefix = str(p).lstrip("/")
|
||||||
|
|
||||||
# Verify we don't have a repeated binary_distribution file,
|
# Verify we don't have a repeated binary_distribution file,
|
||||||
# and that the tarball contains the new one, not the old one.
|
# and that the tarball contains the new one, not the old one.
|
||||||
@ -913,21 +921,20 @@ def test_tarball_doesnt_include_buildinfo_twice(tmpdir):
|
|||||||
"metadata": "new"
|
"metadata": "new"
|
||||||
}
|
}
|
||||||
assert tar.getnames() == [
|
assert tar.getnames() == [
|
||||||
f"{expected_prefix}",
|
*_all_parents(expected_prefix),
|
||||||
f"{expected_prefix}/.spack",
|
f"{expected_prefix}/.spack",
|
||||||
f"{expected_prefix}/.spack/binary_distribution",
|
f"{expected_prefix}/.spack/binary_distribution",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_reproducible_tarball_is_reproducible(tmpdir):
|
def test_reproducible_tarball_is_reproducible(tmp_path: Path):
|
||||||
p = tmpdir.mkdir("prefix")
|
p = tmp_path / "prefix"
|
||||||
p.mkdir("bin")
|
p.joinpath("bin").mkdir(parents=True)
|
||||||
p.mkdir(".spack")
|
p.joinpath(".spack").mkdir(parents=True)
|
||||||
|
app = p / "bin" / "app"
|
||||||
|
|
||||||
app = p.join("bin", "app")
|
tarball_1 = str(tmp_path / "prefix-1.tar.gz")
|
||||||
|
tarball_2 = str(tmp_path / "prefix-2.tar.gz")
|
||||||
tarball_1 = str(tmpdir.join("prefix-1.tar.gz"))
|
|
||||||
tarball_2 = str(tmpdir.join("prefix-2.tar.gz"))
|
|
||||||
|
|
||||||
with open(app, "w") as f:
|
with open(app, "w") as f:
|
||||||
f.write("hello world")
|
f.write("hello world")
|
||||||
@ -936,16 +943,16 @@ def test_reproducible_tarball_is_reproducible(tmpdir):
|
|||||||
|
|
||||||
# Create a tarball with a certain mtime of bin/app
|
# Create a tarball with a certain mtime of bin/app
|
||||||
os.utime(app, times=(0, 0))
|
os.utime(app, times=(0, 0))
|
||||||
bindist._do_create_tarball(tarball_1, binaries_dir=p.strpath, buildinfo=buildinfo)
|
bindist._do_create_tarball(tarball_1, binaries_dir=str(p), buildinfo=buildinfo)
|
||||||
|
|
||||||
# Do it another time with different mtime of bin/app
|
# Do it another time with different mtime of bin/app
|
||||||
os.utime(app, times=(10, 10))
|
os.utime(app, times=(10, 10))
|
||||||
bindist._do_create_tarball(tarball_2, binaries_dir=p.strpath, buildinfo=buildinfo)
|
bindist._do_create_tarball(tarball_2, binaries_dir=str(p), buildinfo=buildinfo)
|
||||||
|
|
||||||
# They should be bitwise identical:
|
# They should be bitwise identical:
|
||||||
assert filecmp.cmp(tarball_1, tarball_2, shallow=False)
|
assert filecmp.cmp(tarball_1, tarball_2, shallow=False)
|
||||||
|
|
||||||
expected_prefix = p.strpath.lstrip("/")
|
expected_prefix = str(p).lstrip("/")
|
||||||
|
|
||||||
# Sanity check for contents:
|
# Sanity check for contents:
|
||||||
with tarfile.open(tarball_1, mode="r") as f:
|
with tarfile.open(tarball_1, mode="r") as f:
|
||||||
@ -954,7 +961,7 @@ def test_reproducible_tarball_is_reproducible(tmpdir):
|
|||||||
assert m.uname == m.gname == ""
|
assert m.uname == m.gname == ""
|
||||||
|
|
||||||
assert set(f.getnames()) == {
|
assert set(f.getnames()) == {
|
||||||
f"{expected_prefix}",
|
*_all_parents(expected_prefix),
|
||||||
f"{expected_prefix}/bin",
|
f"{expected_prefix}/bin",
|
||||||
f"{expected_prefix}/bin/app",
|
f"{expected_prefix}/bin/app",
|
||||||
f"{expected_prefix}/.spack",
|
f"{expected_prefix}/.spack",
|
||||||
@ -1002,8 +1009,10 @@ def test_tarball_normalized_permissions(tmpdir):
|
|||||||
|
|
||||||
|
|
||||||
def test_tarball_common_prefix(dummy_prefix, tmpdir):
|
def test_tarball_common_prefix(dummy_prefix, tmpdir):
|
||||||
"""Tests whether Spack can figure out the package directory
|
"""Tests whether Spack can figure out the package directory from the tarball contents, and
|
||||||
from the tarball contents, and strip them when extracting."""
|
strip them when extracting. This test creates a CURRENT_BUILD_CACHE_LAYOUT_VERSION=1 type
|
||||||
|
tarball where the parent directories of the package prefix are missing. Spack should be able
|
||||||
|
to figure out the common prefix and extract the files into the correct location."""
|
||||||
|
|
||||||
# When creating a tarball, Python (and tar) use relative paths,
|
# When creating a tarball, Python (and tar) use relative paths,
|
||||||
# Absolute paths become relative to `/`, so drop the leading `/`.
|
# Absolute paths become relative to `/`, so drop the leading `/`.
|
||||||
@ -1020,11 +1029,10 @@ def test_tarball_common_prefix(dummy_prefix, tmpdir):
|
|||||||
common_prefix = bindist._ensure_common_prefix(tar)
|
common_prefix = bindist._ensure_common_prefix(tar)
|
||||||
assert common_prefix == expected_prefix
|
assert common_prefix == expected_prefix
|
||||||
|
|
||||||
# Strip the prefix from the tar entries
|
|
||||||
bindist._tar_strip_component(tar, common_prefix)
|
|
||||||
|
|
||||||
# Extract into prefix2
|
# Extract into prefix2
|
||||||
tar.extractall(path="prefix2")
|
tar.extractall(
|
||||||
|
path="prefix2", members=bindist._tar_strip_component(tar, common_prefix)
|
||||||
|
)
|
||||||
|
|
||||||
# Verify files are all there at the correct level.
|
# Verify files are all there at the correct level.
|
||||||
assert set(os.listdir("prefix2")) == {"bin", "share", ".spack"}
|
assert set(os.listdir("prefix2")) == {"bin", "share", ".spack"}
|
||||||
@ -1044,13 +1052,30 @@ def test_tarball_common_prefix(dummy_prefix, tmpdir):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_tarfile_missing_binary_distribution_file(tmpdir):
|
||||||
|
"""A tarfile that does not contain a .spack/binary_distribution file cannot be
|
||||||
|
used to install."""
|
||||||
|
with tmpdir.as_cwd():
|
||||||
|
# An empty .spack dir.
|
||||||
|
with tarfile.open("empty.tar", mode="w") as tar:
|
||||||
|
tarinfo = tarfile.TarInfo(name="example/.spack")
|
||||||
|
tarinfo.type = tarfile.DIRTYPE
|
||||||
|
tar.addfile(tarinfo)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="missing binary_distribution file"):
|
||||||
|
bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r"))
|
||||||
|
|
||||||
|
|
||||||
def test_tarfile_without_common_directory_prefix_fails(tmpdir):
|
def test_tarfile_without_common_directory_prefix_fails(tmpdir):
|
||||||
"""A tarfile that only contains files without a common package directory
|
"""A tarfile that only contains files without a common package directory
|
||||||
should fail to extract, as we won't know where to put the files."""
|
should fail to extract, as we won't know where to put the files."""
|
||||||
with tmpdir.as_cwd():
|
with tmpdir.as_cwd():
|
||||||
# Create a broken tarball with just a file, no directories.
|
# Create a broken tarball with just a file, no directories.
|
||||||
with tarfile.open("empty.tar", mode="w") as tar:
|
with tarfile.open("empty.tar", mode="w") as tar:
|
||||||
tar.addfile(tarfile.TarInfo(name="example/file"), fileobj=io.BytesIO(b"hello"))
|
tar.addfile(
|
||||||
|
tarfile.TarInfo(name="example/.spack/binary_distribution"),
|
||||||
|
fileobj=io.BytesIO(b"hello"),
|
||||||
|
)
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="Tarball does not contain a common prefix"):
|
with pytest.raises(ValueError, match="Tarball does not contain a common prefix"):
|
||||||
bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r"))
|
bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r"))
|
||||||
@ -1091,7 +1116,7 @@ def test_tarfile_of_spec_prefix(tmpdir):
|
|||||||
# Verify that entries are added in depth-first pre-order, files preceding dirs,
|
# Verify that entries are added in depth-first pre-order, files preceding dirs,
|
||||||
# entries ordered alphabetically
|
# entries ordered alphabetically
|
||||||
assert tar.getnames() == [
|
assert tar.getnames() == [
|
||||||
f"{expected_prefix}",
|
*_all_parents(expected_prefix),
|
||||||
f"{expected_prefix}/file",
|
f"{expected_prefix}/file",
|
||||||
f"{expected_prefix}/hardlink",
|
f"{expected_prefix}/hardlink",
|
||||||
f"{expected_prefix}/symlink",
|
f"{expected_prefix}/symlink",
|
||||||
|
Loading…
Reference in New Issue
Block a user