buildcache extractall: extract directly into spec.prefix (#37441)

- Run `mkdirp` on `spec.prefix`
- Extract directly into `spec.prefix`
  1. No need for `$store/tmp.xxx` where we extract the tarball directly, pray that it has one subdir `<name>-<version>-<hash>`, and then `rm -rf` the package prefix followed by `mv`.
  2. No need to clean up this temp dir in `spack clean`.
  3. Instead figure out package directory prefix from the tarball contents, and strip the tarinfo entries accordingly (kinda like tar --strip-components but more strict)
- Set package dir permissions
- Don't error during error handling when files cannot removed
- No need to "enrich" spec.json with this tarball-toplevel-path

After this PR, we can in fact tarball packages relative to `/` instead of `spec.prefix/..`, which makes it possible to use Spack tarballs as container layers, where relocation is impossible, and rootfs tarballs are expected.
This commit is contained in:
Harmen Stoppels 2023-08-02 17:06:13 +02:00 committed by GitHub
parent a14f4b5a02
commit 03c0d74139
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 159 additions and 60 deletions

View File

@ -217,13 +217,7 @@ file would live in the ``build_cache`` directory of a binary mirror::
"binary_cache_checksum": {
"hash_algorithm": "sha256",
"hash": "4f1e46452c35a5e61bcacca205bae1bfcd60a83a399af201a29c95b7cc3e1423"
},
"buildinfo": {
"relative_prefix":
"linux-ubuntu18.04-haswell/gcc-7.5.0/zlib-1.2.12-llv2ysfdxnppzjrt5ldybb5c52qbmoow",
"relative_rpaths": false
}
}
}
-----BEGIN PGP SIGNATURE-----

View File

@ -52,6 +52,7 @@
import spack.util.url as url_util
import spack.util.web as web_util
from spack.caches import misc_cache_location
from spack.package_prefs import get_package_dir_permissions, get_package_group
from spack.relocate_text import utf8_paths_to_single_binary_regex
from spack.spec import Spec
from spack.stage import Stage
@ -1312,15 +1313,7 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option
else:
raise ValueError("{0} not a valid spec file type".format(spec_file))
spec_dict["buildcache_layout_version"] = 1
bchecksum = {}
bchecksum["hash_algorithm"] = "sha256"
bchecksum["hash"] = checksum
spec_dict["binary_cache_checksum"] = bchecksum
# Add original install prefix relative to layout root to spec.json.
# This will be used to determine is the directory layout has changed.
buildinfo = {}
buildinfo["relative_prefix"] = os.path.relpath(spec.prefix, spack.store.STORE.layout.root)
spec_dict["buildinfo"] = buildinfo
spec_dict["binary_cache_checksum"] = {"hash_algorithm": "sha256", "hash": checksum}
with open(specfile_path, "w") as outfile:
# Note: when using gpg clear sign, we need to avoid long lines (19995 chars).
@ -1799,6 +1792,27 @@ def _extract_inner_tarball(spec, filename, extract_to, unsigned, remote_checksum
return tarfile_path
def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
"""Strip the top-level directory `prefix` from the member names in a tarfile."""
# Including trailing /, otherwise we end up with absolute paths.
regex = re.compile(re.escape(prefix) + "/*")
# Remove the top-level directory from the member (link)names.
# Note: when a tarfile is created, relative in-prefix symlinks are
# expanded to matching member names of tarfile entries. So, we have
# to ensure that those are updated too.
# Absolute symlinks are copied verbatim -- relocation should take care of
# them.
for m in tar.getmembers():
result = regex.match(m.name)
assert result is not None
m.name = m.name[result.end() :]
if m.linkname:
result = regex.match(m.linkname)
if result:
m.linkname = m.linkname[result.end() :]
def extract_tarball(spec, download_result, unsigned=False, force=False):
"""
extract binary tarball for given package into install area
@ -1809,6 +1823,14 @@ def extract_tarball(spec, download_result, unsigned=False, force=False):
else:
raise NoOverwriteException(str(spec.prefix))
# Create the install prefix
fsys.mkdirp(
spec.prefix,
mode=get_package_dir_permissions(spec),
group=get_package_group(spec),
default_perms="parents",
)
specfile_path = download_result["specfile_stage"].save_filename
with open(specfile_path, "r") as inputfile:
@ -1862,42 +1884,23 @@ def extract_tarball(spec, download_result, unsigned=False, force=False):
tarfile_path, size, contents, "sha256", expected, local_checksum
)
new_relative_prefix = str(os.path.relpath(spec.prefix, spack.store.STORE.layout.root))
# if the original relative prefix is in the spec file use it
buildinfo = spec_dict.get("buildinfo", {})
old_relative_prefix = buildinfo.get("relative_prefix", new_relative_prefix)
rel = buildinfo.get("relative_rpaths")
info = "old relative prefix %s\nnew relative prefix %s\nrelative rpaths %s"
tty.debug(info % (old_relative_prefix, new_relative_prefix, rel), level=2)
# Extract the tarball into the store root, presumably on the same filesystem.
# The directory created is the base directory name of the old prefix.
# Moving the old prefix name to the new prefix location should preserve
# hard links and symbolic links.
extract_tmp = os.path.join(spack.store.STORE.layout.root, ".tmp")
mkdirp(extract_tmp)
extracted_dir = os.path.join(extract_tmp, old_relative_prefix.split(os.path.sep)[-1])
with closing(tarfile.open(tarfile_path, "r")) as tar:
try:
tar.extractall(path=extract_tmp)
except Exception as e:
_delete_staged_downloads(download_result)
shutil.rmtree(extracted_dir)
raise e
try:
shutil.move(extracted_dir, spec.prefix)
except Exception as e:
with closing(tarfile.open(tarfile_path, "r")) as tar:
# Remove install prefix from tarfil to extract directly into spec.prefix
_tar_strip_component(tar, prefix=_ensure_common_prefix(tar))
tar.extractall(path=spec.prefix)
except Exception:
shutil.rmtree(spec.prefix, ignore_errors=True)
_delete_staged_downloads(download_result)
shutil.rmtree(extracted_dir)
raise e
raise
os.remove(tarfile_path)
os.remove(specfile_path)
try:
relocate_package(spec)
except Exception as e:
shutil.rmtree(spec.prefix)
shutil.rmtree(spec.prefix, ignore_errors=True)
raise e
else:
manifest_file = os.path.join(
@ -1910,12 +1913,29 @@ def extract_tarball(spec, download_result, unsigned=False, force=False):
tty.warn("No manifest file in tarball for spec %s" % spec_id)
finally:
if tmpdir:
shutil.rmtree(tmpdir)
shutil.rmtree(tmpdir, ignore_errors=True)
if os.path.exists(filename):
os.remove(filename)
_delete_staged_downloads(download_result)
def _ensure_common_prefix(tar: tarfile.TarFile) -> str:
# Get the shortest length directory.
common_prefix = min((e.name for e in tar.getmembers() if e.isdir()), key=len, default=None)
if common_prefix is None:
raise ValueError("Tarball does not contain a common prefix")
# Validate that each file starts with the prefix
for member in tar.getmembers():
if not member.name.startswith(common_prefix):
raise ValueError(
f"Tarball contains file {member.name} outside of prefix {common_prefix}"
)
return common_prefix
def install_root_node(spec, unsigned=False, force=False, sha256=None):
"""Install the root node of a concrete spec from a buildcache.

View File

@ -114,11 +114,7 @@ def clean(parser, args):
if args.stage:
tty.msg("Removing all temporary build stages")
spack.stage.purge()
# Temp directory where buildcaches are extracted
extract_tmp = os.path.join(spack.store.STORE.layout.root, ".tmp")
if os.path.exists(extract_tmp):
tty.debug("Removing {0}".format(extract_tmp))
shutil.rmtree(extract_tmp)
if args.downloads:
tty.msg("Removing cached downloads")
spack.caches.fetch_cache.destroy()

View File

@ -6,7 +6,7 @@
"""Schema for a buildcache spec.yaml file
.. literalinclude:: _spack_root/lib/spack/spack/schema/buildcache_spec.py
:lines: 14-
:lines: 13-
"""
import spack.schema.spec
@ -16,15 +16,8 @@
"type": "object",
"additionalProperties": False,
"properties": {
"buildinfo": {
"type": "object",
"additionalProperties": False,
"required": ["relative_prefix"],
"properties": {
"relative_prefix": {"type": "string"},
"relative_rpaths": {"type": "boolean"},
},
},
# `buildinfo` is no longer needed as of Spack 0.21
"buildinfo": {"type": "object"},
"spec": {
"type": "object",
"additionalProperties": True,

View File

@ -12,6 +12,7 @@
import urllib.error
import urllib.request
import urllib.response
from pathlib import PurePath
import py
import pytest
@ -177,6 +178,33 @@ def install_dir_non_default_layout(tmpdir):
spack.store.STORE.layout = real_layout
@pytest.fixture(scope="function")
def dummy_prefix(tmpdir):
"""Dummy prefix used for testing tarball creation, validation, extraction"""
p = tmpdir.mkdir("prefix")
assert os.path.isabs(p)
p.mkdir("bin")
p.mkdir("share")
p.mkdir(".spack")
app = p.join("bin", "app")
relative_app_link = p.join("bin", "relative_app_link")
absolute_app_link = p.join("bin", "absolute_app_link")
data = p.join("share", "file")
with open(app, "w") as f:
f.write("hello world")
with open(data, "w") as f:
f.write("hello world")
os.symlink("app", relative_app_link)
os.symlink(app, absolute_app_link)
return str(p)
args = ["file"]
if sys.platform == "darwin":
args.extend(["/usr/bin/clang++", "install_name_tool"])
@ -966,3 +994,71 @@ def test_tarball_normalized_permissions(tmpdir):
# not-executable-by-user files should be 0o644
assert path_to_member["pkg/share/file"].mode == 0o644
def test_tarball_common_prefix(dummy_prefix, tmpdir):
"""Tests whether Spack can figure out the package directory
from the tarball contents, and strip them when extracting."""
# When creating a tarball, Python (and tar) use relative paths,
# Absolute paths become relative to `/`, so drop the leading `/`.
assert os.path.isabs(dummy_prefix)
expected_prefix = PurePath(dummy_prefix).as_posix().lstrip("/")
with tmpdir.as_cwd():
# Create a tarball (using absolute path for prefix dir)
with tarfile.open("example.tar", mode="w") as tar:
tar.add(name=dummy_prefix)
# Open, verify common prefix, and extract it.
with tarfile.open("example.tar", mode="r") as tar:
common_prefix = bindist._ensure_common_prefix(tar)
assert common_prefix == expected_prefix
# Strip the prefix from the tar entries
bindist._tar_strip_component(tar, common_prefix)
# Extract into prefix2
tar.extractall(path="prefix2")
# Verify files are all there at the correct level.
assert set(os.listdir("prefix2")) == {"bin", "share", ".spack"}
assert set(os.listdir(os.path.join("prefix2", "bin"))) == {
"app",
"relative_app_link",
"absolute_app_link",
}
assert set(os.listdir(os.path.join("prefix2", "share"))) == {"file"}
# Relative symlink should still be correct
assert os.readlink(os.path.join("prefix2", "bin", "relative_app_link")) == "app"
# Absolute symlink should remain absolute -- this is for relocation to fix up.
assert os.readlink(os.path.join("prefix2", "bin", "absolute_app_link")) == os.path.join(
dummy_prefix, "bin", "app"
)
def test_tarfile_without_common_directory_prefix_fails(tmpdir):
"""A tarfile that only contains files without a common package directory
should fail to extract, as we won't know where to put the files."""
with tmpdir.as_cwd():
# Create a broken tarball with just a file, no directories.
with tarfile.open("empty.tar", mode="w") as tar:
tar.addfile(tarfile.TarInfo(name="example/file"), fileobj=io.BytesIO(b"hello"))
with pytest.raises(ValueError, match="Tarball does not contain a common prefix"):
bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r"))
def test_tarfile_with_files_outside_common_prefix(tmpdir, dummy_prefix):
"""If a file is outside of the common prefix, we should fail."""
with tmpdir.as_cwd():
with tarfile.open("broken.tar", mode="w") as tar:
tar.add(name=dummy_prefix)
tar.addfile(tarfile.TarInfo(name="/etc/config_file"), fileobj=io.BytesIO(b"hello"))
with pytest.raises(
ValueError, match="Tarball contains file /etc/config_file outside of prefix"
):
bindist._ensure_common_prefix(tarfile.open("broken.tar", mode="r"))