build caches: collect files to relocate while tarballing w/o file (#48212)
A few changes to tarball creation (for build caches):
- Do not run `file` to distinguish binary from text:
  - `file` is slow, even when run in a batched fashion: it usually reads all bytes and has slow logic to categorize specific types.
  - We don't need a highly detailed file categorization; a crude categorization into ELF, Mach-O, and text suffices. Detecting ELF and Mach-O is straightforward and cheap.
  - Detecting UTF-8 (and with that ASCII) is highly accurate: the false positive rate decays exponentially as file size increases. Furthermore, it is not only the most common encoding, but also the most common file type in package prefixes. ISO-8859-1 is cheaply (but heuristically) detected too, and is sufficiently accurate once binaries and UTF-8 files have been classified.
- Remove `file` as a dependency of Spack in general, which makes Spack itself easier to install.
- Detect the file type and the need to relocate as part of creating the tarball, which is more cache friendly and thus faster.
This commit is contained in:
parent
aca469b329
commit
e9cdcc4af0
12
.github/workflows/bootstrap.yml
vendored
12
.github/workflows/bootstrap.yml
vendored
@ -161,11 +161,7 @@ jobs:
|
||||
source share/spack/setup-env.sh
|
||||
spack -d gpg list
|
||||
tree $HOME/.spack/bootstrap/store/
|
||||
- name: Bootstrap File
|
||||
run: |
|
||||
source share/spack/setup-env.sh
|
||||
spack -d python share/spack/qa/bootstrap-file.py
|
||||
tree $HOME/.spack/bootstrap/store/
|
||||
|
||||
|
||||
windows:
|
||||
runs-on: "windows-latest"
|
||||
@ -196,9 +192,3 @@ jobs:
|
||||
spack -d gpg list
|
||||
./share/spack/qa/validate_last_exit.ps1
|
||||
tree $env:userprofile/.spack/bootstrap/store/
|
||||
- name: Bootstrap File
|
||||
run: |
|
||||
./share/spack/setup-env.ps1
|
||||
spack -d python share/spack/qa/bootstrap-file.py
|
||||
./share/spack/qa/validate_last_exit.ps1
|
||||
tree $env:userprofile/.spack/bootstrap/store/
|
||||
|
2
.github/workflows/unit_tests.yaml
vendored
2
.github/workflows/unit_tests.yaml
vendored
@ -140,7 +140,7 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
dnf install -y \
|
||||
bzip2 curl file gcc-c++ gcc gcc-gfortran git gnupg2 gzip \
|
||||
bzip2 curl gcc-c++ gcc gcc-gfortran git gnupg2 gzip \
|
||||
make patch tcl unzip which xz
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
- name: Setup repo and non-root user
|
||||
|
@ -35,7 +35,7 @@ A build matrix showing which packages are working on which systems is shown belo
|
||||
.. code-block:: console
|
||||
|
||||
apt update
|
||||
apt install bzip2 ca-certificates file g++ gcc gfortran git gzip lsb-release patch python3 tar unzip xz-utils zstd
|
||||
apt install bzip2 ca-certificates g++ gcc gfortran git gzip lsb-release patch python3 tar unzip xz-utils zstd
|
||||
|
||||
.. tab-item:: RHEL
|
||||
|
||||
|
@ -8,7 +8,6 @@ unzip, , , Compress/Decompress archives
|
||||
bzip2, , , Compress/Decompress archives
|
||||
xz, , , Compress/Decompress archives
|
||||
zstd, , Optional, Compress/Decompress archives
|
||||
file, , , Create/Use Buildcaches
|
||||
lsb-release, , , Linux: identify operating system version
|
||||
gnupg2, , , Sign/Verify Buildcaches
|
||||
git, , , Manage Software Repositories
|
||||
|
|
@ -863,8 +863,10 @@ def elide_list(line_list: List[str], max_num: int = 10) -> List[str]:
|
||||
|
||||
# Type aliases for compiled regular expressions over ``str`` and ``bytes``.
# ``re.Pattern`` is only subscripted on Python 3.9 and newer; older interpreters
# use the ``typing.Pattern`` generic instead.
if sys.version_info < (3, 9):
    PatternStr = typing.Pattern[str]
    PatternBytes = typing.Pattern[bytes]
else:
    PatternStr = re.Pattern[str]
    PatternBytes = re.Pattern[bytes]
|
||||
|
||||
|
||||
def fnmatch_translate_multiple(named_patterns: Dict[str, str]) -> str:
|
||||
|
@ -24,13 +24,12 @@
|
||||
import urllib.request
|
||||
import warnings
|
||||
from contextlib import closing
|
||||
from typing import Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Union
|
||||
from typing import IO, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Union
|
||||
|
||||
import llnl.util.filesystem as fsys
|
||||
import llnl.util.lang
|
||||
import llnl.util.tty as tty
|
||||
from llnl.util.filesystem import BaseDirectoryVisitor, mkdirp, visit_directory_tree
|
||||
from llnl.util.symlink import readlink
|
||||
from llnl.util.filesystem import mkdirp
|
||||
|
||||
import spack.caches
|
||||
import spack.config as config
|
||||
@ -54,7 +53,6 @@
|
||||
import spack.util.archive
|
||||
import spack.util.crypto
|
||||
import spack.util.file_cache as file_cache
|
||||
import spack.util.filesystem as ssys
|
||||
import spack.util.gpg
|
||||
import spack.util.parallel
|
||||
import spack.util.path
|
||||
@ -587,129 +585,11 @@ def read_buildinfo_file(prefix):
|
||||
return syaml.load(f)
|
||||
|
||||
|
||||
class BuildManifestVisitor(BaseDirectoryVisitor):
    """Directory visitor that records regular files and symlinks of a tree.

    The collected relative paths can afterwards be inspected to decide whether they
    need relocation. Files with more than one hardlink are recorded only once, and
    symlinks (to files as well as to directories) are recorded without being followed.
    """

    def __init__(self):
        # (st_dev, st_ino) pairs of multiply-linked files that were already recorded,
        # so the same inode is not relocated more than once
        self.visited = set()

        # Relative paths of the entries that will be checked later
        self.files = []
        self.symlinks = []

    def seen_before(self, root, rel_path):
        """Return True if ``rel_path`` is a hardlink to an inode recorded earlier."""
        st = os.lstat(os.path.join(root, rel_path))

        # A link count of one means there is nothing to de-duplicate.
        if st.st_nlink == 1:
            return False

        key = (st.st_dev, st.st_ino)
        already_recorded = key in self.visited
        if not already_recorded:
            self.visited.add(key)
        return already_recorded

    def visit_file(self, root, rel_path, depth):
        """Record a regular file unless another hardlink to it was recorded already."""
        if not self.seen_before(root, rel_path):
            self.files.append(rel_path)

    def visit_symlinked_file(self, root, rel_path, depth):
        """Record a symlink."""
        # Note: symlinks *can* be hardlinked, but it is unclear if
        # symlinks can be relinked in-place (preserving inode).
        # Therefore, we do *not* de-dupe hardlinked symlinks.
        self.symlinks.append(rel_path)

    def before_visit_dir(self, root, rel_path, depth):
        """Return False for directories named ``.spack`` or ``man``, True otherwise."""
        name = os.path.basename(rel_path)
        return name != ".spack" and name != "man"

    def before_visit_symlinked_dir(self, root, rel_path, depth):
        """Record a symlinked directory as a plain symlink and do not descend into it."""
        # Treat symlinked directories simply as symlinks.
        self.visit_symlinked_file(root, rel_path, depth)

        # Never recurse into symlinked directories.
        return False
|
||||
|
||||
|
||||
def file_matches(path, regex):
    """Return True if the compiled bytes ``regex`` matches anywhere in the file at ``path``.

    The whole file is read into memory in binary mode before searching.
    """
    with open(path, "rb") as stream:
        return regex.search(stream.read()) is not None
|
||||
|
||||
|
||||
def get_buildfile_manifest(spec):
    """
    Return a data structure with information about a build, including
    text_to_relocate, binary_to_relocate, binary_to_relocate_fullpath
    link_to_relocate, and other, which means it doesn't fit any of previous
    checks (and should not be relocated). We exclude docs (man) and
    metadata (.spack). This can be used to find a particular kind of file
    in spack, or to generate the build metadata.

    Entries of ``text_to_relocate``, ``binary_to_relocate`` and ``link_to_relocate``
    are paths relative to ``spec.prefix``; entries of ``binary_to_relocate_fullpath``
    and ``other`` are absolute paths.
    """
    data = {
        "text_to_relocate": [],
        "binary_to_relocate": [],
        "link_to_relocate": [],
        "other": [],
        "binary_to_relocate_fullpath": [],
        "hardlinks_deduped": True,
    }

    # Guard against filesystem footguns of hardlinks and symlinks by using
    # a visitor to retrieve a list of files and symlinks, so we don't have
    # to worry about hardlinks of symlinked dirs and what not.
    visitor = BuildManifestVisitor()
    root = spec.prefix
    visit_directory_tree(root, visitor)

    # Collect a list of prefixes for this package and its dependencies, Spack will
    # look for them to decide if text file needs to be relocated or not
    prefixes = [d.prefix for d in spec.traverse(root=True, deptype="all") if not d.external]
    prefixes.append(spack.hooks.sbang.sbang_install_path())
    prefixes.append(str(spack.store.STORE.layout.root))

    # Create a giant regex that matches all prefixes
    regex = utf8_paths_to_single_binary_regex(prefixes)

    # Symlinks.

    # Obvious bugs:
    #   1. relative links are not relocated.
    #   2. paths are used as strings.
    for rel_path in visitor.symlinks:
        abs_path = os.path.join(root, rel_path)
        link = readlink(abs_path)
        if os.path.isabs(link) and link.startswith(spack.store.STORE.layout.root):
            data["link_to_relocate"].append(rel_path)

    # Non-symlinks.
    for rel_path in visitor.files:
        abs_path = os.path.join(root, rel_path)
        m_type, m_subtype = ssys.mime_type(abs_path)

        if relocate.needs_binary_relocation(m_type, m_subtype):
            # Why is this branch not part of needs_binary_relocation? :(
            if (
                (
                    m_subtype in ("x-executable", "x-sharedlib", "x-pie-executable")
                    and sys.platform != "darwin"
                )
                # A one-element tuple (note the trailing comma), so this is a membership
                # test rather than a substring test against the string "x-mach-binary".
                or (m_subtype in ("x-mach-binary",) and sys.platform == "darwin")
                or (not rel_path.endswith(".o"))
            ):
                data["binary_to_relocate"].append(rel_path)
                data["binary_to_relocate_fullpath"].append(abs_path)
                continue

        elif relocate.needs_text_relocation(m_type, m_subtype) and file_matches(abs_path, regex):
            data["text_to_relocate"].append(rel_path)
            continue

        # Anything that fell through is neither a relocatable binary nor matching text.
        data["other"].append(abs_path)

    return data
|
||||
def file_matches(f: IO[bytes], regex: llnl.util.lang.PatternBytes) -> bool:
    """Return True if ``regex`` matches anywhere in the remaining contents of ``f``.

    The stream is read from its current position to the end, and is always rewound to
    offset 0 afterwards, also when reading or searching raises.
    """
    try:
        contents = f.read()
        found = regex.search(contents) is not None
    finally:
        f.seek(0)
    return found
|
||||
|
||||
|
||||
def deps_to_relocate(spec):
|
||||
@ -742,17 +622,15 @@ def deps_to_relocate(spec):
|
||||
|
||||
def get_buildinfo_dict(spec):
|
||||
"""Create metadata for a tarball"""
|
||||
manifest = get_buildfile_manifest(spec)
|
||||
|
||||
return {
|
||||
"sbang_install_path": spack.hooks.sbang.sbang_install_path(),
|
||||
"buildpath": spack.store.STORE.layout.root,
|
||||
"spackprefix": spack.paths.prefix,
|
||||
"relative_prefix": os.path.relpath(spec.prefix, spack.store.STORE.layout.root),
|
||||
"relocate_textfiles": manifest["text_to_relocate"],
|
||||
"relocate_binaries": manifest["binary_to_relocate"],
|
||||
"relocate_links": manifest["link_to_relocate"],
|
||||
"hardlinks_deduped": manifest["hardlinks_deduped"],
|
||||
# "relocate_textfiles": [],
|
||||
# "relocate_binaries": [],
|
||||
# "relocate_links": [],
|
||||
"hardlinks_deduped": True,
|
||||
"hash_to_prefix": {d.dag_hash(): str(d.prefix) for d in deps_to_relocate(spec)},
|
||||
}
|
||||
|
||||
@ -1042,7 +920,55 @@ def generate_key_index(key_prefix: str, tmpdir: str) -> None:
|
||||
) from e
|
||||
|
||||
|
||||
def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None:
|
||||
class FileTypes:
    """Integer codes for the coarse file categories: binary, text, or unknown."""

    # BINARY == 0, TEXT == 1, UNKNOWN == 2
    BINARY, TEXT, UNKNOWN = range(3)
|
||||
|
||||
|
||||
NOT_ISO8859_1_TEXT = re.compile(b"[\x00\x7F-\x9F]")
|
||||
|
||||
|
||||
def file_type(f: IO[bytes]) -> int:
    """Classify an open binary stream as ``FileTypes.BINARY``, ``TEXT`` or ``UNKNOWN``.

    The checks run in this order: ELF / Mach-O magic bytes, strict UTF-8 decoding, and
    finally a heuristic ISO-8859-1 check. Streams shorter than 8 bytes are reported as
    ``UNKNOWN``. The stream is always rewound to offset 0 before returning.
    """
    try:
        # First check if this is an ELF or Mach-O binary.
        header = f.read(8)
        if len(header) < 8:
            return FileTypes.UNKNOWN
        if relocate.is_elf_magic(header) or relocate.is_macho_magic(header):
            return FileTypes.BINARY

        f.seek(0)

        # Then try utf-8, which has a fast exponential decay in false positive rate with
        # file size. Use chunked reads for fast early exit.
        decoder = io.TextIOWrapper(f, encoding="utf-8", errors="strict")
        try:
            while decoder.read(1024):
                pass
            return FileTypes.TEXT
        except UnicodeError:
            # Not valid UTF-8: rewind so the next check starts from the beginning.
            decoder.seek(0)
        finally:
            # Detach instead of closing, so the underlying stream stays usable.
            decoder.detach()

        # Finally try iso-8859-1 heuristically. In Python, all possible 256 byte values
        # are valid, so the stream counts as text only if no chunk contains a byte
        # matched by NOT_ISO8859_1_TEXT.
        while True:
            chunk = f.read(1024)
            if not chunk:
                return FileTypes.TEXT
            if NOT_ISO8859_1_TEXT.search(chunk):
                return FileTypes.UNKNOWN
    finally:
        f.seek(0)
|
||||
|
||||
|
||||
def tarfile_of_spec_prefix(
|
||||
tar: tarfile.TarFile, prefix: str, prefixes_to_relocate: List[str]
|
||||
) -> dict:
|
||||
"""Create a tarfile of an install prefix of a spec. Skips existing buildinfo file.
|
||||
|
||||
Args:
|
||||
@ -1058,6 +984,33 @@ def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None:
|
||||
except OSError:
|
||||
skip = lambda entry: False
|
||||
|
||||
binary_regex = utf8_paths_to_single_binary_regex(prefixes_to_relocate)
|
||||
|
||||
relocate_binaries = []
|
||||
relocate_links = []
|
||||
relocate_textfiles = []
|
||||
|
||||
# use callbacks to add files and symlinks, so we can register which files need relocation upon
|
||||
# extraction.
|
||||
def add_file(tar: tarfile.TarFile, info: tarfile.TarInfo, path: str):
|
||||
with open(path, "rb") as f:
|
||||
relpath = os.path.relpath(path, prefix)
|
||||
# no need to relocate anything in the .spack directory
|
||||
if relpath.split(os.sep, 1)[0] == ".spack":
|
||||
tar.addfile(info, f)
|
||||
return
|
||||
f_type = file_type(f)
|
||||
if f_type == FileTypes.BINARY:
|
||||
relocate_binaries.append(os.path.relpath(path, prefix))
|
||||
elif f_type == FileTypes.TEXT and file_matches(f, binary_regex):
|
||||
relocate_textfiles.append(os.path.relpath(path, prefix))
|
||||
tar.addfile(info, f)
|
||||
|
||||
def add_symlink(tar: tarfile.TarFile, info: tarfile.TarInfo, path: str):
|
||||
if os.path.isabs(info.linkname) and binary_regex.match(info.linkname.encode("utf-8")):
|
||||
relocate_links.append(os.path.relpath(path, prefix))
|
||||
tar.addfile(info)
|
||||
|
||||
spack.util.archive.reproducible_tarfile_from_prefix(
|
||||
tar,
|
||||
prefix,
|
||||
@ -1065,29 +1018,51 @@ def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None:
|
||||
# used in runtimes like AWS lambda.
|
||||
include_parent_directories=True,
|
||||
skip=skip,
|
||||
add_file=add_file,
|
||||
add_symlink=add_symlink,
|
||||
)
|
||||
|
||||
return {
|
||||
"relocate_binaries": relocate_binaries,
|
||||
"relocate_links": relocate_links,
|
||||
"relocate_textfiles": relocate_textfiles,
|
||||
}
|
||||
|
||||
|
||||
def create_tarball(spec: spack.spec.Spec, tarfile_path: str) -> Tuple[str, str]:
    """Create a tarball of a spec at ``tarfile_path``.

    Returns:
        The checksums of the compressed tarfile and the uncompressed tarfile, in that
        order, as returned by ``_do_create_tarball``.
    """
    prefix = spec.prefix
    buildinfo = get_buildinfo_dict(spec)
    relocatable_prefixes = prefixes_to_relocate(spec)
    return _do_create_tarball(
        tarfile_path, prefix, buildinfo=buildinfo, prefixes_to_relocate=relocatable_prefixes
    )
|
||||
|
||||
|
||||
def _do_create_tarball(tarfile_path: str, binaries_dir: str, buildinfo: dict):
|
||||
def _do_create_tarball(
|
||||
tarfile_path: str, prefix: str, buildinfo: dict, prefixes_to_relocate: List[str]
|
||||
) -> Tuple[str, str]:
|
||||
with spack.util.archive.gzip_compressed_tarfile(tarfile_path) as (
|
||||
tar,
|
||||
inner_checksum,
|
||||
outer_checksum,
|
||||
tar_gz_checksum,
|
||||
tar_checksum,
|
||||
):
|
||||
# Tarball the install prefix
|
||||
tarfile_of_spec_prefix(tar, binaries_dir)
|
||||
files_to_relocate = tarfile_of_spec_prefix(tar, prefix, prefixes_to_relocate)
|
||||
buildinfo.update(files_to_relocate)
|
||||
|
||||
# Serialize buildinfo for the tarball
|
||||
bstring = syaml.dump(buildinfo, default_flow_style=True).encode("utf-8")
|
||||
tarinfo = tarfile.TarInfo(
|
||||
name=spack.util.archive.default_path_to_name(buildinfo_file_name(binaries_dir))
|
||||
name=spack.util.archive.default_path_to_name(buildinfo_file_name(prefix))
|
||||
)
|
||||
tarinfo.type = tarfile.REGTYPE
|
||||
tarinfo.size = len(bstring)
|
||||
tarinfo.mode = 0o644
|
||||
tar.addfile(tarinfo, io.BytesIO(bstring))
|
||||
|
||||
return inner_checksum.hexdigest(), outer_checksum.hexdigest()
|
||||
return tar_gz_checksum.hexdigest(), tar_checksum.hexdigest()
|
||||
|
||||
|
||||
class ExistsInBuildcache(NamedTuple):
|
||||
@ -1137,6 +1112,13 @@ def _exists_in_buildcache(spec: spack.spec.Spec, tmpdir: str, out_url: str) -> E
|
||||
return ExistsInBuildcache(signed, unsigned, tarball)
|
||||
|
||||
|
||||
def prefixes_to_relocate(spec):
    """Return the list of prefixes that files of ``spec`` are searched for.

    The list holds the prefix of every spec yielded by ``deps_to_relocate(spec)``,
    followed by the sbang install path and the root of the store layout.
    """
    return [
        *(dep.prefix for dep in deps_to_relocate(spec)),
        spack.hooks.sbang.sbang_install_path(),
        str(spack.store.STORE.layout.root),
    ]
|
||||
|
||||
|
||||
def _url_upload_tarball_and_specfile(
|
||||
spec: spack.spec.Spec,
|
||||
tmpdir: str,
|
||||
@ -1146,7 +1128,7 @@ def _url_upload_tarball_and_specfile(
|
||||
):
|
||||
files = BuildcacheFiles(spec, tmpdir, out_url)
|
||||
tarball = files.local_tarball()
|
||||
checksum, _ = _do_create_tarball(tarball, spec.prefix, get_buildinfo_dict(spec))
|
||||
checksum, _ = create_tarball(spec, tarball)
|
||||
spec_dict = spec.to_dict(hash=ht.dag_hash)
|
||||
spec_dict["buildcache_layout_version"] = CURRENT_BUILD_CACHE_LAYOUT_VERSION
|
||||
spec_dict["binary_cache_checksum"] = {"hash_algorithm": "sha256", "hash": checksum}
|
||||
@ -1470,13 +1452,11 @@ def _oci_push_pkg_blob(
|
||||
filename = os.path.join(tmpdir, f"{spec.dag_hash()}.tar.gz")
|
||||
|
||||
# Create an oci.image.layer aka tarball of the package
|
||||
compressed_tarfile_checksum, tarfile_checksum = _do_create_tarball(
|
||||
filename, spec.prefix, get_buildinfo_dict(spec)
|
||||
)
|
||||
tar_gz_checksum, tar_checksum = create_tarball(spec, filename)
|
||||
|
||||
blob = spack.oci.oci.Blob(
|
||||
Digest.from_sha256(compressed_tarfile_checksum),
|
||||
Digest.from_sha256(tarfile_checksum),
|
||||
Digest.from_sha256(tar_gz_checksum),
|
||||
Digest.from_sha256(tar_checksum),
|
||||
os.path.getsize(filename),
|
||||
)
|
||||
|
||||
@ -2435,6 +2415,14 @@ def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
|
||||
yield m
|
||||
|
||||
|
||||
def extract_buildcache_tarball(tarfile_path: str, destination: str) -> None:
    """Extract the tarball at ``tarfile_path`` directly into ``destination``.

    The common leading prefix shared by the tarball entries is stripped, so that the
    entries end up directly under ``destination``.
    """
    with closing(tarfile.open(tarfile_path, "r")) as archive:
        # Remove common prefix from tarball entries and directly extract them to the
        # install dir.
        common_prefix = _ensure_common_prefix(archive)
        stripped_members = _tar_strip_component(archive, prefix=common_prefix)
        archive.extractall(path=destination, members=stripped_members)
|
||||
|
||||
|
||||
def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
|
||||
"""
|
||||
extract binary tarball for given package into install area
|
||||
@ -2504,12 +2492,7 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
|
||||
tarfile_path, size, contents, "sha256", expected, local_checksum
|
||||
)
|
||||
try:
|
||||
with closing(tarfile.open(tarfile_path, "r")) as tar:
|
||||
# Remove install prefix from tarfile to extract directly into spec.prefix
|
||||
tar.extractall(
|
||||
path=spec.prefix,
|
||||
members=_tar_strip_component(tar, prefix=_ensure_common_prefix(tar)),
|
||||
)
|
||||
extract_buildcache_tarball(tarfile_path, destination=spec.prefix)
|
||||
except Exception:
|
||||
shutil.rmtree(spec.prefix, ignore_errors=True)
|
||||
_delete_staged_downloads(download_result)
|
||||
|
@ -9,7 +9,6 @@
|
||||
all_core_root_specs,
|
||||
ensure_clingo_importable_or_raise,
|
||||
ensure_core_dependencies,
|
||||
ensure_file_in_path_or_raise,
|
||||
ensure_gpg_in_path_or_raise,
|
||||
ensure_patchelf_in_path_or_raise,
|
||||
)
|
||||
@ -20,7 +19,6 @@
|
||||
"is_bootstrapping",
|
||||
"ensure_bootstrap_configuration",
|
||||
"ensure_core_dependencies",
|
||||
"ensure_file_in_path_or_raise",
|
||||
"ensure_gpg_in_path_or_raise",
|
||||
"ensure_clingo_importable_or_raise",
|
||||
"ensure_patchelf_in_path_or_raise",
|
||||
|
@ -481,19 +481,6 @@ def ensure_gpg_in_path_or_raise() -> None:
|
||||
)
|
||||
|
||||
|
||||
def file_root_spec() -> str:
    """Return the root spec used to bootstrap file"""
    # The package is named "win-file" on Windows and "file" everywhere else.
    if IS_WINDOWS:
        return _root_spec("win-file")
    return _root_spec("file")
|
||||
|
||||
|
||||
def ensure_file_in_path_or_raise() -> None:
    """Ensure file is in the PATH or raise"""
    abstract_spec = file_root_spec()
    return ensure_executables_in_path_or_raise(executables=["file"], abstract_spec=abstract_spec)
|
||||
|
||||
|
||||
def patchelf_root_spec() -> str:
|
||||
"""Return the root spec used to bootstrap patchelf"""
|
||||
# 0.13.1 is the last version not to require C++17.
|
||||
@ -577,15 +564,13 @@ def ensure_core_dependencies() -> None:
|
||||
"""Ensure the presence of all the core dependencies."""
|
||||
if sys.platform.lower() == "linux":
|
||||
ensure_patchelf_in_path_or_raise()
|
||||
elif sys.platform == "win32":
|
||||
ensure_file_in_path_or_raise()
|
||||
ensure_gpg_in_path_or_raise()
|
||||
ensure_clingo_importable_or_raise()
|
||||
|
||||
|
||||
def all_core_root_specs() -> List[str]:
|
||||
"""Return a list of all the core root specs that may be used to bootstrap Spack"""
|
||||
return [clingo_root_spec(), gnupg_root_spec(), patchelf_root_spec(), file_root_spec()]
|
||||
return [clingo_root_spec(), gnupg_root_spec(), patchelf_root_spec()]
|
||||
|
||||
|
||||
def bootstrapping_sources(scope: Optional[str] = None):
|
||||
|
@ -3,8 +3,8 @@
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
"""Query the status of bootstrapping on this machine"""
|
||||
import platform
|
||||
from typing import List, Optional, Sequence, Tuple, Union
|
||||
import sys
|
||||
from typing import Dict, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
import spack.util.executable
|
||||
|
||||
@ -72,7 +72,7 @@ def _core_requirements() -> List[RequiredResponseType]:
|
||||
"bzip2": _missing("bzip2", "required to compress/decompress code archives"),
|
||||
"git": _missing("git", "required to fetch/manage git repositories"),
|
||||
}
|
||||
if platform.system().lower() == "linux":
|
||||
if sys.platform == "linux":
|
||||
_core_system_exes["xz"] = _missing("xz", "required to compress/decompress code archives")
|
||||
|
||||
# Executables that are not bootstrapped yet
|
||||
@ -87,17 +87,16 @@ def _core_requirements() -> List[RequiredResponseType]:
|
||||
|
||||
|
||||
def _buildcache_requirements() -> List[RequiredResponseType]:
|
||||
_buildcache_exes = {
|
||||
"file": _missing("file", "required to analyze files for buildcaches", system_only=False),
|
||||
("gpg2", "gpg"): _missing("gpg2", "required to sign/verify buildcaches", False),
|
||||
_buildcache_exes: Dict[ExecutablesType, str] = {
|
||||
("gpg2", "gpg"): _missing("gpg2", "required to sign/verify buildcaches", False)
|
||||
}
|
||||
if platform.system().lower() == "darwin":
|
||||
if sys.platform == "darwin":
|
||||
_buildcache_exes["otool"] = _missing("otool", "required to relocate binaries")
|
||||
|
||||
# Executables that are not bootstrapped yet
|
||||
result = [_required_system_executable(exe, msg) for exe, msg in _buildcache_exes.items()]
|
||||
|
||||
if platform.system().lower() == "linux":
|
||||
if sys.platform == "linux":
|
||||
result.append(
|
||||
_required_executable(
|
||||
"patchelf",
|
||||
|
@ -103,7 +103,7 @@
|
||||
from spack.phase_callbacks import run_after, run_before
|
||||
from spack.spec import InvalidSpecDetected, Spec
|
||||
from spack.util.executable import *
|
||||
from spack.util.filesystem import file_command, fix_darwin_install_name, mime_type
|
||||
from spack.util.filesystem import fix_darwin_install_name
|
||||
from spack.variant import any_combination_of, auto_or_any_combination_of, disjoint_sets
|
||||
from spack.version import Version, ver
|
||||
|
||||
|
@ -23,7 +23,6 @@
|
||||
import spack.store
|
||||
import spack.util.elf as elf
|
||||
import spack.util.executable as executable
|
||||
import spack.util.filesystem as ssys
|
||||
|
||||
from .relocate_text import BinaryFilePrefixReplacer, TextFilePrefixReplacer
|
||||
|
||||
@ -350,32 +349,6 @@ def _set_elf_rpaths_and_interpreter(
|
||||
return None
|
||||
|
||||
|
||||
def needs_binary_relocation(m_type, m_subtype):
    """Tell whether a file with the given MIME type/subtype needs binary relocation.

    Args:
        m_type (str): MIME type of the file
        m_subtype (str): MIME subtype of the file

    Returns:
        True if the type is ``application`` and the subtype is one of the executable,
        shared library, Mach-O or PIE executable subtypes; False otherwise.
    """
    relocatable_subtypes = ("x-executable", "x-sharedlib", "x-mach-binary", "x-pie-executable")
    return m_type == "application" and m_subtype in relocatable_subtypes
|
||||
|
||||
|
||||
def needs_text_relocation(m_type, m_subtype):
    """Tell whether a file with the given MIME type/subtype needs text relocation.

    Only the MIME type is inspected; the subtype is accepted but not used.

    Args:
        m_type (str): MIME type of the file
        m_subtype (str): MIME subtype of the file

    Returns:
        True if the MIME type is ``text``, False otherwise.
    """
    is_text = m_type == "text"
    return is_text
|
||||
|
||||
|
||||
def relocate_macho_binaries(
|
||||
path_names, old_layout_root, new_layout_root, prefix_to_prefix, rel, old_prefix, new_prefix
|
||||
):
|
||||
@ -623,24 +596,32 @@ def relocate_text_bin(binaries, prefixes):
|
||||
return BinaryFilePrefixReplacer.from_strings_or_bytes(prefixes).apply(binaries)
|
||||
|
||||
|
||||
def is_binary(filename):
|
||||
"""Returns true if a file is binary, False otherwise
|
||||
def is_macho_magic(magic: bytes) -> bool:
    """Return True if ``magic`` (the leading bytes of a file) looks like a Mach-O header.

    Thin 32/64-bit binaries of either endianness are recognized by their first four
    bytes. Universal binaries share the magic number 0xcafebabe with JVM class files,
    so bytes 4-8 are inspected as well: in Mach-O they hold the number of contained
    binaries, in a class file the Java version number. Fewer than 10 binaries are assumed
    to be in a universal binary.
    """
    # In order of popularity: 64-bit mach-o le/be, 32-bit mach-o le/be.
    thin_magics = (
        b"\xCF\xFA\xED\xFE",
        b"\xFE\xED\xFA\xCF",
        b"\xCE\xFA\xED\xFE",
        b"\xFE\xED\xFA\xCE",
    )
    if magic.startswith(thin_magics):
        return True

    # Universal binaries: 0xcafebabe be (most common?) or 0xbebafeca le (not sure if
    # exists). The byte order of the count follows the byte order of the magic.
    if magic.startswith(b"\xCA\xFE\xBA\xBE"):
        return int.from_bytes(magic[4:8], "big") < 10
    if magic.startswith(b"\xBE\xBA\xFE\xCA"):
        return int.from_bytes(magic[4:8], "little") < 10

    return False
|
||||
|
||||
Args:
|
||||
filename: file to be tested
|
||||
|
||||
Returns:
|
||||
True or False
|
||||
"""
|
||||
m_type, _ = ssys.mime_type(filename)
|
||||
def is_elf_magic(magic: bytes) -> bool:
    """Return True if ``magic`` begins with the four-byte ELF signature ``\\x7fELF``."""
    return magic[:4] == b"\x7FELF"
|
||||
|
||||
msg = "[{0}] -> ".format(filename)
|
||||
if m_type == "application":
|
||||
tty.debug(msg + "BINARY FILE")
|
||||
return True
|
||||
|
||||
tty.debug(msg + "TEXT FILE")
|
||||
return False
|
||||
def is_binary(filename: str) -> bool:
    """Returns true iff a file is likely binary

    Only the first 8 bytes of the file are read; the file counts as binary when they
    carry a Mach-O or an ELF magic number.
    """
    with open(filename, "rb") as stream:
        header = stream.read(8)

    if is_macho_magic(header):
        return True
    return is_elf_magic(header)
|
||||
|
||||
|
||||
# Memoize this due to repeated calls to libraries in the same directory.
|
||||
@ -649,6 +630,14 @@ def _exists_dir(dirname):
|
||||
return os.path.isdir(dirname)
|
||||
|
||||
|
||||
def is_macho_binary(path):
    """Return True if the file at ``path`` starts with a Mach-O magic number.

    Eight bytes are read rather than four: ``is_macho_magic`` inspects bytes 4-8 to tell
    a universal binary apart from a JVM class file, as both start with 0xcafebabe. With
    only four bytes that count would be decoded from an empty slice, which yields 0, and
    every class file would be reported as Mach-O.

    Args:
        path: path of the file to inspect

    Returns:
        True if the leading bytes look like Mach-O; False otherwise, including when the
        file cannot be opened or read.
    """
    try:
        with open(path, "rb") as f:
            return is_macho_magic(f.read(8))
    except OSError:
        return False
|
||||
|
||||
|
||||
def fixup_macos_rpath(root, filename):
|
||||
"""Apply rpath fixups to the given file.
|
||||
|
||||
@ -660,7 +649,8 @@ def fixup_macos_rpath(root, filename):
|
||||
True if fixups were applied, else False
|
||||
"""
|
||||
abspath = os.path.join(root, filename)
|
||||
if ssys.mime_type(abspath) != ("application", "x-mach-binary"):
|
||||
|
||||
if not is_macho_binary(abspath):
|
||||
return False
|
||||
|
||||
# Get Mach-O header commands
|
||||
|
@ -4,12 +4,8 @@
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
from collections import OrderedDict
|
||||
|
||||
from llnl.util.symlink import readlink, symlink
|
||||
|
||||
import spack.binary_distribution as bindist
|
||||
import spack.deptypes as dt
|
||||
@ -20,19 +16,6 @@
|
||||
import spack.store
|
||||
|
||||
|
||||
def _relocate_spliced_links(links, orig_prefix, new_prefix):
    """Re-linking function which differs from `relocate.relocate_links` by
    reading the old link rather than the new link, since the latter wasn't moved
    in our case. This still needs to be called after the copy to destination
    because it expects the new directory structure to be in place.

    Args:
        links: link paths relative to the prefixes
        orig_prefix: prefix the link targets currently point into
        new_prefix: prefix the recreated links should point into
    """
    old_root = str(orig_prefix)
    new_root = str(new_prefix)

    for link in links:
        link_target = readlink(os.path.join(orig_prefix, link))

        # Replace a leading old prefix literally. Building a regex from the prefix would
        # misread path characters such as "+" or "." as regex syntax, and backslashes in
        # the new prefix would be treated as escapes in the replacement.
        if link_target.startswith(old_root):
            link_target = new_root + link_target[len(old_root) :]

        new_link_path = os.path.join(new_prefix, link)
        os.unlink(new_link_path)
        symlink(link_target, new_link_path)
|
||||
|
||||
|
||||
def rewire(spliced_spec):
|
||||
"""Given a spliced spec, this function conducts all the rewiring on all
|
||||
nodes in the DAG of that spec."""
|
||||
@ -54,13 +37,17 @@ def rewire_node(spec, explicit):
|
||||
the splice. The resulting package is then 'installed.'"""
|
||||
tempdir = tempfile.mkdtemp()
|
||||
|
||||
# copy anything installed to a temporary directory
|
||||
shutil.copytree(spec.build_spec.prefix, os.path.join(tempdir, spec.dag_hash()))
|
||||
# Copy spec.build_spec.prefix to spec.prefix through a temporary tarball
|
||||
tarball = os.path.join(tempdir, f"{spec.dag_hash()}.tar.gz")
|
||||
bindist.create_tarball(spec.build_spec, tarball)
|
||||
|
||||
spack.hooks.pre_install(spec)
|
||||
bindist.extract_buildcache_tarball(tarball, destination=spec.prefix)
|
||||
buildinfo = bindist.read_buildinfo_file(spec.prefix)
|
||||
|
||||
# compute prefix-to-prefix for every node from the build spec to the spliced
|
||||
# spec
|
||||
prefix_to_prefix = OrderedDict({spec.build_spec.prefix: spec.prefix})
|
||||
prefix_to_prefix = {spec.build_spec.prefix: spec.prefix}
|
||||
build_spec_ids = set(id(s) for s in spec.build_spec.traverse(deptype=dt.ALL & ~dt.BUILD))
|
||||
for s in bindist.deps_to_relocate(spec):
|
||||
analog = s
|
||||
@ -77,19 +64,17 @@ def rewire_node(spec, explicit):
|
||||
|
||||
prefix_to_prefix[analog.prefix] = s.prefix
|
||||
|
||||
manifest = bindist.get_buildfile_manifest(spec.build_spec)
|
||||
platform = spack.platforms.by_name(spec.platform)
|
||||
|
||||
text_to_relocate = [
|
||||
os.path.join(tempdir, spec.dag_hash(), rel_path)
|
||||
for rel_path in manifest.get("text_to_relocate", [])
|
||||
os.path.join(spec.prefix, rel_path) for rel_path in buildinfo["relocate_textfiles"]
|
||||
]
|
||||
if text_to_relocate:
|
||||
relocate.relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix)
|
||||
|
||||
links = [os.path.join(spec.prefix, f) for f in buildinfo["relocate_links"]]
|
||||
relocate.relocate_links(links, prefix_to_prefix)
|
||||
bins_to_relocate = [
|
||||
os.path.join(tempdir, spec.dag_hash(), rel_path)
|
||||
for rel_path in manifest.get("binary_to_relocate", [])
|
||||
os.path.join(spec.prefix, rel_path) for rel_path in buildinfo["relocate_binaries"]
|
||||
]
|
||||
if bins_to_relocate:
|
||||
if "macho" in platform.binary_formats:
|
||||
@ -113,22 +98,18 @@ def rewire_node(spec, explicit):
|
||||
spec.prefix,
|
||||
)
|
||||
relocate.relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix)
|
||||
# Copy package into place, except for spec.json (because spec.json
|
||||
# describes the old spec and not the new spliced spec).
|
||||
shutil.copytree(
|
||||
os.path.join(tempdir, spec.dag_hash()),
|
||||
spec.prefix,
|
||||
ignore=shutil.ignore_patterns("spec.json", "install_manifest.json"),
|
||||
)
|
||||
if manifest.get("link_to_relocate"):
|
||||
_relocate_spliced_links(
|
||||
manifest.get("link_to_relocate"), spec.build_spec.prefix, spec.prefix
|
||||
)
|
||||
shutil.rmtree(tempdir)
|
||||
# Above, we did not copy spec.json: instead, here we write the new
|
||||
# (spliced) spec into spec.json, without this, Database.add would fail on
|
||||
# the next line (because it checks the spec.json in the prefix against the
|
||||
# spec being added to look for mismatches)
|
||||
install_manifest = os.path.join(
|
||||
spec.prefix,
|
||||
spack.store.STORE.layout.metadata_dir,
|
||||
spack.store.STORE.layout.manifest_file_name,
|
||||
)
|
||||
try:
|
||||
os.unlink(install_manifest)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
# Write the spliced spec into spec.json. Without this, Database.add would fail because it
|
||||
# checks the spec.json in the prefix against the spec being added to look for mismatches
|
||||
spack.store.STORE.layout.write_spec(spec, spack.store.STORE.layout.spec_file_path(spec))
|
||||
# add to database, not sure about explicit
|
||||
spack.store.STORE.db.add(spec, explicit=explicit)
|
||||
|
@ -23,7 +23,7 @@
|
||||
|
||||
import archspec.cpu
|
||||
|
||||
from llnl.util.filesystem import copy_tree, join_path, visit_directory_tree
|
||||
from llnl.util.filesystem import copy_tree, join_path
|
||||
from llnl.util.symlink import readlink
|
||||
|
||||
import spack.binary_distribution as bindist
|
||||
@ -43,7 +43,7 @@
|
||||
import spack.util.spack_yaml as syaml
|
||||
import spack.util.url as url_util
|
||||
import spack.util.web as web_util
|
||||
from spack.binary_distribution import CannotListKeys, GenerateIndexError, get_buildfile_manifest
|
||||
from spack.binary_distribution import CannotListKeys, GenerateIndexError
|
||||
from spack.directory_layout import DirectoryLayout
|
||||
from spack.paths import test_path
|
||||
from spack.spec import Spec
|
||||
@ -623,60 +623,21 @@ def test_FetchCacheError_pretty_printing_single():
|
||||
assert str_e.rstrip() == str_e
|
||||
|
||||
|
||||
def test_build_manifest_visitor(tmpdir):
    """Check that BuildManifestVisitor lists a hardlinked file once but lists every symlink."""
    on_linux = sys.platform.startswith("linux")
    dirname = "directory"
    filename = os.path.join("directory", "file")

    with tmpdir.as_cwd():
        # Create a file inside a directory
        os.mkdir(dirname)
        with open(filename, "wb") as handle:
            handle.write(b"example file")

        # Symlink the dir
        os.symlink(dirname, "symlink_to_directory")

        # Symlink the file
        os.symlink(filename, "symlink_to_file")

        # Hardlink the file
        os.link(filename, "hardlink_of_file")

        # Hardlinked symlinks: seems like this is only a thing on Linux,
        # on Darwin the symlink *target* is hardlinked, on Linux the
        # symlink *itself* is hardlinked.
        if on_linux:
            os.link("symlink_to_file", "hardlink_of_symlink_to_file")
            os.link("symlink_to_directory", "hardlink_of_symlink_to_directory")

    visitor = bindist.BuildManifestVisitor()
    visit_directory_tree(str(tmpdir), visitor)

    # We de-dupe hardlinks of files, so there should really be just one file
    assert len(visitor.files) == 1

    # We do not de-dupe symlinks, cause it's unclear how to update symlinks
    # in-place, preserving inodes. On Linux the count includes hardlinks of symlinks.
    expected_symlinks = 4 if on_linux else 2
    assert len(visitor.symlinks) == expected_symlinks

    with tmpdir.as_cwd():
        assert not any(os.path.islink(f) or os.path.isdir(f) for f in visitor.files)
        assert all(os.path.islink(f) for f in visitor.symlinks)
|
||||
|
||||
|
||||
def test_text_relocate_if_needed(install_mockery, temporary_store, mock_fetch, monkeypatch, capfd):
|
||||
def test_text_relocate_if_needed(install_mockery, temporary_store, mock_fetch, tmp_path):
|
||||
install_cmd("needs-text-relocation")
|
||||
spec = temporary_store.db.query_one("needs-text-relocation")
|
||||
tgz_path = tmp_path / "relocatable.tar.gz"
|
||||
bindist.create_tarball(spec, str(tgz_path))
|
||||
|
||||
specs = temporary_store.db.query("needs-text-relocation")
|
||||
assert len(specs) == 1
|
||||
manifest = get_buildfile_manifest(specs[0])
|
||||
# extract the .spack/binary_distribution file
|
||||
with tarfile.open(tgz_path) as tar:
|
||||
entry_name = next(x for x in tar.getnames() if x.endswith(".spack/binary_distribution"))
|
||||
bd_file = tar.extractfile(entry_name)
|
||||
manifest = syaml.load(bd_file)
|
||||
|
||||
assert join_path("bin", "exe") in manifest["text_to_relocate"]
|
||||
assert join_path("bin", "otherexe") not in manifest["text_to_relocate"]
|
||||
assert join_path("bin", "secretexe") not in manifest["text_to_relocate"]
|
||||
assert join_path("bin", "exe") in manifest["relocate_textfiles"]
|
||||
assert join_path("bin", "otherexe") not in manifest["relocate_textfiles"]
|
||||
assert join_path("bin", "secretexe") not in manifest["relocate_textfiles"]
|
||||
|
||||
|
||||
def test_etag_fetching_304():
|
||||
@ -917,7 +878,7 @@ def test_tarball_doesnt_include_buildinfo_twice(tmp_path: Path):
|
||||
tarball = str(tmp_path / "prefix.tar.gz")
|
||||
|
||||
bindist._do_create_tarball(
|
||||
tarfile_path=tarball, binaries_dir=str(p), buildinfo={"metadata": "new"}
|
||||
tarfile_path=tarball, prefix=str(p), buildinfo={"metadata": "new"}, prefixes_to_relocate=[]
|
||||
)
|
||||
|
||||
expected_prefix = str(p).lstrip("/")
|
||||
@ -926,7 +887,10 @@ def test_tarball_doesnt_include_buildinfo_twice(tmp_path: Path):
|
||||
# and that the tarball contains the new one, not the old one.
|
||||
with tarfile.open(tarball) as tar:
|
||||
assert syaml.load(tar.extractfile(f"{expected_prefix}/.spack/binary_distribution")) == {
|
||||
"metadata": "new"
|
||||
"metadata": "new",
|
||||
"relocate_binaries": [],
|
||||
"relocate_textfiles": [],
|
||||
"relocate_links": [],
|
||||
}
|
||||
assert tar.getnames() == [
|
||||
*_all_parents(expected_prefix),
|
||||
@ -951,11 +915,15 @@ def test_reproducible_tarball_is_reproducible(tmp_path: Path):
|
||||
|
||||
# Create a tarball with a certain mtime of bin/app
|
||||
os.utime(app, times=(0, 0))
|
||||
bindist._do_create_tarball(tarball_1, binaries_dir=str(p), buildinfo=buildinfo)
|
||||
bindist._do_create_tarball(
|
||||
tarball_1, prefix=str(p), buildinfo=buildinfo, prefixes_to_relocate=[]
|
||||
)
|
||||
|
||||
# Do it another time with different mtime of bin/app
|
||||
os.utime(app, times=(10, 10))
|
||||
bindist._do_create_tarball(tarball_2, binaries_dir=str(p), buildinfo=buildinfo)
|
||||
bindist._do_create_tarball(
|
||||
tarball_2, prefix=str(p), buildinfo=buildinfo, prefixes_to_relocate=[]
|
||||
)
|
||||
|
||||
# They should be bitwise identical:
|
||||
assert filecmp.cmp(tarball_1, tarball_2, shallow=False)
|
||||
@ -1001,7 +969,7 @@ def test_tarball_normalized_permissions(tmpdir):
|
||||
) as f:
|
||||
f.write("hello world")
|
||||
|
||||
bindist._do_create_tarball(tarball, binaries_dir=p.strpath, buildinfo={})
|
||||
bindist._do_create_tarball(tarball, prefix=p.strpath, buildinfo={}, prefixes_to_relocate=[])
|
||||
|
||||
expected_prefix = p.strpath.lstrip("/")
|
||||
|
||||
@ -1120,7 +1088,7 @@ def test_tarfile_of_spec_prefix(tmpdir):
|
||||
file = tmpdir.join("example.tar")
|
||||
|
||||
with tarfile.open(file, mode="w") as tar:
|
||||
bindist.tarfile_of_spec_prefix(tar, prefix.strpath)
|
||||
bindist.tarfile_of_spec_prefix(tar, prefix.strpath, prefixes_to_relocate=[])
|
||||
|
||||
expected_prefix = prefix.strpath.lstrip("/")
|
||||
|
||||
|
@ -36,8 +36,6 @@
|
||||
macho_find_paths,
|
||||
macho_make_paths_normal,
|
||||
macho_make_paths_relative,
|
||||
needs_binary_relocation,
|
||||
needs_text_relocation,
|
||||
relocate_links,
|
||||
relocate_text,
|
||||
)
|
||||
@ -193,16 +191,6 @@ def test_relocate_links(tmpdir):
|
||||
assert readlink("to_self_but_relative") == "relative"
|
||||
|
||||
|
||||
def test_needs_relocation():
    """Check which (type, subtype) pairs are reported as needing binary or text relocation."""
    binary_cases = (
        ("application", "x-sharedlib", True),
        ("application", "x-executable", True),
        ("application", "x-octet-stream", False),
        ("text", "x-", False),
        ("application", "x-mach-binary", True),
    )
    for m_type, m_subtype, expected in binary_cases:
        assert bool(needs_binary_relocation(m_type, m_subtype)) is expected

    text_cases = (
        ("text", "x-", True),
        ("symbolic link to", "x-", False),
    )
    for m_type, m_subtype, expected in text_cases:
        assert bool(needs_text_relocation(m_type, m_subtype)) is expected
|
||||
|
||||
|
||||
def test_replace_paths(tmpdir):
|
||||
with tmpdir.as_cwd():
|
||||
suffix = "dylib" if platform.system().lower() == "darwin" else "so"
|
||||
|
@ -10,7 +10,7 @@
|
||||
import tarfile
|
||||
from contextlib import closing, contextmanager
|
||||
from gzip import GzipFile
|
||||
from typing import Callable, Dict, Tuple
|
||||
from typing import Callable, Dict, List, Tuple
|
||||
|
||||
from llnl.util.symlink import readlink
|
||||
|
||||
@ -130,6 +130,15 @@ def default_path_to_name(path: str) -> str:
|
||||
return pathlib.PurePath(*p.parts[1:]).as_posix() if p.is_absolute() else p.as_posix()
|
||||
|
||||
|
||||
def default_add_file(tar: tarfile.TarFile, file_info: tarfile.TarInfo, path: str) -> None:
    """Add the file at ``path`` to ``tar`` using the header ``file_info``.

    The file is opened in binary mode and closed again once its contents have been
    handed to the archive.
    """
    with open(path, "rb") as stream:
        tar.addfile(file_info, fileobj=stream)
|
||||
|
||||
|
||||
def default_add_link(tar: tarfile.TarFile, file_info: tarfile.TarInfo, path: str) -> None:
    """Add the header-only entry ``file_info`` (e.g. a symlink or hardlink) to ``tar``.

    ``path`` is part of the callback signature but is not read here: no file
    contents are written for the entry.
    """
    tar.addfile(file_info, fileobj=None)
|
||||
|
||||
|
||||
def reproducible_tarfile_from_prefix(
|
||||
tar: tarfile.TarFile,
|
||||
prefix: str,
|
||||
@ -137,6 +146,9 @@ def reproducible_tarfile_from_prefix(
|
||||
include_parent_directories: bool = False,
|
||||
skip: Callable[[os.DirEntry], bool] = lambda entry: False,
|
||||
path_to_name: Callable[[str], str] = default_path_to_name,
|
||||
add_file: Callable[[tarfile.TarFile, tarfile.TarInfo, str], None] = default_add_file,
|
||||
add_symlink: Callable[[tarfile.TarFile, tarfile.TarInfo, str], None] = default_add_link,
|
||||
add_hardlink: Callable[[tarfile.TarFile, tarfile.TarInfo, str], None] = default_add_link,
|
||||
) -> None:
|
||||
"""Create a tarball from a given directory. Only adds regular files, symlinks and dirs.
|
||||
Skips devices, fifos. Preserves hardlinks. Normalizes permissions like git. Tar entries are
|
||||
@ -170,8 +182,10 @@ def reproducible_tarfile_from_prefix(
|
||||
tar.addfile(dir_info)
|
||||
|
||||
dir_stack = [prefix]
|
||||
new_dirs: List[str] = []
|
||||
while dir_stack:
|
||||
dir = dir_stack.pop()
|
||||
new_dirs.clear()
|
||||
|
||||
# Add the dir before its contents
|
||||
dir_info = tarfile.TarInfo(path_to_name(dir))
|
||||
@ -183,7 +197,6 @@ def reproducible_tarfile_from_prefix(
|
||||
with os.scandir(dir) as it:
|
||||
entries = sorted(it, key=lambda entry: entry.name)
|
||||
|
||||
new_dirs = []
|
||||
for entry in entries:
|
||||
if skip(entry):
|
||||
continue
|
||||
@ -201,7 +214,7 @@ def reproducible_tarfile_from_prefix(
|
||||
# st_mode field of the stat structure is unspecified." So we set it to
|
||||
# something sensible without lstat'ing the link.
|
||||
file_info.mode = 0o755
|
||||
tar.addfile(file_info)
|
||||
add_symlink(tar, file_info, entry.path)
|
||||
|
||||
elif entry.is_file(follow_symlinks=False):
|
||||
# entry.stat has zero (st_ino, st_dev, st_nlink) on Windows: use lstat.
|
||||
@ -216,15 +229,13 @@ def reproducible_tarfile_from_prefix(
|
||||
if ident in hardlink_to_tarinfo_name:
|
||||
file_info.type = tarfile.LNKTYPE
|
||||
file_info.linkname = hardlink_to_tarinfo_name[ident]
|
||||
tar.addfile(file_info)
|
||||
add_hardlink(tar, file_info, entry.path)
|
||||
continue
|
||||
hardlink_to_tarinfo_name[ident] = file_info.name
|
||||
|
||||
# If file not yet seen, copy it
|
||||
file_info.type = tarfile.REGTYPE
|
||||
file_info.size = s.st_size
|
||||
|
||||
with open(entry.path, "rb") as f:
|
||||
tar.addfile(file_info, f)
|
||||
add_file(tar, file_info, entry.path)
|
||||
|
||||
dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical
|
||||
|
@ -10,62 +10,10 @@
|
||||
|
||||
import glob
|
||||
import os
|
||||
import sys
|
||||
|
||||
from llnl.util import tty
|
||||
from llnl.util.filesystem import edit_in_place_through_temporary_file
|
||||
from llnl.util.lang import memoized
|
||||
|
||||
from spack.util.executable import Executable, which
|
||||
|
||||
|
||||
def _ensure_file_on_win():
|
||||
"""Ensures the file command is available on Windows
|
||||
If not, it is bootstrapped.
|
||||
No-op on all other platforms"""
|
||||
if sys.platform != "win32":
|
||||
return
|
||||
import spack.bootstrap
|
||||
|
||||
with spack.bootstrap.ensure_bootstrap_configuration():
|
||||
spack.bootstrap.ensure_file_in_path_or_raise()
|
||||
|
||||
|
||||
@memoized
def file_command(*args):
    """Create an entry point to the system ``file`` command.

    Every value in ``args`` is registered as a default argument of the returned
    executable. The lookup of ``file`` is required to succeed.
    """
    _ensure_file_on_win()
    executable = which("file", required=True)
    for default_arg in args:
        executable.add_default_arg(default_arg)
    return executable
|
||||
|
||||
|
||||
@memoized
def _get_mime_type():
    """Return the ``file`` command entry point set up to report the MIME type
    of a specified path.
    """
    if sys.platform == "win32":
        # -h option (no-dereference) does not exist in Windows
        flags = ("-b", "--mime-type")
    else:
        flags = ("-b", "-h", "--mime-type")
    return file_command(*flags)
|
||||
|
||||
|
||||
def mime_type(filename):
    """Returns the mime type and subtype of a file.

    Args:
        filename: file to be analyzed

    Returns:
        Tuple containing the MIME type and subtype. The subtype is the empty
        string when the command output contains no "/".
    """
    file_cmd = _get_mime_type()
    output = file_cmd(filename, output=str, error=str).strip()
    tty.debug("==> " + output)
    # Split "type/subtype" at the first slash only.
    main_type, _separator, subtype = output.partition("/")
    return main_type, subtype
|
||||
from spack.util.executable import Executable
|
||||
|
||||
|
||||
def fix_darwin_install_name(path):
|
||||
|
@ -1,4 +0,0 @@
|
||||
from spack.util.filesystem import file_command
|
||||
|
||||
if __name__ == "__main__":
|
||||
file_command()
|
Loading…
Reference in New Issue
Block a user