build caches: collect files to relocate while tarballing w/o file (#48212)

A few changes to tarball creation (for build caches):
- do not run the `file` utility to distinguish binary from text
- `file` is slow, even when running it in a batched fashion -- it usually reads all bytes and has slow logic to categorize specific types
- we don't need a highly detailed file categorization; a crude categorization of elf, mach-o, text suffices.
detecting elf and mach-o is straightforward and cheap
- detecting utf-8 (and with that ascii) is highly accurate: the false positive rate decays exponentially as file size increases. Further, it's not only the most common encoding, but also the most common file type in package prefixes.
iso-8859-1 is cheaply (but heuristically) detected too, and sufficiently accurate after binaries and utf-8 files are classified earlier
- remove `file` as a dependency of Spack in general, which makes Spack itself easier to install
- detect file type and need to relocate as part of creating the tarball, which is more cache friendly and thus faster
This commit is contained in:
Harmen Stoppels 2024-12-24 18:53:13 +01:00 committed by GitHub
parent aca469b329
commit e9cdcc4af0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 249 additions and 411 deletions

View File

@ -161,11 +161,7 @@ jobs:
source share/spack/setup-env.sh source share/spack/setup-env.sh
spack -d gpg list spack -d gpg list
tree $HOME/.spack/bootstrap/store/ tree $HOME/.spack/bootstrap/store/
- name: Bootstrap File
run: |
source share/spack/setup-env.sh
spack -d python share/spack/qa/bootstrap-file.py
tree $HOME/.spack/bootstrap/store/
windows: windows:
runs-on: "windows-latest" runs-on: "windows-latest"
@ -196,9 +192,3 @@ jobs:
spack -d gpg list spack -d gpg list
./share/spack/qa/validate_last_exit.ps1 ./share/spack/qa/validate_last_exit.ps1
tree $env:userprofile/.spack/bootstrap/store/ tree $env:userprofile/.spack/bootstrap/store/
- name: Bootstrap File
run: |
./share/spack/setup-env.ps1
spack -d python share/spack/qa/bootstrap-file.py
./share/spack/qa/validate_last_exit.ps1
tree $env:userprofile/.spack/bootstrap/store/

View File

@ -140,7 +140,7 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
dnf install -y \ dnf install -y \
bzip2 curl file gcc-c++ gcc gcc-gfortran git gnupg2 gzip \ bzip2 curl gcc-c++ gcc gcc-gfortran git gnupg2 gzip \
make patch tcl unzip which xz make patch tcl unzip which xz
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup repo and non-root user - name: Setup repo and non-root user

View File

@ -35,7 +35,7 @@ A build matrix showing which packages are working on which systems is shown belo
.. code-block:: console .. code-block:: console
apt update apt update
apt install bzip2 ca-certificates file g++ gcc gfortran git gzip lsb-release patch python3 tar unzip xz-utils zstd apt install bzip2 ca-certificates g++ gcc gfortran git gzip lsb-release patch python3 tar unzip xz-utils zstd
.. tab-item:: RHEL .. tab-item:: RHEL

View File

@ -8,7 +8,6 @@ unzip, , , Compress/Decompress archives
bzip2, , , Compress/Decompress archives bzip2, , , Compress/Decompress archives
xz, , , Compress/Decompress archives xz, , , Compress/Decompress archives
zstd, , Optional, Compress/Decompress archives zstd, , Optional, Compress/Decompress archives
file, , , Create/Use Buildcaches
lsb-release, , , Linux: identify operating system version lsb-release, , , Linux: identify operating system version
gnupg2, , , Sign/Verify Buildcaches gnupg2, , , Sign/Verify Buildcaches
git, , , Manage Software Repositories git, , , Manage Software Repositories

1 Name Supported Versions Notes Requirement Reason
8 bzip2 Compress/Decompress archives
9 xz Compress/Decompress archives
10 zstd Optional Compress/Decompress archives
file Create/Use Buildcaches
11 lsb-release Linux: identify operating system version
12 gnupg2 Sign/Verify Buildcaches
13 git Manage Software Repositories

View File

@ -863,8 +863,10 @@ def elide_list(line_list: List[str], max_num: int = 10) -> List[str]:
if sys.version_info >= (3, 9): if sys.version_info >= (3, 9):
PatternStr = re.Pattern[str] PatternStr = re.Pattern[str]
PatternBytes = re.Pattern[bytes]
else: else:
PatternStr = typing.Pattern[str] PatternStr = typing.Pattern[str]
PatternBytes = typing.Pattern[bytes]
def fnmatch_translate_multiple(named_patterns: Dict[str, str]) -> str: def fnmatch_translate_multiple(named_patterns: Dict[str, str]) -> str:

View File

@ -24,13 +24,12 @@
import urllib.request import urllib.request
import warnings import warnings
from contextlib import closing from contextlib import closing
from typing import Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Union from typing import IO, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Union
import llnl.util.filesystem as fsys import llnl.util.filesystem as fsys
import llnl.util.lang import llnl.util.lang
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import BaseDirectoryVisitor, mkdirp, visit_directory_tree from llnl.util.filesystem import mkdirp
from llnl.util.symlink import readlink
import spack.caches import spack.caches
import spack.config as config import spack.config as config
@ -54,7 +53,6 @@
import spack.util.archive import spack.util.archive
import spack.util.crypto import spack.util.crypto
import spack.util.file_cache as file_cache import spack.util.file_cache as file_cache
import spack.util.filesystem as ssys
import spack.util.gpg import spack.util.gpg
import spack.util.parallel import spack.util.parallel
import spack.util.path import spack.util.path
@ -587,129 +585,11 @@ def read_buildinfo_file(prefix):
return syaml.load(f) return syaml.load(f)
class BuildManifestVisitor(BaseDirectoryVisitor): def file_matches(f: IO[bytes], regex: llnl.util.lang.PatternBytes) -> bool:
"""Visitor that collects a list of files and symlinks try:
that can be checked for need of relocation. It knows how return bool(regex.search(f.read()))
to dedupe hardlinks and deal with symlinks to files and finally:
directories.""" f.seek(0)
def __init__(self):
# Save unique identifiers of hardlinks to avoid relocating them multiple times
self.visited = set()
# Lists of files we will check
self.files = []
self.symlinks = []
def seen_before(self, root, rel_path):
stat_result = os.lstat(os.path.join(root, rel_path))
if stat_result.st_nlink == 1:
return False
identifier = (stat_result.st_dev, stat_result.st_ino)
if identifier in self.visited:
return True
else:
self.visited.add(identifier)
return False
def visit_file(self, root, rel_path, depth):
if self.seen_before(root, rel_path):
return
self.files.append(rel_path)
def visit_symlinked_file(self, root, rel_path, depth):
# Note: symlinks *can* be hardlinked, but it is unclear if
# symlinks can be relinked in-place (preserving inode).
# Therefore, we do *not* de-dupe hardlinked symlinks.
self.symlinks.append(rel_path)
def before_visit_dir(self, root, rel_path, depth):
return os.path.basename(rel_path) not in (".spack", "man")
def before_visit_symlinked_dir(self, root, rel_path, depth):
# Treat symlinked directories simply as symlinks.
self.visit_symlinked_file(root, rel_path, depth)
# Never recurse into symlinked directories.
return False
def file_matches(path, regex):
with open(path, "rb") as f:
contents = f.read()
return bool(regex.search(contents))
def get_buildfile_manifest(spec):
"""
Return a data structure with information about a build, including
text_to_relocate, binary_to_relocate, binary_to_relocate_fullpath
link_to_relocate, and other, which means it doesn't fit any of previous
checks (and should not be relocated). We exclude docs (man) and
metadata (.spack). This can be used to find a particular kind of file
in spack, or to generate the build metadata.
"""
data = {
"text_to_relocate": [],
"binary_to_relocate": [],
"link_to_relocate": [],
"other": [],
"binary_to_relocate_fullpath": [],
"hardlinks_deduped": True,
}
# Guard against filesystem footguns of hardlinks and symlinks by using
# a visitor to retrieve a list of files and symlinks, so we don't have
# to worry about hardlinks of symlinked dirs and what not.
visitor = BuildManifestVisitor()
root = spec.prefix
visit_directory_tree(root, visitor)
# Collect a list of prefixes for this package and it's dependencies, Spack will
# look for them to decide if text file needs to be relocated or not
prefixes = [d.prefix for d in spec.traverse(root=True, deptype="all") if not d.external]
prefixes.append(spack.hooks.sbang.sbang_install_path())
prefixes.append(str(spack.store.STORE.layout.root))
# Create a giant regex that matches all prefixes
regex = utf8_paths_to_single_binary_regex(prefixes)
# Symlinks.
# Obvious bugs:
# 1. relative links are not relocated.
# 2. paths are used as strings.
for rel_path in visitor.symlinks:
abs_path = os.path.join(root, rel_path)
link = readlink(abs_path)
if os.path.isabs(link) and link.startswith(spack.store.STORE.layout.root):
data["link_to_relocate"].append(rel_path)
# Non-symlinks.
for rel_path in visitor.files:
abs_path = os.path.join(root, rel_path)
m_type, m_subtype = ssys.mime_type(abs_path)
if relocate.needs_binary_relocation(m_type, m_subtype):
# Why is this branch not part of needs_binary_relocation? :(
if (
(
m_subtype in ("x-executable", "x-sharedlib", "x-pie-executable")
and sys.platform != "darwin"
)
or (m_subtype in ("x-mach-binary") and sys.platform == "darwin")
or (not rel_path.endswith(".o"))
):
data["binary_to_relocate"].append(rel_path)
data["binary_to_relocate_fullpath"].append(abs_path)
continue
elif relocate.needs_text_relocation(m_type, m_subtype) and file_matches(abs_path, regex):
data["text_to_relocate"].append(rel_path)
continue
data["other"].append(abs_path)
return data
def deps_to_relocate(spec): def deps_to_relocate(spec):
@ -742,17 +622,15 @@ def deps_to_relocate(spec):
def get_buildinfo_dict(spec): def get_buildinfo_dict(spec):
"""Create metadata for a tarball""" """Create metadata for a tarball"""
manifest = get_buildfile_manifest(spec)
return { return {
"sbang_install_path": spack.hooks.sbang.sbang_install_path(), "sbang_install_path": spack.hooks.sbang.sbang_install_path(),
"buildpath": spack.store.STORE.layout.root, "buildpath": spack.store.STORE.layout.root,
"spackprefix": spack.paths.prefix, "spackprefix": spack.paths.prefix,
"relative_prefix": os.path.relpath(spec.prefix, spack.store.STORE.layout.root), "relative_prefix": os.path.relpath(spec.prefix, spack.store.STORE.layout.root),
"relocate_textfiles": manifest["text_to_relocate"], # "relocate_textfiles": [],
"relocate_binaries": manifest["binary_to_relocate"], # "relocate_binaries": [],
"relocate_links": manifest["link_to_relocate"], # "relocate_links": [],
"hardlinks_deduped": manifest["hardlinks_deduped"], "hardlinks_deduped": True,
"hash_to_prefix": {d.dag_hash(): str(d.prefix) for d in deps_to_relocate(spec)}, "hash_to_prefix": {d.dag_hash(): str(d.prefix) for d in deps_to_relocate(spec)},
} }
@ -1042,7 +920,55 @@ def generate_key_index(key_prefix: str, tmpdir: str) -> None:
) from e ) from e
def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None: class FileTypes:
BINARY = 0
TEXT = 1
UNKNOWN = 2
NOT_ISO8859_1_TEXT = re.compile(b"[\x00\x7F-\x9F]")
def file_type(f: IO[bytes]) -> int:
try:
# first check if this is an ELF or mach-o binary.
magic = f.read(8)
if len(magic) < 8:
return FileTypes.UNKNOWN
elif relocate.is_elf_magic(magic) or relocate.is_macho_magic(magic):
return FileTypes.BINARY
f.seek(0)
# Then try utf-8, which has a fast exponential decay in false positive rate with file size.
# Use chunked reads for fast early exit.
f_txt = io.TextIOWrapper(f, encoding="utf-8", errors="strict")
try:
while f_txt.read(1024):
pass
return FileTypes.TEXT
except UnicodeError:
f_txt.seek(0)
pass
finally:
f_txt.detach()
# Finally try iso-8859-1 heuristically. In Python, all possible 256 byte values are valid.
# We classify it as text if it does not contain any control characters / null bytes.
data = f.read(1024)
while data:
if NOT_ISO8859_1_TEXT.search(data):
break
data = f.read(1024)
else:
return FileTypes.TEXT
return FileTypes.UNKNOWN
finally:
f.seek(0)
def tarfile_of_spec_prefix(
tar: tarfile.TarFile, prefix: str, prefixes_to_relocate: List[str]
) -> dict:
"""Create a tarfile of an install prefix of a spec. Skips existing buildinfo file. """Create a tarfile of an install prefix of a spec. Skips existing buildinfo file.
Args: Args:
@ -1058,6 +984,33 @@ def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None:
except OSError: except OSError:
skip = lambda entry: False skip = lambda entry: False
binary_regex = utf8_paths_to_single_binary_regex(prefixes_to_relocate)
relocate_binaries = []
relocate_links = []
relocate_textfiles = []
# use callbacks to add files and symlinks, so we can register which files need relocation upon
# extraction.
def add_file(tar: tarfile.TarFile, info: tarfile.TarInfo, path: str):
with open(path, "rb") as f:
relpath = os.path.relpath(path, prefix)
# no need to relocate anything in the .spack directory
if relpath.split(os.sep, 1)[0] == ".spack":
tar.addfile(info, f)
return
f_type = file_type(f)
if f_type == FileTypes.BINARY:
relocate_binaries.append(os.path.relpath(path, prefix))
elif f_type == FileTypes.TEXT and file_matches(f, binary_regex):
relocate_textfiles.append(os.path.relpath(path, prefix))
tar.addfile(info, f)
def add_symlink(tar: tarfile.TarFile, info: tarfile.TarInfo, path: str):
if os.path.isabs(info.linkname) and binary_regex.match(info.linkname.encode("utf-8")):
relocate_links.append(os.path.relpath(path, prefix))
tar.addfile(info)
spack.util.archive.reproducible_tarfile_from_prefix( spack.util.archive.reproducible_tarfile_from_prefix(
tar, tar,
prefix, prefix,
@ -1065,29 +1018,51 @@ def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None:
# used in runtimes like AWS lambda. # used in runtimes like AWS lambda.
include_parent_directories=True, include_parent_directories=True,
skip=skip, skip=skip,
add_file=add_file,
add_symlink=add_symlink,
)
return {
"relocate_binaries": relocate_binaries,
"relocate_links": relocate_links,
"relocate_textfiles": relocate_textfiles,
}
def create_tarball(spec: spack.spec.Spec, tarfile_path: str) -> Tuple[str, str]:
"""Create a tarball of a spec and return the checksums of the compressed tarfile and the
uncompressed tarfile."""
return _do_create_tarball(
tarfile_path,
spec.prefix,
buildinfo=get_buildinfo_dict(spec),
prefixes_to_relocate=prefixes_to_relocate(spec),
) )
def _do_create_tarball(tarfile_path: str, binaries_dir: str, buildinfo: dict): def _do_create_tarball(
tarfile_path: str, prefix: str, buildinfo: dict, prefixes_to_relocate: List[str]
) -> Tuple[str, str]:
with spack.util.archive.gzip_compressed_tarfile(tarfile_path) as ( with spack.util.archive.gzip_compressed_tarfile(tarfile_path) as (
tar, tar,
inner_checksum, tar_gz_checksum,
outer_checksum, tar_checksum,
): ):
# Tarball the install prefix # Tarball the install prefix
tarfile_of_spec_prefix(tar, binaries_dir) files_to_relocate = tarfile_of_spec_prefix(tar, prefix, prefixes_to_relocate)
buildinfo.update(files_to_relocate)
# Serialize buildinfo for the tarball # Serialize buildinfo for the tarball
bstring = syaml.dump(buildinfo, default_flow_style=True).encode("utf-8") bstring = syaml.dump(buildinfo, default_flow_style=True).encode("utf-8")
tarinfo = tarfile.TarInfo( tarinfo = tarfile.TarInfo(
name=spack.util.archive.default_path_to_name(buildinfo_file_name(binaries_dir)) name=spack.util.archive.default_path_to_name(buildinfo_file_name(prefix))
) )
tarinfo.type = tarfile.REGTYPE tarinfo.type = tarfile.REGTYPE
tarinfo.size = len(bstring) tarinfo.size = len(bstring)
tarinfo.mode = 0o644 tarinfo.mode = 0o644
tar.addfile(tarinfo, io.BytesIO(bstring)) tar.addfile(tarinfo, io.BytesIO(bstring))
return inner_checksum.hexdigest(), outer_checksum.hexdigest() return tar_gz_checksum.hexdigest(), tar_checksum.hexdigest()
class ExistsInBuildcache(NamedTuple): class ExistsInBuildcache(NamedTuple):
@ -1137,6 +1112,13 @@ def _exists_in_buildcache(spec: spack.spec.Spec, tmpdir: str, out_url: str) -> E
return ExistsInBuildcache(signed, unsigned, tarball) return ExistsInBuildcache(signed, unsigned, tarball)
def prefixes_to_relocate(spec):
prefixes = [s.prefix for s in deps_to_relocate(spec)]
prefixes.append(spack.hooks.sbang.sbang_install_path())
prefixes.append(str(spack.store.STORE.layout.root))
return prefixes
def _url_upload_tarball_and_specfile( def _url_upload_tarball_and_specfile(
spec: spack.spec.Spec, spec: spack.spec.Spec,
tmpdir: str, tmpdir: str,
@ -1146,7 +1128,7 @@ def _url_upload_tarball_and_specfile(
): ):
files = BuildcacheFiles(spec, tmpdir, out_url) files = BuildcacheFiles(spec, tmpdir, out_url)
tarball = files.local_tarball() tarball = files.local_tarball()
checksum, _ = _do_create_tarball(tarball, spec.prefix, get_buildinfo_dict(spec)) checksum, _ = create_tarball(spec, tarball)
spec_dict = spec.to_dict(hash=ht.dag_hash) spec_dict = spec.to_dict(hash=ht.dag_hash)
spec_dict["buildcache_layout_version"] = CURRENT_BUILD_CACHE_LAYOUT_VERSION spec_dict["buildcache_layout_version"] = CURRENT_BUILD_CACHE_LAYOUT_VERSION
spec_dict["binary_cache_checksum"] = {"hash_algorithm": "sha256", "hash": checksum} spec_dict["binary_cache_checksum"] = {"hash_algorithm": "sha256", "hash": checksum}
@ -1470,13 +1452,11 @@ def _oci_push_pkg_blob(
filename = os.path.join(tmpdir, f"{spec.dag_hash()}.tar.gz") filename = os.path.join(tmpdir, f"{spec.dag_hash()}.tar.gz")
# Create an oci.image.layer aka tarball of the package # Create an oci.image.layer aka tarball of the package
compressed_tarfile_checksum, tarfile_checksum = _do_create_tarball( tar_gz_checksum, tar_checksum = create_tarball(spec, filename)
filename, spec.prefix, get_buildinfo_dict(spec)
)
blob = spack.oci.oci.Blob( blob = spack.oci.oci.Blob(
Digest.from_sha256(compressed_tarfile_checksum), Digest.from_sha256(tar_gz_checksum),
Digest.from_sha256(tarfile_checksum), Digest.from_sha256(tar_checksum),
os.path.getsize(filename), os.path.getsize(filename),
) )
@ -2435,6 +2415,14 @@ def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
yield m yield m
def extract_buildcache_tarball(tarfile_path: str, destination: str) -> None:
with closing(tarfile.open(tarfile_path, "r")) as tar:
# Remove common prefix from tarball entries and directly extract them to the install dir.
tar.extractall(
path=destination, members=_tar_strip_component(tar, prefix=_ensure_common_prefix(tar))
)
def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
""" """
extract binary tarball for given package into install area extract binary tarball for given package into install area
@ -2504,12 +2492,7 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER):
tarfile_path, size, contents, "sha256", expected, local_checksum tarfile_path, size, contents, "sha256", expected, local_checksum
) )
try: try:
with closing(tarfile.open(tarfile_path, "r")) as tar: extract_buildcache_tarball(tarfile_path, destination=spec.prefix)
# Remove install prefix from tarfil to extract directly into spec.prefix
tar.extractall(
path=spec.prefix,
members=_tar_strip_component(tar, prefix=_ensure_common_prefix(tar)),
)
except Exception: except Exception:
shutil.rmtree(spec.prefix, ignore_errors=True) shutil.rmtree(spec.prefix, ignore_errors=True)
_delete_staged_downloads(download_result) _delete_staged_downloads(download_result)

View File

@ -9,7 +9,6 @@
all_core_root_specs, all_core_root_specs,
ensure_clingo_importable_or_raise, ensure_clingo_importable_or_raise,
ensure_core_dependencies, ensure_core_dependencies,
ensure_file_in_path_or_raise,
ensure_gpg_in_path_or_raise, ensure_gpg_in_path_or_raise,
ensure_patchelf_in_path_or_raise, ensure_patchelf_in_path_or_raise,
) )
@ -20,7 +19,6 @@
"is_bootstrapping", "is_bootstrapping",
"ensure_bootstrap_configuration", "ensure_bootstrap_configuration",
"ensure_core_dependencies", "ensure_core_dependencies",
"ensure_file_in_path_or_raise",
"ensure_gpg_in_path_or_raise", "ensure_gpg_in_path_or_raise",
"ensure_clingo_importable_or_raise", "ensure_clingo_importable_or_raise",
"ensure_patchelf_in_path_or_raise", "ensure_patchelf_in_path_or_raise",

View File

@ -481,19 +481,6 @@ def ensure_gpg_in_path_or_raise() -> None:
) )
def file_root_spec() -> str:
"""Return the root spec used to bootstrap file"""
root_spec_name = "win-file" if IS_WINDOWS else "file"
return _root_spec(root_spec_name)
def ensure_file_in_path_or_raise() -> None:
"""Ensure file is in the PATH or raise"""
return ensure_executables_in_path_or_raise(
executables=["file"], abstract_spec=file_root_spec()
)
def patchelf_root_spec() -> str: def patchelf_root_spec() -> str:
"""Return the root spec used to bootstrap patchelf""" """Return the root spec used to bootstrap patchelf"""
# 0.13.1 is the last version not to require C++17. # 0.13.1 is the last version not to require C++17.
@ -577,15 +564,13 @@ def ensure_core_dependencies() -> None:
"""Ensure the presence of all the core dependencies.""" """Ensure the presence of all the core dependencies."""
if sys.platform.lower() == "linux": if sys.platform.lower() == "linux":
ensure_patchelf_in_path_or_raise() ensure_patchelf_in_path_or_raise()
elif sys.platform == "win32":
ensure_file_in_path_or_raise()
ensure_gpg_in_path_or_raise() ensure_gpg_in_path_or_raise()
ensure_clingo_importable_or_raise() ensure_clingo_importable_or_raise()
def all_core_root_specs() -> List[str]: def all_core_root_specs() -> List[str]:
"""Return a list of all the core root specs that may be used to bootstrap Spack""" """Return a list of all the core root specs that may be used to bootstrap Spack"""
return [clingo_root_spec(), gnupg_root_spec(), patchelf_root_spec(), file_root_spec()] return [clingo_root_spec(), gnupg_root_spec(), patchelf_root_spec()]
def bootstrapping_sources(scope: Optional[str] = None): def bootstrapping_sources(scope: Optional[str] = None):

View File

@ -3,8 +3,8 @@
# #
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""Query the status of bootstrapping on this machine""" """Query the status of bootstrapping on this machine"""
import platform import sys
from typing import List, Optional, Sequence, Tuple, Union from typing import Dict, List, Optional, Sequence, Tuple, Union
import spack.util.executable import spack.util.executable
@ -72,7 +72,7 @@ def _core_requirements() -> List[RequiredResponseType]:
"bzip2": _missing("bzip2", "required to compress/decompress code archives"), "bzip2": _missing("bzip2", "required to compress/decompress code archives"),
"git": _missing("git", "required to fetch/manage git repositories"), "git": _missing("git", "required to fetch/manage git repositories"),
} }
if platform.system().lower() == "linux": if sys.platform == "linux":
_core_system_exes["xz"] = _missing("xz", "required to compress/decompress code archives") _core_system_exes["xz"] = _missing("xz", "required to compress/decompress code archives")
# Executables that are not bootstrapped yet # Executables that are not bootstrapped yet
@ -87,17 +87,16 @@ def _core_requirements() -> List[RequiredResponseType]:
def _buildcache_requirements() -> List[RequiredResponseType]: def _buildcache_requirements() -> List[RequiredResponseType]:
_buildcache_exes = { _buildcache_exes: Dict[ExecutablesType, str] = {
"file": _missing("file", "required to analyze files for buildcaches", system_only=False), ("gpg2", "gpg"): _missing("gpg2", "required to sign/verify buildcaches", False)
("gpg2", "gpg"): _missing("gpg2", "required to sign/verify buildcaches", False),
} }
if platform.system().lower() == "darwin": if sys.platform == "darwin":
_buildcache_exes["otool"] = _missing("otool", "required to relocate binaries") _buildcache_exes["otool"] = _missing("otool", "required to relocate binaries")
# Executables that are not bootstrapped yet # Executables that are not bootstrapped yet
result = [_required_system_executable(exe, msg) for exe, msg in _buildcache_exes.items()] result = [_required_system_executable(exe, msg) for exe, msg in _buildcache_exes.items()]
if platform.system().lower() == "linux": if sys.platform == "linux":
result.append( result.append(
_required_executable( _required_executable(
"patchelf", "patchelf",

View File

@ -103,7 +103,7 @@
from spack.phase_callbacks import run_after, run_before from spack.phase_callbacks import run_after, run_before
from spack.spec import InvalidSpecDetected, Spec from spack.spec import InvalidSpecDetected, Spec
from spack.util.executable import * from spack.util.executable import *
from spack.util.filesystem import file_command, fix_darwin_install_name, mime_type from spack.util.filesystem import fix_darwin_install_name
from spack.variant import any_combination_of, auto_or_any_combination_of, disjoint_sets from spack.variant import any_combination_of, auto_or_any_combination_of, disjoint_sets
from spack.version import Version, ver from spack.version import Version, ver

View File

@ -23,7 +23,6 @@
import spack.store import spack.store
import spack.util.elf as elf import spack.util.elf as elf
import spack.util.executable as executable import spack.util.executable as executable
import spack.util.filesystem as ssys
from .relocate_text import BinaryFilePrefixReplacer, TextFilePrefixReplacer from .relocate_text import BinaryFilePrefixReplacer, TextFilePrefixReplacer
@ -350,32 +349,6 @@ def _set_elf_rpaths_and_interpreter(
return None return None
def needs_binary_relocation(m_type, m_subtype):
"""Returns True if the file with MIME type/subtype passed as arguments
needs binary relocation, False otherwise.
Args:
m_type (str): MIME type of the file
m_subtype (str): MIME subtype of the file
"""
subtypes = ("x-executable", "x-sharedlib", "x-mach-binary", "x-pie-executable")
if m_type == "application":
if m_subtype in subtypes:
return True
return False
def needs_text_relocation(m_type, m_subtype):
"""Returns True if the file with MIME type/subtype passed as arguments
needs text relocation, False otherwise.
Args:
m_type (str): MIME type of the file
m_subtype (str): MIME subtype of the file
"""
return m_type == "text"
def relocate_macho_binaries( def relocate_macho_binaries(
path_names, old_layout_root, new_layout_root, prefix_to_prefix, rel, old_prefix, new_prefix path_names, old_layout_root, new_layout_root, prefix_to_prefix, rel, old_prefix, new_prefix
): ):
@ -623,24 +596,32 @@ def relocate_text_bin(binaries, prefixes):
return BinaryFilePrefixReplacer.from_strings_or_bytes(prefixes).apply(binaries) return BinaryFilePrefixReplacer.from_strings_or_bytes(prefixes).apply(binaries)
def is_binary(filename): def is_macho_magic(magic: bytes) -> bool:
"""Returns true if a file is binary, False otherwise return (
# In order of popularity: 64-bit mach-o le/be, 32-bit mach-o le/be.
magic.startswith(b"\xCF\xFA\xED\xFE")
or magic.startswith(b"\xFE\xED\xFA\xCF")
or magic.startswith(b"\xCE\xFA\xED\xFE")
or magic.startswith(b"\xFE\xED\xFA\xCE")
# universal binaries: 0xcafebabe be (most common?) or 0xbebafeca le (not sure if exists).
# Here we need to disambiguate mach-o and JVM class files. In mach-o the next 4 bytes are
# the number of binaries; in JVM class files it's the java version number. We assume there
# are less than 10 binaries in a universal binary.
or (magic.startswith(b"\xCA\xFE\xBA\xBE") and int.from_bytes(magic[4:8], "big") < 10)
or (magic.startswith(b"\xBE\xBA\xFE\xCA") and int.from_bytes(magic[4:8], "little") < 10)
)
Args:
filename: file to be tested
Returns: def is_elf_magic(magic: bytes) -> bool:
True or False return magic.startswith(b"\x7FELF")
"""
m_type, _ = ssys.mime_type(filename)
msg = "[{0}] -> ".format(filename)
if m_type == "application":
tty.debug(msg + "BINARY FILE")
return True
tty.debug(msg + "TEXT FILE") def is_binary(filename: str) -> bool:
return False """Returns true iff a file is likely binary"""
with open(filename, "rb") as f:
magic = f.read(8)
return is_macho_magic(magic) or is_elf_magic(magic)
# Memoize this due to repeated calls to libraries in the same directory. # Memoize this due to repeated calls to libraries in the same directory.
@ -649,6 +630,14 @@ def _exists_dir(dirname):
return os.path.isdir(dirname) return os.path.isdir(dirname)
def is_macho_binary(path):
try:
with open(path, "rb") as f:
return is_macho_magic(f.read(4))
except OSError:
return False
def fixup_macos_rpath(root, filename): def fixup_macos_rpath(root, filename):
"""Apply rpath fixups to the given file. """Apply rpath fixups to the given file.
@ -660,7 +649,8 @@ def fixup_macos_rpath(root, filename):
True if fixups were applied, else False True if fixups were applied, else False
""" """
abspath = os.path.join(root, filename) abspath = os.path.join(root, filename)
if ssys.mime_type(abspath) != ("application", "x-mach-binary"):
if not is_macho_binary(abspath):
return False return False
# Get Mach-O header commands # Get Mach-O header commands

View File

@ -4,12 +4,8 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os import os
import re
import shutil import shutil
import tempfile import tempfile
from collections import OrderedDict
from llnl.util.symlink import readlink, symlink
import spack.binary_distribution as bindist import spack.binary_distribution as bindist
import spack.deptypes as dt import spack.deptypes as dt
@ -20,19 +16,6 @@
import spack.store import spack.store
def _relocate_spliced_links(links, orig_prefix, new_prefix):
"""Re-linking function which differs from `relocate.relocate_links` by
reading the old link rather than the new link, since the latter wasn't moved
in our case. This still needs to be called after the copy to destination
because it expects the new directory structure to be in place."""
for link in links:
link_target = readlink(os.path.join(orig_prefix, link))
link_target = re.sub("^" + orig_prefix, new_prefix, link_target)
new_link_path = os.path.join(new_prefix, link)
os.unlink(new_link_path)
symlink(link_target, new_link_path)
def rewire(spliced_spec): def rewire(spliced_spec):
"""Given a spliced spec, this function conducts all the rewiring on all """Given a spliced spec, this function conducts all the rewiring on all
nodes in the DAG of that spec.""" nodes in the DAG of that spec."""
@ -54,13 +37,17 @@ def rewire_node(spec, explicit):
the splice. The resulting package is then 'installed.'""" the splice. The resulting package is then 'installed.'"""
tempdir = tempfile.mkdtemp() tempdir = tempfile.mkdtemp()
# copy anything installed to a temporary directory # Copy spec.build_spec.prefix to spec.prefix through a temporary tarball
shutil.copytree(spec.build_spec.prefix, os.path.join(tempdir, spec.dag_hash())) tarball = os.path.join(tempdir, f"{spec.dag_hash()}.tar.gz")
bindist.create_tarball(spec.build_spec, tarball)
spack.hooks.pre_install(spec) spack.hooks.pre_install(spec)
bindist.extract_buildcache_tarball(tarball, destination=spec.prefix)
buildinfo = bindist.read_buildinfo_file(spec.prefix)
# compute prefix-to-prefix for every node from the build spec to the spliced # compute prefix-to-prefix for every node from the build spec to the spliced
# spec # spec
prefix_to_prefix = OrderedDict({spec.build_spec.prefix: spec.prefix}) prefix_to_prefix = {spec.build_spec.prefix: spec.prefix}
build_spec_ids = set(id(s) for s in spec.build_spec.traverse(deptype=dt.ALL & ~dt.BUILD)) build_spec_ids = set(id(s) for s in spec.build_spec.traverse(deptype=dt.ALL & ~dt.BUILD))
for s in bindist.deps_to_relocate(spec): for s in bindist.deps_to_relocate(spec):
analog = s analog = s
@ -77,19 +64,17 @@ def rewire_node(spec, explicit):
prefix_to_prefix[analog.prefix] = s.prefix prefix_to_prefix[analog.prefix] = s.prefix
manifest = bindist.get_buildfile_manifest(spec.build_spec)
platform = spack.platforms.by_name(spec.platform) platform = spack.platforms.by_name(spec.platform)
text_to_relocate = [ text_to_relocate = [
os.path.join(tempdir, spec.dag_hash(), rel_path) os.path.join(spec.prefix, rel_path) for rel_path in buildinfo["relocate_textfiles"]
for rel_path in manifest.get("text_to_relocate", [])
] ]
if text_to_relocate: if text_to_relocate:
relocate.relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix) relocate.relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix)
links = [os.path.join(spec.prefix, f) for f in buildinfo["relocate_links"]]
relocate.relocate_links(links, prefix_to_prefix)
bins_to_relocate = [ bins_to_relocate = [
os.path.join(tempdir, spec.dag_hash(), rel_path) os.path.join(spec.prefix, rel_path) for rel_path in buildinfo["relocate_binaries"]
for rel_path in manifest.get("binary_to_relocate", [])
] ]
if bins_to_relocate: if bins_to_relocate:
if "macho" in platform.binary_formats: if "macho" in platform.binary_formats:
@ -113,22 +98,18 @@ def rewire_node(spec, explicit):
spec.prefix, spec.prefix,
) )
relocate.relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix) relocate.relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix)
# Copy package into place, except for spec.json (because spec.json
# describes the old spec and not the new spliced spec).
shutil.copytree(
os.path.join(tempdir, spec.dag_hash()),
spec.prefix,
ignore=shutil.ignore_patterns("spec.json", "install_manifest.json"),
)
if manifest.get("link_to_relocate"):
_relocate_spliced_links(
manifest.get("link_to_relocate"), spec.build_spec.prefix, spec.prefix
)
shutil.rmtree(tempdir) shutil.rmtree(tempdir)
# Above, we did not copy spec.json: instead, here we write the new install_manifest = os.path.join(
# (spliced) spec into spec.json, without this, Database.add would fail on spec.prefix,
# the next line (because it checks the spec.json in the prefix against the spack.store.STORE.layout.metadata_dir,
# spec being added to look for mismatches) spack.store.STORE.layout.manifest_file_name,
)
try:
os.unlink(install_manifest)
except FileNotFoundError:
pass
# Write the spliced spec into spec.json. Without this, Database.add would fail because it
# checks the spec.json in the prefix against the spec being added to look for mismatches
spack.store.STORE.layout.write_spec(spec, spack.store.STORE.layout.spec_file_path(spec)) spack.store.STORE.layout.write_spec(spec, spack.store.STORE.layout.spec_file_path(spec))
# add to database, not sure about explicit # add to database, not sure about explicit
spack.store.STORE.db.add(spec, explicit=explicit) spack.store.STORE.db.add(spec, explicit=explicit)

View File

@ -23,7 +23,7 @@
import archspec.cpu import archspec.cpu
from llnl.util.filesystem import copy_tree, join_path, visit_directory_tree from llnl.util.filesystem import copy_tree, join_path
from llnl.util.symlink import readlink from llnl.util.symlink import readlink
import spack.binary_distribution as bindist import spack.binary_distribution as bindist
@ -43,7 +43,7 @@
import spack.util.spack_yaml as syaml import spack.util.spack_yaml as syaml
import spack.util.url as url_util import spack.util.url as url_util
import spack.util.web as web_util import spack.util.web as web_util
from spack.binary_distribution import CannotListKeys, GenerateIndexError, get_buildfile_manifest from spack.binary_distribution import CannotListKeys, GenerateIndexError
from spack.directory_layout import DirectoryLayout from spack.directory_layout import DirectoryLayout
from spack.paths import test_path from spack.paths import test_path
from spack.spec import Spec from spack.spec import Spec
@ -623,60 +623,21 @@ def test_FetchCacheError_pretty_printing_single():
assert str_e.rstrip() == str_e assert str_e.rstrip() == str_e
def test_build_manifest_visitor(tmpdir): def test_text_relocate_if_needed(install_mockery, temporary_store, mock_fetch, tmp_path):
dir = "directory"
file = os.path.join("directory", "file")
with tmpdir.as_cwd():
# Create a file inside a directory
os.mkdir(dir)
with open(file, "wb") as f:
f.write(b"example file")
# Symlink the dir
os.symlink(dir, "symlink_to_directory")
# Symlink the file
os.symlink(file, "symlink_to_file")
# Hardlink the file
os.link(file, "hardlink_of_file")
# Hardlinked symlinks: seems like this is only a thing on Linux,
# on Darwin the symlink *target* is hardlinked, on Linux the
# symlink *itself* is hardlinked.
if sys.platform.startswith("linux"):
os.link("symlink_to_file", "hardlink_of_symlink_to_file")
os.link("symlink_to_directory", "hardlink_of_symlink_to_directory")
visitor = bindist.BuildManifestVisitor()
visit_directory_tree(str(tmpdir), visitor)
# We de-dupe hardlinks of files, so there should really be just one file
assert len(visitor.files) == 1
# We do not de-dupe symlinks, cause it's unclear how to update symlinks
# in-place, preserving inodes.
if sys.platform.startswith("linux"):
assert len(visitor.symlinks) == 4 # includes hardlinks of symlinks.
else:
assert len(visitor.symlinks) == 2
with tmpdir.as_cwd():
assert not any(os.path.islink(f) or os.path.isdir(f) for f in visitor.files)
assert all(os.path.islink(f) for f in visitor.symlinks)
def test_text_relocate_if_needed(install_mockery, temporary_store, mock_fetch, monkeypatch, capfd):
install_cmd("needs-text-relocation") install_cmd("needs-text-relocation")
spec = temporary_store.db.query_one("needs-text-relocation")
tgz_path = tmp_path / "relocatable.tar.gz"
bindist.create_tarball(spec, str(tgz_path))
specs = temporary_store.db.query("needs-text-relocation") # extract the .spack/binary_distribution file
assert len(specs) == 1 with tarfile.open(tgz_path) as tar:
manifest = get_buildfile_manifest(specs[0]) entry_name = next(x for x in tar.getnames() if x.endswith(".spack/binary_distribution"))
bd_file = tar.extractfile(entry_name)
manifest = syaml.load(bd_file)
assert join_path("bin", "exe") in manifest["text_to_relocate"] assert join_path("bin", "exe") in manifest["relocate_textfiles"]
assert join_path("bin", "otherexe") not in manifest["text_to_relocate"] assert join_path("bin", "otherexe") not in manifest["relocate_textfiles"]
assert join_path("bin", "secretexe") not in manifest["text_to_relocate"] assert join_path("bin", "secretexe") not in manifest["relocate_textfiles"]
def test_etag_fetching_304(): def test_etag_fetching_304():
@ -917,7 +878,7 @@ def test_tarball_doesnt_include_buildinfo_twice(tmp_path: Path):
tarball = str(tmp_path / "prefix.tar.gz") tarball = str(tmp_path / "prefix.tar.gz")
bindist._do_create_tarball( bindist._do_create_tarball(
tarfile_path=tarball, binaries_dir=str(p), buildinfo={"metadata": "new"} tarfile_path=tarball, prefix=str(p), buildinfo={"metadata": "new"}, prefixes_to_relocate=[]
) )
expected_prefix = str(p).lstrip("/") expected_prefix = str(p).lstrip("/")
@ -926,7 +887,10 @@ def test_tarball_doesnt_include_buildinfo_twice(tmp_path: Path):
# and that the tarball contains the new one, not the old one. # and that the tarball contains the new one, not the old one.
with tarfile.open(tarball) as tar: with tarfile.open(tarball) as tar:
assert syaml.load(tar.extractfile(f"{expected_prefix}/.spack/binary_distribution")) == { assert syaml.load(tar.extractfile(f"{expected_prefix}/.spack/binary_distribution")) == {
"metadata": "new" "metadata": "new",
"relocate_binaries": [],
"relocate_textfiles": [],
"relocate_links": [],
} }
assert tar.getnames() == [ assert tar.getnames() == [
*_all_parents(expected_prefix), *_all_parents(expected_prefix),
@ -951,11 +915,15 @@ def test_reproducible_tarball_is_reproducible(tmp_path: Path):
# Create a tarball with a certain mtime of bin/app # Create a tarball with a certain mtime of bin/app
os.utime(app, times=(0, 0)) os.utime(app, times=(0, 0))
bindist._do_create_tarball(tarball_1, binaries_dir=str(p), buildinfo=buildinfo) bindist._do_create_tarball(
tarball_1, prefix=str(p), buildinfo=buildinfo, prefixes_to_relocate=[]
)
# Do it another time with different mtime of bin/app # Do it another time with different mtime of bin/app
os.utime(app, times=(10, 10)) os.utime(app, times=(10, 10))
bindist._do_create_tarball(tarball_2, binaries_dir=str(p), buildinfo=buildinfo) bindist._do_create_tarball(
tarball_2, prefix=str(p), buildinfo=buildinfo, prefixes_to_relocate=[]
)
# They should be bitwise identical: # They should be bitwise identical:
assert filecmp.cmp(tarball_1, tarball_2, shallow=False) assert filecmp.cmp(tarball_1, tarball_2, shallow=False)
@ -1001,7 +969,7 @@ def test_tarball_normalized_permissions(tmpdir):
) as f: ) as f:
f.write("hello world") f.write("hello world")
bindist._do_create_tarball(tarball, binaries_dir=p.strpath, buildinfo={}) bindist._do_create_tarball(tarball, prefix=p.strpath, buildinfo={}, prefixes_to_relocate=[])
expected_prefix = p.strpath.lstrip("/") expected_prefix = p.strpath.lstrip("/")
@ -1120,7 +1088,7 @@ def test_tarfile_of_spec_prefix(tmpdir):
file = tmpdir.join("example.tar") file = tmpdir.join("example.tar")
with tarfile.open(file, mode="w") as tar: with tarfile.open(file, mode="w") as tar:
bindist.tarfile_of_spec_prefix(tar, prefix.strpath) bindist.tarfile_of_spec_prefix(tar, prefix.strpath, prefixes_to_relocate=[])
expected_prefix = prefix.strpath.lstrip("/") expected_prefix = prefix.strpath.lstrip("/")

View File

@ -36,8 +36,6 @@
macho_find_paths, macho_find_paths,
macho_make_paths_normal, macho_make_paths_normal,
macho_make_paths_relative, macho_make_paths_relative,
needs_binary_relocation,
needs_text_relocation,
relocate_links, relocate_links,
relocate_text, relocate_text,
) )
@ -193,16 +191,6 @@ def test_relocate_links(tmpdir):
assert readlink("to_self_but_relative") == "relative" assert readlink("to_self_but_relative") == "relative"
def test_needs_relocation():
assert needs_binary_relocation("application", "x-sharedlib")
assert needs_binary_relocation("application", "x-executable")
assert not needs_binary_relocation("application", "x-octet-stream")
assert not needs_binary_relocation("text", "x-")
assert needs_text_relocation("text", "x-")
assert not needs_text_relocation("symbolic link to", "x-")
assert needs_binary_relocation("application", "x-mach-binary")
def test_replace_paths(tmpdir): def test_replace_paths(tmpdir):
with tmpdir.as_cwd(): with tmpdir.as_cwd():
suffix = "dylib" if platform.system().lower() == "darwin" else "so" suffix = "dylib" if platform.system().lower() == "darwin" else "so"

View File

@ -10,7 +10,7 @@
import tarfile import tarfile
from contextlib import closing, contextmanager from contextlib import closing, contextmanager
from gzip import GzipFile from gzip import GzipFile
from typing import Callable, Dict, Tuple from typing import Callable, Dict, List, Tuple
from llnl.util.symlink import readlink from llnl.util.symlink import readlink
@ -130,6 +130,15 @@ def default_path_to_name(path: str) -> str:
return pathlib.PurePath(*p.parts[1:]).as_posix() if p.is_absolute() else p.as_posix() return pathlib.PurePath(*p.parts[1:]).as_posix() if p.is_absolute() else p.as_posix()
def default_add_file(tar: tarfile.TarFile, file_info: tarfile.TarInfo, path: str) -> None:
with open(path, "rb") as f:
tar.addfile(file_info, f)
def default_add_link(tar: tarfile.TarFile, file_info: tarfile.TarInfo, path: str) -> None:
tar.addfile(file_info)
def reproducible_tarfile_from_prefix( def reproducible_tarfile_from_prefix(
tar: tarfile.TarFile, tar: tarfile.TarFile,
prefix: str, prefix: str,
@ -137,6 +146,9 @@ def reproducible_tarfile_from_prefix(
include_parent_directories: bool = False, include_parent_directories: bool = False,
skip: Callable[[os.DirEntry], bool] = lambda entry: False, skip: Callable[[os.DirEntry], bool] = lambda entry: False,
path_to_name: Callable[[str], str] = default_path_to_name, path_to_name: Callable[[str], str] = default_path_to_name,
add_file: Callable[[tarfile.TarFile, tarfile.TarInfo, str], None] = default_add_file,
add_symlink: Callable[[tarfile.TarFile, tarfile.TarInfo, str], None] = default_add_link,
add_hardlink: Callable[[tarfile.TarFile, tarfile.TarInfo, str], None] = default_add_link,
) -> None: ) -> None:
"""Create a tarball from a given directory. Only adds regular files, symlinks and dirs. """Create a tarball from a given directory. Only adds regular files, symlinks and dirs.
Skips devices, fifos. Preserves hardlinks. Normalizes permissions like git. Tar entries are Skips devices, fifos. Preserves hardlinks. Normalizes permissions like git. Tar entries are
@ -170,8 +182,10 @@ def reproducible_tarfile_from_prefix(
tar.addfile(dir_info) tar.addfile(dir_info)
dir_stack = [prefix] dir_stack = [prefix]
new_dirs: List[str] = []
while dir_stack: while dir_stack:
dir = dir_stack.pop() dir = dir_stack.pop()
new_dirs.clear()
# Add the dir before its contents # Add the dir before its contents
dir_info = tarfile.TarInfo(path_to_name(dir)) dir_info = tarfile.TarInfo(path_to_name(dir))
@ -183,7 +197,6 @@ def reproducible_tarfile_from_prefix(
with os.scandir(dir) as it: with os.scandir(dir) as it:
entries = sorted(it, key=lambda entry: entry.name) entries = sorted(it, key=lambda entry: entry.name)
new_dirs = []
for entry in entries: for entry in entries:
if skip(entry): if skip(entry):
continue continue
@ -201,7 +214,7 @@ def reproducible_tarfile_from_prefix(
# st_mode field of the stat structure is unspecified." So we set it to # st_mode field of the stat structure is unspecified." So we set it to
# something sensible without lstat'ing the link. # something sensible without lstat'ing the link.
file_info.mode = 0o755 file_info.mode = 0o755
tar.addfile(file_info) add_symlink(tar, file_info, entry.path)
elif entry.is_file(follow_symlinks=False): elif entry.is_file(follow_symlinks=False):
# entry.stat has zero (st_ino, st_dev, st_nlink) on Windows: use lstat. # entry.stat has zero (st_ino, st_dev, st_nlink) on Windows: use lstat.
@ -216,15 +229,13 @@ def reproducible_tarfile_from_prefix(
if ident in hardlink_to_tarinfo_name: if ident in hardlink_to_tarinfo_name:
file_info.type = tarfile.LNKTYPE file_info.type = tarfile.LNKTYPE
file_info.linkname = hardlink_to_tarinfo_name[ident] file_info.linkname = hardlink_to_tarinfo_name[ident]
tar.addfile(file_info) add_hardlink(tar, file_info, entry.path)
continue continue
hardlink_to_tarinfo_name[ident] = file_info.name hardlink_to_tarinfo_name[ident] = file_info.name
# If file not yet seen, copy it # If file not yet seen, copy it
file_info.type = tarfile.REGTYPE file_info.type = tarfile.REGTYPE
file_info.size = s.st_size file_info.size = s.st_size
add_file(tar, file_info, entry.path)
with open(entry.path, "rb") as f:
tar.addfile(file_info, f)
dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical

View File

@ -10,62 +10,10 @@
import glob import glob
import os import os
import sys
from llnl.util import tty
from llnl.util.filesystem import edit_in_place_through_temporary_file from llnl.util.filesystem import edit_in_place_through_temporary_file
from llnl.util.lang import memoized
from spack.util.executable import Executable, which from spack.util.executable import Executable
def _ensure_file_on_win():
"""Ensures the file command is available on Windows
If not, it is bootstrapped.
No-op on all other platforms"""
if sys.platform != "win32":
return
import spack.bootstrap
with spack.bootstrap.ensure_bootstrap_configuration():
spack.bootstrap.ensure_file_in_path_or_raise()
@memoized
def file_command(*args):
"""Creates entry point to `file` system command with provided arguments"""
_ensure_file_on_win()
file_cmd = which("file", required=True)
for arg in args:
file_cmd.add_default_arg(arg)
return file_cmd
@memoized
def _get_mime_type():
"""Generate method to call `file` system command to aquire mime type
for a specified path
"""
if sys.platform == "win32":
# -h option (no-dereference) does not exist in Windows
return file_command("-b", "--mime-type")
else:
return file_command("-b", "-h", "--mime-type")
def mime_type(filename):
"""Returns the mime type and subtype of a file.
Args:
filename: file to be analyzed
Returns:
Tuple containing the MIME type and subtype
"""
output = _get_mime_type()(filename, output=str, error=str).strip()
tty.debug("==> " + output)
type, _, subtype = output.partition("/")
return type, subtype
def fix_darwin_install_name(path): def fix_darwin_install_name(path):

View File

@ -1,4 +0,0 @@
from spack.util.filesystem import file_command
if __name__ == "__main__":
file_command()