diff --git a/.github/workflows/bootstrap.yml b/.github/workflows/bootstrap.yml index 90b31a098ae..dae7034193b 100644 --- a/.github/workflows/bootstrap.yml +++ b/.github/workflows/bootstrap.yml @@ -161,11 +161,7 @@ jobs: source share/spack/setup-env.sh spack -d gpg list tree $HOME/.spack/bootstrap/store/ - - name: Bootstrap File - run: | - source share/spack/setup-env.sh - spack -d python share/spack/qa/bootstrap-file.py - tree $HOME/.spack/bootstrap/store/ + windows: runs-on: "windows-latest" @@ -196,9 +192,3 @@ jobs: spack -d gpg list ./share/spack/qa/validate_last_exit.ps1 tree $env:userprofile/.spack/bootstrap/store/ - - name: Bootstrap File - run: | - ./share/spack/setup-env.ps1 - spack -d python share/spack/qa/bootstrap-file.py - ./share/spack/qa/validate_last_exit.ps1 - tree $env:userprofile/.spack/bootstrap/store/ diff --git a/.github/workflows/unit_tests.yaml b/.github/workflows/unit_tests.yaml index aaf774026b7..d8c94d9a691 100644 --- a/.github/workflows/unit_tests.yaml +++ b/.github/workflows/unit_tests.yaml @@ -140,7 +140,7 @@ jobs: - name: Install dependencies run: | dnf install -y \ - bzip2 curl file gcc-c++ gcc gcc-gfortran git gnupg2 gzip \ + bzip2 curl gcc-c++ gcc gcc-gfortran git gnupg2 gzip \ make patch tcl unzip which xz - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Setup repo and non-root user diff --git a/lib/spack/docs/getting_started.rst b/lib/spack/docs/getting_started.rst index 696431fc74c..b0fa11054ca 100644 --- a/lib/spack/docs/getting_started.rst +++ b/lib/spack/docs/getting_started.rst @@ -35,7 +35,7 @@ A build matrix showing which packages are working on which systems is shown belo .. code-block:: console apt update - apt install bzip2 ca-certificates file g++ gcc gfortran git gzip lsb-release patch python3 tar unzip xz-utils zstd + apt install bzip2 ca-certificates g++ gcc gfortran git gzip lsb-release patch python3 tar unzip xz-utils zstd .. tab-item:: RHEL diff --git a/lib/spack/docs/tables/system_prerequisites.csv b/lib/spack/docs/tables/system_prerequisites.csv index 7a72078cdd6..403c058a9a8 100644 --- a/lib/spack/docs/tables/system_prerequisites.csv +++ b/lib/spack/docs/tables/system_prerequisites.csv @@ -8,7 +8,6 @@ unzip, , , Compress/Decompress archives bzip2, , , Compress/Decompress archives xz, , , Compress/Decompress archives zstd, , Optional, Compress/Decompress archives -file, , , Create/Use Buildcaches lsb-release, , , Linux: identify operating system version gnupg2, , , Sign/Verify Buildcaches git, , , Manage Software Repositories diff --git a/lib/spack/llnl/util/lang.py b/lib/spack/llnl/util/lang.py index 4913a50fad9..0950a39da16 100644 --- a/lib/spack/llnl/util/lang.py +++ b/lib/spack/llnl/util/lang.py @@ -863,8 +863,10 @@ def elide_list(line_list: List[str], max_num: int = 10) -> List[str]: if sys.version_info >= (3, 9): PatternStr = re.Pattern[str] + PatternBytes = re.Pattern[bytes] else: PatternStr = typing.Pattern[str] + PatternBytes = typing.Pattern[bytes] def fnmatch_translate_multiple(named_patterns: Dict[str, str]) -> str: diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 98559162b13..57ccc6ec7de 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -24,13 +24,12 @@ import urllib.request import warnings from contextlib import closing -from typing import Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Union +from typing import IO, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Union import llnl.util.filesystem as fsys import llnl.util.lang import llnl.util.tty as tty -from llnl.util.filesystem import BaseDirectoryVisitor, mkdirp, visit_directory_tree -from llnl.util.symlink import readlink +from llnl.util.filesystem import mkdirp import spack.caches import spack.config as config @@ -54,7 +53,6 @@ import spack.util.archive import spack.util.crypto import spack.util.file_cache as file_cache -import spack.util.filesystem as ssys import spack.util.gpg import spack.util.parallel import spack.util.path @@ -587,129 +585,11 @@ def read_buildinfo_file(prefix): return syaml.load(f) -class BuildManifestVisitor(BaseDirectoryVisitor): - """Visitor that collects a list of files and symlinks - that can be checked for need of relocation. It knows how - to dedupe hardlinks and deal with symlinks to files and - directories.""" - - def __init__(self): - # Save unique identifiers of hardlinks to avoid relocating them multiple times - self.visited = set() - - # Lists of files we will check - self.files = [] - self.symlinks = [] - - def seen_before(self, root, rel_path): - stat_result = os.lstat(os.path.join(root, rel_path)) - if stat_result.st_nlink == 1: - return False - identifier = (stat_result.st_dev, stat_result.st_ino) - if identifier in self.visited: - return True - else: - self.visited.add(identifier) - return False - - def visit_file(self, root, rel_path, depth): - if self.seen_before(root, rel_path): - return - self.files.append(rel_path) - - def visit_symlinked_file(self, root, rel_path, depth): - # Note: symlinks *can* be hardlinked, but it is unclear if - # symlinks can be relinked in-place (preserving inode). - # Therefore, we do *not* de-dupe hardlinked symlinks. - self.symlinks.append(rel_path) - - def before_visit_dir(self, root, rel_path, depth): - return os.path.basename(rel_path) not in (".spack", "man") - - def before_visit_symlinked_dir(self, root, rel_path, depth): - # Treat symlinked directories simply as symlinks. - self.visit_symlinked_file(root, rel_path, depth) - # Never recurse into symlinked directories. - return False - - -def file_matches(path, regex): - with open(path, "rb") as f: - contents = f.read() - return bool(regex.search(contents)) - - -def get_buildfile_manifest(spec): - """ - Return a data structure with information about a build, including - text_to_relocate, binary_to_relocate, binary_to_relocate_fullpath - link_to_relocate, and other, which means it doesn't fit any of previous - checks (and should not be relocated). We exclude docs (man) and - metadata (.spack). This can be used to find a particular kind of file - in spack, or to generate the build metadata. - """ - data = { - "text_to_relocate": [], - "binary_to_relocate": [], - "link_to_relocate": [], - "other": [], - "binary_to_relocate_fullpath": [], - "hardlinks_deduped": True, - } - - # Guard against filesystem footguns of hardlinks and symlinks by using - # a visitor to retrieve a list of files and symlinks, so we don't have - # to worry about hardlinks of symlinked dirs and what not. - visitor = BuildManifestVisitor() - root = spec.prefix - visit_directory_tree(root, visitor) - - # Collect a list of prefixes for this package and it's dependencies, Spack will - # look for them to decide if text file needs to be relocated or not - prefixes = [d.prefix for d in spec.traverse(root=True, deptype="all") if not d.external] - prefixes.append(spack.hooks.sbang.sbang_install_path()) - prefixes.append(str(spack.store.STORE.layout.root)) - - # Create a giant regex that matches all prefixes - regex = utf8_paths_to_single_binary_regex(prefixes) - - # Symlinks. - - # Obvious bugs: - # 1. relative links are not relocated. - # 2. paths are used as strings. - for rel_path in visitor.symlinks: - abs_path = os.path.join(root, rel_path) - link = readlink(abs_path) - if os.path.isabs(link) and link.startswith(spack.store.STORE.layout.root): - data["link_to_relocate"].append(rel_path) - - # Non-symlinks. - for rel_path in visitor.files: - abs_path = os.path.join(root, rel_path) - m_type, m_subtype = ssys.mime_type(abs_path) - - if relocate.needs_binary_relocation(m_type, m_subtype): - # Why is this branch not part of needs_binary_relocation? :( - if ( - ( - m_subtype in ("x-executable", "x-sharedlib", "x-pie-executable") - and sys.platform != "darwin" - ) - or (m_subtype in ("x-mach-binary") and sys.platform == "darwin") - or (not rel_path.endswith(".o")) - ): - data["binary_to_relocate"].append(rel_path) - data["binary_to_relocate_fullpath"].append(abs_path) - continue - - elif relocate.needs_text_relocation(m_type, m_subtype) and file_matches(abs_path, regex): - data["text_to_relocate"].append(rel_path) - continue - - data["other"].append(abs_path) - - return data +def file_matches(f: IO[bytes], regex: llnl.util.lang.PatternBytes) -> bool: + try: + return bool(regex.search(f.read())) + finally: + f.seek(0) def deps_to_relocate(spec): @@ -742,17 +622,15 @@ def deps_to_relocate(spec): def get_buildinfo_dict(spec): """Create metadata for a tarball""" - manifest = get_buildfile_manifest(spec) - return { "sbang_install_path": spack.hooks.sbang.sbang_install_path(), "buildpath": spack.store.STORE.layout.root, "spackprefix": spack.paths.prefix, "relative_prefix": os.path.relpath(spec.prefix, spack.store.STORE.layout.root), - "relocate_textfiles": manifest["text_to_relocate"], - "relocate_binaries": manifest["binary_to_relocate"], - "relocate_links": manifest["link_to_relocate"], - "hardlinks_deduped": manifest["hardlinks_deduped"], + # "relocate_textfiles": [], + # "relocate_binaries": [], + # "relocate_links": [], + "hardlinks_deduped": True, "hash_to_prefix": {d.dag_hash(): str(d.prefix) for d in deps_to_relocate(spec)}, } @@ -1042,7 +920,55 @@ def generate_key_index(key_prefix: str, tmpdir: str) -> None: ) from e -def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None: +class FileTypes: + BINARY = 0 + TEXT = 1 + UNKNOWN = 2 + + +NOT_ISO8859_1_TEXT = re.compile(b"[\x00\x7F-\x9F]") + + +def file_type(f: IO[bytes]) -> int: + try: + # first check if this is an ELF or mach-o binary. + magic = f.read(8) + if len(magic) < 8: + return FileTypes.UNKNOWN + elif relocate.is_elf_magic(magic) or relocate.is_macho_magic(magic): + return FileTypes.BINARY + + f.seek(0) + + # Then try utf-8, which has a fast exponential decay in false positive rate with file size. + # Use chunked reads for fast early exit. + f_txt = io.TextIOWrapper(f, encoding="utf-8", errors="strict") + try: + while f_txt.read(1024): + pass + return FileTypes.TEXT + except UnicodeError: + f_txt.seek(0) + pass + finally: + f_txt.detach() + # Finally try iso-8859-1 heuristically. In Python, all possible 256 byte values are valid. + # We classify it as text if it does not contain any control characters / null bytes. + data = f.read(1024) + while data: + if NOT_ISO8859_1_TEXT.search(data): + break + data = f.read(1024) + else: + return FileTypes.TEXT + return FileTypes.UNKNOWN + finally: + f.seek(0) + + +def tarfile_of_spec_prefix( + tar: tarfile.TarFile, prefix: str, prefixes_to_relocate: List[str] +) -> dict: """Create a tarfile of an install prefix of a spec. Skips existing buildinfo file. Args: @@ -1058,6 +984,33 @@ def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None: except OSError: skip = lambda entry: False + binary_regex = utf8_paths_to_single_binary_regex(prefixes_to_relocate) + + relocate_binaries = [] + relocate_links = [] + relocate_textfiles = [] + + # use callbacks to add files and symlinks, so we can register which files need relocation upon + # extraction. + def add_file(tar: tarfile.TarFile, info: tarfile.TarInfo, path: str): + with open(path, "rb") as f: + relpath = os.path.relpath(path, prefix) + # no need to relocate anything in the .spack directory + if relpath.split(os.sep, 1)[0] == ".spack": + tar.addfile(info, f) + return + f_type = file_type(f) + if f_type == FileTypes.BINARY: + relocate_binaries.append(os.path.relpath(path, prefix)) + elif f_type == FileTypes.TEXT and file_matches(f, binary_regex): + relocate_textfiles.append(os.path.relpath(path, prefix)) + tar.addfile(info, f) + + def add_symlink(tar: tarfile.TarFile, info: tarfile.TarInfo, path: str): + if os.path.isabs(info.linkname) and binary_regex.match(info.linkname.encode("utf-8")): + relocate_links.append(os.path.relpath(path, prefix)) + tar.addfile(info) + spack.util.archive.reproducible_tarfile_from_prefix( tar, prefix, @@ -1065,29 +1018,51 @@ def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None: # used in runtimes like AWS lambda. include_parent_directories=True, skip=skip, + add_file=add_file, + add_symlink=add_symlink, + ) + + return { + "relocate_binaries": relocate_binaries, + "relocate_links": relocate_links, + "relocate_textfiles": relocate_textfiles, + } + + +def create_tarball(spec: spack.spec.Spec, tarfile_path: str) -> Tuple[str, str]: + """Create a tarball of a spec and return the checksums of the compressed tarfile and the + uncompressed tarfile.""" + return _do_create_tarball( + tarfile_path, + spec.prefix, + buildinfo=get_buildinfo_dict(spec), + prefixes_to_relocate=prefixes_to_relocate(spec), ) -def _do_create_tarball(tarfile_path: str, binaries_dir: str, buildinfo: dict): +def _do_create_tarball( + tarfile_path: str, prefix: str, buildinfo: dict, prefixes_to_relocate: List[str] +) -> Tuple[str, str]: with spack.util.archive.gzip_compressed_tarfile(tarfile_path) as ( tar, - inner_checksum, - outer_checksum, + tar_gz_checksum, + tar_checksum, ): # Tarball the install prefix - tarfile_of_spec_prefix(tar, binaries_dir) + files_to_relocate = tarfile_of_spec_prefix(tar, prefix, prefixes_to_relocate) + buildinfo.update(files_to_relocate) # Serialize buildinfo for the tarball bstring = syaml.dump(buildinfo, default_flow_style=True).encode("utf-8") tarinfo = tarfile.TarInfo( - name=spack.util.archive.default_path_to_name(buildinfo_file_name(binaries_dir)) + name=spack.util.archive.default_path_to_name(buildinfo_file_name(prefix)) ) tarinfo.type = tarfile.REGTYPE tarinfo.size = len(bstring) tarinfo.mode = 0o644 tar.addfile(tarinfo, io.BytesIO(bstring)) - return inner_checksum.hexdigest(), outer_checksum.hexdigest() + return tar_gz_checksum.hexdigest(), tar_checksum.hexdigest() class ExistsInBuildcache(NamedTuple): @@ -1137,6 +1112,13 @@ def _exists_in_buildcache(spec: spack.spec.Spec, tmpdir: str, out_url: str) -> E return ExistsInBuildcache(signed, unsigned, tarball) +def prefixes_to_relocate(spec): + prefixes = [s.prefix for s in deps_to_relocate(spec)] + prefixes.append(spack.hooks.sbang.sbang_install_path()) + prefixes.append(str(spack.store.STORE.layout.root)) + return prefixes + + def _url_upload_tarball_and_specfile( spec: spack.spec.Spec, tmpdir: str, @@ -1146,7 +1128,7 @@ def _url_upload_tarball_and_specfile( ): files = BuildcacheFiles(spec, tmpdir, out_url) tarball = files.local_tarball() - checksum, _ = _do_create_tarball(tarball, spec.prefix, get_buildinfo_dict(spec)) + checksum, _ = create_tarball(spec, tarball) spec_dict = spec.to_dict(hash=ht.dag_hash) spec_dict["buildcache_layout_version"] = CURRENT_BUILD_CACHE_LAYOUT_VERSION spec_dict["binary_cache_checksum"] = {"hash_algorithm": "sha256", "hash": checksum} @@ -1470,13 +1452,11 @@ def _oci_push_pkg_blob( filename = os.path.join(tmpdir, f"{spec.dag_hash()}.tar.gz") # Create an oci.image.layer aka tarball of the package - compressed_tarfile_checksum, tarfile_checksum = _do_create_tarball( - filename, spec.prefix, get_buildinfo_dict(spec) - ) + tar_gz_checksum, tar_checksum = create_tarball(spec, filename) blob = spack.oci.oci.Blob( - Digest.from_sha256(compressed_tarfile_checksum), - Digest.from_sha256(tarfile_checksum), + Digest.from_sha256(tar_gz_checksum), + Digest.from_sha256(tar_checksum), os.path.getsize(filename), ) @@ -2435,6 +2415,14 @@ def _tar_strip_component(tar: tarfile.TarFile, prefix: str): yield m +def extract_buildcache_tarball(tarfile_path: str, destination: str) -> None: + with closing(tarfile.open(tarfile_path, "r")) as tar: + # Remove common prefix from tarball entries and directly extract them to the install dir. + tar.extractall( + path=destination, members=_tar_strip_component(tar, prefix=_ensure_common_prefix(tar)) + ) + + def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): """ extract binary tarball for given package into install area @@ -2504,12 +2492,7 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): tarfile_path, size, contents, "sha256", expected, local_checksum ) try: - with closing(tarfile.open(tarfile_path, "r")) as tar: - # Remove install prefix from tarfil to extract directly into spec.prefix - tar.extractall( - path=spec.prefix, - members=_tar_strip_component(tar, prefix=_ensure_common_prefix(tar)), - ) + extract_buildcache_tarball(tarfile_path, destination=spec.prefix) except Exception: shutil.rmtree(spec.prefix, ignore_errors=True) _delete_staged_downloads(download_result) diff --git a/lib/spack/spack/bootstrap/__init__.py b/lib/spack/spack/bootstrap/__init__.py index d710caee68c..85935cd0e02 100644 --- a/lib/spack/spack/bootstrap/__init__.py +++ b/lib/spack/spack/bootstrap/__init__.py @@ -9,7 +9,6 @@ all_core_root_specs, ensure_clingo_importable_or_raise, ensure_core_dependencies, - ensure_file_in_path_or_raise, ensure_gpg_in_path_or_raise, ensure_patchelf_in_path_or_raise, ) @@ -20,7 +19,6 @@ "is_bootstrapping", "ensure_bootstrap_configuration", "ensure_core_dependencies", - "ensure_file_in_path_or_raise", "ensure_gpg_in_path_or_raise", "ensure_clingo_importable_or_raise", "ensure_patchelf_in_path_or_raise", diff --git a/lib/spack/spack/bootstrap/core.py b/lib/spack/spack/bootstrap/core.py index d396aaac687..5bf27e38846 100644 --- a/lib/spack/spack/bootstrap/core.py +++ b/lib/spack/spack/bootstrap/core.py @@ -481,19 +481,6 @@ def ensure_gpg_in_path_or_raise() -> None: ) -def file_root_spec() -> str: - """Return the root spec used to bootstrap file""" - root_spec_name = "win-file" if IS_WINDOWS else "file" - return _root_spec(root_spec_name) - - -def ensure_file_in_path_or_raise() -> None: - """Ensure file is in the PATH or raise""" - return ensure_executables_in_path_or_raise( - executables=["file"], abstract_spec=file_root_spec() - ) - - def patchelf_root_spec() -> str: """Return the root spec used to bootstrap patchelf""" # 0.13.1 is the last version not to require C++17. @@ -577,15 +564,13 @@ def ensure_core_dependencies() -> None: """Ensure the presence of all the core dependencies.""" if sys.platform.lower() == "linux": ensure_patchelf_in_path_or_raise() - elif sys.platform == "win32": - ensure_file_in_path_or_raise() ensure_gpg_in_path_or_raise() ensure_clingo_importable_or_raise() def all_core_root_specs() -> List[str]: """Return a list of all the core root specs that may be used to bootstrap Spack""" - return [clingo_root_spec(), gnupg_root_spec(), patchelf_root_spec(), file_root_spec()] + return [clingo_root_spec(), gnupg_root_spec(), patchelf_root_spec()] def bootstrapping_sources(scope: Optional[str] = None): diff --git a/lib/spack/spack/bootstrap/status.py b/lib/spack/spack/bootstrap/status.py index 6d3270b42c9..4be7b3ce513 100644 --- a/lib/spack/spack/bootstrap/status.py +++ b/lib/spack/spack/bootstrap/status.py @@ -3,8 +3,8 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) """Query the status of bootstrapping on this machine""" -import platform -from typing import List, Optional, Sequence, Tuple, Union +import sys +from typing import Dict, List, Optional, Sequence, Tuple, Union import spack.util.executable @@ -72,7 +72,7 @@ def _core_requirements() -> List[RequiredResponseType]: "bzip2": _missing("bzip2", "required to compress/decompress code archives"), "git": _missing("git", "required to fetch/manage git repositories"), } - if platform.system().lower() == "linux": + if sys.platform == "linux": _core_system_exes["xz"] = _missing("xz", "required to compress/decompress code archives") # Executables that are not bootstrapped yet @@ -87,17 +87,16 @@ def _core_requirements() -> List[RequiredResponseType]: def _buildcache_requirements() -> List[RequiredResponseType]: - _buildcache_exes = { - "file": _missing("file", "required to analyze files for buildcaches", system_only=False), - ("gpg2", "gpg"): _missing("gpg2", "required to sign/verify buildcaches", False), + _buildcache_exes: Dict[ExecutablesType, str] = { + ("gpg2", "gpg"): _missing("gpg2", "required to sign/verify buildcaches", False) } - if platform.system().lower() == "darwin": + if sys.platform == "darwin": _buildcache_exes["otool"] = _missing("otool", "required to relocate binaries") # Executables that are not bootstrapped yet result = [_required_system_executable(exe, msg) for exe, msg in _buildcache_exes.items()] - if platform.system().lower() == "linux": + if sys.platform == "linux": result.append( _required_executable( "patchelf", diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py index 525721ebb30..4158b4b3e9d 100644 --- a/lib/spack/spack/package.py +++ b/lib/spack/spack/package.py @@ -103,7 +103,7 @@ from spack.phase_callbacks import run_after, run_before from spack.spec import InvalidSpecDetected, Spec from spack.util.executable import * -from spack.util.filesystem import file_command, fix_darwin_install_name, mime_type +from spack.util.filesystem import fix_darwin_install_name from spack.variant import any_combination_of, auto_or_any_combination_of, disjoint_sets from spack.version import Version, ver diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py index dda17a128e5..41a53ce1b30 100644 --- a/lib/spack/spack/relocate.py +++ b/lib/spack/spack/relocate.py @@ -23,7 +23,6 @@ import spack.store import spack.util.elf as elf import spack.util.executable as executable -import spack.util.filesystem as ssys from .relocate_text import BinaryFilePrefixReplacer, TextFilePrefixReplacer @@ -350,32 +349,6 @@ def _set_elf_rpaths_and_interpreter( return None -def needs_binary_relocation(m_type, m_subtype): - """Returns True if the file with MIME type/subtype passed as arguments - needs binary relocation, False otherwise. - - Args: - m_type (str): MIME type of the file - m_subtype (str): MIME subtype of the file - """ - subtypes = ("x-executable", "x-sharedlib", "x-mach-binary", "x-pie-executable") - if m_type == "application": - if m_subtype in subtypes: - return True - return False - - -def needs_text_relocation(m_type, m_subtype): - """Returns True if the file with MIME type/subtype passed as arguments - needs text relocation, False otherwise. - - Args: - m_type (str): MIME type of the file - m_subtype (str): MIME subtype of the file - """ - return m_type == "text" - - def relocate_macho_binaries( path_names, old_layout_root, new_layout_root, prefix_to_prefix, rel, old_prefix, new_prefix ): @@ -623,24 +596,32 @@ def relocate_text_bin(binaries, prefixes): return BinaryFilePrefixReplacer.from_strings_or_bytes(prefixes).apply(binaries) -def is_binary(filename): - """Returns true if a file is binary, False otherwise +def is_macho_magic(magic: bytes) -> bool: + return ( + # In order of popularity: 64-bit mach-o le/be, 32-bit mach-o le/be. + magic.startswith(b"\xCF\xFA\xED\xFE") + or magic.startswith(b"\xFE\xED\xFA\xCF") + or magic.startswith(b"\xCE\xFA\xED\xFE") + or magic.startswith(b"\xFE\xED\xFA\xCE") + # universal binaries: 0xcafebabe be (most common?) or 0xbebafeca le (not sure if exists). + # Here we need to disambiguate mach-o and JVM class files. In mach-o the next 4 bytes are + # the number of binaries; in JVM class files it's the java version number. We assume there + # are less than 10 binaries in a universal binary. + or (magic.startswith(b"\xCA\xFE\xBA\xBE") and int.from_bytes(magic[4:8], "big") < 10) + or (magic.startswith(b"\xBE\xBA\xFE\xCA") and int.from_bytes(magic[4:8], "little") < 10) + ) - Args: - filename: file to be tested - Returns: - True or False - """ - m_type, _ = ssys.mime_type(filename) +def is_elf_magic(magic: bytes) -> bool: + return magic.startswith(b"\x7FELF") - msg = "[{0}] -> ".format(filename) - if m_type == "application": - tty.debug(msg + "BINARY FILE") - return True - tty.debug(msg + "TEXT FILE") - return False +def is_binary(filename: str) -> bool: + """Returns true iff a file is likely binary""" + with open(filename, "rb") as f: + magic = f.read(8) + + return is_macho_magic(magic) or is_elf_magic(magic) # Memoize this due to repeated calls to libraries in the same directory. @@ -649,6 +630,14 @@ def _exists_dir(dirname): return os.path.isdir(dirname) +def is_macho_binary(path): + try: + with open(path, "rb") as f: + return is_macho_magic(f.read(4)) + except OSError: + return False + + def fixup_macos_rpath(root, filename): """Apply rpath fixups to the given file. @@ -660,7 +649,8 @@ def fixup_macos_rpath(root, filename): True if fixups were applied, else False """ abspath = os.path.join(root, filename) - if ssys.mime_type(abspath) != ("application", "x-mach-binary"): + + if not is_macho_binary(abspath): return False # Get Mach-O header commands diff --git a/lib/spack/spack/rewiring.py b/lib/spack/spack/rewiring.py index ae7eb0a8d85..0b20c82b73d 100644 --- a/lib/spack/spack/rewiring.py +++ b/lib/spack/spack/rewiring.py @@ -4,12 +4,8 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import os -import re import shutil import tempfile -from collections import OrderedDict - -from llnl.util.symlink import readlink, symlink import spack.binary_distribution as bindist import spack.deptypes as dt @@ -20,19 +16,6 @@ import spack.store -def _relocate_spliced_links(links, orig_prefix, new_prefix): - """Re-linking function which differs from `relocate.relocate_links` by - reading the old link rather than the new link, since the latter wasn't moved - in our case. This still needs to be called after the copy to destination - because it expects the new directory structure to be in place.""" - for link in links: - link_target = readlink(os.path.join(orig_prefix, link)) - link_target = re.sub("^" + orig_prefix, new_prefix, link_target) - new_link_path = os.path.join(new_prefix, link) - os.unlink(new_link_path) - symlink(link_target, new_link_path) - - def rewire(spliced_spec): """Given a spliced spec, this function conducts all the rewiring on all nodes in the DAG of that spec.""" @@ -54,13 +37,17 @@ def rewire_node(spec, explicit): the splice. The resulting package is then 'installed.'""" tempdir = tempfile.mkdtemp() - # copy anything installed to a temporary directory - shutil.copytree(spec.build_spec.prefix, os.path.join(tempdir, spec.dag_hash())) + # Copy spec.build_spec.prefix to spec.prefix through a temporary tarball + tarball = os.path.join(tempdir, f"{spec.dag_hash()}.tar.gz") + bindist.create_tarball(spec.build_spec, tarball) spack.hooks.pre_install(spec) + bindist.extract_buildcache_tarball(tarball, destination=spec.prefix) + buildinfo = bindist.read_buildinfo_file(spec.prefix) + # compute prefix-to-prefix for every node from the build spec to the spliced # spec - prefix_to_prefix = OrderedDict({spec.build_spec.prefix: spec.prefix}) + prefix_to_prefix = {spec.build_spec.prefix: spec.prefix} build_spec_ids = set(id(s) for s in spec.build_spec.traverse(deptype=dt.ALL & ~dt.BUILD)) for s in bindist.deps_to_relocate(spec): analog = s @@ -77,19 +64,17 @@ def rewire_node(spec, explicit): prefix_to_prefix[analog.prefix] = s.prefix - manifest = bindist.get_buildfile_manifest(spec.build_spec) platform = spack.platforms.by_name(spec.platform) text_to_relocate = [ - os.path.join(tempdir, spec.dag_hash(), rel_path) - for rel_path in manifest.get("text_to_relocate", []) + os.path.join(spec.prefix, rel_path) for rel_path in buildinfo["relocate_textfiles"] ] if text_to_relocate: relocate.relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix) - + links = [os.path.join(spec.prefix, f) for f in buildinfo["relocate_links"]] + relocate.relocate_links(links, prefix_to_prefix) bins_to_relocate = [ - os.path.join(tempdir, spec.dag_hash(), rel_path) - for rel_path in manifest.get("binary_to_relocate", []) + os.path.join(spec.prefix, rel_path) for rel_path in buildinfo["relocate_binaries"] ] if bins_to_relocate: if "macho" in platform.binary_formats: @@ -113,22 +98,18 @@ def rewire_node(spec, explicit): spec.prefix, ) relocate.relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix) - # Copy package into place, except for spec.json (because spec.json - # describes the old spec and not the new spliced spec). - shutil.copytree( - os.path.join(tempdir, spec.dag_hash()), - spec.prefix, - ignore=shutil.ignore_patterns("spec.json", "install_manifest.json"), - ) - if manifest.get("link_to_relocate"): - _relocate_spliced_links( - manifest.get("link_to_relocate"), spec.build_spec.prefix, spec.prefix - ) shutil.rmtree(tempdir) - # Above, we did not copy spec.json: instead, here we write the new - # (spliced) spec into spec.json, without this, Database.add would fail on - # the next line (because it checks the spec.json in the prefix against the - # spec being added to look for mismatches) + install_manifest = os.path.join( + spec.prefix, + spack.store.STORE.layout.metadata_dir, + spack.store.STORE.layout.manifest_file_name, + ) + try: + os.unlink(install_manifest) + except FileNotFoundError: + pass + # Write the spliced spec into spec.json. Without this, Database.add would fail because it + # checks the spec.json in the prefix against the spec being added to look for mismatches spack.store.STORE.layout.write_spec(spec, spack.store.STORE.layout.spec_file_path(spec)) # add to database, not sure about explicit spack.store.STORE.db.add(spec, explicit=explicit) diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py index 4b179527619..1c863a0c555 100644 --- a/lib/spack/spack/test/bindist.py +++ b/lib/spack/spack/test/bindist.py @@ -23,7 +23,7 @@ import archspec.cpu -from llnl.util.filesystem import copy_tree, join_path, visit_directory_tree +from llnl.util.filesystem import copy_tree, join_path from llnl.util.symlink import readlink import spack.binary_distribution as bindist @@ -43,7 +43,7 @@ import spack.util.spack_yaml as syaml import spack.util.url as url_util import spack.util.web as web_util -from spack.binary_distribution import CannotListKeys, GenerateIndexError, get_buildfile_manifest +from spack.binary_distribution import CannotListKeys, GenerateIndexError from spack.directory_layout import DirectoryLayout from spack.paths import test_path from spack.spec import Spec @@ -623,60 +623,21 @@ def test_FetchCacheError_pretty_printing_single(): assert str_e.rstrip() == str_e -def test_build_manifest_visitor(tmpdir): - dir = "directory" - file = os.path.join("directory", "file") - - with tmpdir.as_cwd(): - # Create a file inside a directory - os.mkdir(dir) - with open(file, "wb") as f: - f.write(b"example file") - - # Symlink the dir - os.symlink(dir, "symlink_to_directory") - - # Symlink the file - os.symlink(file, "symlink_to_file") - - # Hardlink the file - os.link(file, "hardlink_of_file") - - # Hardlinked symlinks: seems like this is only a thing on Linux, - # on Darwin the symlink *target* is hardlinked, on Linux the - # symlink *itself* is hardlinked. - if sys.platform.startswith("linux"): - os.link("symlink_to_file", "hardlink_of_symlink_to_file") - os.link("symlink_to_directory", "hardlink_of_symlink_to_directory") - - visitor = bindist.BuildManifestVisitor() - visit_directory_tree(str(tmpdir), visitor) - - # We de-dupe hardlinks of files, so there should really be just one file - assert len(visitor.files) == 1 - - # We do not de-dupe symlinks, cause it's unclear how to update symlinks - # in-place, preserving inodes. - if sys.platform.startswith("linux"): - assert len(visitor.symlinks) == 4 # includes hardlinks of symlinks. - else: - assert len(visitor.symlinks) == 2 - - with tmpdir.as_cwd(): - assert not any(os.path.islink(f) or os.path.isdir(f) for f in visitor.files) - assert all(os.path.islink(f) for f in visitor.symlinks) - - -def test_text_relocate_if_needed(install_mockery, temporary_store, mock_fetch, monkeypatch, capfd): +def test_text_relocate_if_needed(install_mockery, temporary_store, mock_fetch, tmp_path): install_cmd("needs-text-relocation") + spec = temporary_store.db.query_one("needs-text-relocation") + tgz_path = tmp_path / "relocatable.tar.gz" + bindist.create_tarball(spec, str(tgz_path)) - specs = temporary_store.db.query("needs-text-relocation") - assert len(specs) == 1 - manifest = get_buildfile_manifest(specs[0]) + # extract the .spack/binary_distribution file + with tarfile.open(tgz_path) as tar: + entry_name = next(x for x in tar.getnames() if x.endswith(".spack/binary_distribution")) + bd_file = tar.extractfile(entry_name) + manifest = syaml.load(bd_file) - assert join_path("bin", "exe") in manifest["text_to_relocate"] - assert join_path("bin", "otherexe") not in manifest["text_to_relocate"] - assert join_path("bin", "secretexe") not in manifest["text_to_relocate"] + assert join_path("bin", "exe") in manifest["relocate_textfiles"] + assert join_path("bin", "otherexe") not in manifest["relocate_textfiles"] + assert join_path("bin", "secretexe") not in manifest["relocate_textfiles"] def test_etag_fetching_304(): @@ -917,7 +878,7 @@ def test_tarball_doesnt_include_buildinfo_twice(tmp_path: Path): tarball = str(tmp_path / "prefix.tar.gz") bindist._do_create_tarball( - tarfile_path=tarball, binaries_dir=str(p), buildinfo={"metadata": "new"} + tarfile_path=tarball, prefix=str(p), buildinfo={"metadata": "new"}, prefixes_to_relocate=[] ) expected_prefix = str(p).lstrip("/") @@ -926,7 +887,10 @@ def test_tarball_doesnt_include_buildinfo_twice(tmp_path: Path): # and that the tarball contains the new one, not the old one. with tarfile.open(tarball) as tar: assert syaml.load(tar.extractfile(f"{expected_prefix}/.spack/binary_distribution")) == { - "metadata": "new" + "metadata": "new", + "relocate_binaries": [], + "relocate_textfiles": [], + "relocate_links": [], } assert tar.getnames() == [ *_all_parents(expected_prefix), @@ -951,11 +915,15 @@ def test_reproducible_tarball_is_reproducible(tmp_path: Path): # Create a tarball with a certain mtime of bin/app os.utime(app, times=(0, 0)) - bindist._do_create_tarball(tarball_1, binaries_dir=str(p), buildinfo=buildinfo) + bindist._do_create_tarball( + tarball_1, prefix=str(p), buildinfo=buildinfo, prefixes_to_relocate=[] + ) # Do it another time with different mtime of bin/app os.utime(app, times=(10, 10)) - bindist._do_create_tarball(tarball_2, binaries_dir=str(p), buildinfo=buildinfo) + bindist._do_create_tarball( + tarball_2, prefix=str(p), buildinfo=buildinfo, prefixes_to_relocate=[] + ) # They should be bitwise identical: assert filecmp.cmp(tarball_1, tarball_2, shallow=False) @@ -1001,7 +969,7 @@ def test_tarball_normalized_permissions(tmpdir): ) as f: f.write("hello world") - bindist._do_create_tarball(tarball, binaries_dir=p.strpath, buildinfo={}) + bindist._do_create_tarball(tarball, prefix=p.strpath, buildinfo={}, prefixes_to_relocate=[]) expected_prefix = p.strpath.lstrip("/") @@ -1120,7 +1088,7 @@ def test_tarfile_of_spec_prefix(tmpdir): file = tmpdir.join("example.tar") with tarfile.open(file, mode="w") as tar: - bindist.tarfile_of_spec_prefix(tar, prefix.strpath) + bindist.tarfile_of_spec_prefix(tar, prefix.strpath, prefixes_to_relocate=[]) expected_prefix = prefix.strpath.lstrip("/") diff --git a/lib/spack/spack/test/packaging.py b/lib/spack/spack/test/packaging.py index aa7eed4ed66..e707fe25272 100644 --- a/lib/spack/spack/test/packaging.py +++ b/lib/spack/spack/test/packaging.py @@ -36,8 +36,6 @@ macho_find_paths, macho_make_paths_normal, macho_make_paths_relative, - needs_binary_relocation, - needs_text_relocation, relocate_links, relocate_text, ) @@ -193,16 +191,6 @@ def test_relocate_links(tmpdir): assert readlink("to_self_but_relative") == "relative" -def test_needs_relocation(): - assert needs_binary_relocation("application", "x-sharedlib") - assert needs_binary_relocation("application", "x-executable") - assert not needs_binary_relocation("application", "x-octet-stream") - assert not needs_binary_relocation("text", "x-") - assert needs_text_relocation("text", "x-") - assert not needs_text_relocation("symbolic link to", "x-") - assert needs_binary_relocation("application", "x-mach-binary") - - def test_replace_paths(tmpdir): with tmpdir.as_cwd(): suffix = "dylib" if platform.system().lower() == "darwin" else "so" diff --git a/lib/spack/spack/util/archive.py b/lib/spack/spack/util/archive.py index 950405887a4..62757e2d47e 100644 --- a/lib/spack/spack/util/archive.py +++ b/lib/spack/spack/util/archive.py @@ -10,7 +10,7 @@ import tarfile from contextlib import closing, contextmanager from gzip import GzipFile -from typing import Callable, Dict, Tuple +from typing import Callable, Dict, List, Tuple from llnl.util.symlink import readlink @@ -130,6 +130,15 @@ def default_path_to_name(path: str) -> str: return pathlib.PurePath(*p.parts[1:]).as_posix() if p.is_absolute() else p.as_posix() +def default_add_file(tar: tarfile.TarFile, file_info: tarfile.TarInfo, path: str) -> None: + with open(path, "rb") as f: + tar.addfile(file_info, f) + + +def default_add_link(tar: tarfile.TarFile, file_info: tarfile.TarInfo, path: str) -> None: + tar.addfile(file_info) + + def reproducible_tarfile_from_prefix( tar: tarfile.TarFile, prefix: str, @@ -137,6 +146,9 @@ def reproducible_tarfile_from_prefix( include_parent_directories: bool = False, skip: Callable[[os.DirEntry], bool] = lambda entry: False, path_to_name: Callable[[str], str] = default_path_to_name, + add_file: Callable[[tarfile.TarFile, tarfile.TarInfo, str], None] = default_add_file, + add_symlink: Callable[[tarfile.TarFile, tarfile.TarInfo, str], None] = default_add_link, + add_hardlink: Callable[[tarfile.TarFile, tarfile.TarInfo, str], None] = default_add_link, ) -> None: """Create a tarball from a given directory. Only adds regular files, symlinks and dirs. Skips devices, fifos. Preserves hardlinks. Normalizes permissions like git. Tar entries are @@ -170,8 +182,10 @@ def reproducible_tarfile_from_prefix( tar.addfile(dir_info) dir_stack = [prefix] + new_dirs: List[str] = [] while dir_stack: dir = dir_stack.pop() + new_dirs.clear() # Add the dir before its contents dir_info = tarfile.TarInfo(path_to_name(dir)) @@ -183,7 +197,6 @@ def reproducible_tarfile_from_prefix( with os.scandir(dir) as it: entries = sorted(it, key=lambda entry: entry.name) - new_dirs = [] for entry in entries: if skip(entry): continue @@ -201,7 +214,7 @@ def reproducible_tarfile_from_prefix( # st_mode field of the stat structure is unspecified." So we set it to # something sensible without lstat'ing the link. file_info.mode = 0o755 - tar.addfile(file_info) + add_symlink(tar, file_info, entry.path) elif entry.is_file(follow_symlinks=False): # entry.stat has zero (st_ino, st_dev, st_nlink) on Windows: use lstat. @@ -216,15 +229,13 @@ def reproducible_tarfile_from_prefix( if ident in hardlink_to_tarinfo_name: file_info.type = tarfile.LNKTYPE file_info.linkname = hardlink_to_tarinfo_name[ident] - tar.addfile(file_info) + add_hardlink(tar, file_info, entry.path) continue hardlink_to_tarinfo_name[ident] = file_info.name # If file not yet seen, copy it file_info.type = tarfile.REGTYPE file_info.size = s.st_size - - with open(entry.path, "rb") as f: - tar.addfile(file_info, f) + add_file(tar, file_info, entry.path) dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical diff --git a/lib/spack/spack/util/filesystem.py b/lib/spack/spack/util/filesystem.py index eac5c589188..8a2d41a6e59 100644 --- a/lib/spack/spack/util/filesystem.py +++ b/lib/spack/spack/util/filesystem.py @@ -10,62 +10,10 @@ import glob import os -import sys -from llnl.util import tty from llnl.util.filesystem import edit_in_place_through_temporary_file -from llnl.util.lang import memoized -from spack.util.executable import Executable, which - - -def _ensure_file_on_win(): - """Ensures the file command is available on Windows - If not, it is bootstrapped. - No-op on all other platforms""" - if sys.platform != "win32": - return - import spack.bootstrap - - with spack.bootstrap.ensure_bootstrap_configuration(): - spack.bootstrap.ensure_file_in_path_or_raise() - - -@memoized -def file_command(*args): - """Creates entry point to `file` system command with provided arguments""" - _ensure_file_on_win() - file_cmd = which("file", required=True) - for arg in args: - file_cmd.add_default_arg(arg) - return file_cmd - - -@memoized -def _get_mime_type(): - """Generate method to call `file` system command to aquire mime type - for a specified path - """ - if sys.platform == "win32": - # -h option (no-dereference) does not exist in Windows - return file_command("-b", "--mime-type") - else: - return file_command("-b", "-h", "--mime-type") - - -def mime_type(filename): - """Returns the mime type and subtype of a file. - - Args: - filename: file to be analyzed - - Returns: - Tuple containing the MIME type and subtype - """ - output = _get_mime_type()(filename, output=str, error=str).strip() - tty.debug("==> " + output) - type, _, subtype = output.partition("/") - return type, subtype +from spack.util.executable import Executable def fix_darwin_install_name(path): diff --git a/share/spack/qa/bootstrap-file.py b/share/spack/qa/bootstrap-file.py deleted file mode 100644 index 720bd99bbce..00000000000 --- a/share/spack/qa/bootstrap-file.py +++ /dev/null @@ -1,4 +0,0 @@ -from spack.util.filesystem import file_command - -if __name__ == "__main__": - file_command()