Fix multiple issues with Python in views (#42601)

This fixes bugs, performance issues, and removes no longer necessary code.

Short version:

1. Creating views from Python extensions would error if the Spack `opt` dir itself was in some symlinked directory. Use of `realpath` would expand those, and keying into `merge_map` would fail.
2. Creating views from Python extensions (and Python itself, potentially) could fail if the `bin/` dir contains symlinks pointing outside the package prefix -- Spack keyed into `merge_map[target_of_symlink]` incorrectly.
3. In the `python` package the `remove_files_from_view` function was broken after a breaking API change two years ago (#24355). However, the entire function body was redundant anyways, so solved it by removing it.
4. Notions of "global view" (i.e. python extensions being linked into Python's own prefix instead of into a view) are completely outdated, and removed. It used to be supported but was removed years ago.
5. Views for Python extension would _always_ copy non-symlinks in `./bin/*`, which is a big mistake, since all we care about is rewriting shebangs of scripts; we don't want to copy binaries. Now we first check if the file is executable, and then read two bytes to check if it has a shebang, and only if so, copy the entire file and patch up shebangs.

The bug fixes for (1) and (2) basically consist of getting rid of `realpath` entirely, and instead simply keep track of file identifiers of files that are copied/modified in the view. Only after patching up regular files do we iterate over symlinks and check if they target one of those. If so, retarget it to the modified file in the view.
This commit is contained in:
Harmen Stoppels 2024-02-12 19:52:52 +01:00 committed by GitHub
parent c33a8dc223
commit 519deac544
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 98 additions and 64 deletions

View File

@ -920,29 +920,35 @@ def get_filetype(path_name):
return output.strip() return output.strip()
@system_path_filter def has_shebang(path):
def is_nonsymlink_exe_with_shebang(path): """Returns whether a path has a shebang line. Returns False if the file cannot be opened."""
"""
Returns whether the path is an executable script with a shebang.
Return False when the path is a *symlink* to an executable script.
"""
try: try:
st = os.lstat(path)
# Should not be a symlink
if stat.S_ISLNK(st.st_mode):
return False
# Should be executable
if not st.st_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH):
return False
# Should start with a shebang
with open(path, "rb") as f: with open(path, "rb") as f:
return f.read(2) == b"#!" return f.read(2) == b"#!"
except (IOError, OSError): except OSError:
return False return False
@system_path_filter
def is_nonsymlink_exe_with_shebang(path):
"""Returns whether the path is an executable regular file with a shebang. Returns False too
when the path is a symlink to a script, and also when the file cannot be opened."""
try:
st = os.lstat(path)
except OSError:
return False
# Should not be a symlink
if stat.S_ISLNK(st.st_mode):
return False
# Should be executable
if not st.st_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH):
return False
return has_shebang(path)
@system_path_filter(arg_slice=slice(1)) @system_path_filter(arg_slice=slice(1))
def chgrp_if_not_world_writable(path, group): def chgrp_if_not_world_writable(path, group):
"""chgrp path to group if path is not world writable""" """chgrp path to group if path is not world writable"""

View File

@ -6,7 +6,8 @@
import os import os
import re import re
import shutil import shutil
from typing import Iterable, List, Mapping, Optional import stat
from typing import Dict, Iterable, List, Mapping, Optional, Tuple
import archspec import archspec
@ -136,31 +137,50 @@ def view_file_conflicts(self, view, merge_map):
return conflicts return conflicts
def add_files_to_view(self, view, merge_map, skip_if_exists=True): def add_files_to_view(self, view, merge_map, skip_if_exists=True):
if not self.extendee_spec: # Patch up shebangs to the python linked in the view only if python is built by Spack.
if not self.extendee_spec or self.extendee_spec.external:
return super().add_files_to_view(view, merge_map, skip_if_exists) return super().add_files_to_view(view, merge_map, skip_if_exists)
# We only patch shebangs in the bin directory.
copied_files: Dict[Tuple[int, int], str] = {} # File identifier -> source
delayed_links: List[Tuple[str, str]] = [] # List of symlinks from merge map
bin_dir = self.spec.prefix.bin bin_dir = self.spec.prefix.bin
python_prefix = self.extendee_spec.prefix python_prefix = self.extendee_spec.prefix
python_is_external = self.extendee_spec.external
global_view = fs.same_path(python_prefix, view.get_projection_for_spec(self.spec))
for src, dst in merge_map.items(): for src, dst in merge_map.items():
if os.path.exists(dst): if skip_if_exists and os.path.lexists(dst):
continue continue
elif global_view or not fs.path_contains_subdirectory(src, bin_dir):
if not fs.path_contains_subdirectory(src, bin_dir):
view.link(src, dst) view.link(src, dst)
elif not os.path.islink(src): continue
s = os.lstat(src)
# Symlink is delayed because we may need to re-target if its target is copied in view
if stat.S_ISLNK(s.st_mode):
delayed_links.append((src, dst))
continue
# If it's executable and has a shebang, copy and patch it.
if (s.st_mode & 0b111) and fs.has_shebang(src):
copied_files[(s.st_dev, s.st_ino)] = dst
shutil.copy2(src, dst) shutil.copy2(src, dst)
is_script = fs.is_nonsymlink_exe_with_shebang(src) fs.filter_file(
if is_script and not python_is_external: python_prefix, os.path.abspath(view.get_projection_for_spec(self.spec)), dst
fs.filter_file( )
python_prefix,
os.path.abspath(view.get_projection_for_spec(self.spec)), # Finally re-target the symlinks that point to copied files.
dst, for src, dst in delayed_links:
) try:
s = os.stat(src)
target = copied_files[(s.st_dev, s.st_ino)]
except (OSError, KeyError):
target = None
if target:
os.symlink(target, dst)
else: else:
orig_link_target = os.path.realpath(src) view.link(src, dst, spec=self.spec)
new_link_target = os.path.abspath(merge_map[orig_link_target])
view.link(new_link_target, dst)
def remove_files_from_view(self, view, merge_map): def remove_files_from_view(self, view, merge_map):
ignore_namespace = False ignore_namespace = False

View File

@ -8,10 +8,11 @@
import os import os
import platform import platform
import re import re
import stat
import subprocess import subprocess
import sys import sys
from shutil import copy from shutil import copy
from typing import Dict, List from typing import Dict, List, Tuple
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import is_nonsymlink_exe_with_shebang, path_contains_subdirectory from llnl.util.filesystem import is_nonsymlink_exe_with_shebang, path_contains_subdirectory
@ -1265,42 +1266,49 @@ def setup_dependent_package(self, module, dependent_spec):
module.python_purelib = join_path(dependent_spec.prefix, self.purelib) module.python_purelib = join_path(dependent_spec.prefix, self.purelib)
def add_files_to_view(self, view, merge_map, skip_if_exists=True): def add_files_to_view(self, view, merge_map, skip_if_exists=True):
# The goal is to copy the `python` executable, so that its search paths are relative to the
# view instead of the install prefix. This is an obsolete way of creating something that
# resembles a virtual environnent. Also we copy scripts with shebang lines. Finally we need
# to re-target symlinks pointing to copied files.
bin_dir = self.spec.prefix.bin if sys.platform != "win32" else self.spec.prefix bin_dir = self.spec.prefix.bin if sys.platform != "win32" else self.spec.prefix
copied_files: Dict[Tuple[int, int], str] = {} # File identifier -> source
delayed_links: List[Tuple[str, str]] = [] # List of symlinks from merge map
for src, dst in merge_map.items(): for src, dst in merge_map.items():
if skip_if_exists and os.path.lexists(dst):
continue
# Files not in the bin dir are linked the default way.
if not path_contains_subdirectory(src, bin_dir): if not path_contains_subdirectory(src, bin_dir):
view.link(src, dst, spec=self.spec) view.link(src, dst, spec=self.spec)
elif not os.path.islink(src): continue
copy(src, dst)
if is_nonsymlink_exe_with_shebang(src):
filter_file(
self.spec.prefix,
os.path.abspath(view.get_projection_for_spec(self.spec)),
dst,
backup=False,
)
else:
# orig_link_target = os.path.realpath(src) is insufficient when
# the spack install tree is located at a symlink or a
# descendent of a symlink. What we need here is the real
# relative path from the python prefix to src
# TODO: generalize this logic in the link_tree object
# add a method to resolve a link relative to the link_tree
# object root.
realpath_src = os.path.realpath(src)
realpath_prefix = os.path.realpath(self.spec.prefix)
realpath_rel = os.path.relpath(realpath_src, realpath_prefix)
orig_link_target = os.path.join(self.spec.prefix, realpath_rel)
new_link_target = os.path.abspath(merge_map[orig_link_target]) s = os.lstat(src)
view.link(new_link_target, dst, spec=self.spec)
def remove_files_from_view(self, view, merge_map): # Symlink is delayed because we may need to re-target if its target is copied in view
bin_dir = self.spec.prefix.bin if sys.platform != "win32" else self.spec.prefix if stat.S_ISLNK(s.st_mode):
for src, dst in merge_map.items(): delayed_links.append((src, dst))
if not path_contains_subdirectory(src, bin_dir): continue
view.remove_file(src, dst)
# Anything that's not a symlink gets copied. Scripts with shebangs are immediately
# updated when necessary.
copied_files[(s.st_dev, s.st_ino)] = dst
copy(src, dst)
if is_nonsymlink_exe_with_shebang(src):
filter_file(
self.spec.prefix, os.path.abspath(view.get_projection_for_spec(self.spec)), dst
)
# Finally re-target the symlinks that point to copied files.
for src, dst in delayed_links:
try:
s = os.stat(src)
target = copied_files[(s.st_dev, s.st_ino)]
except (OSError, KeyError):
target = None
if target:
os.symlink(target, dst)
else: else:
os.remove(dst) view.link(src, dst, spec=self.spec)
def test_hello_world(self): def test_hello_world(self):
"""run simple hello world program""" """run simple hello world program"""