Add in-place RPATH replacement (#27610)
Whenever the rpath string actually _grows_, it falls back to patchelf, when it stays the same length or gets shorter, we update it in-place, padded with null bytes. This PR only deals with absolute -> absolute rpath replacement. We don't use `_build_tarball(relative=True)` in our CI. If `relative` then it falls back to the old replacement code. With this PR, relocation time goes down significantly, likely because patchelf does some odd things with mmap, causing lots of overhead. Example: - `binutils`: 700MB installed, goes from `1.91s` to `0.57s`, or `3.4x` faster. Relocation time: 27% -> 10% of total install time - `llvm`: 6.8GB installed, goes from `28.56s` to `5.38`, or `5.3x` faster. Relocation time: 44% -> 13% of total install time The bottleneck is now decompression. Note: I'm somewhat confused about the "relative rpath" code paths. Right now this PR only deals with absolute -> absolute replacement. As far as I understand, if you embrace relative rpaths when uploading to the buildcache, the whole point is you _don't_ want to patch rpaths on install? So it seems fine to not expand `$ORIGIN` again imho.
This commit is contained in:
parent
b1f896e6c7
commit
1d4919924d
@ -1634,7 +1634,11 @@ def is_backup_file(file):
|
|||||||
old_prefix,
|
old_prefix,
|
||||||
new_prefix,
|
new_prefix,
|
||||||
)
|
)
|
||||||
if "elf" in platform.binary_formats:
|
elif "elf" in platform.binary_formats and not rel:
|
||||||
|
# The new ELF dynamic section relocation logic only handles absolute to
|
||||||
|
# absolute relocation.
|
||||||
|
relocate.new_relocate_elf_binaries(files_to_relocate, prefix_to_prefix_bin)
|
||||||
|
elif "elf" in platform.binary_formats and rel:
|
||||||
relocate.relocate_elf_binaries(
|
relocate.relocate_elf_binaries(
|
||||||
files_to_relocate,
|
files_to_relocate,
|
||||||
old_layout_root,
|
old_layout_root,
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
import macholib.mach_o
|
import macholib.mach_o
|
||||||
import macholib.MachO
|
import macholib.MachO
|
||||||
@ -21,6 +22,7 @@
|
|||||||
import spack.platforms
|
import spack.platforms
|
||||||
import spack.repo
|
import spack.repo
|
||||||
import spack.spec
|
import spack.spec
|
||||||
|
import spack.util.elf as elf
|
||||||
import spack.util.executable as executable
|
import spack.util.executable as executable
|
||||||
|
|
||||||
is_macos = str(spack.platforms.real_host()) == "darwin"
|
is_macos = str(spack.platforms.real_host()) == "darwin"
|
||||||
@ -93,27 +95,15 @@ def _patchelf():
|
|||||||
def _elf_rpaths_for(path):
|
def _elf_rpaths_for(path):
|
||||||
"""Return the RPATHs for an executable or a library.
|
"""Return the RPATHs for an executable or a library.
|
||||||
|
|
||||||
The RPATHs are obtained by ``patchelf --print-rpath PATH``.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
path (str): full path to the executable or library
|
path (str): full path to the executable or library
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
RPATHs as a list of strings.
|
RPATHs as a list of strings. Returns an empty array
|
||||||
|
on ELF parsing errors, or when the ELF file simply
|
||||||
|
has no rpaths.
|
||||||
"""
|
"""
|
||||||
# If we're relocating patchelf itself, use it
|
return elf.get_rpaths(path) or []
|
||||||
patchelf_path = path if path.endswith("/bin/patchelf") else _patchelf()
|
|
||||||
patchelf = executable.Executable(patchelf_path)
|
|
||||||
|
|
||||||
output = ""
|
|
||||||
try:
|
|
||||||
output = patchelf("--print-rpath", path, output=str, error=str)
|
|
||||||
output = output.strip("\n")
|
|
||||||
except executable.ProcessError as e:
|
|
||||||
msg = "patchelf --print-rpath {0} produced an error [{1}]"
|
|
||||||
tty.warn(msg.format(path, str(e)))
|
|
||||||
|
|
||||||
return output.split(":") if output else []
|
|
||||||
|
|
||||||
|
|
||||||
def _make_relative(reference_file, path_root, paths):
|
def _make_relative(reference_file, path_root, paths):
|
||||||
@ -384,15 +374,13 @@ def _set_elf_rpaths(target, rpaths):
|
|||||||
"""Replace the original RPATH of the target with the paths passed
|
"""Replace the original RPATH of the target with the paths passed
|
||||||
as arguments.
|
as arguments.
|
||||||
|
|
||||||
This function uses ``patchelf`` to set RPATHs.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
target: target executable. Must be an ELF object.
|
target: target executable. Must be an ELF object.
|
||||||
rpaths: paths to be set in the RPATH
|
rpaths: paths to be set in the RPATH
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A string concatenating the stdout and stderr of the call
|
A string concatenating the stdout and stderr of the call
|
||||||
to ``patchelf``
|
to ``patchelf`` if it was invoked
|
||||||
"""
|
"""
|
||||||
# Join the paths using ':' as a separator
|
# Join the paths using ':' as a separator
|
||||||
rpaths_str = ":".join(rpaths)
|
rpaths_str = ":".join(rpaths)
|
||||||
@ -595,6 +583,23 @@ def _transform_rpaths(orig_rpaths, orig_root, new_prefixes):
|
|||||||
return new_rpaths
|
return new_rpaths
|
||||||
|
|
||||||
|
|
||||||
|
def new_relocate_elf_binaries(binaries, prefix_to_prefix):
|
||||||
|
"""Take a list of binaries, and an ordered dictionary of
|
||||||
|
prefix to prefix mapping, and update the rpaths accordingly."""
|
||||||
|
|
||||||
|
# Transform to binary string
|
||||||
|
prefix_to_prefix = OrderedDict(
|
||||||
|
(k.encode("utf-8"), v.encode("utf-8")) for (k, v) in prefix_to_prefix.items()
|
||||||
|
)
|
||||||
|
|
||||||
|
for path in binaries:
|
||||||
|
try:
|
||||||
|
elf.replace_rpath_in_place_or_raise(path, prefix_to_prefix)
|
||||||
|
except elf.ElfDynamicSectionUpdateFailed as e:
|
||||||
|
# Fall back to the old `patchelf --set-rpath` method.
|
||||||
|
_set_elf_rpaths(path, e.new.decode("utf-8").split(":"))
|
||||||
|
|
||||||
|
|
||||||
def relocate_elf_binaries(
|
def relocate_elf_binaries(
|
||||||
binaries, orig_root, new_root, new_prefixes, rel, orig_prefix, new_prefix
|
binaries, orig_root, new_root, new_prefixes, rel, orig_prefix, new_prefix
|
||||||
):
|
):
|
||||||
|
@ -192,24 +192,6 @@ def test_file_is_relocatable_errors(tmpdir):
|
|||||||
assert "is not an absolute path" in str(exc_info.value)
|
assert "is not an absolute path" in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"patchelf_behavior,expected",
|
|
||||||
[
|
|
||||||
("echo ", []),
|
|
||||||
("echo /opt/foo/lib:/opt/foo/lib64", ["/opt/foo/lib", "/opt/foo/lib64"]),
|
|
||||||
("exit 1", []),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
def test_existing_rpaths(patchelf_behavior, expected, mock_patchelf):
|
|
||||||
# Here we are mocking an executable that is always called "patchelf"
|
|
||||||
# because that will skip the part where we try to build patchelf
|
|
||||||
# by ourselves. The executable will output some rpaths like
|
|
||||||
# `patchelf --print-rpath` would.
|
|
||||||
path = mock_patchelf(patchelf_behavior)
|
|
||||||
rpaths = spack.relocate._elf_rpaths_for(path)
|
|
||||||
assert rpaths == expected
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"start_path,path_root,paths,expected",
|
"start_path,path_root,paths,expected",
|
||||||
[
|
[
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
@ -24,15 +25,6 @@ def skip_unless_linux(f):
|
|||||||
)(f)
|
)(f)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.requires_executables("gcc")
|
|
||||||
@skip_unless_linux
|
|
||||||
def test_elf_get_rpaths(binary_with_rpaths):
|
|
||||||
# Compile an "Hello world!" executable and set RPATHs
|
|
||||||
long_rpaths = ["/very/long/prefix/x", "/very/long/prefix/y"]
|
|
||||||
executable = str(binary_with_rpaths(rpaths=long_rpaths))
|
|
||||||
assert elf.get_rpaths(executable) == long_rpaths
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.requires_executables("gcc")
|
@pytest.mark.requires_executables("gcc")
|
||||||
@skip_unless_linux
|
@skip_unless_linux
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@ -128,3 +120,48 @@ def test_parser_doesnt_deal_with_nonzero_offset():
|
|||||||
elf_at_offset_one.read(1)
|
elf_at_offset_one.read(1)
|
||||||
with pytest.raises(elf.ElfParsingError, match="Cannot parse at a nonzero offset"):
|
with pytest.raises(elf.ElfParsingError, match="Cannot parse at a nonzero offset"):
|
||||||
elf.parse_elf(elf_at_offset_one)
|
elf.parse_elf(elf_at_offset_one)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires_executables("gcc")
|
||||||
|
@skip_unless_linux
|
||||||
|
def test_elf_get_and_replace_rpaths(binary_with_rpaths):
|
||||||
|
long_rpaths = ["/very/long/prefix-a/x", "/very/long/prefix-b/y"]
|
||||||
|
executable = str(binary_with_rpaths(rpaths=long_rpaths))
|
||||||
|
|
||||||
|
# Before
|
||||||
|
assert elf.get_rpaths(executable) == long_rpaths
|
||||||
|
|
||||||
|
replacements = OrderedDict(
|
||||||
|
[
|
||||||
|
(b"/very/long/prefix-a", b"/short-a"),
|
||||||
|
(b"/very/long/prefix-b", b"/short-b"),
|
||||||
|
(b"/very/long", b"/dont"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Replace once: should modify the file.
|
||||||
|
assert elf.replace_rpath_in_place_or_raise(executable, replacements)
|
||||||
|
|
||||||
|
# Replace twice: nothing to be done.
|
||||||
|
assert not elf.replace_rpath_in_place_or_raise(executable, replacements)
|
||||||
|
|
||||||
|
# Verify the rpaths were modified correctly
|
||||||
|
assert elf.get_rpaths(executable) == ["/short-a/x", "/short-b/y"]
|
||||||
|
|
||||||
|
# Going back to long rpaths should fail, since we've added trailing \0
|
||||||
|
# bytes, and replacement can't assume it can write back in repeated null
|
||||||
|
# bytes -- it may correspond to zero-length strings for example.
|
||||||
|
with pytest.raises(
|
||||||
|
elf.ElfDynamicSectionUpdateFailed,
|
||||||
|
match="New rpath /very/long/prefix-a/x:/very/long/prefix-b/y is "
|
||||||
|
"longer than old rpath /short-a/x:/short-b/y",
|
||||||
|
):
|
||||||
|
elf.replace_rpath_in_place_or_raise(
|
||||||
|
executable,
|
||||||
|
OrderedDict(
|
||||||
|
[
|
||||||
|
(b"/short-a", b"/very/long/prefix-a"),
|
||||||
|
(b"/short-b", b"/very/long/prefix-b"),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||||
|
|
||||||
import bisect
|
import bisect
|
||||||
|
import re
|
||||||
import struct
|
import struct
|
||||||
import sys
|
import sys
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
@ -99,10 +100,6 @@ def get_byte_at(byte_array, idx):
|
|||||||
return byte_array[idx]
|
return byte_array[idx]
|
||||||
|
|
||||||
|
|
||||||
class ElfParsingError(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class ElfFile(object):
|
class ElfFile(object):
|
||||||
"""Parsed ELF file."""
|
"""Parsed ELF file."""
|
||||||
|
|
||||||
@ -121,6 +118,7 @@ class ElfFile(object):
|
|||||||
"has_pt_dynamic",
|
"has_pt_dynamic",
|
||||||
"pt_dynamic_p_offset",
|
"pt_dynamic_p_offset",
|
||||||
"pt_dynamic_p_filesz",
|
"pt_dynamic_p_filesz",
|
||||||
|
"pt_dynamic_strtab_offset", # string table for dynamic section
|
||||||
# rpath
|
# rpath
|
||||||
"has_rpath",
|
"has_rpath",
|
||||||
"dt_rpath_offset",
|
"dt_rpath_offset",
|
||||||
@ -359,7 +357,8 @@ def parse_pt_dynamic(f, elf):
|
|||||||
if not (elf.has_rpath or elf.has_soname or elf.has_needed):
|
if not (elf.has_rpath or elf.has_soname or elf.has_needed):
|
||||||
return
|
return
|
||||||
|
|
||||||
string_table = retrieve_strtab(f, elf, vaddr_to_offset(elf, strtab_vaddr))
|
elf.pt_dynamic_strtab_offset = vaddr_to_offset(elf, strtab_vaddr)
|
||||||
|
string_table = retrieve_strtab(f, elf, elf.pt_dynamic_strtab_offset)
|
||||||
|
|
||||||
if elf.has_needed:
|
if elf.has_needed:
|
||||||
elf.dt_needed_strs = list(
|
elf.dt_needed_strs = list(
|
||||||
@ -457,3 +456,79 @@ def get_rpaths(path):
|
|||||||
if sys.version_info[0] >= 3:
|
if sys.version_info[0] >= 3:
|
||||||
rpath = rpath.decode("utf-8")
|
rpath = rpath.decode("utf-8")
|
||||||
return rpath.split(":")
|
return rpath.split(":")
|
||||||
|
|
||||||
|
|
||||||
|
def replace_rpath_in_place_or_raise(path, substitutions):
|
||||||
|
regex = re.compile(b"|".join(re.escape(p) for p in substitutions.keys()))
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(path, "rb+") as f:
|
||||||
|
elf = parse_elf(f, interpreter=False, dynamic_section=True)
|
||||||
|
|
||||||
|
# If there's no RPATH, then there's no need to replace anything.
|
||||||
|
if not elf.has_rpath:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Get the non-empty rpaths. Sometimes there's a bunch of trailing
|
||||||
|
# colons ::::: used for padding, we don't add them back to make it
|
||||||
|
# more likely that the string doesn't grow.
|
||||||
|
rpaths = list(filter(len, elf.dt_rpath_str.split(b":")))
|
||||||
|
|
||||||
|
num_rpaths = len(rpaths)
|
||||||
|
|
||||||
|
if num_rpaths == 0:
|
||||||
|
return False
|
||||||
|
|
||||||
|
changed = False
|
||||||
|
for i in range(num_rpaths):
|
||||||
|
old_rpath = rpaths[i]
|
||||||
|
match = regex.match(old_rpath)
|
||||||
|
if match:
|
||||||
|
changed = True
|
||||||
|
rpaths[i] = substitutions[match.group()] + old_rpath[match.end() :]
|
||||||
|
|
||||||
|
# Nothing to replace!
|
||||||
|
if not changed:
|
||||||
|
return False
|
||||||
|
|
||||||
|
new_rpath_string = b":".join(rpaths)
|
||||||
|
|
||||||
|
pad = len(elf.dt_rpath_str) - len(new_rpath_string)
|
||||||
|
|
||||||
|
if pad < 0:
|
||||||
|
raise ElfDynamicSectionUpdateFailed(elf.dt_rpath_str, new_rpath_string)
|
||||||
|
|
||||||
|
# We zero out the bits we shortened because (a) it should be a
|
||||||
|
# C-string and (b) it's nice not to have spurious parts of old
|
||||||
|
# paths in the output of `strings file`. Note that we're all
|
||||||
|
# good when pad == 0; the original terminating null is used.
|
||||||
|
new_rpath_string += b"\x00" * pad
|
||||||
|
|
||||||
|
# The rpath is at a given offset in the string table used by the
|
||||||
|
# dynamic section.
|
||||||
|
rpath_offset = elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset
|
||||||
|
|
||||||
|
f.seek(rpath_offset)
|
||||||
|
f.write(new_rpath_string)
|
||||||
|
return True
|
||||||
|
|
||||||
|
except ElfParsingError:
|
||||||
|
# This just means the file wasnt an elf file, so there's no point
|
||||||
|
# in updating its rpath anyways; ignore this problem.
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class ElfDynamicSectionUpdateFailed(Exception):
|
||||||
|
def __init__(self, old, new):
|
||||||
|
self.old = old
|
||||||
|
self.new = new
|
||||||
|
super(ElfDynamicSectionUpdateFailed, self).__init__(
|
||||||
|
"New rpath {} is longer than old rpath {}".format(
|
||||||
|
new.decode("utf-8"),
|
||||||
|
old.decode("utf-8"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ElfParsingError(Exception):
|
||||||
|
pass
|
||||||
|
Loading…
Reference in New Issue
Block a user