Fix relocation to avoid overwriting merged constant strings (#32253)

Compilers and linker optimize string constants for space by aliasing
them when one is a suffix of another. For gcc / binutils this happens
already at -O1, due to -fmerge-constants. This means that we have
to take care during relocation to always preserve a certain length
of the suffix of those prefixes that are C-strings. 

In this commit we pick length 7 as a safe suffix length, assuming the
suffix is typically the 7 characters from the hash (i.e. random), so
it's unlikely to alias with any string constant used in the sources.

In general we now pad shortened strings from the left with leading
dir seperators, but in the case of C-strings that are much shorter
and don't share a common suffix (due to projections), we do allow
shrinking the C-string, appending a null, and retaining the old part
of the prefix.

Also when rewiring, we ensure that the new hash preserves the last
7 bytes of the old hash.

Co-authored-by: Harmen Stoppels <harmenstoppels@gmail.com>
This commit is contained in:
Tom Scogland 2022-11-05 00:09:35 -10:00 committed by GitHub
parent 71d480515b
commit 3346c0918b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 324 additions and 86 deletions

View File

@ -1600,13 +1600,19 @@ def relocate_package(spec, allow_root):
install_path = spack.hooks.sbang.sbang_install_path()
prefix_to_prefix_text[old_sbang_install_path] = install_path
# First match specific prefix paths. Possibly the *local* install prefix
# of some dependency is in an upstream, so we cannot assume the original
# spack store root can be mapped uniformly to the new spack store root.
for orig_prefix, hash in prefix_to_hash.items():
prefix_to_prefix_text[orig_prefix] = hash_to_prefix.get(hash, None)
prefix_to_prefix_bin[orig_prefix] = hash_to_prefix.get(hash, None)
# Only then add the generic fallback of install prefix -> install prefix.
prefix_to_prefix_text[old_prefix] = new_prefix
prefix_to_prefix_bin[old_prefix] = new_prefix
prefix_to_prefix_text[old_layout_root] = new_layout_root
prefix_to_prefix_bin[old_layout_root] = new_layout_root
for orig_prefix, hash in prefix_to_hash.items():
prefix_to_prefix_text[orig_prefix] = hash_to_prefix.get(hash, None)
prefix_to_prefix_bin[orig_prefix] = hash_to_prefix.get(hash, None)
# This is vestigial code for the *old* location of sbang. Previously,
# sbang was a bash script, and it lived in the spack prefix. It is
# now a POSIX script that lives in the install prefix. Old packages

View File

@ -61,23 +61,30 @@ def __init__(self, file_path, old_len, new_len):
class BinaryTextReplaceError(spack.error.SpackError):
def __init__(self, old_path, new_path):
"""Raised when the new install path is longer than the
old one, so binary text replacement cannot occur.
def __init__(self, msg):
msg += (
" To fix this, compile with more padding "
"(config:install_tree:padded_length), or install to a shorter prefix."
)
super(BinaryTextReplaceError, self).__init__(msg)
Args:
old_path (str): original path to be substituted
new_path (str): candidate path for substitution
"""
msg = "New path longer than old path: binary text"
msg += " replacement not possible."
err_msg = "The new path %s" % new_path
err_msg += " is longer than the old path %s.\n" % old_path
err_msg += "Text replacement in binaries will not work.\n"
err_msg += "Create buildcache from an install path "
err_msg += "longer than new path."
super(BinaryTextReplaceError, self).__init__(msg, err_msg)
class CannotGrowString(BinaryTextReplaceError):
def __init__(self, old, new):
msg = "Cannot replace {!r} with {!r} because the new prefix is longer.".format(old, new)
super(CannotGrowString, self).__init__(msg)
class CannotShrinkCString(BinaryTextReplaceError):
def __init__(self, old, new, full_old_string):
# Just interpolate binary string to not risk issues with invalid
# unicode, which would be really bad user experience: error in error.
# We have no clue if we actually deal with a real C-string nor what
# encoding it has.
msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
old, new, full_old_string
)
super(CannotShrinkCString, self).__init__(msg)
@memoized
@ -451,43 +458,116 @@ def _replace_prefix_text(filename, compiled_prefixes):
f.truncate()
def _replace_prefix_bin(filename, byte_prefixes):
"""Replace all the occurrences of the old install prefix with a
new install prefix in binary files.
def apply_binary_replacements(f, prefix_to_prefix, suffix_safety_size=7):
"""
Given a file opened in rb+ mode, apply the string replacements as
specified by an ordered dictionary of prefix to prefix mappings. This
method takes special care of null-terminated C-strings. C-string constants
are problematic because compilers and linkers optimize readonly strings for
space by aliasing those that share a common suffix (only suffix since all
of them are null terminated). See https://github.com/spack/spack/pull/31739
and https://github.com/spack/spack/pull/32253 for details. Our logic matches
the original prefix with a ``suffix_safety_size + 1`` lookahead for null bytes.
If no null terminator is found, we simply pad with leading /, assuming that
it's a long C-string; the full C-string after replacement has a large suffix
in common with its original value.
If there *is* a null terminator we can do the same as long as the replacement
has a sufficiently long common suffix with the original prefix.
As a last resort when the replacement does not have a long enough common suffix,
we can try to shorten the string, but this only works if the new length is
sufficiently short (typically the case when going from large padding -> normal path)
If the replacement string is longer, or all of the above fails, we error out.
The new install prefix is prefixed with ``b'/'`` until the
lengths of the prefixes are the same.
Arguments:
f: file opened in rb+ mode
prefix_to_prefix (OrderedDict): OrderedDictionary where the keys are
bytes representing the old prefixes and the values are the new
suffix_safety_size (int): in case of null terminated strings, what size
of the suffix should remain to avoid aliasing issues?
"""
assert suffix_safety_size >= 0
assert f.tell() == 0
# Look for exact matches of our paths, and also look if there's a null terminator
# soon after (this covers the case where we search for /abc but match /abc/ with
# a trailing dir seperator).
regex = re.compile(
b"("
+ b"|".join(re.escape(p) for p in prefix_to_prefix.keys())
+ b")([^\0]{0,%d}\0)?" % suffix_safety_size
)
# We *could* read binary data in chunks to avoid loading all in memory,
# but it's nasty to deal with matches across boundaries, so let's stick to
# something simple.
for match in regex.finditer(f.read()):
# The matching prefix (old) and its replacement (new)
old = match.group(1)
new = prefix_to_prefix[old]
# Did we find a trailing null within a N + 1 bytes window after the prefix?
null_terminated = match.end(0) > match.end(1)
# Suffix string length, excluding the null byte
# Only makes sense if null_terminated
suffix_strlen = match.end(0) - match.end(1) - 1
# How many bytes are we shrinking our string?
bytes_shorter = len(old) - len(new)
# We can't make strings larger.
if bytes_shorter < 0:
raise CannotGrowString(old, new)
# If we don't know whether this is a null terminated C-string (we're looking
# only N + 1 bytes ahead), or if it is and we have a common suffix, we can
# simply pad with leading dir separators.
elif (
not null_terminated
or suffix_strlen >= suffix_safety_size # == is enough, but let's be defensive
or old[-suffix_safety_size + suffix_strlen :]
== new[-suffix_safety_size + suffix_strlen :]
):
replacement = b"/" * bytes_shorter + new
# If it *was* null terminated, all that matters is that we can leave N bytes
# of old suffix in place. Note that > is required since we also insert an
# additional null terminator.
elif bytes_shorter > suffix_safety_size:
replacement = new + match.group(2) # includes the trailing null
# Otherwise... we can't :(
else:
raise CannotShrinkCString(old, new, match.group()[:-1])
f.seek(match.start())
f.write(replacement)
def _replace_prefix_bin(filename, prefix_to_prefix):
"""Replace all the occurrences of the old prefix with a new prefix in binary
files. See :func:`~spack.relocate.apply_binary_replacements` for details.
Args:
filename (str): target binary file
byte_prefixes (OrderedDict): OrderedDictionary where the keys are
binary strings of the old prefixes and the values are the new
binary prefixes
byte_prefixes (OrderedDict): ordered dictionary where the keys are
bytes representing the old prefixes and the values are the new
prefixes (all bytes utf-8 encoded)
"""
all_prefixes = re.compile(b"|".join(re.escape(prefix) for prefix in byte_prefixes.keys()))
def padded_replacement(old):
new = byte_prefixes[old]
pad = len(old) - len(new)
if pad < 0:
raise BinaryTextReplaceError(old, new)
return new + b"/" * pad
with open(filename, "rb+") as f:
# Register what replacement string to put on what offsets in the file.
replacements_at_offset = [
(padded_replacement(m.group(0)), m.start())
for m in re.finditer(all_prefixes, f.read())
]
# Apply the replacements
for replacement, offset in replacements_at_offset:
f.seek(offset)
f.write(replacement)
apply_binary_replacements(f, prefix_to_prefix)
def relocate_macho_binaries(
path_names, old_layout_root, new_layout_root, prefix_to_prefix, rel, old_prefix, new_prefix
path_names,
old_layout_root,
new_layout_root,
prefix_to_prefix,
rel,
old_prefix,
new_prefix,
):
"""
Use macholib python package to get the rpaths, depedent libraries

View File

@ -1741,7 +1741,12 @@ def spec_hash(self, hash):
return hash.override(self)
node_dict = self.to_node_dict(hash=hash)
json_text = sjson.dump(node_dict)
return spack.util.hash.b32_hash(json_text)
# This implements "frankenhashes", preserving the last 7 characters of the
# original hash when splicing so that we can avoid relocation issues
out = spack.util.hash.b32_hash(json_text)
if self.build_spec is not self:
return out[:-7] + self.build_spec.spec_hash(hash)[-7:]
return out
def _cached_hash(self, hash, length=None, force=False):
"""Helper function for storing a cached hash on the spec.

View File

@ -1643,7 +1643,7 @@ def mock_executable(tmpdir):
"""
import jinja2
shebang = "#!/bin/bash\n" if not is_windows else "@ECHO OFF"
shebang = "#!/bin/sh\n" if not is_windows else "@ECHO OFF"
def _factory(name, output, subdir=("bin",)):
f = tmpdir.ensure(*subdir, dir=True).join(name)
@ -1808,14 +1808,24 @@ def mock_tty_stdout(monkeypatch):
monkeypatch.setattr(sys.stdout, "isatty", lambda: True)
@pytest.fixture
def prefix_like():
return "package-0.0.0.a1-hashhashhashhashhashhashhashhash"
@pytest.fixture()
def binary_with_rpaths(tmpdir):
def prefix_tmpdir(tmpdir, prefix_like):
return tmpdir.mkdir(prefix_like)
@pytest.fixture()
def binary_with_rpaths(prefix_tmpdir):
"""Factory fixture that compiles an ELF binary setting its RPATH. Relative
paths are encoded with `$ORIGIN` prepended.
"""
def _factory(rpaths, message="Hello world!"):
source = tmpdir.join("main.c")
source = prefix_tmpdir.join("main.c")
source.write(
"""
#include <stdio.h>

View File

@ -2,6 +2,7 @@
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import io
import os
import os.path
import re
@ -141,13 +142,13 @@ def _factory():
@pytest.fixture()
def copy_binary():
def copy_binary(prefix_like):
"""Returns a function that copies a binary somewhere and
returns the new location.
"""
def _copy_somewhere(orig_binary):
new_root = orig_binary.mkdtemp()
new_root = orig_binary.mkdtemp().mkdir(prefix_like)
new_binary = new_root.join("main.x")
shutil.copy(str(orig_binary), str(new_binary))
return new_binary
@ -261,29 +262,33 @@ def test_set_elf_rpaths_warning(mock_patchelf):
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
def test_replace_prefix_bin(binary_with_rpaths):
def test_replace_prefix_bin(binary_with_rpaths, prefix_like):
prefix = "/usr/" + prefix_like
prefix_bytes = prefix.encode("utf-8")
new_prefix = "/foo/" + prefix_like
new_prefix_bytes = new_prefix.encode("utf-8")
# Compile an "Hello world!" executable and set RPATHs
executable = binary_with_rpaths(rpaths=["/usr/lib", "/usr/lib64"])
executable = binary_with_rpaths(rpaths=[prefix + "/lib", prefix + "/lib64"])
# Relocate the RPATHs
spack.relocate._replace_prefix_bin(str(executable), {b"/usr": b"/foo"})
spack.relocate._replace_prefix_bin(str(executable), {prefix_bytes: new_prefix_bytes})
# Some compilers add rpaths so ensure changes included in final result
assert "/foo/lib:/foo/lib64" in rpaths_for(executable)
assert "%s/lib:%s/lib64" % (new_prefix, new_prefix) in rpaths_for(executable)
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
def test_relocate_elf_binaries_absolute_paths(binary_with_rpaths, copy_binary, tmpdir):
def test_relocate_elf_binaries_absolute_paths(binary_with_rpaths, copy_binary, prefix_tmpdir):
# Create an executable, set some RPATHs, copy it to another location
orig_binary = binary_with_rpaths(rpaths=[str(tmpdir.mkdir("lib")), "/usr/lib64"])
orig_binary = binary_with_rpaths(rpaths=[str(prefix_tmpdir.mkdir("lib")), "/usr/lib64"])
new_binary = copy_binary(orig_binary)
spack.relocate.relocate_elf_binaries(
binaries=[str(new_binary)],
orig_root=str(orig_binary.dirpath()),
new_root=None, # Not needed when relocating absolute paths
new_prefixes={str(tmpdir): "/foo"},
new_prefixes={str(orig_binary.dirpath()): "/foo"},
rel=False,
# Not needed when relocating absolute paths
orig_prefix=None,
@ -317,9 +322,13 @@ def test_relocate_elf_binaries_relative_paths(binary_with_rpaths, copy_binary):
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
def test_make_elf_binaries_relative(binary_with_rpaths, copy_binary, tmpdir):
def test_make_elf_binaries_relative(binary_with_rpaths, copy_binary, prefix_tmpdir):
orig_binary = binary_with_rpaths(
rpaths=[str(tmpdir.mkdir("lib")), str(tmpdir.mkdir("lib64")), "/opt/local/lib"]
rpaths=[
str(prefix_tmpdir.mkdir("lib")),
str(prefix_tmpdir.mkdir("lib64")),
"/opt/local/lib",
]
)
new_binary = copy_binary(orig_binary)
@ -339,15 +348,19 @@ def test_raise_if_not_relocatable(monkeypatch):
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
def test_relocate_text_bin(binary_with_rpaths, copy_binary, tmpdir):
def test_relocate_text_bin(binary_with_rpaths, copy_binary, prefix_tmpdir):
orig_binary = binary_with_rpaths(
rpaths=[str(tmpdir.mkdir("lib")), str(tmpdir.mkdir("lib64")), "/opt/local/lib"],
message=str(tmpdir),
rpaths=[
str(prefix_tmpdir.mkdir("lib")),
str(prefix_tmpdir.mkdir("lib64")),
"/opt/local/lib",
],
message=str(prefix_tmpdir),
)
new_binary = copy_binary(orig_binary)
# Check original directory is in the executabel and the new one is not
assert text_in_bin(str(tmpdir), new_binary)
# Check original directory is in the executable and the new one is not
assert text_in_bin(str(prefix_tmpdir), new_binary)
assert not text_in_bin(str(new_binary.dirpath()), new_binary)
# Check this call succeed
@ -358,7 +371,7 @@ def test_relocate_text_bin(binary_with_rpaths, copy_binary, tmpdir):
# Check original directory is not there anymore and it was
# substituted with the new one
assert not text_in_bin(str(tmpdir), new_binary)
assert not text_in_bin(str(prefix_tmpdir), new_binary)
assert text_in_bin(str(new_binary.dirpath()), new_binary)
@ -450,30 +463,144 @@ def test_utf8_paths_to_single_binary_regex():
assert regex.search(string).group(0) == b"/safe/[a-z]/file"
def test_ordered_replacement(tmpdir):
def test_ordered_replacement():
# This tests whether binary text replacement respects order, so that
# a long package prefix is replaced before a shorter sub-prefix like
# the root of the spack store (as a fallback).
def replace_and_expect(prefix_map, before, after):
file = str(tmpdir.join("file"))
with open(file, "wb") as f:
f.write(before)
spack.relocate._replace_prefix_bin(file, prefix_map)
with open(file, "rb") as f:
assert f.read() == after
def replace_and_expect(prefix_map, before, after=None, suffix_safety_size=7):
f = io.BytesIO(before)
spack.relocate.apply_binary_replacements(f, OrderedDict(prefix_map), suffix_safety_size)
f.seek(0)
assert f.read() == after
# The case of having a non-null terminated common suffix.
replace_and_expect(
OrderedDict(
[(b"/old-spack/opt/specific-package", b"/first"), (b"/old-spack/opt", b"/second")]
),
[
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
(b"/old-spack/opt", b"/sec/spack/opt"),
],
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
b"Binary with /first///////////////////////// and /second///////",
b"Binary with /////////first/specific-package and /sec/spack/opt",
suffix_safety_size=7,
)
# The case of having a direct null terminated common suffix.
replace_and_expect(
OrderedDict(
[(b"/old-spack/opt", b"/second"), (b"/old-spack/opt/specific-package", b"/first")]
),
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
b"Binary with /second////////specific-package and /second///////",
[
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
(b"/old-spack/opt", b"/sec/spack/opt"),
],
b"Binary with /old-spack/opt/specific-package\0 and /old-spack/opt\0",
b"Binary with /////////first/specific-package\0 and /sec/spack/opt\0",
suffix_safety_size=7,
)
# Testing the order of operations (not null terminated, long enough common suffix)
replace_and_expect(
[
(b"/old-spack/opt", b"/s/spack/opt"),
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
],
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
b"Binary with ///s/spack/opt/specific-package and ///s/spack/opt",
suffix_safety_size=7,
)
# Testing the order of operations (null terminated, long enough common suffix)
replace_and_expect(
[
(b"/old-spack/opt", b"/s/spack/opt"),
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
],
b"Binary with /old-spack/opt/specific-package\0 and /old-spack/opt\0",
b"Binary with ///s/spack/opt/specific-package\0 and ///s/spack/opt\0",
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix long enough
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/opt/specific-XXXXage")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with ///////////opt/specific-XXXXage/sub\0 data",
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix too short, but
# shortening is enough to spare more than 7 bytes of old suffix.
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/opt/specific-XXXXXge")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with /opt/specific-XXXXXge/sub\0ckage/sub\0 data", # ckage/sub = 9 bytes
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix too short,
# shortening leaves exactly 7 suffix bytes untouched, amazing!
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/spack/specific-XXXXXge")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with /spack/specific-XXXXXge/sub\0age/sub\0 data", # age/sub = 7 bytes
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix too short,
# shortening doesn't leave space for 7 bytes, sad!
error_msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
b"/old-spack/opt/specific-package",
b"/snacks/specific-XXXXXge",
b"/old-spack/opt/specific-package/sub",
)
with pytest.raises(spack.relocate.CannotShrinkCString, match=error_msg):
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/snacks/specific-XXXXXge")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
# expect failure!
suffix_safety_size=7,
)
# Check that it works when changing suffix_safety_size.
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/snacks/specific-XXXXXXe")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with /snacks/specific-XXXXXXe/sub\0ge/sub\0 data",
suffix_safety_size=6,
)
# Finally check the case of no shortening but a long enough common suffix.
replace_and_expect(
[(b"pkg-gwixwaalgczp6", b"pkg-zkesfralgczp6")],
b"Binary with pkg-gwixwaalgczp6/config\0 data",
b"Binary with pkg-zkesfralgczp6/config\0 data",
suffix_safety_size=7,
)
# Too short matching suffix, identical string length
error_msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
b"pkg-gwixwaxlgczp6",
b"pkg-zkesfrzlgczp6",
b"pkg-gwixwaxlgczp6",
)
with pytest.raises(spack.relocate.CannotShrinkCString, match=error_msg):
replace_and_expect(
[(b"pkg-gwixwaxlgczp6", b"pkg-zkesfrzlgczp6")],
b"Binary with pkg-gwixwaxlgczp6\0 data",
# expect failure
suffix_safety_size=7,
)
# Finally, make sure that the regex is not greedily finding the LAST null byte
# it should find the first null byte in the window. In this test we put one null
# at a distance where we cant keep a long enough suffix, and one where we can,
# so we should expect failure when the first null is used.
error_msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
b"pkg-abcdef",
b"pkg-xyzabc",
b"pkg-abcdef",
)
with pytest.raises(spack.relocate.CannotShrinkCString, match=error_msg):
replace_and_expect(
[(b"pkg-abcdef", b"pkg-xyzabc")],
b"Binary with pkg-abcdef\0/xx\0", # def\0/xx is 7 bytes.
# expect failure
suffix_safety_size=7,
)

View File

@ -18,7 +18,7 @@
if sys.platform == "darwin":
args.extend(["/usr/bin/clang++", "install_name_tool"])
else:
args.extend(["/usr/bin/g++", "patchelf"])
args.extend(["g++", "patchelf"])
@pytest.mark.requires_executables(*args)

View File

@ -83,7 +83,12 @@ class Garply
f.write(garply_cc % prefix.config)
with open("%s/garply/garplinator.cc" % self.stage.source_path, "w") as f:
f.write(garplinator_cc)
gpp = which("/usr/bin/g++")
gpp = which(
"g++",
path=":".join(
[s for s in os.environ["PATH"].split(os.pathsep) if "lib/spack/env" not in s]
),
)
if sys.platform == "darwin":
gpp = which("/usr/bin/clang++")
gpp(

View File

@ -97,7 +97,12 @@ class Quux
f.write(quux_h)
with open("%s/quux/quuxifier.cc" % self.stage.source_path, "w") as f:
f.write(quuxifier_cc)
gpp = which("/usr/bin/g++")
gpp = which(
"g++",
path=":".join(
[s for s in os.environ["PATH"].split(os.pathsep) if "lib/spack/env" not in s]
),
)
if sys.platform == "darwin":
gpp = which("/usr/bin/clang++")
gpp(