Compare commits

...

1 Commits

Author SHA1 Message Date
Greg Becker
719d31682f speed up relocation using memoized offsets
refactor to do scanning in a single pass
parallelize new relocate method with threadpool
relocate_by_offsets can recompute if offsets not memoized
2022-06-03 12:19:01 +02:00
5 changed files with 186 additions and 124 deletions

View File

@@ -624,10 +624,14 @@ def get_buildfile_manifest(spec):
"""
data = {"text_to_relocate": [], "binary_to_relocate": [],
"link_to_relocate": [], "other": [],
"binary_to_relocate_fullpath": []}
"binary_to_relocate_fullpath": [], "offsets": {}}
blacklist = (".spack", "man")
# Get all the paths we will want to relocate in binaries
paths_to_relocate = [s.prefix for s in spec.traverse(root=True)]
paths_to_relocate.append(spack.store.layout.root)
# Do this at during tarball creation to save time when tarball unpacked.
# Used by make_package_relative to determine binaries to change.
for root, dirs, files in os.walk(spec.prefix, topdown=True):
@@ -662,6 +666,11 @@ def get_buildfile_manifest(spec):
(m_subtype in ('x-mach-binary')
and sys.platform == 'darwin') or
(not filename.endswith('.o'))):
# Last path to relocate is the layout root, which is a substring
# of the others
indices = relocate.compute_indices(path_name, paths_to_relocate)
data['offsets'][rel_path_name] = indices
data['binary_to_relocate'].append(rel_path_name)
data['binary_to_relocate_fullpath'].append(path_name)
added = True
@@ -700,6 +709,7 @@ def write_buildinfo_file(spec, workdir, rel=False):
buildinfo['relocate_binaries'] = manifest['binary_to_relocate']
buildinfo['relocate_links'] = manifest['link_to_relocate']
buildinfo['prefix_to_hash'] = prefix_to_hash
buildinfo['offsets'] = manifest['offsets']
filename = buildinfo_file_name(workdir)
with open(filename, 'w') as outfile:
outfile.write(syaml.dump(buildinfo, default_flow_style=True))
@@ -1473,11 +1483,25 @@ def is_backup_file(file):
# If we are not installing back to the same install tree do the relocation
if old_prefix != new_prefix:
files_to_relocate = [os.path.join(workdir, filename)
for filename in buildinfo.get('relocate_binaries')
]
# Relocate links to the new install prefix
links = [link for link in buildinfo.get('relocate_links', [])]
relocate.relocate_links(
links, old_layout_root, old_prefix, new_prefix
)
# For all buildcaches
# relocate the install prefixes in text files including dependencies
relocate.relocate_text(text_names, prefix_to_prefix_text)
# If the buildcache was not created with relativized rpaths
# do the relocation of path in binaries
# do the relocation of rpaths in binaries
# TODO: Is this necessary? How are null-terminated strings handled
# in the rpath header?
files_to_relocate = [
os.path.join(workdir, filename)
for filename in buildinfo.get('relocate_binaries')
]
platform = spack.platforms.by_name(spec.platform)
if 'macho' in platform.binary_formats:
relocate.relocate_macho_binaries(files_to_relocate,
@@ -1493,25 +1517,11 @@ def is_backup_file(file):
prefix_to_prefix_bin, rel,
old_prefix,
new_prefix)
# Relocate links to the new install prefix
links = [link for link in buildinfo.get('relocate_links', [])]
relocate.relocate_links(
links, old_layout_root, old_prefix, new_prefix
)
# For all buildcaches
# relocate the install prefixes in text files including dependencies
relocate.relocate_text(text_names, prefix_to_prefix_text)
paths_to_relocate = [old_prefix, old_layout_root]
paths_to_relocate.extend(prefix_to_hash.keys())
files_to_relocate = list(filter(
lambda pathname: not relocate.file_is_relocatable(
pathname, paths_to_relocate=paths_to_relocate),
map(lambda filename: os.path.join(workdir, filename),
buildinfo['relocate_binaries'])))
# relocate the install prefixes in binary files including dependencies
relocate.relocate_text_bin(files_to_relocate, prefix_to_prefix_bin)
# If offsets is None, we will recompute offsets when needed
offsets = buildinfo.get('offsets', None)
relocate.relocate_text_bin(
files_to_relocate, prefix_to_prefix_bin, offsets, workdir)
# If we are installing back to the same location
# relocate the sbang location if the spack directory changed

View File

@@ -95,10 +95,7 @@ def view_copy(src, dst, view, spec=None):
view.get_projection_for_spec(dep)
if spack.relocate.is_binary(dst):
spack.relocate.relocate_text_bin(
binaries=[dst],
prefixes=prefix_to_projection
)
spack.relocate.relocate_text_bin([dst], prefix_to_projection)
else:
prefix_to_projection[spack.store.layout.root] = view._root
prefix_to_projection[orig_sbang] = new_sbang

View File

@@ -469,47 +469,6 @@ def _replace_prefix_text(filename, compiled_prefixes):
f.truncate()
def _replace_prefix_bin(filename, byte_prefixes):
"""Replace all the occurrences of the old install prefix with a
new install prefix in binary files.
The new install prefix is prefixed with ``os.sep`` until the
lengths of the prefixes are the same.
Args:
filename (str): target binary file
byte_prefixes (OrderedDict): OrderedDictionary where the keys are
precompiled regex of the old prefixes and the values are the new
prefixes (uft-8 encoded)
"""
with open(filename, 'rb+') as f:
data = f.read()
f.seek(0)
for orig_bytes, new_bytes in byte_prefixes.items():
original_data_len = len(data)
# Skip this hassle if not found
if orig_bytes not in data:
continue
# We only care about this problem if we are about to replace
length_compatible = len(new_bytes) <= len(orig_bytes)
if not length_compatible:
tty.debug('Binary failing to relocate is %s' % filename)
raise BinaryTextReplaceError(orig_bytes, new_bytes)
pad_length = len(orig_bytes) - len(new_bytes)
padding = os.sep * pad_length
padding = padding.encode('utf-8')
data = data.replace(orig_bytes, new_bytes + padding)
# Really needs to be the same length
if not len(data) == original_data_len:
print('Length of pad:', pad_length, 'should be', len(padding))
print(new_bytes, 'was to replace', orig_bytes)
raise BinaryStringReplacementError(
filename, original_data_len, len(data))
f.write(data)
f.truncate()
def relocate_macho_binaries(path_names, old_layout_root, new_layout_root,
prefix_to_prefix, rel, old_prefix, new_prefix):
"""
@@ -817,49 +776,6 @@ def relocate_text(files, prefixes, concurrency=32):
tp.join()
def relocate_text_bin(binaries, prefixes, concurrency=32):
"""Replace null terminated path strings hard coded into binaries.
The new install prefix must be shorter than the original one.
Args:
binaries (list): binaries to be relocated
prefixes (OrderedDict): String prefixes which need to be changed.
concurrency (int): Desired degree of parallelism.
Raises:
BinaryTextReplaceError: when the new path is longer than the old path
"""
byte_prefixes = collections.OrderedDict({})
for orig_prefix, new_prefix in prefixes.items():
if orig_prefix != new_prefix:
if isinstance(orig_prefix, bytes):
orig_bytes = orig_prefix
else:
orig_bytes = orig_prefix.encode('utf-8')
if isinstance(new_prefix, bytes):
new_bytes = new_prefix
else:
new_bytes = new_prefix.encode('utf-8')
byte_prefixes[orig_bytes] = new_bytes
# Do relocations on text in binaries that refers to the install tree
# multiprocesing.ThreadPool.map requires single argument
args = []
for binary in binaries:
args.append((binary, byte_prefixes))
tp = multiprocessing.pool.ThreadPool(processes=concurrency)
try:
tp.map(llnl.util.lang.star(_replace_prefix_bin), args)
finally:
tp.terminate()
tp.join()
def is_relocatable(spec):
"""Returns True if an installed spec is relocatable.
@@ -1126,3 +1042,120 @@ def fixup_macos_rpaths(spec):
))
else:
tty.debug('No rpath fixup needed for ' + specname)
def compute_indices(filename, paths_to_relocate):
"""
Compute the indices in filename at which each of paths_to_relocate occurs.
Arguments:
filename (str): file to compute indices for
paths_to_relocate (List[str]): paths to find indices of
Returns:
Dict
"""
with open(filename, 'rb') as f:
contents = f.read()
substring_prefix = os.path.commonprefix(paths_to_relocate).encode('utf-8')
indices = {}
index = 0
max_length = max(len(path) for path in paths_to_relocate)
while True:
try:
# We search for the smallest substring of all paths we relocate
# In practice, this is the spack install root, and we relocate
# prefixes in the root and the root itself
index = contents.index(substring_prefix, index)
except ValueError:
# The string isn't found in the rest of the binary
break
else:
# only copy the smallest portion of the binary for comparisons
substring_to_check = contents[index:index + max_length]
for path in paths_to_relocate:
# We guarantee any substring in the list comes after any superstring
p = path.encode('utf-8')
if substring_to_check.startswith(p):
indices[index] = str(path)
index += len(path)
break
else:
index += 1
return indices
def _relocate_binary_text(filename, offsets, prefix_to_prefix):
"""
Relocate the text of a single binary file, given the offsets at which the
replacements need to be made
Arguments:
filename (str): file to modify
offsets (Dict[int, str]): locations of the strings to replace
prefix_to_prefix (Dict[str, str]): strings to replace and their replacements
"""
with open(filename, 'rb+') as f:
for index, prefix in offsets.items():
replacement = prefix_to_prefix[prefix].encode('utf-8')
if len(replacement) > len(prefix):
raise BinaryTextReplaceError(prefix, replacement)
# read forward until we find the end of the string including
# the prefix and compute the replacement as we go
f.seek(index + len(prefix))
c = f.read(1)
while c not in (None, b'\x00'):
replacement += c
c = f.read(1)
# seek back to the index position and write the replacement in
# and add null-terminator
f.seek(index)
f.write(replacement)
f.write(b'\x00')
def relocate_text_bin(
files_to_relocate, prefix_to_prefix, offsets=None,
relative_root=None, concurrency=32
):
"""
For each file given, replace all keys in the given translation dict with
the associated values. Optionally executes using precomputed memoized offsets
for the substitutions.
Arguments:
files_to_relocate (List[str]): The files to modify
prefix_to_prefix (Dict[str, str]): keys are strings to replace, values are
replacements
offsets (Dict[str, Dict[int, str]): (optional) Mapping from relative filenames to
a mapping from indices to strings to replace found at each index
relative_root (str): (optional) prefix for relative paths in offsets
"""
# defaults to the common prefix of all input files
rel_root = relative_root or os.path.commonprefix(files_to_relocate)
if offsets is None:
offsets = {}
for filename in files_to_relocate:
indices = compute_indices(
filename,
list(prefix_to_prefix.keys()),
)
relpath = os.path.relpath(filename, rel_root)
offsets[relpath] = indices
args = [
(filename, offsets[os.path.relpath(filename, rel_root)], prefix_to_prefix)
for filename in files_to_relocate
]
tp = multiprocessing.pool.ThreadPool(processes=concurrency)
try:
tp.map(llnl.util.lang.star(_relocate_binary_text), args)
finally:
tp.terminate()
tp.join()

View File

@@ -93,8 +93,10 @@ def rewire_node(spec, explicit):
False,
spec.build_spec.prefix,
spec.prefix)
relocate.relocate_text_bin(binaries=bins_to_relocate,
prefixes=prefix_to_prefix)
# Relocate text strings of prefixes embedded in binaries
relocate.relocate_text_bin(bins_to_relocate, prefix_to_prefix)
# Copy package into place, except for spec.json (because spec.json
# describes the old spec and not the new spliced spec).
shutil.copytree(os.path.join(tempdir, spec.dag_hash()), spec.prefix,

View File

@@ -294,14 +294,37 @@ def test_set_elf_rpaths_warning(mock_patchelf):
assert output is None
def test_relocate_binary_text(tmpdir):
filename = str(tmpdir.join('binary'))
with open(filename, 'wb') as f:
f.write(b'somebinarytext')
f.write(b'/usr/relpath')
f.write(b'\0')
f.write(b'morebinarytext')
spack.relocate._relocate_binary_text(filename, {14: '/usr'}, {'/usr': '/foo'})
with open(filename, 'rb') as f:
contents = f.read()
assert b'/foo/relpath\0' in contents
@pytest.mark.requires_executables('patchelf', 'strings', 'file', 'gcc')
@skip_unless_linux
def test_replace_prefix_bin(hello_world):
def test_relocate_binary(hello_world):
# Compile an "Hello world!" executable and set RPATHs
executable = hello_world(rpaths=['/usr/lib', '/usr/lib64'])
with open(str(executable), 'rb') as f:
contents = f.read()
index_0 = contents.index(b'/usr')
index_1 = contents.index(b'/usr', index_0 + 1)
offsets = {index_0: '/usr/lib', index_1: '/usr/lib64'}
# Relocate the RPATHs
spack.relocate._replace_prefix_bin(str(executable), {b'/usr': b'/foo'})
spack.relocate._relocate_binary_text(
str(executable), offsets,
{'/usr/lib': '/foo/lib', '/usr/lib64': '/foo/lib64'}
)
# Some compilers add rpaths so ensure changes included in final result
assert '/foo/lib:/foo/lib64' in rpaths_for(executable)
@@ -390,13 +413,11 @@ def test_relocate_text_bin(hello_world, copy_binary, tmpdir):
assert not text_in_bin(str(new_binary.dirpath()), new_binary)
# Check this call succeed
orig_path_bytes = str(orig_binary.dirpath()).encode('utf-8')
new_path_bytes = str(new_binary.dirpath()).encode('utf-8')
orig_path_bytes = str(orig_binary.dirpath())
new_path_bytes = str(new_binary.dirpath())
spack.relocate.relocate_text_bin(
[str(new_binary)],
{orig_path_bytes: new_path_bytes}
)
[str(new_binary)], {orig_path_bytes: new_path_bytes})
# Check original directory is not there anymore and it was
# substituted with the new one
@@ -405,15 +426,14 @@ def test_relocate_text_bin(hello_world, copy_binary, tmpdir):
def test_relocate_text_bin_raise_if_new_prefix_is_longer(tmpdir):
short_prefix = b'/short'
long_prefix = b'/much/longer'
short_prefix = '/short'
long_prefix = '/much/longer'
fpath = str(tmpdir.join('fakebin'))
with open(fpath, 'w') as f:
f.write('/short')
with pytest.raises(spack.relocate.BinaryTextReplaceError):
spack.relocate.relocate_text_bin(
[fpath], {short_prefix: long_prefix}
)
[fpath], {short_prefix: long_prefix})
@pytest.mark.requires_executables('install_name_tool', 'file', 'cc')