Add elf parsing utility function (#33628)
Introduces `spack.util.elf.parse_elf(file_handle)`
This commit is contained in:
parent
6b3ea94630
commit
230e96fbb8
@ -1789,3 +1789,39 @@ def _spider(*args, **kwargs):
|
||||
@pytest.fixture(scope="function")
def mock_tty_stdout(monkeypatch):
    """Make ``sys.stdout.isatty()`` return True for the duration of a test."""

    def _always_a_tty():
        return True

    monkeypatch.setattr(sys.stdout, "isatty", _always_a_tty)
|
||||
|
||||
|
||||
@pytest.fixture()
def binary_with_rpaths(tmpdir):
    """Factory fixture that builds an ELF executable with the requested RPATHs.

    The returned callable takes a list of rpaths and an optional message that the
    program prints. Relative rpaths are rooted at ``$ORIGIN``; absolute ones are
    passed through untouched.
    """

    def _factory(rpaths, message="Hello world!"):
        # Write a minimal C program that prints the message
        c_file = tmpdir.join("main.c")
        program = """
#include <stdio.h>
int main(){{
printf("{0}");
}}
""".format(
            message
        )
        c_file.write(program)

        compiler = spack.util.executable.which("gcc")
        binary = c_file.dirpath("main.x")

        # Relative entries get `$ORIGIN` as their root prefix
        encoded = []
        for entry in rpaths:
            if not os.path.isabs(entry):
                entry = os.path.join("$ORIGIN", entry)
            encoded.append(entry)

        # --disable-new-dtags is passed so the linker is asked for the old-style tag
        compiler(
            "-Wl,--disable-new-dtags",
            "-Wl,-rpath={0}".format(":".join(encoded)),
            str(c_file),
            "-o",
            str(binary),
        )
        return binary

    return _factory
|
||||
|
@ -82,42 +82,6 @@ def _factory(output):
|
||||
return _factory
|
||||
|
||||
|
||||
@pytest.fixture()
def hello_world(tmpdir):
    """Factory fixture that builds an ELF executable with the requested RPATHs.

    The returned callable takes a list of rpaths and an optional message that the
    program prints. Relative rpaths are rooted at ``$ORIGIN``; absolute ones are
    passed through untouched.
    """

    def _factory(rpaths, message="Hello world!"):
        # Write a minimal C program that prints the message
        c_file = tmpdir.join("main.c")
        program = """
#include <stdio.h>
int main(){{
printf("{0}");
}}
""".format(
            message
        )
        c_file.write(program)

        compiler = spack.util.executable.which("gcc")
        binary = c_file.dirpath("main.x")

        # Relative entries get `$ORIGIN` as their root prefix
        encoded = []
        for entry in rpaths:
            if not os.path.isabs(entry):
                entry = os.path.join("$ORIGIN", entry)
            encoded.append(entry)

        # --disable-new-dtags is passed so the linker is asked for the old-style tag
        compiler(
            "-Wl,--disable-new-dtags",
            "-Wl,-rpath={0}".format(":".join(encoded)),
            str(c_file),
            "-o",
            str(binary),
        )
        return binary

    return _factory
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def make_dylib(tmpdir_factory):
|
||||
"""Create a shared library with unfriendly qualities.
|
||||
@ -315,9 +279,9 @@ def test_set_elf_rpaths_warning(mock_patchelf):
|
||||
|
||||
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
|
||||
@skip_unless_linux
|
||||
def test_replace_prefix_bin(hello_world):
|
||||
def test_replace_prefix_bin(binary_with_rpaths):
|
||||
# Compile an "Hello world!" executable and set RPATHs
|
||||
executable = hello_world(rpaths=["/usr/lib", "/usr/lib64"])
|
||||
executable = binary_with_rpaths(rpaths=["/usr/lib", "/usr/lib64"])
|
||||
|
||||
# Relocate the RPATHs
|
||||
spack.relocate._replace_prefix_bin(str(executable), {b"/usr": b"/foo"})
|
||||
@ -328,9 +292,9 @@ def test_replace_prefix_bin(hello_world):
|
||||
|
||||
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
|
||||
@skip_unless_linux
|
||||
def test_relocate_elf_binaries_absolute_paths(hello_world, copy_binary, tmpdir):
|
||||
def test_relocate_elf_binaries_absolute_paths(binary_with_rpaths, copy_binary, tmpdir):
|
||||
# Create an executable, set some RPATHs, copy it to another location
|
||||
orig_binary = hello_world(rpaths=[str(tmpdir.mkdir("lib")), "/usr/lib64"])
|
||||
orig_binary = binary_with_rpaths(rpaths=[str(tmpdir.mkdir("lib")), "/usr/lib64"])
|
||||
new_binary = copy_binary(orig_binary)
|
||||
|
||||
spack.relocate.relocate_elf_binaries(
|
||||
@ -350,9 +314,9 @@ def test_relocate_elf_binaries_absolute_paths(hello_world, copy_binary, tmpdir):
|
||||
|
||||
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
|
||||
@skip_unless_linux
|
||||
def test_relocate_elf_binaries_relative_paths(hello_world, copy_binary):
|
||||
def test_relocate_elf_binaries_relative_paths(binary_with_rpaths, copy_binary):
|
||||
# Create an executable, set some RPATHs, copy it to another location
|
||||
orig_binary = hello_world(rpaths=["lib", "lib64", "/opt/local/lib"])
|
||||
orig_binary = binary_with_rpaths(rpaths=["lib", "lib64", "/opt/local/lib"])
|
||||
new_binary = copy_binary(orig_binary)
|
||||
|
||||
spack.relocate.relocate_elf_binaries(
|
||||
@ -371,8 +335,8 @@ def test_relocate_elf_binaries_relative_paths(hello_world, copy_binary):
|
||||
|
||||
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
|
||||
@skip_unless_linux
|
||||
def test_make_elf_binaries_relative(hello_world, copy_binary, tmpdir):
|
||||
orig_binary = hello_world(
|
||||
def test_make_elf_binaries_relative(binary_with_rpaths, copy_binary, tmpdir):
|
||||
orig_binary = binary_with_rpaths(
|
||||
rpaths=[str(tmpdir.mkdir("lib")), str(tmpdir.mkdir("lib64")), "/opt/local/lib"]
|
||||
)
|
||||
new_binary = copy_binary(orig_binary)
|
||||
@ -393,8 +357,8 @@ def test_raise_if_not_relocatable(monkeypatch):
|
||||
|
||||
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
|
||||
@skip_unless_linux
|
||||
def test_relocate_text_bin(hello_world, copy_binary, tmpdir):
|
||||
orig_binary = hello_world(
|
||||
def test_relocate_text_bin(binary_with_rpaths, copy_binary, tmpdir):
|
||||
orig_binary = binary_with_rpaths(
|
||||
rpaths=[str(tmpdir.mkdir("lib")), str(tmpdir.mkdir("lib64")), "/opt/local/lib"],
|
||||
message=str(tmpdir),
|
||||
)
|
||||
|
130
lib/spack/spack/test/util/elf.py
Normal file
130
lib/spack/spack/test/util/elf.py
Normal file
@ -0,0 +1,130 @@
|
||||
# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
|
||||
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
|
||||
import io
|
||||
|
||||
import pytest
|
||||
|
||||
import llnl.util.filesystem as fs
|
||||
|
||||
import spack.platforms
|
||||
import spack.util.elf as elf
|
||||
import spack.util.executable
|
||||
|
||||
|
||||
# Note: the ELF parser itself is platform independent, but creating ELF test files with
# system tools is harder on non-Linux hosts, so the tests that compile binaries are Linux-only.
|
||||
def skip_unless_linux(f):
    """Decorate test ``f`` so that it is skipped unless the real host platform is Linux."""
    not_on_linux = str(spack.platforms.real_host()) != "linux"
    marker = pytest.mark.skipif(not_on_linux, reason="implementation currently requires linux")
    return marker(f)
|
||||
|
||||
|
||||
@pytest.mark.requires_executables("gcc")
@skip_unless_linux
def test_elf_get_rpaths(binary_with_rpaths):
    """``get_rpaths`` returns the rpaths of a freshly compiled binary, in order."""
    # Build an "Hello world!" executable with two known rpaths
    expected = ["/very/long/prefix/x", "/very/long/prefix/y"]
    binary = binary_with_rpaths(rpaths=expected)
    found = elf.get_rpaths(str(binary))
    assert found == expected
|
||||
|
||||
|
||||
@pytest.mark.requires_executables("gcc")
@skip_unless_linux
@pytest.mark.parametrize(
    "linker_flag,is_runpath",
    [
        ("-Wl,--disable-new-dtags", False),
        ("-Wl,--enable-new-dtags", True),
    ],
)
def test_elf_parsing_shared_linking(linker_flag, is_runpath, tmpdir):
    """Parse a shared library and an executable dynamically linked against it.

    The test is parametrized over the linker flag that selects between the old-style
    and new-style dynamic tags, and checks that the parser reports the matching
    ``is_runpath`` value next to the interpreter, soname, needed and rpath data.
    """
    gcc = spack.util.executable.which("gcc")

    with fs.working_dir(str(tmpdir)):
        # Create a library to link to so we can force a dynamic section in an ELF file
        with open("foo.c", "w") as f:
            f.write("int foo(){return 0;}")
        with open("bar.c", "w") as f:
            f.write("int foo(); int _start(){return foo();}")

        # Create library and executable linking to it.
        gcc("-shared", "-o", "libfoo.so", "-Wl,-soname,libfoo.so.1", "-nostdlib", "foo.c")
        gcc(
            "-o",
            "bar",
            linker_flag,
            "-Wl,-rpath,/first",
            "-Wl,-rpath,/second",
            "-Wl,--no-as-needed",
            "-nostdlib",
            "libfoo.so",
            "bar.c",
        )

        with open("libfoo.so", "rb") as f:
            foo_parsed = elf.parse_elf(f, interpreter=True, dynamic_section=True)

        assert not foo_parsed.has_pt_interp
        assert foo_parsed.has_pt_dynamic
        assert not foo_parsed.has_rpath
        assert not foo_parsed.has_needed
        assert foo_parsed.has_soname
        assert foo_parsed.dt_soname_str == b"libfoo.so.1"

        with open("bar", "rb") as f:
            bar_parsed = elf.parse_elf(f, interpreter=True, dynamic_section=True)

        assert bar_parsed.has_pt_interp
        assert bar_parsed.has_pt_dynamic
        assert bar_parsed.has_rpath
        assert bar_parsed.has_needed
        assert not bar_parsed.has_soname
        assert bar_parsed.dt_rpath_str == b"/first:/second"
        assert bar_parsed.dt_needed_strs == [b"libfoo.so.1"]
        # The linker flag decides whether the rpath was emitted as DT_RPATH or DT_RUNPATH
        assert bar_parsed.is_runpath == is_runpath
|
||||
|
||||
|
||||
def test_broken_elf():
    """Malformed inputs are rejected with an ``ElfParsingError`` naming the problem."""
    broken_inputs = [
        # No elf magic
        (b"x", "Not an ELF file"),
        # Incomplete ELF header
        (b"\x7fELF", "Not an ELF file"),
        # Invalid class
        (b"\x7fELF\x09\x01" + b"\x00" * 10, "Invalid class"),
        # Invalid data type
        (b"\x7fELF\x01\x09" + b"\x00" * 10, "Invalid data type"),
        # 64-bit needs at least 64 bytes of header; this is only 56 bytes
        (b"\x7fELF\x02\x01" + b"\x00" * 50, "ELF header malformed"),
        # 32-bit needs at least 52 bytes of header; this is only 46 bytes
        (b"\x7fELF\x01\x01" + b"\x00" * 40, "ELF header malformed"),
        # Not a ET_DYN/ET_EXEC on a 32-bit LE ELF
        (
            b"\x7fELF\x01\x01" + (b"\x00" * 10) + b"\x09" + (b"\x00" * 35),
            "Not an ET_DYN or ET_EXEC",
        ),
    ]

    for payload, expected_message in broken_inputs:
        with pytest.raises(elf.ElfParsingError, match=expected_message):
            elf.parse_elf(io.BytesIO(payload))
|
||||
|
||||
|
||||
def test_parser_doesnt_deal_with_nonzero_offset():
    """The parser refuses a stream whose current position is not zero."""
    # There is no logic (yet) for ELF data that starts at a nonzero offset of a file.
    # That could be handy when e.g. modifying an ELF file inside a tarball, but for
    # now it is unsupported.
    stream = io.BytesIO(b"\x00\x7fELF\x01\x01" + b"\x00" * 10)
    stream.seek(1)
    with pytest.raises(elf.ElfParsingError, match="Cannot parse at a nonzero offset"):
        elf.parse_elf(stream)
|
459
lib/spack/spack/util/elf.py
Normal file
459
lib/spack/spack/util/elf.py
Normal file
@ -0,0 +1,459 @@
|
||||
# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
|
||||
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
import bisect
|
||||
import struct
|
||||
import sys
|
||||
from collections import namedtuple
|
||||
from struct import calcsize, unpack, unpack_from
|
||||
|
||||
# Fields of the ELF header, in the order in which they are unpacked.
ElfHeader = namedtuple(
    "ElfHeader",
    "e_type e_machine e_version e_entry e_phoff e_shoff e_flags "
    "e_ehsize e_phentsize e_phnum e_shentsize e_shnum e_shstrndx",
)

# Fields of a section header entry.
SectionHeader = namedtuple(
    "SectionHeader",
    "sh_name sh_type sh_flags sh_addr sh_offset sh_size sh_link sh_info sh_addralign sh_entsize",
)

# Fields of a 32-bit program header entry (p_flags is the second to last field).
ProgramHeader32 = namedtuple(
    "ProgramHeader32",
    "p_type p_offset p_vaddr p_paddr p_filesz p_memsz p_flags p_align",
)

# Fields of a 64-bit program header entry (p_flags directly follows p_type).
ProgramHeader64 = namedtuple(
    "ProgramHeader64",
    "p_type p_flags p_offset p_vaddr p_paddr p_filesz p_memsz p_align",
)
|
||||
|
||||
|
||||
class ELF_CONSTANTS:
    """Numeric constants of the ELF format that the parser compares against."""

    # Magic bytes at the very start of the file
    MAGIC = b"\x7fELF"

    # File class (32 or 64 bit)
    CLASS32 = 1
    CLASS64 = 2

    # Data encoding (little or big endian)
    DATA2LSB = 1
    DATA2MSB = 2

    # Object file types (e_type)
    ET_EXEC = 2
    ET_DYN = 3

    # Program header types (p_type)
    PT_LOAD = 1
    PT_DYNAMIC = 2
    PT_INTERP = 3

    # Tags of dynamic array entries
    DT_NULL = 0
    DT_NEEDED = 1
    DT_STRTAB = 5
    DT_SONAME = 14
    DT_RPATH = 15
    DT_RUNPATH = 29

    # Section header types (sh_type)
    SHT_STRTAB = 3
|
||||
|
||||
|
||||
def get_byte_at(byte_array, idx):
    """Return the byte at position ``idx`` of ``byte_array`` as an integer.

    Indexing bytes already yields an int on Python 3; on Python 2 it yields a
    one-character string, which is converted with ``ord``.
    """
    item = byte_array[idx]
    return item if sys.version_info[0] >= 3 else ord(item)
|
||||
|
||||
|
||||
class ElfParsingError(Exception):
    """Raised when a file cannot be parsed as an ELF file by this module."""
|
||||
|
||||
|
||||
class ElfFile(object):
|
||||
"""Parsed ELF file."""
|
||||
|
||||
__slots__ = [
|
||||
"is_64_bit",
|
||||
"is_little_endian",
|
||||
"byte_order",
|
||||
"elf_hdr",
|
||||
"pt_load",
|
||||
# pt_interp
|
||||
"has_pt_interp",
|
||||
"pt_interp_p_offset",
|
||||
"pt_interp_p_filesz",
|
||||
"pt_interp_str",
|
||||
# pt_dynamic
|
||||
"has_pt_dynamic",
|
||||
"pt_dynamic_p_offset",
|
||||
"pt_dynamic_p_filesz",
|
||||
# rpath
|
||||
"has_rpath",
|
||||
"dt_rpath_offset",
|
||||
"dt_rpath_str",
|
||||
"rpath_strtab_offset",
|
||||
"is_runpath",
|
||||
# dt needed
|
||||
"has_needed",
|
||||
"dt_needed_strtab_offsets",
|
||||
"dt_needed_strs",
|
||||
# dt soname
|
||||
"has_soname",
|
||||
"dt_soname_strtab_offset",
|
||||
"dt_soname_str",
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
self.dt_needed_strtab_offsets = []
|
||||
self.has_soname = False
|
||||
self.has_rpath = False
|
||||
self.has_needed = False
|
||||
self.pt_load = []
|
||||
self.has_pt_dynamic = False
|
||||
self.has_pt_interp = False
|
||||
|
||||
|
||||
def parse_c_string(byte_string, start=0):
    """
    Extract the null-terminated string that begins at ``start``

    Arguments:
        byte_string (bytes): Buffer containing the C-string
        start (int): Position in the buffer where the C-string begins

    Returns:
        bytes: The bytes from ``start`` up to, but not including, the next null byte

    Raises:
        ElfParsingError: if there is no null byte at or after ``start``
    """
    terminator = byte_string.find(b"\0", start)
    if terminator != -1:
        return byte_string[start:terminator]
    raise ElfParsingError("C-string is not null terminated")
|
||||
|
||||
|
||||
def read_exactly(f, num_bytes, msg):
    """
    Read ``num_bytes`` bytes from the current position of ``f``, and fail with a
    parsing error carrying ``msg`` when a different number of bytes comes back.

    Arguments:
        f: file handle
        num_bytes (int): Number of bytes to read
        msg (str): Message of the error raised on a short read

    Returns:
        bytes: the ``num_bytes`` bytes that were read.

    Raises:
        ElfParsingError: if the read did not return exactly ``num_bytes`` bytes
    """
    chunk = f.read(num_bytes)
    if len(chunk) == num_bytes:
        return chunk
    raise ElfParsingError(msg)
|
||||
|
||||
|
||||
def parse_program_headers(f, elf):
    """
    Read the program header table and record the segments of interest on ``elf``

    PT_LOAD segments are collected in ``elf.pt_load`` as ``(p_offset, p_vaddr)`` pairs
    sorted by virtual address. For PT_INTERP and PT_DYNAMIC the offset and size in the
    file are stored and the matching ``has_pt_*`` flag is set.

    Arguments:
        f: file handle
        elf (ElfFile): ELF file parser data
    """
    # Jump to the start of the program header table
    f.seek(elf.elf_hdr.e_phoff)

    # The field order differs between the 32 and 64 bit variants
    if elf.is_64_bit:
        header_type, layout = ProgramHeader64, "LLQQQQQQ"
    else:
        header_type, layout = ProgramHeader32, "LLLLLLLL"
    entry_fmt = elf.byte_order + layout
    entry_size = calcsize(entry_fmt)
    table_size = elf.elf_hdr.e_phnum * entry_size

    # Fetch the whole table with a single read
    table = read_exactly(f, table_size, "Malformed program header")

    for start in range(0, table_size, entry_size):
        header = header_type._make(unpack_from(entry_fmt, table, start))

        # Segments without any bytes in the file are treated like missing segments.
        # Empty PT_DYNAMIC segments have been seen in ELF files holding debug data.
        if header.p_filesz == 0:
            continue

        segment_type = header.p_type
        if segment_type == ELF_CONSTANTS.PT_LOAD:
            # Remember where the segment lives in the file and in memory, so that
            # virtual addresses can be translated to file offsets later on.
            elf.pt_load.append((header.p_offset, header.p_vaddr))
        elif segment_type == ELF_CONSTANTS.PT_INTERP:
            elf.has_pt_interp = True
            elf.pt_interp_p_offset = header.p_offset
            elf.pt_interp_p_filesz = header.p_filesz
        elif segment_type == ELF_CONSTANTS.PT_DYNAMIC:
            elf.has_pt_dynamic = True
            elf.pt_dynamic_p_offset = header.p_offset
            elf.pt_dynamic_p_filesz = header.p_filesz

    # Linkers emit PT_LOAD segments ordered by vaddr, but tools such as patchelf can
    # leave them in arbitrary order, so sort explicitly.
    elf.pt_load.sort(key=lambda segment: segment[1])
|
||||
|
||||
|
||||
def parse_pt_interp(f, elf):
    """
    Read the PT_INTERP segment and store the interpreter path it holds (i.e. the
    path to the dynamic linker) in ``elf.pt_interp_str``

    Arguments:
        f: file handle
        elf (ElfFile): ELF file parser data
    """
    f.seek(elf.pt_interp_p_offset)
    segment = read_exactly(f, elf.pt_interp_p_filesz, "Malformed PT_INTERP entry")
    elf.pt_interp_str = parse_c_string(segment)
|
||||
|
||||
|
||||
def find_strtab_size_at_offset(f, elf, offset):
    """
    Look up the size of the string table section that starts at a known file offset

    The section headers are scanned in order, and the first one of type SHT_STRTAB
    whose ``sh_offset`` equals ``offset`` determines the result.

    Arguments:
        f: file handle
        elf (ElfFile): ELF file parser data
        offset (int): offset of the section in the file (i.e. ``sh_offset``)

    Returns:
        int: the size of the string table in bytes

    Raises:
        ElfParsingError: if no matching section header is found
    """
    if elf.is_64_bit:
        layout = "LLQQQQLLQQ"
    else:
        layout = "LLLLLLLLLL"
    entry_fmt = elf.byte_order + layout
    entry_size = calcsize(entry_fmt)

    f.seek(elf.elf_hdr.e_shoff)
    remaining = elf.elf_hdr.e_shnum
    while remaining > 0:
        raw = read_exactly(f, entry_size, "Malformed section header")
        header = SectionHeader._make(unpack(entry_fmt, raw))
        if header.sh_type == ELF_CONSTANTS.SHT_STRTAB and header.sh_offset == offset:
            return header.sh_size
        remaining -= 1

    raise ElfParsingError("Could not determine strtab size")
|
||||
|
||||
|
||||
def retrieve_strtab(f, elf, offset):
    """
    Read the complete string table located at the given file offset. Its size is
    obtained by looking the table up in the section headers.

    Arguments:
        f: file handle
        elf (ElfFile): ELF file parser data
        offset (int): offset of the string table in the file

    Returns:
        bytes: the contents of the string table
    """
    table_size = find_strtab_size_at_offset(f, elf, offset)
    f.seek(offset)
    table = read_exactly(f, table_size, "Could not read string table")
    return table
|
||||
|
||||
|
||||
def vaddr_to_offset(elf, vaddr):
    """
    Given a virtual address, find the corresponding offset in the ELF file itself.

    The PT_LOAD segment with the largest ``p_vaddr`` that does not exceed ``vaddr`` is
    selected, and the address is shifted by the difference between that segment's file
    offset and virtual address. ``elf.pt_load`` must be sorted by virtual address.

    Arguments:
        elf (ElfFile): ELF file parser data
        vaddr (int): virtual address

    Returns:
        int: offset in the file that corresponds to ``vaddr``

    Raises:
        ElfParsingError: if no PT_LOAD segment starts at or below ``vaddr``
    """
    segment_vaddrs = [p_vaddr for (_, p_vaddr) in elf.pt_load]
    idx = bisect.bisect_right(segment_vaddrs, vaddr) - 1

    # An index of -1 means the address lies below every loaded segment (or there are no
    # segments at all). Indexing with it would silently select the *last* segment.
    if idx < 0:
        raise ElfParsingError("Could not map virtual address to an offset in the file")

    p_offset, p_vaddr = elf.pt_load[idx]
    return p_offset - p_vaddr + vaddr
|
||||
|
||||
|
||||
def parse_pt_dynamic(f, elf):
    """
    Walk the dynamic array of an ELF file and record rpath, needed and soname data

    For every tag of interest the value is stored on ``elf``. The strings themselves
    are resolved from the dynamic string table afterwards, and only if at least one of
    rpath/runpath, soname or needed entries was found.

    Arguments:
        f: file handle
        elf (ElfFile): ELF file parse data

    Raises:
        ElfParsingError: if there is more than one rpath/runpath entry in total, or if
            there is not exactly one DT_STRTAB entry
    """
    entry_fmt = elf.byte_order + ("qQ" if elf.is_64_bit else "lL")
    entry_size = calcsize(entry_fmt)

    dynamic_start = elf.pt_dynamic_p_offset
    num_path_tags = 0
    num_strtab_tags = 0

    f.seek(dynamic_start)

    # Never read past the advertized size of the segment, in case the file is broken.
    for index in range(elf.pt_dynamic_p_filesz // entry_size):
        raw = read_exactly(f, entry_size, "Malformed dynamic array entry")
        tag, val = unpack(entry_fmt, raw)

        # DT_NULL terminates the array
        if tag == ELF_CONSTANTS.DT_NULL:
            break

        if tag == ELF_CONSTANTS.DT_RPATH or tag == ELF_CONSTANTS.DT_RUNPATH:
            num_path_tags += 1
            elf.has_rpath = True
            elf.is_runpath = tag == ELF_CONSTANTS.DT_RUNPATH
            elf.rpath_strtab_offset = val
            # File offset of this very entry of the dynamic array
            elf.dt_rpath_offset = dynamic_start + index * entry_size
        elif tag == ELF_CONSTANTS.DT_STRTAB:
            num_strtab_tags += 1
            strtab_vaddr = val
        elif tag == ELF_CONSTANTS.DT_NEEDED:
            elf.has_needed = True
            elf.dt_needed_strtab_offsets.append(val)
        elif tag == ELF_CONSTANTS.DT_SONAME:
            elf.has_soname = True
            elf.dt_soname_strtab_offset = val

    # Having neither rpath nor runpath is perfectly normal.
    if num_path_tags == 0:
        elf.has_rpath = False
    elif num_path_tags != 1:
        raise ElfParsingError("Could not find a unique rpath/runpath.")

    if num_strtab_tags != 1:
        raise ElfParsingError("Could not find a unique strtab of for the dynamic section strings")

    # Skip reading the string table when there are no strings to look up.
    if not elf.has_rpath and not elf.has_soname and not elf.has_needed:
        return

    string_table = retrieve_strtab(f, elf, vaddr_to_offset(elf, strtab_vaddr))

    if elf.has_needed:
        elf.dt_needed_strs = [
            parse_c_string(string_table, offset) for offset in elf.dt_needed_strtab_offsets
        ]

    if elf.has_soname:
        elf.dt_soname_str = parse_c_string(string_table, elf.dt_soname_strtab_offset)

    if elf.has_rpath:
        elf.dt_rpath_str = parse_c_string(string_table, elf.rpath_strtab_offset)
|
||||
|
||||
|
||||
def parse_header(f, elf):
    """
    Read and validate the ELF header, storing the results on ``elf``

    Sets ``is_64_bit``, ``is_little_endian``, ``byte_order`` and ``elf_hdr``.

    Arguments:
        f: file handle
        elf (ElfFile): ELF file parser data

    Raises:
        ElfParsingError: if the magic bytes, class or data encoding are invalid, or if
            the header is shorter than its class requires
    """
    # The first 16 bytes have the same layout for 32 and 64 bit files
    ident = f.read(16)

    # The file must start with the ELF magic bytes.
    if not (len(ident) == 16 and ident[:4] == ELF_CONSTANTS.MAGIC):
        raise ElfParsingError("Not an ELF file")

    # Be defensive: both the class and the data encoding have to be known values.
    ident_class = get_byte_at(ident, 4)
    ident_data = get_byte_at(ident, 5)

    if ident_class != ELF_CONSTANTS.CLASS32 and ident_class != ELF_CONSTANTS.CLASS64:
        raise ElfParsingError("Invalid class found")

    if ident_data != ELF_CONSTANTS.DATA2LSB and ident_data != ELF_CONSTANTS.DATA2MSB:
        raise ElfParsingError("Invalid data type")

    elf.is_64_bit = ident_class == ELF_CONSTANTS.CLASS64
    elf.is_little_endian = ident_data == ELF_CONSTANTS.DATA2LSB

    # Byte order prefix used by every struct format from here on
    if elf.is_little_endian:
        elf.byte_order = "<"
    else:
        elf.byte_order = ">"

    # The remainder of the header depends on the class of the file
    layout = "HHLQQQLHHHHHH" if elf.is_64_bit else "HHLLLLLHHHHHH"
    header_fmt = elf.byte_order + layout
    raw = read_exactly(f, calcsize(header_fmt), "ELF header malformed")
    elf.elf_hdr = ElfHeader._make(unpack(header_fmt, raw))
|
||||
|
||||
|
||||
def _do_parse_elf(f, interpreter=True, dynamic_section=True):
    """
    Parse the ELF file behind ``f`` and return the collected data

    Arguments:
        f: file handle positioned at offset zero
        interpreter (bool): whether to parse the PT_INTERP segment, if present
        dynamic_section (bool): whether to parse the PT_DYNAMIC segment, if present

    Returns:
        ElfFile: the parsed data

    Raises:
        ElfParsingError: if ``f`` is not at offset zero, or the file is neither an
            executable nor a shared library
    """
    # Offsets stored in the file are used as absolute positions, so parsing an ELF
    # file that starts somewhere in the middle of a stream is not supported (yet?).
    if f.tell() != 0:
        raise ElfParsingError("Cannot parse at a nonzero offset")

    parsed = ElfFile()
    parse_header(f, parsed)

    # Only executables and shared libraries are handled at the moment.
    supported_types = (ELF_CONSTANTS.ET_EXEC, ELF_CONSTANTS.ET_DYN)
    if parsed.elf_hdr.e_type not in supported_types:
        raise ElfParsingError("Not an ET_DYN or ET_EXEC type")

    parse_program_headers(f, parsed)

    # PT_INTERP segment
    if interpreter and parsed.has_pt_interp:
        parse_pt_interp(f, parsed)

    # PT_DYNAMIC segment; needs PT_LOAD segments to translate virtual addresses
    if dynamic_section and parsed.has_pt_dynamic and len(parsed.pt_load) > 0:
        parse_pt_dynamic(f, parsed)

    return parsed
|
||||
|
||||
|
||||
def parse_elf(f, interpreter=False, dynamic_section=False):
    """Given a file handle f for an ELF file opened in binary mode, return an ElfFile
    object that stores the data parsed from it. The interpreter and the dynamic section
    are only parsed when requested through the corresponding flag.

    Failures to unpack binary data are reported as ElfParsingError."""
    try:
        parsed = _do_parse_elf(f, interpreter, dynamic_section)
    except (DeprecationWarning, struct.error):
        # The docs say that old versions of Python may throw DeprecationWarning
        # where newer ones throw struct.error.
        raise ElfParsingError("Malformed ELF file")
    return parsed
|
||||
|
||||
|
||||
def get_rpaths(path):
    """Returns the list of rpaths of the file at the given path, or None if the file
    cannot be parsed as an ELF file or has no rpaths. On Python 3 the entries are
    decoded as UTF-8."""
    try:
        with open(path, "rb") as f:
            parsed = parse_elf(f, interpreter=False, dynamic_section=True)
    except ElfParsingError:
        return None

    if parsed.has_rpath:
        # The rpath is a single colon-separated string; split it in its components
        raw = parsed.dt_rpath_str
        text = raw.decode("utf-8") if sys.version_info[0] >= 3 else raw
        return text.split(":")

    return None
|
Loading…
Reference in New Issue
Block a user