436 lines
15 KiB
Python
436 lines
15 KiB
Python
"""
|
|
Utilities for reading and writing Mach-O headers
|
|
"""
|
|
from __future__ import print_function
|
|
|
|
import sys
|
|
import struct
|
|
import os
|
|
|
|
from .mach_o import MH_FILETYPE_SHORTNAMES, LC_DYSYMTAB, LC_SYMTAB
|
|
from .mach_o import load_command, S_ZEROFILL, section_64, section
|
|
from .mach_o import LC_REGISTRY, LC_ID_DYLIB, LC_SEGMENT, fat_header
|
|
from .mach_o import LC_SEGMENT_64, MH_CIGAM_64, MH_MAGIC_64, FAT_MAGIC
|
|
from .mach_o import mach_header, fat_arch64, FAT_MAGIC_64, fat_arch
|
|
from .mach_o import LC_REEXPORT_DYLIB, LC_PREBOUND_DYLIB, LC_LOAD_WEAK_DYLIB
|
|
from .mach_o import LC_LOAD_UPWARD_DYLIB, LC_LOAD_DYLIB, mach_header_64
|
|
from .mach_o import MH_CIGAM, MH_MAGIC
|
|
from .ptypes import sizeof
|
|
|
|
from macholib.util import fileview
|
|
try:
|
|
from macholib.compat import bytes
|
|
except ImportError:
|
|
pass
|
|
|
|
try:
|
|
unicode
|
|
except NameError:
|
|
unicode = str
|
|
|
|
if sys.version_info[0] == 2:
|
|
range = xrange # noqa: F821
|
|
|
|
__all__ = ['MachO']
|
|
|
|
_RELOCATABLE = set((
|
|
# relocatable commands that should be used for dependency walking
|
|
LC_LOAD_DYLIB,
|
|
LC_LOAD_UPWARD_DYLIB,
|
|
LC_LOAD_WEAK_DYLIB,
|
|
LC_PREBOUND_DYLIB,
|
|
LC_REEXPORT_DYLIB,
|
|
))
|
|
|
|
_RELOCATABLE_NAMES = {
|
|
LC_LOAD_DYLIB: 'load_dylib',
|
|
LC_LOAD_UPWARD_DYLIB: 'load_upward_dylib',
|
|
LC_LOAD_WEAK_DYLIB: 'load_weak_dylib',
|
|
LC_PREBOUND_DYLIB: 'prebound_dylib',
|
|
LC_REEXPORT_DYLIB: 'reexport_dylib',
|
|
}
|
|
|
|
|
|
def _shouldRelocateCommand(cmd):
|
|
"""
|
|
Should this command id be investigated for relocation?
|
|
"""
|
|
return cmd in _RELOCATABLE
|
|
|
|
|
|
def lc_str_value(offset, cmd_info):
|
|
"""
|
|
Fetch the actual value of a field of type "lc_str"
|
|
"""
|
|
cmd_load, cmd_cmd, cmd_data = cmd_info
|
|
|
|
offset -= sizeof(cmd_load) + sizeof(cmd_cmd)
|
|
return cmd_data[offset:].strip(b'\x00')
|
|
|
|
|
|
class MachO(object):
|
|
"""
|
|
Provides reading/writing the Mach-O header of a specific existing file
|
|
"""
|
|
# filename - the original filename of this mach-o
|
|
# sizediff - the current deviation from the initial mach-o size
|
|
# header - the mach-o header
|
|
# commands - a list of (load_command, somecommand, data)
|
|
# data is either a str, or a list of segment structures
|
|
# total_size - the current mach-o header size (including header)
|
|
# low_offset - essentially, the maximum mach-o header size
|
|
# id_cmd - the index of my id command, or None
|
|
|
|
def __init__(self, filename):
|
|
|
|
# supports the ObjectGraph protocol
|
|
self.graphident = filename
|
|
self.filename = filename
|
|
self.loader_path = os.path.dirname(filename)
|
|
|
|
# initialized by load
|
|
self.fat = None
|
|
self.headers = []
|
|
with open(filename, 'rb') as fp:
|
|
self.load(fp)
|
|
|
|
def __repr__(self):
|
|
return "<MachO filename=%r>" % (self.filename,)
|
|
|
|
def load(self, fh):
|
|
assert fh.tell() == 0
|
|
header = struct.unpack('>I', fh.read(4))[0]
|
|
fh.seek(0)
|
|
if header in (FAT_MAGIC, FAT_MAGIC_64):
|
|
self.load_fat(fh)
|
|
else:
|
|
fh.seek(0, 2)
|
|
size = fh.tell()
|
|
fh.seek(0)
|
|
self.load_header(fh, 0, size)
|
|
|
|
def load_fat(self, fh):
|
|
self.fat = fat_header.from_fileobj(fh)
|
|
if self.fat.magic == FAT_MAGIC:
|
|
archs = [fat_arch.from_fileobj(fh)
|
|
for i in range(self.fat.nfat_arch)]
|
|
elif self.fat.magic == FAT_MAGIC_64:
|
|
archs = [fat_arch64.from_fileobj(fh)
|
|
for i in range(self.fat.nfat_arch)]
|
|
else:
|
|
raise ValueError("Unknown fat header magic: %r" % (self.fat.magic))
|
|
|
|
for arch in archs:
|
|
self.load_header(fh, arch.offset, arch.size)
|
|
|
|
def rewriteLoadCommands(self, *args, **kw):
|
|
changed = False
|
|
for header in self.headers:
|
|
if header.rewriteLoadCommands(*args, **kw):
|
|
changed = True
|
|
return changed
|
|
|
|
def load_header(self, fh, offset, size):
|
|
fh.seek(offset)
|
|
header = struct.unpack('>I', fh.read(4))[0]
|
|
fh.seek(offset)
|
|
if header == MH_MAGIC:
|
|
magic, hdr, endian = MH_MAGIC, mach_header, '>'
|
|
elif header == MH_CIGAM:
|
|
magic, hdr, endian = MH_CIGAM, mach_header, '<'
|
|
elif header == MH_MAGIC_64:
|
|
magic, hdr, endian = MH_MAGIC_64, mach_header_64, '>'
|
|
elif header == MH_CIGAM_64:
|
|
magic, hdr, endian = MH_CIGAM_64, mach_header_64, '<'
|
|
else:
|
|
raise ValueError("Unknown Mach-O header: 0x%08x in %r" % (
|
|
header, fh))
|
|
hdr = MachOHeader(self, fh, offset, size, magic, hdr, endian)
|
|
self.headers.append(hdr)
|
|
|
|
def write(self, f):
|
|
for header in self.headers:
|
|
header.write(f)
|
|
|
|
|
|
class MachOHeader(object):
|
|
"""
|
|
Provides reading/writing the Mach-O header of a specific existing file
|
|
"""
|
|
# filename - the original filename of this mach-o
|
|
# sizediff - the current deviation from the initial mach-o size
|
|
# header - the mach-o header
|
|
# commands - a list of (load_command, somecommand, data)
|
|
# data is either a str, or a list of segment structures
|
|
# total_size - the current mach-o header size (including header)
|
|
# low_offset - essentially, the maximum mach-o header size
|
|
# id_cmd - the index of my id command, or None
|
|
|
|
def __init__(self, parent, fh, offset, size, magic, hdr, endian):
|
|
self.MH_MAGIC = magic
|
|
self.mach_header = hdr
|
|
|
|
# These are all initialized by self.load()
|
|
self.parent = parent
|
|
self.offset = offset
|
|
self.size = size
|
|
|
|
self.endian = endian
|
|
self.header = None
|
|
self.commands = None
|
|
self.id_cmd = None
|
|
self.sizediff = None
|
|
self.total_size = None
|
|
self.low_offset = None
|
|
self.filetype = None
|
|
self.headers = []
|
|
|
|
self.load(fh)
|
|
|
|
def __repr__(self):
|
|
return "<%s filename=%r offset=%d size=%d endian=%r>" % (
|
|
type(self).__name__, self.parent.filename, self.offset, self.size,
|
|
self.endian)
|
|
|
|
def load(self, fh):
|
|
fh = fileview(fh, self.offset, self.size)
|
|
fh.seek(0)
|
|
|
|
self.sizediff = 0
|
|
kw = {'_endian_': self.endian}
|
|
header = self.mach_header.from_fileobj(fh, **kw)
|
|
self.header = header
|
|
# if header.magic != self.MH_MAGIC:
|
|
# raise ValueError("header has magic %08x, expecting %08x" % (
|
|
# header.magic, self.MH_MAGIC))
|
|
|
|
cmd = self.commands = []
|
|
|
|
self.filetype = self.get_filetype_shortname(header.filetype)
|
|
|
|
read_bytes = 0
|
|
low_offset = sys.maxsize
|
|
for i in range(header.ncmds):
|
|
# read the load command
|
|
cmd_load = load_command.from_fileobj(fh, **kw)
|
|
|
|
# read the specific command
|
|
klass = LC_REGISTRY.get(cmd_load.cmd, None)
|
|
if klass is None:
|
|
raise ValueError("Unknown load command: %d" % (cmd_load.cmd,))
|
|
cmd_cmd = klass.from_fileobj(fh, **kw)
|
|
|
|
if cmd_load.cmd == LC_ID_DYLIB:
|
|
# remember where this command was
|
|
if self.id_cmd is not None:
|
|
raise ValueError("This dylib already has an id")
|
|
self.id_cmd = i
|
|
|
|
if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64):
|
|
# for segment commands, read the list of segments
|
|
segs = []
|
|
# assert that the size makes sense
|
|
if cmd_load.cmd == LC_SEGMENT:
|
|
section_cls = section
|
|
else: # LC_SEGMENT_64
|
|
section_cls = section_64
|
|
|
|
expected_size = (
|
|
sizeof(klass) + sizeof(load_command) +
|
|
(sizeof(section_cls) * cmd_cmd.nsects)
|
|
)
|
|
if cmd_load.cmdsize != expected_size:
|
|
raise ValueError("Segment size mismatch")
|
|
# this is a zero block or something
|
|
# so the beginning is wherever the fileoff of this command is
|
|
if cmd_cmd.nsects == 0:
|
|
if cmd_cmd.filesize != 0:
|
|
low_offset = min(low_offset, cmd_cmd.fileoff)
|
|
else:
|
|
# this one has multiple segments
|
|
for j in range(cmd_cmd.nsects):
|
|
# read the segment
|
|
seg = section_cls.from_fileobj(fh, **kw)
|
|
# if the segment has a size and is not zero filled
|
|
# then its beginning is the offset of this segment
|
|
not_zerofill = ((seg.flags & S_ZEROFILL) != S_ZEROFILL)
|
|
if seg.offset > 0 and seg.size > 0 and not_zerofill:
|
|
low_offset = min(low_offset, seg.offset)
|
|
if not_zerofill:
|
|
c = fh.tell()
|
|
fh.seek(seg.offset)
|
|
sd = fh.read(seg.size)
|
|
seg.add_section_data(sd)
|
|
fh.seek(c)
|
|
segs.append(seg)
|
|
# data is a list of segments
|
|
cmd_data = segs
|
|
|
|
# XXX: Disabled for now because writing back doesn't work
|
|
# elif cmd_load.cmd == LC_CODE_SIGNATURE:
|
|
# c = fh.tell()
|
|
# fh.seek(cmd_cmd.dataoff)
|
|
# cmd_data = fh.read(cmd_cmd.datasize)
|
|
# fh.seek(c)
|
|
# elif cmd_load.cmd == LC_SYMTAB:
|
|
# c = fh.tell()
|
|
# fh.seek(cmd_cmd.stroff)
|
|
# cmd_data = fh.read(cmd_cmd.strsize)
|
|
# fh.seek(c)
|
|
|
|
else:
|
|
# data is a raw str
|
|
data_size = (
|
|
cmd_load.cmdsize - sizeof(klass) - sizeof(load_command)
|
|
)
|
|
cmd_data = fh.read(data_size)
|
|
cmd.append((cmd_load, cmd_cmd, cmd_data))
|
|
read_bytes += cmd_load.cmdsize
|
|
|
|
# make sure the header made sense
|
|
if read_bytes != header.sizeofcmds:
|
|
raise ValueError("Read %d bytes, header reports %d bytes" % (
|
|
read_bytes, header.sizeofcmds))
|
|
self.total_size = sizeof(self.mach_header) + read_bytes
|
|
self.low_offset = low_offset
|
|
|
|
def walkRelocatables(self, shouldRelocateCommand=_shouldRelocateCommand):
|
|
"""
|
|
for all relocatable commands
|
|
yield (command_index, command_name, filename)
|
|
"""
|
|
for (idx, (lc, cmd, data)) in enumerate(self.commands):
|
|
if shouldRelocateCommand(lc.cmd):
|
|
name = _RELOCATABLE_NAMES[lc.cmd]
|
|
ofs = cmd.name - sizeof(lc.__class__) - sizeof(cmd.__class__)
|
|
yield idx, name, data[ofs:data.find(b'\x00', ofs)].decode(
|
|
sys.getfilesystemencoding())
|
|
|
|
def rewriteInstallNameCommand(self, loadcmd):
|
|
"""Rewrite the load command of this dylib"""
|
|
if self.id_cmd is not None:
|
|
self.rewriteDataForCommand(self.id_cmd, loadcmd)
|
|
return True
|
|
return False
|
|
|
|
def changedHeaderSizeBy(self, bytes):
|
|
self.sizediff += bytes
|
|
if (self.total_size + self.sizediff) > self.low_offset:
|
|
print(
|
|
"WARNING: Mach-O header in %r may be too large to relocate" % (
|
|
self.parent.filename,))
|
|
|
|
def rewriteLoadCommands(self, changefunc):
|
|
"""
|
|
Rewrite the load commands based upon a change dictionary
|
|
"""
|
|
data = changefunc(self.parent.filename)
|
|
changed = False
|
|
if data is not None:
|
|
if self.rewriteInstallNameCommand(
|
|
data.encode(sys.getfilesystemencoding())):
|
|
changed = True
|
|
for idx, name, filename in self.walkRelocatables():
|
|
data = changefunc(filename)
|
|
if data is not None:
|
|
if self.rewriteDataForCommand(idx, data.encode(
|
|
sys.getfilesystemencoding())):
|
|
changed = True
|
|
return changed
|
|
|
|
def rewriteDataForCommand(self, idx, data):
|
|
lc, cmd, old_data = self.commands[idx]
|
|
hdrsize = sizeof(lc.__class__) + sizeof(cmd.__class__)
|
|
align = struct.calcsize('Q')
|
|
data = data + (b'\x00' * (align - (len(data) % align)))
|
|
newsize = hdrsize + len(data)
|
|
self.commands[idx] = (lc, cmd, data)
|
|
self.changedHeaderSizeBy(newsize - lc.cmdsize)
|
|
lc.cmdsize, cmd.name = newsize, hdrsize
|
|
return True
|
|
|
|
def synchronize_size(self):
|
|
if (self.total_size + self.sizediff) > self.low_offset:
|
|
raise ValueError(
|
|
("New Mach-O header is too large to relocate in %r "
|
|
"(new size=%r, max size=%r, delta=%r)") % (
|
|
self.parent.filename, self.total_size + self.sizediff,
|
|
self.low_offset, self.sizediff))
|
|
self.header.sizeofcmds += self.sizediff
|
|
self.total_size = sizeof(self.mach_header) + self.header.sizeofcmds
|
|
self.sizediff = 0
|
|
|
|
def write(self, fileobj):
|
|
fileobj = fileview(fileobj, self.offset, self.size)
|
|
fileobj.seek(0)
|
|
|
|
# serialize all the mach-o commands
|
|
self.synchronize_size()
|
|
|
|
self.header.to_fileobj(fileobj)
|
|
for lc, cmd, data in self.commands:
|
|
lc.to_fileobj(fileobj)
|
|
cmd.to_fileobj(fileobj)
|
|
|
|
if sys.version_info[0] == 2:
|
|
if isinstance(data, unicode):
|
|
fileobj.write(data.encode(sys.getfilesystemencoding()))
|
|
|
|
elif isinstance(data, (bytes, str)):
|
|
fileobj.write(data)
|
|
else:
|
|
# segments..
|
|
for obj in data:
|
|
obj.to_fileobj(fileobj)
|
|
else:
|
|
if isinstance(data, str):
|
|
fileobj.write(data.encode(sys.getfilesystemencoding()))
|
|
|
|
elif isinstance(data, bytes):
|
|
fileobj.write(data)
|
|
|
|
else:
|
|
# segments..
|
|
for obj in data:
|
|
obj.to_fileobj(fileobj)
|
|
|
|
# zero out the unused space, doubt this is strictly necessary
|
|
# and is generally probably already the case
|
|
fileobj.write(b'\x00' * (self.low_offset - fileobj.tell()))
|
|
|
|
def getSymbolTableCommand(self):
|
|
for lc, cmd, data in self.commands:
|
|
if lc.cmd == LC_SYMTAB:
|
|
return cmd
|
|
return None
|
|
|
|
def getDynamicSymbolTableCommand(self):
|
|
for lc, cmd, data in self.commands:
|
|
if lc.cmd == LC_DYSYMTAB:
|
|
return cmd
|
|
return None
|
|
|
|
def get_filetype_shortname(self, filetype):
|
|
if filetype in MH_FILETYPE_SHORTNAMES:
|
|
return MH_FILETYPE_SHORTNAMES[filetype]
|
|
else:
|
|
return 'unknown'
|
|
|
|
|
|
def main(fn):
|
|
m = MachO(fn)
|
|
seen = set()
|
|
for header in m.headers:
|
|
for idx, name, other in header.walkRelocatables():
|
|
if other not in seen:
|
|
seen.add(other)
|
|
print('\t' + name + ": " + other)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
import sys
|
|
files = sys.argv[1:] or ['/bin/ls']
|
|
for fn in files:
|
|
print(fn)
|
|
main(fn)
|