| #===- object.py - Python Object Bindings --------------------*- python -*--===# |
| # |
| # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| # |
| #===------------------------------------------------------------------------===# |
| |
| r""" |
| Object File Interface |
| ===================== |
| |
| This module provides an interface for reading information from object files |
| (e.g. binary executables and libraries). |
| |
| Using this module, you can obtain information about an object file's sections, |
| symbols, and relocations. These are represented by the classes ObjectFile, |
| Section, Symbol, and Relocation, respectively. |
| |
| Usage |
| ----- |
| |
| The only way to use this module is to start by creating an ObjectFile. You can |
| create an ObjectFile by loading a file (specified by its path) or by creating a |
| llvm.core.MemoryBuffer and loading that. |
| |
| Once you have an object file, you can inspect its sections and symbols directly |
| by calling get_sections() and get_symbols() respectively. To inspect |
| relocations, call get_relocations() on a Section instance. |
| |
| Iterator Interface |
| ------------------ |
| |
| The LLVM bindings expose iteration over sections, symbols, and relocations in a |
| way that only allows one instance to be operated on at a single time. This is |
| slightly annoying from a Python perspective, as it isn't very Pythonic to have |
| objects that "expire" but are still active from a dynamic language. |
| |
| To aid working around this limitation, each Section, Symbol, and Relocation |
| instance caches its properties after first access. So, if the underlying |
| iterator is advanced, the properties can still be obtained provided they have |
| already been retrieved. |
| |
| In addition, we also provide a "cache" method on each class to cache all |
| available data. You can call this on each obtained instance. Or, you can pass |
| cache=True to the appropriate get_XXX() method to have this done for you. |
| |
| Here are some examples on how to perform iteration: |
| |
| obj = ObjectFile(filename='/bin/ls') |
| |
| # This is OK. Each Section is only accessed inside its own iteration slot. |
| section_names = [] |
| for section in obj.get_sections(): |
| section_names.append(section.name) |
| |
| # This is NOT OK. You perform a lookup after the object has expired. |
| symbols = list(obj.get_symbols()) |
| for symbol in symbols: |
| print symbol.name # This raises because the object has expired. |
| |
| # In this example, we mix a working and failing scenario. |
| symbols = [] |
| for symbol in obj.get_symbols(): |
| symbols.append(symbol) |
| print symbol.name |
| |
| for symbol in symbols: |
| print symbol.name # OK |
| print symbol.address # NOT OK. We didn't look up this property before. |
| |
| # Cache everything up front. |
| symbols = list(obj.get_symbols(cache=True)) |
| for symbol in symbols: |
| print symbol.name # OK |
| |
| """ |
| |
| from ctypes import c_char_p |
| from ctypes import c_char |
| from ctypes import POINTER |
| from ctypes import c_uint64 |
| from ctypes import string_at |
| |
| from .common import CachedProperty |
| from .common import LLVMObject |
| from .common import c_object_p |
| from .common import get_library |
| from .core import MemoryBuffer |
| |
| __all__ = [ |
| "lib", |
| "ObjectFile", |
| "Relocation", |
| "Section", |
| "Symbol", |
| ] |
| |
| class ObjectFile(LLVMObject): |
| """Represents an object/binary file.""" |
| |
| def __init__(self, filename=None, contents=None): |
| """Construct an instance from a filename or binary data. |
| |
| filename must be a path to a file that can be opened with open(). |
| contents can be either a native Python buffer type (like str) or a |
| llvm.core.MemoryBuffer instance. |
| """ |
| if contents: |
| assert isinstance(contents, MemoryBuffer) |
| |
| if filename is not None: |
| contents = MemoryBuffer(filename=filename) |
| |
| if contents is None: |
| raise Exception('No input found.') |
| |
| ptr = lib.LLVMCreateObjectFile(contents) |
| LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile) |
| self.take_ownership(contents) |
| |
| def get_sections(self, cache=False): |
| """Obtain the sections in this object file. |
| |
| This is a generator for llvm.object.Section instances. |
| |
| Sections are exposed as limited-use objects. See the module's |
| documentation on iterators for more. |
| """ |
| sections = lib.LLVMGetSections(self) |
| last = None |
| while True: |
| if lib.LLVMIsSectionIteratorAtEnd(self, sections): |
| break |
| |
| last = Section(sections) |
| if cache: |
| last.cache() |
| |
| yield last |
| |
| lib.LLVMMoveToNextSection(sections) |
| last.expire() |
| |
| if last is not None: |
| last.expire() |
| |
| lib.LLVMDisposeSectionIterator(sections) |
| |
| def get_symbols(self, cache=False): |
| """Obtain the symbols in this object file. |
| |
| This is a generator for llvm.object.Symbol instances. |
| |
| Each Symbol instance is a limited-use object. See this module's |
| documentation on iterators for more. |
| """ |
| symbols = lib.LLVMGetSymbols(self) |
| last = None |
| while True: |
| if lib.LLVMIsSymbolIteratorAtEnd(self, symbols): |
| break |
| |
| last = Symbol(symbols, self) |
| if cache: |
| last.cache() |
| |
| yield last |
| |
| lib.LLVMMoveToNextSymbol(symbols) |
| last.expire() |
| |
| if last is not None: |
| last.expire() |
| |
| lib.LLVMDisposeSymbolIterator(symbols) |
| |
| class Section(LLVMObject): |
| """Represents a section in an object file.""" |
| |
| def __init__(self, ptr): |
| """Construct a new section instance. |
| |
| Section instances can currently only be created from an ObjectFile |
| instance. Therefore, this constructor should not be used outside of |
| this module. |
| """ |
| LLVMObject.__init__(self, ptr) |
| |
| self.expired = False |
| |
| @CachedProperty |
| def name(self): |
| """Obtain the string name of the section. |
| |
| This is typically something like '.dynsym' or '.rodata'. |
| """ |
| if self.expired: |
| raise Exception('Section instance has expired.') |
| |
| return lib.LLVMGetSectionName(self) |
| |
| @CachedProperty |
| def size(self): |
| """The size of the section, in long bytes.""" |
| if self.expired: |
| raise Exception('Section instance has expired.') |
| |
| return lib.LLVMGetSectionSize(self) |
| |
| @CachedProperty |
| def contents(self): |
| if self.expired: |
| raise Exception('Section instance has expired.') |
| |
| siz = self.size |
| |
| r = lib.LLVMGetSectionContents(self) |
| if r: |
| return string_at(r, siz) |
| return None |
| |
| @CachedProperty |
| def address(self): |
| """The address of this section, in long bytes.""" |
| if self.expired: |
| raise Exception('Section instance has expired.') |
| |
| return lib.LLVMGetSectionAddress(self) |
| |
| def has_symbol(self, symbol): |
| """Returns whether a Symbol instance is present in this Section.""" |
| if self.expired: |
| raise Exception('Section instance has expired.') |
| |
| assert isinstance(symbol, Symbol) |
| return lib.LLVMGetSectionContainsSymbol(self, symbol) |
| |
| def get_relocations(self, cache=False): |
| """Obtain the relocations in this Section. |
| |
| This is a generator for llvm.object.Relocation instances. |
| |
| Each instance is a limited used object. See this module's documentation |
| on iterators for more. |
| """ |
| if self.expired: |
| raise Exception('Section instance has expired.') |
| |
| relocations = lib.LLVMGetRelocations(self) |
| last = None |
| while True: |
| if lib.LLVMIsRelocationIteratorAtEnd(self, relocations): |
| break |
| |
| last = Relocation(relocations) |
| if cache: |
| last.cache() |
| |
| yield last |
| |
| lib.LLVMMoveToNextRelocation(relocations) |
| last.expire() |
| |
| if last is not None: |
| last.expire() |
| |
| lib.LLVMDisposeRelocationIterator(relocations) |
| |
| def cache(self): |
| """Cache properties of this Section. |
| |
| This can be called as a workaround to the single active Section |
| limitation. When called, the properties of the Section are fetched so |
| they are still available after the Section has been marked inactive. |
| """ |
| getattr(self, 'name') |
| getattr(self, 'size') |
| getattr(self, 'contents') |
| getattr(self, 'address') |
| |
| def expire(self): |
| """Expire the section. |
| |
| This is called internally by the section iterator. |
| """ |
| self.expired = True |
| |
| class Symbol(LLVMObject): |
| """Represents a symbol in an object file.""" |
| def __init__(self, ptr, object_file): |
| assert isinstance(ptr, c_object_p) |
| assert isinstance(object_file, ObjectFile) |
| |
| LLVMObject.__init__(self, ptr) |
| |
| self.expired = False |
| self._object_file = object_file |
| |
| @CachedProperty |
| def name(self): |
| """The str name of the symbol. |
| |
| This is often a function or variable name. Keep in mind that name |
| mangling could be in effect. |
| """ |
| if self.expired: |
| raise Exception('Symbol instance has expired.') |
| |
| return lib.LLVMGetSymbolName(self) |
| |
| @CachedProperty |
| def address(self): |
| """The address of this symbol, in long bytes.""" |
| if self.expired: |
| raise Exception('Symbol instance has expired.') |
| |
| return lib.LLVMGetSymbolAddress(self) |
| |
| @CachedProperty |
| def size(self): |
| """The size of the symbol, in long bytes.""" |
| if self.expired: |
| raise Exception('Symbol instance has expired.') |
| |
| return lib.LLVMGetSymbolSize(self) |
| |
| @CachedProperty |
| def section(self): |
| """The Section to which this Symbol belongs. |
| |
| The returned Section instance does not expire, unlike Sections that are |
| commonly obtained through iteration. |
| |
| Because this obtains a new section iterator each time it is accessed, |
| calling this on a number of Symbol instances could be expensive. |
| """ |
| sections = lib.LLVMGetSections(self._object_file) |
| lib.LLVMMoveToContainingSection(sections, self) |
| |
| return Section(sections) |
| |
| def cache(self): |
| """Cache all cacheable properties.""" |
| getattr(self, 'name') |
| getattr(self, 'address') |
| getattr(self, 'size') |
| |
| def expire(self): |
| """Mark the object as expired to prevent future API accesses. |
| |
| This is called internally by this module and it is unlikely that |
| external callers have a legitimate reason for using it. |
| """ |
| self.expired = True |
| |
| class Relocation(LLVMObject): |
| """Represents a relocation definition.""" |
| def __init__(self, ptr): |
| """Create a new relocation instance. |
| |
| Relocations are created from objects derived from Section instances. |
| Therefore, this constructor should not be called outside of this |
| module. See Section.get_relocations() for the proper method to obtain |
| a Relocation instance. |
| """ |
| assert isinstance(ptr, c_object_p) |
| |
| LLVMObject.__init__(self, ptr) |
| |
| self.expired = False |
| |
| @CachedProperty |
| def offset(self): |
| """The offset of this relocation, in long bytes.""" |
| if self.expired: |
| raise Exception('Relocation instance has expired.') |
| |
| return lib.LLVMGetRelocationOffset(self) |
| |
| @CachedProperty |
| def symbol(self): |
| """The Symbol corresponding to this Relocation.""" |
| if self.expired: |
| raise Exception('Relocation instance has expired.') |
| |
| ptr = lib.LLVMGetRelocationSymbol(self) |
| return Symbol(ptr) |
| |
| @CachedProperty |
| def type_number(self): |
| """The relocation type, as a long.""" |
| if self.expired: |
| raise Exception('Relocation instance has expired.') |
| |
| return lib.LLVMGetRelocationType(self) |
| |
| @CachedProperty |
| def type_name(self): |
| """The relocation type's name, as a str.""" |
| if self.expired: |
| raise Exception('Relocation instance has expired.') |
| |
| return lib.LLVMGetRelocationTypeName(self) |
| |
| @CachedProperty |
| def value_string(self): |
| if self.expired: |
| raise Exception('Relocation instance has expired.') |
| |
| return lib.LLVMGetRelocationValueString(self) |
| |
| def expire(self): |
| """Expire this instance, making future API accesses fail.""" |
| self.expired = True |
| |
| def cache(self): |
| """Cache all cacheable properties on this instance.""" |
| getattr(self, 'address') |
| getattr(self, 'offset') |
| getattr(self, 'symbol') |
| getattr(self, 'type') |
| getattr(self, 'type_name') |
| getattr(self, 'value_string') |
| |
| def register_library(library): |
| """Register function prototypes with LLVM library instance.""" |
| |
| # Object.h functions |
| library.LLVMCreateObjectFile.argtypes = [MemoryBuffer] |
| library.LLVMCreateObjectFile.restype = c_object_p |
| |
| library.LLVMDisposeObjectFile.argtypes = [ObjectFile] |
| |
| library.LLVMGetSections.argtypes = [ObjectFile] |
| library.LLVMGetSections.restype = c_object_p |
| |
| library.LLVMDisposeSectionIterator.argtypes = [c_object_p] |
| |
| library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile, c_object_p] |
| library.LLVMIsSectionIteratorAtEnd.restype = bool |
| |
| library.LLVMMoveToNextSection.argtypes = [c_object_p] |
| |
| library.LLVMMoveToContainingSection.argtypes = [c_object_p, c_object_p] |
| |
| library.LLVMGetSymbols.argtypes = [ObjectFile] |
| library.LLVMGetSymbols.restype = c_object_p |
| |
| library.LLVMDisposeSymbolIterator.argtypes = [c_object_p] |
| |
| library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile, c_object_p] |
| library.LLVMIsSymbolIteratorAtEnd.restype = bool |
| |
| library.LLVMMoveToNextSymbol.argtypes = [c_object_p] |
| |
| library.LLVMGetSectionName.argtypes = [c_object_p] |
| library.LLVMGetSectionName.restype = c_char_p |
| |
| library.LLVMGetSectionSize.argtypes = [c_object_p] |
| library.LLVMGetSectionSize.restype = c_uint64 |
| |
| library.LLVMGetSectionContents.argtypes = [c_object_p] |
| # Can't use c_char_p here as it isn't a NUL-terminated string. |
| library.LLVMGetSectionContents.restype = POINTER(c_char) |
| |
| library.LLVMGetSectionAddress.argtypes = [c_object_p] |
| library.LLVMGetSectionAddress.restype = c_uint64 |
| |
| library.LLVMGetSectionContainsSymbol.argtypes = [c_object_p, c_object_p] |
| library.LLVMGetSectionContainsSymbol.restype = bool |
| |
| library.LLVMGetRelocations.argtypes = [c_object_p] |
| library.LLVMGetRelocations.restype = c_object_p |
| |
| library.LLVMDisposeRelocationIterator.argtypes = [c_object_p] |
| |
| library.LLVMIsRelocationIteratorAtEnd.argtypes = [c_object_p, c_object_p] |
| library.LLVMIsRelocationIteratorAtEnd.restype = bool |
| |
| library.LLVMMoveToNextRelocation.argtypes = [c_object_p] |
| |
| library.LLVMGetSymbolName.argtypes = [Symbol] |
| library.LLVMGetSymbolName.restype = c_char_p |
| |
| library.LLVMGetSymbolAddress.argtypes = [Symbol] |
| library.LLVMGetSymbolAddress.restype = c_uint64 |
| |
| library.LLVMGetSymbolSize.argtypes = [Symbol] |
| library.LLVMGetSymbolSize.restype = c_uint64 |
| |
| library.LLVMGetRelocationOffset.argtypes = [c_object_p] |
| library.LLVMGetRelocationOffset.restype = c_uint64 |
| |
| library.LLVMGetRelocationSymbol.argtypes = [c_object_p] |
| library.LLVMGetRelocationSymbol.restype = c_object_p |
| |
| library.LLVMGetRelocationType.argtypes = [c_object_p] |
| library.LLVMGetRelocationType.restype = c_uint64 |
| |
| library.LLVMGetRelocationTypeName.argtypes = [c_object_p] |
| library.LLVMGetRelocationTypeName.restype = c_char_p |
| |
| library.LLVMGetRelocationValueString.argtypes = [c_object_p] |
| library.LLVMGetRelocationValueString.restype = c_char_p |
| |
| lib = get_library() |
| register_library(lib) |