| #!/usr/bin/env python |
| from __future__ import print_function |
| |
| """ |
| Helper script to print out the raw content of an ELF section. |
| Example usages: |
| ``` |
| # print out as bits by default |
| extract-section.py .text --input-file=foo.o |
| ``` |
| ``` |
| # read from stdin and print out in hex |
| cat foo.o | extract-section.py -h .text |
| ``` |
| This is merely a wrapper around `llvm-readobj` that focuses on the binary |
| content and provides more formatting options. |
| """ |
| |
| # Unfortunately reading binary from stdin is not so trivial in Python... |
def read_raw_stdin():
    """Read everything available on standard input and return it undecoded.

    On Python 3 the data comes from ``sys.stdin.buffer``; on Python 2 it is
    read from ``sys.stdin`` directly, after switching the descriptor to
    binary mode when running on Windows.
    """
    import sys

    if sys.version_info >= (3, 0):
        # Python 3 exposes the raw byte stream underneath the text wrapper.
        return sys.stdin.buffer.read()

    if sys.platform == "win32":
        # Python 2 on Windows reads stdin as text, so turn off line-end
        # translation on the descriptor before reading the object file.
        import msvcrt
        import os

        # msvcrt has no `setformat`; `setmode` is the call that changes the
        # translation mode of a file descriptor.
        msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
    return sys.stdin.read()
| |
| |
def get_raw_section_dump(readobj_path, section_name, input_file):
    """Run llvm-readobj on *input_file* and return its hex dump as text.

    readobj_path -- executable to launch.
    section_name -- section passed to ``--hex-dump``.
    input_file   -- path handed to the tool; when it is ``"-"`` this
                    process's own stdin is read and piped to the child.

    Returns whatever the child wrote to stdout, decoded as UTF-8 if it was
    captured as bytes.
    """
    import subprocess

    hex_dump_flag = "--hex-dump={}".format(section_name)
    argv = [readobj_path, "--elf-output-style=GNU", hex_dump_flag, input_file]
    child = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    # Only forward our stdin when the caller asked to read from it;
    # otherwise the child's stdin is simply closed without any data.
    payload = read_raw_stdin() if input_file == "-" else None
    captured = child.communicate(input=payload)[0]

    if isinstance(captured, str):
        return captured
    return captured.decode("utf-8")
| |
| |
def _decode_hex_column(column):
    """Return the byte values spelled out by one column of the hex dump.

    Raises ValueError when *column* is empty or is not made up solely of
    two-digit hexadecimal pairs, i.e. when it is not a data column.
    """
    hex_digits = "0123456789abcdefABCDEF"
    if not column or len(column) % 2 or any(ch not in hex_digits for ch in column):
        raise ValueError("not a hex dump column: {!r}".format(column))
    return [int(column[i : i + 2], 16) for i in range(0, len(column), 2)]


def _bits_tokens(word_bytes, bits_endian, byte_indicator):
    """Return one "0"/"1" token per bit, optionally with "." after each byte."""
    ordered = word_bytes[::-1] if bits_endian == "little" else word_bytes
    tokens = []
    for byte in ordered:
        tokens.extend("{:08b}".format(byte))
        if byte_indicator:
            tokens.append(".")
    return tokens


def _hex_tokens(word_bytes, hex_width):
    """Return the bytes as lowercase hex strings of *hex_width* bytes each."""
    return [
        "".join("{:02x}".format(byte) for byte in word_bytes[start : start + hex_width])
        for start in range(0, len(word_bytes), hex_width)
    ]


def format_section_dump(
    raw_section, output_format, hex_width=4, bits_endian="big", byte_indicator=False
):
    """Convert the text of a GNU-style hex dump into a list of output tokens.

    raw_section    -- text produced by get_raw_section_dump().
    output_format  -- "bits" or "hex"; any other value yields no tokens.
    hex_width      -- bytes per token in "hex" mode.
    bits_endian    -- "big" keeps each word's byte order, "little" reverses it.
    byte_indicator -- in "bits" mode, append a "." token after every byte.

    Each line is split on single spaces. The first field (the address) is
    skipped and at most the next four fields are treated as data words.
    Scanning of a line stops at the first field that is not hexadecimal,
    such as the empty fields produced by padding. A column shorter than
    eight digits is rendered as the bytes it contains, not zero-extended
    to 32 bits.
    """
    results = []
    for line in raw_section.splitlines():
        if line.startswith("Hex dump"):
            continue
        for column in line.strip().split(" ")[1:5]:
            try:
                word_bytes = _decode_hex_column(column)
            except ValueError:
                # exclude any non-hex dump string
                break
            if output_format == "bits":
                results.extend(_bits_tokens(word_bytes, bits_endian, byte_indicator))
            elif output_format == "hex":
                results.extend(_hex_tokens(word_bytes, hex_width))
    return results


def main():
    """Parse the command line, dump the requested section and print it."""
    import argparse

    # The default '-h' (--help) will conflict with our '-h' (hex) format
    arg_parser = argparse.ArgumentParser(add_help=False)
    arg_parser.add_argument(
        "--readobj-path",
        # Store under the name the rest of the script reads; without this the
        # option landed in `readobj_path` and was silently ignored.
        dest="tool_path",
        metavar="<executable path>",
        type=str,
        help="Path to llvm-readobj",
    )
    arg_parser.add_argument(
        "--input-file",
        metavar="<file>",
        type=str,
        help="Input object file, or '-' to read from stdin",
    )
    arg_parser.add_argument(
        "section", metavar="<name>", type=str, help="Name of the section to extract"
    )
    # Output format
    format_group = arg_parser.add_mutually_exclusive_group()
    format_group.add_argument(
        "-b",
        dest="format",
        action="store_const",
        const="bits",
        help="Print out in bits",
    )
    arg_parser.add_argument(
        "--byte-indicator",
        action="store_true",
        help="Whether to print a '.' every 8 bits in bits printing mode",
    )
    arg_parser.add_argument(
        "--bits-endian",
        metavar="<little/big>",
        type=str,
        choices=["little", "big"],
        help="Print out bits in specified endianness (little or big); defaults to big",
    )
    format_group.add_argument(
        "-h",
        dest="format",
        action="store_const",
        const="hex",
        help="Print out in hexadecimal",
    )
    arg_parser.add_argument(
        "--hex-width",
        metavar="<# of bytes>",
        type=int,
        help="The width (in byte) of every element in hex printing mode",
    )

    arg_parser.add_argument("--help", action="help")
    arg_parser.set_defaults(
        format="bits",
        tool_path="llvm-readobj",
        input_file="-",
        byte_indicator=False,
        hex_width=4,
        bits_endian="big",
    )
    args = arg_parser.parse_args()

    # Reject an unusable width up front instead of silently printing nothing.
    if args.format == "hex" and not 1 <= args.hex_width <= 4:
        arg_parser.error("--hex-width must be between 1 and 4")

    raw_section = get_raw_section_dump(args.tool_path, args.section, args.input_file)
    results = format_section_dump(
        raw_section,
        args.format,
        hex_width=args.hex_width,
        bits_endian=args.bits_endian,
        byte_indicator=args.byte_indicator,
    )
    print(" ".join(results), end="")


if __name__ == "__main__":
    main()