bolt/utils/llvm-bolt-wrapper.py - llvm-project - Git at Google

 #!/usr/bin/env python3
 import argparse
 import subprocess
 from typing import *
 import tempfile
 import copy
 import os
 import shutil
 import sys
 import re
 import configparser
 from types import SimpleNamespace
 from textwrap import dedent

 # USAGE:
 # 0. Prepare two BOLT build versions: base and compare.
 # 1. Create the config by invoking this script with required options.
 #    Save the config as `llvm-bolt-wrapper.ini` next to the script or
 #    in the testing directory.
 # In the base BOLT build directory:
 # 2. Rename `llvm-bolt` to `llvm-bolt.real`
 # 3. Create a symlink from this script to `llvm-bolt`
 # 4. Create `llvm-bolt-wrapper.ini` and fill it using the example below.
 #
 # This script will compare binaries produced by base and compare BOLT, and
 # report elapsed processing time and max RSS.

 # read options from config file llvm-bolt-wrapper.ini in script CWD
 #
 # [config]
 # # mandatory
 # base_bolt = /full/path/to/llvm-bolt.real
 # cmp_bolt = /full/path/to/other/llvm-bolt
 # # optional, default to False
 # verbose
 # keep_tmp
 # no_minimize
 # run_sequentially
 # compare_output
 # skip_binary_cmp
 # # optional, defaults to timing.log in CWD
 # timing_file = timing1.log


 def read_cfg():
     src_dir = os.path.dirname(os.path.abspath(__file__))
     cfg = configparser.ConfigParser(allow_no_value=True)
     cfgs = cfg.read("llvm-bolt-wrapper.ini")
     if not cfgs:
         cfgs = cfg.read(os.path.join(src_dir, "llvm-bolt-wrapper.ini"))
     assert cfgs, f"llvm-bolt-wrapper.ini is not found in {os.getcwd()}"

     def get_cfg(key):
         # if key is not present in config, assume False
         if key not in cfg["config"]:
             return False
         # if key is present, but has no value, assume True
         if not cfg["config"][key]:
             return True
         # if key has associated value, interpret the value
         return cfg["config"].getboolean(key)

     d = {
         # BOLT binary locations
         "BASE_BOLT": cfg["config"]["base_bolt"],
         "CMP_BOLT": cfg["config"]["cmp_bolt"],
         # optional
         "VERBOSE": get_cfg("verbose"),
         "KEEP_TMP": get_cfg("keep_tmp"),
         "NO_MINIMIZE": get_cfg("no_minimize"),
         "RUN_SEQUENTIALLY": get_cfg("run_sequentially"),
         "COMPARE_OUTPUT": get_cfg("compare_output"),
         "SKIP_BINARY_CMP": get_cfg("skip_binary_cmp"),
         "TIMING_FILE": cfg["config"].get("timing_file", "timing.log"),
     }
     if d["VERBOSE"]:
         print(f"Using config {os.path.abspath(cfgs[0])}")
     return SimpleNamespace(**d)


 # perf2bolt mode
 PERF2BOLT_MODE = ["-aggregate-only", "-ignore-build-id"]

 # boltdiff mode
 BOLTDIFF_MODE = ["-diff-only", "-o", "/dev/null"]

 # options to suppress binary differences as much as possible
 MINIMIZE_DIFFS = ["-bolt-info=0"]

 # bolt output options that need to be intercepted
 BOLT_OUTPUT_OPTS = {
     "-o": "BOLT output binary",
     "-w": "BOLT recorded profile",
 }

 # regex patterns to exclude the line from log comparison
 SKIP_MATCH = [
     "BOLT-INFO: BOLT version",
     r"^Args: ",
     r"^BOLT-DEBUG:",
     r"BOLT-INFO:.*data.*output data",
     "WARNING: reading perf data directly",
 ]


 def run_cmd(cmd, out_f, cfg):
     if cfg.VERBOSE:
         print(" ".join(cmd))
     return subprocess.Popen(cmd, stdout=out_f, stderr=subprocess.STDOUT)


 def run_bolt(bolt_path, bolt_args, out_f, cfg):
     p2b = os.path.basename(sys.argv[0]) == "perf2bolt"  # perf2bolt mode
     bd = os.path.basename(sys.argv[0]) == "llvm-boltdiff"  # boltdiff mode
     cmd = ["/usr/bin/time", "-f", "%e %M", bolt_path] + bolt_args
     if p2b:
         # -ignore-build-id can occur at most once, hence remove it from cmd
         if "-ignore-build-id" in cmd:
             cmd.remove("-ignore-build-id")
         cmd += PERF2BOLT_MODE
     elif bd:
         cmd += BOLTDIFF_MODE
     elif not cfg.NO_MINIMIZE:
         cmd += MINIMIZE_DIFFS
     return run_cmd(cmd, out_f, cfg)


 def prepend_dash(args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
     """
     Accepts parsed arguments and returns flat list with dash prepended to
     the option.
     Example: Namespace(o='test.tmp') -> ['-o', 'test.tmp']
     """
     dashed = [("-" + key, value) for (key, value) in args.items()]
     flattened = list(sum(dashed, ()))
     return flattened


 def replace_cmp_path(tmp: AnyStr, args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
     """
     Keeps file names, but replaces the path to a temp folder.
     Example: Namespace(o='abc/test.tmp') -> Namespace(o='/tmp/tmpf9un/test.tmp')
     Except preserve /dev/null.
     """
     replace_path = (
         lambda x: os.path.join(tmp, os.path.basename(x))
         if x != "/dev/null"
         else "/dev/null"
     )
     new_args = {key: replace_path(value) for key, value in args.items()}
     return prepend_dash(new_args)


 def preprocess_args(args: argparse.Namespace) -> Mapping[AnyStr, AnyStr]:
     """
     Drop options that weren't parsed (e.g. -w), convert to a dict
     """
     return {key: value for key, value in vars(args).items() if value}


 def write_to(txt, filename, mode="w"):
     with open(filename, mode) as f:
         f.write(txt)


 def wait(proc, fdesc):
     proc.wait()
     fdesc.close()
     return open(fdesc.name)


 def compare_logs(main, cmp, skip_begin=0, skip_end=0, str_input=True):
     """
     Compares logs but allows for certain lines to be excluded from comparison.
     If str_input is True (default), the input it assumed to be a string,
     which is split into lines. Otherwise the input is assumed to be a file.
     Returns None on success, mismatch otherwise.
     """
     main_inp = main.splitlines() if str_input else main.readlines()
     cmp_inp = cmp.splitlines() if str_input else cmp.readlines()
     # rewind logs after consumption
     if not str_input:
         main.seek(0)
         cmp.seek(0)
     for lhs, rhs in list(zip(main_inp, cmp_inp))[skip_begin : -skip_end or None]:
         if lhs != rhs:
             # check skip patterns
             for skip in SKIP_MATCH:
                 # both lines must contain the pattern
                 if re.search(skip, lhs) and re.search(skip, rhs):
                     break
             # otherwise return mismatching lines
             else:
                 return (lhs, rhs)
     return None


 def fmt_cmp(cmp_tuple):
     if not cmp_tuple:
         return ""
     return f"main:\n{cmp_tuple[0]}\ncmp:\n{cmp_tuple[1]}\n"


 def compare_with(lhs, rhs, cmd, skip_begin=0, skip_end=0):
     """
     Runs cmd on both lhs and rhs and compares stdout.
     Returns tuple (mismatch, lhs_stdout):
         - if stdout matches between two files, mismatch is None,
         - otherwise mismatch is a tuple of mismatching lines.
     """
     run = lambda binary: subprocess.run(
         cmd.split() + [binary], text=True, check=True, capture_output=True
     ).stdout
     run_lhs = run(lhs)
     run_rhs = run(rhs)
     cmp = compare_logs(run_lhs, run_rhs, skip_begin, skip_end)
     return cmp, run_lhs


 def parse_cmp_offset(cmp_out):
     """
     Extracts byte number from cmp output:
     file1 file2 differ: byte X, line Y
     """
     # NOTE: cmp counts bytes starting from 1!
     return int(re.search(r"byte (\d+),", cmp_out).groups()[0]) - 1


 def report_real_time(binary, main_err, cmp_err, cfg):
     """
     Extracts real time from stderr and appends it to TIMING FILE it as csv:
     "output binary; base bolt; cmp bolt"
     """

     def get_real_from_stderr(logline):
         return "; ".join(logline.split())

     for line in main_err:
         pass
     main = get_real_from_stderr(line)
     for line in cmp_err:
         pass
     cmp = get_real_from_stderr(line)
     write_to(f"{binary}; {main}; {cmp}\n", cfg.TIMING_FILE, "a")
     # rewind logs after consumption
     main_err.seek(0)
     cmp_err.seek(0)


 def clean_exit(tmp, out, exitcode, cfg):
     # temp files are only cleaned on success
     if not cfg.KEEP_TMP:
         shutil.rmtree(tmp)

     # report stdout and stderr from the main process
     shutil.copyfileobj(out, sys.stdout)
     sys.exit(exitcode)


 def find_section(offset, readelf_hdr):
     hdr = readelf_hdr.split("\n")
     section = None
     # extract sections table (parse objdump -hw output)
     for line in hdr[5:-1]:
         cols = line.strip().split()
         # extract section offset
         file_offset = int(cols[5], 16)
         # section size
         size = int(cols[2], 16)
         if offset >= file_offset and offset < file_offset + size:
             if sys.stdout.isatty():  # terminal supports colors
                 print(f"\033[1m{line}\033[0m")
             else:
                 print(f">{line}")
             section = cols[1]
         else:
             print(line)
     return section


 def main_config_generator():
     parser = argparse.ArgumentParser()
     parser.add_argument("base_bolt", help="Full path to base llvm-bolt binary")
     parser.add_argument("cmp_bolt", help="Full path to cmp llvm-bolt binary")
     parser.add_argument(
         "--verbose",
         action="store_true",
         help="Print subprocess invocation cmdline (default False)",
     )
     parser.add_argument(
         "--keep_tmp",
         action="store_true",
         help="Preserve tmp folder on a clean exit "
         "(tmp directory is preserved on crash by default)",
     )
     parser.add_argument(
         "--no_minimize",
         action="store_true",
         help=f"Do not add `{MINIMIZE_DIFFS}` that is used "
         "by default to reduce binary differences",
     )
     parser.add_argument(
         "--run_sequentially",
         action="store_true",
         help="Run both binaries sequentially (default "
         "in parallel). Use for timing comparison",
     )
     parser.add_argument(
         "--compare_output",
         action="store_true",
         help="Compare bolt stdout/stderr (disabled by default)",
     )
     parser.add_argument(
         "--skip_binary_cmp", action="store_true", help="Disable output comparison"
     )
     parser.add_argument(
         "--timing_file",
         help="Override path to timing log " "file (default `timing.log` in CWD)",
     )
     args = parser.parse_args()

     print(
         dedent(
             f"""\
     [config]
     # mandatory
     base_bolt = {args.base_bolt}
     cmp_bolt = {args.cmp_bolt}"""
         )
     )
     del args.base_bolt
     del args.cmp_bolt
     d = vars(args)
     if any(d.values()):
         print("# optional")
         for key, value in d.items():
             if value:
                 print(key)


 def main():
     cfg = read_cfg()
     # intercept output arguments
     parser = argparse.ArgumentParser(add_help=False)
     for option, help in BOLT_OUTPUT_OPTS.items():
         parser.add_argument(option, help=help)
     args, unknownargs = parser.parse_known_args()
     args = preprocess_args(args)
     cmp_args = copy.deepcopy(args)
     tmp = tempfile.mkdtemp()
     cmp_args = replace_cmp_path(tmp, cmp_args)

     # reconstruct output arguments: prepend dash
     args = prepend_dash(args)

     # run both BOLT binaries
     main_f = open(os.path.join(tmp, "main_bolt.stdout"), "w")
     cmp_f = open(os.path.join(tmp, "cmp_bolt.stdout"), "w")
     main_bolt = run_bolt(cfg.BASE_BOLT, unknownargs + args, main_f, cfg)
     if cfg.RUN_SEQUENTIALLY:
         main_out = wait(main_bolt, main_f)
         cmp_bolt = run_bolt(cfg.CMP_BOLT, unknownargs + cmp_args, cmp_f, cfg)
     else:
         cmp_bolt = run_bolt(cfg.CMP_BOLT, unknownargs + cmp_args, cmp_f, cfg)
         main_out = wait(main_bolt, main_f)
     cmp_out = wait(cmp_bolt, cmp_f)

     # check exit code
     if main_bolt.returncode != cmp_bolt.returncode:
         print(tmp)
         exit("exitcode mismatch")

     # don't compare output upon unsuccessful exit
     if main_bolt.returncode != 0:
         cfg.SKIP_BINARY_CMP = True

     # compare logs, skip_end=1 skips the line with time
     out = (
         compare_logs(main_out, cmp_out, skip_end=1, str_input=False)
         if cfg.COMPARE_OUTPUT
         else None
     )
     if out:
         print(tmp)
         print(fmt_cmp(out))
         write_to(fmt_cmp(out), os.path.join(tmp, "summary.txt"))
         exit("logs mismatch")

     if os.path.basename(sys.argv[0]) == "llvm-boltdiff":  # boltdiff mode
         # no output binary to compare, so just exit
         clean_exit(tmp, main_out, main_bolt.returncode, cfg)

     # compare binaries (using cmp)
     main_binary = args[args.index("-o") + 1]
     cmp_binary = cmp_args[cmp_args.index("-o") + 1]
     if main_binary == "/dev/null":
         assert cmp_binary == "/dev/null"
         cfg.SKIP_BINARY_CMP = True

     # report binary timing as csv: output binary; base bolt real; cmp bolt real
     report_real_time(main_binary, main_out, cmp_out, cfg)

     if not cfg.SKIP_BINARY_CMP:
         # check if files exist
         main_exists = os.path.exists(main_binary)
         cmp_exists = os.path.exists(cmp_binary)
         if main_exists and cmp_exists:
             # proceed to comparison
             pass
         elif not main_exists and not cmp_exists:
             # both don't exist, assume it's intended, skip comparison
             clean_exit(tmp, main_out, main_bolt.returncode, cfg)
         elif main_exists:
             assert not cmp_exists
             exit(f"{cmp_binary} doesn't exist")
         else:
             assert not main_exists
             exit(f"{main_binary} doesn't exist")

         cmp_proc = subprocess.run(
             ["cmp", "-b", main_binary, cmp_binary], capture_output=True, text=True
         )
         if cmp_proc.returncode:
             # check if output is an ELF file (magic bytes)
             with open(main_binary, "rb") as f:
                 magic = f.read(4)
                 if magic != b"\x7fELF":
                     exit("output mismatch")
             # check if ELF headers match
             mismatch, _ = compare_with(main_binary, cmp_binary, "readelf -We")
             if mismatch:
                 print(fmt_cmp(mismatch))
                 write_to(fmt_cmp(mismatch), os.path.join(tmp, "headers.txt"))
                 exit("headers mismatch")
             # if headers match, compare sections (skip line with filename)
             mismatch, hdr = compare_with(
                 main_binary, cmp_binary, "objdump -hw", skip_begin=2
             )
             assert not mismatch
             # check which section has the first mismatch
             mismatch_offset = parse_cmp_offset(cmp_proc.stdout)
             section = find_section(mismatch_offset, hdr)
             exit(f"binary mismatch @{hex(mismatch_offset)} ({section})")

     clean_exit(tmp, main_out, main_bolt.returncode, cfg)


 if __name__ == "__main__":
     # config generator mode if the script is launched as is
     if os.path.basename(__file__) == "llvm-bolt-wrapper.py":
         main_config_generator()
     else:
         # llvm-bolt interceptor mode otherwise
         main()
	#!/usr/bin/env python3
	import argparse
	import subprocess
	from typing import *
	import tempfile
	import copy
	import os
	import shutil
	import sys
	import re
	import configparser
	from types import SimpleNamespace
	from textwrap import dedent

	# USAGE:
	# 0. Prepare two BOLT build versions: base and compare.
	# 1. Create the config by invoking this script with required options.
	# Save the config as `llvm-bolt-wrapper.ini` next to the script or
	# in the testing directory.
	# In the base BOLT build directory:
	# 2. Rename `llvm-bolt` to `llvm-bolt.real`
	# 3. Create a symlink from this script to `llvm-bolt`
	# 4. Create `llvm-bolt-wrapper.ini` and fill it using the example below.
	#
	# This script will compare binaries produced by base and compare BOLT, and
	# report elapsed processing time and max RSS.

	# read options from config file llvm-bolt-wrapper.ini in script CWD
	#
	# [config]
	# # mandatory
	# base_bolt = /full/path/to/llvm-bolt.real
	# cmp_bolt = /full/path/to/other/llvm-bolt
	# # optional, default to False
	# verbose
	# keep_tmp
	# no_minimize
	# run_sequentially
	# compare_output
	# skip_binary_cmp
	# # optional, defaults to timing.log in CWD
	# timing_file = timing1.log


	def read_cfg():
	src_dir = os.path.dirname(os.path.abspath(__file__))
	cfg = configparser.ConfigParser(allow_no_value=True)
	cfgs = cfg.read("llvm-bolt-wrapper.ini")
	if not cfgs:
	cfgs = cfg.read(os.path.join(src_dir, "llvm-bolt-wrapper.ini"))
	assert cfgs, f"llvm-bolt-wrapper.ini is not found in {os.getcwd()}"

	def get_cfg(key):
	# if key is not present in config, assume False
	if key not in cfg["config"]:
	return False
	# if key is present, but has no value, assume True
	if not cfg["config"][key]:
	return True
	# if key has associated value, interpret the value
	return cfg["config"].getboolean(key)

	d = {
	# BOLT binary locations
	"BASE_BOLT": cfg["config"]["base_bolt"],
	"CMP_BOLT": cfg["config"]["cmp_bolt"],
	# optional
	"VERBOSE": get_cfg("verbose"),
	"KEEP_TMP": get_cfg("keep_tmp"),
	"NO_MINIMIZE": get_cfg("no_minimize"),
	"RUN_SEQUENTIALLY": get_cfg("run_sequentially"),
	"COMPARE_OUTPUT": get_cfg("compare_output"),
	"SKIP_BINARY_CMP": get_cfg("skip_binary_cmp"),
	"TIMING_FILE": cfg["config"].get("timing_file", "timing.log"),
	}
	if d["VERBOSE"]:
	print(f"Using config {os.path.abspath(cfgs[0])}")
	return SimpleNamespace(**d)


	# perf2bolt mode
	PERF2BOLT_MODE = ["-aggregate-only", "-ignore-build-id"]

	# boltdiff mode
	BOLTDIFF_MODE = ["-diff-only", "-o", "/dev/null"]

	# options to suppress binary differences as much as possible
	MINIMIZE_DIFFS = ["-bolt-info=0"]

	# bolt output options that need to be intercepted
	BOLT_OUTPUT_OPTS = {
	"-o": "BOLT output binary",
	"-w": "BOLT recorded profile",
	}

	# regex patterns to exclude the line from log comparison
	SKIP_MATCH = [
	"BOLT-INFO: BOLT version",
	r"^Args: ",
	r"^BOLT-DEBUG:",
	r"BOLT-INFO:.data.output data",
	"WARNING: reading perf data directly",
	]


	def run_cmd(cmd, out_f, cfg):
	if cfg.VERBOSE:
	print(" ".join(cmd))
	return subprocess.Popen(cmd, stdout=out_f, stderr=subprocess.STDOUT)


	def run_bolt(bolt_path, bolt_args, out_f, cfg):
	p2b = os.path.basename(sys.argv[0]) == "perf2bolt" # perf2bolt mode
	bd = os.path.basename(sys.argv[0]) == "llvm-boltdiff" # boltdiff mode
	cmd = ["/usr/bin/time", "-f", "%e %M", bolt_path] + bolt_args
	if p2b:
	# -ignore-build-id can occur at most once, hence remove it from cmd
	if "-ignore-build-id" in cmd:
	cmd.remove("-ignore-build-id")
	cmd += PERF2BOLT_MODE
	elif bd:
	cmd += BOLTDIFF_MODE
	elif not cfg.NO_MINIMIZE:
	cmd += MINIMIZE_DIFFS
	return run_cmd(cmd, out_f, cfg)


	def prepend_dash(args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
	"""
	Accepts parsed arguments and returns flat list with dash prepended to
	the option.
	Example: Namespace(o='test.tmp') -> ['-o', 'test.tmp']
	"""
	dashed = [("-" + key, value) for (key, value) in args.items()]
	flattened = list(sum(dashed, ()))
	return flattened


	def replace_cmp_path(tmp: AnyStr, args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
	"""
	Keeps file names, but replaces the path to a temp folder.
	Example: Namespace(o='abc/test.tmp') -> Namespace(o='/tmp/tmpf9un/test.tmp')
	Except preserve /dev/null.
	"""
	replace_path = (
	lambda x: os.path.join(tmp, os.path.basename(x))
	if x != "/dev/null"
	else "/dev/null"
	)
	new_args = {key: replace_path(value) for key, value in args.items()}
	return prepend_dash(new_args)


	def preprocess_args(args: argparse.Namespace) -> Mapping[AnyStr, AnyStr]:
	"""
	Drop options that weren't parsed (e.g. -w), convert to a dict
	"""
	return {key: value for key, value in vars(args).items() if value}


	def write_to(txt, filename, mode="w"):
	with open(filename, mode) as f:
	f.write(txt)


	def wait(proc, fdesc):
	proc.wait()
	fdesc.close()
	return open(fdesc.name)


	def compare_logs(main, cmp, skip_begin=0, skip_end=0, str_input=True):
	"""
	Compares logs but allows for certain lines to be excluded from comparison.
	If str_input is True (default), the input it assumed to be a string,
	which is split into lines. Otherwise the input is assumed to be a file.
	Returns None on success, mismatch otherwise.
	"""
	main_inp = main.splitlines() if str_input else main.readlines()
	cmp_inp = cmp.splitlines() if str_input else cmp.readlines()
	# rewind logs after consumption
	if not str_input:
	main.seek(0)
	cmp.seek(0)
	for lhs, rhs in list(zip(main_inp, cmp_inp))[skip_begin : -skip_end or None]:
	if lhs != rhs:
	# check skip patterns
	for skip in SKIP_MATCH:
	# both lines must contain the pattern
	if re.search(skip, lhs) and re.search(skip, rhs):
	break
	# otherwise return mismatching lines
	else:
	return (lhs, rhs)
	return None


	def fmt_cmp(cmp_tuple):
	if not cmp_tuple:
	return ""
	return f"main:\n{cmp_tuple[0]}\ncmp:\n{cmp_tuple[1]}\n"


	def compare_with(lhs, rhs, cmd, skip_begin=0, skip_end=0):
	"""
	Runs cmd on both lhs and rhs and compares stdout.
	Returns tuple (mismatch, lhs_stdout):
	- if stdout matches between two files, mismatch is None,
	- otherwise mismatch is a tuple of mismatching lines.
	"""
	run = lambda binary: subprocess.run(
	cmd.split() + [binary], text=True, check=True, capture_output=True
	).stdout
	run_lhs = run(lhs)
	run_rhs = run(rhs)
	cmp = compare_logs(run_lhs, run_rhs, skip_begin, skip_end)
	return cmp, run_lhs


	def parse_cmp_offset(cmp_out):
	"""
	Extracts byte number from cmp output:
	file1 file2 differ: byte X, line Y
	"""
	# NOTE: cmp counts bytes starting from 1!
	return int(re.search(r"byte (\d+),", cmp_out).groups()[0]) - 1


	def report_real_time(binary, main_err, cmp_err, cfg):
	"""
	Extracts real time from stderr and appends it to TIMING FILE it as csv:
	"output binary; base bolt; cmp bolt"
	"""

	def get_real_from_stderr(logline):
	return "; ".join(logline.split())

	for line in main_err:
	pass
	main = get_real_from_stderr(line)
	for line in cmp_err:
	pass
	cmp = get_real_from_stderr(line)
	write_to(f"{binary}; {main}; {cmp}\n", cfg.TIMING_FILE, "a")
	# rewind logs after consumption
	main_err.seek(0)
	cmp_err.seek(0)


	def clean_exit(tmp, out, exitcode, cfg):
	# temp files are only cleaned on success
	if not cfg.KEEP_TMP:
	shutil.rmtree(tmp)

	# report stdout and stderr from the main process
	shutil.copyfileobj(out, sys.stdout)
	sys.exit(exitcode)


	def find_section(offset, readelf_hdr):
	hdr = readelf_hdr.split("\n")
	section = None
	# extract sections table (parse objdump -hw output)
	for line in hdr[5:-1]:
	cols = line.strip().split()
	# extract section offset
	file_offset = int(cols[5], 16)
	# section size
	size = int(cols[2], 16)
	if offset >= file_offset and offset < file_offset + size:
	if sys.stdout.isatty(): # terminal supports colors
	print(f"\033[1m{line}\033[0m")
	else:
	print(f">{line}")
	section = cols[1]
	else:
	print(line)
	return section


	def main_config_generator():
	parser = argparse.ArgumentParser()
	parser.add_argument("base_bolt", help="Full path to base llvm-bolt binary")
	parser.add_argument("cmp_bolt", help="Full path to cmp llvm-bolt binary")
	parser.add_argument(
	"--verbose",
	action="store_true",
	help="Print subprocess invocation cmdline (default False)",
	)
	parser.add_argument(
	"--keep_tmp",
	action="store_true",
	help="Preserve tmp folder on a clean exit "
	"(tmp directory is preserved on crash by default)",
	)
	parser.add_argument(
	"--no_minimize",
	action="store_true",
	help=f"Do not add `{MINIMIZE_DIFFS}` that is used "
	"by default to reduce binary differences",
	)
	parser.add_argument(
	"--run_sequentially",
	action="store_true",
	help="Run both binaries sequentially (default "
	"in parallel). Use for timing comparison",
	)
	parser.add_argument(
	"--compare_output",
	action="store_true",
	help="Compare bolt stdout/stderr (disabled by default)",
	)
	parser.add_argument(
	"--skip_binary_cmp", action="store_true", help="Disable output comparison"
	)
	parser.add_argument(
	"--timing_file",
	help="Override path to timing log " "file (default `timing.log` in CWD)",
	)
	args = parser.parse_args()

	print(
	dedent(
	f"""\
	[config]
	# mandatory
	base_bolt = {args.base_bolt}
	cmp_bolt = {args.cmp_bolt}"""
	)
	)
	del args.base_bolt
	del args.cmp_bolt
	d = vars(args)
	if any(d.values()):
	print("# optional")
	for key, value in d.items():
	if value:
	print(key)


	def main():
	cfg = read_cfg()
	# intercept output arguments
	parser = argparse.ArgumentParser(add_help=False)
	for option, help in BOLT_OUTPUT_OPTS.items():
	parser.add_argument(option, help=help)
	args, unknownargs = parser.parse_known_args()
	args = preprocess_args(args)
	cmp_args = copy.deepcopy(args)
	tmp = tempfile.mkdtemp()
	cmp_args = replace_cmp_path(tmp, cmp_args)

	# reconstruct output arguments: prepend dash
	args = prepend_dash(args)

	# run both BOLT binaries
	main_f = open(os.path.join(tmp, "main_bolt.stdout"), "w")
	cmp_f = open(os.path.join(tmp, "cmp_bolt.stdout"), "w")
	main_bolt = run_bolt(cfg.BASE_BOLT, unknownargs + args, main_f, cfg)
	if cfg.RUN_SEQUENTIALLY:
	main_out = wait(main_bolt, main_f)
	cmp_bolt = run_bolt(cfg.CMP_BOLT, unknownargs + cmp_args, cmp_f, cfg)
	else:
	cmp_bolt = run_bolt(cfg.CMP_BOLT, unknownargs + cmp_args, cmp_f, cfg)
	main_out = wait(main_bolt, main_f)
	cmp_out = wait(cmp_bolt, cmp_f)

	# check exit code
	if main_bolt.returncode != cmp_bolt.returncode:
	print(tmp)
	exit("exitcode mismatch")

	# don't compare output upon unsuccessful exit
	if main_bolt.returncode != 0:
	cfg.SKIP_BINARY_CMP = True

	# compare logs, skip_end=1 skips the line with time
	out = (
	compare_logs(main_out, cmp_out, skip_end=1, str_input=False)
	if cfg.COMPARE_OUTPUT
	else None
	)
	if out:
	print(tmp)
	print(fmt_cmp(out))
	write_to(fmt_cmp(out), os.path.join(tmp, "summary.txt"))
	exit("logs mismatch")

	if os.path.basename(sys.argv[0]) == "llvm-boltdiff": # boltdiff mode
	# no output binary to compare, so just exit
	clean_exit(tmp, main_out, main_bolt.returncode, cfg)

	# compare binaries (using cmp)
	main_binary = args[args.index("-o") + 1]
	cmp_binary = cmp_args[cmp_args.index("-o") + 1]
	if main_binary == "/dev/null":
	assert cmp_binary == "/dev/null"
	cfg.SKIP_BINARY_CMP = True

	# report binary timing as csv: output binary; base bolt real; cmp bolt real
	report_real_time(main_binary, main_out, cmp_out, cfg)

	if not cfg.SKIP_BINARY_CMP:
	# check if files exist
	main_exists = os.path.exists(main_binary)
	cmp_exists = os.path.exists(cmp_binary)
	if main_exists and cmp_exists:
	# proceed to comparison
	pass
	elif not main_exists and not cmp_exists:
	# both don't exist, assume it's intended, skip comparison
	clean_exit(tmp, main_out, main_bolt.returncode, cfg)
	elif main_exists:
	assert not cmp_exists
	exit(f"{cmp_binary} doesn't exist")
	else:
	assert not main_exists
	exit(f"{main_binary} doesn't exist")

	cmp_proc = subprocess.run(
	["cmp", "-b", main_binary, cmp_binary], capture_output=True, text=True
	)
	if cmp_proc.returncode:
	# check if output is an ELF file (magic bytes)
	with open(main_binary, "rb") as f:
	magic = f.read(4)
	if magic != b"\x7fELF":
	exit("output mismatch")
	# check if ELF headers match
	mismatch, _ = compare_with(main_binary, cmp_binary, "readelf -We")
	if mismatch:
	print(fmt_cmp(mismatch))
	write_to(fmt_cmp(mismatch), os.path.join(tmp, "headers.txt"))
	exit("headers mismatch")
	# if headers match, compare sections (skip line with filename)
	mismatch, hdr = compare_with(
	main_binary, cmp_binary, "objdump -hw", skip_begin=2
	)
	assert not mismatch
	# check which section has the first mismatch
	mismatch_offset = parse_cmp_offset(cmp_proc.stdout)
	section = find_section(mismatch_offset, hdr)
	exit(f"binary mismatch @{hex(mismatch_offset)} ({section})")

	clean_exit(tmp, main_out, main_bolt.returncode, cfg)


	if __name__ == "__main__":
	# config generator mode if the script is launched as is
	if os.path.basename(__file__) == "llvm-bolt-wrapper.py":
	main_config_generator()
	else:
	# llvm-bolt interceptor mode otherwise
	main()