clang/utils/perf-training/perf-helper.py - llvm-project - Git at Google

 # ===- perf-helper.py - Perf training helpers -----------------*- python -*--===#
 #
 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
 # ===------------------------------------------------------------------------===#

 from __future__ import absolute_import, division, print_function

 import sys
 import os
 import subprocess
 import argparse
 import time
 import bisect
 import shlex
 import tempfile

 # Minimal environment handed to the compiler driver when probing for its cc1
 # command line: only PATH is forwarded from the caller's environment.
 test_env = {"PATH": os.environ["PATH"]}


 def findFilesWithExtension(path, extension):
     """Recursively collect files under ``path`` whose name ends in ``.<extension>``.

     Args:
         path: Directory to walk with ``os.walk``.
         extension: Suffix to match, given without the leading dot. It may
             itself contain dots (e.g. ``"perf.data"``).

     Returns:
         A list of matching paths, each joined onto the directory it was found
         in, in ``os.walk`` traversal order.
     """
     suffix = ".{}".format(extension)
     return [
         os.path.join(dirpath, name)
         for dirpath, _subdirs, names in os.walk(path)
         for name in names
         if name.endswith(suffix)
     ]


 def clean(args):
     """Delete every file with a given extension beneath one or more directories.

     Args:
         args: ``[<path>..., <extension>]`` - one or more directories followed
             by the extension (without the leading dot).

     Returns:
         0 on success, 1 if fewer than two arguments were supplied.
     """
     if len(args) < 2:
         print(
             "Usage: %s clean <paths> <extension>\n" % __file__
             + "\tRemoves all files with extension from <path>."
         )
         return 1
     # Every argument before the last one is a search root. The previous slice
     # started at index 1, which skipped the first (often the only) path.
     *paths, extension = args
     for path in paths:
         for filename in findFilesWithExtension(path, extension):
             os.remove(filename)
     return 0


 def merge(args):
     """Merge all ``.profraw`` files found under the given paths into one output.

     Args:
         args: ``[<llvm-profdata>, <output>, <path>...]``.

     Returns:
         0 on success, 1 if fewer than three arguments were supplied. A
         non-zero exit of the tool surfaces as ``subprocess.CalledProcessError``.
     """
     if len(args) < 3:
         print(
             "Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__
             + "\tMerges all profraw files from path into output."
         )
         return 1
     profdata_tool, output = args[0], args[1]
     raw_profiles = [
         raw
         for search_root in args[2:]
         for raw in findFilesWithExtension(search_root, "profraw")
     ]
     subprocess.check_call([profdata_tool, "merge", "-o", output] + raw_profiles)
     return 0


 def merge_fdata(args):
     """Merge all ``.fdata`` files found under one path into a single output.

     Args:
         args: Exactly ``[<merge-fdata>, <output>, <path>]``.

     Returns:
         0 on success, 1 if the argument count is not three. A non-zero exit
         of the tool surfaces as ``subprocess.CalledProcessError``.
     """
     if len(args) != 3:
         print(
             "Usage: %s merge-fdata <merge-fdata> <output> <path>\n" % __file__
             + "\tMerges all fdata files from path into output."
         )
         return 1
     tool, output, search_root = args
     subprocess.check_call(
         [tool, "-o", output, *findFilesWithExtension(search_root, "fdata")]
     )
     return 0


 def perf(args):
     """Run a command under ``perf record`` for BOLT profile collection.

     The profile is written to ``<pid>.perf.data`` in the current directory,
     where ``<pid>`` is this helper's process id.

     Args:
         args: Command-line words following the ``perf`` sub-command:
             optionally ``--lbr``, then the command to profile.

     Returns:
         0 on success. A non-zero exit of perf surfaces as
         ``subprocess.CalledProcessError``.
     """
     parser = argparse.ArgumentParser(
         prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
     )
     parser.add_argument(
         "--lbr", action="store_true", help="Use perf with branch stacks"
     )
     parser.add_argument("cmd", nargs=argparse.REMAINDER, help="")
     opts = parser.parse_args(args)

     record_cmd = [
         "perf",
         "record",
         "--event=cycles:u",
         "--freq=max",
         "--output=%d.perf.data" % os.getpid(),
     ]
     if opts.lbr:
         record_cmd.append("--branch-filter=any,u")
     # The first remaining word is dropped before the command is appended.
     # NOTE(review): presumably a "--" separator - confirm against callers.
     record_cmd += opts.cmd[1:]

     began = time.time()
     subprocess.check_call(record_cmd)
     print("... data collection took %.4fs" % (time.time() - began))
     return 0


 def perf2bolt(args):
     """Convert every ``.perf.data`` file under a path to a BOLT ``.fdata`` profile.

     Each input ``X.perf.data`` produces ``X.perf.data.fdata`` next to it.

     Args:
         args: Command-line words following the ``perf2bolt`` sub-command:
             ``<bolt> <path> <binary> [--lbr]``.

     Returns:
         0 on success. A non-zero exit of the tool surfaces as
         ``subprocess.CalledProcessError``.
     """
     parser = argparse.ArgumentParser(
         prog="perf-helper perf2bolt",
         description="perf2bolt conversion wrapper for perf.data files",
     )
     parser.add_argument("bolt", help="Path to llvm-bolt")
     parser.add_argument("path", help="Path containing perf.data files")
     parser.add_argument("binary", help="Input binary")
     parser.add_argument("--lbr", action="store_true", help="Use LBR perf2bolt mode")
     opts = parser.parse_args(args)

     base_cmd = [opts.bolt, opts.binary, "--aggregate-only", "--profile-format=yaml"]
     # "-nl" is added only when LBR mode was not requested; "-p" always
     # comes last so the perf data file name can follow it.
     base_cmd += ["-p"] if opts.lbr else ["-nl", "-p"]
     for perf_data in findFilesWithExtension(opts.path, "perf.data"):
         subprocess.check_call(base_cmd + [perf_data, "-o", perf_data + ".fdata"])
     return 0


 def dtrace(args):
     """Run a command under dtrace and log function-entry probes to a file.

     Leading ``--`` options are parsed here; the first argument that does not
     start with ``--`` begins the command to trace, which is handed to
     ``dtrace -c``. Output goes to ``<pid>.dtrace`` in the current directory.

     Args:
         args: Command-line words following the ``dtrace`` sub-command.

     Returns:
         0 on success. A non-zero exit of dtrace surfaces as
         ``subprocess.CalledProcessError``.
     """
     parser = argparse.ArgumentParser(
         prog="perf-helper dtrace",
         description="dtrace wrapper for order file generation",
     )
     parser.add_argument(
         "--buffer-size",
         metavar="size",
         type=int,
         required=False,
         default=1,
         help="dtrace buffer size in MB (default 1)",
     )
     parser.add_argument(
         "--use-oneshot",
         required=False,
         action="store_true",
         help="Use dtrace's oneshot probes",
     )
     parser.add_argument(
         "--use-ustack",
         required=False,
         action="store_true",
         help="Use dtrace's ustack to print function names",
     )
     parser.add_argument(
         "--cc1",
         required=False,
         action="store_true",
         help="Execute cc1 directly (don't profile the driver)",
     )
     parser.add_argument("cmd", nargs="*", help="")

     # Use python's arg parser to handle all leading option arguments, but pass
     # everything else through to dtrace. Because the split happens at the
     # first word not starting with "--", option values must be written as
     # "--opt=value"; a separate value word would be taken as the command.
     first_cmd = next(arg for arg in args if not arg.startswith("--"))
     last_arg_idx = args.index(first_cmd)

     opts = parser.parse_args(args[:last_arg_idx])
     cmd = args[last_arg_idx:]

     if opts.cc1:
         cmd = get_cc1_command_for_args(cmd, test_env)

     if opts.use_oneshot:
         target = "oneshot$target:::entry"
     else:
         target = "pid$target:::entry"
     predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
     log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
     if opts.use_ustack:
         action = "ustack(1);"
     else:
         action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
     dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

     dtrace_args = []
     if os.geteuid() != 0:
         # This string is not %-formatted, so the sudoers group prefix must be
         # a single "%".
         print(
             "Script must be run as root, or you must add the following to your sudoers:\n"
             + "%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace"
         )
         dtrace_args.append("sudo")

     dtrace_args.extend(
         (
             "dtrace",
             "-xevaltime=exec",
             "-xbufsize=%dm" % (opts.buffer_size),
             "-q",
             "-n",
             dtrace_script,
             "-c",
             " ".join(cmd),
         )
     )

     if sys.platform == "darwin":
         dtrace_args.append("-xmangled")

     start_time = time.time()

     with open("%d.dtrace" % os.getpid(), "w") as f:
         # Terminate and flush the header before the child starts writing to
         # the same descriptor, so it stays on its own first line.
         f.write("### Command: %s\n" % (dtrace_args,))
         f.flush()
         # stderr is discarded. An unread PIPE here could block the child once
         # the OS pipe buffer fills.
         subprocess.check_call(dtrace_args, stdout=f, stderr=subprocess.DEVNULL)

     elapsed = time.time() - start_time
     print("... data collection took %.4fs" % elapsed)

     return 0


 def get_cc1_command_for_args(cmd, env):
     """Return the single cc1 command line the compiler driver would run.

     The driver is executed with ``-###`` appended, its combined stdout/stderr
     is filtered for known informational lines, and the one remaining line is
     split with ``shlex``.

     Args:
         cmd: Driver command as a list of words.
         env: Environment mapping for the driver subprocess.

     Returns:
         The cc1 command as a list of words.

     Raises:
         SystemExit: With status 1 (after printing a diagnostic) when the
             output does not contain exactly one non-empty command line.
         subprocess.CalledProcessError: If the driver exits non-zero.
     """
     cc_output = subprocess.check_output(
         cmd + ["-###"], stderr=subprocess.STDOUT, env=env, universal_newlines=True
     ).strip()

     # Known informational lines printed alongside the command itself.
     noise_prefixes = (
         "Configured with:",
         "Target:",
         "Thread model:",
         "InstalledDir:",
         "LLVM Profile Note",
         " (in-process)",
     )
     cc_commands = [
         ln
         for ln in cc_output.split("\n")
         if not (
             ln == "Using built-in specs."
             or ln.startswith(noise_prefixes)
             or " version " in ln
         )
     ]

     cc1_cmd = shlex.split(cc_commands[0]) if len(cc_commands) == 1 else []
     if not cc1_cmd:
         print("Fatal error: unable to determine cc1 command: %r" % cc_output)
         # sys.exit rather than the site-provided exit(), which is intended
         # for interactive use and is not always defined.
         sys.exit(1)

     return cc1_cmd


 def cc1(args):
     """Resolve the driver command to its cc1 invocation and run that directly.

     Args:
         args: Command-line words following the ``cc1`` sub-command; the first
             word not starting with ``--`` begins the compiler command.

     Returns:
         0 on success. A non-zero exit of the cc1 command surfaces as
         ``subprocess.CalledProcessError``.
     """
     parser = argparse.ArgumentParser(
         prog="perf-helper cc1", description="cc1 wrapper for order file generation"
     )
     parser.add_argument("cmd", nargs="*", help="")

     # Use python's arg parser to handle all leading option arguments, but pass
     # everything else through as the compiler command.
     first_cmd = next(arg for arg in args if not arg.startswith("--"))
     last_arg_idx = args.index(first_cmd)

     # The result is unused; parsing is kept so unknown leading options are
     # still rejected.
     parser.parse_args(args[:last_arg_idx])
     cmd = args[last_arg_idx:]

     # Clear the profile file env, so that we don't generate profdata when
     # capturing the cc1 command. Work on a copy so the module-level test_env
     # is not modified.
     cc1_env = dict(test_env)
     cc1_env["LLVM_PROFILE_FILE"] = os.devnull
     cc1_cmd = get_cc1_command_for_args(cmd, cc1_env)

     subprocess.check_call(cc1_cmd)
     return 0


 def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set, missing_symbols, opts):
     """Yield ``(timestamp, symbol)`` pairs parsed from one dtrace log file.

     Only lines starting with ``dtrace-TS: `` or ``dtrace-Symbol: `` (after
     stripping whitespace) are interpreted; every other line is skipped.

     Args:
         path: Log file to read.
         all_symbols: Sorted list of symbols in the binary (may be empty).
         all_symbols_set: The same symbols as a set. When empty, every logged
             symbol is yielded unchanged.
         missing_symbols: Set updated with each unknown name that was resolved
             by prefix matching and reported.
         opts: Object whose ``show_missing_symbols`` attribute enables those
             reports.

     Yields:
         ``(timestamp, symbol)`` tuples, where ``timestamp`` is the most recent
         valid ``dtrace-TS`` value, or ``None`` before the first one.
     """
     # NOTE(review): the original comment here said the dtrace -xdemangle
     # option needs to be used, while dtrace() in this file passes -xmangled
     # on darwin - confirm which is intended.
     ts_tag = "dtrace-TS: "
     sym_tag = "dtrace-Symbol: "
     on_darwin = sys.platform == "darwin"

     def fix_mangling(name):
         # This is particular to OS X: names other than "start" get a leading
         # '_' when they do not already have one.
         if on_darwin and name[0] != "_" and name != "start":
             return "_" + name
         return name

     def prefix_matches(prefix):
         # all_symbols is sorted, so the matches form one contiguous run
         # beginning at the bisect position.
         idx = bisect.bisect_left(all_symbols, prefix)
         matches = []
         while idx < len(all_symbols) and all_symbols[idx].startswith(prefix):
             matches.append(all_symbols[idx])
             idx += 1
         return matches

     with open(path) as log:
         timestamp = None
         for raw in log:
             entry = raw.strip()

             if entry.startswith(ts_tag):
                 value = entry[len(ts_tag) :]
                 if value.isdigit():
                     timestamp = int(value)
                 else:
                     print(
                         "warning: unrecognized timestamp line %r, ignoring" % entry,
                         file=sys.stderr,
                     )
                 continue

             if not entry.startswith(sym_tag):
                 continue
             payload = entry[len(sym_tag) :]
             if not payload:
                 continue

             # A '`' marks a ustack(1) entry of the form
             # <modulename>`<modulefunc>; <modulefunc> is never truncated but
             # does need the mangling patched.
             if "`" in payload:
                 yield (timestamp, fix_mangling(payload.partition("`")[2]))
                 continue

             # Otherwise this is a probefunc printout.
             symbol = fix_mangling(payload)

             # No symbol table, or an exact hit: report it as is.
             if not all_symbols_set or symbol in all_symbols_set:
                 yield (timestamp, symbol)
                 continue

             # Unknown name: assume it was truncated and try to extend it.
             candidates = prefix_matches(symbol)
             if not candidates:
                 continue

             # Too many possible completions to be a useful prefix.
             if len(candidates) > 100:
                 print(
                     "warning: ignoring symbol %r " % symbol
                     + "(no match and too many possible suffixes)",
                     file=sys.stderr,
                 )
                 continue

             if opts.show_missing_symbols and symbol not in missing_symbols:
                 print(
                     "warning: resolved missing symbol %r" % symbol, file=sys.stderr
                 )
                 missing_symbols.add(symbol)

             # Treat every possible match as having occurred. This is an
             # over-approximation, but it should be ok in practice.
             for candidate in candidates:
                 yield (timestamp, candidate)


 def uniq(list):
     """Lazily yield the items of ``list`` in order, skipping repeats.

     Items must be hashable. The parameter name shadows the builtin; it is
     kept so the signature stays unchanged.
     """
     already_seen = set()
     for candidate in list:
         if candidate in already_seen:
             continue
         yield candidate
         already_seen.add(candidate)


 def form_by_call_order(symbol_lists):
     """Order symbols by first occurrence, taking the runs one after another."""

     # Simple strategy: concatenate the runs and keep each symbol's first
     # appearance, even across multiple runs.
     def in_run_order():
         for run in symbol_lists:
             yield from run

     return uniq(in_run_order())


 def form_by_call_order_fair(symbol_lists):
     """Order symbols by call order while pulling in each symbol's successors.

     This tries to respect the call order across all of the test cases instead
     of giving a huge preference to the first one.
     """
     # For every symbol, record which symbols directly follow it in any
     # de-duplicated run, in first-seen order.
     successors = {}
     for run in symbol_lists:
         deduped = list(uniq(run))
         for current, following in zip(deduped, deduped[1:]):
             known = successors.setdefault(current, [])
             if following not in known:
                 known.append(following)

     # Emit every symbol, each immediately followed by all of its recorded
     # successors; uniq() keeps only the first appearance of each.
     #
     # There isn't much science here, but this sometimes works better than the
     # more naive strategy. Then again, sometimes it doesn't, so more research
     # is probably needed.
     def expanded():
         for run in symbol_lists:
             for node in run:
                 yield node
                 yield from successors.get(node, [])

     return uniq(expanded())


 def form_by_frequency(symbol_lists):
     """Order symbols from most to least frequently seen across all runs.

     Ties keep first-seen order. This assumes the data files didn't use the
     oneshot dtrace method.
     """
     tally = {}
     for sym in (s for run in symbol_lists for s in run):
         tally[sym] = 1 + tally.get(sym, 0)

     # Stable sort over insertion-ordered keys, highest count first.
     return sorted(tally, key=lambda sym: -tally[sym])


 def form_by_random(symbol_lists):
     """Return every distinct symbol from all runs in a random order.

     Args:
         symbol_lists: Iterable of per-run symbol sequences.

     Returns:
         A new list holding each distinct symbol exactly once, shuffled.
     """
     # Imported here because the module's top-level imports do not include it.
     import random

     # random.shuffle needs a mutable sequence, so build a real list.
     # dict.fromkeys drops duplicates while keeping first-seen order.
     merged_symbols = list(
         dict.fromkeys(s for symbols in symbol_lists for s in symbols)
     )
     random.shuffle(merged_symbols)
     return merged_symbols


 def form_by_alphabetical(symbol_lists):
     """Return every distinct symbol from all runs as a sorted list."""
     return sorted({sym for run in symbol_lists for sym in run})


 # Registry of ordering strategies: every name defined so far that starts with
 # "form_by_" is exposed under the remainder of its name.
 methods = {
     name[len("form_by_") :]: value
     for name, value in list(locals().items())
     if name.startswith("form_by_")
 }


 def genOrderFile(args):
     """Generate an order file from dtrace logs.

     Every ``.dtrace`` file under the input directories is parsed, each run's
     symbols are sorted by timestamp, the selected ``form_by_*`` method merges
     the runs, and the result is written one symbol per line.

     Args:
         args: Command-line words following the ``gen-order-file`` sub-command.

     Returns:
         0 on success. argparse exits the process on invalid arguments.
     """
     # argparse takes the program name via ``prog``; the optparse-style
     # "%prog ..." string used before was printed literally in usage output.
     parser = argparse.ArgumentParser(
         prog="perf-helper gen-order-file",
         description="order file generation from dtrace data files",
     )
     parser.add_argument("input", nargs="+", help="dtrace data file directories")
     parser.add_argument(
         "--binary",
         metavar="PATH",
         type=str,
         dest="binary_path",
         help="Path to the binary being ordered (for getting all symbols)",
         default=None,
     )
     parser.add_argument(
         "--output",
         dest="output_path",
         help="path to output order file to write",
         default=None,
         required=True,
         metavar="PATH",
     )
     parser.add_argument(
         "--show-missing-symbols",
         dest="show_missing_symbols",
         help="show symbols which are 'fixed up' to a valid name (requires --binary)",
         action="store_true",
         default=None,
     )
     parser.add_argument(
         "--output-unordered-symbols",
         dest="output_unordered_symbols_path",
         help="write a list of the unordered symbols to PATH (requires --binary)",
         default=None,
         metavar="PATH",
     )
     parser.add_argument(
         "--method",
         dest="method",
         help="order file generation method to use",
         choices=list(methods.keys()),
         default="call_order",
     )
     opts = parser.parse_args(args)

     # If the user gave us a binary, get all the symbols in the binary by
     # reading 'nm -P' output; the symbol name is the first field of each line.
     if opts.binary_path is not None:
         output = subprocess.check_output(
             ["nm", "-P", opts.binary_path], universal_newlines=True
         )
         lines = output.split("\n")
         all_symbols = [ln.split(" ", 1)[0] for ln in lines if ln.strip()]
         print("found %d symbols in binary" % len(all_symbols))
         # Sorted because prefix lookup in the parser bisects this list.
         all_symbols.sort()
     else:
         all_symbols = []
     all_symbols_set = set(all_symbols)

     # Compute the list of input files.
     input_files = []
     for dirname in opts.input:
         input_files.extend(findFilesWithExtension(dirname, "dtrace"))

     # Load all of the input files.
     print("loading from %d data files" % len(input_files))
     missing_symbols = set()
     timestamped_symbol_lists = [
         list(
             parse_dtrace_symbol_file(
                 path, all_symbols, all_symbols_set, missing_symbols, opts
             )
         )
         for path in input_files
     ]

     # Reorder each symbol list by (timestamp, symbol) and drop the timestamps.
     symbol_lists = []
     for timestamped_symbols_list in timestamped_symbol_lists:
         timestamped_symbols_list.sort()
         symbol_lists.append([symbol for _, symbol in timestamped_symbols_list])

     # Execute the desired order file generation method. argparse's
     # ``choices`` guarantees the name is a key of ``methods``.
     method = methods.get(opts.method)
     result = list(method(symbol_lists))

     # Report to the user on what percentage of symbols are present in the
     # order file.
     num_ordered_symbols = len(result)
     if all_symbols:
         print(
             "note: order file contains %d/%d symbols (%.2f%%)"
             % (
                 num_ordered_symbols,
                 len(all_symbols),
                 100.0 * num_ordered_symbols / len(all_symbols),
             ),
             file=sys.stderr,
         )

     if opts.output_unordered_symbols_path:
         ordered_symbols_set = set(result)
         with open(opts.output_unordered_symbols_path, "w") as f:
             f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set))

     # Write the order file.
     with open(opts.output_path, "w") as f:
         f.write("\n".join(result))
         f.write("\n")

     return 0


 # Sub-command name (as given in sys.argv[1]) -> handler. main() calls the
 # handler with the remaining command-line words and exits with its result.
 commands = {
     "clean": clean,
     "merge": merge,
     "dtrace": dtrace,
     "cc1": cc1,
     "gen-order-file": genOrderFile,
     "merge-fdata": merge_fdata,
     "perf": perf,
     "perf2bolt": perf2bolt,
 }


 def main():
     """Dispatch ``sys.argv[1]`` to its sub-command and exit with its status.

     A missing or unknown sub-command prints a usage message listing the
     available commands to stderr and exits with status 1.
     """
     if len(sys.argv) < 2 or sys.argv[1] not in commands:
         print(
             "Usage: %s <command> [args...]\n" % sys.argv[0]
             + "\tAvailable commands: %s" % ", ".join(sorted(commands)),
             file=sys.stderr,
         )
         sys.exit(1)
     handler = commands[sys.argv[1]]
     sys.exit(handler(sys.argv[2:]))


 # Run the command dispatcher only when executed as a script, not on import.
 if __name__ == "__main__":
     main()
	# ===- perf-helper.py - Perf training helpers -----------------*- python -*--===#
	#
	# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	# See https://llvm.org/LICENSE.txt for license information.
	# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	#
	# ===------------------------------------------------------------------------===#

	from __future__ import absolute_import, division, print_function

	import sys
	import os
	import subprocess
	import argparse
	import time
	import bisect
	import shlex
	import tempfile

	# Minimal environment handed to the compiler driver when probing for its cc1
	# command line: only PATH is forwarded from the caller's environment.
	test_env = {"PATH": os.environ["PATH"]}


	def findFilesWithExtension(path, extension):
	    """Recursively collect files under ``path`` whose name ends in ``.<extension>``.

	    Args:
	        path: Directory to walk with ``os.walk``.
	        extension: Suffix to match, given without the leading dot. It may
	            itself contain dots (e.g. ``"perf.data"``).

	    Returns:
	        A list of matching paths, each joined onto the directory it was found
	        in, in ``os.walk`` traversal order.
	    """
	    suffix = ".{}".format(extension)
	    return [
	        os.path.join(dirpath, name)
	        for dirpath, _subdirs, names in os.walk(path)
	        for name in names
	        if name.endswith(suffix)
	    ]


	def clean(args):
	    """Delete every file with a given extension beneath one or more directories.

	    Args:
	        args: ``[<path>..., <extension>]`` - one or more directories followed
	            by the extension (without the leading dot).

	    Returns:
	        0 on success, 1 if fewer than two arguments were supplied.
	    """
	    if len(args) < 2:
	        print(
	            "Usage: %s clean <paths> <extension>\n" % __file__
	            + "\tRemoves all files with extension from <path>."
	        )
	        return 1
	    # Every argument before the last one is a search root. The previous slice
	    # started at index 1, which skipped the first (often the only) path.
	    *paths, extension = args
	    for path in paths:
	        for filename in findFilesWithExtension(path, extension):
	            os.remove(filename)
	    return 0


	def merge(args):
	    """Merge all ``.profraw`` files found under the given paths into one output.

	    Args:
	        args: ``[<llvm-profdata>, <output>, <path>...]``.

	    Returns:
	        0 on success, 1 if fewer than three arguments were supplied. A
	        non-zero exit of the tool surfaces as ``subprocess.CalledProcessError``.
	    """
	    if len(args) < 3:
	        print(
	            "Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__
	            + "\tMerges all profraw files from path into output."
	        )
	        return 1
	    profdata_tool, output = args[0], args[1]
	    raw_profiles = [
	        raw
	        for search_root in args[2:]
	        for raw in findFilesWithExtension(search_root, "profraw")
	    ]
	    subprocess.check_call([profdata_tool, "merge", "-o", output] + raw_profiles)
	    return 0


	def merge_fdata(args):
	    """Merge all ``.fdata`` files found under one path into a single output.

	    Args:
	        args: Exactly ``[<merge-fdata>, <output>, <path>]``.

	    Returns:
	        0 on success, 1 if the argument count is not three. A non-zero exit
	        of the tool surfaces as ``subprocess.CalledProcessError``.
	    """
	    if len(args) != 3:
	        print(
	            "Usage: %s merge-fdata <merge-fdata> <output> <path>\n" % __file__
	            + "\tMerges all fdata files from path into output."
	        )
	        return 1
	    tool, output, search_root = args
	    subprocess.check_call(
	        [tool, "-o", output, *findFilesWithExtension(search_root, "fdata")]
	    )
	    return 0


	def perf(args):
	    """Run a command under ``perf record`` for BOLT profile collection.

	    The profile is written to ``<pid>.perf.data`` in the current directory,
	    where ``<pid>`` is this helper's process id.

	    Args:
	        args: Command-line words following the ``perf`` sub-command:
	            optionally ``--lbr``, then the command to profile.

	    Returns:
	        0 on success. A non-zero exit of perf surfaces as
	        ``subprocess.CalledProcessError``.
	    """
	    parser = argparse.ArgumentParser(
	        prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
	    )
	    parser.add_argument(
	        "--lbr", action="store_true", help="Use perf with branch stacks"
	    )
	    parser.add_argument("cmd", nargs=argparse.REMAINDER, help="")
	    opts = parser.parse_args(args)

	    record_cmd = [
	        "perf",
	        "record",
	        "--event=cycles:u",
	        "--freq=max",
	        "--output=%d.perf.data" % os.getpid(),
	    ]
	    if opts.lbr:
	        record_cmd.append("--branch-filter=any,u")
	    # The first remaining word is dropped before the command is appended.
	    # NOTE(review): presumably a "--" separator - confirm against callers.
	    record_cmd += opts.cmd[1:]

	    began = time.time()
	    subprocess.check_call(record_cmd)
	    print("... data collection took %.4fs" % (time.time() - began))
	    return 0


	def perf2bolt(args):
	    """Convert every ``.perf.data`` file under a path to a BOLT ``.fdata`` profile.

	    Each input ``X.perf.data`` produces ``X.perf.data.fdata`` next to it.

	    Args:
	        args: Command-line words following the ``perf2bolt`` sub-command:
	            ``<bolt> <path> <binary> [--lbr]``.

	    Returns:
	        0 on success. A non-zero exit of the tool surfaces as
	        ``subprocess.CalledProcessError``.
	    """
	    parser = argparse.ArgumentParser(
	        prog="perf-helper perf2bolt",
	        description="perf2bolt conversion wrapper for perf.data files",
	    )
	    parser.add_argument("bolt", help="Path to llvm-bolt")
	    parser.add_argument("path", help="Path containing perf.data files")
	    parser.add_argument("binary", help="Input binary")
	    parser.add_argument("--lbr", action="store_true", help="Use LBR perf2bolt mode")
	    opts = parser.parse_args(args)

	    base_cmd = [opts.bolt, opts.binary, "--aggregate-only", "--profile-format=yaml"]
	    # "-nl" is added only when LBR mode was not requested; "-p" always
	    # comes last so the perf data file name can follow it.
	    base_cmd += ["-p"] if opts.lbr else ["-nl", "-p"]
	    for perf_data in findFilesWithExtension(opts.path, "perf.data"):
	        subprocess.check_call(base_cmd + [perf_data, "-o", perf_data + ".fdata"])
	    return 0


	def dtrace(args):
	    """Run a command under dtrace and log function-entry probes to a file.

	    Leading ``--`` options are parsed here; the first argument that does not
	    start with ``--`` begins the command to trace, which is handed to
	    ``dtrace -c``. Output goes to ``<pid>.dtrace`` in the current directory.

	    Args:
	        args: Command-line words following the ``dtrace`` sub-command.

	    Returns:
	        0 on success. A non-zero exit of dtrace surfaces as
	        ``subprocess.CalledProcessError``.
	    """
	    parser = argparse.ArgumentParser(
	        prog="perf-helper dtrace",
	        description="dtrace wrapper for order file generation",
	    )
	    parser.add_argument(
	        "--buffer-size",
	        metavar="size",
	        type=int,
	        required=False,
	        default=1,
	        help="dtrace buffer size in MB (default 1)",
	    )
	    parser.add_argument(
	        "--use-oneshot",
	        required=False,
	        action="store_true",
	        help="Use dtrace's oneshot probes",
	    )
	    parser.add_argument(
	        "--use-ustack",
	        required=False,
	        action="store_true",
	        help="Use dtrace's ustack to print function names",
	    )
	    parser.add_argument(
	        "--cc1",
	        required=False,
	        action="store_true",
	        help="Execute cc1 directly (don't profile the driver)",
	    )
	    parser.add_argument("cmd", nargs="*", help="")

	    # Use python's arg parser to handle all leading option arguments, but pass
	    # everything else through to dtrace. Because the split happens at the
	    # first word not starting with "--", option values must be written as
	    # "--opt=value"; a separate value word would be taken as the command.
	    first_cmd = next(arg for arg in args if not arg.startswith("--"))
	    last_arg_idx = args.index(first_cmd)

	    opts = parser.parse_args(args[:last_arg_idx])
	    cmd = args[last_arg_idx:]

	    if opts.cc1:
	        cmd = get_cc1_command_for_args(cmd, test_env)

	    if opts.use_oneshot:
	        target = "oneshot$target:::entry"
	    else:
	        target = "pid$target:::entry"
	    predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
	    log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
	    if opts.use_ustack:
	        action = "ustack(1);"
	    else:
	        action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
	    dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

	    dtrace_args = []
	    if os.geteuid() != 0:
	        # This string is not %-formatted, so the sudoers group prefix must be
	        # a single "%".
	        print(
	            "Script must be run as root, or you must add the following to your sudoers:\n"
	            + "%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace"
	        )
	        dtrace_args.append("sudo")

	    dtrace_args.extend(
	        (
	            "dtrace",
	            "-xevaltime=exec",
	            "-xbufsize=%dm" % (opts.buffer_size),
	            "-q",
	            "-n",
	            dtrace_script,
	            "-c",
	            " ".join(cmd),
	        )
	    )

	    if sys.platform == "darwin":
	        dtrace_args.append("-xmangled")

	    start_time = time.time()

	    with open("%d.dtrace" % os.getpid(), "w") as f:
	        # Terminate and flush the header before the child starts writing to
	        # the same descriptor, so it stays on its own first line.
	        f.write("### Command: %s\n" % (dtrace_args,))
	        f.flush()
	        # stderr is discarded. An unread PIPE here could block the child once
	        # the OS pipe buffer fills.
	        subprocess.check_call(dtrace_args, stdout=f, stderr=subprocess.DEVNULL)

	    elapsed = time.time() - start_time
	    print("... data collection took %.4fs" % elapsed)

	    return 0


	def get_cc1_command_for_args(cmd, env):
	    """Return the single cc1 command line the compiler driver would run.

	    The driver is executed with ``-###`` appended, its combined stdout/stderr
	    is filtered for known informational lines, and the one remaining line is
	    split with ``shlex``.

	    Args:
	        cmd: Driver command as a list of words.
	        env: Environment mapping for the driver subprocess.

	    Returns:
	        The cc1 command as a list of words.

	    Raises:
	        SystemExit: With status 1 (after printing a diagnostic) when the
	            output does not contain exactly one non-empty command line.
	        subprocess.CalledProcessError: If the driver exits non-zero.
	    """
	    cc_output = subprocess.check_output(
	        cmd + ["-###"], stderr=subprocess.STDOUT, env=env, universal_newlines=True
	    ).strip()

	    # Known informational lines printed alongside the command itself.
	    noise_prefixes = (
	        "Configured with:",
	        "Target:",
	        "Thread model:",
	        "InstalledDir:",
	        "LLVM Profile Note",
	        " (in-process)",
	    )
	    cc_commands = [
	        ln
	        for ln in cc_output.split("\n")
	        if not (
	            ln == "Using built-in specs."
	            or ln.startswith(noise_prefixes)
	            or " version " in ln
	        )
	    ]

	    cc1_cmd = shlex.split(cc_commands[0]) if len(cc_commands) == 1 else []
	    if not cc1_cmd:
	        print("Fatal error: unable to determine cc1 command: %r" % cc_output)
	        # sys.exit rather than the site-provided exit(), which is intended
	        # for interactive use and is not always defined.
	        sys.exit(1)

	    return cc1_cmd


	def cc1(args):
	    """Resolve the driver command to its cc1 invocation and run that directly.

	    Args:
	        args: Command-line words following the ``cc1`` sub-command; the first
	            word not starting with ``--`` begins the compiler command.

	    Returns:
	        0 on success. A non-zero exit of the cc1 command surfaces as
	        ``subprocess.CalledProcessError``.
	    """
	    parser = argparse.ArgumentParser(
	        prog="perf-helper cc1", description="cc1 wrapper for order file generation"
	    )
	    parser.add_argument("cmd", nargs="*", help="")

	    # Use python's arg parser to handle all leading option arguments, but pass
	    # everything else through as the compiler command.
	    first_cmd = next(arg for arg in args if not arg.startswith("--"))
	    last_arg_idx = args.index(first_cmd)

	    # The result is unused; parsing is kept so unknown leading options are
	    # still rejected.
	    parser.parse_args(args[:last_arg_idx])
	    cmd = args[last_arg_idx:]

	    # Clear the profile file env, so that we don't generate profdata when
	    # capturing the cc1 command. Work on a copy so the module-level test_env
	    # is not modified.
	    cc1_env = dict(test_env)
	    cc1_env["LLVM_PROFILE_FILE"] = os.devnull
	    cc1_cmd = get_cc1_command_for_args(cmd, cc1_env)

	    subprocess.check_call(cc1_cmd)
	    return 0


	def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set, missing_symbols, opts):
	    """Yield ``(timestamp, symbol)`` pairs parsed from one dtrace log file.

	    Only lines starting with ``dtrace-TS: `` or ``dtrace-Symbol: `` (after
	    stripping whitespace) are interpreted; every other line is skipped.

	    Args:
	        path: Log file to read.
	        all_symbols: Sorted list of symbols in the binary (may be empty).
	        all_symbols_set: The same symbols as a set. When empty, every logged
	            symbol is yielded unchanged.
	        missing_symbols: Set updated with each unknown name that was resolved
	            by prefix matching and reported.
	        opts: Object whose ``show_missing_symbols`` attribute enables those
	            reports.

	    Yields:
	        ``(timestamp, symbol)`` tuples, where ``timestamp`` is the most recent
	        valid ``dtrace-TS`` value, or ``None`` before the first one.
	    """
	    # NOTE(review): the original comment here said the dtrace -xdemangle
	    # option needs to be used, while dtrace() in this file passes -xmangled
	    # on darwin - confirm which is intended.
	    ts_tag = "dtrace-TS: "
	    sym_tag = "dtrace-Symbol: "
	    on_darwin = sys.platform == "darwin"

	    def fix_mangling(name):
	        # This is particular to OS X: names other than "start" get a leading
	        # '_' when they do not already have one.
	        if on_darwin and name[0] != "_" and name != "start":
	            return "_" + name
	        return name

	    def prefix_matches(prefix):
	        # all_symbols is sorted, so the matches form one contiguous run
	        # beginning at the bisect position.
	        idx = bisect.bisect_left(all_symbols, prefix)
	        matches = []
	        while idx < len(all_symbols) and all_symbols[idx].startswith(prefix):
	            matches.append(all_symbols[idx])
	            idx += 1
	        return matches

	    with open(path) as log:
	        timestamp = None
	        for raw in log:
	            entry = raw.strip()

	            if entry.startswith(ts_tag):
	                value = entry[len(ts_tag) :]
	                if value.isdigit():
	                    timestamp = int(value)
	                else:
	                    print(
	                        "warning: unrecognized timestamp line %r, ignoring" % entry,
	                        file=sys.stderr,
	                    )
	                continue

	            if not entry.startswith(sym_tag):
	                continue
	            payload = entry[len(sym_tag) :]
	            if not payload:
	                continue

	            # A '`' marks a ustack(1) entry of the form
	            # <modulename>`<modulefunc>; <modulefunc> is never truncated but
	            # does need the mangling patched.
	            if "`" in payload:
	                yield (timestamp, fix_mangling(payload.partition("`")[2]))
	                continue

	            # Otherwise this is a probefunc printout.
	            symbol = fix_mangling(payload)

	            # No symbol table, or an exact hit: report it as is.
	            if not all_symbols_set or symbol in all_symbols_set:
	                yield (timestamp, symbol)
	                continue

	            # Unknown name: assume it was truncated and try to extend it.
	            candidates = prefix_matches(symbol)
	            if not candidates:
	                continue

	            # Too many possible completions to be a useful prefix.
	            if len(candidates) > 100:
	                print(
	                    "warning: ignoring symbol %r " % symbol
	                    + "(no match and too many possible suffixes)",
	                    file=sys.stderr,
	                )
	                continue

	            if opts.show_missing_symbols and symbol not in missing_symbols:
	                print(
	                    "warning: resolved missing symbol %r" % symbol, file=sys.stderr
	                )
	                missing_symbols.add(symbol)

	            # Treat every possible match as having occurred. This is an
	            # over-approximation, but it should be ok in practice.
	            for candidate in candidates:
	                yield (timestamp, candidate)


	def uniq(list):
	    """Lazily yield the items of ``list`` in order, skipping repeats.

	    Items must be hashable. The parameter name shadows the builtin; it is
	    kept so the signature stays unchanged.
	    """
	    already_seen = set()
	    for candidate in list:
	        if candidate in already_seen:
	            continue
	        yield candidate
	        already_seen.add(candidate)


	def form_by_call_order(symbol_lists):
	    """Order symbols by first occurrence, taking the runs one after another."""

	    # Simple strategy: concatenate the runs and keep each symbol's first
	    # appearance, even across multiple runs.
	    def in_run_order():
	        for run in symbol_lists:
	            yield from run

	    return uniq(in_run_order())


	def form_by_call_order_fair(symbol_lists):
	    """Order symbols by call order while pulling in each symbol's successors.

	    This tries to respect the call order across all of the test cases instead
	    of giving a huge preference to the first one.
	    """
	    # For every symbol, record which symbols directly follow it in any
	    # de-duplicated run, in first-seen order.
	    successors = {}
	    for run in symbol_lists:
	        deduped = list(uniq(run))
	        for current, following in zip(deduped, deduped[1:]):
	            known = successors.setdefault(current, [])
	            if following not in known:
	                known.append(following)

	    # Emit every symbol, each immediately followed by all of its recorded
	    # successors; uniq() keeps only the first appearance of each.
	    #
	    # There isn't much science here, but this sometimes works better than the
	    # more naive strategy. Then again, sometimes it doesn't, so more research
	    # is probably needed.
	    def expanded():
	        for run in symbol_lists:
	            for node in run:
	                yield node
	                yield from successors.get(node, [])

	    return uniq(expanded())


	def form_by_frequency(symbol_lists):
	    """Order symbols from most to least frequently seen across all runs.

	    Ties keep first-seen order. This assumes the data files didn't use the
	    oneshot dtrace method.
	    """
	    tally = {}
	    for sym in (s for run in symbol_lists for s in run):
	        tally[sym] = 1 + tally.get(sym, 0)

	    # Stable sort over insertion-ordered keys, highest count first.
	    return sorted(tally, key=lambda sym: -tally[sym])


	def form_by_random(symbol_lists):
	    """Return every distinct symbol from all runs in a random order.

	    Args:
	        symbol_lists: Iterable of per-run symbol sequences.

	    Returns:
	        A new list holding each distinct symbol exactly once, shuffled.
	    """
	    # Imported here because the module's top-level imports do not include it.
	    import random

	    # random.shuffle needs a mutable sequence, so build a real list.
	    # dict.fromkeys drops duplicates while keeping first-seen order.
	    merged_symbols = list(
	        dict.fromkeys(s for symbols in symbol_lists for s in symbols)
	    )
	    random.shuffle(merged_symbols)
	    return merged_symbols


	def form_by_alphabetical(symbol_lists):
	    """Return every distinct symbol from all runs as a sorted list."""
	    return sorted({sym for run in symbol_lists for sym in run})


	# Registry of ordering strategies: every name defined so far that starts with
	# "form_by_" is exposed under the remainder of its name.
	methods = {
	    name[len("form_by_") :]: value
	    for name, value in list(locals().items())
	    if name.startswith("form_by_")
	}


	def genOrderFile(args):
	    """Generate an order file from dtrace logs.

	    Every ``.dtrace`` file under the input directories is parsed, each run's
	    symbols are sorted by timestamp, the selected ``form_by_*`` method merges
	    the runs, and the result is written one symbol per line.

	    Args:
	        args: Command-line words following the ``gen-order-file`` sub-command.

	    Returns:
	        0 on success. argparse exits the process on invalid arguments.
	    """
	    # argparse takes the program name via ``prog``; the optparse-style
	    # "%prog ..." string used before was printed literally in usage output.
	    parser = argparse.ArgumentParser(
	        prog="perf-helper gen-order-file",
	        description="order file generation from dtrace data files",
	    )
	    parser.add_argument("input", nargs="+", help="dtrace data file directories")
	    parser.add_argument(
	        "--binary",
	        metavar="PATH",
	        type=str,
	        dest="binary_path",
	        help="Path to the binary being ordered (for getting all symbols)",
	        default=None,
	    )
	    parser.add_argument(
	        "--output",
	        dest="output_path",
	        help="path to output order file to write",
	        default=None,
	        required=True,
	        metavar="PATH",
	    )
	    parser.add_argument(
	        "--show-missing-symbols",
	        dest="show_missing_symbols",
	        help="show symbols which are 'fixed up' to a valid name (requires --binary)",
	        action="store_true",
	        default=None,
	    )
	    parser.add_argument(
	        "--output-unordered-symbols",
	        dest="output_unordered_symbols_path",
	        help="write a list of the unordered symbols to PATH (requires --binary)",
	        default=None,
	        metavar="PATH",
	    )
	    parser.add_argument(
	        "--method",
	        dest="method",
	        help="order file generation method to use",
	        choices=list(methods.keys()),
	        default="call_order",
	    )
	    opts = parser.parse_args(args)

	    # If the user gave us a binary, get all the symbols in the binary by
	    # reading 'nm -P' output; the symbol name is the first field of each line.
	    if opts.binary_path is not None:
	        output = subprocess.check_output(
	            ["nm", "-P", opts.binary_path], universal_newlines=True
	        )
	        lines = output.split("\n")
	        all_symbols = [ln.split(" ", 1)[0] for ln in lines if ln.strip()]
	        print("found %d symbols in binary" % len(all_symbols))
	        # Sorted because prefix lookup in the parser bisects this list.
	        all_symbols.sort()
	    else:
	        all_symbols = []
	    all_symbols_set = set(all_symbols)

	    # Compute the list of input files.
	    input_files = []
	    for dirname in opts.input:
	        input_files.extend(findFilesWithExtension(dirname, "dtrace"))

	    # Load all of the input files.
	    print("loading from %d data files" % len(input_files))
	    missing_symbols = set()
	    timestamped_symbol_lists = [
	        list(
	            parse_dtrace_symbol_file(
	                path, all_symbols, all_symbols_set, missing_symbols, opts
	            )
	        )
	        for path in input_files
	    ]

	    # Reorder each symbol list by (timestamp, symbol) and drop the timestamps.
	    symbol_lists = []
	    for timestamped_symbols_list in timestamped_symbol_lists:
	        timestamped_symbols_list.sort()
	        symbol_lists.append([symbol for _, symbol in timestamped_symbols_list])

	    # Execute the desired order file generation method. argparse's
	    # ``choices`` guarantees the name is a key of ``methods``.
	    method = methods.get(opts.method)
	    result = list(method(symbol_lists))

	    # Report to the user on what percentage of symbols are present in the
	    # order file.
	    num_ordered_symbols = len(result)
	    if all_symbols:
	        print(
	            "note: order file contains %d/%d symbols (%.2f%%)"
	            % (
	                num_ordered_symbols,
	                len(all_symbols),
	                100.0 * num_ordered_symbols / len(all_symbols),
	            ),
	            file=sys.stderr,
	        )

	    if opts.output_unordered_symbols_path:
	        ordered_symbols_set = set(result)
	        with open(opts.output_unordered_symbols_path, "w") as f:
	            f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set))

	    # Write the order file.
	    with open(opts.output_path, "w") as f:
	        f.write("\n".join(result))
	        f.write("\n")

	    return 0


	# Sub-command name (as given in sys.argv[1]) -> handler. main() calls the
	# handler with the remaining command-line words and exits with its result.
	commands = {
	"clean": clean,
	"merge": merge,
	"dtrace": dtrace,
	"cc1": cc1,
	"gen-order-file": genOrderFile,
	"merge-fdata": merge_fdata,
	"perf": perf,
	"perf2bolt": perf2bolt,
	}


	def main():
	    """Dispatch ``sys.argv[1]`` to its sub-command and exit with its status.

	    A missing or unknown sub-command prints a usage message listing the
	    available commands to stderr and exits with status 1.
	    """
	    if len(sys.argv) < 2 or sys.argv[1] not in commands:
	        print(
	            "Usage: %s <command> [args...]\n" % sys.argv[0]
	            + "\tAvailable commands: %s" % ", ".join(sorted(commands)),
	            file=sys.stderr,
	        )
	        sys.exit(1)
	    handler = commands[sys.argv[1]]
	    sys.exit(handler(sys.argv[2:]))


	# Run the command dispatcher only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()