#!/usr/bin/env python
#
# Given a previously good compile, narrow down miscompiles.
# Expects two directories named "before" and "after", each containing a set
# of assembly or object files, where the "after" version is assumed to be
# broken. You also have to provide a script called "link_test". It is called
# with a list of files which should be linked together and the result tested.
# "link_test" should return with exit code 0 if linking and testing succeeded.
#
# If a response file is provided, only the object files listed in that file
# are inspected. In addition, "link_test" is called with a temporary response
# file representing one iteration of bisection.
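# The response file is expected to contain relative paths to object files,
# separated by whitespace or newlines, for example (placeholder names):
#   main.o util/helpers.o
#   util/extra.o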
#
# abtest.py operates by taking all files from the "before" directory and, in
# each step, replacing one of them with the corresponding file from the
# "after" directory.
#
# Additionally, you can perform the same steps with a single .s file. In this
# mode, functions are identified by " -- Begin function FunctionName" and
# " -- End function" markers. abtest.py then takes all functions from the
# file in the "before" directory and, in each step, replaces one function
# with the corresponding function from the "after" file.
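# The markers correspond to the comments LLVM emits into generated assembly;
# the relevant lines look roughly like this (the comment character depends on
# the target, and "myfunc" is only a placeholder name):
#       .globl  myfunc                  # -- Begin function myfunc
#   myfunc:
#       ...
#                                       # -- End function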
#
# Example usage to identify miscompiled files:
#   1. Create a link_test script, make it executable. Simple example:
#        clang "$@" -o /tmp/test && /tmp/test || { echo "PROBLEM"; exit 1; }
#   2. Run the script to figure out which files are miscompiled:
#       > ./abtest.py
#       somefile.s: ok
#       someotherfile.s: skipped: same content
#       anotherfile.s: failed: './link_test' exitcode != 0
#       ...
# Example usage to identify miscompiled functions inside a file:
#   3. Run the tests on a single file (assuming before/file.s and
#      after/file.s exist)
#       > ./abtest.py file.s
#       funcname1 [0/XX]: ok
#       funcname2 [1/XX]: ok
#       funcname3 [2/XX]: skipped: same content
#       funcname4 [3/XX]: failed: './link_test' exitcode != 0
#       ...
from fnmatch import filter
from sys import stderr
import argparse
import filecmp
import os
import subprocess
import sys
import tempfile

# Specify LINKTEST via `--test`. Default value is './link_test'.
LINKTEST = ""
ESCAPE = "\033[%sm"
BOLD = ESCAPE % "1"
RED = ESCAPE % "31"
NORMAL = ESCAPE % "0"
FAILED = RED + "failed" + NORMAL


def find(dir, file_filter=None):
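    """Return a sorted list of all files below `dir`. If `file_filter` is
    given, only paths matching that fnmatch pattern are included."""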
    files = [
        walkdir[0] + "/" + file
        for walkdir in os.walk(dir)
        for file in walkdir[2]
    ]
    if file_filter is not None:
        files = filter(files, file_filter)
    return sorted(files)


def error(message):
    stderr.write("Error: %s\n" % (message,))


def warn(message):
    stderr.write("Warning: %s\n" % (message,))


def info(message):
    stderr.write("Info: %s\n" % (message,))


def announce_test(name):
    stderr.write("%s%s%s: " % (BOLD, name, NORMAL))
    stderr.flush()


def announce_result(result):
    stderr.write(result)
    stderr.write("\n")
    stderr.flush()


def format_namelist(l):
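    """Return a short human-readable summary of a list of names, showing at
    most the first three entries."""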
    result = ", ".join(l[0:3])
    if len(l) > 3:
        result += "... (%d total)" % len(l)
    return result


def check_sanity(choices, perform_test):
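    """Verify that picking every choice from A passes the test and that
    picking every choice from B fails it; exit otherwise, since bisecting
    would be meaningless."""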
    announce_test("sanity check A")
    all_a = {name: a_b[0] for name, a_b in choices}
    res_a = perform_test(all_a)
    if res_a is not True:
        error("Picking all choices from A failed to pass the test")
        sys.exit(1)

    announce_test("sanity check B (expecting failure)")
    all_b = {name: a_b[1] for name, a_b in choices}
    res_b = perform_test(all_b)
    if res_b is not False:
        error("Picking all choices from B unexpectedly passed the test")
        sys.exit(1)


def check_sequentially(choices, perform_test):
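    """Test each choice on its own: start from the all-A configuration and
    swap in the B variant of a single name per step. Return the set of names
    whose B variant still passed the test."""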
    known_good = set()
    all_a = {name: a_b[0] for name, a_b in choices}
    n = 1
    for name, a_b in sorted(choices):
        picks = dict(all_a)
        picks[name] = a_b[1]
        announce_test("checking %s [%d/%d]" % (name, n, len(choices)))
        n += 1
        res = perform_test(picks)
        if res is True:
            known_good.add(name)
    return known_good


def check_bisect(choices, perform_test):
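    """Bisect over the choices: swap whole partitions of names to their B
    variants on top of the all-A configuration and split partitions that fail
    the test into halves. Return the set of names known to be good."""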
    known_good = set()
    if len(choices) == 0:
        return known_good

    choice_map = dict(choices)
    all_a = {name: a_b[0] for name, a_b in choices}

    def test_partition(partition, upcoming_partition):
        # Compute the maximum number of checks we have to do in the worst
        # case.
        max_remaining_steps = len(partition) * 2 - 1
        if upcoming_partition is not None:
            max_remaining_steps += len(upcoming_partition) * 2 - 1
        for x in partitions_to_split:
            max_remaining_steps += (len(x) - 1) * 2

        picks = dict(all_a)
        for x in partition:
            picks[x] = choice_map[x][1]
        announce_test("checking %s [<=%d remaining]" %
                      (format_namelist(partition), max_remaining_steps))
        res = perform_test(picks)
        if res is True:
            known_good.update(partition)
        elif len(partition) > 1:
            partitions_to_split.insert(0, partition)

    # TODO:
    # - We could optimize based on the knowledge that when splitting a failed
    #   partition into two and one half checks out okay, then the other half
    #   must contain a failure and does not need to be tested as a whole.
    all_choice_names = [name for name, _ in choices]
    partitions_to_split = [all_choice_names]
    while len(partitions_to_split) > 0:
        partition = partitions_to_split.pop()

        middle = len(partition) // 2
        left = partition[0:middle]
        right = partition[middle:]

        if len(left) > 0:
            test_partition(left, right)
        assert len(right) > 0
        test_partition(right, None)

    return known_good


def extract_functions(file):
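    """Parse an assembly file and return a list of (name, text) tuples, one
    per function, where text spans the " -- Begin function" marker line up to
    and including the " -- End function" marker line."""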
    functions = []
    in_function = None
    for line in open(file):
        marker = line.find(" -- Begin function ")
        if marker != -1:
            if in_function is not None:
                warn("Missing end of function %s" % (in_function,))
            # Skip the marker (19 characters) and strip the trailing newline.
            funcname = line[marker + 19:-1]
            in_function = funcname
            text = line
            continue

        marker = line.find(" -- End function")
        if marker != -1:
            text += line
            functions.append((in_function, text))
            in_function = None
            continue

        if in_function is not None:
            text += line
    return functions


def replace_functions(source, dest, replacements):
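    """Copy the assembly file `source` to `dest`, substituting the text of
    every function listed in the `replacements` dictionary with the
    replacement text provided there."""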
    out = open(dest, "w")
    skip = False
    in_function = None
    for line in open(source):
        marker = line.find(" -- Begin function ")
        if marker != -1:
            if in_function is not None:
                warn("Missing end of function %s" % (in_function,))
            funcname = line[marker + 19:-1]
            in_function = funcname
            replacement = replacements.get(in_function)
            if replacement is not None:
                out.write(replacement)
                skip = True
        else:
            marker = line.find(" -- End function")
            if marker != -1:
                in_function = None
                if skip:
                    skip = False
                    continue

        if not skip:
            out.write(line)
    out.close()


def testrun(files):
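    """Invoke the link_test script on the given list of files and return True
    if it exited with status 0, False otherwise."""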
    linkline = "%s %s" % (LINKTEST, " ".join(files),)
    res = subprocess.call(linkline, shell=True)
    if res != 0:
        announce_result(FAILED + ": '%s' exitcode != 0" % LINKTEST)
        return False
    else:
        announce_result("ok")
        return True


def prepare_files(gooddir, baddir, rspfile):
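    """Build the bisection choices for whole-file mode: pair each file in
    `gooddir` with the file of the same name in `baddir`, skipping files with
    identical content. Return a test callback and the list of
    (name, (file_a, file_b)) choices."""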
    files_a = []
    files_b = []

    if rspfile is not None:
        def get_basename(name):
            # Strip the gooddir/baddir prefix so names from both trees match.
            if name.startswith(gooddir):
                return name[len(gooddir):]
            if name.startswith(baddir):
                return name[len(baddir):]
            assert False, "unexpected file '%s'" % name

        with open(rspfile, "r") as rf:
            for line in rf.read().splitlines():
                for obj in line.split():
                    assert not os.path.isabs(obj), "TODO: support abs path"
                    files_a.append(gooddir + "/" + obj)
                    files_b.append(baddir + "/" + obj)
    else:
        get_basename = os.path.basename
        files_a = find(gooddir, "*")
        files_b = find(baddir, "*")

    basenames_a = set(map(get_basename, files_a))
    basenames_b = set(map(get_basename, files_b))

    for name in files_b:
        basename = get_basename(name)
        if basename not in basenames_a:
            warn("There is no corresponding file to '%s' in %s" %
                 (name, gooddir))
    choices = []
    skipped = []
    for name in files_a:
        basename = get_basename(name)
        if basename not in basenames_b:
            warn("There is no corresponding file to '%s' in %s" %
                 (name, baddir))
            continue

        file_a = gooddir + "/" + basename
        file_b = baddir + "/" + basename
        if filecmp.cmp(file_a, file_b):
            skipped.append(basename)
            continue

        choice = (basename, (file_a, file_b))
        choices.append(choice)

    if len(skipped) > 0:
        info("Skipped (same content): %s" % format_namelist(skipped))

    def perform_test(picks):
        files = []
        # Iterate over files_a to preserve the original link order; `picks`
        # does not contain the files that were skipped as identical.
        for x in files_a:
            basename = get_basename(x)
            picked = picks.get(basename)
            if picked is None:
                assert basename in skipped
                files.append(x)
            else:
                files.append(picked)

        # If a response file is used, create a temporary response file for
        # the picked files.
        if rspfile is not None:
            with tempfile.NamedTemporaryFile('w', suffix='.rsp',
                                             delete=False) as tf:
                tf.write(" ".join(files))
                tf.flush()
            ret = testrun([tf.name])
            os.remove(tf.name)
            return ret

        return testrun(files)

    return perform_test, choices


def prepare_functions(to_check, gooddir, goodfile, badfile):
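    """Build the bisection choices for single-file mode: pair the functions
    extracted from `goodfile` and `badfile`, skipping identical ones. The
    test callback writes a combined file with the picked function bodies and
    links it in place of `to_check`. Return the callback and the choices."""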
    files_good = find(gooddir, "*")

    functions_a = extract_functions(goodfile)
    functions_a_map = dict(functions_a)
    functions_b_map = dict(extract_functions(badfile))

    for name in functions_b_map.keys():
        if name not in functions_a_map:
            warn("Function '%s' missing from good file" % name)
    choices = []
    skipped = []
    for name, candidate_a in functions_a:
        candidate_b = functions_b_map.get(name)
        if candidate_b is None:
            warn("Function '%s' missing from bad file" % name)
            continue
        if candidate_a == candidate_b:
            skipped.append(name)
            continue
        choice = name, (candidate_a, candidate_b)
        choices.append(choice)

    if len(skipped) > 0:
        info("Skipped (same content): %s" % format_namelist(skipped))

    # Fixed temporary path; the file is rewritten for every test run.
    combined_file = '/tmp/combined2.s'
    files = []
    found_good_file = False
    for c in files_good:
        if os.path.basename(c) == to_check:
            found_good_file = True
            files.append(combined_file)
            continue
        files.append(c)
    assert found_good_file

    def perform_test(picks):
        for name, x in picks.items():
            assert x == functions_a_map[name] or x == functions_b_map[name]
        replace_functions(goodfile, combined_file, picks)
        return testrun(files)
    return perform_test, choices


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--a', dest='dir_a', default='before')
    parser.add_argument('--b', dest='dir_b', default='after')
    parser.add_argument('--rsp', default=None)
    parser.add_argument('--test', default='./link_test')
    parser.add_argument('--insane', help='Skip sanity check',
                        action='store_true')
    parser.add_argument('--seq',
                        help='Check sequentially instead of bisecting',
                        action='store_true')
    parser.add_argument('file', metavar='file', nargs='?')
    config = parser.parse_args()

    gooddir = config.dir_a
    baddir = config.dir_b
    rspfile = config.rsp
    global LINKTEST
    LINKTEST = config.test

    # Preparation phase: Build a list of (name, (choice_a, choice_b)) pairs.
    # The bisection algorithm will pick one choice for each name and then run
    # the perform_test function on the resulting selection.
    if config.file is not None:
        goodfile = gooddir + "/" + config.file
        badfile = baddir + "/" + config.file
        perform_test, choices = prepare_functions(config.file, gooddir,
                                                  goodfile, badfile)
    else:
        perform_test, choices = prepare_files(gooddir, baddir, rspfile)

    info("%d bisection choices" % len(choices))

    # "Checking whether build environment is sane ..."
    if not config.insane:
        if not os.access(LINKTEST, os.X_OK):
            error("Expect '%s' to be present and executable" % (LINKTEST,))
            sys.exit(1)

        check_sanity(choices, perform_test)

    if config.seq:
        known_good = check_sequentially(choices, perform_test)
    else:
        known_good = check_bisect(choices, perform_test)

    stderr.write("\n")
    if len(known_good) != len(choices):
        stderr.write("== Failing ==\n")
        for name, _ in choices:
            if name not in known_good:
                stderr.write("%s\n" % name)
    else:
        # This shouldn't happen when the sanity check works...
        # Maybe link_test isn't deterministic?
        stderr.write("Could not identify failing parts?!?\n")


if __name__ == '__main__':
    main()