blob: 4ca3ca4f730e97e0dcdd7d043fa8750feec1de32 [file] [log] [blame]
#!/usr/bin/env python
# Given a previous good compile narrow down miscompiles.
# Expects two directories named "before" and "after" each containing a set of
# assembly or object files where the "after" version is assumed to be broken.
# You also have to provide a script called "link_test". It is called with a
# list of files which should be linked together and result tested. "link_test"
# should returns with exitcode 0 if the linking and testing succeeded.
# If a response file is provided, only the object files that are listed in the
# file are inspected. In addition, the "link_test" is called with a temporary
# response file representing one iteration of bisection.
# operates by taking all files from the "before" directory and
# in each step replacing one of them with a file from the "bad" directory.
# Additionally you can perform the same steps with a single .s file. In this
# mode functions are identified by " -- Begin function FunctionName" and
# " -- End function" markers. The then takes all
# function from the file in the "before" directory and replaces one function
# with the corresponding function from the "bad" file in each step.
# Example usage to identify miscompiled files:
# 1. Create a link_test script, make it executable. Simple Example:
# clang "$@" -o /tmp/test && /tmp/test || echo "PROBLEM"
# 2. Run the script to figure out which files are miscompiled:
# > ./
# somefile.s: ok
# someotherfile.s: skipped: same content
# anotherfile.s: failed: './link_test' exitcode != 0
# ...
# Example usage to identify miscompiled functions inside a file:
# 3. Run the tests on a single file (assuming before/file.s and
# after/file.s exist)
# > ./ file.s
# funcname1 [0/XX]: ok
# funcname2 [1/XX]: ok
# funcname3 [2/XX]: skipped: same content
# funcname4 [3/XX]: failed: './link_test' exitcode != 0
# ...
from fnmatch import filter
from sys import stderr
import argparse
import filecmp
import os
import subprocess
import sys
import tempfile
# Specify LINKTEST via `--test`. Default value is './link_test'.
ESCAPE = "\033[%sm"
RED = ESCAPE % "31"
FAILED = RED + "failed" + NORMAL
def find(dir, file_filter=None):
files = [
for walkdir in os.walk(dir)
for file in walkdir[2]
if file_filter is not None:
files = filter(files, file_filter)
return sorted(files)
def error(message):
stderr.write("Error: %s\n" % (message,))
def warn(message):
stderr.write("Warning: %s\n" % (message,))
def info(message):
stderr.write("Info: %s\n" % (message,))
def announce_test(name):
stderr.write("%s%s%s: " % (BOLD, name, NORMAL))
def announce_result(result):
def format_namelist(l):
result = ", ".join(l[0:3])
if len(l) > 3:
result += "... (%d total)" % len(l)
return result
def check_sanity(choices, perform_test):
announce_test("sanity check A")
all_a = {name: a_b[0] for name, a_b in choices}
res_a = perform_test(all_a)
if res_a is not True:
error("Picking all choices from A failed to pass the test")
announce_test("sanity check B (expecting failure)")
all_b = {name: a_b[1] for name, a_b in choices}
res_b = perform_test(all_b)
if res_b is not False:
error("Picking all choices from B did unexpectedly pass the test")
def check_sequentially(choices, perform_test):
known_good = set()
all_a = {name: a_b[0] for name, a_b in choices}
n = 1
for name, a_b in sorted(choices):
picks = dict(all_a)
picks[name] = a_b[1]
announce_test("checking %s [%d/%d]" % (name, n, len(choices)))
n += 1
res = perform_test(picks)
if res is True:
return known_good
def check_bisect(choices, perform_test):
known_good = set()
if len(choices) == 0:
return known_good
choice_map = dict(choices)
all_a = {name: a_b[0] for name, a_b in choices}
def test_partition(partition, upcoming_partition):
# Compute the maximum number of checks we have to do in the worst case.
max_remaining_steps = len(partition) * 2 - 1
if upcoming_partition is not None:
max_remaining_steps += len(upcoming_partition) * 2 - 1
for x in partitions_to_split:
max_remaining_steps += (len(x) - 1) * 2
picks = dict(all_a)
for x in partition:
picks[x] = choice_map[x][1]
announce_test("checking %s [<=%d remaining]" %
(format_namelist(partition), max_remaining_steps))
res = perform_test(picks)
if res is True:
elif len(partition) > 1:
partitions_to_split.insert(0, partition)
# - We could optimize based on the knowledge that when splitting a failed
# partition into two and one side checks out okay then we can deduce that
# the other partition must be a failure.
all_choice_names = [name for name, _ in choices]
partitions_to_split = [all_choice_names]
while len(partitions_to_split) > 0:
partition = partitions_to_split.pop()
middle = len(partition) // 2
left = partition[0:middle]
right = partition[middle:]
if len(left) > 0:
test_partition(left, right)
assert len(right) > 0
test_partition(right, None)
return known_good
def extract_functions(file):
functions = []
in_function = None
for line in open(file):
marker = line.find(" -- Begin function ")
if marker != -1:
if in_function is not None:
warn("Missing end of function %s" % (in_function,))
funcname = line[marker + 19:-1]
in_function = funcname
text = line
marker = line.find(" -- End function")
if marker != -1:
text += line
functions.append((in_function, text))
in_function = None
if in_function is not None:
text += line
return functions
def replace_functions(source, dest, replacements):
out = open(dest, "w")
skip = False
in_function = None
for line in open(source):
marker = line.find(" -- Begin function ")
if marker != -1:
if in_function is not None:
warn("Missing end of function %s" % (in_function,))
funcname = line[marker + 19:-1]
in_function = funcname
replacement = replacements.get(in_function)
if replacement is not None:
skip = True
marker = line.find(" -- End function")
if marker != -1:
in_function = None
if skip:
skip = False
if not skip:
def testrun(files):
linkline = "%s %s" % (LINKTEST, " ".join(files),)
res =, shell=True)
if res != 0:
announce_result(FAILED + ": '%s' exitcode != 0" % LINKTEST)
return False
return True
def prepare_files(gooddir, baddir, rspfile):
files_a = []
files_b = []
if rspfile is not None:
def get_basename(name):
# remove prefix
if name.startswith(gooddir):
return name[len(gooddir):]
if name.startswith(baddir):
return name[len(baddir):]
assert False, ""
with open(rspfile, "r") as rf:
for line in
for obj in line.split():
assert not os.path.isabs(obj), "TODO: support abs path"
files_a.append(gooddir + "/" + obj)
files_b.append(baddir + "/" + obj)
get_basename = lambda name: os.path.basename(name)
files_a = find(gooddir, "*")
files_b = find(baddir, "*")
basenames_a = set(map(get_basename, files_a))
basenames_b = set(map(get_basename, files_b))
for name in files_b:
basename = get_basename(name)
if basename not in basenames_a:
warn("There is no corresponding file to '%s' in %s" %
(name, gooddir))
choices = []
skipped = []
for name in files_a:
basename = get_basename(name)
if basename not in basenames_b:
warn("There is no corresponding file to '%s' in %s" %
(name, baddir))
file_a = gooddir + "/" + basename
file_b = baddir + "/" + basename
if filecmp.cmp(file_a, file_b):
choice = (basename, (file_a, file_b))
if len(skipped) > 0:
info("Skipped (same content): %s" % format_namelist(skipped))
def perform_test(picks):
files = []
# Note that we iterate over files_a so we don't change the order
# (cannot use `picks` as it is a dictionary without order)
for x in files_a:
basename = get_basename(x)
picked = picks.get(basename)
if picked is None:
assert basename in skipped
# If response file is used, create a temporary response file for the
# picked files.
if rspfile is not None:
with tempfile.NamedTemporaryFile('w', suffix='.rsp',
delete=False) as tf:
tf.write(" ".join(files))
ret = testrun([])
return ret
return testrun(files)
return perform_test, choices
def prepare_functions(to_check, gooddir, goodfile, badfile):
files_good = find(gooddir, "*")
functions_a = extract_functions(goodfile)
functions_a_map = dict(functions_a)
functions_b_map = dict(extract_functions(badfile))
for name in functions_b_map.keys():
if name not in functions_a_map:
warn("Function '%s' missing from good file" % name)
choices = []
skipped = []
for name, candidate_a in functions_a:
candidate_b = functions_b_map.get(name)
if candidate_b is None:
warn("Function '%s' missing from bad file" % name)
if candidate_a == candidate_b:
choice = name, (candidate_a, candidate_b)
if len(skipped) > 0:
info("Skipped (same content): %s" % format_namelist(skipped))
combined_file = '/tmp/combined2.s'
files = []
found_good_file = False
for c in files_good:
if os.path.basename(c) == to_check:
found_good_file = True
assert found_good_file
def perform_test(picks):
for name, x in picks.items():
assert x == functions_a_map[name] or x == functions_b_map[name]
replace_functions(goodfile, combined_file, picks)
return testrun(files)
return perform_test, choices
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--a', dest='dir_a', default='before')
parser.add_argument('--b', dest='dir_b', default='after')
parser.add_argument('--rsp', default=None)
parser.add_argument('--test', default='./link_test')
parser.add_argument('--insane', help='Skip sanity check',
help='Check sequentially instead of bisection',
parser.add_argument('file', metavar='file', nargs='?')
config = parser.parse_args()
gooddir = config.dir_a
baddir = config.dir_b
rspfile = config.rsp
LINKTEST = config.test
# Preparation phase: Creates a dictionary mapping names to a list of two
# choices each. The bisection algorithm will pick one choice for each name
# and then run the perform_test function on it.
if config.file is not None:
goodfile = gooddir + "/" + config.file
badfile = baddir + "/" + config.file
perform_test, choices = prepare_functions(config.file, gooddir,
goodfile, badfile)
perform_test, choices = prepare_files(gooddir, baddir, rspfile)
info("%d bisection choices" % len(choices))
# "Checking whether build environment is sane ..."
if not config.insane:
if not os.access(LINKTEST, os.X_OK):
error("Expect '%s' to be present and executable" % (LINKTEST,))
check_sanity(choices, perform_test)
if config.seq:
known_good = check_sequentially(choices, perform_test)
known_good = check_bisect(choices, perform_test)
if len(known_good) != len(choices):
stderr.write("== Failing ==\n")
for name, _ in choices:
if name not in known_good:
stderr.write("%s\n" % name)
# This shouldn't happen when the sanity check works...
# Maybe link_test isn't deterministic?
stderr.write("Could not identify failing parts?!?")
if __name__ == '__main__':