|  | #!/usr/bin/env python | 
|  |  | 
|  | """A shuffle vector fuzz tester. | 
|  |  | 
|  | This is a python program to fuzz test the LLVM shufflevector instruction. It | 
|  | generates a function with a random sequnece of shufflevectors, maintaining the | 
|  | element mapping accumulated across the function. It then generates a main | 
|  | function which calls it with a different value in each element and checks that | 
|  | the result matches the expected mapping. | 
|  |  | 
|  | Take the output IR printed to stdout, compile it to an executable using whatever | 
|  | set of transforms you want to test, and run the program. If it crashes, it found | 
|  | a bug. | 
|  | """ | 
|  |  | 
|  | from __future__ import print_function | 
|  |  | 
|  | import argparse | 
|  | import itertools | 
|  | import random | 
|  | import sys | 
|  | import uuid | 
|  |  | 
|  |  | 
|  | def main(): | 
|  | element_types = ["i8", "i16", "i32", "i64", "f32", "f64"] | 
|  | parser = argparse.ArgumentParser(description=__doc__) | 
|  | parser.add_argument( | 
|  | "-v", "--verbose", action="store_true", help="Show verbose output" | 
|  | ) | 
|  | parser.add_argument( | 
|  | "--seed", default=str(uuid.uuid4()), help="A string used to seed the RNG" | 
|  | ) | 
|  | parser.add_argument( | 
|  | "--max-shuffle-height", | 
|  | type=int, | 
|  | default=16, | 
|  | help="Specify a fixed height of shuffle tree to test", | 
|  | ) | 
|  | parser.add_argument( | 
|  | "--no-blends", | 
|  | dest="blends", | 
|  | action="store_false", | 
|  | help="Include blends of two input vectors", | 
|  | ) | 
|  | parser.add_argument( | 
|  | "--fixed-bit-width", | 
|  | type=int, | 
|  | choices=[128, 256], | 
|  | help="Specify a fixed bit width of vector to test", | 
|  | ) | 
|  | parser.add_argument( | 
|  | "--fixed-element-type", | 
|  | choices=element_types, | 
|  | help="Specify a fixed element type to test", | 
|  | ) | 
|  | parser.add_argument("--triple", help="Specify a triple string to include in the IR") | 
|  | args = parser.parse_args() | 
|  |  | 
|  | random.seed(args.seed) | 
|  |  | 
|  | if args.fixed_element_type is not None: | 
|  | element_types = [args.fixed_element_type] | 
|  |  | 
|  | if args.fixed_bit_width is not None: | 
|  | if args.fixed_bit_width == 128: | 
|  | width_map = {"i64": 2, "i32": 4, "i16": 8, "i8": 16, "f64": 2, "f32": 4} | 
|  | (width, element_type) = random.choice( | 
|  | [(width_map[t], t) for t in element_types] | 
|  | ) | 
|  | elif args.fixed_bit_width == 256: | 
|  | width_map = {"i64": 4, "i32": 8, "i16": 16, "i8": 32, "f64": 4, "f32": 8} | 
|  | (width, element_type) = random.choice( | 
|  | [(width_map[t], t) for t in element_types] | 
|  | ) | 
|  | else: | 
|  | sys.exit(1)  # Checked above by argument parsing. | 
|  | else: | 
|  | width = random.choice([2, 4, 8, 16, 32, 64]) | 
|  | element_type = random.choice(element_types) | 
|  |  | 
|  | element_modulus = { | 
|  | "i8": 1 << 8, | 
|  | "i16": 1 << 16, | 
|  | "i32": 1 << 32, | 
|  | "i64": 1 << 64, | 
|  | "f32": 1 << 32, | 
|  | "f64": 1 << 64, | 
|  | }[element_type] | 
|  |  | 
|  | shuffle_range = (2 * width) if args.blends else width | 
|  |  | 
|  | # Because undef (-1) saturates and is indistinguishable when testing the | 
|  | # correctness of a shuffle, we want to bias our fuzz toward having a decent | 
|  | # mixture of non-undef lanes in the end. With a deep shuffle tree, the | 
|  | # probabilies aren't good so we need to bias things. The math here is that if | 
|  | # we uniformly select between -1 and the other inputs, each element of the | 
|  | # result will have the following probability of being undef: | 
|  | # | 
|  | #   1 - (shuffle_range/(shuffle_range+1))^max_shuffle_height | 
|  | # | 
|  | # More generally, for any probability P of selecting a defined element in | 
|  | # a single shuffle, the end result is: | 
|  | # | 
|  | #   1 - P^max_shuffle_height | 
|  | # | 
|  | # The power of the shuffle height is the real problem, as we want: | 
|  | # | 
|  | #   1 - shuffle_range/(shuffle_range+1) | 
|  | # | 
|  | # So we bias the selection of undef at any given node based on the tree | 
|  | # height. Below, let 'A' be 'len(shuffle_range)', 'C' be 'max_shuffle_height', | 
|  | # and 'B' be the bias we use to compensate for | 
|  | # C '((A+1)*A^(1/C))/(A*(A+1)^(1/C))': | 
|  | # | 
|  | #   1 - (B * A)/(A + 1)^C = 1 - A/(A + 1) | 
|  | # | 
|  | # So at each node we use: | 
|  | # | 
|  | #   1 - (B * A)/(A + 1) | 
|  | # = 1 - ((A + 1) * A * A^(1/C))/(A * (A + 1) * (A + 1)^(1/C)) | 
|  | # = 1 - ((A + 1) * A^((C + 1)/C))/(A * (A + 1)^((C + 1)/C)) | 
|  | # | 
|  | # This is the formula we use to select undef lanes in the shuffle. | 
|  | A = float(shuffle_range) | 
|  | C = float(args.max_shuffle_height) | 
|  | undef_prob = 1.0 - ( | 
|  | ((A + 1.0) * pow(A, (C + 1.0) / C)) / (A * pow(A + 1.0, (C + 1.0) / C)) | 
|  | ) | 
|  |  | 
|  | shuffle_tree = [ | 
|  | [ | 
|  | [ | 
|  | -1 | 
|  | if random.random() <= undef_prob | 
|  | else random.choice(range(shuffle_range)) | 
|  | for _ in itertools.repeat(None, width) | 
|  | ] | 
|  | for _ in itertools.repeat(None, args.max_shuffle_height - i) | 
|  | ] | 
|  | for i in range(args.max_shuffle_height) | 
|  | ] | 
|  |  | 
|  | if args.verbose: | 
|  | # Print out the shuffle sequence in a compact form. | 
|  | print( | 
|  | ( | 
|  | 'Testing shuffle sequence "%s" (v%d%s):' | 
|  | % (args.seed, width, element_type) | 
|  | ), | 
|  | file=sys.stderr, | 
|  | ) | 
|  | for i, shuffles in enumerate(shuffle_tree): | 
|  | print("  tree level %d:" % (i,), file=sys.stderr) | 
|  | for j, s in enumerate(shuffles): | 
|  | print("    shuffle %d: %s" % (j, s), file=sys.stderr) | 
|  | print("", file=sys.stderr) | 
|  |  | 
|  | # Symbolically evaluate the shuffle tree. | 
|  | inputs = [ | 
|  | [int(j % element_modulus) for j in range(i * width + 1, (i + 1) * width + 1)] | 
|  | for i in range(args.max_shuffle_height + 1) | 
|  | ] | 
|  | results = inputs | 
|  | for shuffles in shuffle_tree: | 
|  | results = [ | 
|  | [ | 
|  | ( | 
|  | (results[i] if j < width else results[i + 1])[j % width] | 
|  | if j != -1 | 
|  | else -1 | 
|  | ) | 
|  | for j in s | 
|  | ] | 
|  | for i, s in enumerate(shuffles) | 
|  | ] | 
|  | if len(results) != 1: | 
|  | print("ERROR: Bad results: %s" % (results,), file=sys.stderr) | 
|  | sys.exit(1) | 
|  | result = results[0] | 
|  |  | 
|  | if args.verbose: | 
|  | print("Which transforms:", file=sys.stderr) | 
|  | print("  from: %s" % (inputs,), file=sys.stderr) | 
|  | print("  into: %s" % (result,), file=sys.stderr) | 
|  | print("", file=sys.stderr) | 
|  |  | 
|  | # The IR uses silly names for floating point types. We also need a same-size | 
|  | # integer type. | 
|  | integral_element_type = element_type | 
|  | if element_type == "f32": | 
|  | integral_element_type = "i32" | 
|  | element_type = "float" | 
|  | elif element_type == "f64": | 
|  | integral_element_type = "i64" | 
|  | element_type = "double" | 
|  |  | 
|  | # Now we need to generate IR for the shuffle function. | 
|  | subst = {"N": width, "T": element_type, "IT": integral_element_type} | 
|  | print( | 
|  | """ | 
|  | define internal fastcc <%(N)d x %(T)s> @test(%(arguments)s) noinline nounwind { | 
|  | entry:""" | 
|  | % dict( | 
|  | subst, | 
|  | arguments=", ".join( | 
|  | [ | 
|  | "<%(N)d x %(T)s> %%s.0.%(i)d" % dict(subst, i=i) | 
|  | for i in range(args.max_shuffle_height + 1) | 
|  | ] | 
|  | ), | 
|  | ) | 
|  | ) | 
|  |  | 
|  | for i, shuffles in enumerate(shuffle_tree): | 
|  | for j, s in enumerate(shuffles): | 
|  | print( | 
|  | """ | 
|  | %%s.%(next_i)d.%(j)d = shufflevector <%(N)d x %(T)s> %%s.%(i)d.%(j)d, <%(N)d x %(T)s> %%s.%(i)d.%(next_j)d, <%(N)d x i32> <%(S)s> | 
|  | """.strip( | 
|  | "\n" | 
|  | ) | 
|  | % dict( | 
|  | subst, | 
|  | i=i, | 
|  | next_i=i + 1, | 
|  | j=j, | 
|  | next_j=j + 1, | 
|  | S=", ".join( | 
|  | ["i32 " + (str(si) if si != -1 else "undef") for si in s] | 
|  | ), | 
|  | ) | 
|  | ) | 
|  |  | 
|  | print( | 
|  | """ | 
|  | ret <%(N)d x %(T)s> %%s.%(i)d.0 | 
|  | } | 
|  | """ | 
|  | % dict(subst, i=len(shuffle_tree)) | 
|  | ) | 
|  |  | 
|  | # Generate some string constants that we can use to report errors. | 
|  | for i, r in enumerate(result): | 
|  | if r != -1: | 
|  | s = ( | 
|  | "FAIL(%(seed)s): lane %(lane)d, expected %(result)d, found %%d\n\\0A" | 
|  | % {"seed": args.seed, "lane": i, "result": r} | 
|  | ) | 
|  | s += "".join(["\\00" for _ in itertools.repeat(None, 128 - len(s) + 2)]) | 
|  | print( | 
|  | """ | 
|  | @error.%(i)d = private unnamed_addr global [128 x i8] c"%(s)s" | 
|  | """.strip() | 
|  | % {"i": i, "s": s} | 
|  | ) | 
|  |  | 
|  | # Define a wrapper function which is marked 'optnone' to prevent | 
|  | # interprocedural optimizations from deleting the test. | 
|  | print( | 
|  | """ | 
|  | define internal fastcc <%(N)d x %(T)s> @test_wrapper(%(arguments)s) optnone noinline { | 
|  | %%result = call fastcc <%(N)d x %(T)s> @test(%(arguments)s) | 
|  | ret <%(N)d x %(T)s> %%result | 
|  | } | 
|  | """ | 
|  | % dict( | 
|  | subst, | 
|  | arguments=", ".join( | 
|  | [ | 
|  | "<%(N)d x %(T)s> %%s.%(i)d" % dict(subst, i=i) | 
|  | for i in range(args.max_shuffle_height + 1) | 
|  | ] | 
|  | ), | 
|  | ) | 
|  | ) | 
|  |  | 
|  | # Finally, generate a main function which will trap if any lanes are mapped | 
|  | # incorrectly (in an observable way). | 
|  | print( | 
|  | """ | 
|  | define i32 @main() { | 
|  | entry: | 
|  | ; Create a scratch space to print error messages. | 
|  | %%str = alloca [128 x i8] | 
|  | %%str.ptr = getelementptr inbounds [128 x i8], [128 x i8]* %%str, i32 0, i32 0 | 
|  |  | 
|  | ; Build the input vector and call the test function. | 
|  | %%v = call fastcc <%(N)d x %(T)s> @test_wrapper(%(inputs)s) | 
|  | ; We need to cast this back to an integer type vector to easily check the | 
|  | ; result. | 
|  | %%v.cast = bitcast <%(N)d x %(T)s> %%v to <%(N)d x %(IT)s> | 
|  | br label %%test.0 | 
|  | """ | 
|  | % dict( | 
|  | subst, | 
|  | inputs=", ".join( | 
|  | [ | 
|  | ( | 
|  | "<%(N)d x %(T)s> bitcast " | 
|  | "(<%(N)d x %(IT)s> <%(input)s> to <%(N)d x %(T)s>)" | 
|  | % dict( | 
|  | subst, | 
|  | input=", ".join( | 
|  | ["%(IT)s %(i)d" % dict(subst, i=i) for i in input] | 
|  | ), | 
|  | ) | 
|  | ) | 
|  | for input in inputs | 
|  | ] | 
|  | ), | 
|  | ) | 
|  | ) | 
|  |  | 
|  | # Test that each non-undef result lane contains the expected value. | 
|  | for i, r in enumerate(result): | 
|  | if r == -1: | 
|  | print( | 
|  | """ | 
|  | test.%(i)d: | 
|  | ; Skip this lane, its value is undef. | 
|  | br label %%test.%(next_i)d | 
|  | """ | 
|  | % dict(subst, i=i, next_i=i + 1) | 
|  | ) | 
|  | else: | 
|  | print( | 
|  | """ | 
|  | test.%(i)d: | 
|  | %%v.%(i)d = extractelement <%(N)d x %(IT)s> %%v.cast, i32 %(i)d | 
|  | %%cmp.%(i)d = icmp ne %(IT)s %%v.%(i)d, %(r)d | 
|  | br i1 %%cmp.%(i)d, label %%die.%(i)d, label %%test.%(next_i)d | 
|  |  | 
|  | die.%(i)d: | 
|  | ; Capture the actual value and print an error message. | 
|  | %%tmp.%(i)d = zext %(IT)s %%v.%(i)d to i2048 | 
|  | %%bad.%(i)d = trunc i2048 %%tmp.%(i)d to i32 | 
|  | call i32 (i8*, i8*, ...) @sprintf(i8* %%str.ptr, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @error.%(i)d, i32 0, i32 0), i32 %%bad.%(i)d) | 
|  | %%length.%(i)d = call i32 @strlen(i8* %%str.ptr) | 
|  | call i32 @write(i32 2, i8* %%str.ptr, i32 %%length.%(i)d) | 
|  | call void @llvm.trap() | 
|  | unreachable | 
|  | """ | 
|  | % dict(subst, i=i, next_i=i + 1, r=r) | 
|  | ) | 
|  |  | 
|  | print( | 
|  | """ | 
|  | test.%d: | 
|  | ret i32 0 | 
|  | } | 
|  |  | 
|  | declare i32 @strlen(i8*) | 
|  | declare i32 @write(i32, i8*, i32) | 
|  | declare i32 @sprintf(i8*, i8*, ...) | 
|  | declare void @llvm.trap() noreturn nounwind | 
|  | """ | 
|  | % (len(result),) | 
|  | ) | 
|  |  | 
|  |  | 
|  | if __name__ == "__main__": | 
|  | main() |