diff --git a/utils/shuffle_fuzz.py b/utils/shuffle_fuzz.py index a9330a2c50c..1f1a0478f2b 100755 --- a/utils/shuffle_fuzz.py +++ b/utils/shuffle_fuzz.py @@ -24,8 +24,10 @@ def main(): help='A string used to seed the RNG') parser.add_argument('-v', '--verbose', action='store_true', help='Show verbose output') - parser.add_argument('--fixed-num-shuffles', type=int, - help='Specify a fixed number of shuffles to test') + parser.add_argument('--max-shuffle-height', type=int, default=16, + help='Specify a fixed height of shuffle tree to test') + parser.add_argument('--no-blends', dest='blends', action='store_false', + help='Include blends of two input vectors') parser.add_argument('--fixed-bit-width', type=int, choices=[128, 256], help='Specify a fixed bit width of vector to test') parser.add_argument('--triple', @@ -49,33 +51,46 @@ def main(): width = random.choice([2, 4, 8, 16, 32, 64]) element_type = random.choice(['i8', 'i16', 'i32', 'i64', 'f32', 'f64']) - # FIXME: Support blends. - shuffle_indices = [-1] + range(width) + element_modulus = { + 'i8': 1 << 8, 'i16': 1 << 16, 'i32': 1 << 32, 'i64': 1 << 64, + 'f32': 1 << 32, 'f64': 1 << 64}[element_type] - if args.fixed_num_shuffles is not None: - num_shuffles = args.fixed_num_shuffles - else: - num_shuffles = random.randint(0, 16) + shuffle_range = (2 * width) if args.blends else width + shuffle_indices = [-1] + range(shuffle_range) - shuffles = [[random.choice(shuffle_indices) - for _ in itertools.repeat(None, width)] - for _ in itertools.repeat(None, num_shuffles)] + shuffle_tree = [[[random.choice(shuffle_indices) + for _ in itertools.repeat(None, width)] + for _ in itertools.repeat(None, args.max_shuffle_height - i)] + for i in xrange(args.max_shuffle_height)] if args.verbose: # Print out the shuffle sequence in a compact form. - print >>sys.stderr, 'Testing shuffle sequence "%s":' % (args.seed,) - for s in shuffles: - print >>sys.stderr, ' v%d%s: %s' % (width, element_type, s) + print >>sys.stderr, ('Testing shuffle sequence "%s" (v%d%s):' % + (args.seed, width, element_type)) + for i, shuffles in enumerate(shuffle_tree): + print >>sys.stderr, ' tree level %d:' % (i,) + for j, s in enumerate(shuffles): + print >>sys.stderr, ' shuffle %d: %s' % (j, s) print >>sys.stderr, '' - # Compute a round-trip of the shuffle. - result = range(1, width + 1) - for s in shuffles: - result = [result[i] if i != -1 else -1 for i in s] + # Symbolically evaluate the shuffle tree. + inputs = [[int(j % element_modulus) + for j in xrange(i * width + 1, (i + 1) * width + 1)] + for i in xrange(args.max_shuffle_height + 1)] + results = inputs + for shuffles in shuffle_tree: + results = [[((results[i] if j < width else results[i + 1])[j % width] + if j != -1 else -1) + for j in s] + for i, s in enumerate(shuffles)] + if len(results) != 1: + print >>sys.stderr, 'ERROR: Bad results: %s' % (results,) + sys.exit(1) + result = results[0] if args.verbose: print >>sys.stderr, 'Which transforms:' - print >>sys.stderr, ' from: %s' % (range(1, width + 1),) + print >>sys.stderr, ' from: %s' % (inputs,) print >>sys.stderr, ' into: %s' % (result,) print >>sys.stderr, '' @@ -92,22 +107,24 @@ def main(): # Now we need to generate IR for the shuffle function. subst = {'N': width, 'T': element_type, 'IT': integral_element_type} print """ -define internal <%(N)d x %(T)s> @test(<%(N)d x %(T)s> %%v) noinline nounwind { -entry:""" % subst +define internal fastcc <%(N)d x %(T)s> @test(%(arguments)s) noinline nounwind { +entry:""" % dict(subst, + arguments=', '.join( + ['<%(N)d x %(T)s> %%s.0.%(i)d' % dict(subst, i=i) + for i in xrange(args.max_shuffle_height + 1)])) - for i, s in enumerate(shuffles): + for i, shuffles in enumerate(shuffle_tree): + for j, s in enumerate(shuffles): print """ - %%s%(i)d = shufflevector <%(N)d x %(T)s> %(I)s, <%(N)d x %(T)s> undef, <%(N)d x i32> <%(S)s> -""".strip() % dict(subst, - i=i, - I=('%%s%d' % (i - 1)) if i != 0 else '%v', - S=', '.join(['i32 %s' % (str(si) if si != -1 else 'undef',) - for si in s])) + %%s.%(next_i)d.%(j)d = shufflevector <%(N)d x %(T)s> %%s.%(i)d.%(j)d, <%(N)d x %(T)s> %%s.%(i)d.%(next_j)d, <%(N)d x i32> <%(S)s> +""".strip('\n') % dict(subst, i=i, next_i=i + 1, j=j, next_j=j + 1, + S=', '.join(['i32 ' + (str(si) if si != -1 else 'undef') + for si in s])) print """ - ret <%(N)d x %(T)s> %%s%(i)d + ret <%(N)d x %(T)s> %%s.%(i)d.0 } -""" % dict(subst, i=len(shuffles) - 1) +""" % dict(subst, i=len(shuffle_tree)) # Generate some string constants that we can use to report errors. for i, r in enumerate(result): @@ -119,28 +136,39 @@ entry:""" % subst @error.%(i)d = private unnamed_addr global [128 x i8] c"%(s)s" """.strip() % {'i': i, 's': s} + # Define a wrapper function which is marked 'optnone' to prevent + # interprocedural optimizations from deleting the test. + print """ +define internal fastcc <%(N)d x %(T)s> @test_wrapper(%(arguments)s) optnone noinline { + %%result = call fastcc <%(N)d x %(T)s> @test(%(arguments)s) + ret <%(N)d x %(T)s> %%result +} +""" % dict(subst, + arguments=', '.join(['<%(N)d x %(T)s> %%s.%(i)d' % dict(subst, i=i) + for i in xrange(args.max_shuffle_height + 1)])) + # Finally, generate a main function which will trap if any lanes are mapped # incorrectly (in an observable way). print """ -define i32 @main() optnone noinline { +define i32 @main() { entry: ; Create a scratch space to print error messages. %%str = alloca [128 x i8] %%str.ptr = getelementptr inbounds [128 x i8]* %%str, i32 0, i32 0 ; Build the input vector and call the test function. - %%input = bitcast <%(N)d x %(IT)s> <%(input)s> to <%(N)d x %(T)s> - %%v = call <%(N)d x %(T)s> @test(<%(N)d x %(T)s> %%input) + %%v = call fastcc <%(N)d x %(T)s> @test_wrapper(%(inputs)s) ; We need to cast this back to an integer type vector to easily check the ; result. %%v.cast = bitcast <%(N)d x %(T)s> %%v to <%(N)d x %(IT)s> br label %%test.0 """ % dict(subst, - input=', '.join(['%(IT)s %(i)s' % dict(subst, i=i) - for i in xrange(1, width + 1)]), - result=', '.join(['%(IT)s %(i)s' % dict(subst, - i=i if i != -1 else 'undef') - for i in result])) + inputs=', '.join( + [('<%(N)d x %(T)s> bitcast ' + '(<%(N)d x %(IT)s> <%(input)s> to <%(N)d x %(T)s>)' % + dict(subst, input=', '.join(['%(IT)s %(i)d' % dict(subst, i=i) + for i in input]))) + for input in inputs])) # Test that each non-undef result lane contains the expected value. for i, r in enumerate(result):