1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-01 16:33:37 +01:00
llvm-mirror/test/Transforms/InstCombine/load-cmp.ll

113 lines
2.8 KiB
LLVM
Raw Normal View History

Teach instcombine to fold compares of loads from constant arrays with variable indices into a comparison of the index with a constant. The most common occurrence of this that I see by far is stuff like: if ("foobar"[i] == '\0') ... which we compile into: if (i == 6), saving a load and materialization of the global address. This also exposes loop trip count information to later passes in many cases. This triggers hundreds of times in xalancbmk, which is where I first noticed it, but it also triggers in many other apps. Here are a few interesting ones from various apps: @must_be_connected_without = internal constant [8 x i8*] [i8* getelementptr inbounds ([3 x i8]* @.str64320, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str27283, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str71327, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str72328, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str18274, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8]* @.str11267, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str32288, i64 0, i64 0), i8* null], align 32 ; <[8 x i8*]*> [#uses=2] %scevgep.i = getelementptr [8 x i8*]* @must_be_connected_without, i64 0, i64 %indvar.i ; <i8**> [#uses=1] %17 = load ... %18 = icmp eq i8* %17, null ; <i1> [#uses=1] -> icmp eq i64 %indvar.i, 7 @yytable1095 = internal constant [84 x i8] c"\12\01(\05\06\07\08\09\0A\0B\0C\0D\0E1\0F\10\11266\1D: \10\11,-,0\03'\10\11B6\04\17&\18\1945\05\06\07\08\09\0A\0B\0C\0D\0E\1E\0F\10\11*\1A\1B\1C$3+>#%;<IJ=ADFEGH9KL\00\00\00C", align 32 ; <[84 x i8]*> [#uses=2] %57 = getelementptr inbounds [84 x i8]* @yytable1095, i64 0, i64 %56 ; <i8*> [#uses=1] %mode.0.in = getelementptr inbounds [9 x i32]* @mb_mode_table, i64 0, i64 %.pn ; <i32*> [#uses=1] load ... %64 = icmp eq i8 %58, 4 ; <i1> [#uses=1] -> icmp eq i64 %.pn, 35 ; <i1> [#uses=0] @gsm_DLB = internal constant [4 x i16] [i16 6554, i16 16384, i16 26214, i16 32767] %scevgep.i = getelementptr [4 x i16]* @gsm_DLB, i64 0, i64 %indvar.i ; <i16*> [#uses=1] %425 = load %scevgep.i %426 = icmp eq i16 %425, -32768 ; <i1> [#uses=0] -> false llvm-svn: 92411
2010-01-02 09:12:04 +01:00
; RUN: opt < %s -instcombine -S | FileCheck %s
@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
i16 73, i16 82, i16 69, i16 68, i16 0]
Teach the table lookup optimization to generate range compares when a consequtive sequence of elements all satisfies the predicate. Like the double compare case, this generates better code than the magic constant case and generalizes to more than 32/64 element array lookups. Here are some examples where it triggers. From 403.gcc, most accesses to the rtx_class array are handled, e.g.: @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=547] %142 = icmp eq i8 %141, 105 @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=543] %165 = icmp eq i8 %164, 60 Also, most of the 59-element arrays (mode_class/rid_to_yy, etc) optimized before are actually range compares. This lets 32-bit machines optimize them. 400.perlbmk has stuff like this: 400.perlbmk: PL_regkind, even for 32-bit: @PL_regkind = constant [62 x i8] c"\00\00\02\02\02\06\06\06\06\09\09\0B\0B\0D\0E\0E\0E\11\12\12\14\14\16\16\18\18\1A\1A\1C\1C\1E\1F !!!$$&'((((,-.///88886789:;8$", align 32 ; <[62 x i8]*> [#uses=4] %811 = icmp ne i8 %810, 33 @PL_utf8skip = constant [256 x i8] c"\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\04\04\04\04\04\04\04\04\05\05\05\05\06\06\07\0D", align 32 ; <[256 x i8]*> [#uses=94] %12 = icmp ult i8 %10, 2 etc. llvm-svn: 92426
2010-01-02 22:50:18 +01:00
@GD = internal constant [6 x double]
[double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
Teach instcombine to fold compares of loads from constant arrays with variable indices into a comparison of the index with a constant. The most common occurrence of this that I see by far is stuff like: if ("foobar"[i] == '\0') ... which we compile into: if (i == 6), saving a load and materialization of the global address. This also exposes loop trip count information to later passes in many cases. This triggers hundreds of times in xalancbmk, which is where I first noticed it, but it also triggers in many other apps. Here are a few interesting ones from various apps: @must_be_connected_without = internal constant [8 x i8*] [i8* getelementptr inbounds ([3 x i8]* @.str64320, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str27283, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str71327, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str72328, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str18274, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8]* @.str11267, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str32288, i64 0, i64 0), i8* null], align 32 ; <[8 x i8*]*> [#uses=2] %scevgep.i = getelementptr [8 x i8*]* @must_be_connected_without, i64 0, i64 %indvar.i ; <i8**> [#uses=1] %17 = load ... %18 = icmp eq i8* %17, null ; <i1> [#uses=1] -> icmp eq i64 %indvar.i, 7 @yytable1095 = internal constant [84 x i8] c"\12\01(\05\06\07\08\09\0A\0B\0C\0D\0E1\0F\10\11266\1D: \10\11,-,0\03'\10\11B6\04\17&\18\1945\05\06\07\08\09\0A\0B\0C\0D\0E\1E\0F\10\11*\1A\1B\1C$3+>#%;<IJ=ADFEGH9KL\00\00\00C", align 32 ; <[84 x i8]*> [#uses=2] %57 = getelementptr inbounds [84 x i8]* @yytable1095, i64 0, i64 %56 ; <i8*> [#uses=1] %mode.0.in = getelementptr inbounds [9 x i32]* @mb_mode_table, i64 0, i64 %.pn ; <i32*> [#uses=1] load ... %64 = icmp eq i8 %58, 4 ; <i1> [#uses=1] -> icmp eq i64 %.pn, 35 ; <i1> [#uses=0] @gsm_DLB = internal constant [4 x i16] [i16 6554, i16 16384, i16 26214, i16 32767] %scevgep.i = getelementptr [4 x i16]* @gsm_DLB, i64 0, i64 %indvar.i ; <i16*> [#uses=1] %425 = load %scevgep.i %426 = icmp eq i16 %425, -32768 ; <i1> [#uses=0] -> false llvm-svn: 92411
2010-01-02 09:12:04 +01:00
define i1 @test1(i32 %X) {
%P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
Teach instcombine to fold compares of loads from constant arrays with variable indices into a comparison of the index with a constant. The most common occurrence of this that I see by far is stuff like: if ("foobar"[i] == '\0') ... which we compile into: if (i == 6), saving a load and materialization of the global address. This also exposes loop trip count information to later passes in many cases. This triggers hundreds of times in xalancbmk, which is where I first noticed it, but it also triggers in many other apps. Here are a few interesting ones from various apps: @must_be_connected_without = internal constant [8 x i8*] [i8* getelementptr inbounds ([3 x i8]* @.str64320, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str27283, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str71327, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str72328, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str18274, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8]* @.str11267, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str32288, i64 0, i64 0), i8* null], align 32 ; <[8 x i8*]*> [#uses=2] %scevgep.i = getelementptr [8 x i8*]* @must_be_connected_without, i64 0, i64 %indvar.i ; <i8**> [#uses=1] %17 = load ... %18 = icmp eq i8* %17, null ; <i1> [#uses=1] -> icmp eq i64 %indvar.i, 7 @yytable1095 = internal constant [84 x i8] c"\12\01(\05\06\07\08\09\0A\0B\0C\0D\0E1\0F\10\11266\1D: \10\11,-,0\03'\10\11B6\04\17&\18\1945\05\06\07\08\09\0A\0B\0C\0D\0E\1E\0F\10\11*\1A\1B\1C$3+>#%;<IJ=ADFEGH9KL\00\00\00C", align 32 ; <[84 x i8]*> [#uses=2] %57 = getelementptr inbounds [84 x i8]* @yytable1095, i64 0, i64 %56 ; <i8*> [#uses=1] %mode.0.in = getelementptr inbounds [9 x i32]* @mb_mode_table, i64 0, i64 %.pn ; <i32*> [#uses=1] load ... %64 = icmp eq i8 %58, 4 ; <i1> [#uses=1] -> icmp eq i64 %.pn, 35 ; <i1> [#uses=0] @gsm_DLB = internal constant [4 x i16] [i16 6554, i16 16384, i16 26214, i16 32767] %scevgep.i = getelementptr [4 x i16]* @gsm_DLB, i64 0, i64 %indvar.i ; <i16*> [#uses=1] %425 = load %scevgep.i %426 = icmp eq i16 %425, -32768 ; <i1> [#uses=0] -> false llvm-svn: 92411
2010-01-02 09:12:04 +01:00
%Q = load i16* %P
%R = icmp eq i16 %Q, 0
ret i1 %R
; CHECK: @test1
; CHECK-NEXT: %R = icmp eq i32 %X, 9
; CHECK-NEXT: ret i1 %R
}
define i1 @test2(i32 %X) {
%P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
Teach instcombine to fold compares of loads from constant arrays with variable indices into a comparison of the index with a constant. The most common occurrence of this that I see by far is stuff like: if ("foobar"[i] == '\0') ... which we compile into: if (i == 6), saving a load and materialization of the global address. This also exposes loop trip count information to later passes in many cases. This triggers hundreds of times in xalancbmk, which is where I first noticed it, but it also triggers in many other apps. Here are a few interesting ones from various apps: @must_be_connected_without = internal constant [8 x i8*] [i8* getelementptr inbounds ([3 x i8]* @.str64320, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str27283, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str71327, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str72328, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str18274, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8]* @.str11267, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str32288, i64 0, i64 0), i8* null], align 32 ; <[8 x i8*]*> [#uses=2] %scevgep.i = getelementptr [8 x i8*]* @must_be_connected_without, i64 0, i64 %indvar.i ; <i8**> [#uses=1] %17 = load ... %18 = icmp eq i8* %17, null ; <i1> [#uses=1] -> icmp eq i64 %indvar.i, 7 @yytable1095 = internal constant [84 x i8] c"\12\01(\05\06\07\08\09\0A\0B\0C\0D\0E1\0F\10\11266\1D: \10\11,-,0\03'\10\11B6\04\17&\18\1945\05\06\07\08\09\0A\0B\0C\0D\0E\1E\0F\10\11*\1A\1B\1C$3+>#%;<IJ=ADFEGH9KL\00\00\00C", align 32 ; <[84 x i8]*> [#uses=2] %57 = getelementptr inbounds [84 x i8]* @yytable1095, i64 0, i64 %56 ; <i8*> [#uses=1] %mode.0.in = getelementptr inbounds [9 x i32]* @mb_mode_table, i64 0, i64 %.pn ; <i32*> [#uses=1] load ... %64 = icmp eq i8 %58, 4 ; <i1> [#uses=1] -> icmp eq i64 %.pn, 35 ; <i1> [#uses=0] @gsm_DLB = internal constant [4 x i16] [i16 6554, i16 16384, i16 26214, i16 32767] %scevgep.i = getelementptr [4 x i16]* @gsm_DLB, i64 0, i64 %indvar.i ; <i16*> [#uses=1] %425 = load %scevgep.i %426 = icmp eq i16 %425, -32768 ; <i1> [#uses=0] -> false llvm-svn: 92411
2010-01-02 09:12:04 +01:00
%Q = load i16* %P
%R = icmp slt i16 %Q, 85
ret i1 %R
; CHECK: @test2
; CHECK-NEXT: %R = icmp ne i32 %X, 4
; CHECK-NEXT: ret i1 %R
}
define i1 @test3(i32 %X) {
%P = getelementptr inbounds [6 x double]* @GD, i32 0, i32 %X
%Q = load double* %P
%R = fcmp oeq double %Q, 1.0
ret i1 %R
; CHECK: @test3
Teach the table lookup optimization to generate range compares when a consequtive sequence of elements all satisfies the predicate. Like the double compare case, this generates better code than the magic constant case and generalizes to more than 32/64 element array lookups. Here are some examples where it triggers. From 403.gcc, most accesses to the rtx_class array are handled, e.g.: @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=547] %142 = icmp eq i8 %141, 105 @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=543] %165 = icmp eq i8 %164, 60 Also, most of the 59-element arrays (mode_class/rid_to_yy, etc) optimized before are actually range compares. This lets 32-bit machines optimize them. 400.perlbmk has stuff like this: 400.perlbmk: PL_regkind, even for 32-bit: @PL_regkind = constant [62 x i8] c"\00\00\02\02\02\06\06\06\06\09\09\0B\0B\0D\0E\0E\0E\11\12\12\14\14\16\16\18\18\1A\1A\1C\1C\1E\1F !!!$$&'((((,-.///88886789:;8$", align 32 ; <[62 x i8]*> [#uses=4] %811 = icmp ne i8 %810, 33 @PL_utf8skip = constant [256 x i8] c"\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\04\04\04\04\04\04\04\04\05\05\05\05\06\06\07\0D", align 32 ; <[256 x i8]*> [#uses=94] %12 = icmp ult i8 %10, 2 etc. llvm-svn: 92426
2010-01-02 22:50:18 +01:00
; CHECK-NEXT: %R = icmp eq i32 %X, 1
; CHECK-NEXT: ret i1 %R
}
enhance the compare/load/index optimization to work on *any* load from a global with 32/64 elements or less (depending on whether i64 is native on the target), generating a bitshift idiom to determine the result. For example, on test4 we produce: define i1 @test4(i32 %X) { %1 = lshr i32 933, %X ; <i32> [#uses=1] %2 = and i32 %1, 1 ; <i32> [#uses=1] %R = icmp ne i32 %2, 0 ; <i1> [#uses=1] ret i1 %R } This triggers in a number of interesting cases, for example, here's an fp case: @A.3255 = internal constant [4 x double] [double 4.100000e+00, double -3.900000e+00, double -1.000000e+00, double 1.000000e+00], align 32 ; <[4 x double]*> [#uses=7] ... %7 = fcmp olt double %3, 0.000000e+00 In this case we make the slen2_tab global dead, which is nice: @slen2_tab = internal constant [16 x i32] [i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 2, i32 3, i32 1, i32 2, i32 3, i32 2, i32 3], align 32 ; <[16 x i32]*> [#uses=1] ... %204 = icmp eq i32 %46, 0 Perl has a bunch of these, also on the 'Perl_regkind' array: @Perl_yygindex = internal constant [51 x i16] [i16 0, i16 0, i16 0, i16 0, i16 374, i16 351, i16 0, i16 -12, i16 0, i16 946, i16 413, i16 -83, i16 0, i16 0, i16 0, i16 -311, i16 -13, i16 4007, i16 2893, i16 0, i16 0, i16 0, i16 0, i16 0, i16 372, i16 -8, i16 0, i16 0, i16 246, i16 -131, i16 43, i16 86, i16 208, i16 -45, i16 -169, i16 987, i16 0, i16 0, i16 0, i16 0, i16 308, i16 0, i16 -271, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0], align 32 ; <[51 x i16]*> [#uses=1] ... %1364 = icmp eq i16 %1361, 0 186.crafty really likes this on 64-bit machines, because it triggers on a bunch of globals like this: @white_outpost = internal constant [64 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\02\00\00\00\00\00\04\05\05\04\00\00\00\00\03\06\06\03\00\00\00\00\00\01\01\00\00\00\00\00\00\00\00\00\00\00", align 32 ; <[64 x i8]*> [#uses=2] However the big winner is 403.gcc, which triggers hundreds of times, eliminating all the accesses to the 57-element arrays 'mode_class', mode_unit_size, mode_bitsize, regclass_map, etc. go 64-bit machines :) llvm-svn: 92415
2010-01-02 09:56:52 +01:00
define i1 @test4(i32 %X) {
%P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
enhance the compare/load/index optimization to work on *any* load from a global with 32/64 elements or less (depending on whether i64 is native on the target), generating a bitshift idiom to determine the result. For example, on test4 we produce: define i1 @test4(i32 %X) { %1 = lshr i32 933, %X ; <i32> [#uses=1] %2 = and i32 %1, 1 ; <i32> [#uses=1] %R = icmp ne i32 %2, 0 ; <i1> [#uses=1] ret i1 %R } This triggers in a number of interesting cases, for example, here's an fp case: @A.3255 = internal constant [4 x double] [double 4.100000e+00, double -3.900000e+00, double -1.000000e+00, double 1.000000e+00], align 32 ; <[4 x double]*> [#uses=7] ... %7 = fcmp olt double %3, 0.000000e+00 In this case we make the slen2_tab global dead, which is nice: @slen2_tab = internal constant [16 x i32] [i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 2, i32 3, i32 1, i32 2, i32 3, i32 2, i32 3], align 32 ; <[16 x i32]*> [#uses=1] ... %204 = icmp eq i32 %46, 0 Perl has a bunch of these, also on the 'Perl_regkind' array: @Perl_yygindex = internal constant [51 x i16] [i16 0, i16 0, i16 0, i16 0, i16 374, i16 351, i16 0, i16 -12, i16 0, i16 946, i16 413, i16 -83, i16 0, i16 0, i16 0, i16 -311, i16 -13, i16 4007, i16 2893, i16 0, i16 0, i16 0, i16 0, i16 0, i16 372, i16 -8, i16 0, i16 0, i16 246, i16 -131, i16 43, i16 86, i16 208, i16 -45, i16 -169, i16 987, i16 0, i16 0, i16 0, i16 0, i16 308, i16 0, i16 -271, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0], align 32 ; <[51 x i16]*> [#uses=1] ... %1364 = icmp eq i16 %1361, 0 186.crafty really likes this on 64-bit machines, because it triggers on a bunch of globals like this: @white_outpost = internal constant [64 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\02\00\00\00\00\00\04\05\05\04\00\00\00\00\03\06\06\03\00\00\00\00\00\01\01\00\00\00\00\00\00\00\00\00\00\00", align 32 ; <[64 x i8]*> [#uses=2] However the big winner is 403.gcc, which triggers hundreds of times, eliminating all the accesses to the 57-element arrays 'mode_class', mode_unit_size, mode_bitsize, regclass_map, etc. go 64-bit machines :) llvm-svn: 92415
2010-01-02 09:56:52 +01:00
%Q = load i16* %P
%R = icmp sle i16 %Q, 73
ret i1 %R
; CHECK: @test4
; CHECK-NEXT: lshr i32 933, %X
; CHECK-NEXT: and i32 {{.*}}, 1
; CHECK-NEXT: %R = icmp ne i32 {{.*}}, 0
; CHECK-NEXT: ret i1 %R
}
define i1 @test5(i32 %X) {
%P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
%Q = load i16* %P
%R = icmp eq i16 %Q, 69
ret i1 %R
; CHECK: @test5
; CHECK-NEXT: icmp eq i32 %X, 2
; CHECK-NEXT: icmp eq i32 %X, 7
; CHECK-NEXT: %R = or i1
; CHECK-NEXT: ret i1 %R
}
Teach the table lookup optimization to generate range compares when a consequtive sequence of elements all satisfies the predicate. Like the double compare case, this generates better code than the magic constant case and generalizes to more than 32/64 element array lookups. Here are some examples where it triggers. From 403.gcc, most accesses to the rtx_class array are handled, e.g.: @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=547] %142 = icmp eq i8 %141, 105 @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=543] %165 = icmp eq i8 %164, 60 Also, most of the 59-element arrays (mode_class/rid_to_yy, etc) optimized before are actually range compares. This lets 32-bit machines optimize them. 400.perlbmk has stuff like this: 400.perlbmk: PL_regkind, even for 32-bit: @PL_regkind = constant [62 x i8] c"\00\00\02\02\02\06\06\06\06\09\09\0B\0B\0D\0E\0E\0E\11\12\12\14\14\16\16\18\18\1A\1A\1C\1C\1E\1F !!!$$&'((((,-.///88886789:;8$", align 32 ; <[62 x i8]*> [#uses=4] %811 = icmp ne i8 %810, 33 @PL_utf8skip = constant [256 x i8] c"\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\04\04\04\04\04\04\04\04\05\05\05\05\06\06\07\0D", align 32 ; <[256 x i8]*> [#uses=94] %12 = icmp ult i8 %10, 2 etc. llvm-svn: 92426
2010-01-02 22:50:18 +01:00
define i1 @test6(i32 %X) {
%P = getelementptr inbounds [6 x double]* @GD, i32 0, i32 %X
Teach the table lookup optimization to generate range compares when a consequtive sequence of elements all satisfies the predicate. Like the double compare case, this generates better code than the magic constant case and generalizes to more than 32/64 element array lookups. Here are some examples where it triggers. From 403.gcc, most accesses to the rtx_class array are handled, e.g.: @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=547] %142 = icmp eq i8 %141, 105 @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=543] %165 = icmp eq i8 %164, 60 Also, most of the 59-element arrays (mode_class/rid_to_yy, etc) optimized before are actually range compares. This lets 32-bit machines optimize them. 400.perlbmk has stuff like this: 400.perlbmk: PL_regkind, even for 32-bit: @PL_regkind = constant [62 x i8] c"\00\00\02\02\02\06\06\06\06\09\09\0B\0B\0D\0E\0E\0E\11\12\12\14\14\16\16\18\18\1A\1A\1C\1C\1E\1F !!!$$&'((((,-.///88886789:;8$", align 32 ; <[62 x i8]*> [#uses=4] %811 = icmp ne i8 %810, 33 @PL_utf8skip = constant [256 x i8] c"\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\04\04\04\04\04\04\04\04\05\05\05\05\06\06\07\0D", align 32 ; <[256 x i8]*> [#uses=94] %12 = icmp ult i8 %10, 2 etc. llvm-svn: 92426
2010-01-02 22:50:18 +01:00
%Q = load double* %P
%R = fcmp ogt double %Q, 0.0
ret i1 %R
; CHECK: @test6
; CHECK-NEXT: add i32 %X, -1
; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 3
; CHECK-NEXT: ret i1 %R
}
define i1 @test7(i32 %X) {
%P = getelementptr inbounds [6 x double]* @GD, i32 0, i32 %X
Teach the table lookup optimization to generate range compares when a consequtive sequence of elements all satisfies the predicate. Like the double compare case, this generates better code than the magic constant case and generalizes to more than 32/64 element array lookups. Here are some examples where it triggers. From 403.gcc, most accesses to the rtx_class array are handled, e.g.: @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=547] %142 = icmp eq i8 %141, 105 @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=543] %165 = icmp eq i8 %164, 60 Also, most of the 59-element arrays (mode_class/rid_to_yy, etc) optimized before are actually range compares. This lets 32-bit machines optimize them. 400.perlbmk has stuff like this: 400.perlbmk: PL_regkind, even for 32-bit: @PL_regkind = constant [62 x i8] c"\00\00\02\02\02\06\06\06\06\09\09\0B\0B\0D\0E\0E\0E\11\12\12\14\14\16\16\18\18\1A\1A\1C\1C\1E\1F !!!$$&'((((,-.///88886789:;8$", align 32 ; <[62 x i8]*> [#uses=4] %811 = icmp ne i8 %810, 33 @PL_utf8skip = constant [256 x i8] c"\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\04\04\04\04\04\04\04\04\05\05\05\05\06\06\07\0D", align 32 ; <[256 x i8]*> [#uses=94] %12 = icmp ult i8 %10, 2 etc. llvm-svn: 92426
2010-01-02 22:50:18 +01:00
%Q = load double* %P
%R = fcmp olt double %Q, 0.0
ret i1 %R
; CHECK: @test7
; CHECK-NEXT: add i32 %X, -1
; CHECK-NEXT: %R = icmp ugt i32 {{.*}}, 2
; CHECK-NEXT: ret i1 %R
}
define i1 @test8(i32 %X) {
%P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
%Q = load i16* %P
%R = and i16 %Q, 3
%S = icmp eq i16 %R, 0
ret i1 %S
; CHECK: @test8
; CHECK-NEXT: add i32 %X, -8
; CHECK-NEXT: icmp ult i32 {{.*}}, 2
; CHECK-NEXT: ret i1
}
@GA = internal constant [4 x { i32, i32 } ] [
{ i32, i32 } { i32 1, i32 0 },
{ i32, i32 } { i32 2, i32 1 },
{ i32, i32 } { i32 3, i32 1 },
{ i32, i32 } { i32 4, i32 0 }
]
define i1 @test9(i32 %X) {
%P = getelementptr inbounds [4 x { i32, i32 } ]* @GA, i32 0, i32 %X, i32 1
%Q = load i32* %P
%R = icmp eq i32 %Q, 1
ret i1 %R
; CHECK: @test9
; CHECK-NEXT: add i32 %X, -1
; CHECK-NEXT: icmp ult i32 {{.*}}, 2
; CHECK-NEXT: ret i1
}