mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
8017c13c9d
InstCombine operates on the basic premise that the operands of the currently processed instruction have already been simplified. It achieves this by pushing instructions to the worklist in reverse program order, so that instructions are popped off in program order. The worklist management in the main combining loop also makes sure to uphold this invariant. However, the same is not true for all the code that is performing manual worklist management. The largest problem (addressed in this patch) are instructions inserted by InstCombine's IRBuilder. These will be pushed onto the worklist in order of insertion (generally matching program order), which means that a) the users of the original instruction will be visited first, as they are pushed later in the main loop and b) the newly inserted instructions will be visited in reverse program order. This causes a number of problems: First, folds operate on instructions that have not had their operands simplified, which may result in optimizations being missed (ran into this in https://reviews.llvm.org/D72048#1800424, which was the original motivation for this patch). Additionally, this increases the amount of folds InstCombine has to perform, both within one iteration, and by increasing the number of total iterations. This patch addresses the issue by adding a Worklist.AddDeferred() method, which is used for instructions inserted by IRBuilder. These will only be added to the real worklist after the combine finished, and in reverse order, so they will end up processed in program order. I should note that the same should also be done to nearly all other uses of Worklist.Add(), but I'm starting with just this occurrence, which has by far the largest test fallout. Most of the test changes are due to https://bugs.llvm.org/show_bug.cgi?id=44521 or other cases where we don't canonicalize something. These are neutral. One regression has been addressed in D73575 and D73647. The remaining regression in an shl+sdiv fold can't really be fixed without dropping another transform, but does not seem particularly problematic in the first place. Differential Revision: https://reviews.llvm.org/D73411
351 lines
11 KiB
LLVM
351 lines
11 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -instcombine -S < %s | FileCheck %s
|
|
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
@X = constant i32 42 ; <i32*> [#uses=2]
|
|
@X2 = constant i32 47 ; <i32*> [#uses=1]
|
|
@Y = constant [2 x { i32, float }] [ { i32, float } { i32 12, float 1.000000e+00 }, { i32, float } { i32 37, float 0x3FF3B2FEC0000000 } ] ; <[2 x { i32, float }]*> [#uses=2]
|
|
@Z = constant [2 x { i32, float }] zeroinitializer ; <[2 x { i32, float }]*> [#uses=1]
|
|
|
|
@GLOBAL = internal constant [4 x i32] zeroinitializer
|
|
|
|
|
|
define i32 @test1() {
|
|
; CHECK-LABEL: @test1(
|
|
; CHECK-NEXT: ret i32 42
|
|
;
|
|
%B = load i32, i32* @X ; <i32> [#uses=1]
|
|
ret i32 %B
|
|
}
|
|
|
|
define float @test2() {
|
|
; CHECK-LABEL: @test2(
|
|
; CHECK-NEXT: ret float 0x3FF3B2FEC0000000
|
|
;
|
|
%A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Y, i64 0, i64 1, i32 1 ; <float*> [#uses=1]
|
|
%B = load float, float* %A ; <float> [#uses=1]
|
|
ret float %B
|
|
}
|
|
|
|
define i32 @test3() {
|
|
; CHECK-LABEL: @test3(
|
|
; CHECK-NEXT: ret i32 12
|
|
;
|
|
%A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Y, i64 0, i64 0, i32 0 ; <i32*> [#uses=1]
|
|
%B = load i32, i32* %A ; <i32> [#uses=1]
|
|
ret i32 %B
|
|
}
|
|
|
|
define i32 @test4() {
|
|
; CHECK-LABEL: @test4(
|
|
; CHECK-NEXT: ret i32 0
|
|
;
|
|
%A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Z, i64 0, i64 1, i32 0 ; <i32*> [#uses=1]
|
|
%B = load i32, i32* %A ; <i32> [#uses=1]
|
|
ret i32 %B
|
|
}
|
|
|
|
define i32 @test5(i1 %C) {
|
|
; CHECK-LABEL: @test5(
|
|
; CHECK-NEXT: [[Z:%.*]] = select i1 [[C:%.*]], i32 42, i32 47
|
|
; CHECK-NEXT: ret i32 [[Z]]
|
|
;
|
|
%Y = select i1 %C, i32* @X, i32* @X2 ; <i32*> [#uses=1]
|
|
%Z = load i32, i32* %Y ; <i32> [#uses=1]
|
|
ret i32 %Z
|
|
}
|
|
|
|
define i32 @test7(i32 %X) {
|
|
; CHECK-LABEL: @test7(
|
|
; CHECK-NEXT: store i32 undef, i32* null, align 536870912
|
|
; CHECK-NEXT: ret i32 undef
|
|
;
|
|
%V = getelementptr i32, i32* null, i32 %X ; <i32*> [#uses=1]
|
|
%R = load i32, i32* %V ; <i32> [#uses=1]
|
|
ret i32 %R
|
|
}
|
|
|
|
define i32 @test7_no_null_opt(i32 %X) #0 {
|
|
; CHECK-LABEL: @test7_no_null_opt(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
|
|
; CHECK-NEXT: [[V:%.*]] = getelementptr i32, i32* null, i64 [[TMP1]]
|
|
; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[V]], align 4
|
|
; CHECK-NEXT: ret i32 [[R]]
|
|
;
|
|
%V = getelementptr i32, i32* null, i32 %X ; <i32*> [#uses=1]
|
|
%R = load i32, i32* %V ; <i32> [#uses=1]
|
|
ret i32 %R
|
|
}
|
|
attributes #0 = { "null-pointer-is-valid"="true" }
|
|
|
|
define i32 @test8(i32* %P) {
|
|
; CHECK-LABEL: @test8(
|
|
; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4
|
|
; CHECK-NEXT: ret i32 1
|
|
;
|
|
store i32 1, i32* %P
|
|
%X = load i32, i32* %P ; <i32> [#uses=1]
|
|
ret i32 %X
|
|
}
|
|
|
|
define i32 @test9(i32* %P) {
|
|
; CHECK-LABEL: @test9(
|
|
; CHECK-NEXT: ret i32 0
|
|
;
|
|
%X = load i32, i32* %P ; <i32> [#uses=1]
|
|
%Y = load i32, i32* %P ; <i32> [#uses=1]
|
|
%Z = sub i32 %X, %Y ; <i32> [#uses=1]
|
|
ret i32 %Z
|
|
}
|
|
|
|
define i32 @test10(i1 %C.upgrd.1, i32* %P, i32* %Q) {
|
|
; CHECK-LABEL: @test10(
|
|
; CHECK-NEXT: br i1 [[C_UPGRD_1:%.*]], label [[T:%.*]], label [[F:%.*]]
|
|
; CHECK: T:
|
|
; CHECK-NEXT: store i32 1, i32* [[Q:%.*]], align 4
|
|
; CHECK-NEXT: br label [[C:%.*]]
|
|
; CHECK: F:
|
|
; CHECK-NEXT: br label [[C]]
|
|
; CHECK: C:
|
|
; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
|
|
; CHECK-NEXT: ret i32 0
|
|
;
|
|
br i1 %C.upgrd.1, label %T, label %F
|
|
T: ; preds = %0
|
|
store i32 1, i32* %Q
|
|
store i32 0, i32* %P
|
|
br label %C
|
|
F: ; preds = %0
|
|
store i32 0, i32* %P
|
|
br label %C
|
|
C: ; preds = %F, %T
|
|
%V = load i32, i32* %P ; <i32> [#uses=1]
|
|
ret i32 %V
|
|
}
|
|
|
|
define double @test11(double* %p) {
|
|
; CHECK-LABEL: @test11(
|
|
; CHECK-NEXT: [[T0:%.*]] = getelementptr double, double* [[P:%.*]], i64 1
|
|
; CHECK-NEXT: store double 2.000000e+00, double* [[T0]], align 8
|
|
; CHECK-NEXT: ret double 2.000000e+00
|
|
;
|
|
%t0 = getelementptr double, double* %p, i32 1
|
|
store double 2.0, double* %t0
|
|
%t1 = getelementptr double, double* %p, i32 1
|
|
%x = load double, double* %t1
|
|
ret double %x
|
|
}
|
|
|
|
define i32 @test12(i32* %P) {
|
|
; CHECK-LABEL: @test12(
|
|
; CHECK-NEXT: ret i32 123
|
|
;
|
|
%A = alloca i32
|
|
store i32 123, i32* %A
|
|
; Cast the result of the load not the source
|
|
%Q = bitcast i32* %A to i32*
|
|
%V = load i32, i32* %Q
|
|
ret i32 %V
|
|
}
|
|
|
|
define <16 x i8> @test13(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test13(
|
|
; CHECK-NEXT: ret <16 x i8> zeroinitializer
|
|
;
|
|
%tmp = load <16 x i8>, <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*)
|
|
ret <16 x i8> %tmp
|
|
}
|
|
|
|
; This test must not have the store of %x forwarded to the load -- there is an
|
|
; intervening store if %y. However, the intervening store occurs with a different
|
|
; type and size and to a different pointer value. This is ensuring that none of
|
|
; those confuse the analysis into thinking that the second store does not alias
|
|
; the first.
|
|
|
|
define i8 @test14(i8 %x, i32 %y) {
|
|
; CHECK-LABEL: @test14(
|
|
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
|
; CHECK-NEXT: [[A_I8:%.*]] = bitcast i32* [[A]] to i8*
|
|
; CHECK-NEXT: store i8 [[X:%.*]], i8* [[A_I8]], align 4
|
|
; CHECK-NEXT: store i32 [[Y:%.*]], i32* [[A]], align 4
|
|
; CHECK-NEXT: [[R:%.*]] = load i8, i8* [[A_I8]], align 4
|
|
; CHECK-NEXT: ret i8 [[R]]
|
|
;
|
|
%a = alloca i32
|
|
%a.i8 = bitcast i32* %a to i8*
|
|
store i8 %x, i8* %a.i8
|
|
store i32 %y, i32* %a
|
|
%r = load i8, i8* %a.i8
|
|
ret i8 %r
|
|
}
|
|
|
|
@test15_global = external global i32
|
|
|
|
; Same test as @test14 essentially, but using a global instead of an alloca.
|
|
|
|
define i8 @test15(i8 %x, i32 %y) {
|
|
; CHECK-LABEL: @test15(
|
|
; CHECK-NEXT: store i8 [[X:%.*]], i8* bitcast (i32* @test15_global to i8*), align 4
|
|
; CHECK-NEXT: store i32 [[Y:%.*]], i32* @test15_global, align 4
|
|
; CHECK-NEXT: [[R:%.*]] = load i8, i8* bitcast (i32* @test15_global to i8*), align 4
|
|
; CHECK-NEXT: ret i8 [[R]]
|
|
;
|
|
%g.i8 = bitcast i32* @test15_global to i8*
|
|
store i8 %x, i8* %g.i8
|
|
store i32 %y, i32* @test15_global
|
|
%r = load i8, i8* %g.i8
|
|
ret i8 %r
|
|
}
|
|
|
|
; Check that we canonicalize loads which are only stored to use integer types
|
|
; when there is a valid integer type.
|
|
|
|
define void @test16(i8* %x, i8* %a, i8* %b, i8* %c) {
|
|
; CHECK-LABEL: @test16(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
|
|
; CHECK-NEXT: [[X11:%.*]] = load i32, i32* [[TMP0]], align 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[A:%.*]] to i32*
|
|
; CHECK-NEXT: store i32 [[X11]], i32* [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[B:%.*]] to i32*
|
|
; CHECK-NEXT: store i32 [[X11]], i32* [[TMP2]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[X]] to i32*
|
|
; CHECK-NEXT: [[X22:%.*]] = load i32, i32* [[TMP3]], align 4
|
|
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[B]] to i32*
|
|
; CHECK-NEXT: store i32 [[X22]], i32* [[TMP4]], align 4
|
|
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[C:%.*]] to i32*
|
|
; CHECK-NEXT: store i32 [[X22]], i32* [[TMP5]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%x.cast = bitcast i8* %x to float*
|
|
%a.cast = bitcast i8* %a to float*
|
|
%b.cast = bitcast i8* %b to float*
|
|
%c.cast = bitcast i8* %c to i32*
|
|
|
|
%x1 = load float, float* %x.cast
|
|
store float %x1, float* %a.cast
|
|
store float %x1, float* %b.cast
|
|
|
|
%x2 = load float, float* %x.cast
|
|
store float %x2, float* %b.cast
|
|
%x2.cast = bitcast float %x2 to i32
|
|
store i32 %x2.cast, i32* %c.cast
|
|
|
|
ret void
|
|
}
|
|
|
|
define void @test16-vect(i8* %x, i8* %a, i8* %b, i8* %c) {
|
|
; CHECK-LABEL: @test16-vect(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
|
|
; CHECK-NEXT: [[X11:%.*]] = load i32, i32* [[TMP0]], align 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[A:%.*]] to i32*
|
|
; CHECK-NEXT: store i32 [[X11]], i32* [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[B:%.*]] to i32*
|
|
; CHECK-NEXT: store i32 [[X11]], i32* [[TMP2]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[X]] to i32*
|
|
; CHECK-NEXT: [[X22:%.*]] = load i32, i32* [[TMP3]], align 4
|
|
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[B]] to i32*
|
|
; CHECK-NEXT: store i32 [[X22]], i32* [[TMP4]], align 4
|
|
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[C:%.*]] to i32*
|
|
; CHECK-NEXT: store i32 [[X22]], i32* [[TMP5]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%x.cast = bitcast i8* %x to <4 x i8>*
|
|
%a.cast = bitcast i8* %a to <4 x i8>*
|
|
%b.cast = bitcast i8* %b to <4 x i8>*
|
|
%c.cast = bitcast i8* %c to i32*
|
|
|
|
%x1 = load <4 x i8>, <4 x i8>* %x.cast
|
|
store <4 x i8> %x1, <4 x i8>* %a.cast
|
|
store <4 x i8> %x1, <4 x i8>* %b.cast
|
|
|
|
%x2 = load <4 x i8>, <4 x i8>* %x.cast
|
|
store <4 x i8> %x2, <4 x i8>* %b.cast
|
|
%x2.cast = bitcast <4 x i8> %x2 to i32
|
|
store i32 %x2.cast, i32* %c.cast
|
|
|
|
ret void
|
|
}
|
|
|
|
|
|
; Check that in cases similar to @test16 we don't try to rewrite a load when
|
|
; its only use is a store but it is used as the pointer to that store rather
|
|
; than the value.
|
|
|
|
define void @test17(i8** %x, i8 %y) {
|
|
; CHECK-LABEL: @test17(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[X_LOAD:%.*]] = load i8*, i8** [[X:%.*]], align 8
|
|
; CHECK-NEXT: store i8 [[Y:%.*]], i8* [[X_LOAD]], align 1
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%x.load = load i8*, i8** %x
|
|
store i8 %y, i8* %x.load
|
|
|
|
ret void
|
|
}
|
|
|
|
; Check that we don't try change the type of the load by inserting a bitcast
|
|
; generating invalid IR.
|
|
%swift.error = type opaque
|
|
declare void @useSwiftError(%swift.error** swifterror)
|
|
|
|
define void @test18(%swift.error** swifterror %err) {
|
|
; CHECK-LABEL: @test18(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[SWIFTERROR:%.*]] = alloca swifterror %swift.error*, align 8
|
|
; CHECK-NEXT: store %swift.error* null, %swift.error** [[SWIFTERROR]], align 8
|
|
; CHECK-NEXT: call void @useSwiftError(%swift.error** nonnull swifterror [[SWIFTERROR]])
|
|
; CHECK-NEXT: [[ERR_RES:%.*]] = load %swift.error*, %swift.error** [[SWIFTERROR]], align 8
|
|
; CHECK-NEXT: store %swift.error* [[ERR_RES]], %swift.error** [[ERR:%.*]], align 8
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%swifterror = alloca swifterror %swift.error*, align 8
|
|
store %swift.error* null, %swift.error** %swifterror, align 8
|
|
call void @useSwiftError(%swift.error** nonnull swifterror %swifterror)
|
|
%err.res = load %swift.error*, %swift.error** %swifterror, align 8
|
|
store %swift.error* %err.res, %swift.error** %err, align 8
|
|
ret void
|
|
}
|
|
|
|
; Make sure we preseve the type of the store to a swifterror pointer.
|
|
|
|
declare void @initi8(i8**)
|
|
define void @test19(%swift.error** swifterror %err) {
|
|
; CHECK-LABEL: @test19(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP:%.*]] = alloca i8*, align 8
|
|
; CHECK-NEXT: call void @initi8(i8** nonnull [[TMP]])
|
|
; CHECK-NEXT: [[SWIFTERROR:%.*]] = bitcast i8** [[TMP]] to %swift.error**
|
|
; CHECK-NEXT: [[ERR_RES:%.*]] = load %swift.error*, %swift.error** [[SWIFTERROR]], align 8
|
|
; CHECK-NEXT: store %swift.error* [[ERR_RES]], %swift.error** [[ERR:%.*]], align 8
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%tmp = alloca i8*, align 8
|
|
call void @initi8(i8** %tmp)
|
|
%swifterror = bitcast i8** %tmp to %swift.error**
|
|
%err.res = load %swift.error*, %swift.error** %swifterror, align 8
|
|
store %swift.error* %err.res, %swift.error** %err, align 8
|
|
ret void
|
|
}
|
|
|
|
; Make sure we don't canonicalize accesses to scalable vectors.
|
|
define void @test20(<vscale x 4 x i8>* %x, <vscale x 4 x i8>* %y) {
|
|
; CHECK-LABEL: @test20(
|
|
; CHECK-NEXT: [[X_LOAD:%.*]] = load <vscale x 4 x i8>, <vscale x 4 x i8>* [[X:%.*]], align 1
|
|
; CHECK-NEXT: store <vscale x 4 x i8> [[X_LOAD]], <vscale x 4 x i8>* [[Y:%.*]], align 1
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%x.load = load <vscale x 4 x i8>, <vscale x 4 x i8>* %x, align 1
|
|
store <vscale x 4 x i8> %x.load, <vscale x 4 x i8>* %y, align 1
|
|
ret void
|
|
}
|