mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 13:11:39 +01:00
[SLP] Fix for PR32038: extra add of PHI node when it is not required.
Summary: If horizontal reduction tree starts from the binary operation that is used in PHI node, but this PHI is not used in horizontal reduction, we may end up with extra addition of this PHI node after vectorization. Here is an example: ``` %phi = phi i32 [ %tmp, %end], ... ... %tmp = add i32 %tmp1, %tmp2 end: ``` after vectorization we always have something like: ``` %phi = phi i32 [ %tmp, %end], ... ... %red = extractelement <8 x 32> %vec.red, 0 %tmp = add i32 %red, %phi end: ``` even if `%phi` is not used in reduction tree. Patch considers these PHI nodes as extra arguments and considers them in the final result iff they really used in reduction. Reviewers: mkuper, hfinkel, mzolotukhin Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D30409 llvm-svn: 296606
This commit is contained in:
parent
b9a27f087a
commit
f077ffe566
@ -4250,14 +4250,6 @@ class HorizontalReduction {
|
||||
MapVector<Instruction *, Value *> ExtraArgs;
|
||||
|
||||
BinaryOperator *ReductionRoot = nullptr;
|
||||
// After successfull horizontal reduction vectorization attempt for PHI node
|
||||
// vectorizer tries to update root binary op by combining vectorized tree and
|
||||
// the ReductionPHI node. But during vectorization this ReductionPHI can be
|
||||
// vectorized itself and replaced by the undef value, while the instruction
|
||||
// itself is marked for deletion. This 'marked for deletion' PHI node then can
|
||||
// be used in new binary operation, causing "Use still stuck around after Def
|
||||
// is destroyed" crash upon PHI node deletion.
|
||||
WeakVH ReductionPHI;
|
||||
|
||||
/// The opcode of the reduction.
|
||||
Instruction::BinaryOps ReductionOpcode = Instruction::BinaryOpsEnd;
|
||||
@ -4318,7 +4310,6 @@ public:
|
||||
ReductionOpcode = B->getOpcode();
|
||||
ReducedValueOpcode = 0;
|
||||
ReductionRoot = B;
|
||||
ReductionPHI = Phi;
|
||||
|
||||
// We currently only support adds.
|
||||
if ((ReductionOpcode != Instruction::Add &&
|
||||
@ -4406,9 +4397,9 @@ public:
|
||||
Stack.push_back(std::make_pair(I, 0));
|
||||
continue;
|
||||
}
|
||||
// NextV is an extra argument for TreeN (its parent operation).
|
||||
markExtraArg(Stack.back(), NextV);
|
||||
}
|
||||
// NextV is an extra argument for TreeN (its parent operation).
|
||||
markExtraArg(Stack.back(), NextV);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -4497,12 +4488,7 @@ public:
|
||||
}
|
||||
}
|
||||
// Update users.
|
||||
if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {
|
||||
assert(ReductionRoot && "Need a reduction operation");
|
||||
ReductionRoot->setOperand(0, VectorizedTree);
|
||||
ReductionRoot->setOperand(1, ReductionPHI);
|
||||
} else
|
||||
ReductionRoot->replaceAllUsesWith(VectorizedTree);
|
||||
ReductionRoot->replaceAllUsesWith(VectorizedTree);
|
||||
}
|
||||
return VectorizedTree != nullptr;
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ target triple = "aarch64--linux-gnu"
|
||||
@a = common global [80 x i8] zeroinitializer, align 16
|
||||
|
||||
; DEFAULT-LABEL: @PR28330(
|
||||
; DEFAULT: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
|
||||
; DEFAULT: %tmp17 = phi i32 [ %bin.extra, %for.body ], [ 0, %entry ]
|
||||
; DEFAULT: %[[S0:.+]] = select <8 x i1> %1, <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
|
||||
; DEFAULT: %[[R0:.+]] = shufflevector <8 x i32> %[[S0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; DEFAULT: %[[R1:.+]] = add <8 x i32> %[[S0]], %[[R0]]
|
||||
@ -18,10 +18,10 @@ target triple = "aarch64--linux-gnu"
|
||||
; DEFAULT: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; DEFAULT: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
|
||||
; DEFAULT: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
|
||||
; DEFAULT: %tmp34 = add i32 %[[R6]], %tmp17
|
||||
; DEFAULT: %bin.extra = add i32 %[[R6]], %tmp17
|
||||
;
|
||||
; GATHER-LABEL: @PR28330(
|
||||
; GATHER: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
|
||||
; GATHER: %tmp17 = phi i32 [ %bin.extra, %for.body ], [ 0, %entry ]
|
||||
; GATHER: %tmp19 = select i1 %tmp1, i32 -720, i32 -80
|
||||
; GATHER: %tmp21 = select i1 %tmp3, i32 -720, i32 -80
|
||||
; GATHER: %tmp23 = select i1 %tmp5, i32 -720, i32 -80
|
||||
@ -45,7 +45,7 @@ target triple = "aarch64--linux-gnu"
|
||||
; GATHER: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; GATHER: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
|
||||
; GATHER: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
|
||||
; GATHER: %tmp34 = add i32 %[[R6]], %tmp17
|
||||
; GATHER: %bin.extra = add i32 %[[R6]], %tmp17
|
||||
;
|
||||
; MAX-COST-LABEL: @PR28330(
|
||||
; MAX-COST-NOT: shufflevector
|
||||
@ -98,7 +98,7 @@ define void @PR32038(i32 %n) {
|
||||
; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
|
||||
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; DEFAULT: for.body:
|
||||
; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
|
||||
; DEFAULT-NEXT: [[TMP20:%.*]] = add i32 -5, undef
|
||||
; DEFAULT-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef
|
||||
@ -114,8 +114,8 @@ define void @PR32038(i32 %n) {
|
||||
; DEFAULT-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; DEFAULT-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
|
||||
; DEFAULT-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
|
||||
; DEFAULT-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP3]], -5
|
||||
; DEFAULT-NEXT: [[TMP34]] = add i32 [[BIN_EXTRA]], [[TMP17]]
|
||||
; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], -5
|
||||
; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef
|
||||
; DEFAULT-NEXT: br label [[FOR_BODY]]
|
||||
;
|
||||
; GATHER-LABEL: @PR32038(
|
||||
@ -138,7 +138,7 @@ define void @PR32038(i32 %n) {
|
||||
; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
|
||||
; GATHER-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; GATHER: for.body:
|
||||
; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; GATHER-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80
|
||||
; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP19]]
|
||||
; GATHER-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80
|
||||
@ -169,8 +169,8 @@ define void @PR32038(i32 %n) {
|
||||
; GATHER-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; GATHER-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
|
||||
; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
|
||||
; GATHER-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP8]], -5
|
||||
; GATHER-NEXT: [[TMP34]] = add i32 [[BIN_EXTRA]], [[TMP17]]
|
||||
; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP8]], -5
|
||||
; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
|
||||
; GATHER-NEXT: br label [[FOR_BODY]]
|
||||
;
|
||||
; MAX-COST-LABEL: @PR32038(
|
||||
|
@ -14,7 +14,7 @@ define i32 @test(i32* nocapture readonly %p) {
|
||||
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* %p, i64 7
|
||||
; CHECK-NEXT: br label %for.body
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %entry ], [ %add.7, %for.body ]
|
||||
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %entry ], [ %bin.extra, %for.body ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* %p to <8 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, [[TMP1]]
|
||||
@ -32,10 +32,10 @@ define i32 @test(i32* nocapture readonly %p) {
|
||||
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
|
||||
; CHECK-NEXT: [[ADD_7:%.*]] = add i32 [[TMP4]], [[SUM]]
|
||||
; CHECK-NEXT: br i1 true, label %for.end, label %for.body
|
||||
; CHECK-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP4]], [[SUM]]
|
||||
; CHECK: br i1 true, label %for.end, label %for.body
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret i32 [[ADD_7]]
|
||||
; CHECK-NEXT: ret i32 [[BIN_EXTRA]]
|
||||
;
|
||||
entry:
|
||||
%arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
|
||||
|
@ -12,7 +12,7 @@ define i32 @foo(i32* nocapture readonly %diff) #0 {
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
|
||||
; CHECK-NEXT: [[ADD52:%.*]] = add nsw i32 [[TMP15]],
|
||||
; CHECK: [[ADD52:%.*]] = add i32 [[TMP15]],
|
||||
; CHECK: ret i32 [[ADD52]]
|
||||
;
|
||||
entry:
|
||||
|
Loading…
x
Reference in New Issue
Block a user