1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00

[SLP] Fix for PR32038: extra add of PHI node when it is not required.

Summary:
If a horizontal reduction tree starts from a binary operation that is
used in a PHI node, but this PHI is not used in the horizontal reduction, we
may end up with an extra addition of this PHI node after vectorization.
Here is an example:
```
%phi = phi i32 [ %tmp, %end], ...
...
%tmp = add i32 %tmp1, %tmp2
end:
```
After vectorization we always end up with something like:

```
%phi = phi i32 [ %tmp, %end], ...
...
%red = extractelement <8 x i32> %vec.red, i32 0
%tmp = add i32 %red, %phi
end:
```
even if `%phi` is not used in the reduction tree. This patch treats these PHI
nodes as extra arguments and includes them in the final result iff they
are really used in the reduction.

Reviewers: mkuper, hfinkel, mzolotukhin

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D30409

llvm-svn: 296606
This commit is contained in:
Alexey Bataev 2017-03-01 10:50:44 +00:00
parent b9a27f087a
commit f077ffe566
4 changed files with 18 additions and 32 deletions

View File

@ -4250,14 +4250,6 @@ class HorizontalReduction {
MapVector<Instruction *, Value *> ExtraArgs;
BinaryOperator *ReductionRoot = nullptr;
// After a successful horizontal reduction vectorization attempt for a PHI node the
// vectorizer tries to update root binary op by combining vectorized tree and
// the ReductionPHI node. But during vectorization this ReductionPHI can be
// vectorized itself and replaced by the undef value, while the instruction
// itself is marked for deletion. This 'marked for deletion' PHI node then can
// be used in new binary operation, causing "Use still stuck around after Def
// is destroyed" crash upon PHI node deletion.
WeakVH ReductionPHI;
/// The opcode of the reduction.
Instruction::BinaryOps ReductionOpcode = Instruction::BinaryOpsEnd;
@ -4318,7 +4310,6 @@ public:
ReductionOpcode = B->getOpcode();
ReducedValueOpcode = 0;
ReductionRoot = B;
ReductionPHI = Phi;
// We currently only support adds.
if ((ReductionOpcode != Instruction::Add &&
@ -4406,9 +4397,9 @@ public:
Stack.push_back(std::make_pair(I, 0));
continue;
}
// NextV is an extra argument for TreeN (its parent operation).
markExtraArg(Stack.back(), NextV);
}
// NextV is an extra argument for TreeN (its parent operation).
markExtraArg(Stack.back(), NextV);
}
return true;
}
@ -4497,12 +4488,7 @@ public:
}
}
// Update users.
if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {
assert(ReductionRoot && "Need a reduction operation");
ReductionRoot->setOperand(0, VectorizedTree);
ReductionRoot->setOperand(1, ReductionPHI);
} else
ReductionRoot->replaceAllUsesWith(VectorizedTree);
ReductionRoot->replaceAllUsesWith(VectorizedTree);
}
return VectorizedTree != nullptr;
}

View File

@ -9,7 +9,7 @@ target triple = "aarch64--linux-gnu"
@a = common global [80 x i8] zeroinitializer, align 16
; DEFAULT-LABEL: @PR28330(
; DEFAULT: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
; DEFAULT: %tmp17 = phi i32 [ %bin.extra, %for.body ], [ 0, %entry ]
; DEFAULT: %[[S0:.+]] = select <8 x i1> %1, <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; DEFAULT: %[[R0:.+]] = shufflevector <8 x i32> %[[S0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; DEFAULT: %[[R1:.+]] = add <8 x i32> %[[S0]], %[[R0]]
@ -18,10 +18,10 @@ target triple = "aarch64--linux-gnu"
; DEFAULT: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; DEFAULT: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
; DEFAULT: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
; DEFAULT: %tmp34 = add i32 %[[R6]], %tmp17
; DEFAULT: %bin.extra = add i32 %[[R6]], %tmp17
;
; GATHER-LABEL: @PR28330(
; GATHER: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
; GATHER: %tmp17 = phi i32 [ %bin.extra, %for.body ], [ 0, %entry ]
; GATHER: %tmp19 = select i1 %tmp1, i32 -720, i32 -80
; GATHER: %tmp21 = select i1 %tmp3, i32 -720, i32 -80
; GATHER: %tmp23 = select i1 %tmp5, i32 -720, i32 -80
@ -45,7 +45,7 @@ target triple = "aarch64--linux-gnu"
; GATHER: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; GATHER: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
; GATHER: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
; GATHER: %tmp34 = add i32 %[[R6]], %tmp17
; GATHER: %bin.extra = add i32 %[[R6]], %tmp17
;
; MAX-COST-LABEL: @PR28330(
; MAX-COST-NOT: shufflevector
@ -98,7 +98,7 @@ define void @PR32038(i32 %n) {
; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
; DEFAULT: for.body:
; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; DEFAULT-NEXT: [[TMP20:%.*]] = add i32 -5, undef
; DEFAULT-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef
@ -114,8 +114,8 @@ define void @PR32038(i32 %n) {
; DEFAULT-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; DEFAULT-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; DEFAULT-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; DEFAULT-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP3]], -5
; DEFAULT-NEXT: [[TMP34]] = add i32 [[BIN_EXTRA]], [[TMP17]]
; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], -5
; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef
; DEFAULT-NEXT: br label [[FOR_BODY]]
;
; GATHER-LABEL: @PR32038(
@ -138,7 +138,7 @@ define void @PR32038(i32 %n) {
; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
; GATHER-NEXT: br label [[FOR_BODY:%.*]]
; GATHER: for.body:
; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP19]]
; GATHER-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80
@ -169,8 +169,8 @@ define void @PR32038(i32 %n) {
; GATHER-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; GATHER-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; GATHER-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP8]], -5
; GATHER-NEXT: [[TMP34]] = add i32 [[BIN_EXTRA]], [[TMP17]]
; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP8]], -5
; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
; GATHER-NEXT: br label [[FOR_BODY]]
;
; MAX-COST-LABEL: @PR32038(

View File

@ -14,7 +14,7 @@ define i32 @test(i32* nocapture readonly %p) {
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* %p, i64 7
; CHECK-NEXT: br label %for.body
; CHECK: for.body:
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %entry ], [ %add.7, %for.body ]
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %entry ], [ %bin.extra, %for.body ]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* %p to <8 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, [[TMP1]]
@ -32,10 +32,10 @@ define i32 @test(i32* nocapture readonly %p) {
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[ADD_7:%.*]] = add i32 [[TMP4]], [[SUM]]
; CHECK-NEXT: br i1 true, label %for.end, label %for.body
; CHECK-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP4]], [[SUM]]
; CHECK: br i1 true, label %for.end, label %for.body
; CHECK: for.end:
; CHECK-NEXT: ret i32 [[ADD_7]]
; CHECK-NEXT: ret i32 [[BIN_EXTRA]]
;
entry:
%arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1

View File

@ -12,7 +12,7 @@ define i32 @foo(i32* nocapture readonly %diff) #0 {
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[ADD52:%.*]] = add nsw i32 [[TMP15]],
; CHECK: [[ADD52:%.*]] = add i32 [[TMP15]],
; CHECK: ret i32 [[ADD52]]
;
entry: