1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[SLP] Fix the trunc instruction insertion problem

The current SLP pass contains a piece of code that inserts a trunc instruction
immediately after the vectorized instruction. When the vectorized instruction
is a phi node that is not the last phi node in its BB, the trunc instruction
is inserted between two phi nodes, which triggers a verifier failure
in debug builds or an unpredictable error in another pass.
To fix this problem, this patch changes the algorithm so that, if the last
vectorized instruction is a phi, the trunc is inserted after the last phi node
in the current BB.
This commit is contained in:
Bu Le 2021-03-17 13:15:56 +03:00 committed by Anton Afanasyev
parent f84a9cd429
commit b2b1c4104c
2 changed files with 14 additions and 8 deletions

View File

@ -4909,8 +4909,14 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// sign extend the extracted values below.
auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
if (MinBWs.count(ScalarRoot)) {
if (auto *I = dyn_cast<Instruction>(VectorRoot))
Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
if (auto *I = dyn_cast<Instruction>(VectorRoot)) {
// If the vectorized root is a phi, set the insertion point after the last
// phi node of its block so that nothing is inserted between two phi nodes.
if (isa<PHINode>(I))
Builder.SetInsertPoint(&*I->getParent()->getFirstInsertionPt());
else
Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
}
auto BundleWidth = VectorizableTree[0]->Scalars.size();
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -disable-verify -slp-vectorizer -S | FileCheck %s
; RUN: opt < %s -slp-vectorizer -S | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
@d = internal unnamed_addr global i32 5, align 4
@ -8,7 +8,7 @@ define dso_local void @l() local_unnamed_addr {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP12:%.*]], [[BB25:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ]
; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB11:%.*]]
; CHECK: bb3:
; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32
@ -28,12 +28,12 @@ define dso_local void @l() local_unnamed_addr {
; CHECK: bb25:
; CHECK-NEXT: [[I28:%.*]] = phi i32 [ [[I12]], [[BB11]] ], [ [[I4]], [[BB3]] ]
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ [[TMP9]], [[BB11]] ], [ [[TMP3]], [[BB3]] ]
; CHECK-NEXT: [[TMP11:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i8>
; CHECK-NEXT: [[TMP12]] = phi <2 x i16> [ [[TMP4]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i8> [[TMP11]], i32 0
; CHECK-NEXT: [[TMP11]] = phi <2 x i16> [ [[TMP4]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
; CHECK-NEXT: [[TMP12:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i8>
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i8> [[TMP12]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = zext i8 [[TMP13]] to i32
; CHECK-NEXT: [[I31:%.*]] = and i32 undef, [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i8> [[TMP11]], i32 1
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i8> [[TMP12]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = zext i8 [[TMP15]] to i32
; CHECK-NEXT: [[I32:%.*]] = and i32 [[I31]], [[TMP16]]
; CHECK-NEXT: [[I33:%.*]] = and i32 [[I32]], [[I28]]