mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[SLP] Fix the trunc instruction insertion problem
The SLP pass currently inserts a trunc instruction immediately after the vectorized instruction. When the vectorized instruction is a phi node that is not the last phi node in its basic block, the trunc instruction ends up between two phi nodes, which triggers a verifier failure in debug builds or unpredictable errors in later passes. This patch fixes the problem by changing the logic to: if the last vectorized instruction is a phi, insert the trunc instruction after the last phi node in the current basic block.
This commit is contained in:
parent
f84a9cd429
commit
b2b1c4104c
@ -4909,8 +4909,14 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
|
||||
// sign extend the extracted values below.
|
||||
auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
|
||||
if (MinBWs.count(ScalarRoot)) {
|
||||
if (auto *I = dyn_cast<Instruction>(VectorRoot))
|
||||
Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
|
||||
if (auto *I = dyn_cast<Instruction>(VectorRoot)) {
|
||||
// If current instr is a phi and not the last phi, insert it after the
|
||||
// last phi node.
|
||||
if (isa<PHINode>(I))
|
||||
Builder.SetInsertPoint(&*I->getParent()->getFirstInsertionPt());
|
||||
else
|
||||
Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
|
||||
}
|
||||
auto BundleWidth = VectorizableTree[0]->Scalars.size();
|
||||
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
|
||||
auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -disable-verify -slp-vectorizer -S | FileCheck %s
|
||||
; RUN: opt < %s -slp-vectorizer -S | FileCheck %s
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
@d = internal unnamed_addr global i32 5, align 4
|
||||
|
||||
@ -8,7 +8,7 @@ define dso_local void @l() local_unnamed_addr {
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: br label [[BB1:%.*]]
|
||||
; CHECK: bb1:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP12:%.*]], [[BB25:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ]
|
||||
; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB11:%.*]]
|
||||
; CHECK: bb3:
|
||||
; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32
|
||||
@ -28,12 +28,12 @@ define dso_local void @l() local_unnamed_addr {
|
||||
; CHECK: bb25:
|
||||
; CHECK-NEXT: [[I28:%.*]] = phi i32 [ [[I12]], [[BB11]] ], [ [[I4]], [[BB3]] ]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ [[TMP9]], [[BB11]] ], [ [[TMP3]], [[BB3]] ]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i8>
|
||||
; CHECK-NEXT: [[TMP12]] = phi <2 x i16> [ [[TMP4]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i8> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP11]] = phi <2 x i16> [ [[TMP4]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i8>
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i8> [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = zext i8 [[TMP13]] to i32
|
||||
; CHECK-NEXT: [[I31:%.*]] = and i32 undef, [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i8> [[TMP11]], i32 1
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i8> [[TMP12]], i32 1
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = zext i8 [[TMP15]] to i32
|
||||
; CHECK-NEXT: [[I32:%.*]] = and i32 [[I31]], [[TMP16]]
|
||||
; CHECK-NEXT: [[I33:%.*]] = and i32 [[I32]], [[I28]]
|
||||
|
Loading…
Reference in New Issue
Block a user