mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[SLP] Fix PR38339: Instruction does not dominate all uses!
Summary: If the ExtractElement instructions can be optimized out during vectorization and the parent vector needs to be reshuffled, the resulting shuffle instruction may be inserted in the wrong place, causing the compiler to produce incorrect code. Reviewers: spatel, RKSimon, mkuper, hfinkel, javed.absar Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D49928 llvm-svn: 338380
This commit is contained in:
parent
55c4b0aada
commit
97ffeb4b92
@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
// TODO: Merge this shuffle with the ReorderShuffleMask.
|
||||
if (!E->ReorderIndices.empty())
|
||||
Builder.SetInsertPoint(VL0);
|
||||
else if (auto *I = dyn_cast<Instruction>(V))
|
||||
Builder.SetInsertPoint(I->getParent(),
|
||||
std::next(I->getIterator()));
|
||||
else
|
||||
Builder.SetInsertPoint(&F->getEntryBlock(),
|
||||
F->getEntryBlock().getFirstInsertionPt());
|
||||
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
|
||||
E->ReuseShuffleIndices, "shuffle");
|
||||
}
|
||||
|
29
test/Transforms/SLPVectorizer/AArch64/PR38339.ll
Normal file
29
test/Transforms/SLPVectorizer/AArch64/PR38339.ll
Normal file
@ -0,0 +1,29 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -slp-vectorizer -S -mtriple=aarch64-apple-ios -mcpu=cyclone -o - %s | FileCheck %s
|
||||
|
||||
; Regression test for PR38339. Lanes 0 and 1 of %x are extracted and stored to
; four consecutive i16 slots in the lane order <0, 1, 1, 0>, plus one extra
; scalar store of lane 0 to %a. The expected output below has the four slot
; stores replaced by a single <4 x i16> store of a shufflevector of %x, with
; that shufflevector placed ahead of the extractelement that reads from it.
define void @f1(<2 x i16> %x, i16* %a) {
|
||||
; CHECK-LABEL: @f1(
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
|
||||
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
|
||||
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
|
||||
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
|
||||
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
|
||||
; CHECK-NEXT: store i16 [[TMP1]], i16* [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
|
||||
; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP2]], align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; Scalar inputs: %t2 is lane 0 of %x, %t3 is lane 1.
%t2 = extractelement <2 x i16> %x, i32 0
|
||||
%t3 = extractelement <2 x i16> %x, i32 1
|
||||
; Addresses of elements 0..3 of a [4 x i16] array (the base pointer is undef).
%ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
|
||||
%ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
|
||||
|
||||
%ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
|
||||
%ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
|
||||
; Extra scalar use of lane 0, stored outside the four-element array.
store i16 %t2, i16* %a
|
||||
; Stores to elements 0..3 use the values %t2, %t3, %t3, %t2 in that order.
store i16 %t2, i16* %ptr0
|
||||
store i16 %t3, i16* %ptr1
|
||||
|
||||
store i16 %t3, i16* %ptr2
|
||||
store i16 %t2, i16* %ptr3
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user