
[CodeGen] Update transformations to use poison for shufflevector/insertelem's initial vector elem

This patch is part of D93817 and makes the transformations in CodeGen use poison for the initial vector element of shufflevector/insertelement.

The change in CodeGenPrepare.cpp is safe because the shufflevector's mask is always all-zero: the shuffle only ever reads element 0 of its first operand and never touches the second operand (which is now poison).
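As a minimal illustration (the <4 x i32> type and the value names are made up for this sketch), the pattern optimizeShuffleVectorInst accepts is a splat like the following; the all-zero mask reads only lane 0 of %insert, so neither the poison lanes of %insert nor the poison second operand is ever observed:

    ; splat %val across all four lanes; only lane 0 of %insert is read
    %insert = insertelement <4 x i32> poison, i32 %val, i64 0
    %splat  = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer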

The change in InterleavedAccessPass.cpp is also safe because the mask has the form <a, a+m, a+2m, ..., a+km>, where a+km is smaller than the number of elements of the first vector operand.
This is guaranteed by the caller of replaceBinOpShuffles, which is lowerInterleavedLoad.
It calls isDeInterleaveMask and isDeInterleaveMaskOfFactor to check that the mask has the desired form, and isDeInterleaveMask checks that a+km is smaller than the vector size.
To double-check this, I added an assertion and a test showing that the optimization does not fire when the mask indexes past the first operand.
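For concreteness, an illustrative factor-2 de-interleave in that form (a = 0 or 1, m = 2, k = 3): the largest mask index a+km is 6 or 7, which stays below the 8 elements of the first operand, so the poison second operand is never read:

    %wide.vec = load <8 x i32>, <8 x i32>* %ptr
    %even = shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    %odd  = shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

By contrast, a mask such as <0, 4> on a <4 x double> operand indexes into the second shuffle operand; the new @shuffle_binop_fol_oob test below checks that the pass leaves that pattern alone.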

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D94056
Juneyoung Lee 2021-01-10 17:22:54 +09:00
parent 074dac83dc
commit 71add9c8eb
3 changed files with 32 additions and 11 deletions

CodeGenPrepare.cpp

@@ -6699,6 +6699,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
 /// in MVE takes a GPR (integer) register, and the instruction that incorporate
 /// a VDUP (such as a VADD qd, qm, rm) also require a gpr register.
 bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+  // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
   if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
     return false;
@@ -6718,9 +6719,7 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
   Builder.SetInsertPoint(SVI);
   Value *BC1 = Builder.CreateBitCast(
       cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
-  Value *Insert = Builder.CreateInsertElement(UndefValue::get(NewVecType), BC1,
-                                              (uint64_t)0);
-  Value *Shuffle = Builder.CreateShuffleVector(Insert, SVI->getShuffleMask());
+  Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
   Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
   SVI->replaceAllUsesWith(BC2);
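For reference, a hedged sketch of the IR the splat helper produces, assuming NewVecType is <4 x i32> (the real type comes from the bitcast above) and shown with poison placeholders in line with this patch series (older builders may use undef). It is the same insertelement-plus-zero-mask-shufflevector idiom the removed lines built by hand:

    ; only lane 0 of %ins is ever read by the splat shuffle
    %ins   = insertelement <4 x i32> poison, i32 %bc1, i64 0
    %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer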

InterleavedAccessPass.cpp

@@ -22,8 +22,8 @@
 //
 // E.g. An interleaved load (Factor = 2):
 //        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
-//        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
-//        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
+//        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
+//        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
 //
 // It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
 // intrinsic in ARM backend.
@@ -351,6 +351,7 @@ bool InterleavedAccess::lowerInterleavedLoad(
                                         Index))
       return false;
+    assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
     Indices.push_back(Index);
   }
   for (auto *Shuffle : BinOpShuffles) {
@@ -360,6 +361,8 @@ bool InterleavedAccess::lowerInterleavedLoad(
                                         Index))
       return false;
+    assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
+
     if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
       Indices.push_back(Index);
     if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
@@ -394,13 +397,17 @@ bool InterleavedAccess::replaceBinOpShuffles(
     SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
   for (auto *SVI : BinOpShuffles) {
     BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
+    Type *BIOp0Ty = BI->getOperand(0)->getType();
     ArrayRef<int> Mask = SVI->getShuffleMask();
+    assert(all_of(Mask, [&](int Idx) {
+      return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
+    }));
-    auto *NewSVI1 = new ShuffleVectorInst(
-        BI->getOperand(0), UndefValue::get(BI->getOperand(0)->getType()), Mask,
-        SVI->getName(), SVI);
+    auto *NewSVI1 =
+        new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
+                              Mask, SVI->getName(), SVI);
     auto *NewSVI2 = new ShuffleVectorInst(
-        BI->getOperand(1), UndefValue::get(BI->getOperand(1)->getType()), Mask,
+        BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
         SVI->getName(), SVI);
     Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2,
                                           BI->getName(), SVI);

InterleavedAccess regression test (.ll)

@@ -10,8 +10,8 @@ define <2 x double> @shuffle_binop_fol(<4 x double>* %ptr) {
 ; CHECK-LABEL: @shuffle_binop_fol(
 ; CHECK-NEXT:  vector.body.preheader:
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[EXTRACTED2:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, <4 x double> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[EXTRACTED2:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, <4 x double> poison, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[FADD3:%.*]] = fadd <2 x double> [[EXTRACTED1]], [[EXTRACTED2]]
 ; CHECK-NEXT:    ret <2 x double> [[FADD3]]
 ;
@@ -22,6 +22,21 @@ vector.body.preheader:
   ret <2 x double> %extracted
 }
 
+define <2 x double> @shuffle_binop_fol_oob(<4 x double>* %ptr) {
+; CHECK-LABEL: @shuffle_binop_fol_oob(
+; CHECK-NEXT:  vector.body.preheader:
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[FADD:%.*]] = fadd <4 x double> [[WIDE_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = shufflevector <4 x double> [[FADD]], <4 x double> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-NEXT:    ret <2 x double> [[EXTRACTED]]
+;
+vector.body.preheader:
+  %wide.load = load <4 x double>, <4 x double>* %ptr, align 8
+  %fadd = fadd <4 x double> %wide.load, <double 1.0, double 1.0, double 1.0, double 1.0>
+  %extracted = shufflevector <4 x double> %fadd, <4 x double> undef, <2 x i32> <i32 0, i32 4>
+  ret <2 x double> %extracted
+}
+
 ; No interleaved load instruction is generated, but the extractelement
 ; instructions are updated to use the shuffle instead of the load.
 define void @shuffle_extract(<4 x double>* %ptr, i1 %c) {