mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[InstCombine][SSE] Added support for UNDEF mask elements to the VPERMD/VPERMPS shuffle combine.
llvm-svn: 268206
This commit is contained in:
parent
e0944ffa06
commit
b50a7dc851
@ -696,25 +696,30 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
|
||||
if (!V)
|
||||
return nullptr;
|
||||
|
||||
VectorType *VecTy = cast<VectorType>(II.getType());
|
||||
auto *VecTy = cast<VectorType>(II.getType());
|
||||
auto *MaskEltTy = Type::getInt32Ty(II.getContext());
|
||||
unsigned Size = VecTy->getNumElements();
|
||||
assert(Size == 8 && "Unexpected shuffle mask size");
|
||||
|
||||
// Initialize the resulting shuffle mask to all zeroes.
|
||||
uint32_t Indexes[8] = {0};
|
||||
// Construct a shuffle mask from constant integers or UNDEFs.
|
||||
Constant *Indexes[8] = {NULL};
|
||||
|
||||
for (unsigned I = 0; I < Size; ++I) {
|
||||
Constant *COp = V->getAggregateElement(I);
|
||||
if (!COp || !isa<ConstantInt>(COp))
|
||||
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
|
||||
return nullptr;
|
||||
|
||||
if (isa<UndefValue>(COp)) {
|
||||
Indexes[I] = UndefValue::get(MaskEltTy);
|
||||
continue;
|
||||
}
|
||||
|
||||
APInt Index = cast<ConstantInt>(COp)->getValue();
|
||||
Index = Index.getLoBits(3);
|
||||
Indexes[I] = (uint32_t)Index.getZExtValue();
|
||||
Index = Index.zextOrTrunc(32).getLoBits(3);
|
||||
Indexes[I] = ConstantInt::get(MaskEltTy, Index);
|
||||
}
|
||||
|
||||
auto ShuffleMask =
|
||||
ConstantDataVector::get(II.getContext(), makeArrayRef(Indexes, Size));
|
||||
auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
|
||||
auto V1 = II.getArgOperand(0);
|
||||
auto V2 = UndefValue::get(VecTy);
|
||||
return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
|
||||
|
@ -61,12 +61,12 @@ define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) {
|
||||
ret <8 x float> %a
|
||||
}
|
||||
|
||||
; FIXME: Verify that instcombine is able to fold constant shuffles with undef mask elements.
|
||||
; Verify that instcombine is able to fold constant shuffles with undef mask elements.
|
||||
|
||||
define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
|
||||
; CHECK-LABEL: @undef_test_vpermd(
|
||||
; CHECK-NEXT: [[A:%.*]] = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A:%.*]]0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
|
||||
; CHECK-NEXT: ret <8 x i32> [[A]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
|
||||
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
|
||||
;
|
||||
%a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
|
||||
ret <8 x i32> %a
|
||||
@ -74,8 +74,8 @@ define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
|
||||
|
||||
define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
|
||||
; CHECK-LABEL: @undef_test_vpermps(
|
||||
; CHECK-NEXT: [[A:%.*]] = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A:%.*]]0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
|
||||
; CHECK-NEXT: ret <8 x float> [[A]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
|
||||
; CHECK-NEXT: ret <8 x float> [[TMP1]]
|
||||
;
|
||||
%a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
|
||||
ret <8 x float> %a
|
||||
|
Loading…
x
Reference in New Issue
Block a user