Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2025-01-31 12:41:49 +01:00)
[X86][AVX] matchShuffleWithSHUFPD - add support for zeroable operands

Determine whether all of the uses of the LHS/RHS operands can be replaced with a zero vector.

llvm-svn: 372013
commit 64045ff52d (parent 4c69ced591)
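In outline, the added scan classifies an operand as "zeroable" when every mask slot that reads it is undef or zero: VSHUFPD writes even result elements from V1 and odd result elements from V2, so mask slot i refers to operand (i & 1). Below is a minimal standalone sketch of that idea, not the LLVM code itself; the sentinel constants mirror LLVM's shuffle-mask sentinels, everything else is illustrative.

#include <cstdio>
#include <vector>

const int SM_SentinelUndef = -1; // mirrors LLVM's shuffle-mask sentinels
const int SM_SentinelZero  = -2;

static bool isUndefOrZero(int M) {
  return M == SM_SentinelUndef || M == SM_SentinelZero;
}

int main() {
  // v4f64 mask for <a[0], 0, a[3], 0>: all odd (V2) slots are zeroable.
  std::vector<int> Mask = {0, SM_SentinelZero, 3, SM_SentinelZero};

  bool ZeroLane[2] = {true, true}; // [0] tracks V1, [1] tracks V2
  for (int i = 0, e = (int)Mask.size(); i != e; ++i)
    ZeroLane[i & 1] &= isUndefOrZero(Mask[i]);

  // Prints ForceV1Zero=0 ForceV2Zero=1: V2's uses can be replaced by a
  // real zero vector and the shuffle becomes a single VSHUFPD vs zero.
  std::printf("ForceV1Zero=%d ForceV2Zero=%d\n", ZeroLane[0], ZeroLane[1]);
}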
lib/Target/X86/X86ISelLowering.cpp
@@ -15437,11 +15437,18 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
 }
 
 static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
+                                   bool &ForceV1Zero, bool &ForceV2Zero,
                                    unsigned &ShuffleImm, ArrayRef<int> Mask) {
   int NumElts = VT.getVectorNumElements();
   assert(VT.getScalarSizeInBits() == 64 &&
          (NumElts == 2 || NumElts == 4 || NumElts == 8) &&
          "Unexpected data type for VSHUFPD");
+  assert(isUndefOrZeroOrInRange(Mask, 0, 2 * NumElts) &&
+         "Illegal shuffle mask");
+
+  bool ZeroLane[2] = { true, true };
+  for (int i = 0; i < NumElts; ++i)
+    ZeroLane[i & 1] &= isUndefOrZero(Mask[i]);
 
   // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
   // Mask for V4F64; 0/1, 4/5, 2/3, 6/7..
@@ -15449,7 +15456,7 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
   bool ShufpdMask = true;
   bool CommutableMask = true;
   for (int i = 0; i < NumElts; ++i) {
-    if (Mask[i] == SM_SentinelUndef)
+    if (Mask[i] == SM_SentinelUndef || ZeroLane[i & 1])
       continue;
     if (Mask[i] < 0)
       return false;
@@ -15462,26 +15469,39 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
     ShuffleImm |= (Mask[i] % 2) << i;
   }
 
-  if (ShufpdMask)
-    return true;
-  if (CommutableMask) {
-    std::swap(V1, V2);
-    return true;
-  }
-
-  return false;
+  if (!ShufpdMask && !CommutableMask)
+    return false;
+
+  if (!ShufpdMask && CommutableMask)
+    std::swap(V1, V2);
+
+  ForceV1Zero = ZeroLane[0];
+  ForceV2Zero = ZeroLane[1];
+  return true;
 }
 
-static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
-                                      ArrayRef<int> Mask, SDValue V1,
-                                      SDValue V2, SelectionDAG &DAG) {
-  assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&&
+static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1,
+                                      SDValue V2, ArrayRef<int> Original,
+                                      const APInt &Zeroable,
+                                      const X86Subtarget &Subtarget,
+                                      SelectionDAG &DAG) {
+  assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) &&
          "Unexpected data type for VSHUFPD");
 
+  SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable);
+
   unsigned Immediate = 0;
-  if (!matchShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
+  bool ForceV1Zero = false, ForceV2Zero = false;
+  if (!matchShuffleWithSHUFPD(VT, V1, V2, ForceV1Zero, ForceV2Zero, Immediate,
+                              Mask))
     return SDValue();
 
+  // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
+  if (ForceV1Zero)
+    V1 = getZeroVector(VT, Subtarget, DAG, DL);
+  if (ForceV2Zero)
+    V2 = getZeroVector(VT, Subtarget, DAG, DL);
+
   return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
                      DAG.getConstant(Immediate, DL, MVT::i8));
 }
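For reference, the matcher builds the SHUFPD immediate with ShuffleImm |= (Mask[i] % 2) << i, and zeroable slots are simply skipped (they contribute a 0 bit). A hedged worked example, a standalone sketch rather than the LLVM code, using the v4f64 mask <0, zero, 3, zero> from the shuffle_v4f64_0z3z test updated below:

#include <cstdio>

int main() {
  const int Z = -2;              // stands in for SM_SentinelZero
  int Mask[4] = {0, Z, 3, Z};    // <a[0], 0, a[3], 0>

  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i)
    if (Mask[i] >= 0)
      Imm |= (Mask[i] % 2) << i; // odd source element -> set bit i

  // Imm == 0b0100 == 4, i.e. vshufpd ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
  // with ymm1 forced to zero - exactly the codegen checked in the test diff.
  std::printf("Imm = %u\n", Imm);
}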
@@ -15551,7 +15571,8 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return Blend;
 
   // Check if the blend happens to exactly fit that of SHUFPD.
-  if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
+  if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Op;
 
   // If we have one input in place, then we can permute the other input and
@@ -16298,7 +16319,8 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return Unpck;
 
   // Check if the blend happens to exactly fit that of SHUFPD.
-  if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))
+  if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Op;
 
   if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2,
@@ -32405,7 +32427,11 @@ static bool matchBinaryPermuteShuffle(
       ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
        (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
        (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
-    if (matchShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
+    bool ForceV1Zero = false, ForceV2Zero = false;
+    if (matchShuffleWithSHUFPD(MaskVT, V1, V2, ForceV1Zero, ForceV2Zero,
+                               PermuteImm, Mask)) {
+      V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
+      V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
       Shuffle = X86ISD::SHUFP;
       ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
       return true;
test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -658,74 +658,21 @@ define <4 x double> @shuffle_v4f64_0456(<4 x double> %a, <4 x double> %b) {
 }
 
 define <4 x double> @shuffle_v4f64_0z3z(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_0z3z:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
-; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; AVX1-NEXT:    retq
-;
-; AVX2-SLOW-LABEL: shuffle_v4f64_0z3z:
-; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
-; AVX2-SLOW-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX2-SLOW-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; AVX2-SLOW-NEXT:    retq
-;
-; AVX2-FAST-LABEL: shuffle_v4f64_0z3z:
-; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-FAST-NEXT:    retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v4f64_0z3z:
-; AVX512VL-SLOW:       # %bb.0:
-; AVX512VL-SLOW-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
-; AVX512VL-SLOW-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX512VL-SLOW-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; AVX512VL-SLOW-NEXT:    retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v4f64_0z3z:
-; AVX512VL-FAST:       # %bb.0:
-; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-FAST-NEXT:    retq
+; ALL-LABEL: shuffle_v4f64_0z3z:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
+; ALL-NEXT:    retq
   %shuffle = shufflevector <4 x double> %a, <4 x double> <double 0.000000e+00, double undef, double undef, double undef>, <4 x i32> <i32 0, i32 4, i32 3, i32 4>
   ret <4 x double> %shuffle
 }
 
 define <4 x double> @shuffle_v4f64_1z2z(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_1z2z:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-SLOW-LABEL: shuffle_v4f64_1z2z:
-; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
-; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
-; AVX2-SLOW-NEXT:    retq
-;
-; AVX2-FAST-LABEL: shuffle_v4f64_1z2z:
-; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-FAST-NEXT:    retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v4f64_1z2z:
-; AVX512VL-SLOW:       # %bb.0:
-; AVX512VL-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
-; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
-; AVX512VL-SLOW-NEXT:    retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v4f64_1z2z:
-; AVX512VL-FAST:       # %bb.0:
-; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-FAST-NEXT:    retq
+; ALL-LABEL: shuffle_v4f64_1z2z:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[2]
+; ALL-NEXT:    retq
   %1 = shufflevector <4 x double> %a, <4 x double> <double 0.000000e+00, double undef, double undef, double undef>, <4 x i32> <i32 1, i32 4, i32 2, i32 4>
   ret <4 x double> %1
 }
@@ -1776,9 +1723,8 @@ define <4 x i64> @shuffle_v4i64_1230(<4 x i64> %a) {
 define <4 x i64> @shuffle_v4i64_z0z3(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_z0z3:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
+; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[3]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: shuffle_v4i64_z0z3:
@@ -1812,11 +1758,7 @@ define <4 x i64> @shuffle_v4i64_1z2z(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_1z2z:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[2]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: shuffle_v4i64_1z2z:
test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -841,19 +841,11 @@ define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
 }
 
 define <8 x double> @shuffle_v8f64_1z2z5z6z(<8 x double> %a, <8 x double> %b) {
-; AVX512F-LABEL: shuffle_v8f64_1z2z5z6z:
-; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [1,8,2,8,5,8,6,8]
-; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
-; AVX512F-NEXT:    retq
-;
-; AVX512F-32-LABEL: shuffle_v8f64_1z2z5z6z:
-; AVX512F-32:       # %bb.0:
-; AVX512F-32-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [1,0,8,0,2,0,8,0,5,0,8,0,6,0,8,0]
-; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
-; AVX512F-32-NEXT:    retl
+; ALL-LABEL: shuffle_v8f64_1z2z5z6z:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[1],zmm1[0],zmm0[2],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
+; ALL-NEXT:    ret{{[l|q]}}
   %shuffle = shufflevector <8 x double> %a, <8 x double> <double 0.000000e+00, double undef, double undef, double undef, double undef, double undef, double undef, double undef>, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 5, i32 8, i32 6, i32 8>
   ret <8 x double> %shuffle
 }
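The same immediate encoding scales to 512 bits, where each imm bit selects the low or high element of a 128-bit lane pair. A small scalar model, a sketch of my own rather than LLVM code, that reproduces the new CHECK line for shuffle_v8f64_1z2z5z6z above:

#include <cstdio>

int main() {
  double A[8] = {10, 11, 12, 13, 14, 15, 16, 17}; // zmm0
  double Zero[8] = {};                            // zmm1, the forced zero vector

  // Immediate from the canonical mask <1, z, 2, z, 5, z, 6, z>:
  // bit0 = 1%2, bit2 = 2%2, bit4 = 5%2, bit6 = 6%2 -> 0b00010001 == 0x11.
  unsigned Imm = 0x11;

  double R[8];
  for (int i = 0; i < 8; ++i) {
    const double *Src = (i & 1) ? Zero : A; // odd result elements read V2
    int Base = (i / 2) * 2;                 // start of this 128-bit lane pair
    R[i] = Src[Base + ((Imm >> i) & 1)];
  }

  // Prints 11 0 12 0 15 0 16 0, i.e. <a1,0,a2,0,a5,0,a6,0>, matching
  // vshufpd zmm0 = zmm0[1],zmm1[0],zmm0[2],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6].
  for (double V : R)
    std::printf("%g ", V);
  std::printf("\n");
}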
@@ -1767,8 +1759,8 @@ define <8 x double> @shuffle_v8f64_0z2z4z6z(<8 x double> %a, <8 x double> %b) {
 ;
 ; ALL-LABEL: shuffle_v8f64_0z2z4z6z:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; ALL-NEXT:    ret{{[l|q]}}
   %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32><i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6, i32 8>
   ret <8 x double> %shuffle
@@ -1809,8 +1801,8 @@ define <8 x double> @shuffle_v8f64_z9zbzdzf(<8 x double> %a, <8 x double> %b) {
 ;
 ; ALL-LABEL: shuffle_v8f64_z9zbzdzf:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; ALL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; ALL-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
+; ALL-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[7]
 ; ALL-NEXT:    ret{{[l|q]}}
   %shuffle = shufflevector <8 x double> zeroinitializer, <8 x double> %b, <8 x i32><i32 0, i32 9, i32 0, i32 11, i32 0, i32 13, i32 0, i32 15>
   ret <8 x double> %shuffle