mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] Fold CONCAT(VPERMV3(X,Y,M0),VPERMV3(Z,W,M1)) -> VPERMV3(CONCAT(X,Z),CONCAT(Y,W),CONCAT(M0,M1))
Further prep work toward supporting different subvector sizes in combineX86ShufflesRecursively.
This commit is contained in:
parent
3423fa8d78
commit
7df5c0cf1d
@ -48813,6 +48813,38 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
|
||||
return DAG.getBitcast(VT, Res);
|
||||
}
|
||||
break;
|
||||
case X86ISD::VPERMV3:
|
||||
if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
|
||||
MVT OpVT = Op0.getSimpleValueType();
|
||||
int NumSrcElts = OpVT.getVectorNumElements();
|
||||
SmallVector<int, 64> ConcatMask;
|
||||
for (unsigned i = 0; i != NumOps; ++i) {
|
||||
bool IsUnary;
|
||||
SmallVector<int, 64> SubMask;
|
||||
SmallVector<SDValue, 2> SubOps;
|
||||
if (!getTargetShuffleMask(Ops[i].getNode(), OpVT, false, SubOps,
|
||||
SubMask, IsUnary))
|
||||
break;
|
||||
for (int M : SubMask) {
|
||||
if (0 <= M) {
|
||||
M += M < NumSrcElts ? 0 : NumSrcElts;
|
||||
M += i * NumSrcElts;
|
||||
}
|
||||
ConcatMask.push_back(M);
|
||||
}
|
||||
}
|
||||
if (ConcatMask.size() == (NumOps * NumSrcElts)) {
|
||||
SDValue Src0 = concatSubVectors(Ops[0].getOperand(0),
|
||||
Ops[1].getOperand(0), DAG, DL);
|
||||
SDValue Src1 = concatSubVectors(Ops[0].getOperand(2),
|
||||
Ops[1].getOperand(2), DAG, DL);
|
||||
MVT IntMaskSVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
|
||||
MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
|
||||
SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
|
||||
return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case X86ISD::VSHLI:
|
||||
case X86ISD::VSRAI:
|
||||
case X86ISD::VSRLI:
|
||||
|
@ -145,11 +145,10 @@ define <32 x i16> @concat_trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) nou
|
||||
; AVX512-NEXT: vpsrad $23, %zmm1, %zmm1
|
||||
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
|
||||
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = ashr <16 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
|
||||
%2 = ashr <16 x i32> %a1, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
|
||||
@ -166,11 +165,10 @@ define <32 x i16> @concat_trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) nou
|
||||
; AVX512-NEXT: vpsrld $23, %zmm1, %zmm1
|
||||
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
|
||||
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = lshr <16 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
|
||||
%2 = lshr <16 x i32> %a1, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
|
||||
@ -183,26 +181,25 @@ define <32 x i16> @concat_trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) nou
|
||||
define <64 x i8> @concat_trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) nounwind {
|
||||
; AVX512F-LABEL: concat_trunc_packsswb_512:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
|
||||
; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
|
||||
; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
|
||||
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512F-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512F-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512F-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: concat_trunc_packsswb_512:
|
||||
@ -211,11 +208,10 @@ define <64 x i8> @concat_trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512BW-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512BW-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%1 = ashr <32 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
|
||||
%2 = and <32 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
||||
@ -228,26 +224,25 @@ define <64 x i8> @concat_trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
|
||||
define <64 x i8> @concat_trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) nounwind {
|
||||
; AVX512F-LABEL: concat_trunc_packuswb_512:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $15, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
|
||||
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512F-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512F-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512F-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: concat_trunc_packuswb_512:
|
||||
@ -256,11 +251,10 @@ define <64 x i8> @concat_trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512BW-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512BW-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%1 = lshr <32 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
|
||||
%2 = and <32 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
||||
|
Loading…
x
Reference in New Issue
Block a user