1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[X86] Remove VPTESTM/VPTESTNM ISD opcodes. Use isel patterns matching cmpm eq/ne with immallzeros.

llvm-svn: 323612
This commit is contained in:
Craig Topper 2018-01-28 00:56:30 +00:00
parent 03e2665282
commit 54f7fd3255
7 changed files with 120 additions and 149 deletions

View File

@ -451,8 +451,7 @@ namespace {
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
unsigned Opcode = N->getOpcode();
if (Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU ||
if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMU ||
Opcode == X86ISD::CMPM_RND) {
// We can get 256-bit 8 element types here without VLX being enabled. When
// this happens we will use 512-bit operations and the mask will not be

View File

@ -5043,8 +5043,6 @@ static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
switch (Opcode) {
default:
return false;
case X86ISD::TESTM:
case X86ISD::TESTNM:
case X86ISD::CMPM:
case X86ISD::CMPMU:
case X86ISD::CMPM_RND:
@ -14639,9 +14637,11 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
assert(Cond.getValueType().getScalarSizeInBits() ==
VT.getScalarSizeInBits() &&
"Should have a size-matched integer condition!");
// Build a mask by testing the condition against itself (tests for zero).
// Build a mask by testing the condition against zero.
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue Mask = DAG.getNode(X86ISD::TESTM, dl, MaskVT, Cond, Cond);
SDValue Mask = DAG.getNode(X86ISD::CMPM, dl, MaskVT, Cond,
getZeroVector(VT, Subtarget, DAG, dl),
DAG.getConstant(4, dl, MVT::i8));
// Now return a new VSELECT using the mask.
return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2));
}
@ -16609,7 +16609,9 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
In = DAG.getNode(ISD::SHL, DL, InVT, In,
DAG.getConstant(ShiftInx, DL, InVT));
}
return DAG.getNode(X86ISD::TESTM, DL, VT, In, In);
return DAG.getNode(X86ISD::CMPM, DL, VT, In,
getZeroVector(InVT, Subtarget, DAG, DL),
DAG.getConstant(4, DL, MVT::i8));
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
@ -17766,26 +17768,6 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
if (Swap)
std::swap(Op0, Op1);
// See if it is the case of CMP(EQ|NEQ,AND(A,B),ZERO) and change it to TESTM|NM.
if (SSECC == 4 || SSECC == 0) {
SDValue A = peekThroughBitcasts(Op0);
if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) &&
ISD::isBuildVectorAllZeros(Op1.getNode())) {
MVT VT0 = Op0.getSimpleValueType();
SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0));
SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1));
return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM,
dl, VT, RHS, LHS);
}
// If this is just a comparison with 0 without an AND, we can just use
// the same input twice to avoid creating a zero vector.
if (ISD::isBuildVectorAllZeros(Op1.getNode())) {
return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM,
dl, VT, Op0, Op0);
}
}
unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) ? X86ISD::CMPMU
: X86ISD::CMPM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
@ -25365,8 +25347,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVMSK: return "X86ISD::MOVMSK";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
case X86ISD::TESTM: return "X86ISD::TESTM";
case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL";
@ -37674,28 +37654,6 @@ static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
// DAG-combine for the (now removed by this commit) X86ISD::TESTM node.
// TESTM computes a k-mask where bit i = ((Op0 & Op1) elt i) != 0, so it is
// commutative and AND-absorbing; both rewrites below rely on that.
// Returns the simplified node, or an empty SDValue() if no combine applies.
static SDValue combineTestM(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
MVT VT = N->getSimpleValueType(0);
SDLoc DL(N);
// TEST (AND a, b) ,(AND a, b) -> TEST a, b
// Valid because (x & x) == x, so testing (a&b) against itself is the same
// as testing a against b. Note: only fires when BOTH operands are the same
// node (Op0 == Op1), i.e. the TEST was built from a single AND.
if (Op0 == Op1 && Op1->getOpcode() == ISD::AND)
return DAG.getNode(X86ISD::TESTM, DL, VT, Op0->getOperand(0),
Op0->getOperand(1));
// TEST op0, BUILD_VECTOR(all_zero) -> BUILD_VECTOR(all_zero)
// TEST BUILD_VECTOR(all_zero), op1 -> BUILD_VECTOR(all_zero)
// Anything ANDed with zero is zero, so the resulting mask is all-false;
// fold the whole node to a zero vector of the mask type VT.
if (ISD::isBuildVectorAllZeros(Op0.getNode()) ||
ISD::isBuildVectorAllZeros(Op1.getNode()))
return getZeroVector(VT, Subtarget, DAG, DL);
// No combine matched; let other combines / isel handle the node.
return SDValue();
}
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
@ -38001,7 +37959,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::MSCATTER:
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI, Subtarget);
case X86ISD::TESTM: return combineTestM(N, DAG, Subtarget);
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
}

View File

@ -368,10 +368,6 @@ namespace llvm {
// Vector packed fp sign bitwise comparisons.
TESTP,
// Vector "test" in AVX-512, the result is in a mask vector.
TESTM,
TESTNM,
// OR/AND test for masks.
KORTEST,
KTEST,

View File

@ -2084,6 +2084,8 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
(X86cmpm node:$src1, node:$src2, (i8 0))>;
def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
(X86cmpm node:$src1, node:$src2, (i8 4))>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
(X86cmpm node:$src1, node:$src2, (i8 6))>;
@ -5197,42 +5199,57 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, X86VectorVTInfo _> {
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
OpndItins itins, X86VectorVTInfo _, string Suffix> {
let ExeDomain = _.ExeDomain in {
let isCommutable = 1 in
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
(OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
_.ImmAllZerosV), itins.rr>,
EVEX_4V, Sched<[itins.Sched]>;
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
(OpNode (bitconvert
(_.i64VT (and _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2))))),
_.ImmAllZerosV),
itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Patterns for compare with 0 that just use the same source twice.
def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
(_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rr")
_.RC:$src, _.RC:$src))>;
def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
(_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rrk")
_.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
OpndItins itins, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))),
(OpNode (and _.RC:$src1,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))),
_.ImmAllZerosV),
itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Use 512bit version to implement 128/256 bit in case NoVLX.
multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
X86VectorVTInfo _, string Suffix> {
def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
_.ImmAllZerosV)),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(NAME # Suffix # "Zrr")
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
@ -5242,7 +5259,8 @@ multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
_.KRC))>;
def : Pat<(_.KVT (and _.KRC:$mask,
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
(OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
_.ImmAllZerosV))),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME # Suffix # "Zrrk")
(COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
@ -5251,19 +5269,38 @@ multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src2, _.SubRegIdx)),
_.KRC)>;
def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(NAME # Suffix # "Zrr")
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src, _.SubRegIdx),
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src, _.SubRegIdx)),
_.KRC))>;
def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME # Suffix # "Zrrk")
(COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src, _.SubRegIdx),
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src, _.SubRegIdx)),
_.KRC)>;
}
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
OpndItins itins, AVX512VLVectorVTInfo _,
string Suffix> {
let Predicates = [HasAVX512] in
defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512>,
defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256>,
defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>, EVEX_V256;
defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128>,
defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
@ -5272,7 +5309,7 @@ multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
OpndItins itins> {
defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
avx512vl_i32_info, "D">;
@ -5281,41 +5318,41 @@ multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
SDNode OpNode, OpndItins itins> {
PatFrag OpNode, OpndItins itins> {
let Predicates = [HasBWI] in {
defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info>,
defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info, "W">,
EVEX_V512, VEX_W;
defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info>,
defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info, "B">,
EVEX_V512;
}
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info>,
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info, "W">,
EVEX_V256, VEX_W;
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info>,
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info, "W">,
EVEX_V128, VEX_W;
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info>,
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info, "B">,
EVEX_V256;
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info>,
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info, "B">,
EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, "B">;
defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, "B">;
defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, "W">;
defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, "W">;
}
}
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
SDNode OpNode, OpndItins itins> :
PatFrag OpNode, OpndItins itins> :
avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm,
defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
SSE_BIT_ITINS_P>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm,
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
SSE_BIT_ITINS_P>, T8XS;

View File

@ -234,10 +234,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>,
SDTCisSameNumEltsAs<0, 1>]>;
def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
@ -248,8 +244,6 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
def X86testm : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>;
def X86testnm : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>;
def X86movmsk : SDNode<"X86ISD::MOVMSK",
SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>;

View File

@ -7,8 +7,8 @@
define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_sext_v8i16:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
@ -17,8 +17,8 @@ define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
;
; AVX512VL-LABEL: testv8i1_sext_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
@ -42,10 +42,9 @@ define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i8:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vmovdqa (%rsi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT: vpmovdw %ymm1, %xmm1
@ -59,10 +58,9 @@ define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
;
; AVX512VL-LABEL: testv16i1_sext_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
@ -92,10 +90,9 @@ define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i16:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vmovdqa (%rsi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm1, %xmm1
@ -106,10 +103,9 @@ define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
;
; AVX512VL-LABEL: testv16i1_sext_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
@ -137,8 +133,8 @@ define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_zext_v8i16:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
; AVX256-NEXT: vzeroupper
@ -146,8 +142,8 @@ define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
;
; AVX512VL-LABEL: testv8i1_zext_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
@ -170,10 +166,9 @@ define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i8:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vmovdqa (%rsi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT: movl {{.*}}(%rip), %eax
; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k2} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
@ -188,10 +183,9 @@ define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
;
; AVX512VL-LABEL: testv16i1_zext_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
@ -221,10 +215,9 @@ define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i16:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vmovdqa (%rsi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT: movl {{.*}}(%rip), %eax
; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
@ -235,10 +228,9 @@ define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
;
; AVX512VL-LABEL: testv16i1_zext_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0

View File

@ -11,10 +11,9 @@
define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8 x i32>* %b) {
; AVX256VL-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX256VL: # %bb.0:
; AVX256VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX256VL-NEXT: vmovdqa (%rsi), %ymm1
; AVX256VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256VL-NEXT: vptestnmd %ymm1, %ymm1, %k2
; AVX256VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX256VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256VL-NEXT: vpmovdw %ymm1, %xmm1
@ -45,10 +44,9 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8
;
; AVX512VL-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vptestnmd %ymm1, %ymm1, %k2
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
@ -61,10 +59,9 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8
;
; AVX256VLBW-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX256VLBW: # %bb.0:
; AVX256VLBW-NEXT: vmovdqa (%rdi), %ymm0
; AVX256VLBW-NEXT: vmovdqa (%rsi), %ymm1
; AVX256VLBW-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX256VLBW-NEXT: vptestnmd %ymm1, %ymm1, %k1
; AVX256VLBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256VLBW-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; AVX256VLBW-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX256VLBW-NEXT: vpmovm2w %k1, %ymm0
; AVX256VLBW-NEXT: vpmovm2w %k0, %ymm1
; AVX256VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
@ -76,10 +73,9 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8
;
; AVX512VLBW-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VLBW-NEXT: vmovdqa (%rsi), %ymm1
; AVX512VLBW-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VLBW-NEXT: vptestnmd %ymm1, %ymm1, %k2
; AVX512VLBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX512VLBW-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX512VLBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VLBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]