mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Recommitting rL305465 after fixing bug in TableGen in rL306251 & rL306371
[X86][AVX512] Improve lowering of AVX512 compare intrinsics (remove redundant shift left+right instructions). AVX512 compare instructions return v*i1 types. In cases where the number of elements in the returned value is less than 8, clang adds zeroes to get a mask of v8i1 type. Later on it's replaced with CONCAT_VECTORS, which is then lowered to many DAG nodes including insert/extract element and shift right/left nodes. The fact that AVX512 compare instructions put the result in a k register and zero all its upper bits allows us to remove the extra nodes simply by copying the result to the required register class. When lowering, identify these cases and transform them into an INSERT_SUBVECTOR node (marked legal), then catch this pattern in the instruction selection phase and transform it into one avx512 cmp instruction. Differential Revision: https://reviews.llvm.org/D33188 llvm-svn: 306402
This commit is contained in:
parent
ff222f7817
commit
dce4b8be7b
@ -5065,6 +5065,20 @@ static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
|
||||
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
|
||||
}
|
||||
|
||||
// Return true if the instruction zeroes the unused upper part of the
|
||||
// destination and accepts mask.
|
||||
static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
|
||||
switch (Opcode) {
|
||||
default:
|
||||
return false;
|
||||
case X86ISD::PCMPEQM:
|
||||
case X86ISD::PCMPGTM:
|
||||
case X86ISD::CMPM:
|
||||
case X86ISD::CMPMU:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert i1-subvector to i1-vector.
|
||||
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
@ -5097,6 +5111,22 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
||||
// 3. Subvector should be inserted in the middle (for example v2i1
|
||||
// to v16i1, index 2)
|
||||
|
||||
// If this node widens - by concatenating zeroes - the type of the result
|
||||
// of a node with instruction that zeroes all upper (irrelevant) bits of the
|
||||
// output register, mark this node as legal to enable replacing them with
|
||||
// the v8i1 version of the previous instruction during instruction selection.
|
||||
// For example, VPCMPEQDZ128rr instruction stores its v4i1 result in a k-reg,
|
||||
// while zeroing all the upper remaining 60 bits of the register. If the
|
||||
// result of such instruction is inserted into an allZeroVector, then we can
|
||||
// safely remove insert_vector (in instruction selection) as the cmp instr
|
||||
// already zeroed the rest of the register.
|
||||
if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 &&
|
||||
(isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) ||
|
||||
(SubVec.getOpcode() == ISD::AND &&
|
||||
(isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) ||
|
||||
isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode())))))
|
||||
return Op;
|
||||
|
||||
// extend to natively supported kshift
|
||||
MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
|
||||
MVT WideOpVT = OpVT;
|
||||
@ -7919,6 +7949,60 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
|
||||
return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
|
||||
}
|
||||
|
||||
// Return true if all the operands of the given CONCAT_VECTORS node are zeros
|
||||
// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0)
|
||||
static bool isExpandWithZeros(const SDValue &Op) {
|
||||
assert(Op.getOpcode() == ISD::CONCAT_VECTORS &&
|
||||
"Expand with zeros only possible in CONCAT_VECTORS nodes!");
|
||||
|
||||
for (unsigned i = 1; i < Op.getNumOperands(); i++)
|
||||
if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode()))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns true if the given node is a type promotion (by concatenating i1
|
||||
// zeros) of the result of a node that already zeros all upper bits of
|
||||
// k-register.
|
||||
static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) {
|
||||
unsigned Opc = Op.getOpcode();
|
||||
|
||||
assert(Opc == ISD::CONCAT_VECTORS &&
|
||||
Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
|
||||
"Unexpected node to check for type promotion!");
|
||||
|
||||
// As long as we are concatenating zeros to the upper part of a previous node
|
||||
// result, climb up the tree until a node with different opcode is
|
||||
// encountered
|
||||
while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) {
|
||||
if (Opc == ISD::INSERT_SUBVECTOR) {
|
||||
if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) &&
|
||||
Op.getConstantOperandVal(2) == 0)
|
||||
Op = Op.getOperand(1);
|
||||
else
|
||||
return SDValue();
|
||||
} else { // Opc == ISD::CONCAT_VECTORS
|
||||
if (isExpandWithZeros(Op))
|
||||
Op = Op.getOperand(0);
|
||||
else
|
||||
return SDValue();
|
||||
}
|
||||
Opc = Op.getOpcode();
|
||||
}
|
||||
|
||||
// Check if the first inserted node zeroes the upper bits, or an 'and' result
|
||||
// of a node that zeros the upper bits (its masked version).
|
||||
if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) ||
|
||||
(Op.getOpcode() == ISD::AND &&
|
||||
(isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) ||
|
||||
isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) {
|
||||
return Op;
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG & DAG) {
|
||||
@ -7929,6 +8013,17 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
|
||||
assert(isPowerOf2_32(NumOfOperands) &&
|
||||
"Unexpected number of operands in CONCAT_VECTORS");
|
||||
|
||||
// If this node promotes - by concatenating zeroes - the type of the result
|
||||
// of a node with instruction that zeroes all upper (irrelevant) bits of the
|
||||
// output register, mark it as legal and catch the pattern in instruction
|
||||
// selection to avoid emitting extra instructions (for zeroing upper bits).
|
||||
if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) {
|
||||
SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64);
|
||||
SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC);
|
||||
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted,
|
||||
ZeroC);
|
||||
}
|
||||
|
||||
SDValue Undef = DAG.getUNDEF(ResVT);
|
||||
if (NumOfOperands > 2) {
|
||||
// Specialize the cases when all, or all but one, of the operands are undef.
|
||||
|
@ -185,6 +185,20 @@ def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
|
||||
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
|
||||
v2f64x_info>;
|
||||
|
||||
// Describes one vXi1 mask vector type: its value type (KVT) together with two
// register classes (KRC and KRCWM). The lowering multiclasses in this file
// take such a record as their `NewInf` argument and use KVT as the type of the
// all-zeros vector being inserted into and KRC as the COPY_TO_REGCLASS
// destination.
// NOTE(review): KRCWM is presumably the write-mask-capable variant of KRC
// (mirroring X86VectorVTInfo) - confirm against the VK*WM definitions.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
|
||||
ValueType _vt> {
|
||||
RegisterClass KRC = _krc;
|
||||
RegisterClass KRCWM = _krcwm;
|
||||
ValueType KVT = _vt;
|
||||
}
|
||||
|
||||
// One record per mask width, from 2 to 64 elements.
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
|
||||
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
|
||||
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
|
||||
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
|
||||
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
|
||||
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
|
||||
|
||||
// This multiclass generates the masking variants from the non-masking
|
||||
// variant. It only provides the assembly pieces for the masking variants.
|
||||
// It assumes custom ISel patterns for masking which can be provided as
|
||||
@ -1735,17 +1749,217 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
|
||||
avx512vl_i64_info, HasAVX512>,
|
||||
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
|
||||
(COPY_TO_REGCLASS (VPCMPGTDZrr
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
|
||||
|
||||
def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
|
||||
(COPY_TO_REGCLASS (VPCMPEQDZrr
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
|
||||
// Selection patterns for a two-operand packed integer compare (OpNode) whose
// _.KVT mask result is inserted at index 0 into an all-zeros vector of the
// wider mask type NewInf.KVT. Each pattern selects the compare instruction
// named InstrStr plus a form suffix and copies its result into NewInf.KRC, so
// the pattern output contains only the compare and a register-class copy.
//   _        - vector type info of the compared operands.
//   NewInf   - mask type info of the widened result.
//   OpNode   - compare node to match.
//   InstrStr - instruction name prefix; the suffix (rr/rm/rrk/rmk) is appended.
//   Preds    - predicates guarding all patterns.
multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
|
||||
SDNode OpNode, string InstrStr,
|
||||
list<Predicate> Preds> {
|
||||
let Predicates = Preds in {
|
||||
// Register-register compare -> InstrStr#rr.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
|
||||
NewInf.KRC)>;
|
||||
|
||||
// Second operand loaded from memory -> InstrStr#rm.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (OpNode (_.VT _.RC:$src1),
|
||||
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
|
||||
NewInf.KRC)>;
|
||||
|
||||
// Compare result and'ed with $mask, register operands -> InstrStr#rrk.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (and _.KRCWM:$mask,
|
||||
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
|
||||
_.RC:$src1, _.RC:$src2),
|
||||
NewInf.KRC)>;
|
||||
|
||||
// Compare result and'ed with $mask, memory second operand -> InstrStr#rmk.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (and (_.KVT _.KRCWM:$mask),
|
||||
(_.KVT (OpNode (_.VT _.RC:$src1),
|
||||
(_.VT (bitconvert
|
||||
(_.LdFrag addr:$src2))))))),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
|
||||
_.RC:$src1, addr:$src2),
|
||||
NewInf.KRC)>;
|
||||
}
|
||||
}
|
||||
|
||||
// Extends avx512_icmp_packed_lowering (same parameters) with the forms whose
// second operand is a broadcast of a scalar load: the unmasked form selects
// InstrStr#rmb and the form and'ed with $mask selects InstrStr#rmbk. As in the
// base multiclass, the matched root is an insert_subvector at index 0 into an
// all-zeros NewInf.KVT vector and the result is copied into NewInf.KRC.
multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
|
||||
SDNode OpNode, string InstrStr,
|
||||
list<Predicate> Preds>
|
||||
: avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
|
||||
let Predicates = Preds in {
|
||||
// Broadcast-from-memory second operand -> InstrStr#rmb.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (OpNode (_.VT _.RC:$src1),
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
|
||||
NewInf.KRC)>;
|
||||
|
||||
// Same, with the compare result and'ed with $mask -> InstrStr#rmbk.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (and (_.KVT _.KRCWM:$mask),
|
||||
(_.KVT (OpNode (_.VT _.RC:$src1),
|
||||
(X86VBroadcast
|
||||
(_.ScalarLdFrag addr:$src2)))))),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
|
||||
_.RC:$src1, addr:$src2),
|
||||
NewInf.KRC)>;
|
||||
}
|
||||
}
|
||||
|
||||
// VPCMPEQB - i8
|
||||
defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm,
|
||||
"VPCMPEQBZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQBZ128", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQBZ256", [HasBWI, HasVLX]>;
|
||||
|
||||
// VPCMPEQW - i16
|
||||
defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm,
|
||||
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm,
|
||||
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm,
|
||||
"VPCMPEQWZ256", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQWZ256", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQWZ", [HasBWI]>;
|
||||
|
||||
// VPCMPEQD - i32
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm,
|
||||
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm,
|
||||
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm,
|
||||
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm,
|
||||
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm,
|
||||
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm,
|
||||
"VPCMPEQDZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQDZ", [HasAVX512]>;
|
||||
|
||||
// VPCMPEQQ - i64
|
||||
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm,
|
||||
"VPCMPEQQZ", [HasAVX512]>;
|
||||
|
||||
// VPCMPGTB - i8
|
||||
defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm,
|
||||
"VPCMPGTBZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTBZ128", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTBZ256", [HasBWI, HasVLX]>;
|
||||
|
||||
// VPCMPGTW - i16
|
||||
defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm,
|
||||
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm,
|
||||
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm,
|
||||
"VPCMPGTWZ256", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTWZ256", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTWZ", [HasBWI]>;
|
||||
|
||||
// VPCMPGTD - i32
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm,
|
||||
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm,
|
||||
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm,
|
||||
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm,
|
||||
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpgtm,
|
||||
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm,
|
||||
"VPCMPGTDZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTDZ", [HasAVX512]>;
|
||||
|
||||
// VPCMPGTQ - i64
|
||||
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm,
|
||||
"VPCMPGTQZ", [HasAVX512]>;
|
||||
|
||||
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
@ -1908,6 +2122,237 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
|
||||
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
|
||||
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
// Selection patterns for a packed integer compare that carries an immediate
// condition code (OpNode src1, src2, imm:$cc). The matched root is the
// compare's _.KVT result inserted at index 0 into an all-zeros NewInf.KVT
// vector; the output is the compare instruction InstrStr plus a form suffix,
// with its result copied into NewInf.KRC.
//   _        - vector type info of the compared operands.
//   NewInf   - mask type info of the widened result.
//   OpNode   - compare node to match.
//   InstrStr - instruction name prefix; rri/rmi/rrik/rmik is appended.
//   Preds    - predicates guarding all patterns.
multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
|
||||
SDNode OpNode, string InstrStr,
|
||||
list<Predicate> Preds> {
|
||||
let Predicates = Preds in {
|
||||
// Register-register compare -> InstrStr#rri.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (OpNode (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
imm:$cc)),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
|
||||
_.RC:$src2,
|
||||
imm:$cc),
|
||||
NewInf.KRC)>;
|
||||
|
||||
// Second operand loaded from memory -> InstrStr#rmi.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (OpNode (_.VT _.RC:$src1),
|
||||
(_.VT (bitconvert (_.LdFrag addr:$src2))),
|
||||
imm:$cc)),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
|
||||
addr:$src2,
|
||||
imm:$cc),
|
||||
NewInf.KRC)>;
|
||||
|
||||
// Compare result and'ed with $mask, register operands -> InstrStr#rrik.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (and _.KRCWM:$mask,
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
imm:$cc))),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
|
||||
_.RC:$src1,
|
||||
_.RC:$src2,
|
||||
imm:$cc),
|
||||
NewInf.KRC)>;
|
||||
|
||||
// Compare result and'ed with $mask, memory second operand -> InstrStr#rmik.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (and (_.KVT _.KRCWM:$mask),
|
||||
(_.KVT (OpNode (_.VT _.RC:$src1),
|
||||
(_.VT (bitconvert
|
||||
(_.LdFrag addr:$src2))),
|
||||
imm:$cc)))),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
|
||||
_.RC:$src1,
|
||||
addr:$src2,
|
||||
imm:$cc),
|
||||
NewInf.KRC)>;
|
||||
}
|
||||
}
|
||||
|
||||
// Extends avx512_icmp_cc_packed_lowering (same parameters) with the forms
// whose second operand is a broadcast of a scalar load: the unmasked form
// selects InstrStr#rmib and the form and'ed with $mask selects InstrStr#rmibk.
// The matched root and the copy into NewInf.KRC are the same as in the base
// multiclass.
multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
|
||||
SDNode OpNode, string InstrStr,
|
||||
list<Predicate> Preds>
|
||||
: avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
|
||||
let Predicates = Preds in {
|
||||
// Broadcast-from-memory second operand -> InstrStr#rmib.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (OpNode (_.VT _.RC:$src1),
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
|
||||
imm:$cc)),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
|
||||
addr:$src2,
|
||||
imm:$cc),
|
||||
NewInf.KRC)>;
|
||||
|
||||
// Same, with the compare result and'ed with $mask -> InstrStr#rmibk.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (and (_.KVT _.KRCWM:$mask),
|
||||
(_.KVT (OpNode (_.VT _.RC:$src1),
|
||||
(X86VBroadcast
|
||||
(_.ScalarLdFrag addr:$src2)),
|
||||
imm:$cc)))),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
|
||||
_.RC:$src1,
|
||||
addr:$src2,
|
||||
imm:$cc),
|
||||
NewInf.KRC)>;
|
||||
}
|
||||
}
|
||||
|
||||
// VPCMPB - i8
|
||||
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm,
|
||||
"VPCMPBZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm,
|
||||
"VPCMPBZ128", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm,
|
||||
"VPCMPBZ256", [HasBWI, HasVLX]>;
|
||||
|
||||
// VPCMPW - i16
|
||||
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm,
|
||||
"VPCMPWZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm,
|
||||
"VPCMPWZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm,
|
||||
"VPCMPWZ128", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm,
|
||||
"VPCMPWZ256", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm,
|
||||
"VPCMPWZ256", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm,
|
||||
"VPCMPWZ", [HasBWI]>;
|
||||
|
||||
// VPCMPD - i32
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm,
|
||||
"VPCMPDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm,
|
||||
"VPCMPDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm,
|
||||
"VPCMPDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm,
|
||||
"VPCMPDZ128", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm,
|
||||
"VPCMPDZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm,
|
||||
"VPCMPDZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm,
|
||||
"VPCMPDZ256", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm,
|
||||
"VPCMPDZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm,
|
||||
"VPCMPDZ", [HasAVX512]>;
|
||||
|
||||
// VPCMPQ - i64
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm,
|
||||
"VPCMPQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm,
|
||||
"VPCMPQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm,
|
||||
"VPCMPQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm,
|
||||
"VPCMPQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm,
|
||||
"VPCMPQZ128", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm,
|
||||
"VPCMPQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm,
|
||||
"VPCMPQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm,
|
||||
"VPCMPQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm,
|
||||
"VPCMPQZ256", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm,
|
||||
"VPCMPQZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm,
|
||||
"VPCMPQZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm,
|
||||
"VPCMPQZ", [HasAVX512]>;
|
||||
|
||||
// VPCMPUB - i8
|
||||
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu,
|
||||
"VPCMPUBZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUBZ128", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUBZ256", [HasBWI, HasVLX]>;
|
||||
|
||||
// VPCMPUW - i16
|
||||
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu,
|
||||
"VPCMPUWZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu,
|
||||
"VPCMPUWZ128", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUWZ128", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu,
|
||||
"VPCMPUWZ256", [HasBWI, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUWZ256", [HasBWI, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUWZ", [HasBWI]>;
|
||||
|
||||
// VPCMPUD - i32
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu,
|
||||
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu,
|
||||
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu,
|
||||
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu,
|
||||
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu,
|
||||
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu,
|
||||
"VPCMPUDZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUDZ", [HasAVX512]>;
|
||||
|
||||
// VPCMPUQ - i64
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu,
|
||||
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu,
|
||||
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu,
|
||||
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu,
|
||||
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu,
|
||||
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu,
|
||||
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu,
|
||||
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu,
|
||||
"VPCMPUQZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu,
|
||||
"VPCMPUQZ", [HasAVX512]>;
|
||||
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu,
|
||||
"VPCMPUQZ", [HasAVX512]>;
|
||||
|
||||
multiclass avx512_vcmp_common<X86VectorVTInfo _> {
|
||||
|
||||
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
|
||||
@ -1998,21 +2443,108 @@ defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
|
||||
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
|
||||
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
|
||||
|
||||
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
|
||||
(COPY_TO_REGCLASS (VCMPPSZrri
|
||||
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
|
||||
imm:$cc), VK8)>;
|
||||
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
|
||||
(COPY_TO_REGCLASS (VPCMPDZrri
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
|
||||
imm:$cc), VK8)>;
|
||||
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
|
||||
(COPY_TO_REGCLASS (VPCMPUDZrri
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
|
||||
imm:$cc), VK8)>;
|
||||
// Selection patterns for an X86cmpm compare with an immediate condition code
// whose _.KVT result is inserted at index 0 into an all-zeros NewInf.KVT
// vector. Three operand forms are covered - register (InstrStr#rri), memory
// (InstrStr#rmi) and broadcast-from-memory (InstrStr#rmbi) - and in each the
// instruction result is copied into NewInf.KRC.
//   _        - vector type info of the compared operands.
//   NewInf   - mask type info of the widened result.
//   InstrStr - instruction name prefix; the form suffix is appended.
//   Preds    - predicates guarding all patterns.
// NOTE(review): unlike the integer compare lowering multiclasses, no
// `and $mask` (masked) forms are matched here - confirm this is intentional.
multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
|
||||
string InstrStr, list<Predicate> Preds> {
|
||||
let Predicates = Preds in {
|
||||
// Register-register compare -> InstrStr#rri.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (X86cmpm (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
imm:$cc)),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
|
||||
_.RC:$src2,
|
||||
imm:$cc),
|
||||
NewInf.KRC)>;
|
||||
|
||||
// Second operand loaded from memory -> InstrStr#rmi.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (X86cmpm (_.VT _.RC:$src1),
|
||||
(_.VT (bitconvert (_.LdFrag addr:$src2))),
|
||||
imm:$cc)),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
|
||||
addr:$src2,
|
||||
imm:$cc),
|
||||
NewInf.KRC)>;
|
||||
|
||||
// Broadcast-from-memory second operand -> InstrStr#rmbi.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (X86cmpm (_.VT _.RC:$src1),
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
|
||||
imm:$cc)),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
|
||||
addr:$src2,
|
||||
imm:$cc),
|
||||
NewInf.KRC)>;
|
||||
}
|
||||
}
|
||||
|
||||
// Extends avx512_fcmp_cc_packed_lowering (same parameters) with one more
// pattern: an X86cmpmRnd compare whose rounding operand is FROUND_NO_EXC,
// inserted at index 0 into an all-zeros NewInf.KVT vector. It selects
// InstrStr#rrib and copies the result into NewInf.KRC.
multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
|
||||
string InstrStr, list<Predicate> Preds>
|
||||
: avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
|
||||
|
||||
let Predicates = Preds in
|
||||
// Register-register compare with FROUND_NO_EXC -> InstrStr#rrib.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
|
||||
(_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
imm:$cc,
|
||||
(i32 FROUND_NO_EXC))),
|
||||
(i64 0)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
|
||||
_.RC:$src2,
|
||||
imm:$cc),
|
||||
NewInf.KRC)>;
|
||||
}
|
||||
|
||||
|
||||
// VCMPPS - f32
// One instantiation per (source vector type, wider destination mask type)
// pair. The 128-bit and 256-bit forms use the non-SAE multiclass and require
// HasVLX; the 512-bit form uses the SAE multiclass and requires only HasAVX512.
|
||||
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128",
|
||||
[HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256",
|
||||
[HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ",
|
||||
[HasAVX512]>;
|
||||
defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ",
|
||||
[HasAVX512]>;
|
||||
|
||||
// VCMPPD - f64
// One instantiation per (source vector type, wider destination mask type)
// pair. The 128-bit and 256-bit forms use the non-SAE multiclass and require
// HasVLX; the 512-bit form uses the SAE multiclass and requires only HasAVX512.
|
||||
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128",
|
||||
[HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256",
|
||||
[HasAVX512, HasVLX]>;
|
||||
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256",
|
||||
[HasAVX512, HasVLX]>;
|
||||
|
||||
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ",
|
||||
[HasAVX512]>;
|
||||
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ",
|
||||
[HasAVX512]>;
|
||||
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ",
|
||||
[HasAVX512]>;
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// FPClass
|
||||
@ -2498,6 +3030,69 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
|
||||
// Mask-register shift instruction families, instantiated from
// avx512_mask_shiftop_w with their two opcode bytes, mnemonic stem and the
// matching DAG node (X86kshiftl / X86kshiftr).
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
|
||||
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
|
||||
|
||||
// Lowering of v8i32 integer compares (no condition-code operand) through the
// 512-bit InstStr##Zrr / InstStr##Zrrk instructions.
//   OpNode  - the compare DAG node to match.
//   InstStr - instruction name prefix; "Zrr" / "Zrrk" is appended.
// Both operands are placed in the sub_ymm sub-register of an IMPLICIT_DEF
// v16i32 value, so the lanes above the low 256 bits are undefined.
multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
|
||||
// Plain v8i1 result: widen the operands, compare, copy the mask to VK8.
def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
|
||||
|
||||
// v8i1 result inserted at index 0 of an all-zeros v16i1: the compare result
// is passed through KSHIFTLWri by 8 and then KSHIFTRWri by 8, which is how
// this pattern produces the zeroed upper half of the 16-bit mask.
def : Pat<(insert_subvector (v16i1 immAllZerosV),
|
||||
(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
|
||||
(i64 0)),
|
||||
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
|
||||
(i8 8)), (i8 8))>;
|
||||
|
||||
// Same as the previous pattern, but the compare is AND-ed with a VK8 mask:
// the mask is re-classed to VK16 and passed to the masked InstStr##Zrrk form.
def : Pat<(insert_subvector (v16i1 immAllZerosV),
|
||||
(v8i1 (and VK8:$mask,
|
||||
(OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
|
||||
(i64 0)),
|
||||
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
|
||||
(COPY_TO_REGCLASS VK8:$mask, VK16),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
|
||||
(i8 8)), (i8 8))>;
|
||||
}
|
||||
|
||||
// Condition-code variant of the 256-bit compare lowering through 512-bit
// instructions (InstStr##Zrri / InstStr##Zrrik).
//   OpNode  - the compare DAG node to match (takes an imm:$cc operand).
//   InstStr - instruction name prefix; "Zrri" / "Zrrik" is appended.
//   _       - type info supplying the 256-bit (_.info256.VT) source type and
//             the 512-bit (_.info512.VT) type used for the widened operands.
// Operands are placed in the sub_ymm sub-register of an IMPLICIT_DEF 512-bit
// value, so the lanes above the low 256 bits are undefined.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
|
||||
AVX512VLVectorVTInfo _> {
|
||||
// Plain v8i1 result: widen the operands, compare, copy the mask to VK8.
def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
|
||||
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
|
||||
imm:$cc), VK8)>;
|
||||
|
||||
// v8i1 result inserted at index 0 of an all-zeros v16i1: the compare result
// is passed through KSHIFTLWri by 8 and then KSHIFTRWri by 8, which is how
// this pattern produces the zeroed upper half of the 16-bit mask.
def : Pat<(insert_subvector (v16i1 immAllZerosV),
|
||||
(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
|
||||
(i64 0)),
|
||||
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
|
||||
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
|
||||
imm:$cc),
|
||||
(i8 8)), (i8 8))>;
|
||||
|
||||
// Same as the previous pattern, but the compare is AND-ed with a VK8 mask:
// the mask is re-classed to VK16 and passed to the masked InstStr##Zrrik form.
def : Pat<(insert_subvector (v16i1 immAllZerosV),
|
||||
(v8i1 (and VK8:$mask,
|
||||
(OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
|
||||
(i64 0)),
|
||||
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
|
||||
(COPY_TO_REGCLASS VK8:$mask, VK16),
|
||||
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
|
||||
imm:$cc),
|
||||
(i8 8)), (i8 8))>;
|
||||
}
|
||||
|
||||
// Instantiate the 256-bit-through-512-bit compare lowerings for targets that
// have AVX512 but not VLX: first the two-operand integer compares
// (VPCMPGTD / VPCMPEQD), then the condition-code compares for f32 (VCMPPS)
// and for i32 via X86cmpm (VPCMPD) and X86cmpmu (VPCMPUD).
let Predicates = [HasAVX512, NoVLX] in {
|
||||
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
|
||||
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
|
||||
|
||||
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
|
||||
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
|
||||
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
|
||||
}
|
||||
|
||||
// Mask setting all 0s or 1s
|
||||
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
|
||||
let Predicates = [HasAVX512] in
|
||||
|
@ -1004,8 +1004,6 @@ define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1018,8 +1016,6 @@ define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1058,8 +1054,6 @@ define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1072,8 +1066,6 @@ define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1087,8 +1079,6 @@ define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1101,8 +1091,6 @@ define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1116,10 +1104,6 @@ define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1132,10 +1116,6 @@ define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1149,8 +1129,6 @@ define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1163,8 +1141,6 @@ define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1178,10 +1154,6 @@ define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -1194,10 +1166,6 @@ define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
@ -5164,23 +5132,11 @@ define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_cmp_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x37,0xc8]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k4 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xe0,0x02]
|
||||
; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xe9]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
|
||||
@ -5219,43 +5175,31 @@ define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_cmp_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x37,0xd0]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd9,0x02]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
|
||||
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k6 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x1f,0xf0,0x02]
|
||||
; CHECK-NEXT: kmovw %edi, %k6 ## encoding: [0xc5,0xf8,0x92,0xf7]
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k6} ## encoding: [0x62,0xf2,0xfd,0x2e,0x29,0xc1]
|
||||
; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k7 {%k6} ## encoding: [0x62,0xf2,0xf5,0x2e,0x37,0xf8]
|
||||
; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k1 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1f,0xc9,0x02]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
|
||||
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k4 {%k6} ## encoding: [0x62,0xf3,0xf5,0x2e,0x1f,0xe0,0x02]
|
||||
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k5 {%k6} ## encoding: [0x62,0xf2,0xfd,0x2e,0x37,0xe9]
|
||||
; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xf9]
|
||||
; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
|
||||
@ -5283,23 +5227,11 @@ define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_ucmp_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x05]
|
||||
; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
|
||||
; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x06]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
|
||||
@ -5338,43 +5270,31 @@ define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_ucmp_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x01]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
|
||||
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x05]
|
||||
; CHECK-NEXT: kmovw %edi, %k6 ## encoding: [0xc5,0xf8,0x92,0xf7]
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k6} ## encoding: [0x62,0xf2,0xfd,0x2e,0x29,0xc1]
|
||||
; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k7 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1e,0xf9,0x01]
|
||||
; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k1 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1e,0xc9,0x02]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
|
||||
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k4 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1e,0xe1,0x05]
|
||||
; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k5 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1e,0xe9,0x06]
|
||||
; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
|
||||
; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf9,0x06]
|
||||
; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
|
||||
@ -5402,23 +5322,11 @@ define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: test_cmp_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x08,0x66,0xc8]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k4 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xe0,0x02]
|
||||
; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xe9]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
|
||||
@ -5457,43 +5365,31 @@ define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_cmp_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x66,0xd0]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd9,0x02]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
|
||||
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k6 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x1f,0xf0,0x02]
|
||||
; CHECK-NEXT: kmovw %edi, %k6 ## encoding: [0xc5,0xf8,0x92,0xf7]
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k6} ## encoding: [0x62,0xf1,0x7d,0x0e,0x76,0xc1]
|
||||
; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k7 {%k6} ## encoding: [0x62,0xf1,0x75,0x0e,0x66,0xf8]
|
||||
; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k1 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1f,0xc9,0x02]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
|
||||
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k4 {%k6} ## encoding: [0x62,0xf3,0x75,0x0e,0x1f,0xe0,0x02]
|
||||
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k5 {%k6} ## encoding: [0x62,0xf1,0x7d,0x0e,0x66,0xe9]
|
||||
; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xf9]
|
||||
; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
|
||||
@ -5521,23 +5417,11 @@ define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: test_ucmp_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x05]
|
||||
; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
|
||||
; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x06]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
|
||||
@ -5576,43 +5460,31 @@ define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_ucmp_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x01]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
|
||||
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x05]
|
||||
; CHECK-NEXT: kmovw %edi, %k6 ## encoding: [0xc5,0xf8,0x92,0xf7]
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k6} ## encoding: [0x62,0xf1,0x7d,0x0e,0x76,0xc1]
|
||||
; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k7 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1e,0xf9,0x01]
|
||||
; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k1 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1e,0xc9,0x02]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
|
||||
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k4 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1e,0xe1,0x05]
|
||||
; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k5 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1e,0xe9,0x06]
|
||||
; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
|
||||
; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf9,0x06]
|
||||
; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
|
||||
@ -5640,35 +5512,11 @@ define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; CHECK-LABEL: test_cmp_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x37,0xc8]
|
||||
; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02]
|
||||
; CHECK-NEXT: kshiftlw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k4 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xe0,0x02]
|
||||
; CHECK-NEXT: kshiftlw $14, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xe9]
|
||||
; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
|
||||
@ -5707,57 +5555,33 @@ define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_cmp_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x37,0xd0]
|
||||
; CHECK-NEXT: kshiftlw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd9,0x02]
|
||||
; CHECK-NEXT: kshiftlw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
|
||||
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k6 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x1f,0xf0,0x02]
|
||||
; CHECK-NEXT: kshiftlw $14, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
|
||||
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf9]
|
||||
; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff]
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf2,0xfd,0x0f,0x29,0xc1]
|
||||
; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k6 {%k7} ## encoding: [0x62,0xf2,0xf5,0x0f,0x37,0xf0]
|
||||
; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xc9,0x02]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
|
||||
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k4 {%k7} ## encoding: [0x62,0xf3,0xf5,0x0f,0x1f,0xe0,0x02]
|
||||
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf2,0xfd,0x0f,0x37,0xe9]
|
||||
; CHECK-NEXT: kshiftlw $14, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
|
||||
@ -5785,35 +5609,11 @@ define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; CHECK-LABEL: test_ucmp_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01]
|
||||
; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02]
|
||||
; CHECK-NEXT: kshiftlw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x05]
|
||||
; CHECK-NEXT: kshiftlw $14, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
|
||||
; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x06]
|
||||
; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
|
||||
@ -5852,57 +5652,33 @@ define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_ucmp_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
|
||||
; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x01]
|
||||
; CHECK-NEXT: kshiftlw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
|
||||
; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x02]
|
||||
; CHECK-NEXT: kshiftlw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
|
||||
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04]
|
||||
; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
|
||||
; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x05]
|
||||
; CHECK-NEXT: kshiftlw $14, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
|
||||
; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf9,0x06]
|
||||
; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff]
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf2,0xfd,0x0f,0x29,0xc1]
|
||||
; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf1,0x01]
|
||||
; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xc9,0x02]
|
||||
; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
|
||||
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xd9,0x04]
|
||||
; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k4 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xe1,0x05]
|
||||
; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xe9,0x06]
|
||||
; CHECK-NEXT: kshiftlw $14, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
|
||||
; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
|
||||
; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
|
||||
; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
|
||||
; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
|
||||
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
|
||||
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
|
||||
|
13485
test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
Normal file
13485
test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
Normal file
File diff suppressed because it is too large
Load Diff
@ -265,9 +265,7 @@ define <2 x float> @test13(float* %base, <2 x float> %src0, <2 x i32> %trigger)
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0
|
||||
; SKX-NEXT: kshiftlb $6, %k0, %k0
|
||||
; SKX-NEXT: kshiftrb $6, %k0, %k1
|
||||
; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
|
||||
; SKX-NEXT: vexpandps (%rdi), %xmm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
@ -295,9 +293,7 @@ define void @test14(float* %base, <2 x float> %V, <2 x i32> %trigger) {
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0
|
||||
; SKX-NEXT: kshiftlb $6, %k0, %k0
|
||||
; SKX-NEXT: kshiftrb $6, %k0, %k1
|
||||
; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
|
||||
; SKX-NEXT: vcompressps %xmm0, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
|
@ -462,9 +462,7 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $14, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $14, %k0, %k1
|
||||
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
|
||||
; SKX-NEXT: vmovups %xmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
@ -550,9 +548,7 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $14, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $14, %k0, %k1
|
||||
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
|
||||
; SKX-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
@ -601,9 +597,7 @@ define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $14, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $14, %k0, %k1
|
||||
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
|
||||
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
|
||||
; SKX-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1}
|
||||
; SKX-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
@ -645,9 +639,7 @@ define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $14, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $14, %k0, %k1
|
||||
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
|
||||
; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
|
Loading…
x
Reference in New Issue
Block a user