1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 03:53:04 +02:00

[X86][AVX512] Improve lowering of AVX512 compare intrinsics (remove redundant shift left+right instructions).

AVX512 compare instructions return v*i1 types.
In cases where the number of elements in the returned value are less than 8, clang adds zeroes to get a mask of v8i1 type.
Later on it's replaced with CONCAT_VECTORS, which then is lowered to many DAG nodes including insert/extract element and shift right/left nodes.
The fact that AVX512 compare instructions put the result in a k register and zeroes all its upper bits allows us to remove the extra nodes simply by copying the result to the required register class.

When lowering, identify these cases and transform them into an INSERT_SUBVECTOR node (marked legal), then catch this pattern in the instruction selection phase and transform it into one AVX512 cmp instruction.

Differential Revision: https://reviews.llvm.org/D33188

llvm-svn: 305465
This commit is contained in:
Ayman Musa 2017-06-15 13:02:37 +00:00
parent 0107d0a6ae
commit 3898c6f400
6 changed files with 14204 additions and 74 deletions

View File

@ -5059,6 +5059,20 @@ static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}
// Return true if the instruction zeroes the unused upper part of the
// destination and accepts mask.
static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
  // These AVX512 compare nodes write their v*i1 result into a k-register
  // and clear every bit above the number of result elements, so the upper
  // part of the destination never needs explicit zeroing.
  return Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM ||
         Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMU;
}
/// Insert i1-subvector to i1-vector.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@ -5091,6 +5105,22 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// 3. Subvector should be inserted in the middle (for example v2i1
// to v16i1, index 2)
// If this node widens - by concatenating zeroes - the type of the result
// of a node with instruction that zeroes all upper (irrelevant) bits of the
// output register, mark this node as legal to enable replacing them with
// the v8i1 version of the previous instruction during instruction selection.
// For example, VPCMPEQDZ128rr instruction stores its v4i1 result in a k-reg,
// while zeroing all the upper remaining 60 bits of the register. If the
// result of such instruction is inserted into an allZeroVector, then we can
// safely remove insert_vector (in instruction selection) as the cmp instr
// already zeroed the rest of the register.
if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 &&
(isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) ||
(SubVec.getOpcode() == ISD::AND &&
(isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) ||
isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode())))))
return Op;
// extend to natively supported kshift
MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
MVT WideOpVT = OpVT;
@ -7913,6 +7943,60 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
// Return true if all the operands of the given CONCAT_VECTORS node are zeros
// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0)
static bool isExpandWithZeros(const SDValue &Op) {
  assert(Op.getOpcode() == ISD::CONCAT_VECTORS &&
         "Expand with zeros only possible in CONCAT_VECTORS nodes!");
  // Operand 0 may be anything; every remaining operand must be an
  // all-zeros build vector.
  const unsigned NumOps = Op.getNumOperands();
  for (unsigned Idx = 1; Idx != NumOps; ++Idx) {
    if (!ISD::isBuildVectorAllZeros(Op.getOperand(Idx).getNode()))
      return false;
  }
  return true;
}
// Returns the compare (or masked compare) node whose result this chain of
// zero-concatenations merely widens, or an empty SDValue if Op is not such a
// type promotion. (The header previously said "Returns true", but the
// function returns the discovered SDValue, not a bool.)
static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) {
unsigned Opc = Op.getOpcode();
assert(Opc == ISD::CONCAT_VECTORS &&
Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Unexpected node to check for type promotion!");
// As long as we are concatenating zeros to the upper part of a previous node
// result, climb up the tree until a node with different opcode is
// encountered
while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) {
if (Opc == ISD::INSERT_SUBVECTOR) {
// Only an insertion at index 0 into an all-zeros vector is a pure
// widening; anything else may retain meaningful upper bits.
if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) &&
Op.getConstantOperandVal(2) == 0)
Op = Op.getOperand(1);
else
return SDValue();
} else { // Opc == ISD::CONCAT_VECTORS
// (CONCAT_VECTORS X, 0, 0, ..., 0) widens X; otherwise stop.
if (isExpandWithZeros(Op))
Op = Op.getOperand(0);
else
return SDValue();
}
Opc = Op.getOpcode();
}
// Check if the first inserted node zeroes the upper bits, or an 'and' result
// of a node that zeros the upper bits (its masked version).
if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) ||
(Op.getOpcode() == ISD::AND &&
(isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) ||
isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) {
return Op;
}
return SDValue();
}
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG & DAG) {
@ -7923,6 +8007,17 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
assert(isPowerOf2_32(NumOfOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
// If this node promotes - by concatenating zeroes - the type of the result
// of a node with instruction that zeroes all upper (irrelevant) bits of the
// output register, mark it as legal and catch the pattern in instruction
// selection to avoid emitting extra instructions (for zeroing upper bits).
if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) {
SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64);
SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted,
ZeroC);
}
SDValue Undef = DAG.getUNDEF(ResVT);
if (NumOfOperands > 2) {
// Specialize the cases when all, or all but one, of the operands are undef.

View File

@ -185,6 +185,20 @@ def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
v2f64x_info>;
// Bundles the register classes and value type that describe one k-register
// mask vector type (v2i1 ... v64i1), so lowering patterns can be written
// generically over the destination mask width.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
ValueType _vt> {
// Plain mask register class (e.g. VK8).
RegisterClass KRC = _krc;
// Write-mask variant of the register class (e.g. VK8WM).
RegisterClass KRCWM = _krcwm;
// The i1 vector value type held in the k-register.
ValueType KVT = _vt;
}
// One X86KVectorVTInfo instance per supported mask width; these are the
// "NewInf" (widened destination) arguments of the lowering multiclasses below.
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
@ -1735,17 +1749,217 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(COPY_TO_REGCLASS (VPCMPGTDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(COPY_TO_REGCLASS (VPCMPEQDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
// Matches (insert_subvector 0, (cmp ...), 0) -- i.e. a compare result widened
// into a larger all-zeros mask -- and selects it to the single compare
// instruction, relying on the hardware zeroing the k-register's upper bits.
// Covers register-register, register-memory, and both masked forms.
multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds> {
let Predicates = Preds in {
// rr: compare two registers.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
NewInf.KRC)>;
// rm: compare register against a full-vector load.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
NewInf.KRC)>;
// rrk: masked compare (the 'and' with a k-register is the mask).
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
_.RC:$src1, _.RC:$src2),
NewInf.KRC)>;
// rmk: masked compare against a full-vector load.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert
(_.LdFrag addr:$src2))))))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
_.RC:$src1, addr:$src2),
NewInf.KRC)>;
}
}
// Extends avx512_icmp_packed_lowering with the broadcast-from-memory (rmb)
// forms, for element types that support embedded broadcast (i32/i64).
multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds>
: avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
let Predicates = Preds in {
// rmb: compare register against a broadcast scalar load.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
NewInf.KRC)>;
// rmbk: masked variant of the broadcast form.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
_.RC:$src1, addr:$src2),
NewInf.KRC)>;
}
}
// Instantiate the lowering multiclasses for every (source vector type,
// wider destination mask type) pair, so that a VPCMPEQ/VPCMPGT whose v*i1
// result is zero-extended into a wider mask selects to a single compare.
// i8/i16 element types use the non-broadcast multiclass (no embedded
// broadcast for byte/word); i32/i64 use the rmb variant.
// VPCMPEQB - i8
defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQBZ256", [HasBWI, HasVLX]>;
// VPCMPEQW - i16
defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm,
"VPCMPEQWZ", [HasBWI]>;
// VPCMPEQD - i32
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm,
"VPCMPEQDZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm,
"VPCMPEQDZ", [HasAVX512]>;
// VPCMPEQQ - i64
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
// VPCMPGTB - i8
defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTBZ256", [HasBWI, HasVLX]>;
// VPCMPGTW - i16
defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm,
"VPCMPGTWZ", [HasBWI]>;
// VPCMPGTD - i32
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm,
"VPCMPGTDZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm,
"VPCMPGTDZ", [HasAVX512]>;
// VPCMPGTQ - i64
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm,
"VPCMPGTQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm,
"VPCMPGTQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm,
"VPCMPGTQZ", [HasAVX512]>;
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> {
@ -1908,6 +2122,237 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// Same widening patterns as avx512_icmp_packed_lowering, but for the
// condition-code compare nodes (VPCMP/VPCMPU), which carry an extra imm:$cc
// predicate operand.
multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds> {
let Predicates = Preds in {
// rri: register-register with immediate condition code.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
// rmi: register-memory with immediate condition code.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
// rrik: masked register-register form.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
_.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
// rmik: masked register-memory form.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert
(_.LdFrag addr:$src2))),
imm:$cc)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
_.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
}
}
// Adds the broadcast-from-memory (rmib/rmibk) forms to
// avx512_icmp_cc_packed_lowering for i32/i64 element types.
multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds>
: avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
let Predicates = Preds in {
// rmib: compare against a broadcast scalar load, with condition code.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
// rmibk: masked variant of the broadcast form.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
imm:$cc)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
_.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
}
}
// Instantiations of the condition-code compare lowerings: X86cmpm selects to
// the signed VPCMP* instructions, X86cmpmu to the unsigned VPCMPU* ones.
// As above, i8/i16 use the non-broadcast multiclass, i32/i64 the rmb variant.
// VPCMPB - i8
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm,
"VPCMPBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm,
"VPCMPBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm,
"VPCMPBZ256", [HasBWI, HasVLX]>;
// VPCMPW - i16
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm,
"VPCMPWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm,
"VPCMPWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm,
"VPCMPWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm,
"VPCMPWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm,
"VPCMPWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm,
"VPCMPWZ", [HasBWI]>;
// VPCMPD - i32
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm,
"VPCMPDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm,
"VPCMPDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm,
"VPCMPDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm,
"VPCMPDZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm,
"VPCMPDZ", [HasAVX512]>;
// VPCMPQ - i64
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm,
"VPCMPQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm,
"VPCMPQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm,
"VPCMPQZ", [HasAVX512]>;
// VPCMPUB - i8
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu,
"VPCMPUBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu,
"VPCMPUBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu,
"VPCMPUBZ256", [HasBWI, HasVLX]>;
// VPCMPUW - i16
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu,
"VPCMPUWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu,
"VPCMPUWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu,
"VPCMPUWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu,
"VPCMPUWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu,
"VPCMPUWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu,
"VPCMPUWZ", [HasBWI]>;
// VPCMPUD - i32
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu,
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu,
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu,
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu,
"VPCMPUDZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu,
"VPCMPUDZ", [HasAVX512]>;
// VPCMPUQ - i64
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu,
"VPCMPUQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu,
"VPCMPUQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu,
"VPCMPUQZ", [HasAVX512]>;
multiclass avx512_vcmp_common<X86VectorVTInfo _> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
@ -1998,21 +2443,108 @@ defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (VCMPPSZrri
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (VPCMPDZrri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (VPCMPUDZrri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
// Widening patterns for the floating-point compares (VCMPPS/VCMPPD).
// Unlike the integer multiclasses above, the node (X86cmpm) is fixed and
// only the instruction string varies; broadcast (rmbi) is always available
// for f32/f64, so no separate rmb multiclass is needed.
multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
string InstrStr, list<Predicate> Preds> {
let Predicates = Preds in {
// rri: register-register with immediate condition code.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
// rmi: register-memory form.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpm (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
// rmbi: broadcast-from-memory form.
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpm (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
}
}
// Adds the suppress-all-exceptions (SAE, rrib) form for the 512-bit
// floating-point compares, matching the X86cmpmRnd node with FROUND_NO_EXC.
multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
string InstrStr, list<Predicate> Preds>
: avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
let Predicates = Preds in
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
(i32 FROUND_NO_EXC))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
}
// Instantiations for the floating-point compares; the 512-bit forms use the
// SAE-capable multiclass.
// VCMPPS - f32
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ",
[HasAVX512]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ",
[HasAVX512]>;
// VCMPPD - f64
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ",
[HasAVX512]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ",
[HasAVX512]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ",
[HasAVX512]>;
// ----------------------------------------------------------------
// FPClass
@ -2498,6 +3030,69 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
// Lowering for targets without VLX: a v8i32 compare is widened to 512 bits
// and executed as a zmm compare, then the result is narrowed back. Because
// the 512-bit compare writes all 16 mask bits, the widened-into-zeros case
// must explicitly clear the upper 8 bits with a kshiftl/kshiftr pair.
// NOTE(review): the multiclass name is spelled "axv512" (not "avx512");
// renaming it would require updating the defm uses below in the same change.
multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
// Plain v8i1 compare: widen operands to zmm and copy the mask to VK8.
def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
// Widened-into-zeros: shift left then right by 8 to zero the upper bits.
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
(i8 8)), (i8 8))>;
// Masked, widened-into-zeros variant.
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (and VK8:$mask,
(OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
(COPY_TO_REGCLASS VK8:$mask, VK16),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
(i8 8)), (i8 8))>;
}
// Condition-code variant of the NoVLX lowering above, parameterized over the
// VT info so it covers both integer (VPCMPD/VPCMPUD) and float (VCMPPS)
// compares; carries the extra imm:$cc operand through to the instruction.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
AVX512VLVectorVTInfo _> {
// Plain v8i1 compare, widened to 512 bits.
def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
// Widened-into-zeros: clear the upper 8 mask bits via kshiftl/kshiftr.
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc),
(i8 8)), (i8 8))>;
// Masked, widened-into-zeros variant.
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (and VK8:$mask,
(OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
(COPY_TO_REGCLASS VK8:$mask, VK16),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc),
(i8 8)), (i8 8))>;
}
// Instantiate the widening compare lowerings only when the target has AVX512F
// but lacks VLX (so no native 128/256-bit k-register compares exist).
let Predicates = [HasAVX512, NoVLX] in {
// Integer equality / signed-greater-than compares (fixed predicate, i32 lanes).
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
// Compares that take an immediate condition code: FP (VCMPPS), signed integer
// (VPCMPD), and unsigned integer (VPCMPUD) variants.
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
}
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
let Predicates = [HasAVX512] in

View File

@ -1004,8 +1004,6 @@ define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1018,8 +1016,6 @@ define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1058,8 +1054,6 @@ define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1072,8 +1066,6 @@ define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1087,8 +1079,6 @@ define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1101,8 +1091,6 @@ define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1116,10 +1104,6 @@ define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1132,10 +1116,6 @@ define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1149,8 +1129,6 @@ define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1163,8 +1141,6 @@ define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1178,10 +1154,6 @@ define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -1194,10 +1166,6 @@ define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]

File diff suppressed because it is too large Load Diff

View File

@ -265,9 +265,7 @@ define <2 x float> @test13(float* %base, <2 x float> %src0, <2 x i32> %trigger)
; SKX: # BB#0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
; SKX-NEXT: vexpandps (%rdi), %xmm0 {%k1}
; SKX-NEXT: retq
;
@ -295,9 +293,7 @@ define void @test14(float* %base, <2 x float> %V, <2 x i32> %trigger) {
; SKX: # BB#0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
; SKX-NEXT: vcompressps %xmm0, (%rdi) {%k1}
; SKX-NEXT: retq
;

View File

@ -462,9 +462,7 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
; SKX: ## BB#0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
; SKX-NEXT: kshiftrw $14, %k0, %k1
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vmovups %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
@ -550,9 +548,7 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
; SKX: ## BB#0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
; SKX-NEXT: kshiftrw $14, %k0, %k1
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
@ -601,9 +597,7 @@ define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
; SKX: ## BB#0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
; SKX-NEXT: kshiftrw $14, %k0, %k1
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SKX-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1}
; SKX-NEXT: vpmovsxdq %xmm0, %xmm0
@ -645,9 +639,7 @@ define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
; SKX: ## BB#0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
; SKX-NEXT: kshiftrw $14, %k0, %k1
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer