1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00

AVX-512: Added all forms of FP compare instructions for KNL and SKX.

Added intrinsics for the instructions. The CC parameter of the intrinsics was changed from i8 to i32 according to the specification.

By Igor Breger (igor.breger@intel.com)

llvm-svn: 236714
This commit is contained in:
Elena Demikhovsky 2015-05-07 11:24:42 +00:00
parent e49117b828
commit 28f6bb84a5
17 changed files with 1345 additions and 317 deletions

View File

@ -4183,13 +4183,33 @@ let TargetPrefix = "x86" in {
}
// Misc.
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i8_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">,
// Masked FP compare, 512-bit single: (v16f32 a, v16f32 b, i32 cc, i16 mask,
// i32 rc) -> i16 result mask. CC is i32 (widened from i8 per the spec); the
// trailing i32 is presumably the SAE/rounding-mode operand — matches the
// Op.getOperand(5) handling in the lowering code. TODO confirm.
def int_x86_avx512_mask_cmp_ps_512 :
GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
// Masked FP compare, 512-bit double: same shape as ps_512 with an i8
// result/input mask for the 8 elements.
def int_x86_avx512_mask_cmp_pd_512 :
GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
// 128/256-bit (VLX) variants: (a, b, i32 cc, i8 mask) -> i8 mask.
// No trailing rounding operand — only the 512-bit forms carry one.
def int_x86_avx512_mask_cmp_ps_256 :
GCCBuiltin<"__builtin_ia32_cmpps256_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_cmp_pd_256 :
GCCBuiltin<"__builtin_ia32_cmppd256_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_cmp_ps_128 :
GCCBuiltin<"__builtin_ia32_cmpps128_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_cmp_pd_128 :
GCCBuiltin<"__builtin_ia32_cmppd128_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_movntdqa :
GCCBuiltin<"__builtin_ia32_movntdqa512">,
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}

View File

@ -210,14 +210,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (Name == "x86.avx2.mpsadbw")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
if (Name == "x86.avx512.mask.cmp.ps.512")
return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
NewFn);
if (Name == "x86.avx512.mask.cmp.pd.512")
return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
NewFn);
if (Name == "x86.avx512.mask.cmp.b.512")
return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_512,
NewFn);
@ -799,21 +791,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
return;
}
case Intrinsic::x86_avx512_mask_cmp_ps_512:
case Intrinsic::x86_avx512_mask_cmp_pd_512: {
// Need to truncate the last argument from i32 to i8 -- this argument models
// an inherently 8-bit immediate operand to these x86 instructions.
SmallVector<Value *, 5> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
// Replace the last argument with a trunc.
Args[2] = Builder.CreateTrunc(Args[2], Type::getInt8Ty(C), "trunc");
CallInst *NewCall = Builder.CreateCall(NewFn, Args);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
return;
}
}
}

View File

@ -1414,7 +1414,8 @@ std::unique_ptr<X86Operand>
X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
consumeToken(); // Eat "{"
// Eat "{" and mark the current place.
const SMLoc consumedToken = consumeToken();
if (Tok.getIdentifier().startswith("r")){
int rndMode = StringSwitch<int>(Tok.getIdentifier())
.Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
@ -1436,6 +1437,13 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
MCConstantExpr::Create(rndMode, Parser.getContext());
return X86Operand::CreateImm(RndModeOp, Start, End);
}
if(Tok.getIdentifier().equals("sae")){
Parser.Lex(); // Eat the sae
if (!getLexer().is(AsmToken::RCurly))
return ErrorOperand(Tok.getLoc(), "Expected } at this point");
Parser.Lex(); // Eat "}"
return X86Operand::CreateToken("{sae}", consumedToken);
}
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
}
/// ParseIntelMemOperand - Parse intel style memory operand.

View File

@ -14926,12 +14926,27 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Mask.getValueType().getSizeInBits());
SDValue Cmp;
if (IntrData->Type == CMP_MASK_CC) {
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
SDValue CC = Op.getOperand(3);
CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
X86::STATIC_ROUNDING::CUR_DIRECTION)
Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), CC, Rnd);
}
//default rounding mode
if(!Cmp.getNode())
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), CC);
} else {
assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!");
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2));
Op.getOperand(2));
}
SDValue CmpMask = getVectorMaskingNode(Cmp, Mask,
DAG.getTargetConstant(0, dl,

View File

@ -308,6 +308,8 @@ namespace llvm {
/// integer signed and unsigned data types.
CMPM,
CMPMU,
// Vector comparison with rounding mode for FP values
CMPM_RND,
// Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,

View File

@ -305,8 +305,8 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
Pattern, itin>;
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}"#Round#"|"#
"$dst {${mask}}"#Round#", "#IntelSrcAsm#"}",
OpcodeStr#"\t{"#Round#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#Round#"}",
MaskingPattern, itin>, EVEX_K;
}
@ -335,6 +335,14 @@ multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
(and _.KRCWM:$mask, RHS),
Round, itin>;
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm> :
AVX512_maskable_custom_cmp<O, F, Outs,
Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[],[],"", NoItinerary>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
@ -1590,53 +1598,97 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// avx512_cmp_packed - compare packed instructions
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
X86MemOperand x86memop, ValueType vt,
string suffix, Domain d> {
def rri : AVX512PIi8<0xC2, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
let hasSideEffects = 0 in
def rrib: AVX512PIi8<0xC2, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", suffix,
"\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
[], d>, EVEX_B;
def rmi : AVX512PIi8<0xC2, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set KRC:$dst,
(X86cmpm (vt RC:$src1), (load addr:$src2), imm:$cc))], d>;
multiclass avx512_vcmp_common<X86VectorVTInfo _> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)>;
let mayLoad = 1 in {
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
imm:$cc)>,EVEX_B;
}
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
!strconcat("vcmp", suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
def rrib_alt: AVX512PIi8<0xC2, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
!strconcat("vcmp", suffix,
"\t{{sae}, $cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc, {sae}}"),
[], d>, EVEX_B;
let mayLoad = 1 in
def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
!strconcat("vcmp", suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">;
let mayLoad = 1 in {
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">;
defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
}
}
}
// SAE (suppress-all-exceptions) forms of the packed FP compare, selected by
// the {sae} marker in the asm string and encoded with EVEX.b (EVEX_B).
// The pattern lowers to X86cmpmRnd with FROUND_NO_EXC as the rounding operand.
multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
// Comparison-code mnemonic form (VCMP[EQ/LT/LE/...]PS/PD).
defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2,{sae}",
(X86cmpmRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
(i32 FROUND_NO_EXC))>, EVEX_B;
// Explicit 8-bit immediate CC form; accepted by the asm parser only and
// carries no pattern (hasSideEffects = 0 keeps it scheduler-neutral).
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc,{sae}, $src2, $src1",
"$src1, $src2,{sae}, $cc">, EVEX_B;
}
}
// Top-level driver: instantiate the packed FP compare for all vector widths.
// 512-bit (Z) always gets the common forms plus the SAE forms under AVX512;
// 128/256-bit (Z128/Z256) require VLX and have no SAE variant.
multiclass avx512_vcmp<AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcmp_common<_.info512>,
avx512_vcmp_sae<_.info512>, EVEX_V512;
}
let Predicates = [HasAVX512,HasVLX] in {
defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128;
defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256;
}
}
defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
"ps", SSEPackedSingle>, PS, EVEX_4V, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
"pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
EVEX_CD8<64, CD8VF>;
defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (VCMPPSZrri
@ -1654,30 +1706,7 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1),
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
(I8Imm imm:$cc)), GR16)>;
def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
(v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1),
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
(I8Imm imm:$cc)), GR8)>;
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1),
FROUND_CURRENT)),
(COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
(I8Imm imm:$cc)), GR16)>;
def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
(v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1),
FROUND_CURRENT)),
(COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
(I8Imm imm:$cc)), GR8)>;
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers

View File

@ -747,6 +747,14 @@ class AVX512BIi8Base : PD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
// Base class for AVX-512 packed-single instructions that take an 8-bit
// immediate: PS prefix, single-precision execution domain, Imm8 operand.
class AVX512PSIi8Base : PS {
Domain ExeDomain = SSEPackedSingle;
ImmType ImmT = Imm8;
}
// Base class for AVX-512 packed-double instructions that take an 8-bit
// immediate: PD prefix, double-precision execution domain, Imm8 operand.
class AVX512PDIi8Base : PD {
Domain ExeDomain = SSEPackedDouble;
ImmType ImmT = Imm8;
}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,

View File

@ -147,14 +147,21 @@ def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
def X86CmpMaskCC :
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, SDTCisVec<1>,
SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;
// Type profile for masked FP compares with a rounding operand: one i1-element
// vector result matching the element count of input vectors 1 and 2, an i8
// condition code (operand 3), and an integer operand 4 — used as the
// rounding/SAE mode by X86cmpmRnd (passed FROUND_NO_EXC in the SAE patterns).
def X86CmpMaskCCRound :
SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>,
SDTCisInt<4>]>;
def X86CmpMaskCCScalar :
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
def X86vshl : SDNode<"X86ISD::VSHL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,

View File

@ -277,18 +277,26 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_blend_w_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_512, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM,
X86ISD::CMPM_RND),
X86_INTRINSIC_DATA(avx512_mask_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM,
X86ISD::CMPM_RND),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,

View File

@ -392,17 +392,17 @@ declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) no
define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
%res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i8 2, i16 -1, i32 8)
%res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i8, i16, i32)
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i8 4, i8 -1, i32 4)
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i8, i8, i32)
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
; cvt intrinsics
define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {

View File

@ -1,36 +1,37 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test1:
; KNL: ## BB#0:
; KNL-NEXT: vcmpleps %zmm1, %zmm0, %k1
; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask = fcmp ole <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
ret <16 x float> %max
}
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test2:
; KNL: ## BB#0:
; KNL-NEXT: vcmplepd %zmm1, %zmm0, %k1
; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask = fcmp ole <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
ret <8 x double> %max
}
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
; CHECK-LABEL: test3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test3:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %yp, align 4
%mask = icmp eq <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -38,98 +39,120 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin
}
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test4_unsigned:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test4_unsigned:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask = icmp uge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-LABEL: test5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test5:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
ret <8 x i64> %max
}
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
; CHECK-LABEL: test6_unsigned:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test6_unsigned:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
; KNL-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask = icmp ugt <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
ret <8 x i64> %max
}
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test7:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
; CHECK-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
; KNL-LABEL: test7:
; KNL: ## BB#0:
; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
; SKX-LABEL: test7:
; SKX: ## BB#0:
; SKX: vxorps %xmm2, %xmm2, %xmm2
; SKX: vcmpltps %xmm2, %xmm0, %k1
; SKX: vmovaps %xmm0, %xmm1 {%k1}
; SKX: vmovaps %zmm1, %zmm0
; SKX: retq
%mask = fcmp olt <4 x float> %a, zeroinitializer
%c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
ret <4 x float>%c
}
define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test8:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
; KNL-LABEL: test8:
; KNL: ## BB#0:
; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
; SKX-LABEL: test8:
; SKX: ## BB#0:
; SKX: vxorpd %xmm2, %xmm2, %xmm2
; SKX: vcmpltpd %xmm2, %xmm0, %k1
; SKX: vmovapd %xmm0, %xmm1 {%k1}
; SKX: vmovaps %zmm1, %zmm0
; SKX: retq
%mask = fcmp olt <2 x double> %a, zeroinitializer
%c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
ret <2 x double>%c
}
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK-LABEL: test9:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
; CHECK-NEXT: retq
; KNL-LABEL: test9:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
; KNL-NEXT: retq
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
}
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; CHECK-LABEL: test10:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
; CHECK-NEXT: retq
; KNL-LABEL: test10:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
; KNL-NEXT: retq
; SKX-LABEL: test10:
; SKX: ## BB#0:
; SKX: vcmpeqps %ymm1, %ymm0, %k1
; SKX: vmovaps %ymm0, %ymm1 {%k1}
; SKX: vmovaps %zmm1, %zmm0
; SKX: retq
%mask = fcmp oeq <8 x float> %x, %y
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
ret <8 x float> %max
}
define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK-LABEL: test11_unsigned:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
; KNL-LABEL: test11_unsigned:
; KNL: ## BB#0:
; KNL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; KNL-NEXT: retq
%mask = icmp ugt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
@ -137,25 +160,25 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; CHECK-LABEL: test12:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
; CHECK-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
; CHECK-NEXT: kunpckbw %k0, %k1, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
; CHECK-NEXT: retq
; KNL-LABEL: test12:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
; KNL-NEXT: kunpckbw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
; KNL-NEXT: retq
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16
ret i16 %res1
}
define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
; CHECK-LABEL: test13:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; KNL-LABEL: test13:
; KNL: ## BB#0:
; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: retq
{
%cmpvector_i = fcmp oeq <16 x float> %a, %b
%conv = zext <16 x i1> %cmpvector_i to <16 x i32>
@ -163,14 +186,14 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
}
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
; CHECK-LABEL: test14:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: knotw %k0, %k1
; CHECK-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; KNL-LABEL: test14:
; KNL: ## BB#0:
; KNL-NEXT: vpsubd %zmm1, %zmm0, %zmm1
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: knotw %k0, %k1
; KNL-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: retq
%sub_r = sub <16 x i32> %a, %b
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
%sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
@ -180,14 +203,14 @@ define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
}
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
; CHECK-LABEL: test15:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: knotw %k0, %k1
; CHECK-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; KNL-LABEL: test15:
; KNL: ## BB#0:
; KNL-NEXT: vpsubq %zmm1, %zmm0, %zmm1
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: knotw %k0, %k1
; KNL-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: retq
%sub_r = sub <8 x i64> %a, %b
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
%sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
@ -197,24 +220,24 @@ define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
}
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test16:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test16:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpled %zmm0, %zmm1, %k1
; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test17:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test17:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -222,12 +245,12 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
}
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test18:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test18:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpled (%rdi), %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -235,12 +258,12 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
}
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test19:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test19:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -248,13 +271,13 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
}
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test20:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test20:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
@ -263,13 +286,13 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
}
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test21:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test21:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; KNL-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
; KNL-NEXT: vmovaps %zmm2, %zmm0
; KNL-NEXT: retq
%mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
@ -278,13 +301,13 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y
}
define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test22:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test22:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
; KNL-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask1 = icmp sgt <8 x i64> %x1, %y1
%y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <8 x i64> %x, %y
@ -294,13 +317,13 @@ define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i6
}
define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test23:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test23:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask1 = icmp sge <16 x i32> %x1, %y1
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask0 = icmp ule <16 x i32> %x, %y
@ -310,12 +333,12 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16
}
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; CHECK-LABEL: test24:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test24:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
@ -325,12 +348,12 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
}
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test25:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test25:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
@ -340,13 +363,13 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind
}
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test26:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test26:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
; KNL-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask1 = icmp sge <16 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
@ -358,13 +381,13 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32
}
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test27:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; KNL-LABEL: test27:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpleq %zmm1, %zmm2, %k1
; KNL-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
%mask1 = icmp sge <8 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
@ -375,10 +398,10 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y
ret <8 x i64> %max
}
; CHECK-LABEL: test28
; CHECK: vpcmpgtq
; CHECK: vpcmpgtq
; CHECK: kxorw
; KNL-LABEL: test28
; KNL: vpcmpgtq
; KNL: vpcmpgtq
; KNL: kxorw
define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
%x_gt_y = icmp sgt <8 x i64> %x, %y
%x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
@ -387,14 +410,188 @@ define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1
ret <8 x i32> %resse
}
; CHECK-LABEL: test29
; CHECK: vpcmpgtd
; CHECK: vpcmpgtd
; CHECK: kxnorw
; KNL-LABEL: test29
; KNL: vpcmpgtd
; KNL: vpcmpgtd
; KNL: kxnorw
define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
%x_gt_y = icmp sgt <16 x i32> %x, %y
%x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
%res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
%resse = sext <16 x i1>%res to <16 x i8>
ret <16 x i8> %resse
}
}
define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; SKX-LABEL: test30:
; SKX: vcmpeqpd %ymm1, %ymm0, %k1
; SKX: vmovapd %ymm0, %ymm1 {%k1}
%mask = fcmp oeq <4 x double> %x, %y
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
ret <4 x double> %max
}
define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
; SKX-LABEL: test31:
; SKX: vcmpltpd (%rdi), %xmm0, %k1
; SKX: vmovapd %xmm0, %xmm1 {%k1}
%y = load <2 x double>, <2 x double>* %yp, align 4
%mask = fcmp olt <2 x double> %x, %y
%max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
ret <2 x double> %max
}
define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
; SKX-LABEL: test32:
; SKX: vcmpltpd (%rdi), %ymm0, %k1
; SKX: vmovapd %ymm0, %ymm1 {%k1}
%y = load <4 x double>, <4 x double>* %yp, align 4
%mask = fcmp ogt <4 x double> %y, %x
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
ret <4 x double> %max
}
define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
; SKX-LABEL: test33:
; SKX: vcmpltpd (%rdi), %zmm0, %k1
; SKX: vmovapd %zmm0, %zmm1 {%k1}
%y = load <8 x double>, <8 x double>* %yp, align 4
%mask = fcmp olt <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
ret <8 x double> %max
}
define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
; SKX-LABEL: test34:
; SKX: vcmpltps (%rdi), %xmm0, %k1
; SKX: vmovaps %xmm0, %xmm1 {%k1}
%y = load <4 x float>, <4 x float>* %yp, align 4
%mask = fcmp olt <4 x float> %x, %y
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
ret <4 x float> %max
}
define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
; SKX-LABEL: test35:
; SKX: vcmpltps (%rdi), %ymm0, %k1
; SKX: vmovaps %ymm0, %ymm1 {%k1}
%y = load <8 x float>, <8 x float>* %yp, align 4
%mask = fcmp ogt <8 x float> %y, %x
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
ret <8 x float> %max
}
define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
; SKX-LABEL: test36:
; SKX: vcmpltps (%rdi), %zmm0, %k1
; SKX: vmovaps %zmm0, %zmm1 {%k1}
%y = load <16 x float>, <16 x float>* %yp, align 4
%mask = fcmp olt <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
ret <16 x float> %max
}
define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test37:
; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1
; SKX: vmovapd %zmm0, %zmm1 {%k1}
%a = load double, double* %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
%shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
%mask = fcmp ogt <8 x double> %shuffle, %x
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
ret <8 x double> %max
}
define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test38:
; SKX: vcmpltpd (%rdi){1to4}, %ymm0, %k1
; SKX: vmovapd %ymm0, %ymm1 {%k1}
%a = load double, double* %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
%shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
%mask = fcmp ogt <4 x double> %shuffle, %x
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
ret <4 x double> %max
}
define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test39:
; SKX: vcmpltpd (%rdi){1to2}, %xmm0, %k1
; SKX: vmovapd %xmm0, %xmm1 {%k1}
%a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%mask = fcmp ogt <2 x double> %shuffle, %x
%max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
ret <2 x double> %max
}
define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test40:
; SKX: vcmpltps (%rdi){1to16}, %zmm0, %k1
; SKX: vmovaps %zmm0, %zmm1 {%k1}
%a = load float, float* %ptr
%v = insertelement <16 x float> undef, float %a, i32 0
%shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%mask = fcmp ogt <16 x float> %shuffle, %x
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
ret <16 x float> %max
}
define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test41:
; SKX: vcmpltps (%rdi){1to8}, %ymm0, %k1
; SKX: vmovaps %ymm0, %ymm1 {%k1}
%a = load float, float* %ptr
%v = insertelement <8 x float> undef, float %a, i32 0
%shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%mask = fcmp ogt <8 x float> %shuffle, %x
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
ret <8 x float> %max
}
define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test42:
; SKX: vcmpltps (%rdi){1to4}, %xmm0, %k1
; SKX: vmovaps %xmm0, %xmm1 {%k1}
%a = load float, float* %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%mask = fcmp ogt <4 x float> %shuffle, %x
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
ret <4 x float> %max
}
define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
; SKX-LABEL: test43:
; SKX: vpmovw2m %xmm2, %k1
; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; SKX: vmovapd %zmm0, %zmm1 {%k1}
%a = load double, double* %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
%shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
%mask_cmp = fcmp ogt <8 x double> %shuffle, %x
%mask = and <8 x i1> %mask_cmp, %mask_in
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
ret <8 x double> %max
}

View File

@ -2261,3 +2261,31 @@ define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8
}
declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
;CHECK: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
%res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8)
define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
;CHECK: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
%res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8)
define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
;CHECK: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8)
define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
;CHECK: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8)

View File

@ -137,5 +137,5 @@
# CHECK: vpcmpd $8, %zmm10, %zmm25, %k5
0x62 0xd3 0x35 0x40 0x1f 0xea 0x8
# CHECK: vcmppd {sae}, $127, %zmm27, %zmm11, %k4
# CHECK: vcmppd $127,{sae}, %zmm27, %zmm11, %k4
0x62 0x91 0xa5 0x58 0xc2 0xe3 0x7f

View File

@ -5983,3 +5983,132 @@ vpermilps 0x400(%rbx), %zmm2, %zmm3
// CHECK: vpermilpd
// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x05,0x53,0x10,0x23]
vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: vcmppd $171, %zmm26, %zmm12, %k2
// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0xab]
vcmppd $0xab, %zmm26, %zmm12, %k2
// CHECK: vcmppd $171, %zmm26, %zmm12, %k2 {%k3}
// CHECK: encoding: [0x62,0x91,0x9d,0x4b,0xc2,0xd2,0xab]
vcmppd $0xab, %zmm26, %zmm12, %k2 {%k3}
// CHECK: vcmppd $171,{sae}, %zmm26, %zmm12, %k2
// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0xab]
vcmppd $0xab,{sae}, %zmm26, %zmm12, %k2
// CHECK: vcmppd $123, %zmm26, %zmm12, %k2
// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0x7b]
vcmppd $0x7b, %zmm26, %zmm12, %k2
// CHECK: vcmppd $123,{sae}, %zmm26, %zmm12, %k2
// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0x7b]
vcmppd $0x7b,{sae}, %zmm26, %zmm12, %k2
// CHECK: vcmppd $123, (%rcx), %zmm12, %k2
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x11,0x7b]
vcmppd $0x7b, (%rcx), %zmm12, %k2
// CHECK: vcmppd $123, 291(%rax,%r14,8), %zmm12, %k2
// CHECK: encoding: [0x62,0xb1,0x9d,0x48,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmppd $0x7b, 291(%rax,%r14,8), %zmm12, %k2
// CHECK: vcmppd $123, (%rcx){1to8}, %zmm12, %k2
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x11,0x7b]
vcmppd $0x7b, (%rcx){1to8}, %zmm12, %k2
// CHECK: vcmppd $123, 8128(%rdx), %zmm12, %k2
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x7f,0x7b]
vcmppd $0x7b, 8128(%rdx), %zmm12, %k2
// CHECK: vcmppd $123, 8192(%rdx), %zmm12, %k2
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
vcmppd $0x7b, 8192(%rdx), %zmm12, %k2
// CHECK: vcmppd $123, -8192(%rdx), %zmm12, %k2
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x80,0x7b]
vcmppd $0x7b, -8192(%rdx), %zmm12, %k2
// CHECK: vcmppd $123, -8256(%rdx), %zmm12, %k2
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
vcmppd $0x7b, -8256(%rdx), %zmm12, %k2
// CHECK: vcmppd $123, 1016(%rdx){1to8}, %zmm12, %k2
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x7f,0x7b]
vcmppd $0x7b, 1016(%rdx){1to8}, %zmm12, %k2
// CHECK: vcmppd $123, 1024(%rdx){1to8}, %zmm12, %k2
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0x00,0x04,0x00,0x00,0x7b]
vcmppd $0x7b, 1024(%rdx){1to8}, %zmm12, %k2
// CHECK: vcmppd $123, -1024(%rdx){1to8}, %zmm12, %k2
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x80,0x7b]
vcmppd $0x7b, -1024(%rdx){1to8}, %zmm12, %k2
// CHECK: vcmppd $123, -1032(%rdx){1to8}, %zmm12, %k2
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0xf8,0xfb,0xff,0xff,0x7b]
vcmppd $0x7b, -1032(%rdx){1to8}, %zmm12, %k2
// CHECK: vcmpps $171, %zmm22, %zmm17, %k2
// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0xab]
vcmpps $0xab, %zmm22, %zmm17, %k2
// CHECK: vcmpps $171, %zmm22, %zmm17, %k2 {%k3}
// CHECK: encoding: [0x62,0xb1,0x74,0x43,0xc2,0xd6,0xab]
vcmpps $0xab, %zmm22, %zmm17, %k2 {%k3}
// CHECK: vcmpps $171,{sae}, %zmm22, %zmm17, %k2
// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0xab]
vcmpps $0xab,{sae}, %zmm22, %zmm17, %k2
// CHECK: vcmpps $123, %zmm22, %zmm17, %k2
// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0x7b]
vcmpps $0x7b, %zmm22, %zmm17, %k2
// CHECK: vcmpps $123,{sae}, %zmm22, %zmm17, %k2
// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0x7b]
vcmpps $0x7b,{sae}, %zmm22, %zmm17, %k2
// CHECK: vcmpps $123, (%rcx), %zmm17, %k2
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x11,0x7b]
vcmpps $0x7b, (%rcx), %zmm17, %k2
// CHECK: vcmpps $123, 291(%rax,%r14,8), %zmm17, %k2
// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmpps $0x7b, 291(%rax,%r14,8), %zmm17, %k2
// CHECK: vcmpps $123, (%rcx){1to16}, %zmm17, %k2
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x11,0x7b]
vcmpps $0x7b, (%rcx){1to16}, %zmm17, %k2
// CHECK: vcmpps $123, 8128(%rdx), %zmm17, %k2
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x7f,0x7b]
vcmpps $0x7b, 8128(%rdx), %zmm17, %k2
// CHECK: vcmpps $123, 8192(%rdx), %zmm17, %k2
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
vcmpps $0x7b, 8192(%rdx), %zmm17, %k2
// CHECK: vcmpps $123, -8192(%rdx), %zmm17, %k2
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x80,0x7b]
vcmpps $0x7b, -8192(%rdx), %zmm17, %k2
// CHECK: vcmpps $123, -8256(%rdx), %zmm17, %k2
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
vcmpps $0x7b, -8256(%rdx), %zmm17, %k2
// CHECK: vcmpps $123, 508(%rdx){1to16}, %zmm17, %k2
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x7f,0x7b]
vcmpps $0x7b, 508(%rdx){1to16}, %zmm17, %k2
// CHECK: vcmpps $123, 512(%rdx){1to16}, %zmm17, %k2
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0x00,0x02,0x00,0x00,0x7b]
vcmpps $0x7b, 512(%rdx){1to16}, %zmm17, %k2
// CHECK: vcmpps $123, -512(%rdx){1to16}, %zmm17, %k2
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x80,0x7b]
vcmpps $0x7b, -512(%rdx){1to16}, %zmm17, %k2
// CHECK: vcmpps $123, -516(%rdx){1to16}, %zmm17, %k2
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0xfc,0xfd,0xff,0xff,0x7b]
vcmpps $0x7b, -516(%rdx){1to16}, %zmm17, %k2

View File

@ -1,34 +1,175 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -mcpu=knl --show-encoding %s | FileCheck %s
// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 -mcpu=knl --show-encoding %s | FileCheck %s
// CHECK: vaddps (%rax), %zmm1, %zmm1
// CHECK: vaddps zmm1 , zmm1, zmmword ptr [rax]
// CHECK: encoding: [0x62,0xf1,0x74,0x48,0x58,0x08]
vaddps zmm1, zmm1, zmmword ptr [rax]
// CHECK: vaddpd %zmm2, %zmm1, %zmm1
// CHECK: vaddpd zmm1 , zmm1, zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca]
vaddpd zmm1,zmm1,zmm2
// CHECK: vaddpd %zmm2, %zmm1, %zmm1 {%k5}
// CHECK: vaddpd zmm1 {k5}, zmm1, zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0x4d,0x58,0xca]
vaddpd zmm1{k5},zmm1,zmm2
// CHECK: vaddpd %zmm2, %zmm1, %zmm1 {%k5} {z}
// CHECK: vaddpd zmm1 {k5} {z}, zmm1, zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0xcd,0x58,0xca]
vaddpd zmm1{k5} {z},zmm1,zmm2
// CHECK: vaddpd {rn-sae}, %zmm2, %zmm1, %zmm1
// CHECK: vaddpd zmm1 , zmm1, zmm2, {rn-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x18,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rn-sae}
// CHECK: vaddpd {ru-sae}, %zmm2, %zmm1, %zmm1
// CHECK: vaddpd zmm1 , zmm1, zmm2, {ru-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x58,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{ru-sae}
// CHECK: vaddpd {rd-sae}, %zmm2, %zmm1, %zmm1
// CHECK: vaddpd zmm1 , zmm1, zmm2, {rd-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x38,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rd-sae}
// CHECK: vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: vaddpd zmm1 , zmm1, zmm2, {rz-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x78,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rz-sae}
// CHECK: vcmppd k2 , zmm12, zmm26, 171
// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0xab]
vcmppd k2,zmm12,zmm26,0xab
// CHECK: vcmppd k2 {k3}, zmm12, zmm26, 171
// CHECK: encoding: [0x62,0x91,0x9d,0x4b,0xc2,0xd2,0xab]
vcmppd k2{k3},zmm12,zmm26,0xab
// CHECK: vcmppd k2 , zmm12, zmm26,{sae}, 171
// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0xab]
vcmppd k2,zmm12,zmm26,{sae},0xab
// CHECK: vcmppd k2 , zmm12, zmm26, 123
// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0x7b]
vcmppd k2 ,zmm12,zmm26,0x7b
// CHECK: vcmppd k2 , zmm12, zmm26,{sae}, 123
// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0x7b]
vcmppd k2,zmm12,zmm26,{sae},0x7b
// CHECK: vcmppd k2 , zmm12, zmmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x11,0x7b]
vcmppd k2,zmm12,zmmword PTR [rcx],0x7b
// CHECK: vcmppd k2 , zmm12, zmmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0x9d,0x48,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmppd k2 ,zmm12,zmmword PTR [rax+r14*8+0x123],0x7b
// CHECK: vcmppd k2 , zmm12, qword ptr [rcx]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x11,0x7b]
vcmppd k2,zmm12,QWORD PTR [rcx]{1to8},0x7b
// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx + 8128], 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x7f,0x7b]
vcmppd k2,zmm12,zmmword PTR [rdx+0x1fc0],0x7b
// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx + 8192], 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
vcmppd k2,zmm12,zmmword PTR [rdx+0x2000],0x7b
// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx - 8192], 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x80,0x7b]
vcmppd k2,zmm12,zmmword PTR [rdx-0x2000],0x7b
// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx - 8256], 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
vcmppd k2,zmm12,zmmword PTR [rdx-0x2040],0x7b
// CHECK: vcmppd k2 , zmm12, qword ptr [rdx + 1016]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x7f,0x7b]
vcmppd k2,zmm12,QWORD PTR [rdx+0x3f8]{1to8},0x7b
// CHECK: vcmppd k2 , zmm12, qword ptr [rdx + 1024]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0x00,0x04,0x00,0x00,0x7b]
vcmppd k2,zmm12,QWORD PTR [rdx+0x400]{1to8},0x7b
// CHECK: vcmppd k2 , zmm12, qword ptr [rdx - 1024]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x80,0x7b]
vcmppd k2,zmm12,QWORD PTR [rdx-0x400]{1to8},0x7b
// CHECK: vcmppd k2 , zmm12, qword ptr [rdx - 1032]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0xf8,0xfb,0xff,0xff,0x7b]
vcmppd k2,zmm12,QWORD PTR [rdx-0x408]{1to8},0x7b
// CHECK: vcmpps k2 , zmm17, zmm22, 171
// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0xab]
vcmpps k2,zmm17,zmm22,0xab
// CHECK: vcmpps k2 {k3}, zmm17, zmm22, 171
// CHECK: encoding: [0x62,0xb1,0x74,0x43,0xc2,0xd6,0xab]
vcmpps k2{k3},zmm17,zmm22,0xab
// CHECK: vcmpps k2 , zmm17, zmm22,{sae}, 171
// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0xab]
vcmpps k2,zmm17,zmm22,{sae},0xab
// CHECK: vcmpps k2 , zmm17, zmm22, 123
// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0x7b]
vcmpps k2,zmm17,zmm22,0x7b
// CHECK: vcmpps k2 , zmm17, zmm22,{sae}, 123
// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0x7b]
vcmpps k2,zmm17,zmm22,{sae},0x7b
// CHECK: vcmpps k2 , zmm17, zmmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x11,0x7b]
vcmpps k2,zmm17,zmmword PTR [rcx],0x7b
// CHECK: vcmpps k2 , zmm17, zmmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmpps k2,zmm17,zmmword PTR [rax+r14*8+0x123],0x7b
// CHECK: vcmpps k2 , zmm17, dword ptr [rcx]{1to16}, 123
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x11,0x7b]
vcmpps k2,zmm17,DWORD PTR [rcx]{1to16},0x7b
// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx + 8128], 123
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x7f,0x7b]
vcmpps k2,zmm17,zmmword PTR [rdx+0x1fc0],0x7b
// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx + 8192], 123
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
vcmpps k2,zmm17,zmmword PTR [rdx+0x2000],0x7b
// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx - 8192], 123
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x80,0x7b]
vcmpps k2,zmm17,zmmword PTR [rdx-0x2000],0x7b
// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx - 8256], 123
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
vcmpps k2,zmm17,zmmword PTR [rdx-0x2040],0x7b
// CHECK: vcmpps k2 , zmm17, dword ptr [rdx + 508]{1to16}, 123
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x7f,0x7b]
vcmpps k2,zmm17,DWORD PTR [rdx+0x1fc]{1to16},0x7b
// CHECK: vcmpps k2 , zmm17, dword ptr [rdx + 512]{1to16}, 123
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0x00,0x02,0x00,0x00,0x7b]
vcmpps k2,zmm17,DWORD PTR [rdx+0x200]{1to16},0x7b
// CHECK: vcmpps k2 , zmm17, dword ptr [rdx - 512]{1to16}, 123
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x80,0x7b]
vcmpps k2,zmm17,DWORD PTR [rdx-0x200]{1to16},0x7b
// CHECK: vcmpps k2 , zmm17, dword ptr [rdx - 516]{1to16}, 123
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0xfc,0xfd,0xff,0xff,0x7b]
vcmpps k2,zmm17,DWORD PTR [rdx-0x204]{1to16},0x7b

View File

@ -0,0 +1,225 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
// CHECK: vcmppd k3 , xmm27, xmm23, 171
// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0xab]
vcmppd k3,xmm27,xmm23,0xab
// CHECK: vcmppd k3 {k5}, xmm27, xmm23, 171
// CHECK: encoding: [0x62,0xb1,0xa5,0x05,0xc2,0xdf,0xab]
vcmppd k3{k5},xmm27,xmm23,0xab
// CHECK: vcmppd k3 , xmm27, xmm23, 123
// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0x7b]
vcmppd k3,xmm27,xmm23,0x7b
// CHECK: vcmppd k3 , xmm27, xmmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x19,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rcx],0x7b
// CHECK: vcmppd k3 , xmm27, xmmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rax+r14*8+0x123],0x7b
// CHECK: vcmppd k3 , xmm27, qword ptr [rcx]{1to2}, 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x19,0x7b]
vcmppd k3,xmm27,QWORD PTR [rcx]{1to2},0x7b
// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx + 2032], 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x7f,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rdx+0x7f0],0x7b
// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx + 2048], 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0x00,0x08,0x00,0x00,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rdx+0x800],0x7b
// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx - 2048], 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x80,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rdx-0x800],0x7b
// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx - 2064], 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rdx-0x810],0x7b
// CHECK: vcmppd k3 , xmm27, qword ptr [rdx + 1016]{1to2}, 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x7f,0x7b]
vcmppd k3,xmm27,QWORD PTR [rdx+0x3f8]{1to2},0x7b
// CHECK: vcmppd k3 , xmm27, qword ptr [rdx + 1024]{1to2}, 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0x00,0x04,0x00,0x00,0x7b]
vcmppd k3,xmm27,QWORD PTR [rdx+0x400]{1to2},0x7b
// CHECK: vcmppd k3 , xmm27, qword ptr [rdx - 1024]{1to2}, 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x80,0x7b]
vcmppd k3,xmm27,QWORD PTR [rdx-0x400]{1to2},0x7b
// CHECK: vcmppd k3 , xmm27, qword ptr [rdx - 1032]{1to2}, 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
vcmppd k3,xmm27,QWORD PTR [rdx-0x408]{1to2},0x7b
// CHECK: vcmppd k4 , ymm17, ymm27, 171
// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0xab]
vcmppd k4,ymm17,ymm27,0xab
// CHECK: vcmppd k4 {k7}, ymm17, ymm27, 171
// CHECK: encoding: [0x62,0x91,0xf5,0x27,0xc2,0xe3,0xab]
vcmppd k4{k7},ymm17,ymm27,0xab
// CHECK: vcmppd k4 , ymm17, ymm27, 123
// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0x7b]
vcmppd k4,ymm17,ymm27,0x7b
// CHECK: vcmppd k4 , ymm17, ymmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x21,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rcx],0x7b
// CHECK: vcmppd k4 , ymm17, ymmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0xf5,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rax+r14*8+0x123],0x7b
// CHECK: vcmppd k4 , ymm17, qword ptr [rcx]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x21,0x7b]
vcmppd k4,ymm17,QWORD PTR [rcx]{1to4},0x7b
// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx + 4064], 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x7f,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rdx+0xfe0],0x7b
// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx + 4096], 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rdx+0x1000],0x7b
// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx - 4096], 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x80,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rdx-0x1000],0x7b
// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx - 4128], 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rdx-0x1020],0x7b
// CHECK: vcmppd k4 , ymm17, qword ptr [rdx + 1016]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x7f,0x7b]
vcmppd k4,ymm17,QWORD PTR [rdx+0x3f8]{1to4},0x7b
// CHECK: vcmppd k4 , ymm17, qword ptr [rdx + 1024]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0x00,0x04,0x00,0x00,0x7b]
vcmppd k4,ymm17,QWORD PTR [rdx+0x400]{1to4},0x7b
// CHECK: vcmppd k4 , ymm17, qword ptr [rdx - 1024]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x80,0x7b]
vcmppd k4,ymm17,QWORD PTR [rdx-0x400]{1to4},0x7b
// CHECK: vcmppd k4 , ymm17, qword ptr [rdx - 1032]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
vcmppd k4,ymm17,QWORD PTR [rdx-0x408]{1to4},0x7b
// vcmpps (128-bit, Intel syntax): register/register, {k2} write-mask,
// plain memory, and {1to4} dword-broadcast forms. The displacement pairs
// (+/-2032 vs +/-2048, +/-508 vs +/-512) straddle the EVEX disp8 limit:
// disp8 scales by 16 for a full xmm load and by 4 for a dword broadcast.
// CHECK: vcmpps k4 , xmm29, xmm28, 171
// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0xab]
          vcmpps k4,xmm29,xmm28,0xab
// CHECK: vcmpps k4 {k2}, xmm29, xmm28, 171
// CHECK: encoding: [0x62,0x91,0x14,0x02,0xc2,0xe4,0xab]
          vcmpps k4{k2},xmm29,xmm28,0xab
// CHECK: vcmpps k4 , xmm29, xmm28, 123
// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0x7b]
          vcmpps k4,xmm29,xmm28,0x7b
// CHECK: vcmpps k4 , xmm29, xmmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x21,0x7b]
          vcmpps k4,xmm29,XMMWORD PTR [rcx],0x7b
// CHECK: vcmpps k4 , xmm29, xmmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0x14,0x00,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
          vcmpps k4,xmm29,XMMWORD PTR [rax+r14*8+0x123],0x7b
// CHECK: vcmpps k4 , xmm29, dword ptr [rcx]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x21,0x7b]
          vcmpps k4,xmm29,DWORD PTR [rcx]{1to4},0x7b
// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx + 2032], 123
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x7f,0x7b]
          vcmpps k4,xmm29,XMMWORD PTR [rdx+0x7f0],0x7b
// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx + 2048], 123
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0x00,0x08,0x00,0x00,0x7b]
          vcmpps k4,xmm29,XMMWORD PTR [rdx+0x800],0x7b
// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx - 2048], 123
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x80,0x7b]
          vcmpps k4,xmm29,XMMWORD PTR [rdx-0x800],0x7b
// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx - 2064], 123
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
          vcmpps k4,xmm29,XMMWORD PTR [rdx-0x810],0x7b
// CHECK: vcmpps k4 , xmm29, dword ptr [rdx + 508]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x7f,0x7b]
          vcmpps k4,xmm29,DWORD PTR [rdx+0x1fc]{1to4},0x7b
// CHECK: vcmpps k4 , xmm29, dword ptr [rdx + 512]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
          vcmpps k4,xmm29,DWORD PTR [rdx+0x200]{1to4},0x7b
// CHECK: vcmpps k4 , xmm29, dword ptr [rdx - 512]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x80,0x7b]
          vcmpps k4,xmm29,DWORD PTR [rdx-0x200]{1to4},0x7b
// CHECK: vcmpps k4 , xmm29, dword ptr [rdx - 516]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
          vcmpps k4,xmm29,DWORD PTR [rdx-0x204]{1to4},0x7b
// vcmpps (256-bit, Intel syntax): register/register, {k1} write-mask,
// plain memory, and {1to8} dword-broadcast forms. Displacement pairs
// (+/-4064 vs +/-4096, +/-508 vs +/-512) straddle the EVEX disp8 limit:
// disp8 scales by 32 for a full ymm load and by 4 for a dword broadcast.
// CHECK: vcmpps k4 , ymm19, ymm18, 171
// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0xab]
          vcmpps k4,ymm19,ymm18,0xab
// CHECK: vcmpps k4 {k1}, ymm19, ymm18, 171
// CHECK: encoding: [0x62,0xb1,0x64,0x21,0xc2,0xe2,0xab]
          vcmpps k4{k1},ymm19,ymm18,0xab
// CHECK: vcmpps k4 , ymm19, ymm18, 123
// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0x7b]
          vcmpps k4,ymm19,ymm18,0x7b
// CHECK: vcmpps k4 , ymm19, ymmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x21,0x7b]
          vcmpps k4,ymm19,YMMWORD PTR [rcx],0x7b
// CHECK: vcmpps k4 , ymm19, ymmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
          vcmpps k4,ymm19,YMMWORD PTR [rax+r14*8+0x123],0x7b
// CHECK: vcmpps k4 , ymm19, dword ptr [rcx]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x21,0x7b]
          vcmpps k4,ymm19,DWORD PTR [rcx]{1to8},0x7b
// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx + 4064], 123
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x7f,0x7b]
          vcmpps k4,ymm19,YMMWORD PTR [rdx+0xfe0],0x7b
// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx + 4096], 123
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
          vcmpps k4,ymm19,YMMWORD PTR [rdx+0x1000],0x7b
// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx - 4096], 123
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x80,0x7b]
          vcmpps k4,ymm19,YMMWORD PTR [rdx-0x1000],0x7b
// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx - 4128], 123
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
          vcmpps k4,ymm19,YMMWORD PTR [rdx-0x1020],0x7b
// CHECK: vcmpps k4 , ymm19, dword ptr [rdx + 508]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x7f,0x7b]
          vcmpps k4,ymm19,DWORD PTR [rdx+0x1fc]{1to8},0x7b
// CHECK: vcmpps k4 , ymm19, dword ptr [rdx + 512]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
          vcmpps k4,ymm19,DWORD PTR [rdx+0x200]{1to8},0x7b
// CHECK: vcmpps k4 , ymm19, dword ptr [rdx - 512]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x80,0x7b]
          vcmpps k4,ymm19,DWORD PTR [rdx-0x200]{1to8},0x7b
// CHECK: vcmpps k4 , ymm19, dword ptr [rdx - 516]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
          vcmpps k4,ymm19,DWORD PTR [rdx-0x204]{1to8},0x7b

View File

@ -9179,3 +9179,237 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// Tail of a vpmuldq test group whose start is above this window:
// {1to4} qword broadcast with an offset (-1032) just past the disp8*8
// range, hence the four-byte displacement in the encoding.
// CHECK: vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x9a,0xf8,0xfb,0xff,0xff]
          vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19
// vcmppd (128-bit, AT&T syntax) into mask k3: register/register, {%k5}
// write-mask, plain memory, and {1to2} qword-broadcast forms. Displacement
// pairs (+/-2032 vs +/-2048, +/-1016 vs +/-1024) straddle the EVEX disp8
// limit: disp8 scales by 16 for a full xmm load and by 8 for a broadcast.
// CHECK: vcmppd $171, %xmm23, %xmm27, %k3
// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0xab]
          vcmppd $0xab, %xmm23, %xmm27, %k3
// CHECK: vcmppd $171, %xmm23, %xmm27, %k3 {%k5}
// CHECK: encoding: [0x62,0xb1,0xa5,0x05,0xc2,0xdf,0xab]
          vcmppd $0xab, %xmm23, %xmm27, %k3 {%k5}
// CHECK: vcmppd $123, %xmm23, %xmm27, %k3
// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0x7b]
          vcmppd $0x7b, %xmm23, %xmm27, %k3
// CHECK: vcmppd $123, (%rcx), %xmm27, %k3
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x19,0x7b]
          vcmppd $0x7b, (%rcx), %xmm27, %k3
// CHECK: vcmppd $123, 291(%rax,%r14,8), %xmm27, %k3
// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
          vcmppd $0x7b, 291(%rax,%r14,8), %xmm27, %k3
// CHECK: vcmppd $123, (%rcx){1to2}, %xmm27, %k3
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x19,0x7b]
          vcmppd $0x7b, (%rcx){1to2}, %xmm27, %k3
// CHECK: vcmppd $123, 2032(%rdx), %xmm27, %k3
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x7f,0x7b]
          vcmppd $0x7b, 2032(%rdx), %xmm27, %k3
// CHECK: vcmppd $123, 2048(%rdx), %xmm27, %k3
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0x00,0x08,0x00,0x00,0x7b]
          vcmppd $0x7b, 2048(%rdx), %xmm27, %k3
// CHECK: vcmppd $123, -2048(%rdx), %xmm27, %k3
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x80,0x7b]
          vcmppd $0x7b, -2048(%rdx), %xmm27, %k3
// CHECK: vcmppd $123, -2064(%rdx), %xmm27, %k3
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
          vcmppd $0x7b, -2064(%rdx), %xmm27, %k3
// CHECK: vcmppd $123, 1016(%rdx){1to2}, %xmm27, %k3
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x7f,0x7b]
          vcmppd $0x7b, 1016(%rdx){1to2}, %xmm27, %k3
// CHECK: vcmppd $123, 1024(%rdx){1to2}, %xmm27, %k3
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0x00,0x04,0x00,0x00,0x7b]
          vcmppd $0x7b, 1024(%rdx){1to2}, %xmm27, %k3
// CHECK: vcmppd $123, -1024(%rdx){1to2}, %xmm27, %k3
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x80,0x7b]
          vcmppd $0x7b, -1024(%rdx){1to2}, %xmm27, %k3
// CHECK: vcmppd $123, -1032(%rdx){1to2}, %xmm27, %k3
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
          vcmppd $0x7b, -1032(%rdx){1to2}, %xmm27, %k3
// vcmppd (256-bit, AT&T syntax) into mask k4: register/register, {%k7}
// write-mask, plain memory, and {1to4} qword-broadcast forms. Displacement
// pairs (+/-4064 vs +/-4096, +/-1016 vs +/-1024) straddle the EVEX disp8
// limit: disp8 scales by 32 for a full ymm load and by 8 for a broadcast.
// NOTE(review): the 4064(%rdx) case previously wrote its immediate as
// decimal $123; normalized to $0x7b to match every other input line in
// this group (identical encoding either way).
// CHECK: vcmppd $171, %ymm27, %ymm17, %k4
// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0xab]
          vcmppd $0xab, %ymm27, %ymm17, %k4
// CHECK: vcmppd $171, %ymm27, %ymm17, %k4 {%k7}
// CHECK: encoding: [0x62,0x91,0xf5,0x27,0xc2,0xe3,0xab]
          vcmppd $0xab, %ymm27, %ymm17, %k4 {%k7}
// CHECK: vcmppd $123, %ymm27, %ymm17, %k4
// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0x7b]
          vcmppd $0x7b, %ymm27, %ymm17, %k4
// CHECK: vcmppd $123, (%rcx), %ymm17, %k4
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x21,0x7b]
          vcmppd $0x7b, (%rcx), %ymm17, %k4
// CHECK: vcmppd $123, 291(%rax,%r14,8), %ymm17, %k4
// CHECK: encoding: [0x62,0xb1,0xf5,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
          vcmppd $0x7b, 291(%rax,%r14,8), %ymm17, %k4
// CHECK: vcmppd $123, (%rcx){1to4}, %ymm17, %k4
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x21,0x7b]
          vcmppd $0x7b, (%rcx){1to4}, %ymm17, %k4
// CHECK: vcmppd $123, 4064(%rdx), %ymm17, %k4
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x7f,0x7b]
          vcmppd $0x7b, 4064(%rdx), %ymm17, %k4
// CHECK: vcmppd $123, 4096(%rdx), %ymm17, %k4
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
          vcmppd $0x7b, 4096(%rdx), %ymm17, %k4
// CHECK: vcmppd $123, -4096(%rdx), %ymm17, %k4
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x80,0x7b]
          vcmppd $0x7b, -4096(%rdx), %ymm17, %k4
// CHECK: vcmppd $123, -4128(%rdx), %ymm17, %k4
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
          vcmppd $0x7b, -4128(%rdx), %ymm17, %k4
// CHECK: vcmppd $123, 1016(%rdx){1to4}, %ymm17, %k4
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x7f,0x7b]
          vcmppd $0x7b, 1016(%rdx){1to4}, %ymm17, %k4
// CHECK: vcmppd $123, 1024(%rdx){1to4}, %ymm17, %k4
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0x00,0x04,0x00,0x00,0x7b]
          vcmppd $0x7b, 1024(%rdx){1to4}, %ymm17, %k4
// CHECK: vcmppd $123, -1024(%rdx){1to4}, %ymm17, %k4
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x80,0x7b]
          vcmppd $0x7b, -1024(%rdx){1to4}, %ymm17, %k4
// CHECK: vcmppd $123, -1032(%rdx){1to4}, %ymm17, %k4
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
          vcmppd $0x7b, -1032(%rdx){1to4}, %ymm17, %k4
// vcmpps (128-bit, AT&T syntax) into mask k4: register/register, {%k2}
// write-mask, plain memory, and {1to4} dword-broadcast forms. Displacement
// pairs (+/-2032 vs +/-2048, +/-508 vs +/-512) straddle the EVEX disp8
// limit: disp8 scales by 16 for a full xmm load and by 4 for a broadcast.
// NOTE(review): the 508(%rdx) and 512(%rdx) broadcast cases previously
// wrote their immediates as decimal $123; normalized to $0x7b to match
// every other input line in this group (identical encoding either way).
// CHECK: vcmpps $171, %xmm28, %xmm29, %k4
// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0xab]
          vcmpps $0xab, %xmm28, %xmm29, %k4
// CHECK: vcmpps $171, %xmm28, %xmm29, %k4 {%k2}
// CHECK: encoding: [0x62,0x91,0x14,0x02,0xc2,0xe4,0xab]
          vcmpps $0xab, %xmm28, %xmm29, %k4 {%k2}
// CHECK: vcmpps $123, %xmm28, %xmm29, %k4
// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0x7b]
          vcmpps $0x7b, %xmm28, %xmm29, %k4
// CHECK: vcmpps $123, (%rcx), %xmm29, %k4
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x21,0x7b]
          vcmpps $0x7b, (%rcx), %xmm29, %k4
// CHECK: vcmpps $123, 291(%rax,%r14,8), %xmm29, %k4
// CHECK: encoding: [0x62,0xb1,0x14,0x00,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
          vcmpps $0x7b, 291(%rax,%r14,8), %xmm29, %k4
// CHECK: vcmpps $123, (%rcx){1to4}, %xmm29, %k4
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x21,0x7b]
          vcmpps $0x7b, (%rcx){1to4}, %xmm29, %k4
// CHECK: vcmpps $123, 2032(%rdx), %xmm29, %k4
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x7f,0x7b]
          vcmpps $0x7b, 2032(%rdx), %xmm29, %k4
// CHECK: vcmpps $123, 2048(%rdx), %xmm29, %k4
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0x00,0x08,0x00,0x00,0x7b]
          vcmpps $0x7b, 2048(%rdx), %xmm29, %k4
// CHECK: vcmpps $123, -2048(%rdx), %xmm29, %k4
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x80,0x7b]
          vcmpps $0x7b, -2048(%rdx), %xmm29, %k4
// CHECK: vcmpps $123, -2064(%rdx), %xmm29, %k4
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
          vcmpps $0x7b, -2064(%rdx), %xmm29, %k4
// CHECK: vcmpps $123, 508(%rdx){1to4}, %xmm29, %k4
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x7f,0x7b]
          vcmpps $0x7b, 508(%rdx){1to4}, %xmm29, %k4
// CHECK: vcmpps $123, 512(%rdx){1to4}, %xmm29, %k4
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
          vcmpps $0x7b, 512(%rdx){1to4}, %xmm29, %k4
// CHECK: vcmpps $123, -512(%rdx){1to4}, %xmm29, %k4
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x80,0x7b]
          vcmpps $0x7b, -512(%rdx){1to4}, %xmm29, %k4
// CHECK: vcmpps $123, -516(%rdx){1to4}, %xmm29, %k4
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
          vcmpps $0x7b, -516(%rdx){1to4}, %xmm29, %k4
// vcmpps (256-bit, AT&T syntax) into mask k4: register/register, {%k1}
// write-mask, plain memory, and {1to8} dword-broadcast forms. Displacement
// pairs (+/-4064 vs +/-4096, +/-508 vs +/-512) straddle the EVEX disp8
// limit: disp8 scales by 32 for a full ymm load and by 4 for a broadcast.
// CHECK: vcmpps $171, %ymm18, %ymm19, %k4
// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0xab]
          vcmpps $0xab, %ymm18, %ymm19, %k4
// CHECK: vcmpps $171, %ymm18, %ymm19, %k4 {%k1}
// CHECK: encoding: [0x62,0xb1,0x64,0x21,0xc2,0xe2,0xab]
          vcmpps $0xab, %ymm18, %ymm19, %k4 {%k1}
// CHECK: vcmpps $123, %ymm18, %ymm19, %k4
// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0x7b]
          vcmpps $0x7b, %ymm18, %ymm19, %k4
// CHECK: vcmpps $123, (%rcx), %ymm19, %k4
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x21,0x7b]
          vcmpps $0x7b, (%rcx), %ymm19, %k4
// CHECK: vcmpps $123, 291(%rax,%r14,8), %ymm19, %k4
// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
          vcmpps $0x7b, 291(%rax,%r14,8), %ymm19, %k4
// CHECK: vcmpps $123, (%rcx){1to8}, %ymm19, %k4
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x21,0x7b]
          vcmpps $0x7b, (%rcx){1to8}, %ymm19, %k4
// CHECK: vcmpps $123, 4064(%rdx), %ymm19, %k4
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x7f,0x7b]
          vcmpps $0x7b, 4064(%rdx), %ymm19, %k4
// CHECK: vcmpps $123, 4096(%rdx), %ymm19, %k4
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
          vcmpps $0x7b, 4096(%rdx), %ymm19, %k4
// CHECK: vcmpps $123, -4096(%rdx), %ymm19, %k4
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x80,0x7b]
          vcmpps $0x7b, -4096(%rdx), %ymm19, %k4
// CHECK: vcmpps $123, -4128(%rdx), %ymm19, %k4
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
          vcmpps $0x7b, -4128(%rdx), %ymm19, %k4
// CHECK: vcmpps $123, 508(%rdx){1to8}, %ymm19, %k4
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x7f,0x7b]
          vcmpps $0x7b, 508(%rdx){1to8}, %ymm19, %k4
// CHECK: vcmpps $123, 512(%rdx){1to8}, %ymm19, %k4
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
          vcmpps $0x7b, 512(%rdx){1to8}, %ymm19, %k4
// CHECK: vcmpps $123, -512(%rdx){1to8}, %ymm19, %k4
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x80,0x7b]
          vcmpps $0x7b, -512(%rdx){1to8}, %ymm19, %k4
// CHECK: vcmpps $123, -516(%rdx){1to8}, %ymm19, %k4
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
          vcmpps $0x7b, -516(%rdx){1to8}, %ymm19, %k4