1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[X86] Remove X86ISD::PCMPGTM/PCMPEQM and instead just use X86ISD::PCMPM and pattern match the immediate value during isel.

Legalization is still biased to turn LT compares into GT by swapping operands to avoid needing extra isel patterns to commute.

I'm hoping to remove TESTM/TESTNM next and this should simplify that by making EQ/NE more similar.

llvm-svn: 323604
This commit is contained in:
Craig Topper 2018-01-27 20:19:02 +00:00
parent 115c4b7a8c
commit 262a106269
8 changed files with 69 additions and 79 deletions

View File

@@ -451,8 +451,7 @@ namespace {
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
unsigned Opcode = N->getOpcode();
if (Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM ||
Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
if (Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU ||
Opcode == X86ISD::CMPM_RND) {
// We can get 256-bit 8 element types here without VLX being enabled. When

View File

@@ -5045,8 +5045,6 @@ static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
return false;
case X86ISD::TESTM:
case X86ISD::TESTNM:
case X86ISD::PCMPEQM:
case X86ISD::PCMPGTM:
case X86ISD::CMPM:
case X86ISD::CMPMU:
case X86ISD::CMPM_RND:
@@ -14408,8 +14406,8 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
int NumElems = VT.getVectorNumElements();
if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
(Subtarget.hasDQI() && (NumElems < 32)))
return DAG.getNode(X86ISD::PCMPGTM, DL, VT, DAG.getConstant(0, DL, ExtVT),
Shuffle);
return DAG.getNode(X86ISD::CMPM, DL, VT, DAG.getConstant(0, DL, ExtVT),
Shuffle, DAG.getConstant(6, DL, MVT::i8));
return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);
}
@@ -16565,8 +16563,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(ShiftInx, DL, ExtVT));
In = DAG.getBitcast(InVT, In);
}
return DAG.getNode(X86ISD::PCMPGTM, DL, VT, DAG.getConstant(0, DL, InVT),
In);
return DAG.getNode(X86ISD::CMPM, DL, VT, DAG.getConstant(0, DL, InVT),
In, DAG.getConstant(6, DL, MVT::i8));
}
// Use TESTD/Q, extended vector to packed dword/qword.
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
@@ -17750,43 +17748,39 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
unsigned Opc = 0;
bool Unsigned = false;
bool Swap = false;
unsigned SSECC;
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: SSECC = 4; break;
case ISD::SETEQ: Opc = X86ISD::PCMPEQM; break;
case ISD::SETUGT: SSECC = 6; Unsigned = true; break;
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETULT: SSECC = 1; break;
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = X86ISD::PCMPGTM; break;
case ISD::SETULT: SSECC = 1; Unsigned = true; break;
case ISD::SETUGE: SSECC = 5; Unsigned = true; break; //NLT
case ISD::SETGE: Swap = true; SSECC = 2; break; // LE + swap
case ISD::SETULE: Unsigned = true; LLVM_FALLTHROUGH;
case ISD::SETUGT:
case ISD::SETGT: SSECC = 6; break;
case ISD::SETUGE: SSECC = 5; break;
case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULE:
case ISD::SETLE: SSECC = 2; break;
}
if (Swap)
std::swap(Op0, Op1);
// See if it is the case of CMP(EQ|NEQ,AND(A,B),ZERO) and change it to TESTM|NM.
if ((!Opc && SSECC == 4) || Opc == X86ISD::PCMPEQM) {
if (SSECC == 4 || SSECC == 0) {
SDValue A = peekThroughBitcasts(Op0);
if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) &&
ISD::isBuildVectorAllZeros(Op1.getNode())) {
MVT VT0 = Op0.getSimpleValueType();
SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0));
SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1));
return DAG.getNode(Opc == X86ISD::PCMPEQM ? X86ISD::TESTNM : X86ISD::TESTM,
return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM,
dl, VT, RHS, LHS);
}
}
if (Opc)
return DAG.getNode(Opc, dl, VT, Op0, Op1);
Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) ? X86ISD::CMPMU
: X86ISD::CMPM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, dl, MVT::i8));
}
@@ -22767,7 +22761,8 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
if (VT.is512BitVector()) {
assert(VT == MVT::v64i8 && "Unexpected element type!");
SDValue CMP = DAG.getNode(X86ISD::PCMPGTM, dl, MVT::v64i1, Zeros, R);
SDValue CMP = DAG.getNode(X86ISD::CMPM, dl, MVT::v64i1, Zeros, R,
DAG.getConstant(6, dl, MVT::i8));
return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
}
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
@@ -23214,8 +23209,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
Sel = DAG.getNode(X86ISD::PCMPGTM, dl, MaskVT,
DAG.getConstant(0, dl, VT), Sel);
Sel = DAG.getNode(X86ISD::CMPM, dl, MaskVT,
DAG.getConstant(0, dl, VT), Sel,
DAG.getConstant(6, dl, MVT::i8));
return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
} else if (Subtarget.hasSSE41()) {
// On SSE41 targets we make use of the fact that VSELECT lowers
@@ -25342,8 +25338,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CMPP: return "X86ISD::CMPP";
case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM";
case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM";
case X86ISD::PHMINPOS: return "X86ISD::PHMINPOS";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";

View File

@@ -329,8 +329,6 @@ namespace llvm {
// Vector integer comparisons.
PCMPEQ, PCMPGT,
// Vector integer comparisons, the result is in a mask vector.
PCMPEQM, PCMPGTM,
// v8i16 Horizontal minimum and position.
PHMINPOS,

View File

@@ -1994,7 +1994,7 @@ let Predicates = [HasAVX512] in {
SSE_ALU_F64S>, AVX512XDIi8Base, VEX_W;
}
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
let isCommutable = IsCommutable in
def rr : AVX512BI<opc, MRMSrcReg,
@@ -2027,7 +2027,7 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
OpndItins itins, X86VectorVTInfo _, bit IsCommutable> :
avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
def rmb : AVX512BI<opc, MRMSrcMem,
@@ -2051,7 +2051,7 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
OpndItins itins, AVX512VLVectorVTInfo VTInfo,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
@@ -2067,7 +2067,7 @@ multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
SDNode OpNode, OpndItins itins,
PatFrag OpNode, OpndItins itins,
AVX512VLVectorVTInfo VTInfo,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
@@ -2082,6 +2082,11 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
}
}
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
(X86cmpm node:$src1, node:$src2, (i8 0))>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
(X86cmpm node:$src1, node:$src2, (i8 6))>;
// FIXME: Is there a better scheduler itinerary for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
@@ -2950,10 +2955,10 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>;
multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr,
multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2))),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr##Zrr)
@@ -2962,7 +2967,7 @@ def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(OpNode (Narrow.VT Narrow.RC:$src1),
(Frag (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2)))),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr##Zrrk)

View File

@@ -153,12 +153,6 @@ def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
def X86IntCmpMask : SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisSameAs<1, 2>, SDTCisInt<1>,
SDTCisSameNumEltsAs<0, 1>]>;
def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
def X86CmpMaskCC :
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,

View File

@@ -502,8 +502,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
; KNL-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2
; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1 {%k1}
; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; KNL-NEXT: vzeroupper
@@ -511,8 +511,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
;
; SKX-LABEL: test4:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1}
; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1
; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -523,8 +523,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
; AVX512BW-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2
; AVX512BW-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
; AVX512BW-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
; AVX512BW-NEXT: vpcmpleq %zmm1, %zmm0, %k1 {%k1}
; AVX512BW-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512BW-NEXT: vzeroupper
@@ -536,8 +536,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
; AVX512DQ-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2
; AVX512DQ-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
; AVX512DQ-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
; AVX512DQ-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1}
; AVX512DQ-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512DQ-NEXT: vzeroupper
@@ -556,8 +556,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1
; KNL-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2
; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k1
; KNL-NEXT: vpcmpleq %zmm3, %zmm2, %k1 {%k1}
; KNL-NEXT: vpcmpleq %zmm3, %zmm2, %k1
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; KNL-NEXT: vzeroupper
@@ -565,8 +565,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1
;
; SKX-LABEL: test5:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1}
; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1
; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: retq
;
@@ -576,8 +576,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1
; AVX512BW-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2
; AVX512BW-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vpcmpleq %zmm3, %zmm2, %k1 {%k1}
; AVX512BW-NEXT: vpcmpleq %zmm3, %zmm2, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512BW-NEXT: vzeroupper
@@ -589,8 +589,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1
; AVX512DQ-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2
; AVX512DQ-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k1
; AVX512DQ-NEXT: vpcmpleq %zmm3, %zmm2, %k0 {%k1}
; AVX512DQ-NEXT: vpcmpleq %zmm3, %zmm2, %k1
; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 {%k1}
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512DQ-NEXT: vzeroupper

View File

@@ -7031,16 +7031,16 @@ entry:
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; GENERIC-LABEL: test4:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test4:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1} # sched: [3:1.00]
; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -7054,15 +7054,15 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; GENERIC-LABEL: vcmp_test5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test5:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1} # sched: [3:1.00]
; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%x_gt_y = icmp slt <2 x i64> %x, %y

View File

@@ -428,9 +428,9 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32
; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm3
; NoVLX-NEXT: vpcmpgtd %zmm3, %zmm0, %k1
; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1}
; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; NoVLX-NEXT: retq
@@ -457,9 +457,9 @@ define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64
; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm3
; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1
; NoVLX-NEXT: vpcmpleq %zmm1, %zmm2, %k1 {%k1}
; NoVLX-NEXT: vpcmpleq %zmm1, %zmm2, %k1
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; NoVLX-NEXT: retq
@@ -987,9 +987,9 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32
; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm3
; NoVLX-NEXT: vpcmpgtd %zmm3, %zmm0, %k1
; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1}
; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2
; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
; NoVLX-NEXT: retq
@@ -1016,9 +1016,9 @@ define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64
; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm3
; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1
; NoVLX-NEXT: vpcmpleq %zmm1, %zmm2, %k1 {%k1}
; NoVLX-NEXT: vpcmpleq %zmm1, %zmm2, %k1
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2
; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
; NoVLX-NEXT: retq