1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[X86] Don't widen 128/256-bit strict compares with vXi1 result to 512-bits on KNL.

If we widen the compare we might trigger a spurious exception from
the garbage data.

We have two choices here: explicitly force the upper bits to zero, or
use a legacy VEX vcmpps/pd instruction and convert the XMM/YMM
result to a mask register.

I've chosen to go with the second option, though I'm not sure which is
really best. In some cases we could get rid of the zeroing, since
the producing instruction probably already zeroed the upper bits. But we
lose the ability to fold a load. So which is best depends on the
surrounding code.

Differential Revision: https://reviews.llvm.org/D74522
This commit is contained in:
Craig Topper 2020-02-13 11:10:57 -08:00
parent 2357c0bf62
commit ab2e139a07
4 changed files with 474 additions and 651 deletions

View File

@ -21645,8 +21645,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
// If we have a strict compare with a vXi1 result and the input is 128/256
// bits we can't use a masked compare unless we have VLX. If we use a wider
// compare like we do for non-strict, we might trigger spurious exceptions
// from the upper elements. Instead emit a AVX compare and convert to mask.
unsigned Opc;
if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 &&
(!IsStrict || Subtarget.hasVLX() ||
Op0.getSimpleValueType().is512BitVector())) {
assert(VT.getVectorNumElements() <= 16);
Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
} else {
@ -21742,10 +21748,19 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
// If this is SSE/AVX CMPP, bitcast the result back to integer to match the
// result type of SETCC. The bitcast is expected to be optimized away
// during combining/isel.
Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
if (VT.getSizeInBits() > Op.getSimpleValueType().getSizeInBits()) {
// We emitted a compare with an XMM/YMM result. Finish converting to a
// mask register using a vptestm.
EVT CastVT = EVT(VT).changeVectorElementTypeToInteger();
Cmp = DAG.getBitcast(CastVT, Cmp);
Cmp = DAG.getSetCC(dl, Op.getSimpleValueType(), Cmp,
DAG.getConstant(0, dl, CastVT), ISD::SETNE);
} else {
// If this is SSE/AVX CMPP, bitcast the result back to integer to match
// the result type of SETCC. The bitcast is expected to be optimized
// away during combining/isel.
Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
}
if (IsStrict)
return DAG.getMergeValues({Cmp, Chain}, dl);

View File

@ -3232,8 +3232,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), timm:$cc)),
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrri")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@ -3250,8 +3250,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
timm:$cc), Narrow.KRC)>;
// Broadcast load.
def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
(Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
(Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@ -3266,8 +3266,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
addr:$src2, timm:$cc), Narrow.KRC)>;
// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
(Narrow.VT Narrow.RC:$src1), timm:$cc)),
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
(Narrow.VT Narrow.RC:$src1), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff