1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[PowerPC] Exploit power9 new instruction setb

Check for the expected patterns feeding SELECT_CC, such as:
   (select_cc lhs, rhs,  1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
   (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
   (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs,  1, -1, cc2), seteq)
   (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs, -1,  1, cc2), seteq)
If one of them matches, further transform the sequence into a comparison + setb.

Differential Revision: https://reviews.llvm.org/D53275

llvm-svn: 349445
This commit is contained in:
Kewen Lin 2018-12-18 07:53:26 +00:00
parent d17ee0cd8e
commit d15f5b20c6
5 changed files with 579 additions and 379 deletions

View File

@ -111,11 +111,11 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
(instregex "POPCNT(D|W)$"),
(instregex "CMPB(8)?$"),
(instregex "SETB(8)?$"),
XSTDIVDP,
XSTSQRTDP,
XSXSIGDP,
XSCVSPDPN,
SETB,
BPERMD
)>;

View File

@ -81,6 +81,8 @@ STATISTIC(NumLogicOpsOnComparison,
"Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
"Number of compares not eliminated as they have non-extending uses.");
STATISTIC(NumP9Setb,
"Number of compares lowered to setb.");
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
@ -4137,6 +4139,146 @@ void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
// mayUseP9Setb - Decide whether the SELECT_CC node N has one of the three-way
// comparison shapes that the caller can lower to a comparison followed by setb.
//
// Parameters:
//   N           - the node to inspect; asserted to be an ISD::SELECT_CC.
//   CC          - the condition code of N ("cc1" in the patterns below).
//   DAG         - not referenced anywhere in this function's body.
//   NeedSwapOps - out: on a true return, tells the caller whether N's lhs/rhs
//                 must be swapped before the comparison is generated.
//   IsUnCmp     - out: set to true when the comparison must be unsigned. It is
//                 only ever set to true here (never reset), and it can be set
//                 on a path that later returns false, so the caller must
//                 initialize it to false and read it only on a true return.
//
// Returns true if N matches one of the handled patterns, false otherwise.
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
bool &NeedSwapOps, bool &IsUnCmp) {
assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue TrueRes = N->getOperand(2);
SDValue FalseRes = N->getOperand(3);
// The "true" result has to be a constant for any of the patterns to match.
ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
if (!TrueConst)
return false;
assert((N->getSimpleValueType(0) == MVT::i64 ||
N->getSimpleValueType(0) == MVT::i32) &&
"Expecting either i64 or i32 here.");
// We are looking for any of:
// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
// (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
// (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
int64_t TrueResVal = TrueConst->getSExtValue();
// Reject any true value outside {-1, 0, 1}, and any true value whose
// companion false operand does not have the opcode required by the patterns
// above (-1 -> zext, 1 -> sext, 0 -> select_cc under an outer seteq).
if ((TrueResVal < -1 || TrueResVal > 1) ||
(TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
(TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
(TrueResVal == 0 &&
(FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
return false;
// For the sext/zext forms, look through the extension to the node feeding it;
// for the nested select_cc form, the false operand is itself the inner node.
bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
if (SetOrSelCC.getOpcode() != ISD::SETCC &&
SetOrSelCC.getOpcode() != ISD::SELECT_CC)
return false;
// Without this setb optimization, the outer SELECT_CC will be manually
// selected to a SELECT_CC_I4/SELECT_CC_I8 pseudo, and the expand-isel-pseudos
// pass then transforms the pseudo instruction into an isel instruction. When
// the inner result (or its zext/sext) has more than one use, this optimization
// only replaces isel with setb without any significant gain. Since setb has a
// longer latency than the original isel, we should avoid that. Another point
// is that setb requires the comparison to always be kept, which can break the
// opportunity to eliminate the comparison if we gain one in the future.
if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
return false;
SDValue InnerLHS = SetOrSelCC.getOperand(0);
SDValue InnerRHS = SetOrSelCC.getOperand(1);
// The condition code is read from operand 4 when the inner node is treated as
// a select_cc and from operand 2 otherwise.
ISD::CondCode InnerCC =
cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
// If the inner comparison is a select_cc, make sure the true/false values are
// 1/-1 and canonicalize it if needed.
if (InnerIsSel) {
ConstantSDNode *SelCCTrueConst =
dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
ConstantSDNode *SelCCFalseConst =
dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
if (!SelCCTrueConst || !SelCCFalseConst)
return false;
int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
// The values must be -1/1 (requiring a swap) or 1/-1.
if (SelCCTVal == -1 && SelCCFVal == 1) {
// Swapping the inner operands lets the -1/1 form be handled below exactly
// like the 1/-1 form.
std::swap(InnerLHS, InnerRHS);
} else if (SelCCTVal != 1 || SelCCFVal != -1)
return false;
}
// Canonicalize unsigned case
// After this point InnerCC is only compared against the signed codes; the
// unsignedness of the inner compare is remembered solely in IsUnCmp.
// NOTE(review): IsUnCmp set here is not re-checked when the outer CC is the
// signed SETLT/SETGT, so a signed outer compare combined with an unsigned
// inner compare looks like it would be accepted and then emitted as a single
// unsigned comparison -- confirm whether such a DAG can reach this point.
if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
IsUnCmp = true;
InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
}
// The inner comparison must be on the same two values as the outer one,
// either in the same order or exactly swapped.
bool InnerSwapped = false;
if (LHS == InnerRHS && RHS == InnerLHS)
InnerSwapped = true;
else if (LHS != InnerLHS || RHS != InnerRHS)
return false;
switch (CC) {
// (select_cc lhs, rhs, 0, \
// (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
case ISD::SETEQ:
if (!InnerIsSel)
return false;
if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
return false;
NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
break;
// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
// (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
// (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
// (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
// (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
case ISD::SETULT:
// An unsigned outer compare is accepted only if the inner compare was
// unsigned as well (IsUnCmp already set above) or is a SETNE.
if (!IsUnCmp && InnerCC != ISD::SETNE)
return false;
IsUnCmp = true;
LLVM_FALLTHROUGH;
case ISD::SETLT:
// The inner compare must be SETNE or must test "lhs > rhs" once its operand
// order is taken into account. A true value of 1 (rather than -1) for
// "lhs < rhs" is handled by asking the caller to swap the operands.
if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
(InnerCC == ISD::SETLT && InnerSwapped))
NeedSwapOps = (TrueResVal == 1);
else
return false;
break;
// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
// (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
// (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
// (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
// (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
case ISD::SETUGT:
// Same requirement as for SETULT above.
if (!IsUnCmp && InnerCC != ISD::SETNE)
return false;
IsUnCmp = true;
LLVM_FALLTHROUGH;
case ISD::SETGT:
// Mirror image of the SETLT case: the inner compare must be SETNE or must
// test "lhs < rhs", and a true value of -1 for "lhs > rhs" requires the
// caller to swap the operands.
if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
(InnerCC == ISD::SETGT && InnerSwapped))
NeedSwapOps = (TrueResVal == -1);
else
return false;
break;
default:
// Any other outer condition code is not handled.
return false;
}
LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
LLVM_DEBUG(N->dump());
return true;
}
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {
@ -4599,6 +4741,31 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
N->getOperand(0).getValueType() == MVT::i1)
break;
if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
bool NeedSwapOps = false;
bool IsUnCmp = false;
if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (NeedSwapOps)
std::swap(LHS, RHS);
// Use SelectCC to generate the comparison that sets the CR bits. For
// equality comparisons with one literal operand, SelectCC may avoid
// materializing the whole literal and check it with xoris first, in which
// case the resulting comparison cannot exactly represent the GT/LT
// relationship. To avoid this, we specify SETGT/SETUGT here instead of
// SETEQ.
SDValue GenCC =
SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
CurDAG->SelectNodeTo(
N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
N->getValueType(0), GenCC);
NumP9Setb++;
return;
}
}
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
if (!isPPC64)
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))

View File

@ -777,8 +777,12 @@ def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC)
"maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
def SETB : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
}
def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins i32imm:$L),
"darn $RT, $L", IIC_LdStLD>, isPPC64;
def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D),

View File

@ -3488,6 +3488,7 @@ static bool isSignExtendingOp(const MachineInstr &MI) {
Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo ||
Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 ||
Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo ||
Opcode == PPC::SETB || Opcode == PPC::SETB8 ||
Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 ||
Opcode == PPC::EXTSB8_32_64)
return true;

File diff suppressed because it is too large Load Diff