mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
AArch64: Implement conditional compare sequence matching.
This is a new iteration of the reverted r238793 / http://reviews.llvm.org/D8232 which wrongly assumed that any and/or trees can be represented by conditional compare sequences, however there are some restrictions to that. This version fixes this and adds comments that explain exactly what types of and/or trees can actually be implemented as conditional compare sequences. Related to http://llvm.org/PR20927, rdar://18326194 Differential Revision: http://reviews.llvm.org/D10579 llvm-svn: 242436
This commit is contained in:
parent
d96b39afde
commit
0a9f87ba79
@ -76,6 +76,9 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
|
||||
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
|
||||
cl::init(false));
|
||||
|
||||
/// Value type used for condition codes.
|
||||
static const MVT MVT_CC = MVT::i32;
|
||||
|
||||
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
const AArch64Subtarget &STI)
|
||||
: TargetLowering(TM), Subtarget(&STI) {
|
||||
@ -809,6 +812,9 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
|
||||
case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
|
||||
case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
|
||||
case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
|
||||
case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
|
||||
case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
|
||||
case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
|
||||
case AArch64ISD::FMIN: return "AArch64ISD::FMIN";
|
||||
case AArch64ISD::FMAX: return "AArch64ISD::FMAX";
|
||||
@ -1167,14 +1173,224 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
||||
LHS = LHS.getOperand(0);
|
||||
}
|
||||
|
||||
return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
|
||||
return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
|
||||
.getValue(1);
|
||||
}
|
||||
|
||||
/// \defgroup AArch64CCMP CMP;CCMP matching
|
||||
///
|
||||
/// These functions deal with the formation of CMP;CCMP;... sequences.
|
||||
/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
|
||||
/// a comparison. They set the NZCV flags to a predefined value if their
|
||||
/// predicate is false. This allows expressing arbitrary conjunctions, for
|
||||
/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B))))"
|
||||
/// expressed as:
|
||||
/// cmp A
|
||||
/// ccmp B, inv(CB), CA
|
||||
/// check for CB flags
|
||||
///
|
||||
/// In general we can create code for arbitrary "... (and (and A B) C)"
|
||||
/// sequences. We can also implement some "or" expressions, because "(or A B)"
|
||||
/// is equivalent to "not (and (not A) (not B))" and we can implement some
|
||||
/// negation operations:
|
||||
/// We can negate the results of a single comparison by inverting the flags
|
||||
/// used when the predicate fails and inverting the flags tested in the next
|
||||
/// instruction; We can also negate the results of the whole previous
|
||||
/// conditional compare sequence by inverting the flags tested in the next
|
||||
/// instruction. However there is no way to negate the result of a partial
|
||||
/// sequence.
|
||||
///
|
||||
/// Therefore on encountering an "or" expression we can negate the subtree on
|
||||
/// one side and have to be able to push the negate to the leafs of the subtree
|
||||
/// on the other side (see also the comments in code). As complete example:
|
||||
/// "or (or (setCA (cmp A)) (setCB (cmp B)))
|
||||
/// (and (setCC (cmp C)) (setCD (cmp D)))"
|
||||
/// is transformed to
|
||||
/// "not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
|
||||
/// (and (not (setCA (cmp A))) (not (setCB (cmp B)))))"
|
||||
/// and implemented as:
|
||||
/// cmp C
|
||||
/// ccmp D, inv(CD), CC
|
||||
/// ccmp A, CA, inv(CD)
|
||||
/// ccmp B, CB, inv(CA)
|
||||
/// check for CB flags
|
||||
/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
|
||||
/// by conditional compare sequences.
|
||||
/// @{
|
||||
|
||||
/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
|
||||
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
|
||||
ISD::CondCode CC, SDValue CCOp,
|
||||
SDValue Condition, unsigned NZCV,
|
||||
SDLoc DL, SelectionDAG &DAG) {
|
||||
unsigned Opcode = 0;
|
||||
if (LHS.getValueType().isFloatingPoint())
|
||||
Opcode = AArch64ISD::FCCMP;
|
||||
else if (RHS.getOpcode() == ISD::SUB) {
|
||||
SDValue SubOp0 = RHS.getOperand(0);
|
||||
if (const ConstantSDNode *SubOp0C = dyn_cast<ConstantSDNode>(SubOp0))
|
||||
if (SubOp0C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
|
||||
// See emitComparison() on why we can only do this for SETEQ and SETNE.
|
||||
Opcode = AArch64ISD::CCMN;
|
||||
RHS = RHS.getOperand(1);
|
||||
}
|
||||
}
|
||||
if (Opcode == 0)
|
||||
Opcode = AArch64ISD::CCMP;
|
||||
|
||||
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
|
||||
return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
|
||||
}
|
||||
|
||||
/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
|
||||
/// CanPushNegate is set to true if we can push a negate operation through
|
||||
/// the tree in a way that we are left with AND operations and negate operations
|
||||
/// at the leafs only. i.e. "not (or (or x y) z)" can be changed to
|
||||
/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
|
||||
/// brought into such a form.
|
||||
static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate,
|
||||
unsigned Depth = 0) {
|
||||
if (!Val.hasOneUse())
|
||||
return false;
|
||||
unsigned Opcode = Val->getOpcode();
|
||||
if (Opcode == ISD::SETCC) {
|
||||
CanPushNegate = true;
|
||||
return true;
|
||||
}
|
||||
// Protect against stack overflow.
|
||||
if (Depth > 15)
|
||||
return false;
|
||||
if (Opcode == ISD::AND || Opcode == ISD::OR) {
|
||||
SDValue O0 = Val->getOperand(0);
|
||||
SDValue O1 = Val->getOperand(1);
|
||||
bool CanPushNegateL;
|
||||
if (!isConjunctionDisjunctionTree(O0, CanPushNegateL, Depth+1))
|
||||
return false;
|
||||
bool CanPushNegateR;
|
||||
if (!isConjunctionDisjunctionTree(O1, CanPushNegateR, Depth+1))
|
||||
return false;
|
||||
// We cannot push a negate through an AND operation (it would become an OR),
|
||||
// we can however change a (not (or x y)) to (and (not x) (not y)) if we can
|
||||
// push the negate through the x/y subtrees.
|
||||
CanPushNegate = (Opcode == ISD::OR) && CanPushNegateL && CanPushNegateR;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
|
||||
/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
|
||||
/// Tries to transform the given i1 producing node @p Val to a series of compare
|
||||
/// and conditional compare operations. @returns an NZCV flags producing node
|
||||
/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
|
||||
/// transformation was not possible.
|
||||
/// On recursive invocations @p PushNegate may be set to true to have negation
|
||||
/// effects pushed to the tree leafs; @p Predicate is an NZCV flag predicate
|
||||
/// for the comparisons in the current subtree; @p Depth limits the search
|
||||
/// depth to avoid stack overflow.
|
||||
static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
|
||||
AArch64CC::CondCode &OutCC, bool PushNegate = false,
|
||||
SDValue CCOp = SDValue(), AArch64CC::CondCode Predicate = AArch64CC::AL,
|
||||
unsigned Depth = 0) {
|
||||
// We're at a tree leaf, produce a conditional comparison operation.
|
||||
unsigned Opcode = Val->getOpcode();
|
||||
if (Opcode == ISD::SETCC) {
|
||||
SDValue LHS = Val->getOperand(0);
|
||||
SDValue RHS = Val->getOperand(1);
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
|
||||
bool isInteger = LHS.getValueType().isInteger();
|
||||
if (PushNegate)
|
||||
CC = getSetCCInverse(CC, isInteger);
|
||||
SDLoc DL(Val);
|
||||
// Determine OutCC and handle FP special case.
|
||||
if (isInteger) {
|
||||
OutCC = changeIntCCToAArch64CC(CC);
|
||||
} else {
|
||||
assert(LHS.getValueType().isFloatingPoint());
|
||||
AArch64CC::CondCode ExtraCC;
|
||||
changeFPCCToAArch64CC(CC, OutCC, ExtraCC);
|
||||
// Surprisingly some floating point conditions can't be tested with a
|
||||
// single condition code. Construct an additional comparison in this case.
|
||||
// See comment below on how we deal with OR conditions.
|
||||
if (ExtraCC != AArch64CC::AL) {
|
||||
SDValue ExtraCmp;
|
||||
if (!CCOp.getNode())
|
||||
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
|
||||
else {
|
||||
SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
|
||||
// Note that we want the inverse of ExtraCC, so NZCV is not inversed.
|
||||
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC);
|
||||
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp,
|
||||
NZCV, DL, DAG);
|
||||
}
|
||||
CCOp = ExtraCmp;
|
||||
Predicate = AArch64CC::getInvertedCondCode(ExtraCC);
|
||||
OutCC = AArch64CC::getInvertedCondCode(OutCC);
|
||||
}
|
||||
}
|
||||
|
||||
// Produce a normal comparison if we are first in the chain
|
||||
if (!CCOp.getNode())
|
||||
return emitComparison(LHS, RHS, CC, DL, DAG);
|
||||
// Otherwise produce a ccmp.
|
||||
SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
|
||||
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
|
||||
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
|
||||
return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL,
|
||||
DAG);
|
||||
} else if (Opcode != ISD::AND && Opcode != ISD::OR)
|
||||
return SDValue();
|
||||
|
||||
assert((Opcode == ISD::OR || !PushNegate)
|
||||
&& "Can only push negate through OR operation");
|
||||
|
||||
// Check if both sides can be transformed.
|
||||
SDValue LHS = Val->getOperand(0);
|
||||
SDValue RHS = Val->getOperand(1);
|
||||
bool CanPushNegateL;
|
||||
if (!isConjunctionDisjunctionTree(LHS, CanPushNegateL, Depth+1))
|
||||
return SDValue();
|
||||
bool CanPushNegateR;
|
||||
if (!isConjunctionDisjunctionTree(RHS, CanPushNegateR, Depth+1))
|
||||
return SDValue();
|
||||
|
||||
// Do we need to negate our operands?
|
||||
bool NegateOperands = Opcode == ISD::OR;
|
||||
// We can negate the results of all previous operations by inverting the
|
||||
// predicate flags giving us a free negation for one side. For the other side
|
||||
// we need to be able to push the negation to the leafs of the tree.
|
||||
if (NegateOperands) {
|
||||
if (!CanPushNegateL && !CanPushNegateR)
|
||||
return SDValue();
|
||||
// Order the side where we can push the negate through to LHS.
|
||||
if (!CanPushNegateL && CanPushNegateR) {
|
||||
std::swap(LHS, RHS);
|
||||
CanPushNegateL = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Emit RHS. If we want to negate the tree we only need to push a negate
|
||||
// through if we are already in a PushNegate case, otherwise we can negate
|
||||
// the "flags to test" afterwards.
|
||||
AArch64CC::CondCode RHSCC;
|
||||
SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, PushNegate,
|
||||
CCOp, Predicate, Depth+1);
|
||||
if (NegateOperands && !PushNegate)
|
||||
RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
|
||||
// Emit LHS. We must push the negate through if we need to negate it.
|
||||
SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, OutCC, NegateOperands,
|
||||
CmpR, RHSCC, Depth+1);
|
||||
// If we transformed an OR to an AND then we have to negate the result
|
||||
// (or absorb a PushNegate resulting in a double negation).
|
||||
if (Opcode == ISD::OR && !PushNegate)
|
||||
OutCC = AArch64CC::getInvertedCondCode(OutCC);
|
||||
return CmpL;
|
||||
}
|
||||
|
||||
/// @}
|
||||
|
||||
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
||||
SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
|
||||
SDValue Cmp;
|
||||
AArch64CC::CondCode AArch64CC;
|
||||
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
|
||||
EVT VT = RHS.getValueType();
|
||||
uint64_t C = RHSC->getZExtValue();
|
||||
@ -1229,47 +1445,56 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
||||
}
|
||||
}
|
||||
}
|
||||
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
|
||||
// For the i8 operand, the largest immediate is 255, so this can be easily
|
||||
// encoded in the compare instruction. For the i16 operand, however, the
|
||||
// largest immediate cannot be encoded in the compare.
|
||||
// Therefore, use a sign extending load and cmn to avoid materializing the -1
|
||||
// constant. For example,
|
||||
// movz w1, #65535
|
||||
// ldrh w0, [x0, #0]
|
||||
// cmp w0, w1
|
||||
// >
|
||||
// ldrsh w0, [x0, #0]
|
||||
// cmn w0, #1
|
||||
// Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
|
||||
// if and only if (sext LHS) == (sext RHS). The checks are in place to ensure
|
||||
// both the LHS and RHS are truly zero extended and to make sure the
|
||||
// transformation is profitable.
|
||||
SDValue Cmp;
|
||||
AArch64CC::CondCode AArch64CC;
|
||||
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
|
||||
if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
|
||||
isa<LoadSDNode>(LHS)) {
|
||||
if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
|
||||
cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
|
||||
LHS.getNode()->hasNUsesOfValue(1, 0)) {
|
||||
int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
|
||||
if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
|
||||
SDValue SExt =
|
||||
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
|
||||
DAG.getValueType(MVT::i16));
|
||||
Cmp = emitComparison(SExt,
|
||||
DAG.getConstant(ValueofRHS, dl,
|
||||
RHS.getValueType()),
|
||||
CC, dl, DAG);
|
||||
AArch64CC = changeIntCCToAArch64CC(CC);
|
||||
AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
|
||||
return Cmp;
|
||||
}
|
||||
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
|
||||
|
||||
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
|
||||
// For the i8 operand, the largest immediate is 255, so this can be easily
|
||||
// encoded in the compare instruction. For the i16 operand, however, the
|
||||
// largest immediate cannot be encoded in the compare.
|
||||
// Therefore, use a sign extending load and cmn to avoid materializing the
|
||||
// -1 constant. For example,
|
||||
// movz w1, #65535
|
||||
// ldrh w0, [x0, #0]
|
||||
// cmp w0, w1
|
||||
// >
|
||||
// ldrsh w0, [x0, #0]
|
||||
// cmn w0, #1
|
||||
// Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
|
||||
// if and only if (sext LHS) == (sext RHS). The checks are in place to
|
||||
// ensure both the LHS and RHS are truly zero extended and to make sure the
|
||||
// transformation is profitable.
|
||||
if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
|
||||
cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
|
||||
cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
|
||||
LHS.getNode()->hasNUsesOfValue(1, 0)) {
|
||||
int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
|
||||
if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
|
||||
SDValue SExt =
|
||||
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
|
||||
DAG.getValueType(MVT::i16));
|
||||
Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
|
||||
RHS.getValueType()),
|
||||
CC, dl, DAG);
|
||||
AArch64CC = changeIntCCToAArch64CC(CC);
|
||||
}
|
||||
}
|
||||
|
||||
if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
|
||||
if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
|
||||
if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
|
||||
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
|
||||
}
|
||||
}
|
||||
}
|
||||
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
|
||||
AArch64CC = changeIntCCToAArch64CC(CC);
|
||||
AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
|
||||
|
||||
if (!Cmp) {
|
||||
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
|
||||
AArch64CC = changeIntCCToAArch64CC(CC);
|
||||
}
|
||||
AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
|
||||
return Cmp;
|
||||
}
|
||||
|
||||
@ -9294,3 +9519,8 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
|
||||
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
|
||||
return Ty->isArrayTy();
|
||||
}
|
||||
|
||||
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
|
||||
EVT) const {
|
||||
return false;
|
||||
}
|
||||
|
@ -58,6 +58,11 @@ enum NodeType : unsigned {
|
||||
SBCS,
|
||||
ANDS,
|
||||
|
||||
// Conditional compares. Operands: left,right,falsecc,cc,flags
|
||||
CCMP,
|
||||
CCMN,
|
||||
FCCMP,
|
||||
|
||||
// Floating point comparison
|
||||
FCMP,
|
||||
|
||||
@ -516,6 +521,8 @@ private:
|
||||
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
|
||||
CallingConv::ID CallConv,
|
||||
bool isVarArg) const override;
|
||||
|
||||
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
|
||||
};
|
||||
|
||||
namespace AArch64 {
|
||||
|
@ -525,6 +525,13 @@ def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
|
||||
let ParserMatchClass = Imm0_31Operand;
|
||||
}
|
||||
|
||||
// True if the 32-bit immediate is in the range [0,31]
|
||||
def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{
|
||||
return ((uint64_t)Imm) < 32;
|
||||
}]> {
|
||||
let ParserMatchClass = Imm0_31Operand;
|
||||
}
|
||||
|
||||
// imm0_15 predicate - True if the immediate is in the range [0,15]
|
||||
def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
|
||||
return ((uint64_t)Imm) < 16;
|
||||
@ -542,7 +549,9 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
|
||||
// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
|
||||
def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
|
||||
return ((uint32_t)Imm) < 16;
|
||||
}]>;
|
||||
}]> {
|
||||
let ParserMatchClass = Imm0_15Operand;
|
||||
}
|
||||
|
||||
// An arithmetic shifter operand:
|
||||
// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
|
||||
@ -2108,9 +2117,12 @@ multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic,
|
||||
//---
|
||||
|
||||
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
|
||||
class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
|
||||
: I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond),
|
||||
asm, "\t$Rn, $imm, $nzcv, $cond", "", []>,
|
||||
class BaseCondComparisonImm<bit op, RegisterClass regtype, ImmLeaf immtype,
|
||||
string mnemonic, SDNode OpNode>
|
||||
: I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond),
|
||||
mnemonic, "\t$Rn, $imm, $nzcv, $cond", "",
|
||||
[(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv),
|
||||
(i32 imm:$cond), NZCV))]>,
|
||||
Sched<[WriteI, ReadI]> {
|
||||
let Uses = [NZCV];
|
||||
let Defs = [NZCV];
|
||||
@ -2130,19 +2142,13 @@ class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
|
||||
let Inst{3-0} = nzcv;
|
||||
}
|
||||
|
||||
multiclass CondSetFlagsImm<bit op, string asm> {
|
||||
def Wi : BaseCondSetFlagsImm<op, GPR32, asm> {
|
||||
let Inst{31} = 0;
|
||||
}
|
||||
def Xi : BaseCondSetFlagsImm<op, GPR64, asm> {
|
||||
let Inst{31} = 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
|
||||
class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
|
||||
: I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
|
||||
asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
|
||||
class BaseCondComparisonReg<bit op, RegisterClass regtype, string mnemonic,
|
||||
SDNode OpNode>
|
||||
: I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
|
||||
mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "",
|
||||
[(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv),
|
||||
(i32 imm:$cond), NZCV))]>,
|
||||
Sched<[WriteI, ReadI, ReadI]> {
|
||||
let Uses = [NZCV];
|
||||
let Defs = [NZCV];
|
||||
@ -2162,11 +2168,19 @@ class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
|
||||
let Inst{3-0} = nzcv;
|
||||
}
|
||||
|
||||
multiclass CondSetFlagsReg<bit op, string asm> {
|
||||
def Wr : BaseCondSetFlagsReg<op, GPR32, asm> {
|
||||
multiclass CondComparison<bit op, string mnemonic, SDNode OpNode> {
|
||||
// immediate operand variants
|
||||
def Wi : BaseCondComparisonImm<op, GPR32, imm32_0_31, mnemonic, OpNode> {
|
||||
let Inst{31} = 0;
|
||||
}
|
||||
def Xr : BaseCondSetFlagsReg<op, GPR64, asm> {
|
||||
def Xi : BaseCondComparisonImm<op, GPR64, imm0_31, mnemonic, OpNode> {
|
||||
let Inst{31} = 1;
|
||||
}
|
||||
// register operand variants
|
||||
def Wr : BaseCondComparisonReg<op, GPR32, mnemonic, OpNode> {
|
||||
let Inst{31} = 0;
|
||||
}
|
||||
def Xr : BaseCondComparisonReg<op, GPR64, mnemonic, OpNode> {
|
||||
let Inst{31} = 1;
|
||||
}
|
||||
}
|
||||
@ -3974,11 +3988,14 @@ multiclass FPComparison<bit signalAllNans, string asm,
|
||||
//---
|
||||
|
||||
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
|
||||
class BaseFPCondComparison<bit signalAllNans,
|
||||
RegisterClass regtype, string asm>
|
||||
: I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
|
||||
asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
|
||||
class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype,
|
||||
string mnemonic, list<dag> pat>
|
||||
: I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
|
||||
mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>,
|
||||
Sched<[WriteFCmp]> {
|
||||
let Uses = [NZCV];
|
||||
let Defs = [NZCV];
|
||||
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
bits<4> nzcv;
|
||||
@ -3994,16 +4011,18 @@ class BaseFPCondComparison<bit signalAllNans,
|
||||
let Inst{3-0} = nzcv;
|
||||
}
|
||||
|
||||
multiclass FPCondComparison<bit signalAllNans, string asm> {
|
||||
let Defs = [NZCV], Uses = [NZCV] in {
|
||||
def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> {
|
||||
multiclass FPCondComparison<bit signalAllNans, string mnemonic,
|
||||
SDPatternOperator OpNode = null_frag> {
|
||||
def Srr : BaseFPCondComparison<signalAllNans, FPR32, mnemonic,
|
||||
[(set NZCV, (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm), (i32 imm:$nzcv),
|
||||
(i32 imm:$cond), NZCV))]> {
|
||||
let Inst{22} = 0;
|
||||
}
|
||||
|
||||
def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> {
|
||||
def Drr : BaseFPCondComparison<signalAllNans, FPR64, mnemonic,
|
||||
[(set NZCV, (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm), (i32 imm:$nzcv),
|
||||
(i32 imm:$cond), NZCV))]> {
|
||||
let Inst{22} = 1;
|
||||
}
|
||||
} // Defs = [NZCV], Uses = [NZCV]
|
||||
}
|
||||
|
||||
//---
|
||||
|
@ -66,6 +66,20 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4,
|
||||
SDTCisSameAs<0, 2>,
|
||||
SDTCisInt<3>,
|
||||
SDTCisVT<4, i32>]>;
|
||||
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
|
||||
[SDTCisVT<0, i32>,
|
||||
SDTCisInt<1>,
|
||||
SDTCisSameAs<1, 2>,
|
||||
SDTCisInt<3>,
|
||||
SDTCisInt<4>,
|
||||
SDTCisVT<5, i32>]>;
|
||||
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
|
||||
[SDTCisVT<0, i32>,
|
||||
SDTCisFP<1>,
|
||||
SDTCisSameAs<1, 2>,
|
||||
SDTCisInt<3>,
|
||||
SDTCisInt<4>,
|
||||
SDTCisVT<5, i32>]>;
|
||||
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
|
||||
[SDTCisFP<0>,
|
||||
SDTCisSameAs<0, 1>]>;
|
||||
@ -160,6 +174,10 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
|
||||
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
|
||||
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
|
||||
|
||||
def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
|
||||
def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
|
||||
def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
|
||||
|
||||
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
|
||||
|
||||
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
|
||||
@ -1020,13 +1038,10 @@ def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
|
||||
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Conditionally set flags instructions.
|
||||
// Conditional comparison instructions.
|
||||
//===----------------------------------------------------------------------===//
|
||||
defm CCMN : CondSetFlagsImm<0, "ccmn">;
|
||||
defm CCMP : CondSetFlagsImm<1, "ccmp">;
|
||||
|
||||
defm CCMN : CondSetFlagsReg<0, "ccmn">;
|
||||
defm CCMP : CondSetFlagsReg<1, "ccmp">;
|
||||
defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
|
||||
defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Conditional select instructions.
|
||||
@ -2556,7 +2571,7 @@ defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm FCCMPE : FPCondComparison<1, "fccmpe">;
|
||||
defm FCCMP : FPCondComparison<0, "fccmp">;
|
||||
defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Floating point conditional select instruction.
|
||||
|
@ -287,3 +287,99 @@ sw.bb.i.i:
|
||||
%code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16
|
||||
br label %sw.bb.i.i
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_and
|
||||
define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
|
||||
; CHECK: cmp w1, #5
|
||||
; CHECK-NEXT: ccmp w0, w1, #0, ne
|
||||
; CHECK-NEXT: csel x0, x2, x3, lt
|
||||
; CHECK-NEXT: ret
|
||||
%1 = icmp slt i32 %w0, %w1
|
||||
%2 = icmp ne i32 5, %w1
|
||||
%3 = and i1 %1, %2
|
||||
%sel = select i1 %3, i64 %x2, i64 %x3
|
||||
ret i64 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_or
|
||||
define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
|
||||
; CHECK: cmp w1, #5
|
||||
; CHECK-NEXT: ccmp w0, w1, #8, eq
|
||||
; CHECK-NEXT: csel x0, x2, x3, lt
|
||||
; CHECK-NEXT: ret
|
||||
%1 = icmp slt i32 %w0, %w1
|
||||
%2 = icmp ne i32 5, %w1
|
||||
%3 = or i1 %1, %2
|
||||
%sel = select i1 %3, i64 %x2, i64 %x3
|
||||
ret i64 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_complicated
|
||||
define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) {
|
||||
; CHECK: ldr [[REG:d[0-9]+]],
|
||||
; CHECK: fcmp d0, d2
|
||||
; CHECK-NEXT: fmov d2, #13.00000000
|
||||
; CHECK-NEXT: fccmp d1, d2, #4, ne
|
||||
; CHECK-NEXT: fccmp d0, d1, #1, ne
|
||||
; CHECK-NEXT: fccmp d0, d1, #4, vc
|
||||
; CEHCK-NEXT: csel w0, w0, w1, eq
|
||||
%1 = fcmp one double %v1, %v2
|
||||
%2 = fcmp oeq double %v2, 13.0
|
||||
%3 = fcmp oeq double %v1, 42.0
|
||||
%or0 = or i1 %2, %3
|
||||
%or1 = or i1 %1, %or0
|
||||
%sel = select i1 %or1, i16 %a, i16 %b
|
||||
ret i16 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: gccbug
|
||||
define i64 @gccbug(i64 %x0, i64 %x1) {
|
||||
; CHECK: cmp x1, #0
|
||||
; CHECK-NEXT: ccmp x0, #2, #0, eq
|
||||
; CHECK-NEXT: ccmp x0, #4, #4, ne
|
||||
; CHECK-NEXT: orr w[[REGNUM:[0-9]+]], wzr, #0x1
|
||||
; CHECK-NEXT: cinc x0, x[[REGNUM]], eq
|
||||
; CHECK-NEXT: ret
|
||||
%cmp0 = icmp eq i64 %x1, 0
|
||||
%cmp1 = icmp eq i64 %x0, 2
|
||||
%cmp2 = icmp eq i64 %x0, 4
|
||||
|
||||
%or = or i1 %cmp2, %cmp1
|
||||
%and = and i1 %or, %cmp0
|
||||
|
||||
%sel = select i1 %and, i64 2, i64 1
|
||||
ret i64 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_ororand
|
||||
define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) {
|
||||
; CHECK: cmp w3, #4
|
||||
; CHECK-NEXT: ccmp w2, #2, #0, gt
|
||||
; CHECK-NEXT: ccmp w1, #13, #2, ge
|
||||
; CHECK-NEXT: ccmp w0, #0, #4, ls
|
||||
; CHECK-NEXT: csel w0, w3, wzr, eq
|
||||
; CHECK-NEXT: ret
|
||||
%c0 = icmp eq i32 %w0, 0
|
||||
%c1 = icmp ugt i32 %w1, 13
|
||||
%c2 = icmp slt i32 %w2, 2
|
||||
%c4 = icmp sgt i32 %w3, 4
|
||||
%or = or i1 %c0, %c1
|
||||
%and = and i1 %c2, %c4
|
||||
%or1 = or i1 %or, %and
|
||||
%sel = select i1 %or1, i32 %w3, i32 0
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_noccmp
|
||||
define i64 @select_noccmp(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
|
||||
; CHECK-NOT: CCMP
|
||||
%c0 = icmp slt i64 %v1, 0
|
||||
%c1 = icmp sgt i64 %v1, 13
|
||||
%c2 = icmp slt i64 %v3, 2
|
||||
%c4 = icmp sgt i64 %v3, 4
|
||||
%and0 = and i1 %c0, %c1
|
||||
%and1 = and i1 %c2, %c4
|
||||
%or = or i1 %and0, %and1
|
||||
%sel = select i1 %or, i64 0, i64 %r
|
||||
ret i64 %sel
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user