
[SelectionDAG][AArch64][SVE] Perform SETCC condition legalization in LegalizeVectorOps

This is currently performed in SelectionDAGLegalize; here we make it also
happen in LegalizeVectorOps, allowing a target to lower the SETCC condition
codes first in LegalizeVectorOps and then lower to a custom node afterwards,
without having to duplicate all of the SETCC condition legalization in the
target-specific lowering.

As a result of this, fixed length floating point SETCC nodes can now be
properly lowered for SVE.
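
As a rough sketch of the flow this enables (the hooks are the real
TargetLowering ones, but the vector type and condition code used here are
purely illustrative):

  // Target initialization: condition codes the target cannot select
  // directly are marked Expand, while the SETCC operation itself is Custom.
  setCondCodeAction(ISD::SETUEQ, MVT::v8f16, Expand);
  setOperationAction(ISD::SETCC, MVT::v8f16, Custom);

  // LegalizeVectorOps now rewrites the Expand condition codes first (by
  // swapping operands, inverting the result, or splitting the compare in
  // two), so the target's custom SETCC lowering only ever sees condition
  // codes it supports natively.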

Differential Revision: https://reviews.llvm.org/D98939
Bradley Smith 2021-03-18 15:52:48 +00:00
parent c376195fed
commit 4cc2f2b476
10 changed files with 1048 additions and 238 deletions

@@ -4513,6 +4513,29 @@ public:
/// method accepts vectors as its arguments.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
/// Legalize a SETCC with given LHS and RHS and condition code CC on the
/// current target.
///
/// If the SETCC has been legalized using AND / OR, then the legalized node
/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
/// will be set to false.
///
/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
/// then the values of LHS and RHS will be swapped, CC will be set to the
/// new condition, and NeedInvert will be set to false.
///
/// If the SETCC has been legalized using the inverse condcode, then LHS and
/// RHS will be unchanged, CC will be set to the inverted condcode, and
/// NeedInvert will be set to true. The caller must invert the result of the
/// SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to swap
/// the effect of a true/false result.
///
/// \returns true if the SetCC has been legalized, false if it hasn't.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
SDValue &RHS, SDValue &CC, bool &NeedInvert,
const SDLoc &dl, SDValue &Chain,
bool IsSignaling = false) const;
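
A minimal caller-side sketch of consuming these outcomes, mirroring the
callers updated in this patch (Node, VT, dl, DAG and TLI are assumed to be
in scope):

  bool NeedInvert;
  SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2), Chain;
  if (TLI.LegalizeSetCCCondCode(DAG, VT, LHS, RHS, CC, NeedInvert, dl,
                                Chain)) {
    // A null CC means LHS already holds the AND/OR expansion; otherwise
    // rebuild the SETCC from the (possibly swapped) operands and new CC.
    if (CC.getNode())
      LHS = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS, CC);
    // An inverted condition code requires negating the result afterwards.
    if (NeedInvert)
      LHS = DAG.getLogicalNOT(dl, LHS, VT);
  }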
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
//

@@ -133,10 +133,6 @@ private:
SDValue N1, SDValue N2,
ArrayRef<int> Mask) const;
bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
bool &NeedInvert, const SDLoc &dl, SDValue &Chain,
bool IsSignaling = false);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -1685,152 +1681,6 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
Results.push_back(Tmp2);
}
/// Legalize a SETCC with given LHS and RHS and condition code CC on the current
/// target.
///
/// If the SETCC has been legalized using AND / OR, then the legalized node
/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
/// will be set to false.
///
/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
/// then the values of LHS and RHS will be swapped, CC will be set to the
/// new condition, and NeedInvert will be set to false.
///
/// If the SETCC has been legalized using the inverse condcode, then LHS and
/// RHS will be unchanged, CC will be set to the inverted condcode, and NeedInvert
/// will be set to true. The caller must invert the result of the SETCC with
/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect
/// of a true/false result.
///
/// \returns true if the SetCC has been legalized, false if it hasn't.
bool SelectionDAGLegalize::LegalizeSetCCCondCode(
EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert,
const SDLoc &dl, SDValue &Chain, bool IsSignaling) {
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
case TargetLowering::Expand: {
ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
std::swap(LHS, RHS);
CC = DAG.getCondCode(InvCC);
return true;
}
// Swapping operands didn't work. Try inverting the condition.
bool NeedSwap = false;
InvCC = getSetCCInverse(CCCode, OpVT);
if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
// If inverting the condition is not enough, try swapping operands
// on top of it.
InvCC = ISD::getSetCCSwappedOperands(InvCC);
NeedSwap = true;
}
if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
CC = DAG.getCondCode(InvCC);
NeedInvert = true;
if (NeedSwap)
std::swap(LHS, RHS);
return true;
}
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETUO:
if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR;
break;
}
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
NeedInvert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
&& "If SETO is expanded, SETOEQ must be legal!");
CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
case ISD::SETONE:
case ISD::SETUEQ:
// If the SETUO or SETO CC isn't legal, we might be able to use
// SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
// of SETOGT/SETOLT to be legal, the other can be emulated by swapping
// the operands.
CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
if (!TLI.isCondCodeLegal(CC2, OpVT) &&
(TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
CC1 = ISD::SETOGT;
CC2 = ISD::SETOLT;
Opc = ISD::OR;
NeedInvert = ((unsigned)CCCode & 0x8U);
break;
}
LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUNE:
case ISD::SETUGT:
case ISD::SETUGE:
case ISD::SETULT:
case ISD::SETULE:
// If we are floating point, assign and break, otherwise fall through.
if (!OpVT.isInteger()) {
// We can use the 4th bit to tell if we are the unordered
// or ordered version of the opcode.
CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
break;
}
// Fall through if we are an unsigned integer.
LLVM_FALLTHROUGH;
case ISD::SETLE:
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETLT:
case ISD::SETNE:
case ISD::SETEQ:
// If all combinations of inverting the condition and swapping operands
// didn't work then we have no means to expand the condition.
llvm_unreachable("Don't know how to expand this condition!");
}
SDValue SetCC1, SetCC2;
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
// If we aren't the ordered or unordered operation,
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain,
IsSignaling);
SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain,
IsSignaling);
} else {
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain,
IsSignaling);
SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain,
IsSignaling);
}
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
SetCC2.getValue(1));
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
return true;
}
}
return false;
}
/// Emit a store/load combination to the stack. This stores
/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
/// a load from the stack slot to DestVT, extending it if needed.
@@ -3729,8 +3579,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp2 = Node->getOperand(1 + Offset);
Tmp3 = Node->getOperand(2 + Offset);
bool Legalized =
LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3,
NeedInvert, dl, Chain, IsSignaling);
TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3,
NeedInvert, dl, Chain, IsSignaling);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
@@ -3825,8 +3675,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
if (!Legalized) {
Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
Tmp1, Tmp2, CC, NeedInvert, dl, Chain);
Legalized = TLI.LegalizeSetCCCondCode(
DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC,
NeedInvert, dl, Chain);
assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
@@ -3860,8 +3711,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp4 = Node->getOperand(1); // CC
bool Legalized =
LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2,
Tmp3, Tmp4, NeedInvert, dl, Chain);
TLI.LegalizeSetCCCondCode(DAG, getSetCCResultType(Tmp2.getValueType()),
Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain);
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");

@@ -138,6 +138,7 @@ class VectorLegalizer {
SDValue ExpandStore(SDNode *N);
SDValue ExpandFNEG(SDNode *Node);
void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -396,7 +397,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SELECT:
case ISD::VSELECT:
case ISD::SELECT_CC:
case ISD::SETCC:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::TRUNCATE:
@@ -495,6 +495,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(1).getValueType());
break;
case ISD::SETCC: {
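// The condition code action takes priority: the operation action for the
// node itself is only consulted once the condition code is Legal.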
MVT OpVT = Node->getOperand(0).getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
Action = TLI.getCondCodeAction(CCCode, OpVT);
if (Action == TargetLowering::Legal)
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
}
}
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
@@ -762,7 +770,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandFSUB(Node, Results);
return;
case ISD::SETCC:
Results.push_back(UnrollVSETCC(Node));
ExpandSETCC(Node, Results);
return;
case ISD::ABS:
if (TLI.expandABS(Node, Tmp, DAG)) {
@@ -1331,6 +1339,50 @@ void VectorLegalizer::ExpandFSUB(SDNode *Node,
Results.push_back(Tmp);
}
void VectorLegalizer::ExpandSETCC(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
bool NeedInvert = false;
SDLoc dl(Node);
MVT OpVT = Node->getOperand(0).getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
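// Only condition codes marked Expand are rewritten here; everything else
// keeps the previous behaviour of unrolling the vector compare.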
if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
Results.push_back(UnrollVSETCC(Node));
return;
}
SDValue Chain;
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue CC = Node->getOperand(2);
bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS,
RHS, CC, NeedInvert, dl, Chain);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
// condition code, create a new SETCC node.
if (CC.getNode())
LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
Node->getFlags());
// If we expanded the SETCC by inverting the condition code, then wrap
// the existing SETCC in a NOT to restore the intended condition.
if (NeedInvert)
LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
} else {
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
LHS =
DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
LHS->setFlags(Node->getFlags());
}
Results.push_back(LHS);
}
void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
SDValue Result, Overflow;

@@ -8698,3 +8698,137 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
MachinePointerInfo::getUnknownStack(MF));
}
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC, bool &NeedInvert,
const SDLoc &dl, SDValue &Chain,
bool IsSignaling) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default:
llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
case TargetLowering::Expand: {
ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
std::swap(LHS, RHS);
CC = DAG.getCondCode(InvCC);
return true;
}
// Swapping operands didn't work. Try inverting the condition.
bool NeedSwap = false;
InvCC = getSetCCInverse(CCCode, OpVT);
if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
// If inverting the condition is not enough, try swapping operands
// on top of it.
InvCC = ISD::getSetCCSwappedOperands(InvCC);
NeedSwap = true;
}
if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
CC = DAG.getCondCode(InvCC);
NeedInvert = true;
if (NeedSwap)
std::swap(LHS, RHS);
return true;
}
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default:
llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETUO:
if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
CC1 = ISD::SETUNE;
CC2 = ISD::SETUNE;
Opc = ISD::OR;
break;
}
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
NeedInvert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETO is expanded, SETOEQ must be legal!");
CC1 = ISD::SETOEQ;
CC2 = ISD::SETOEQ;
Opc = ISD::AND;
break;
case ISD::SETONE:
case ISD::SETUEQ:
// If the SETUO or SETO CC isn't legal, we might be able to use
// SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
// of SETOGT/SETOLT to be legal, the other can be emulated by swapping
// the operands.
CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
if (!TLI.isCondCodeLegal(CC2, OpVT) &&
(TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
CC1 = ISD::SETOGT;
CC2 = ISD::SETOLT;
Opc = ISD::OR;
NeedInvert = ((unsigned)CCCode & 0x8U);
break;
}
LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUNE:
case ISD::SETUGT:
case ISD::SETUGE:
case ISD::SETULT:
case ISD::SETULE:
// If we are floating point, assign and break, otherwise fall through.
if (!OpVT.isInteger()) {
// We can use the 4th bit to tell if we are the unordered
// or ordered version of the opcode.
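// (For example, SETUGT = 0b1010: CC1 = (0b010 | 0x10) = SETGT, CC2 = SETUO
// and Opc = OR, i.e. "greater-than ignoring NaNs, or either input is NaN".)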
CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
break;
}
// Fall through if we are an unsigned integer.
LLVM_FALLTHROUGH;
case ISD::SETLE:
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETLT:
case ISD::SETNE:
case ISD::SETEQ:
// If all combinations of inverting the condition and swapping operands
// didn't work then we have no means to expand the condition.
llvm_unreachable("Don't know how to expand this condition!");
}
SDValue SetCC1, SetCC2;
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
// If we aren't the ordered or unordered operation,
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
} else {
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
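// (For SETO this gives (LHS SETOEQ LHS) AND (RHS SETOEQ RHS): a NaN operand
// fails the self-comparison, so the result is true only when both inputs
// are ordered.)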
SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
}
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
SetCC2.getValue(1));
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
return true;
}
}
return false;
}

@@ -1388,6 +1388,20 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
if (VT.isFloatingPoint()) {
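// Only EQ/GE/GT/NE/UO have direct SVE vector compares, so mark the
// remaining FP condition codes as Expand and let the generic legalizer
// rewrite them (swapping operands and/or inverting the result) up front.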
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
}
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
@@ -10389,11 +10403,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector()) {
if (Op.getOperand(0).getValueType().isFloatingPoint())
return Op;
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
}
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
@@ -17455,10 +17466,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
assert(Op.getValueType() == InVT.changeTypeToInteger() &&
"Expected integer result of the same bit length as the inputs!");
// Expand floating point vector comparisons.
if (InVT.isFloatingPoint())
return SDValue();
auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);

@@ -262,18 +262,6 @@ def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs),
[(setoge node:$lhs, node:$rhs),
(setge node:$lhs, node:$rhs)]>;
def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs),
[(setogt node:$lhs, node:$rhs),
(setgt node:$lhs, node:$rhs)]>;
def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs),
[(setoeq node:$lhs, node:$rhs),
(seteq node:$lhs, node:$rhs)]>;
def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs),
[(setone node:$lhs, node:$rhs),
(setne node:$lhs, node:$rhs)]>;
def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
(AArch64mul_p node:$pred, node:$src1, node:$src2), [{
return N->hasOneUse();
@@ -1252,11 +1240,11 @@ let Predicates = [HasSVE] in {
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>;
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, SETOGE, SETGE, SETOLE, SETLE>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, SETOGT, SETGT, SETOLT, SETLT>;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, SETOEQ, SETEQ, SETOEQ, SETEQ>;
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, SETONE, SETNE, SETONE, SETNE>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, SETUO, SETUO, SETUO, SETUO>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;

@@ -4685,20 +4685,22 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
}
multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
SDPatternOperator op_nopred>
CondCode cc1, CondCode cc2,
CondCode invcc1, CondCode invcc2>
: sve_fp_3op_p_pd<opc, asm, op> {
def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8f16, nxv8f16,
!cast<Instruction>(NAME # _H), PTRUE_H>;
def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f16, nxv4f16,
!cast<Instruction>(NAME # _H), PTRUE_S>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f16, nxv2f16,
!cast<Instruction>(NAME # _H), PTRUE_D>;
def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f32, nxv4f32,
!cast<Instruction>(NAME # _S), PTRUE_S>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f32, nxv2f32,
!cast<Instruction>(NAME # _S), PTRUE_D>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f64, nxv2f64,
!cast<Instruction>(NAME # _D), PTRUE_D>;
defm : SVE_SETCC_Pat<cc1, invcc1, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
defm : SVE_SETCC_Pat<cc2, invcc2, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat<cc2, invcc2, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat<cc2, invcc2, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
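// cc1/cc2 are the condition codes this compare matches directly; invcc1/
// invcc2 are presumably matched by SVE_SETCC_Pat with the compare operands
// commuted (e.g. FCMGT also covering SETOLT/SETLT by swapping its inputs).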
}
//===----------------------------------------------------------------------===//

@@ -0,0 +1,762 @@
; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
target triple = "aarch64-unknown-linux-gnu"
; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: z{0-9}
;
; FCMP OEQ
;
; Don't use SVE for 64-bit vectors.
define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) #0 {
; CHECK-LABEL: fcmp_oeq_v4f16:
; CHECK: fcmeq v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%cmp = fcmp oeq <4 x half> %op1, %op2
%sext = sext <4 x i1> %cmp to <4 x i16>
ret <4 x i16> %sext
}
; Don't use SVE for 128-bit vectors.
define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) #0 {
; CHECK-LABEL: fcmp_oeq_v8f16:
; CHECK: fcmeq v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%cmp = fcmp oeq <8 x half> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
define void @fcmp_oeq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp oeq <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
define void @fcmp_oeq_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; VBITS_GE_512-NEXT: ret
; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x2]
; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x[[C_HI]]]
; VBITS_EQ_256-NEXT: ret
%op1 = load <32 x half>, <32 x half>* %a
%op2 = load <32 x half>, <32 x half>* %b
%cmp = fcmp oeq <32 x half> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i16>
store <32 x i16> %sext, <32 x i16>* %c
ret void
}
define void @fcmp_oeq_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; VBITS_GE_1024-NEXT: ret
%op1 = load <64 x half>, <64 x half>* %a
%op2 = load <64 x half>, <64 x half>* %b
%cmp = fcmp oeq <64 x half> %op1, %op2
%sext = sext <64 x i1> %cmp to <64 x i16>
store <64 x i16> %sext, <64 x i16>* %c
ret void
}
define void @fcmp_oeq_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; VBITS_GE_2048-NEXT: ret
%op1 = load <128 x half>, <128 x half>* %a
%op2 = load <128 x half>, <128 x half>* %b
%cmp = fcmp oeq <128 x half> %op1, %op2
%sext = sext <128 x i1> %cmp to <128 x i16>
store <128 x i16> %sext, <128 x i16>* %c
ret void
}
; Don't use SVE for 64-bit vectors.
define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) #0 {
; CHECK-LABEL: fcmp_oeq_v2f32:
; CHECK: fcmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%cmp = fcmp oeq <2 x float> %op1, %op2
%sext = sext <2 x i1> %cmp to <2 x i32>
ret <2 x i32> %sext
}
; Don't use SVE for 128-bit vectors.
define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) #0 {
; CHECK-LABEL: fcmp_oeq_v4f32:
; CHECK: fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%cmp = fcmp oeq <4 x float> %op1, %op2
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
define void @fcmp_oeq_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i32>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; CHECK-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <8 x float>, <8 x float>* %a
%op2 = load <8 x float>, <8 x float>* %b
%cmp = fcmp oeq <8 x float> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i32>
store <8 x i32> %sext, <8 x i32>* %c
ret void
}
define void @fcmp_oeq_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i32>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
; VBITS_GE_512-NEXT: ret
; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x2]
; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x[[C_HI]]]
; VBITS_EQ_256-NEXT: ret
%op1 = load <16 x float>, <16 x float>* %a
%op2 = load <16 x float>, <16 x float>* %b
%cmp = fcmp oeq <16 x float> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i32>
store <16 x i32> %sext, <16 x i32>* %c
ret void
}
define void @fcmp_oeq_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x i32>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
; VBITS_GE_1024-NEXT: ret
%op1 = load <32 x float>, <32 x float>* %a
%op2 = load <32 x float>, <32 x float>* %b
%cmp = fcmp oeq <32 x float> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i32>
store <32 x i32> %sext, <32 x i32>* %c
ret void
}
define void @fcmp_oeq_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x i32>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
; VBITS_GE_2048-NEXT: ret
%op1 = load <64 x float>, <64 x float>* %a
%op2 = load <64 x float>, <64 x float>* %b
%cmp = fcmp oeq <64 x float> %op1, %op2
%sext = sext <64 x i1> %cmp to <64 x i32>
store <64 x i32> %sext, <64 x i32>* %c
ret void
}
; Don't use SVE for 64-bit vectors.
define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) #0 {
; CHECK-LABEL: fcmp_oeq_v1f64:
; CHECK: fcmeq d0, d0, d1
; CHECK-NEXT: ret
%cmp = fcmp oeq <1 x double> %op1, %op2
%sext = sext <1 x i1> %cmp to <1 x i64>
ret <1 x i64> %sext
}
; Don't use SVE for 128-bit vectors.
define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) #0 {
; CHECK-LABEL: fcmp_oeq_v2f64:
; CHECK: fcmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%cmp = fcmp oeq <2 x double> %op1, %op2
%sext = sext <2 x i1> %cmp to <2 x i64>
ret <2 x i64> %sext
}
define void @fcmp_oeq_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x i64>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; CHECK-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <4 x double>, <4 x double>* %a
%op2 = load <4 x double>, <4 x double>* %b
%cmp = fcmp oeq <4 x double> %op1, %op2
%sext = sext <4 x i1> %cmp to <4 x i64>
store <4 x i64> %sext, <4 x i64>* %c
ret void
}
define void @fcmp_oeq_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i64>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_512-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
; VBITS_GE_512-NEXT: ret
; Ensure sensible type legalisation
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, [[CMP_HI]]/z, #-1
; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1
; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x2]
; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x[[C_HI]]]
; VBITS_EQ_256-NEXT: ret
%op1 = load <8 x double>, <8 x double>* %a
%op2 = load <8 x double>, <8 x double>* %b
%cmp = fcmp oeq <8 x double> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i64>
store <8 x i64> %sext, <8 x i64>* %c
ret void
}
define void @fcmp_oeq_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x i64>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_1024-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
; VBITS_GE_1024-NEXT: ret
%op1 = load <16 x double>, <16 x double>* %a
%op2 = load <16 x double>, <16 x double>* %b
%cmp = fcmp oeq <16 x double> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i64>
store <16 x i64> %sext, <16 x i64>* %c
ret void
}
define void @fcmp_oeq_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x i64>* %c) #0 {
; CHECK-LABEL: fcmp_oeq_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
; VBITS_GE_2048-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
; VBITS_GE_2048-NEXT: ret
%op1 = load <32 x double>, <32 x double>* %a
%op2 = load <32 x double>, <32 x double>* %b
%cmp = fcmp oeq <32 x double> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i64>
store <32 x i64> %sext, <32 x i64>* %c
ret void
}
;
; FCMP UEQ
;
define void @fcmp_ueq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_ueq_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp ueq <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP ONE
;
define void @fcmp_one_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_one_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp one <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP UNE
;
define void @fcmp_une_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_une_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp une <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP OGT
;
define void @fcmp_ogt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_ogt_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp ogt <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP UGT
;
define void @fcmp_ugt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_ugt_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp ugt <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP OLT
;
define void @fcmp_olt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_olt_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp olt <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP ULT
;
define void @fcmp_ult_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_ult_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp ult <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP OGE
;
define void @fcmp_oge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_oge_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp oge <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP UGE
;
define void @fcmp_uge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_uge_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp uge <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP OLE
;
define void @fcmp_ole_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_ole_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp ole <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP ULE
;
define void @fcmp_ule_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_ule_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp ule <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP UNO
;
define void @fcmp_uno_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_uno_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmuo [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp uno <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP ORD
;
define void @fcmp_ord_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_ord_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
; CHECK-NEXT: fcmuo [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp ord <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP EQ
;
define void @fcmp_eq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_eq_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp fast oeq <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP NE
;
define void @fcmp_ne_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_ne_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp fast one <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP GT
;
define void @fcmp_gt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_gt_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp fast ogt <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP LT
;
define void @fcmp_lt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_lt_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp fast olt <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP GE
;
define void @fcmp_ge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_ge_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp fast oge <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
;
; FCMP LE
;
define void @fcmp_le_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
; CHECK-LABEL: fcmp_le_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
%op2 = load <16 x half>, <16 x half>* %b
%cmp = fcmp fast ole <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
store <16 x i16> %sext, <16 x i16>* %c
ret void
}
attributes #0 = { "target-features"="+sve" }

@@ -626,32 +626,32 @@ define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-NEXT: xvcmpgtsp vs1, v4, v5
; CHECK-NEXT: xxlnor vs0, vs1, vs0
; CHECK-NEXT: xxsel v2, v3, v2, vs0
; CHECK-NEXT: xxlor vs0, vs1, vs0
; CHECK-NEXT: xxsel v2, v2, v3, vs0
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test22:
; CHECK-REG: # %bb.0: # %entry
; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5
; CHECK-REG-NEXT: xxlnor vs0, vs1, vs0
; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0
; CHECK-REG-NEXT: xxlor vs0, vs1, vs0
; CHECK-REG-NEXT: xxsel v2, v2, v3, vs0
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test22:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4
; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5
; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs1
; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0
; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1
; CHECK-FISL-NEXT: xxsel v2, v2, v3, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test22:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5
; CHECK-LE-NEXT: xxlnor vs0, vs1, vs0
; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0
; CHECK-LE-NEXT: xxlor vs0, vs1, vs0
; CHECK-LE-NEXT: xxsel v2, v2, v3, vs0
; CHECK-LE-NEXT: blr
entry:
%m = fcmp ueq <4 x float> %c, %d

@@ -285,9 +285,8 @@ define void @fcmp_ult_vv_v8f64(<8 x double>* %x, <8 x double>* %y, <8 x i1>* %z)
; CHECK-NEXT: vle64.v v28, (a0)
; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: vmfle.vv v25, v8, v28
; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
@@ -358,8 +357,8 @@ define void @fcmp_ueq_vv_v32f32(<32 x float>* %x, <32 x float>* %y, <32 x i1>* %
; CHECK-NEXT: vsetvli a4, a3, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v16, (a1)
; CHECK-NEXT: vmflt.vv v25, v16, v8
; CHECK-NEXT: vmflt.vv v26, v8, v16
; CHECK-NEXT: vmflt.vv v25, v8, v16
; CHECK-NEXT: vmflt.vv v26, v16, v8
; CHECK-NEXT: vsetvli a0, a3, e8,m2,ta,mu
; CHECK-NEXT: vmnor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a2)
@@ -395,8 +394,8 @@ define void @fcmp_one_vv_v8f64(<16 x double>* %x, <16 x double>* %y, <16 x i1>*
; CHECK-NEXT: vsetivli a3, 16, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: vmflt.vv v25, v16, v8
; CHECK-NEXT: vmflt.vv v26, v8, v16
; CHECK-NEXT: vmflt.vv v25, v8, v16
; CHECK-NEXT: vmflt.vv v26, v16, v8
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a2)
@@ -433,9 +432,8 @@ define void @fcmp_ord_vv_v4f16(<4 x half>* %x, <4 x half>* %y, <4 x i1>* %z) {
; CHECK-NEXT: vle16.v v26, (a0)
; CHECK-NEXT: vmfeq.vv v27, v25, v25
; CHECK-NEXT: vmfeq.vv v25, v26, v26
; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
; CHECK-NEXT: vmand.mm v25, v25, v27
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; CHECK-NEXT: vmand.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -453,9 +451,8 @@ define void @fcmp_uno_vv_v4f16(<2 x half>* %x, <2 x half>* %y, <2 x i1>* %z) {
; CHECK-NEXT: vle16.v v26, (a0)
; CHECK-NEXT: vmfne.vv v27, v25, v25
; CHECK-NEXT: vmfne.vv v25, v26, v26
; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
; CHECK-NEXT: vmor.mm v25, v25, v27
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x
@@ -747,9 +744,8 @@ define void @fcmp_ult_vf_v8f64(<8 x double>* %x, double %y, <8 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 8, e64,m4,ta,mu
; CHECK-NEXT: vle64.v v28, (a0)
; CHECK-NEXT: vmfge.vf v25, v28, fa0
; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
@@ -820,8 +816,8 @@ define void @fcmp_ueq_vf_v32f32(<32 x float>* %x, float %y, <32 x i1>* %z) {
; CHECK-NEXT: addi a2, zero, 32
; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmfgt.vf v25, v8, fa0
; CHECK-NEXT: vmflt.vf v26, v8, fa0
; CHECK-NEXT: vmflt.vf v25, v8, fa0
; CHECK-NEXT: vmfgt.vf v26, v8, fa0
; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu
; CHECK-NEXT: vmnor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -857,8 +853,8 @@ define void @fcmp_one_vf_v8f64(<16 x double>* %x, double %y, <16 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 16, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmfgt.vf v25, v8, fa0
; CHECK-NEXT: vmflt.vf v26, v8, fa0
; CHECK-NEXT: vmflt.vf v25, v8, fa0
; CHECK-NEXT: vmfgt.vf v26, v8, fa0
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -896,9 +892,8 @@ define void @fcmp_ord_vf_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vmfeq.vf v27, v26, fa0
; CHECK-NEXT: vmfeq.vv v26, v25, v25
; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
; CHECK-NEXT: vmand.mm v25, v26, v27
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; CHECK-NEXT: vmand.mm v25, v26, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -917,9 +912,8 @@ define void @fcmp_uno_vf_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vmfne.vf v27, v26, fa0
; CHECK-NEXT: vmfne.vv v26, v25, v25
; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v27
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x
@@ -1212,9 +1206,8 @@ define void @fcmp_ult_fv_v8f64(<8 x double>* %x, double %y, <8 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 8, e64,m4,ta,mu
; CHECK-NEXT: vle64.v v28, (a0)
; CHECK-NEXT: vmfle.vf v25, v28, fa0
; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
@@ -1285,8 +1278,8 @@ define void @fcmp_ueq_fv_v32f32(<32 x float>* %x, float %y, <32 x i1>* %z) {
; CHECK-NEXT: addi a2, zero, 32
; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmflt.vf v25, v8, fa0
; CHECK-NEXT: vmfgt.vf v26, v8, fa0
; CHECK-NEXT: vmfgt.vf v25, v8, fa0
; CHECK-NEXT: vmflt.vf v26, v8, fa0
; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu
; CHECK-NEXT: vmnor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -1322,8 +1315,8 @@ define void @fcmp_one_fv_v8f64(<16 x double>* %x, double %y, <16 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 16, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmflt.vf v25, v8, fa0
; CHECK-NEXT: vmfgt.vf v26, v8, fa0
; CHECK-NEXT: vmfgt.vf v25, v8, fa0
; CHECK-NEXT: vmflt.vf v26, v8, fa0
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -1359,11 +1352,10 @@ define void @fcmp_ord_fv_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 4, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vmfeq.vf v27, v26, fa0
; CHECK-NEXT: vmfeq.vv v26, v25, v25
; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
; CHECK-NEXT: vmand.mm v25, v27, v26
; CHECK-NEXT: vmfeq.vv v27, v25, v25
; CHECK-NEXT: vmfeq.vf v25, v26, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; CHECK-NEXT: vmand.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -1380,11 +1372,10 @@ define void @fcmp_uno_fv_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vmfne.vf v27, v26, fa0
; CHECK-NEXT: vmfne.vv v26, v25, v25
; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
; CHECK-NEXT: vmor.mm v25, v27, v26
; CHECK-NEXT: vmfne.vv v27, v25, v25
; CHECK-NEXT: vmfne.vf v25, v26, fa0
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x