mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[SelectionDAG][AArch64][SVE] Perform SETCC condition legalization in LegalizeVectorOps
This is currently performed only in SelectionDAGLegalize. This change makes it also happen in LegalizeVectorOps, which allows a target to lower the SETCC condition codes first in LegalizeVectorOps and then lower to a custom node afterwards, without having to duplicate all of the SETCC condition legalization in the target-specific lowering. As a result, fixed-length floating-point SETCC nodes can now be properly lowered for SVE. Differential Revision: https://reviews.llvm.org/D98939
This commit is contained in:
parent
c376195fed
commit
4cc2f2b476
@ -4513,6 +4513,29 @@ public:
|
||||
/// method accepts vectors as its arguments.
|
||||
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
|
||||
|
||||
/// Legalize a SETCC with given LHS and RHS and condition code CC on the
|
||||
/// current target.
|
||||
///
|
||||
/// If the SETCC has been legalized using AND / OR, then the legalized node
|
||||
/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
|
||||
/// is set to true only if that node computes the inverse of the condition.
|
||||
///
|
||||
/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
|
||||
/// then the values of LHS and RHS will be swapped, CC will be set to the
|
||||
/// new condition, and NeedInvert will be set to false.
|
||||
///
|
||||
/// If the SETCC has been legalized using the inverse condcode, then LHS and
|
||||
/// RHS will be unchanged, CC will be set to the inverted condcode, and
|
||||
/// NeedInvert will be set to true. The caller must invert the result of the
|
||||
/// SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to swap
|
||||
/// the effect of a true/false result.
|
||||
///
|
||||
/// \returns true if the SetCC has been legalized, false if it hasn't.
|
||||
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
|
||||
SDValue &RHS, SDValue &CC, bool &NeedInvert,
|
||||
const SDLoc &dl, SDValue &Chain,
|
||||
bool IsSignaling = false) const;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Instruction Emitting Hooks
|
||||
//
|
||||
|
@ -133,10 +133,6 @@ private:
|
||||
SDValue N1, SDValue N2,
|
||||
ArrayRef<int> Mask) const;
|
||||
|
||||
bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
|
||||
bool &NeedInvert, const SDLoc &dl, SDValue &Chain,
|
||||
bool IsSignaling = false);
|
||||
|
||||
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
|
||||
|
||||
void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
|
||||
@ -1685,152 +1681,6 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
|
||||
Results.push_back(Tmp2);
|
||||
}
|
||||
|
||||
/// Legalize a SETCC with given LHS and RHS and condition code CC on the current
|
||||
/// target.
|
||||
///
|
||||
/// If the SETCC has been legalized using AND / OR, then the legalized node
|
||||
/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
|
||||
/// will be set to false.
|
||||
///
|
||||
/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
|
||||
/// then the values of LHS and RHS will be swapped, CC will be set to the
|
||||
/// new condition, and NeedInvert will be set to false.
|
||||
///
|
||||
/// If the SETCC has been legalized using the inverse condcode, then LHS and
|
||||
/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert
|
||||
/// will be set to true. The caller must invert the result of the SETCC with
|
||||
/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect
|
||||
/// of a true/false result.
|
||||
///
|
||||
/// \returns true if the SetCC has been legalized, false if it hasn't.
|
||||
bool SelectionDAGLegalize::LegalizeSetCCCondCode(
|
||||
EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert,
|
||||
const SDLoc &dl, SDValue &Chain, bool IsSignaling) {
|
||||
MVT OpVT = LHS.getSimpleValueType();
|
||||
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
|
||||
NeedInvert = false;
|
||||
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
|
||||
default: llvm_unreachable("Unknown condition code action!");
|
||||
case TargetLowering::Legal:
|
||||
// Nothing to do.
|
||||
break;
|
||||
case TargetLowering::Expand: {
|
||||
ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
|
||||
if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
|
||||
std::swap(LHS, RHS);
|
||||
CC = DAG.getCondCode(InvCC);
|
||||
return true;
|
||||
}
|
||||
// Swapping operands didn't work. Try inverting the condition.
|
||||
bool NeedSwap = false;
|
||||
InvCC = getSetCCInverse(CCCode, OpVT);
|
||||
if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
|
||||
// If inverting the condition is not enough, try swapping operands
|
||||
// on top of it.
|
||||
InvCC = ISD::getSetCCSwappedOperands(InvCC);
|
||||
NeedSwap = true;
|
||||
}
|
||||
if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
|
||||
CC = DAG.getCondCode(InvCC);
|
||||
NeedInvert = true;
|
||||
if (NeedSwap)
|
||||
std::swap(LHS, RHS);
|
||||
return true;
|
||||
}
|
||||
|
||||
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
|
||||
unsigned Opc = 0;
|
||||
switch (CCCode) {
|
||||
default: llvm_unreachable("Don't know how to expand this condition!");
|
||||
case ISD::SETUO:
|
||||
if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
|
||||
CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR;
|
||||
break;
|
||||
}
|
||||
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
|
||||
"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
|
||||
NeedInvert = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case ISD::SETO:
|
||||
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
|
||||
&& "If SETO is expanded, SETOEQ must be legal!");
|
||||
CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
|
||||
case ISD::SETONE:
|
||||
case ISD::SETUEQ:
|
||||
// If the SETUO or SETO CC isn't legal, we might be able to use
|
||||
// SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
|
||||
// of SETOGT/SETOLT to be legal, the other can be emulated by swapping
|
||||
// the operands.
|
||||
CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
|
||||
if (!TLI.isCondCodeLegal(CC2, OpVT) &&
|
||||
(TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
|
||||
TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
|
||||
CC1 = ISD::SETOGT;
|
||||
CC2 = ISD::SETOLT;
|
||||
Opc = ISD::OR;
|
||||
NeedInvert = ((unsigned)CCCode & 0x8U);
|
||||
break;
|
||||
}
|
||||
LLVM_FALLTHROUGH;
|
||||
case ISD::SETOEQ:
|
||||
case ISD::SETOGT:
|
||||
case ISD::SETOGE:
|
||||
case ISD::SETOLT:
|
||||
case ISD::SETOLE:
|
||||
case ISD::SETUNE:
|
||||
case ISD::SETUGT:
|
||||
case ISD::SETUGE:
|
||||
case ISD::SETULT:
|
||||
case ISD::SETULE:
|
||||
// If we are floating point, assign and break, otherwise fall through.
|
||||
if (!OpVT.isInteger()) {
|
||||
// We can use the 4th bit to tell if we are the unordered
|
||||
// or ordered version of the opcode.
|
||||
CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
|
||||
Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
|
||||
CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
|
||||
break;
|
||||
}
|
||||
// Fallthrough if we are unsigned integer.
|
||||
LLVM_FALLTHROUGH;
|
||||
case ISD::SETLE:
|
||||
case ISD::SETGT:
|
||||
case ISD::SETGE:
|
||||
case ISD::SETLT:
|
||||
case ISD::SETNE:
|
||||
case ISD::SETEQ:
|
||||
// If all combinations of inverting the condition and swapping operands
|
||||
// didn't work then we have no means to expand the condition.
|
||||
llvm_unreachable("Don't know how to expand this condition!");
|
||||
}
|
||||
|
||||
SDValue SetCC1, SetCC2;
|
||||
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
|
||||
// If we aren't the ordered or unorder operation,
|
||||
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
|
||||
SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain,
|
||||
IsSignaling);
|
||||
SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain,
|
||||
IsSignaling);
|
||||
} else {
|
||||
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
|
||||
SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain,
|
||||
IsSignaling);
|
||||
SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain,
|
||||
IsSignaling);
|
||||
}
|
||||
if (Chain)
|
||||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
|
||||
SetCC2.getValue(1));
|
||||
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
|
||||
RHS = SDValue();
|
||||
CC = SDValue();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Emit a store/load combination to the stack. This stores
|
||||
/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
|
||||
/// a load from the stack slot to DestVT, extending it if needed.
|
||||
@ -3729,8 +3579,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
Tmp2 = Node->getOperand(1 + Offset);
|
||||
Tmp3 = Node->getOperand(2 + Offset);
|
||||
bool Legalized =
|
||||
LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3,
|
||||
NeedInvert, dl, Chain, IsSignaling);
|
||||
TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3,
|
||||
NeedInvert, dl, Chain, IsSignaling);
|
||||
|
||||
if (Legalized) {
|
||||
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
|
||||
@ -3825,8 +3675,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
}
|
||||
|
||||
if (!Legalized) {
|
||||
Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
|
||||
Tmp1, Tmp2, CC, NeedInvert, dl, Chain);
|
||||
Legalized = TLI.LegalizeSetCCCondCode(
|
||||
DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC,
|
||||
NeedInvert, dl, Chain);
|
||||
|
||||
assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
|
||||
|
||||
@ -3860,8 +3711,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
Tmp4 = Node->getOperand(1); // CC
|
||||
|
||||
bool Legalized =
|
||||
LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2,
|
||||
Tmp3, Tmp4, NeedInvert, dl, Chain);
|
||||
TLI.LegalizeSetCCCondCode(DAG, getSetCCResultType(Tmp2.getValueType()),
|
||||
Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain);
|
||||
(void)Legalized;
|
||||
assert(Legalized && "Can't legalize BR_CC with legal condition!");
|
||||
|
||||
|
@ -138,6 +138,7 @@ class VectorLegalizer {
|
||||
SDValue ExpandStore(SDNode *N);
|
||||
SDValue ExpandFNEG(SDNode *Node);
|
||||
void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
|
||||
void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
|
||||
void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
|
||||
void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
|
||||
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
|
||||
@ -396,7 +397,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
||||
case ISD::SELECT:
|
||||
case ISD::VSELECT:
|
||||
case ISD::SELECT_CC:
|
||||
case ISD::SETCC:
|
||||
case ISD::ZERO_EXTEND:
|
||||
case ISD::ANY_EXTEND:
|
||||
case ISD::TRUNCATE:
|
||||
@ -495,6 +495,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
||||
Action = TLI.getOperationAction(Node->getOpcode(),
|
||||
Node->getOperand(1).getValueType());
|
||||
break;
|
||||
case ISD::SETCC: {
|
||||
MVT OpVT = Node->getOperand(0).getSimpleValueType();
|
||||
ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
|
||||
Action = TLI.getCondCodeAction(CCCode, OpVT);
|
||||
if (Action == TargetLowering::Legal)
|
||||
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
|
||||
@ -762,7 +770,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
|
||||
ExpandFSUB(Node, Results);
|
||||
return;
|
||||
case ISD::SETCC:
|
||||
Results.push_back(UnrollVSETCC(Node));
|
||||
ExpandSETCC(Node, Results);
|
||||
return;
|
||||
case ISD::ABS:
|
||||
if (TLI.expandABS(Node, Tmp, DAG)) {
|
||||
@ -1331,6 +1339,50 @@ void VectorLegalizer::ExpandFSUB(SDNode *Node,
|
||||
Results.push_back(Tmp);
|
||||
}
|
||||
|
||||
/// Expand a vector SETCC node and append the replacement value to Results.
///
/// When the condition code is not marked Expand for the operand type, the node
/// is unrolled via UnrollVSETCC. Otherwise the condition code is legalized
/// with TargetLowering::LegalizeSetCCCondCode and the SETCC is rebuilt from
/// the (possibly swapped / inverted) pieces, or turned into a SELECT_CC when
/// legalization reports failure.
void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2);

  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();

  // Anything other than an Expand condition code is handled by unrolling.
  if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
    Results.push_back(UnrollVSETCC(Node));
    return;
  }

  EVT VT = Node->getValueType(0);
  bool NeedInvert = false;
  SDValue Chain;

  if (!TLI.LegalizeSetCCCondCode(DAG, VT, LHS, RHS, CC, NeedInvert, dl,
                                 Chain)) {
    // SETCC for the given comparison type must be completely illegal; expand
    // it into a SELECT_CC.
    SDValue TrueVal = DAG.getBoolConstant(true, dl, VT, LHS.getValueType());
    SDValue FalseVal = DAG.getBoolConstant(false, dl, VT, LHS.getValueType());
    SDValue Select =
        DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS, TrueVal, FalseVal, CC);
    Select->setFlags(Node->getFlags());
    Results.push_back(Select);
    return;
  }

  // A non-null CC means the SETCC was legalized by swapping LHS and RHS or by
  // inverting the condition code, so a new SETCC node has to be created.
  SDValue Result = LHS;
  if (CC.getNode())
    Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS, CC, Node->getFlags());

  // If the condition code was inverted, wrap the result in a NOT to restore
  // the intended condition.
  if (NeedInvert)
    Result = DAG.getLogicalNOT(dl, Result, Result->getValueType(0));

  Results.push_back(Result);
}
|
||||
|
||||
void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
|
||||
SmallVectorImpl<SDValue> &Results) {
|
||||
SDValue Result, Overflow;
|
||||
|
@ -8698,3 +8698,137 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
|
||||
return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
|
||||
MachinePointerInfo::getUnknownStack(MF));
|
||||
}
|
||||
|
||||
/// Legalize the condition code CC of a SETCC on LHS and RHS for the target.
///
/// Outcomes when the condition code action is Expand:
///  - Swapped operands: LHS and RHS are exchanged, CC is the swapped
///    condition, NeedInvert is false.
///  - Inverse condition: CC is the inverted condition and NeedInvert is true;
///    LHS and RHS are additionally exchanged when only the swapped form of
///    the inverse is usable.
///  - AND / OR of two compares: the combined node is stored in LHS, RHS and CC
///    become SDValue(), and NeedInvert is true only when the combined node is
///    the inverse of the requested condition (SETUO via SETOEQ, SETUEQ via
///    SETOGT / SETOLT). If Chain is non-null it is replaced by a TokenFactor
///    of the two compares' chain results.
///
/// \returns true if the SETCC has been legalized, false if the condition code
/// is already legal and nothing was changed.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First try the same comparison with its operands exchanged.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Neither swapping nor inverting helped: split the comparison into two
    // SETCCs (CC1 and CC2) that are combined with Opc.
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUO is expanded, SETOEQ or SETUNE must be legal!");
      // SETUO is emitted as the inverse of the SETO expansion below.
      NeedInvert = true;
      LLVM_FALLTHROUGH;
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U) != 0;
        break;
      }
      LLVM_FALLTHROUGH;
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        // CC1 keeps the low three bits of CCCode and sets bit 0x10.
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      LLVM_FALLTHROUGH;
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unordered operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
      SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
      SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
    }
    // When a chain was supplied, merge the chain results of both compares.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
|
||||
|
@ -1388,6 +1388,20 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
|
||||
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
|
||||
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
|
||||
|
||||
if (VT.isFloatingPoint()) {
|
||||
setCondCodeAction(ISD::SETO, VT, Expand);
|
||||
setCondCodeAction(ISD::SETOLT, VT, Expand);
|
||||
setCondCodeAction(ISD::SETLT, VT, Expand);
|
||||
setCondCodeAction(ISD::SETOLE, VT, Expand);
|
||||
setCondCodeAction(ISD::SETLE, VT, Expand);
|
||||
setCondCodeAction(ISD::SETULT, VT, Expand);
|
||||
setCondCodeAction(ISD::SETULE, VT, Expand);
|
||||
setCondCodeAction(ISD::SETUGE, VT, Expand);
|
||||
setCondCodeAction(ISD::SETUGT, VT, Expand);
|
||||
setCondCodeAction(ISD::SETUEQ, VT, Expand);
|
||||
setCondCodeAction(ISD::SETUNE, VT, Expand);
|
||||
}
|
||||
|
||||
// Lower fixed length vector operations to scalable equivalents.
|
||||
setOperationAction(ISD::ABS, VT, Custom);
|
||||
setOperationAction(ISD::ADD, VT, Custom);
|
||||
@ -10389,11 +10403,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
|
||||
|
||||
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
if (Op.getValueType().isScalableVector()) {
|
||||
if (Op.getOperand(0).getValueType().isFloatingPoint())
|
||||
return Op;
|
||||
if (Op.getValueType().isScalableVector())
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
|
||||
}
|
||||
|
||||
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
|
||||
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
|
||||
@ -17455,10 +17466,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
|
||||
assert(Op.getValueType() == InVT.changeTypeToInteger() &&
|
||||
"Expected integer result of the same bit length as the inputs!");
|
||||
|
||||
// Expand floating point vector comparisons.
|
||||
if (InVT.isFloatingPoint())
|
||||
return SDValue();
|
||||
|
||||
auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
|
||||
auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
|
||||
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
|
||||
|
@ -262,18 +262,6 @@ def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
|
||||
|
||||
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
|
||||
|
||||
def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs),
|
||||
[(setoge node:$lhs, node:$rhs),
|
||||
(setge node:$lhs, node:$rhs)]>;
|
||||
def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs),
|
||||
[(setogt node:$lhs, node:$rhs),
|
||||
(setgt node:$lhs, node:$rhs)]>;
|
||||
def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs),
|
||||
[(setoeq node:$lhs, node:$rhs),
|
||||
(seteq node:$lhs, node:$rhs)]>;
|
||||
def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs),
|
||||
[(setone node:$lhs, node:$rhs),
|
||||
(setne node:$lhs, node:$rhs)]>;
|
||||
def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
|
||||
(AArch64mul_p node:$pred, node:$src1, node:$src2), [{
|
||||
return N->hasOneUse();
|
||||
@ -1252,11 +1240,11 @@ let Predicates = [HasSVE] in {
|
||||
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
|
||||
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
|
||||
|
||||
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>;
|
||||
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>;
|
||||
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>;
|
||||
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>;
|
||||
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
|
||||
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, SETOGE, SETGE, SETOLE, SETLE>;
|
||||
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, SETOGT, SETGT, SETOLT, SETLT>;
|
||||
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, SETOEQ, SETEQ, SETOEQ, SETEQ>;
|
||||
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, SETONE, SETNE, SETONE, SETNE>;
|
||||
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, SETUO, SETUO, SETUO, SETUO>;
|
||||
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
|
||||
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
|
||||
|
||||
|
@ -4685,20 +4685,22 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
|
||||
}
|
||||
|
||||
multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
|
||||
SDPatternOperator op_nopred>
|
||||
CondCode cc1, CondCode cc2,
|
||||
CondCode invcc1, CondCode invcc2>
|
||||
: sve_fp_3op_p_pd<opc, asm, op> {
|
||||
def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8f16, nxv8f16,
|
||||
!cast<Instruction>(NAME # _H), PTRUE_H>;
|
||||
def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f16, nxv4f16,
|
||||
!cast<Instruction>(NAME # _H), PTRUE_S>;
|
||||
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f16, nxv2f16,
|
||||
!cast<Instruction>(NAME # _H), PTRUE_D>;
|
||||
def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f32, nxv4f32,
|
||||
!cast<Instruction>(NAME # _S), PTRUE_S>;
|
||||
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f32, nxv2f32,
|
||||
!cast<Instruction>(NAME # _S), PTRUE_D>;
|
||||
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f64, nxv2f64,
|
||||
!cast<Instruction>(NAME # _D), PTRUE_D>;
|
||||
defm : SVE_SETCC_Pat<cc1, invcc1, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
|
||||
defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
|
||||
defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
|
||||
|
||||
defm : SVE_SETCC_Pat<cc2, invcc2, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat<cc2, invcc2, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat<cc2, invcc2, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
|
||||
defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
|
||||
defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
762
test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
Normal file
762
test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
Normal file
@ -0,0 +1,762 @@
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
; Don't use SVE when its registers are no bigger than NEON.
|
||||
; NO_SVE-NOT: z{{[0-9]}}
|
||||
|
||||
;
|
||||
; FCMP OEQ
|
||||
;
|
||||
|
||||
; Don't use SVE for 64-bit vectors.
|
||||
define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v4f16:
|
||||
; CHECK: fcmeq v0.4h, v0.4h, v1.4h
|
||||
; CHECK-NEXT: ret
|
||||
%cmp = fcmp oeq <4 x half> %op1, %op2
|
||||
%sext = sext <4 x i1> %cmp to <4 x i16>
|
||||
ret <4 x i16> %sext
|
||||
}
|
||||
|
||||
; Don't use SVE for 128-bit vectors.
|
||||
define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v8f16:
|
||||
; CHECK: fcmeq v0.8h, v0.8h, v1.8h
|
||||
; CHECK-NEXT: ret
|
||||
%cmp = fcmp oeq <8 x half> %op1, %op2
|
||||
%sext = sext <8 x i1> %cmp to <8 x i16>
|
||||
ret <8 x i16> %sext
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp oeq <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v32f16:
|
||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
|
||||
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; VBITS_GE_512-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
|
||||
; Ensure sensible type legalisation: with 256-bit registers the compare is split into low and high halves.
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
|
||||
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
|
||||
; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
|
||||
; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
|
||||
; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
|
||||
; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h
|
||||
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1
|
||||
; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h
|
||||
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1
|
||||
; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x2]
|
||||
; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x[[C_HI]]]
|
||||
; VBITS_EQ_256-NEXT: ret
|
||||
%op1 = load <32 x half>, <32 x half>* %a
|
||||
%op2 = load <32 x half>, <32 x half>* %b
|
||||
%cmp = fcmp oeq <32 x half> %op1, %op2
|
||||
%sext = sext <32 x i1> %cmp to <32 x i16>
|
||||
store <32 x i16> %sext, <32 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v64f16:
|
||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
|
||||
; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; VBITS_GE_1024-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
%op1 = load <64 x half>, <64 x half>* %a
|
||||
%op2 = load <64 x half>, <64 x half>* %b
|
||||
%cmp = fcmp oeq <64 x half> %op1, %op2
|
||||
%sext = sext <64 x i1> %cmp to <64 x i16>
|
||||
store <64 x i16> %sext, <64 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v128f16:
|
||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
|
||||
; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; VBITS_GE_2048-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
%op1 = load <128 x half>, <128 x half>* %a
|
||||
%op2 = load <128 x half>, <128 x half>* %b
|
||||
%cmp = fcmp oeq <128 x half> %op1, %op2
|
||||
%sext = sext <128 x i1> %cmp to <128 x i16>
|
||||
store <128 x i16> %sext, <128 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
; Don't use SVE for 64-bit vectors.
|
||||
define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v2f32:
|
||||
; CHECK: fcmeq v0.2s, v0.2s, v1.2s
|
||||
; CHECK-NEXT: ret
|
||||
%cmp = fcmp oeq <2 x float> %op1, %op2
|
||||
%sext = sext <2 x i1> %cmp to <2 x i32>
|
||||
ret <2 x i32> %sext
|
||||
}
|
||||
|
||||
; Don't use SVE for 128-bit vectors.
|
||||
define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v4f32:
|
||||
; CHECK: fcmeq v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: ret
|
||||
%cmp = fcmp oeq <4 x float> %op1, %op2
|
||||
%sext = sext <4 x i1> %cmp to <4 x i32>
|
||||
ret <4 x i32> %sext
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i32>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v8f32:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
|
||||
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <8 x float>, <8 x float>* %a
|
||||
%op2 = load <8 x float>, <8 x float>* %b
|
||||
%cmp = fcmp oeq <8 x float> %op1, %op2
|
||||
%sext = sext <8 x i1> %cmp to <8 x i32>
|
||||
store <8 x i32> %sext, <8 x i32>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i32>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v16f32:
|
||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
|
||||
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
||||
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
|
||||
; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
|
||||
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
|
||||
; VBITS_GE_512-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
|
||||
; Ensure sensible type legalisation: with 256-bit registers the compare is split into low and high halves.
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
|
||||
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
|
||||
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
|
||||
; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
|
||||
; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
|
||||
; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
|
||||
; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
|
||||
; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
|
||||
; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s
|
||||
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1
|
||||
; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s
|
||||
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1
|
||||
; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x2]
|
||||
; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x[[C_HI]]]
|
||||
; VBITS_EQ_256-NEXT: ret
|
||||
%op1 = load <16 x float>, <16 x float>* %a
|
||||
%op2 = load <16 x float>, <16 x float>* %b
|
||||
%cmp = fcmp oeq <16 x float> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i32>
|
||||
store <16 x i32> %sext, <16 x i32>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x i32>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v32f32:
|
||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
|
||||
; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
||||
; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
|
||||
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
|
||||
; VBITS_GE_1024-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
%op1 = load <32 x float>, <32 x float>* %a
|
||||
%op2 = load <32 x float>, <32 x float>* %b
|
||||
%cmp = fcmp oeq <32 x float> %op1, %op2
|
||||
%sext = sext <32 x i1> %cmp to <32 x i32>
|
||||
store <32 x i32> %sext, <32 x i32>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x i32>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v64f32:
|
||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
|
||||
; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
||||
; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
|
||||
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
|
||||
; VBITS_GE_2048-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
%op1 = load <64 x float>, <64 x float>* %a
|
||||
%op2 = load <64 x float>, <64 x float>* %b
|
||||
%cmp = fcmp oeq <64 x float> %op1, %op2
|
||||
%sext = sext <64 x i1> %cmp to <64 x i32>
|
||||
store <64 x i32> %sext, <64 x i32>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
; Don't use SVE for 64-bit vectors.
|
||||
define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v1f64:
|
||||
; CHECK: fcmeq d0, d0, d1
|
||||
; CHECK-NEXT: ret
|
||||
%cmp = fcmp oeq <1 x double> %op1, %op2
|
||||
%sext = sext <1 x i1> %cmp to <1 x i64>
|
||||
ret <1 x i64> %sext
|
||||
}
|
||||
|
||||
; Don't use SVE for 128-bit vectors.
|
||||
define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v2f64:
|
||||
; CHECK: fcmeq v0.2d, v0.2d, v1.2d
|
||||
; CHECK-NEXT: ret
|
||||
%cmp = fcmp oeq <2 x double> %op1, %op2
|
||||
%sext = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %sext
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x i64>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v4f64:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
|
||||
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <4 x double>, <4 x double>* %a
|
||||
%op2 = load <4 x double>, <4 x double>* %b
|
||||
%cmp = fcmp oeq <4 x double> %op1, %op2
|
||||
%sext = sext <4 x i1> %cmp to <4 x i64>
|
||||
store <4 x i64> %sext, <4 x i64>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i64>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v8f64:
|
||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
|
||||
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
||||
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
||||
; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
|
||||
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
||||
; VBITS_GE_512-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
|
||||
; Ensure sensible type legalisation: with 256-bit registers the compare is split into low and high halves.
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
|
||||
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
|
||||
; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
|
||||
; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
|
||||
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
|
||||
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
|
||||
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
|
||||
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
|
||||
; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d
|
||||
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, [[CMP_HI]]/z, #-1
|
||||
; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d
|
||||
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1
|
||||
; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x2]
|
||||
; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x[[C_HI]]]
|
||||
; VBITS_EQ_256-NEXT: ret
|
||||
%op1 = load <8 x double>, <8 x double>* %a
|
||||
%op2 = load <8 x double>, <8 x double>* %b
|
||||
%cmp = fcmp oeq <8 x double> %op1, %op2
|
||||
%sext = sext <8 x i1> %cmp to <8 x i64>
|
||||
store <8 x i64> %sext, <8 x i64>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x i64>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v16f64:
|
||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
|
||||
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
||||
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
|
||||
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
||||
; VBITS_GE_1024-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
%op1 = load <16 x double>, <16 x double>* %a
|
||||
%op2 = load <16 x double>, <16 x double>* %b
|
||||
%cmp = fcmp oeq <16 x double> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i64>
|
||||
store <16 x i64> %sext, <16 x i64>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fcmp_oeq_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x i64>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oeq_v32f64:
|
||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
||||
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
|
||||
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
||||
; VBITS_GE_2048-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
%op1 = load <32 x double>, <32 x double>* %a
|
||||
%op2 = load <32 x double>, <32 x double>* %b
|
||||
%cmp = fcmp oeq <32 x double> %op1, %op2
|
||||
%sext = sext <32 x i1> %cmp to <32 x i64>
|
||||
store <32 x i64> %sext, <32 x i64>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP UEQ (checked as the inverse compare FCMNE followed by an inversion of the result)
|
||||
;
|
||||
|
||||
define void @fcmp_ueq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_ueq_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
|
||||
; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
|
||||
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp ueq <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
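; FCMP ONE (NOTE(review): checked as a bare FCMNE - confirm FCMNE yields false for unordered lanes, as 'one' requires)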
|
||||
;
|
||||
|
||||
define void @fcmp_one_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_one_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp one <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP UNE (checked as the inverse compare FCMEQ followed by an inversion of the result)
|
||||
;
|
||||
|
||||
define void @fcmp_une_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_une_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
|
||||
; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
|
||||
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp une <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP OGT
|
||||
;
|
||||
|
||||
define void @fcmp_ogt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_ogt_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp ogt <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP UGT (checked as FCMGE with swapped operands followed by an inversion of the result)
|
||||
;
|
||||
|
||||
define void @fcmp_ugt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_ugt_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
|
||||
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
|
||||
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp ugt <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP OLT (checked as FCMGT with swapped operands)
|
||||
;
|
||||
|
||||
define void @fcmp_olt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_olt_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp olt <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP ULT (checked as the inverse compare FCMGE followed by an inversion of the result)
|
||||
;
|
||||
|
||||
define void @fcmp_ult_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_ult_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
|
||||
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
|
||||
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp ult <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP OGE
|
||||
;
|
||||
|
||||
define void @fcmp_oge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_oge_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp oge <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP UGE (checked as FCMGT with swapped operands followed by an inversion of the result)
|
||||
;
|
||||
|
||||
define void @fcmp_uge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_uge_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
|
||||
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
|
||||
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp uge <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP OLE (checked as FCMGE with swapped operands)
|
||||
;
|
||||
|
||||
define void @fcmp_ole_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_ole_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp ole <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP ULE (checked as the inverse compare FCMGT followed by an inversion of the result)
|
||||
;
|
||||
|
||||
define void @fcmp_ule_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_ule_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
|
||||
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
|
||||
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp ule <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP UNO
|
||||
;
|
||||
|
||||
define void @fcmp_uno_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_uno_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmuo [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp uno <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP ORD (checked as the inverse compare FCMUO followed by an inversion of the result)
|
||||
;
|
||||
|
||||
define void @fcmp_ord_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_ord_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
|
||||
; CHECK-NEXT: fcmuo [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
|
||||
; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp ord <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP EQ (fcmp fast oeq)
|
||||
;
|
||||
|
||||
define void @fcmp_eq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_eq_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp fast oeq <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP NE (fcmp fast one)
|
||||
;
|
||||
|
||||
define void @fcmp_ne_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_ne_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp fast one <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP GT (fcmp fast ogt)
|
||||
;
|
||||
|
||||
define void @fcmp_gt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_gt_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp fast ogt <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP LT (fcmp fast olt)
|
||||
;
|
||||
|
||||
define void @fcmp_lt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_lt_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp fast olt <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP GE (fcmp fast oge)
|
||||
;
|
||||
|
||||
define void @fcmp_ge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_ge_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp fast oge <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; FCMP LE (fcmp fast ole)
|
||||
;
|
||||
|
||||
define void @fcmp_le_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
||||
; CHECK-LABEL: fcmp_le_v16f16:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
||||
; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
|
||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x half>, <16 x half>* %a
|
||||
%op2 = load <16 x half>, <16 x half>* %b
|
||||
%cmp = fcmp fast ole <16 x half> %op1, %op2
|
||||
%sext = sext <16 x i1> %cmp to <16 x i16>
|
||||
store <16 x i16> %sext, <16 x i16>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
@ -626,32 +626,32 @@ define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xvcmpgtsp vs0, v5, v4
|
||||
; CHECK-NEXT: xvcmpgtsp vs1, v4, v5
|
||||
; CHECK-NEXT: xxlnor vs0, vs1, vs0
|
||||
; CHECK-NEXT: xxsel v2, v3, v2, vs0
|
||||
; CHECK-NEXT: xxlor vs0, vs1, vs0
|
||||
; CHECK-NEXT: xxsel v2, v2, v3, vs0
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-REG-LABEL: test22:
|
||||
; CHECK-REG: # %bb.0: # %entry
|
||||
; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4
|
||||
; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5
|
||||
; CHECK-REG-NEXT: xxlnor vs0, vs1, vs0
|
||||
; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0
|
||||
; CHECK-REG-NEXT: xxlor vs0, vs1, vs0
|
||||
; CHECK-REG-NEXT: xxsel v2, v2, v3, vs0
|
||||
; CHECK-REG-NEXT: blr
|
||||
;
|
||||
; CHECK-FISL-LABEL: test22:
|
||||
; CHECK-FISL: # %bb.0: # %entry
|
||||
; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4
|
||||
; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5
|
||||
; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs1
|
||||
; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0
|
||||
; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1
|
||||
; CHECK-FISL-NEXT: xxsel v2, v2, v3, vs0
|
||||
; CHECK-FISL-NEXT: blr
|
||||
;
|
||||
; CHECK-LE-LABEL: test22:
|
||||
; CHECK-LE: # %bb.0: # %entry
|
||||
; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4
|
||||
; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5
|
||||
; CHECK-LE-NEXT: xxlnor vs0, vs1, vs0
|
||||
; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0
|
||||
; CHECK-LE-NEXT: xxlor vs0, vs1, vs0
|
||||
; CHECK-LE-NEXT: xxsel v2, v2, v3, vs0
|
||||
; CHECK-LE-NEXT: blr
|
||||
entry:
|
||||
%m = fcmp ueq <4 x float> %c, %d
|
||||
|
@ -285,9 +285,8 @@ define void @fcmp_ult_vv_v8f64(<8 x double>* %x, <8 x double>* %y, <8 x i1>* %z)
|
||||
; CHECK-NEXT: vle64.v v28, (a0)
|
||||
; CHECK-NEXT: vle64.v v8, (a1)
|
||||
; CHECK-NEXT: vmfle.vv v25, v8, v28
|
||||
; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
|
||||
; CHECK-NEXT: vmnand.mm v25, v25, v25
|
||||
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmnand.mm v25, v25, v25
|
||||
; CHECK-NEXT: vse1.v v25, (a2)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x double>, <8 x double>* %x
|
||||
@ -358,8 +357,8 @@ define void @fcmp_ueq_vv_v32f32(<32 x float>* %x, <32 x float>* %y, <32 x i1>* %
|
||||
; CHECK-NEXT: vsetvli a4, a3, e32,m8,ta,mu
|
||||
; CHECK-NEXT: vle32.v v8, (a0)
|
||||
; CHECK-NEXT: vle32.v v16, (a1)
|
||||
; CHECK-NEXT: vmflt.vv v25, v16, v8
|
||||
; CHECK-NEXT: vmflt.vv v26, v8, v16
|
||||
; CHECK-NEXT: vmflt.vv v25, v8, v16
|
||||
; CHECK-NEXT: vmflt.vv v26, v16, v8
|
||||
; CHECK-NEXT: vsetvli a0, a3, e8,m2,ta,mu
|
||||
; CHECK-NEXT: vmnor.mm v25, v26, v25
|
||||
; CHECK-NEXT: vse1.v v25, (a2)
|
||||
@ -395,8 +394,8 @@ define void @fcmp_one_vv_v8f64(<16 x double>* %x, <16 x double>* %y, <16 x i1>*
|
||||
; CHECK-NEXT: vsetivli a3, 16, e64,m8,ta,mu
|
||||
; CHECK-NEXT: vle64.v v8, (a0)
|
||||
; CHECK-NEXT: vle64.v v16, (a1)
|
||||
; CHECK-NEXT: vmflt.vv v25, v16, v8
|
||||
; CHECK-NEXT: vmflt.vv v26, v8, v16
|
||||
; CHECK-NEXT: vmflt.vv v25, v8, v16
|
||||
; CHECK-NEXT: vmflt.vv v26, v16, v8
|
||||
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmor.mm v25, v26, v25
|
||||
; CHECK-NEXT: vse1.v v25, (a2)
|
||||
@ -433,9 +432,8 @@ define void @fcmp_ord_vv_v4f16(<4 x half>* %x, <4 x half>* %y, <4 x i1>* %z) {
|
||||
; CHECK-NEXT: vle16.v v26, (a0)
|
||||
; CHECK-NEXT: vmfeq.vv v27, v25, v25
|
||||
; CHECK-NEXT: vmfeq.vv v25, v26, v26
|
||||
; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
|
||||
; CHECK-NEXT: vmand.mm v25, v25, v27
|
||||
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmand.mm v25, v25, v27
|
||||
; CHECK-NEXT: vse1.v v25, (a2)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x half>, <4 x half>* %x
|
||||
@ -453,9 +451,8 @@ define void @fcmp_uno_vv_v4f16(<2 x half>* %x, <2 x half>* %y, <2 x i1>* %z) {
|
||||
; CHECK-NEXT: vle16.v v26, (a0)
|
||||
; CHECK-NEXT: vmfne.vv v27, v25, v25
|
||||
; CHECK-NEXT: vmfne.vv v25, v26, v26
|
||||
; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
|
||||
; CHECK-NEXT: vmor.mm v25, v25, v27
|
||||
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmor.mm v25, v25, v27
|
||||
; CHECK-NEXT: vse1.v v25, (a2)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x half>, <2 x half>* %x
|
||||
@ -747,9 +744,8 @@ define void @fcmp_ult_vf_v8f64(<8 x double>* %x, double %y, <8 x i1>* %z) {
|
||||
; CHECK-NEXT: vsetivli a2, 8, e64,m4,ta,mu
|
||||
; CHECK-NEXT: vle64.v v28, (a0)
|
||||
; CHECK-NEXT: vmfge.vf v25, v28, fa0
|
||||
; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
|
||||
; CHECK-NEXT: vmnand.mm v25, v25, v25
|
||||
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmnand.mm v25, v25, v25
|
||||
; CHECK-NEXT: vse1.v v25, (a1)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x double>, <8 x double>* %x
|
||||
@ -820,8 +816,8 @@ define void @fcmp_ueq_vf_v32f32(<32 x float>* %x, float %y, <32 x i1>* %z) {
|
||||
; CHECK-NEXT: addi a2, zero, 32
|
||||
; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu
|
||||
; CHECK-NEXT: vle32.v v8, (a0)
|
||||
; CHECK-NEXT: vmfgt.vf v25, v8, fa0
|
||||
; CHECK-NEXT: vmflt.vf v26, v8, fa0
|
||||
; CHECK-NEXT: vmflt.vf v25, v8, fa0
|
||||
; CHECK-NEXT: vmfgt.vf v26, v8, fa0
|
||||
; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu
|
||||
; CHECK-NEXT: vmnor.mm v25, v26, v25
|
||||
; CHECK-NEXT: vse1.v v25, (a1)
|
||||
@ -857,8 +853,8 @@ define void @fcmp_one_vf_v8f64(<16 x double>* %x, double %y, <16 x i1>* %z) {
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli a2, 16, e64,m8,ta,mu
|
||||
; CHECK-NEXT: vle64.v v8, (a0)
|
||||
; CHECK-NEXT: vmfgt.vf v25, v8, fa0
|
||||
; CHECK-NEXT: vmflt.vf v26, v8, fa0
|
||||
; CHECK-NEXT: vmflt.vf v25, v8, fa0
|
||||
; CHECK-NEXT: vmfgt.vf v26, v8, fa0
|
||||
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmor.mm v25, v26, v25
|
||||
; CHECK-NEXT: vse1.v v25, (a1)
|
||||
@ -896,9 +892,8 @@ define void @fcmp_ord_vf_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
|
||||
; CHECK-NEXT: vfmv.v.f v26, fa0
|
||||
; CHECK-NEXT: vmfeq.vf v27, v26, fa0
|
||||
; CHECK-NEXT: vmfeq.vv v26, v25, v25
|
||||
; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
|
||||
; CHECK-NEXT: vmand.mm v25, v26, v27
|
||||
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmand.mm v25, v26, v27
|
||||
; CHECK-NEXT: vse1.v v25, (a1)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x half>, <4 x half>* %x
|
||||
@ -917,9 +912,8 @@ define void @fcmp_uno_vf_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
|
||||
; CHECK-NEXT: vfmv.v.f v26, fa0
|
||||
; CHECK-NEXT: vmfne.vf v27, v26, fa0
|
||||
; CHECK-NEXT: vmfne.vv v26, v25, v25
|
||||
; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
|
||||
; CHECK-NEXT: vmor.mm v25, v26, v27
|
||||
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmor.mm v25, v26, v27
|
||||
; CHECK-NEXT: vse1.v v25, (a1)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x half>, <2 x half>* %x
|
||||
@ -1212,9 +1206,8 @@ define void @fcmp_ult_fv_v8f64(<8 x double>* %x, double %y, <8 x i1>* %z) {
|
||||
; CHECK-NEXT: vsetivli a2, 8, e64,m4,ta,mu
|
||||
; CHECK-NEXT: vle64.v v28, (a0)
|
||||
; CHECK-NEXT: vmfle.vf v25, v28, fa0
|
||||
; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
|
||||
; CHECK-NEXT: vmnand.mm v25, v25, v25
|
||||
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmnand.mm v25, v25, v25
|
||||
; CHECK-NEXT: vse1.v v25, (a1)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x double>, <8 x double>* %x
|
||||
@ -1285,8 +1278,8 @@ define void @fcmp_ueq_fv_v32f32(<32 x float>* %x, float %y, <32 x i1>* %z) {
|
||||
; CHECK-NEXT: addi a2, zero, 32
|
||||
; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu
|
||||
; CHECK-NEXT: vle32.v v8, (a0)
|
||||
; CHECK-NEXT: vmflt.vf v25, v8, fa0
|
||||
; CHECK-NEXT: vmfgt.vf v26, v8, fa0
|
||||
; CHECK-NEXT: vmfgt.vf v25, v8, fa0
|
||||
; CHECK-NEXT: vmflt.vf v26, v8, fa0
|
||||
; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu
|
||||
; CHECK-NEXT: vmnor.mm v25, v26, v25
|
||||
; CHECK-NEXT: vse1.v v25, (a1)
|
||||
@ -1322,8 +1315,8 @@ define void @fcmp_one_fv_v8f64(<16 x double>* %x, double %y, <16 x i1>* %z) {
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli a2, 16, e64,m8,ta,mu
|
||||
; CHECK-NEXT: vle64.v v8, (a0)
|
||||
; CHECK-NEXT: vmflt.vf v25, v8, fa0
|
||||
; CHECK-NEXT: vmfgt.vf v26, v8, fa0
|
||||
; CHECK-NEXT: vmfgt.vf v25, v8, fa0
|
||||
; CHECK-NEXT: vmflt.vf v26, v8, fa0
|
||||
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmor.mm v25, v26, v25
|
||||
; CHECK-NEXT: vse1.v v25, (a1)
|
||||
@ -1359,11 +1352,10 @@ define void @fcmp_ord_fv_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
|
||||
; CHECK-NEXT: vsetivli a2, 4, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vfmv.v.f v26, fa0
|
||||
; CHECK-NEXT: vmfeq.vf v27, v26, fa0
|
||||
; CHECK-NEXT: vmfeq.vv v26, v25, v25
|
||||
; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
|
||||
; CHECK-NEXT: vmand.mm v25, v27, v26
|
||||
; CHECK-NEXT: vmfeq.vv v27, v25, v25
|
||||
; CHECK-NEXT: vmfeq.vf v25, v26, fa0
|
||||
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmand.mm v25, v25, v27
|
||||
; CHECK-NEXT: vse1.v v25, (a1)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x half>, <4 x half>* %x
|
||||
@ -1380,11 +1372,10 @@ define void @fcmp_uno_fv_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
|
||||
; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vfmv.v.f v26, fa0
|
||||
; CHECK-NEXT: vmfne.vf v27, v26, fa0
|
||||
; CHECK-NEXT: vmfne.vv v26, v25, v25
|
||||
; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
|
||||
; CHECK-NEXT: vmor.mm v25, v27, v26
|
||||
; CHECK-NEXT: vmfne.vv v27, v25, v25
|
||||
; CHECK-NEXT: vmfne.vf v25, v26, fa0
|
||||
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
|
||||
; CHECK-NEXT: vmor.mm v25, v25, v27
|
||||
; CHECK-NEXT: vse1.v v25, (a1)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x half>, <2 x half>* %x
|
||||
|
Loading…
Reference in New Issue
Block a user