
[ARM] Support lowering of half-precision FP arguments and returns in the AArch32 backend

Summary:
Half-precision floating-point arguments and returns are currently
promoted to either float or int32 in clang's CodeGen, and there is no
existing support for lowering `half` arguments and returns from IR in
the AArch32 backend.

Such frontend coercions, implemented as coercion through memory in
clang, can cause a series of issues in argument lowering, such as
arguments being stored in the wrong bits on big-endian architectures
and missed overflow detection in the return values of certain
functions.
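
As a rough illustration of the big-endian problem (a host-side sketch, not part of the patch; the function name and framing are purely illustrative), coercing 16 meaningful bits through a 32-bit memory slot places them in different halves of the reloaded word depending on byte order:

    #include <cstdint>
    #include <cstring>

    // Store the 16 half-precision bits into a 32-bit slot and reload the
    // whole slot, as coercion through memory effectively does.
    uint32_t coerceThroughMemory(uint16_t halfBits) {
      unsigned char slot[4] = {0, 0, 0, 0};
      std::memcpy(slot, &halfBits, sizeof halfBits); // write 2 bytes at offset 0
      uint32_t reloaded;
      std::memcpy(&reloaded, slot, sizeof reloaded); // read back all 4 bytes
      // On little-endian hosts the half bits land in the low 16 bits of
      // 'reloaded'; on big-endian hosts they land in the high 16 bits,
      // which is not where the AAPCS expects them.
      return reloaded;
    }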

This patch introduces handling of half-precision arguments and returns
in the backend using the actual "half" type in the IR. Using the "half"
type, the backend is able to properly enforce the AAPCS rules for those
arguments, making sure they are stored in the correct bits of the
registers and performing the necessary floating-point conversions.
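
A minimal host-side sketch of the placement rule being enforced (illustrative only, not backend code; the function names are made up): the f16 value always travels in the low 16 bits of its 32-bit container, whether that container is a general-purpose register (soft-float ABI) or a single-precision register (hard-float ABI).

    #include <cstdint>
    #include <cstring>

    // Soft-float ABI: the half bits are any-extended into an i32 container.
    uint32_t passHalfSoft(uint16_t halfBits) {
      return halfBits; // value in the LSBs, upper 16 bits unspecified
    }

    // Hard-float ABI: the same 32-bit pattern is reinterpreted as an f32 so it
    // can live in an S register.
    float passHalfHard(uint16_t halfBits) {
      uint32_t container = halfBits;
      float asF32;
      std::memcpy(&asF32, &container, sizeof asF32); // bitcast i32 -> f32
      return asF32;
    }

    // Receiving side: truncate back to the low 16 bits to recover the half.
    uint16_t receiveHalf(uint32_t containerBits) {
      return static_cast<uint16_t>(containerBits);
    }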

Reviewers: rjmccall, olista01, asl, efriedma, ostannard, SjoerdMeijer

Reviewed By: ostannard

Subscribers: stuij, hiraditya, dmgreen, llvm-commits, chill, dnsampaio, danielkiss, kristof.beyls, cfe-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75169
Lucas Prates 2020-06-09 09:45:47 +01:00
parent 718e220d58
commit a515304bf7
18 changed files with 557 additions and 424 deletions


@@ -3600,6 +3600,24 @@ public:
   // the SelectionDAGBuilder code knows how to lower these.
   //
 
+  /// Target-specific splitting of values into parts that fit a register
+  /// storing a legal type
+  virtual bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
+                                           SDValue Val, SDValue *Parts,
+                                           unsigned NumParts, MVT PartVT,
+                                           Optional<CallingConv::ID> CC) const {
+    return false;
+  }
+
+  /// Target-specific combining of register parts into its original value
+  virtual SDValue
+  joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
+                             const SDValue *Parts, unsigned NumParts,
+                             MVT PartVT, EVT ValueVT,
+                             Optional<CallingConv::ID> CC) const {
+    return SDValue();
+  }
+
   /// This hook must be implemented to lower the incoming (formal) arguments,
   /// described by the Ins array, into the specified DAG. The implementation
   /// should fill in the InVals array with legal-type argument values, and
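
Both hooks default to "not handled". As a sketch of the intended contract (hypothetical target; "MyTargetLowering" is not a class in this patch), an override handles only the conversion it cares about and otherwise returns false so the generic part-splitting logic still runs, mirroring the ARM implementation added further down:

    bool MyTargetLowering::splitValueIntoRegisterParts(
        SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
        unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
      // Only ABI register copies of f16 passed in f32 registers are special here.
      if (CC.hasValue() && Val.getValueType() == MVT::f16 && PartVT == MVT::f32) {
        Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);      // f16 -> i16
        Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);   // i16 -> i32
        Parts[0] = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); // i32 -> f32
        return true;
      }
      return false; // let the default getCopyToParts logic handle everything else
    }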


@@ -298,7 +298,11 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
     assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
 
     if (VA.needsCustom()) {
-      j += Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+      unsigned NumArgRegs =
+          Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+      if (!NumArgRegs)
+        return false;
+      j += NumArgRegs;
       continue;
     }


@@ -205,12 +205,17 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                 MVT PartVT, EVT ValueVT, const Value *V,
                                 Optional<CallingConv::ID> CC = None,
                                 Optional<ISD::NodeType> AssertOp = None) {
+  // Let the target assemble the parts if it wants to
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
+                                                   PartVT, ValueVT, CC))
+    return Val;
+
   if (ValueVT.isVector())
     return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
                                   CC);
 
   assert(NumParts > 0 && "No parts to assemble!");
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Val = Parts[0];
 
   if (NumParts > 1) {
@@ -512,6 +517,11 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                            const Value *V,
                            Optional<CallingConv::ID> CallConv = None,
                            ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+  // Let the target split the parts if it wants to
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
+                                      CallConv))
+    return;
+
   EVT ValueVT = Val.getValueType();
 
   // Handle the vector case separately.


@@ -140,7 +140,10 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
     CCValAssign VA = VAs[0];
     assert(VA.needsCustom() && "Value doesn't need custom handling");
-    assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+    // Custom lowering for other types, such as f16, is currently not supported
+    if (VA.getValVT() != MVT::f64)
+      return 0;
 
     CCValAssign NextVA = VAs[1];
     assert(NextVA.needsCustom() && "Value doesn't need custom handling");
@@ -360,7 +363,10 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
     CCValAssign VA = VAs[0];
     assert(VA.needsCustom() && "Value doesn't need custom handling");
-    assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+    // Custom lowering for other types, such as f16, is currently not supported
+    if (VA.getValVT() != MVT::f64)
+      return 0;
 
     CCValAssign NextVA = VAs[1];
     assert(NextVA.needsCustom() && "Value doesn't need custom handling");


@@ -278,5 +278,33 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
   return true;
 }
 
+static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT,
+                                  CCValAssign::LocInfo LocInfo, CCState &State,
+                                  ArrayRef<MCPhysReg> RegList) {
+  unsigned Reg = State.AllocateReg(RegList);
+  if (Reg) {
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    return true;
+  }
+  return false;
+}
+
+static bool CC_ARM_AAPCS_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
+                                    CCValAssign::LocInfo LocInfo,
+                                    ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  // f16 arguments are extended to i32 and assigned to a register in [r0, r3]
+  return CustomAssignInRegList(ValNo, ValVT, MVT::i32, LocInfo, State,
+                               RRegList);
+}
+
+static bool CC_ARM_AAPCS_VFP_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
+                                        CCValAssign::LocInfo LocInfo,
+                                        ISD::ArgFlagsTy ArgFlags,
+                                        CCState &State) {
+  // f16 arguments are extended to f32 and assigned to a register in [s0, s15]
+  return CustomAssignInRegList(ValNo, ValVT, MVT::f32, LocInfo, State,
+                               SRegList);
+}
+
 // Include the table generated calling convention implementations.
 #include "ARMGenCallingConv.inc"


@@ -139,7 +139,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
   CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, [R0, R1, R2, R3]>>>,
   CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
-  CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
+  CCIfType<[f16, f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
   CCIfType<[v2f64], CCIfAlign<"16",
            CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
@@ -176,6 +176,7 @@ def CC_ARM_AAPCS : CallingConv<[
   CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCIfType<[f16], CCCustom<"CC_ARM_AAPCS_Custom_f16">>,
   CCDelegateTo<CC_ARM_AAPCS_Common>
 ]>;
 
@@ -193,6 +194,7 @@ def RetCC_ARM_AAPCS : CallingConv<[
   CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCIfType<[f16], CCCustom<"CC_ARM_AAPCS_Custom_f16">>,
   CCDelegateTo<RetCC_ARM_AAPCS_Common>
 ]>;
 
@@ -224,6 +226,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
                                  S9, S10, S11, S12, S13, S14, S15]>>,
+  CCIfType<[f16], CCCustom<"CC_ARM_AAPCS_VFP_Custom_f16">>,
   CCDelegateTo<CC_ARM_AAPCS_Common>
 ]>;
 
@@ -243,6 +246,7 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
                                  S9, S10, S11, S12, S13, S14, S15]>>,
+  CCIfType<[f16], CCCustom<"CC_ARM_AAPCS_VFP_Custom_f16">>,
   CCDelegateTo<RetCC_ARM_AAPCS_Common>
 ]>;


@@ -2024,7 +2024,8 @@ SDValue ARMTargetLowering::LowerCallResult(
     }
 
     SDValue Val;
-    if (VA.needsCustom()) {
+    if (VA.needsCustom() &&
+        (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
       // Handle f64 or half of a v2f64.
       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                       InFlag);
@@ -2073,6 +2074,17 @@ SDValue ARMTargetLowering::LowerCallResult(
       break;
     }
 
+    // f16 arguments have their size extended to 4 bytes and passed as if they
+    // had been copied to the LSBs of a 32-bit register.
+    // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
+    if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
+      assert(Subtarget->hasFullFP16() &&
+             "Lowering f16 type return without full fp16 support");
+      Val = DAG.getNode(ISD::BITCAST, dl,
+                        MVT::getIntegerVT(VA.getLocVT().getSizeInBits()), Val);
+      Val = DAG.getNode(ARMISD::VMOVhr, dl, VA.getValVT(), Val);
+    }
+
     InVals.push_back(Val);
   }
@@ -2241,31 +2253,40 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       break;
     }
 
+    // f16 arguments have their size extended to 4 bytes and passed as if they
+    // had been copied to the LSBs of a 32-bit register.
+    // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
+    if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
+      assert(Subtarget->hasFullFP16() &&
+             "Lowering f16 type argument without full fp16 support");
+      Arg = DAG.getNode(ARMISD::VMOVrh, dl,
+                        MVT::getIntegerVT(VA.getLocVT().getSizeInBits()), Arg);
+      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+    }
+
     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
-    if (VA.needsCustom()) {
-      if (VA.getLocVT() == MVT::v2f64) {
+    if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
       SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                 DAG.getConstant(0, dl, MVT::i32));
       SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                 DAG.getConstant(1, dl, MVT::i32));
-      PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
-                       VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+      PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
+                       StackPtr, MemOpChains, Flags);
       VA = ArgLocs[++i]; // skip ahead to next loc
       if (VA.isRegLoc()) {
-        PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
-                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+        PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
+                         StackPtr, MemOpChains, Flags);
       } else {
         assert(VA.isMemLoc());
-        MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
-                                               dl, DAG, VA, Flags));
+        MemOpChains.push_back(
+            LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags));
       }
-    } else {
+    } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
       PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                        StackPtr, MemOpChains, Flags);
-      }
     } else if (VA.isRegLoc()) {
       if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
           Outs[0].VT == MVT::i32) {
@@ -2755,7 +2776,7 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
       ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
       if (VA.getLocInfo() == CCValAssign::Indirect)
         return false;
-      if (VA.needsCustom()) {
+      if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
         // f64 and vector types are split into multiple registers or
         // register/stack-slot combinations.  The types will not match
         // the registers; give up on memory f64 refs until we figure
@@ -2907,7 +2928,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
       break;
     }
 
-    if (VA.needsCustom()) {
+    if (VA.needsCustom() &&
+        (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
       if (VA.getLocVT() == MVT::v2f64) {
         // Extract the first half and return it in two registers.
         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
@@ -2915,15 +2937,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
 
-        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                                 HalfGPRs.getValue(isLittleEndian ? 0 : 1),
-                                 Flag);
+        Chain =
+            DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+                             HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
         Flag = Chain.getValue(1);
         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
         VA = RVLocs[++i]; // skip ahead to next loc
-        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                                 HalfGPRs.getValue(isLittleEndian ? 1 : 0),
-                                 Flag);
+        Chain =
+            DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+                             HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
         Flag = Chain.getValue(1);
         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
         VA = RVLocs[++i]; // skip ahead to next loc
@@ -2937,14 +2959,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                   DAG.getVTList(MVT::i32, MVT::i32), Arg);
       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                               fmrrd.getValue(isLittleEndian ? 0 : 1),
-                               Flag);
+                               fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
       Flag = Chain.getValue(1);
       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
       VA = RVLocs[++i]; // skip ahead to next loc
       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                               fmrrd.getValue(isLittleEndian ? 1 : 0),
-                               Flag);
+                               fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
     } else
       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
@@ -4080,6 +4100,40 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
   AFI->setVarArgsFrameIndex(FrameIndex);
 }
 
+bool ARMTargetLowering::splitValueIntoRegisterParts(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
+  bool IsABIRegCopy = CC.hasValue();
+  EVT ValueVT = Val.getValueType();
+  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
+    unsigned ValueBits = ValueVT.getSizeInBits();
+    unsigned PartBits = PartVT.getSizeInBits();
+    Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
+    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
+    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+    Parts[0] = Val;
+    return true;
+  }
+  return false;
+}
+
+SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
+    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
+    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
+  bool IsABIRegCopy = CC.hasValue();
+  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
+    unsigned ValueBits = ValueVT.getSizeInBits();
+    unsigned PartBits = PartVT.getSizeInBits();
+    SDValue Val = Parts[0];
+    Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
+    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
+    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+    return Val;
+  }
+  return SDValue();
+}
+
 SDValue ARMTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -4152,32 +4206,28 @@ SDValue ARMTargetLowering::LowerFormalArguments(
       if (VA.isRegLoc()) {
         EVT RegVT = VA.getLocVT();
 
-        if (VA.needsCustom()) {
+        if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
           // f64 and vector types are split up into multiple registers or
           // combinations of registers and stack slots.
-          if (VA.getLocVT() == MVT::v2f64) {
-            SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
-                                                     Chain, DAG, dl);
+          SDValue ArgValue1 =
+              GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
           VA = ArgLocs[++i]; // skip ahead to next loc
           SDValue ArgValue2;
           if (VA.isMemLoc()) {
             int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
             SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
-                                    MachinePointerInfo::getFixedStack(
-                                        DAG.getMachineFunction(), FI));
+            ArgValue2 = DAG.getLoad(
+                MVT::f64, dl, Chain, FIN,
+                MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
           } else {
-            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
-                                             Chain, DAG, dl);
+            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
           }
           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
-          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
-                                 ArgValue, ArgValue1,
-                                 DAG.getIntPtrConstant(0, dl));
-          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
-                                 ArgValue, ArgValue2,
-                                 DAG.getIntPtrConstant(1, dl));
-          } else
-            ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
+                                 ArgValue1, DAG.getIntPtrConstant(0, dl));
+          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
+                                 ArgValue2, DAG.getIntPtrConstant(1, dl));
+        } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
+          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
         } else {
           const TargetRegisterClass *RC;
@@ -4229,6 +4279,18 @@ SDValue ARMTargetLowering::LowerFormalArguments(
           break;
         }
 
+        // f16 arguments have their size extended to 4 bytes and passed as if they
+        // had been copied to the LSBs of a 32-bit register.
+        // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
+        if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
+          assert(Subtarget->hasFullFP16() &&
+                 "Lowering f16 type argument without full fp16 support");
+          ArgValue = DAG.getNode(ISD::BITCAST, dl,
+                                 MVT::getIntegerVT(VA.getLocVT().getSizeInBits()),
+                                 ArgValue);
+          ArgValue = DAG.getNode(ARMISD::VMOVhr, dl, VA.getValVT(), ArgValue);
+        }
+
         InVals.push_back(ArgValue);
 
       } else { // VA.isRegLoc()
         // sanity check


@@ -806,6 +806,17 @@ class VectorType;
                                MachineBasicBlock *Entry,
                                const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
 
+    bool
+    splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+                                SDValue *Parts, unsigned NumParts, MVT PartVT,
+                                Optional<CallingConv::ID> CC) const override;
+
+    SDValue
+    joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
+                               const SDValue *Parts, unsigned NumParts,
+                               MVT PartVT, EVT ValueVT,
+                               Optional<CallingConv::ID> CC) const override;
+
     SDValue
     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,


@@ -49,7 +49,7 @@ define i17 @test_funny_ints(i17 %a, i17 %b) {
 }
 
 define half @test_half(half %a, half %b) {
-; CHECK: remark: {{.*}} unable to translate instruction: ret: ' ret half %res' (in function: test_half)
+; CHECK: remark: {{.*}} unable to lower arguments: half (half, half)* (in function: test_half)
 ; CHECK-LABEL: warning: Instruction selection used fallback path for test_half
   %res = fadd half %a, %b
   ret half %res


@@ -1,41 +1,46 @@
-; RUN: llc -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
-; RUN: llc -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv7a--none-eabi -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
+; RUN: llc -mtriple=armv7a--none-eabi -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+; RUN: llc -mtriple=armv7a--none-eabi -float-abi soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-SOFT
+; RUN: llc -mtriple=armv7a--none-eabi -float-abi hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-HARD
+; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
+; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-SOFT
+; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-HARD
 
-target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
-target triple = "armv7a--none-eabi"
-
-define float @foo(float %a.coerce, float %b.coerce) {
+define half @foo(half %a, half %b) {
+; SOFT-LABEL: foo:
+; SOFT:       @ %bb.0: @ %entry
+; SOFT-NEXT:    vmov s2, r1
+; SOFT-NEXT:    vmov s0, r0
+; SOFT-NEXT:    vcvtb.f32.f16 s2, s2
+; SOFT-NEXT:    vcvtb.f32.f16 s0, s0
+; SOFT-NEXT:    vadd.f32 s0, s0, s2
+; SOFT-NEXT:    vcvtb.f16.f32 s0, s0
+; SOFT-NEXT:    vmov r0, s0
+; SOFT-NEXT:    bx lr
+;
+; HARD-LABEL: foo:
+; HARD:       @ %bb.0: @ %entry
+; HARD-NEXT:    vcvtb.f32.f16 s2, s1
+; HARD-NEXT:    vcvtb.f32.f16 s0, s0
+; HARD-NEXT:    vadd.f32 s0, s0, s2
+; HARD-NEXT:    vcvtb.f16.f32 s0, s0
+; HARD-NEXT:    bx lr
+;
+; FULL-SOFT-LABEL: foo:
+; FULL-SOFT:       @ %bb.0: @ %entry
+; FULL-SOFT-NEXT:    vmov.f16 s0, r1
+; FULL-SOFT-NEXT:    vmov.f16 s2, r0
+; FULL-SOFT-NEXT:    vadd.f16 s0, s2, s0
+; FULL-SOFT-NEXT:    vmov r0, s0
+; FULL-SOFT-NEXT:    bx lr
+;
+; FULL-HARD-LABEL: foo:
+; FULL-HARD:       @ %bb.0: @ %entry
+; FULL-HARD-NEXT:    vadd.f16 s0, s0, s1
+; FULL-HARD-NEXT:    bx lr
 entry:
-  %0 = bitcast float %a.coerce to i32
-  %tmp.0.extract.trunc = trunc i32 %0 to i16
-  %1 = bitcast i16 %tmp.0.extract.trunc to half
-  %2 = bitcast float %b.coerce to i32
-  %tmp1.0.extract.trunc = trunc i32 %2 to i16
-  %3 = bitcast i16 %tmp1.0.extract.trunc to half
-  %4 = fadd half %1, %3
-  %5 = bitcast half %4 to i16
-  %tmp5.0.insert.ext = zext i16 %5 to i32
-  %6 = bitcast i32 %tmp5.0.insert.ext to float
-  ret float %6
-; CHECK: foo:
-; SOFT: vmov {{s[0-9]+}}, r1
-; SOFT: vmov {{s[0-9]+}}, r0
-; SOFT: vcvtb.f32.f16 {{s[0-9]+}}, {{s[0-9]+}}
-; SOFT: vcvtb.f32.f16 {{s[0-9]+}}, {{s[0-9]+}}
-; SOFT: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-; SOFT: vcvtb.f16.f32 {{s[0-9]+}}, {{s[0-9]+}}
-; SOFT: vmov r0, {{s[0-9]+}}
-; HARD-NOT: vmov
-; HARD-NOT: uxth
-; HARD: vcvtb.f32.f16 {{s[0-9]+}}, s1
-; HARD: vcvtb.f32.f16 {{s[0-9]+}}, s0
-; HARD: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-; HARD: vcvtb.f16.f32 [[SREG:s[0-9]+]], {{s[0-9]+}}
-; HARD-NEXT: vmov [[REG0:r[0-9]+]], [[SREG]]
-; HARD-NEXT: uxth [[REG1:r[0-9]+]], [[REG0]]
-; HARD-NEXT: vmov s0, [[REG1]]
-; CHECK: bx lr
+  %0 = fadd half %a, %b
+  ret half %0
 }


@ -1,71 +1,115 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple thumbv8m.main-arm-unknown-eabi -mattr=+vfp4d16sp < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-VFPV4 ; RUN: llc -mtriple thumbv8m.main-arm-unknown-eabi --float-abi=soft -mattr=+vfp4d16sp < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-VFPV4-SOFT
; RUN: llc -mtriple thumbv8.1m.main-arm-unknown-eabi -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16 ; RUN: llc -mtriple thumbv8.1m.main-arm-unknown-eabi --float-abi=soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16-SOFT
; RUN: llc -mtriple thumbv8m.main-arm-unknown-eabi --float-abi=hard -mattr=+vfp4d16sp < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-VFPV4-HARD
; RUN: llc -mtriple thumbv8.1m.main-arm-unknown-eabi --float-abi=hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16-HARD
target triple = "thumbv8.1m.main-arm-unknown-eabi" target triple = "thumbv8.1m.main-arm-unknown-eabi"
define float @add(float %a, float %b) { define float @add(float %a, float %b) {
; CHECK-LABEL: add: ; CHECK-VFPV4-SOFT-LABEL: add:
; CHECK: @ %bb.0: @ %entry ; CHECK-VFPV4-SOFT: @ %bb.0: @ %entry
; CHECK-NEXT: vmov s0, r1 ; CHECK-VFPV4-SOFT-NEXT: vmov s0, r1
; CHECK-NEXT: vmov s2, r0 ; CHECK-VFPV4-SOFT-NEXT: vmov s2, r0
; CHECK-NEXT: vadd.f32 s0, s2, s0 ; CHECK-VFPV4-SOFT-NEXT: vadd.f32 s0, s2, s0
; CHECK-NEXT: vmov r0, s0 ; CHECK-VFPV4-SOFT-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-VFPV4-SOFT-NEXT: bx lr
;
; CHECK-FP16-SOFT-LABEL: add:
; CHECK-FP16-SOFT: @ %bb.0: @ %entry
; CHECK-FP16-SOFT-NEXT: vmov s0, r1
; CHECK-FP16-SOFT-NEXT: vmov s2, r0
; CHECK-FP16-SOFT-NEXT: vadd.f32 s0, s2, s0
; CHECK-FP16-SOFT-NEXT: vmov r0, s0
; CHECK-FP16-SOFT-NEXT: bx lr
;
; CHECK-VFPV4-HARD-LABEL: add:
; CHECK-VFPV4-HARD: @ %bb.0: @ %entry
; CHECK-VFPV4-HARD-NEXT: vadd.f32 s0, s0, s1
; CHECK-VFPV4-HARD-NEXT: bx lr
;
; CHECK-FP16-HARD-LABEL: add:
; CHECK-FP16-HARD: @ %bb.0: @ %entry
; CHECK-FP16-HARD-NEXT: vadd.f32 s0, s0, s1
; CHECK-FP16-HARD-NEXT: bx lr
entry: entry:
%add = fadd float %a, %b %add = fadd float %a, %b
ret float %add ret float %add
} }
define i32 @addf16(i32 %a.coerce, i32 %b.coerce) { define half @addf16(half %a, half %b) {
; CHECK-VFPV4-LABEL: addf16: ; CHECK-VFPV4-SOFT-LABEL: addf16:
; CHECK-VFPV4: @ %bb.0: @ %entry ; CHECK-VFPV4-SOFT: @ %bb.0: @ %entry
; CHECK-VFPV4-NEXT: vmov s2, r1 ; CHECK-VFPV4-SOFT-NEXT: vmov s2, r1
; CHECK-VFPV4-NEXT: vmov s0, r0 ; CHECK-VFPV4-SOFT-NEXT: vmov s0, r0
; CHECK-VFPV4-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-VFPV4-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-VFPV4-NEXT: vadd.f32 s0, s0, s2 ; CHECK-VFPV4-SOFT-NEXT: vadd.f32 s0, s0, s2
; CHECK-VFPV4-NEXT: vcvtb.f16.f32 s0, s0 ; CHECK-VFPV4-SOFT-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-VFPV4-NEXT: vmov r0, s0 ; CHECK-VFPV4-SOFT-NEXT: vmov r0, s0
; CHECK-VFPV4-NEXT: uxth r0, r0 ; CHECK-VFPV4-SOFT-NEXT: bx lr
; CHECK-VFPV4-NEXT: bx lr
; ;
; CHECK-FP16-LABEL: addf16: ; CHECK-FP16-SOFT-LABEL: addf16:
; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-SOFT: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmov.f16 s0, r1 ; CHECK-FP16-SOFT-NEXT: vmov.f16 s0, r1
; CHECK-FP16-NEXT: vmov.f16 s2, r0 ; CHECK-FP16-SOFT-NEXT: vmov.f16 s2, r0
; CHECK-FP16-NEXT: vadd.f16 s0, s2, s0 ; CHECK-FP16-SOFT-NEXT: vadd.f16 s0, s2, s0
; CHECK-FP16-NEXT: vmov.f16 r0, s0 ; CHECK-FP16-SOFT-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: bx lr ; CHECK-FP16-SOFT-NEXT: bx lr
;
; CHECK-VFPV4-HARD-LABEL: addf16:
; CHECK-VFPV4-HARD: @ %bb.0: @ %entry
; CHECK-VFPV4-HARD-NEXT: vcvtb.f32.f16 s2, s1
; CHECK-VFPV4-HARD-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-VFPV4-HARD-NEXT: vadd.f32 s0, s0, s2
; CHECK-VFPV4-HARD-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-VFPV4-HARD-NEXT: bx lr
;
; CHECK-FP16-HARD-LABEL: addf16:
; CHECK-FP16-HARD: @ %bb.0: @ %entry
; CHECK-FP16-HARD-NEXT: vadd.f16 s0, s0, s1
; CHECK-FP16-HARD-NEXT: bx lr
entry: entry:
%tmp.0.extract.trunc = trunc i32 %a.coerce to i16 %add = fadd half %a, %b
%0 = bitcast i16 %tmp.0.extract.trunc to half ret half %add
%tmp1.0.extract.trunc = trunc i32 %b.coerce to i16
%1 = bitcast i16 %tmp1.0.extract.trunc to half
%add = fadd half %0, %1
%2 = bitcast half %add to i16
%tmp4.0.insert.ext = zext i16 %2 to i32
ret i32 %tmp4.0.insert.ext
} }
define half @load_i16(i16 *%hp) { define half @load_i16(i16 *%hp) {
; CHECK-VFPV4-LABEL: load_i16: ; CHECK-VFPV4-SOFT-LABEL: load_i16:
; CHECK-VFPV4: @ %bb.0: @ %entry ; CHECK-VFPV4-SOFT: @ %bb.0: @ %entry
; CHECK-VFPV4-NEXT: vmov.f32 s0, #1.000000e+00 ; CHECK-VFPV4-SOFT-NEXT: vmov.f32 s0, #1.000000e+00
; CHECK-VFPV4-NEXT: ldrh r0, [r0] ; CHECK-VFPV4-SOFT-NEXT: ldrh r0, [r0]
; CHECK-VFPV4-NEXT: vmov s2, r0 ; CHECK-VFPV4-SOFT-NEXT: vmov s2, r0
; CHECK-VFPV4-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-VFPV4-NEXT: vadd.f32 s0, s2, s0 ; CHECK-VFPV4-SOFT-NEXT: vadd.f32 s0, s2, s0
; CHECK-VFPV4-NEXT: vmov r0, s0 ; CHECK-VFPV4-SOFT-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-VFPV4-NEXT: bx lr ; CHECK-VFPV4-SOFT-NEXT: vmov r0, s0
; CHECK-VFPV4-SOFT-NEXT: bx lr
; ;
; CHECK-FP16-LABEL: load_i16: ; CHECK-FP16-SOFT-LABEL: load_i16:
; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-SOFT: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vldr.16 s2, [r1] ; CHECK-FP16-SOFT-NEXT: vldr.16 s2, [r0]
; CHECK-FP16-NEXT: vmov.f16 s0, #1.000000e+00 ; CHECK-FP16-SOFT-NEXT: vmov.f16 s0, #1.000000e+00
; CHECK-FP16-NEXT: vadd.f16 s0, s2, s0 ; CHECK-FP16-SOFT-NEXT: vadd.f16 s0, s2, s0
; CHECK-FP16-NEXT: vstr.16 s0, [r0] ; CHECK-FP16-SOFT-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: bx lr ; CHECK-FP16-SOFT-NEXT: bx lr
;
; CHECK-VFPV4-HARD-LABEL: load_i16:
; CHECK-VFPV4-HARD: @ %bb.0: @ %entry
; CHECK-VFPV4-HARD-NEXT: vmov.f32 s0, #1.000000e+00
; CHECK-VFPV4-HARD-NEXT: ldrh r0, [r0]
; CHECK-VFPV4-HARD-NEXT: vmov s2, r0
; CHECK-VFPV4-HARD-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-VFPV4-HARD-NEXT: vadd.f32 s0, s2, s0
; CHECK-VFPV4-HARD-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-VFPV4-HARD-NEXT: bx lr
;
; CHECK-FP16-HARD-LABEL: load_i16:
; CHECK-FP16-HARD: @ %bb.0: @ %entry
; CHECK-FP16-HARD-NEXT: vldr.16 s2, [r0]
; CHECK-FP16-HARD-NEXT: vmov.f16 s0, #1.000000e+00
; CHECK-FP16-HARD-NEXT: vadd.f16 s0, s2, s0
; CHECK-FP16-HARD-NEXT: bx lr
entry: entry:
%h = load i16, i16 *%hp, align 2 %h = load i16, i16 *%hp, align 2
%hc = bitcast i16 %h to half %hc = bitcast i16 %h to half


@@ -933,7 +933,6 @@ define void @test_extractvalue(%struct.dummy* %p, half* %q) {
 }
 
 ; CHECK-ALL-LABEL: test_struct_return:
-; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-VFP-LIBCALL: bl __aeabi_h2f
 ; CHECK-NOVFP-DAG: ldr
 ; CHECK-NOVFP-DAG: ldrh


@ -2,266 +2,235 @@
; RUN: llc < %s -mtriple=armv8-eabi -mattr=+fullfp16 | FileCheck %s ; RUN: llc < %s -mtriple=armv8-eabi -mattr=+fullfp16 | FileCheck %s
; RUN: llc < %s -mtriple thumbv7a -mattr=+fullfp16 | FileCheck %s ; RUN: llc < %s -mtriple thumbv7a -mattr=+fullfp16 | FileCheck %s
; TODO: we can't pass half-precision arguments as "half" types yet. We do define half @fp16_vminnm_o(half %a, half %b) {
; that for the time being by passing "float %f.coerce" and the necessary
; bitconverts/truncates. In these tests we pass i16 and use 1 bitconvert, which
; is the shortest way to get a half type. But when we can pass half types, we
; want to use that here.
define half @fp16_vminnm_o(i16 signext %a, i16 signext %b) {
; CHECK-LABEL: fp16_vminnm_o: ; CHECK-LABEL: fp16_vminnm_o:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r2 ; CHECK-NEXT: vmov.f16 s0, r1
; CHECK-NEXT: vmov.f16 s2, r1 ; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%0 = bitcast i16 %a to half %cmp = fcmp olt half %a, %b
%1 = bitcast i16 %b to half %cond = select i1 %cmp, half %a, half %b
%cmp = fcmp olt half %0, %1
%cond = select i1 %cmp, half %0, half %1
ret half %cond ret half %cond
} }
define half @fp16_vminnm_o_rev(i16 signext %a, i16 signext %b) { define half @fp16_vminnm_o_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_o_rev: ; CHECK-LABEL: fp16_vminnm_o_rev:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r2 ; CHECK-NEXT: vmov.f16 s0, r1
; CHECK-NEXT: vmov.f16 s2, r1 ; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%0 = bitcast i16 %a to half %cmp = fcmp ogt half %a, %b
%1 = bitcast i16 %b to half %cond = select i1 %cmp, half %a, half %b
%cmp = fcmp ogt half %0, %1
%cond = select i1 %cmp, half %0, half %1
ret half %cond ret half %cond
} }
define half @fp16_vminnm_u(i16 signext %a, i16 signext %b) { define half @fp16_vminnm_u(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_u: ; CHECK-LABEL: fp16_vminnm_u:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, r2 ; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%0 = bitcast i16 %a to half %cmp = fcmp ult half %a, %b
%1 = bitcast i16 %b to half %cond = select i1 %cmp, half %a, half %b
%cmp = fcmp ult half %0, %1
%cond = select i1 %cmp, half %0, half %1
ret half %cond ret half %cond
} }
define half @fp16_vminnm_ule(i16 signext %a, i16 signext %b) { define half @fp16_vminnm_ule(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_ule: ; CHECK-LABEL: fp16_vminnm_ule:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, r2 ; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%0 = bitcast i16 %a to half %cmp = fcmp ule half %a, %b
%1 = bitcast i16 %b to half %cond = select i1 %cmp, half %a, half %b
%cmp = fcmp ule half %0, %1
%cond = select i1 %cmp, half %0, half %1
ret half %cond ret half %cond
} }
define half @fp16_vminnm_u_rev(i16 signext %a, i16 signext %b) { define half @fp16_vminnm_u_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_u_rev: ; CHECK-LABEL: fp16_vminnm_u_rev:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r2 ; CHECK-NEXT: vmov.f16 s0, r1
; CHECK-NEXT: vmov.f16 s2, r1 ; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%0 = bitcast i16 %a to half %cmp = fcmp ugt half %a, %b
%1 = bitcast i16 %b to half %cond = select i1 %cmp, half %b, half %a
%cmp = fcmp ugt half %0, %1
%cond = select i1 %cmp, half %1, half %0
ret half %cond ret half %cond
} }
define half @fp16_vmaxnm_o(i16 signext %a, i16 signext %b) { define half @fp16_vmaxnm_o(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_o: ; CHECK-LABEL: fp16_vmaxnm_o:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r2 ; CHECK-NEXT: vmov.f16 s0, r1
; CHECK-NEXT: vmov.f16 s2, r1 ; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%0 = bitcast i16 %a to half %cmp = fcmp ogt half %a, %b
%1 = bitcast i16 %b to half %cond = select i1 %cmp, half %a, half %b
%cmp = fcmp ogt half %0, %1
%cond = select i1 %cmp, half %0, half %1
ret half %cond ret half %cond
} }
define half @fp16_vmaxnm_oge(i16 signext %a, i16 signext %b) { define half @fp16_vmaxnm_oge(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_oge: ; CHECK-LABEL: fp16_vmaxnm_oge:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r2 ; CHECK-NEXT: vmov.f16 s0, r1
; CHECK-NEXT: vmov.f16 s2, r1 ; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%0 = bitcast i16 %a to half %cmp = fcmp oge half %a, %b
%1 = bitcast i16 %b to half %cond = select i1 %cmp, half %a, half %b
%cmp = fcmp oge half %0, %1
%cond = select i1 %cmp, half %0, half %1
ret half %cond ret half %cond
} }
define half @fp16_vmaxnm_o_rev(i16 signext %a, i16 signext %b) { define half @fp16_vmaxnm_o_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_o_rev: ; CHECK-LABEL: fp16_vmaxnm_o_rev:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, r2 ; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%0 = bitcast i16 %a to half %cmp = fcmp olt half %a, %b
%1 = bitcast i16 %b to half %cond = select i1 %cmp, half %b, half %a
%cmp = fcmp olt half %0, %1
%cond = select i1 %cmp, half %1, half %0
ret half %cond ret half %cond
} }
define half @fp16_vmaxnm_ole_rev(i16 signext %a, i16 signext %b) { define half @fp16_vmaxnm_ole_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_ole_rev: ; CHECK-LABEL: fp16_vmaxnm_ole_rev:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, r2
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
%cmp = fcmp ole half %0, %1
%cond = select i1 %cmp, half %1, half %0
ret half %cond
}
define half @fp16_vmaxnm_u(i16 signext %a, i16 signext %b) {
; CHECK-LABEL: fp16_vmaxnm_u:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1
; CHECK-NEXT: vmov.f16 s2, r2
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
%cmp = fcmp ugt half %0, %1
%cond = select i1 %cmp, half %0, half %1
ret half %cond
}
define half @fp16_vmaxnm_uge(i16 signext %a, i16 signext %b) {
; CHECK-LABEL: fp16_vmaxnm_uge:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1
; CHECK-NEXT: vmov.f16 s2, r2
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i16 %a to half
%1 = bitcast i16 %b to half
%cmp = fcmp uge half %0, %1
%cond = select i1 %cmp, half %0, half %1
ret half %cond
}
define half @fp16_vmaxnm_u_rev(i16 signext %a, i16 signext %b) {
; CHECK-LABEL: fp16_vmaxnm_u_rev:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r2
; CHECK-NEXT: vmov.f16 s2, r1 ; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%0 = bitcast i16 %a to half %cmp = fcmp ole half %a, %b
%1 = bitcast i16 %b to half %cond = select i1 %cmp, half %b, half %a
%cmp = fcmp ult half %0, %1 ret half %cond
%cond = select i1 %cmp, half %1, half %0 }
define half @fp16_vmaxnm_u(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_u:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
%cmp = fcmp ugt half %a, %b
%cond = select i1 %cmp, half %a, half %b
ret half %cond
}
define half @fp16_vmaxnm_uge(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_uge:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
%cmp = fcmp uge half %a, %b
%cond = select i1 %cmp, half %a, half %b
ret half %cond
}
define half @fp16_vmaxnm_u_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_u_rev:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1
; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
%cmp = fcmp ult half %a, %b
%cond = select i1 %cmp, half %b, half %a
ret half %cond ret half %cond
} }
; known non-NaNs ; known non-NaNs
define half @fp16_vminnm_NNNo(i16 signext %a) { define half @fp16_vminnm_NNNo(half %a) {
; CHECK-LABEL: fp16_vminnm_NNNo: ; CHECK-LABEL: fp16_vminnm_NNNo:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01 ; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
; CHECK-NEXT: vminnm.f16 s0, s0, s2 ; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI12_0 ; CHECK-NEXT: vldr.16 s2, .LCPI12_0
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI12_0: ; CHECK-NEXT: .LCPI12_0:
; CHECK-NEXT: .short 0x5040 @ half 34 ; CHECK-NEXT: .short 0x5040 @ half 34
entry: entry:
%0 = bitcast i16 %a to half %cmp1 = fcmp olt half %a, 12.
%cmp1 = fcmp olt half %0, 12. %cond1 = select i1 %cmp1, half %a, half 12.
%cond1 = select i1 %cmp1, half %0, half 12.
%cmp2 = fcmp olt half 34., %cond1 %cmp2 = fcmp olt half 34., %cond1
%cond2 = select i1 %cmp2, half 34., half %cond1 %cond2 = select i1 %cmp2, half 34., half %cond1
ret half %cond2 ret half %cond2
} }
define half @fp16_vminnm_NNNo_rev(i16 signext %a) { define half @fp16_vminnm_NNNo_rev(half %a) {
; CHECK-LABEL: fp16_vminnm_NNNo_rev: ; CHECK-LABEL: fp16_vminnm_NNNo_rev:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI13_0 ; CHECK-NEXT: vldr.16 s2, .LCPI13_0
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI13_1 ; CHECK-NEXT: vldr.16 s2, .LCPI13_1
; CHECK-NEXT: vminnm.f16 s0, s0, s2 ; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
@ -270,50 +239,48 @@ define half @fp16_vminnm_NNNo_rev(i16 signext %a) {
; CHECK-NEXT: .LCPI13_1: ; CHECK-NEXT: .LCPI13_1:
; CHECK-NEXT: .short 0x54e0 @ half 78 ; CHECK-NEXT: .short 0x54e0 @ half 78
entry: entry:
%0 = bitcast i16 %a to half %cmp1 = fcmp ogt half %a, 56.
%cmp1 = fcmp ogt half %0, 56. %cond1 = select i1 %cmp1, half 56., half %a
%cond1 = select i1 %cmp1, half 56., half %0
%cmp2 = fcmp ogt half 78., %cond1 %cmp2 = fcmp ogt half 78., %cond1
%cond2 = select i1 %cmp2, half %cond1, half 78. %cond2 = select i1 %cmp2, half %cond1, half 78.
ret half %cond2 ret half %cond2
} }
define half @fp16_vminnm_NNNu(i16 signext %b) { define half @fp16_vminnm_NNNu(half %b) {
; CHECK-LABEL: fp16_vminnm_NNNu: ; CHECK-LABEL: fp16_vminnm_NNNu:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01 ; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
; CHECK-NEXT: vminnm.f16 s0, s0, s2 ; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI14_0 ; CHECK-NEXT: vldr.16 s2, .LCPI14_0
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI14_0: ; CHECK-NEXT: .LCPI14_0:
; CHECK-NEXT: .short 0x5040 @ half 34 ; CHECK-NEXT: .short 0x5040 @ half 34
entry: entry:
%0 = bitcast i16 %b to half %cmp1 = fcmp ult half 12., %b
%cmp1 = fcmp ult half 12., %0 %cond1 = select i1 %cmp1, half 12., half %b
%cond1 = select i1 %cmp1, half 12., half %0
%cmp2 = fcmp ult half %cond1, 34. %cmp2 = fcmp ult half %cond1, 34.
%cond2 = select i1 %cmp2, half %cond1, half 34. %cond2 = select i1 %cmp2, half %cond1, half 34.
ret half %cond2 ret half %cond2
} }
define half @fp16_vminnm_NNNule(i16 signext %b) { define half @fp16_vminnm_NNNule(half %b) {
; CHECK-LABEL: fp16_vminnm_NNNule: ; CHECK-LABEL: fp16_vminnm_NNNule:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI15_0 ; CHECK-NEXT: vldr.16 s2, .LCPI15_0
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vminnm.f16 s0, s0, s2 ; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI15_1 ; CHECK-NEXT: vldr.16 s2, .LCPI15_1
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
@ -323,25 +290,24 @@ define half @fp16_vminnm_NNNule(i16 signext %b) {
; CHECK-NEXT: .short 0x5300 @ half 56 ; CHECK-NEXT: .short 0x5300 @ half 56
entry: entry:
%0 = bitcast i16 %b to half %cmp1 = fcmp ule half 34., %b
%cmp1 = fcmp ule half 34., %0 %cond1 = select i1 %cmp1, half 34., half %b
%cond1 = select i1 %cmp1, half 34., half %0
%cmp2 = fcmp ule half %cond1, 56. %cmp2 = fcmp ule half %cond1, 56.
%cond2 = select i1 %cmp2, half %cond1, half 56. %cond2 = select i1 %cmp2, half %cond1, half 56.
ret half %cond2 ret half %cond2
} }
define half @fp16_vminnm_NNNu_rev(i16 signext %b) { define half @fp16_vminnm_NNNu_rev(half %b) {
; CHECK-LABEL: fp16_vminnm_NNNu_rev: ; CHECK-LABEL: fp16_vminnm_NNNu_rev:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI16_0 ; CHECK-NEXT: vldr.16 s2, .LCPI16_0
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI16_1 ; CHECK-NEXT: vldr.16 s2, .LCPI16_1
; CHECK-NEXT: vminnm.f16 s0, s0, s2 ; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
@ -352,50 +318,48 @@ define half @fp16_vminnm_NNNu_rev(i16 signext %b) {
entry: entry:
%0 = bitcast i16 %b to half %cmp1 = fcmp ugt half 56., %b
%cmp1 = fcmp ugt half 56., %0 %cond1 = select i1 %cmp1, half %b, half 56.
%cond1 = select i1 %cmp1, half %0, half 56.
%cmp2 = fcmp ugt half %cond1, 78. %cmp2 = fcmp ugt half %cond1, 78.
%cond2 = select i1 %cmp2, half 78., half %cond1 %cond2 = select i1 %cmp2, half 78., half %cond1
ret half %cond2 ret half %cond2
} }
define half @fp16_vmaxnm_NNNo(i16 signext %a) { define half @fp16_vmaxnm_NNNo(half %a) {
; CHECK-LABEL: fp16_vmaxnm_NNNo: ; CHECK-LABEL: fp16_vmaxnm_NNNo:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01 ; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI17_0 ; CHECK-NEXT: vldr.16 s2, .LCPI17_0
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI17_0: ; CHECK-NEXT: .LCPI17_0:
; CHECK-NEXT: .short 0x5040 @ half 34 ; CHECK-NEXT: .short 0x5040 @ half 34
entry: entry:
%0 = bitcast i16 %a to half %cmp1 = fcmp ogt half %a, 12.
%cmp1 = fcmp ogt half %0, 12. %cond1 = select i1 %cmp1, half %a, half 12.
%cond1 = select i1 %cmp1, half %0, half 12.
%cmp2 = fcmp ogt half 34., %cond1 %cmp2 = fcmp ogt half 34., %cond1
%cond2 = select i1 %cmp2, half 34., half %cond1 %cond2 = select i1 %cmp2, half 34., half %cond1
ret half %cond2 ret half %cond2
} }
define half @fp16_vmaxnm_NNNoge(i16 signext %a) { define half @fp16_vmaxnm_NNNoge(half %a) {
; CHECK-LABEL: fp16_vmaxnm_NNNoge: ; CHECK-LABEL: fp16_vmaxnm_NNNoge:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI18_0 ; CHECK-NEXT: vldr.16 s2, .LCPI18_0
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI18_1 ; CHECK-NEXT: vldr.16 s2, .LCPI18_1
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
@ -404,25 +368,24 @@ define half @fp16_vmaxnm_NNNoge(i16 signext %a) {
; CHECK-NEXT: .LCPI18_1: ; CHECK-NEXT: .LCPI18_1:
; CHECK-NEXT: .short 0x5300 @ half 56 ; CHECK-NEXT: .short 0x5300 @ half 56
entry: entry:
%0 = bitcast i16 %a to half %cmp1 = fcmp oge half %a, 34.
%cmp1 = fcmp oge half %0, 34. %cond1 = select i1 %cmp1, half %a, half 34.
%cond1 = select i1 %cmp1, half %0, half 34.
%cmp2 = fcmp oge half 56., %cond1 %cmp2 = fcmp oge half 56., %cond1
%cond2 = select i1 %cmp2, half 56., half %cond1 %cond2 = select i1 %cmp2, half 56., half %cond1
ret half %cond2 ret half %cond2
} }
define half @fp16_vmaxnm_NNNo_rev(i16 signext %a) { define half @fp16_vmaxnm_NNNo_rev(half %a) {
; CHECK-LABEL: fp16_vmaxnm_NNNo_rev: ; CHECK-LABEL: fp16_vmaxnm_NNNo_rev:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI19_0 ; CHECK-NEXT: vldr.16 s2, .LCPI19_0
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI19_1 ; CHECK-NEXT: vldr.16 s2, .LCPI19_1
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
@ -431,25 +394,24 @@ define half @fp16_vmaxnm_NNNo_rev(i16 signext %a) {
; CHECK-NEXT: .LCPI19_1: ; CHECK-NEXT: .LCPI19_1:
; CHECK-NEXT: .short 0x54e0 @ half 78 ; CHECK-NEXT: .short 0x54e0 @ half 78
entry: entry:
%0 = bitcast i16 %a to half %cmp1 = fcmp olt half %a, 56.
%cmp1 = fcmp olt half %0, 56. %cond1 = select i1 %cmp1, half 56., half %a
%cond1 = select i1 %cmp1, half 56., half %0
%cmp2 = fcmp olt half 78., %cond1 %cmp2 = fcmp olt half 78., %cond1
%cond2 = select i1 %cmp2, half %cond1, half 78. %cond2 = select i1 %cmp2, half %cond1, half 78.
ret half %cond2 ret half %cond2
} }
define half @fp16_vmaxnm_NNNole_rev(i16 signext %a) { define half @fp16_vmaxnm_NNNole_rev(half %a) {
; CHECK-LABEL: fp16_vmaxnm_NNNole_rev: ; CHECK-LABEL: fp16_vmaxnm_NNNole_rev:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI20_0 ; CHECK-NEXT: vldr.16 s2, .LCPI20_0
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI20_1 ; CHECK-NEXT: vldr.16 s2, .LCPI20_1
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
@ -458,50 +420,48 @@ define half @fp16_vmaxnm_NNNole_rev(i16 signext %a) {
; CHECK-NEXT: .LCPI20_1: ; CHECK-NEXT: .LCPI20_1:
; CHECK-NEXT: .short 0x55a0 @ half 90 ; CHECK-NEXT: .short 0x55a0 @ half 90
entry: entry:
%0 = bitcast i16 %a to half %cmp1 = fcmp ole half %a, 78.
%cmp1 = fcmp ole half %0, 78. %cond1 = select i1 %cmp1, half 78., half %a
%cond1 = select i1 %cmp1, half 78., half %0
%cmp2 = fcmp ole half 90., %cond1 %cmp2 = fcmp ole half 90., %cond1
%cond2 = select i1 %cmp2, half %cond1, half 90. %cond2 = select i1 %cmp2, half %cond1, half 90.
ret half %cond2 ret half %cond2
} }
define half @fp16_vmaxnm_NNNu(i16 signext %b) { define half @fp16_vmaxnm_NNNu(half %b) {
; CHECK-LABEL: fp16_vmaxnm_NNNu: ; CHECK-LABEL: fp16_vmaxnm_NNNu:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01 ; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI21_0 ; CHECK-NEXT: vldr.16 s2, .LCPI21_0
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI21_0: ; CHECK-NEXT: .LCPI21_0:
; CHECK-NEXT: .short 0x5040 @ half 34 ; CHECK-NEXT: .short 0x5040 @ half 34
entry: entry:
%0 = bitcast i16 %b to half %cmp1 = fcmp ugt half 12., %b
%cmp1 = fcmp ugt half 12., %0 %cond1 = select i1 %cmp1, half 12., half %b
%cond1 = select i1 %cmp1, half 12., half %0
%cmp2 = fcmp ugt half %cond1, 34. %cmp2 = fcmp ugt half %cond1, 34.
%cond2 = select i1 %cmp2, half %cond1, half 34. %cond2 = select i1 %cmp2, half %cond1, half 34.
ret half %cond2 ret half %cond2
} }
define half @fp16_vmaxnm_NNNuge(i16 signext %b) { define half @fp16_vmaxnm_NNNuge(half %b) {
; CHECK-LABEL: fp16_vmaxnm_NNNuge: ; CHECK-LABEL: fp16_vmaxnm_NNNuge:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI22_0 ; CHECK-NEXT: vldr.16 s2, .LCPI22_0
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI22_1 ; CHECK-NEXT: vldr.16 s2, .LCPI22_1
; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
@ -510,81 +470,77 @@ define half @fp16_vmaxnm_NNNuge(i16 signext %b) {
; CHECK-NEXT: .LCPI22_1: ; CHECK-NEXT: .LCPI22_1:
; CHECK-NEXT: .short 0x5300 @ half 56 ; CHECK-NEXT: .short 0x5300 @ half 56
entry: entry:
%0 = bitcast i16 %b to half %cmp1 = fcmp uge half 34., %b
%cmp1 = fcmp uge half 34., %0 %cond1 = select i1 %cmp1, half 34., half %b
%cond1 = select i1 %cmp1, half 34., half %0
%cmp2 = fcmp uge half %cond1, 56. %cmp2 = fcmp uge half %cond1, 56.
%cond2 = select i1 %cmp2, half %cond1, half 56. %cond2 = select i1 %cmp2, half %cond1, half 56.
ret half %cond2 ret half %cond2
} }
define half @fp16_vminmaxnm_neg0(i16 signext %a) { define half @fp16_vminmaxnm_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_neg0: ; CHECK-LABEL: fp16_vminmaxnm_neg0:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s0, .LCPI23_0 ; CHECK-NEXT: vldr.16 s0, .LCPI23_0
; CHECK-NEXT: vmov.f16 s2, r1 ; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vminnm.f16 s2, s2, s0 ; CHECK-NEXT: vminnm.f16 s2, s2, s0
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s0, s2 ; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI23_0: ; CHECK-NEXT: .LCPI23_0:
; CHECK-NEXT: .short 0x8000 @ half -0 ; CHECK-NEXT: .short 0x8000 @ half -0
entry: entry:
%0 = bitcast i16 %a to half %cmp1 = fcmp olt half %a, -0.
%cmp1 = fcmp olt half %0, -0. %cond1 = select i1 %cmp1, half %a, half -0.
%cond1 = select i1 %cmp1, half %0, half -0.
%cmp2 = fcmp ugt half %cond1, -0. %cmp2 = fcmp ugt half %cond1, -0.
%cond2 = select i1 %cmp2, half %cond1, half -0. %cond2 = select i1 %cmp2, half %cond1, half -0.
ret half %cond2 ret half %cond2
} }
define half @fp16_vminmaxnm_e_0(i16 signext %a) { define half @fp16_vminmaxnm_e_0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_0: ; CHECK-LABEL: fp16_vminmaxnm_e_0:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r1 ; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vldr.16 s2, .LCPI24_0 ; CHECK-NEXT: vldr.16 s2, .LCPI24_0
; CHECK-NEXT: vcmp.f16 s0, #0 ; CHECK-NEXT: vcmp.f16 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI24_0: ; CHECK-NEXT: .LCPI24_0:
; CHECK-NEXT: .short 0x0000 @ half 0 ; CHECK-NEXT: .short 0x0000 @ half 0
entry: entry:
%0 = bitcast i16 %a to half %cmp1 = fcmp nsz ole half 0., %a
%cmp1 = fcmp nsz ole half 0., %0 %cond1 = select i1 %cmp1, half 0., half %a
%cond1 = select i1 %cmp1, half 0., half %0
%cmp2 = fcmp nsz uge half 0., %cond1 %cmp2 = fcmp nsz uge half 0., %cond1
%cond2 = select i1 %cmp2, half 0., half %cond1 %cond2 = select i1 %cmp2, half 0., half %cond1
ret half %cond2 ret half %cond2
} }
define half @fp16_vminmaxnm_e_neg0(i16 signext %a) { define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_neg0: ; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s0, .LCPI25_0 ; CHECK-NEXT: vldr.16 s0, .LCPI25_0
; CHECK-NEXT: vmov.f16 s2, r1 ; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vminnm.f16 s2, s2, s0 ; CHECK-NEXT: vminnm.f16 s2, s2, s0
; CHECK-NEXT: vcmp.f16 s0, s2 ; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s0, s2 ; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI25_0: ; CHECK-NEXT: .LCPI25_0:
; CHECK-NEXT: .short 0x8000 @ half -0 ; CHECK-NEXT: .short 0x8000 @ half -0
entry: entry:
%0 = bitcast i16 %a to half %cmp1 = fcmp nsz ule half -0., %a
%cmp1 = fcmp nsz ule half -0., %0 %cond1 = select i1 %cmp1, half -0., half %a
%cond1 = select i1 %cmp1, half -0., half %0
%cmp2 = fcmp nsz oge half -0., %cond1 %cmp2 = fcmp nsz oge half -0., %cond1
%cond2 = select i1 %cmp2, half -0., half %cond1 %cond2 = select i1 %cmp2, half -0., half %cond1
ret half %cond2 ret half %cond2

@ -21,6 +21,7 @@ define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vadd.f32 s0, s2, s0 ; CHECK-NEXT: vadd.f32 s0, s2, s0
; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr ; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .p2align 2

@ -21,6 +21,7 @@ define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr ; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .p2align 2

@ -682,7 +682,7 @@ entry:
define arm_aapcs_vfpcc half @extract_f16_0(<8 x half> %a) { define arm_aapcs_vfpcc half @extract_f16_0(<8 x half> %a) {
; CHECK-LABEL: extract_f16_0: ; CHECK-LABEL: extract_f16_0:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%res = extractelement <8 x half> %a, i32 0 %res = extractelement <8 x half> %a, i32 0
@ -693,7 +693,6 @@ define arm_aapcs_vfpcc half @extract_f16_3(<8 x half> %a) {
; CHECK-LABEL: extract_f16_3: ; CHECK-LABEL: extract_f16_3:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovx.f16 s0, s1 ; CHECK-NEXT: vmovx.f16 s0, s1
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%res = extractelement <8 x half> %a, i32 3 %res = extractelement <8 x half> %a, i32 3

@ -253,10 +253,9 @@ entry:
define arm_aapcs_vfpcc half @vdup_f16_extract(half* %src1, half* %src2) { define arm_aapcs_vfpcc half @vdup_f16_extract(half* %src1, half* %src2) {
; CHECK-LABEL: vdup_f16_extract: ; CHECK-LABEL: vdup_f16_extract:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s0, [r2] ; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r1] ; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vadd.f16 s0, s2, s0 ; CHECK-NEXT: vadd.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%0 = load half, half *%src1, align 2 %0 = load half, half *%src1, align 2

@ -78,7 +78,6 @@ define arm_aapcs_vfpcc half @fmin_v4f16(<4 x half> %x) {
; CHECK-NEXT: vminnm.f16 s0, s0, s2 ; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vminnm.f16 s0, s0, s2 ; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vminnm.f16 s0, s0, s2 ; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
@ -103,7 +102,6 @@ define arm_aapcs_vfpcc half @fmin_v8f16(<8 x half> %x) {
; CHECK-NEXT: vminnm.f16 s4, s4, s6 ; CHECK-NEXT: vminnm.f16 s4, s4, s6
; CHECK-NEXT: vminnm.f16 s4, s4, s3 ; CHECK-NEXT: vminnm.f16 s4, s4, s3
; CHECK-NEXT: vminnm.f16 s0, s4, s0 ; CHECK-NEXT: vminnm.f16 s0, s4, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%z = call fast half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x) %z = call fast half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
@ -125,7 +123,6 @@ define arm_aapcs_vfpcc half @fmin_v16f16(<16 x half> %x) {
; CHECK-FP-NEXT: vminnm.f16 s4, s4, s6 ; CHECK-FP-NEXT: vminnm.f16 s4, s4, s6
; CHECK-FP-NEXT: vminnm.f16 s4, s4, s3 ; CHECK-FP-NEXT: vminnm.f16 s4, s4, s3
; CHECK-FP-NEXT: vminnm.f16 s0, s4, s0 ; CHECK-FP-NEXT: vminnm.f16 s0, s4, s0
; CHECK-FP-NEXT: vstr.16 s0, [r0]
; CHECK-FP-NEXT: bx lr ; CHECK-FP-NEXT: bx lr
; ;
; CHECK-NOFP-LABEL: fmin_v16f16: ; CHECK-NOFP-LABEL: fmin_v16f16:
@ -169,7 +166,6 @@ define arm_aapcs_vfpcc half @fmin_v16f16(<16 x half> %x) {
; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10 ; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr ; CHECK-NOFP-NEXT: bx lr
entry: entry:
%z = call fast half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x) %z = call fast half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
@ -309,20 +305,20 @@ entry:
define arm_aapcs_vfpcc half @fmin_v4f16_nofast(<4 x half> %x) { define arm_aapcs_vfpcc half @fmin_v4f16_nofast(<4 x half> %x) {
; CHECK-FP-LABEL: fmin_v4f16_nofast: ; CHECK-FP-LABEL: fmin_v4f16_nofast:
; CHECK-FP: @ %bb.0: @ %entry ; CHECK-FP: @ %bb.0: @ %entry
; CHECK-FP-NEXT: vmov r1, s1 ; CHECK-FP-NEXT: vmov r0, s1
; CHECK-FP-NEXT: vdup.32 q1, r1 ; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vmov.u16 r1, q0[1] ; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
; CHECK-FP-NEXT: vdup.16 q1, r1 ; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr ; CHECK-FP-NEXT: bx lr
; ;
; CHECK-NOFP-LABEL: fmin_v4f16_nofast: ; CHECK-NOFP-LABEL: fmin_v4f16_nofast:
; CHECK-NOFP: @ %bb.0: @ %entry ; CHECK-NOFP: @ %bb.0: @ %entry
; CHECK-NOFP-NEXT: vmov r1, s1 ; CHECK-NOFP-NEXT: vmov r0, s1
; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
; CHECK-NOFP-NEXT: vdup.32 q1, r1 ; CHECK-NOFP-NEXT: vdup.32 q1, r0
; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
; CHECK-NOFP-NEXT: vcmp.f16 s8, s10 ; CHECK-NOFP-NEXT: vcmp.f16 s8, s10
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
@ -333,7 +329,6 @@ define arm_aapcs_vfpcc half @fmin_v4f16_nofast(<4 x half> %x) {
; CHECK-NOFP-NEXT: vcmp.f16 s8, s0 ; CHECK-NOFP-NEXT: vcmp.f16 s8, s0
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr ; CHECK-NOFP-NEXT: bx lr
entry: entry:
%z = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x) %z = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x)
@ -346,13 +341,13 @@ define arm_aapcs_vfpcc half @fmin_v8f16_nofast(<8 x half> %x) {
; CHECK-FP-NEXT: vmov.f64 d2, d1 ; CHECK-FP-NEXT: vmov.f64 d2, d1
; CHECK-FP-NEXT: vmov.f32 s5, s3 ; CHECK-FP-NEXT: vmov.f32 s5, s3
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vmov r1, s1 ; CHECK-FP-NEXT: vmov r0, s1
; CHECK-FP-NEXT: vdup.32 q1, r1 ; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vmov.u16 r1, q0[1] ; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
; CHECK-FP-NEXT: vdup.16 q1, r1 ; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr ; CHECK-FP-NEXT: bx lr
; ;
; CHECK-NOFP-LABEL: fmin_v8f16_nofast: ; CHECK-NOFP-LABEL: fmin_v8f16_nofast:
@ -384,7 +379,6 @@ define arm_aapcs_vfpcc half @fmin_v8f16_nofast(<8 x half> %x) {
; CHECK-NOFP-NEXT: vcmp.f16 s8, s0 ; CHECK-NOFP-NEXT: vcmp.f16 s8, s0
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr ; CHECK-NOFP-NEXT: bx lr
entry: entry:
%z = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x) %z = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
@ -398,13 +392,13 @@ define arm_aapcs_vfpcc half @fmin_v16f16_nofast(<16 x half> %x) {
; CHECK-FP-NEXT: vmov.f64 d2, d1 ; CHECK-FP-NEXT: vmov.f64 d2, d1
; CHECK-FP-NEXT: vmov.f32 s5, s3 ; CHECK-FP-NEXT: vmov.f32 s5, s3
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vmov r1, s1 ; CHECK-FP-NEXT: vmov r0, s1
; CHECK-FP-NEXT: vdup.32 q1, r1 ; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vmov.u16 r1, q0[1] ; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
; CHECK-FP-NEXT: vdup.16 q1, r1 ; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr ; CHECK-FP-NEXT: bx lr
; ;
; CHECK-NOFP-LABEL: fmin_v16f16_nofast: ; CHECK-NOFP-LABEL: fmin_v16f16_nofast:
@ -462,7 +456,6 @@ define arm_aapcs_vfpcc half @fmin_v16f16_nofast(<16 x half> %x) {
; CHECK-NOFP-NEXT: vcmp.f16 s8, s0 ; CHECK-NOFP-NEXT: vcmp.f16 s8, s0
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr ; CHECK-NOFP-NEXT: bx lr
entry: entry:
%z = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x) %z = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
@ -1195,7 +1188,6 @@ define arm_aapcs_vfpcc half @fmax_v4f16(<4 x half> %x) {
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: @ %bb.1:
@ -1220,7 +1212,6 @@ define arm_aapcs_vfpcc half @fmax_v8f16(<8 x half> %x) {
; CHECK-NEXT: vmaxnm.f16 s4, s4, s6 ; CHECK-NEXT: vmaxnm.f16 s4, s4, s6
; CHECK-NEXT: vmaxnm.f16 s4, s4, s3 ; CHECK-NEXT: vmaxnm.f16 s4, s4, s3
; CHECK-NEXT: vmaxnm.f16 s0, s4, s0 ; CHECK-NEXT: vmaxnm.f16 s0, s4, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
%z = call fast half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x) %z = call fast half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
@ -1242,7 +1233,6 @@ define arm_aapcs_vfpcc half @fmax_v16f16(<16 x half> %x) {
; CHECK-FP-NEXT: vmaxnm.f16 s4, s4, s6 ; CHECK-FP-NEXT: vmaxnm.f16 s4, s4, s6
; CHECK-FP-NEXT: vmaxnm.f16 s4, s4, s3 ; CHECK-FP-NEXT: vmaxnm.f16 s4, s4, s3
; CHECK-FP-NEXT: vmaxnm.f16 s0, s4, s0 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s4, s0
; CHECK-FP-NEXT: vstr.16 s0, [r0]
; CHECK-FP-NEXT: bx lr ; CHECK-FP-NEXT: bx lr
; ;
; CHECK-NOFP-LABEL: fmax_v16f16: ; CHECK-NOFP-LABEL: fmax_v16f16:
@ -1286,7 +1276,6 @@ define arm_aapcs_vfpcc half @fmax_v16f16(<16 x half> %x) {
; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 ; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr ; CHECK-NOFP-NEXT: bx lr
entry: entry:
%z = call fast half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x) %z = call fast half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
@ -1424,20 +1413,20 @@ entry:
define arm_aapcs_vfpcc half @fmax_v4f16_nofast(<4 x half> %x) { define arm_aapcs_vfpcc half @fmax_v4f16_nofast(<4 x half> %x) {
; CHECK-FP-LABEL: fmax_v4f16_nofast: ; CHECK-FP-LABEL: fmax_v4f16_nofast:
; CHECK-FP: @ %bb.0: @ %entry ; CHECK-FP: @ %bb.0: @ %entry
; CHECK-FP-NEXT: vmov r1, s1 ; CHECK-FP-NEXT: vmov r0, s1
; CHECK-FP-NEXT: vdup.32 q1, r1 ; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vmov.u16 r1, q0[1] ; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
; CHECK-FP-NEXT: vdup.16 q1, r1 ; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr ; CHECK-FP-NEXT: bx lr
; ;
; CHECK-NOFP-LABEL: fmax_v4f16_nofast: ; CHECK-NOFP-LABEL: fmax_v4f16_nofast:
; CHECK-NOFP: @ %bb.0: @ %entry ; CHECK-NOFP: @ %bb.0: @ %entry
; CHECK-NOFP-NEXT: vmov r1, s1 ; CHECK-NOFP-NEXT: vmov r0, s1
; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
; CHECK-NOFP-NEXT: vdup.32 q1, r1 ; CHECK-NOFP-NEXT: vdup.32 q1, r0
; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 ; CHECK-NOFP-NEXT: vcmp.f16 s10, s8
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
@ -1448,7 +1437,6 @@ define arm_aapcs_vfpcc half @fmax_v4f16_nofast(<4 x half> %x) {
; CHECK-NOFP-NEXT: vcmp.f16 s0, s8 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s8
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr ; CHECK-NOFP-NEXT: bx lr
entry: entry:
%z = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x) %z = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x)
@ -1461,13 +1449,13 @@ define arm_aapcs_vfpcc half @fmax_v8f16_nofast(<8 x half> %x) {
; CHECK-FP-NEXT: vmov.f64 d2, d1 ; CHECK-FP-NEXT: vmov.f64 d2, d1
; CHECK-FP-NEXT: vmov.f32 s5, s3 ; CHECK-FP-NEXT: vmov.f32 s5, s3
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vmov r1, s1 ; CHECK-FP-NEXT: vmov r0, s1
; CHECK-FP-NEXT: vdup.32 q1, r1 ; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vmov.u16 r1, q0[1] ; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
; CHECK-FP-NEXT: vdup.16 q1, r1 ; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr ; CHECK-FP-NEXT: bx lr
; ;
; CHECK-NOFP-LABEL: fmax_v8f16_nofast: ; CHECK-NOFP-LABEL: fmax_v8f16_nofast:
@ -1499,7 +1487,6 @@ define arm_aapcs_vfpcc half @fmax_v8f16_nofast(<8 x half> %x) {
; CHECK-NOFP-NEXT: vcmp.f16 s0, s8 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s8
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr ; CHECK-NOFP-NEXT: bx lr
entry: entry:
%z = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x) %z = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
@ -1513,13 +1500,13 @@ define arm_aapcs_vfpcc half @fmax_v16f16_nofast(<16 x half> %x) {
; CHECK-FP-NEXT: vmov.f64 d2, d1 ; CHECK-FP-NEXT: vmov.f64 d2, d1
; CHECK-FP-NEXT: vmov.f32 s5, s3 ; CHECK-FP-NEXT: vmov.f32 s5, s3
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vmov r1, s1 ; CHECK-FP-NEXT: vmov r0, s1
; CHECK-FP-NEXT: vdup.32 q1, r1 ; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vmov.u16 r1, q0[1] ; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
; CHECK-FP-NEXT: vdup.16 q1, r1 ; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
; CHECK-FP-NEXT: vstr.16 s0, [r0] ; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr ; CHECK-FP-NEXT: bx lr
; ;
; CHECK-NOFP-LABEL: fmax_v16f16_nofast: ; CHECK-NOFP-LABEL: fmax_v16f16_nofast:
@ -1577,7 +1564,6 @@ define arm_aapcs_vfpcc half @fmax_v16f16_nofast(<16 x half> %x) {
; CHECK-NOFP-NEXT: vcmp.f16 s0, s8 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s8
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr ; CHECK-NOFP-NEXT: bx lr
entry: entry:
%z = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x) %z = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)