mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[X86] Part 2 to fix x86-64 fp128 calling convention.
Part 1 was submitted in http://reviews.llvm.org/D15134. Changes in this part: * X86RegisterInfo.td, X86RecognizableInstr.cpp: Add FR128 register class. * X86CallingConv.td: Pass f128 values in XMM registers or on stack. * X86InstrCompiler.td, X86InstrInfo.td, X86InstrSSE.td: Add instruction selection patterns for f128. * X86ISelLowering.cpp: When target has MMX registers, configure MVT::f128 in FR128RegClass, with TypeSoftenFloat action, and custom actions for some opcodes. Add missed cases of MVT::f128 in places that handle f32, f64, or vector types. Add TODO comment to support f128 type in inline assembly code. * SelectionDAGBuilder.cpp: Fix infinite loop when f128 type can have VT == TLI.getTypeToTransformTo(Ctx, VT). * Add unit tests for x86-64 fp128 type. Differential Revision: http://reviews.llvm.org/D11438 llvm-svn: 255558
This commit is contained in:
parent
14a74b66f7
commit
bd51d0c9b1
@ -2451,7 +2451,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
|
||||
|
||||
// We care about the legality of the operation after it has been type
|
||||
// legalized.
|
||||
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
|
||||
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
|
||||
VT != TLI.getTypeToTransformTo(Ctx, VT))
|
||||
VT = TLI.getTypeToTransformTo(Ctx, VT);
|
||||
|
||||
// If the vselect is legal, assume we want to leave this as a vector setcc +
|
||||
|
@ -158,6 +158,7 @@ def RetCC_X86_64_C : CallingConv<[
|
||||
// The X86-64 calling convention always returns FP values in XMM0.
|
||||
CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
|
||||
CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>,
|
||||
|
||||
// MMX vector types are always returned in XMM0.
|
||||
CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
|
||||
@ -293,7 +294,7 @@ def CC_X86_64_C : CallingConv<[
|
||||
CCIfType<[v64i1], CCPromoteToType<v64i8>>,
|
||||
|
||||
// The first 8 FP/Vector arguments are passed in XMM registers.
|
||||
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||
CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||
CCIfSubtarget<"hasSSE1()",
|
||||
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
|
||||
|
||||
@ -318,7 +319,7 @@ def CC_X86_64_C : CallingConv<[
|
||||
|
||||
// Long doubles get stack slots whose size and alignment depend on the
|
||||
// subtarget.
|
||||
CCIfType<[f80], CCAssignToStack<0, 0>>,
|
||||
CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
|
||||
|
||||
// Vectors get 16-byte stack slots that are 16-byte aligned.
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
|
||||
|
@ -296,6 +296,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::BR_CC , MVT::f32, Expand);
|
||||
setOperationAction(ISD::BR_CC , MVT::f64, Expand);
|
||||
setOperationAction(ISD::BR_CC , MVT::f80, Expand);
|
||||
setOperationAction(ISD::BR_CC , MVT::f128, Expand);
|
||||
setOperationAction(ISD::BR_CC , MVT::i8, Expand);
|
||||
setOperationAction(ISD::BR_CC , MVT::i16, Expand);
|
||||
setOperationAction(ISD::BR_CC , MVT::i32, Expand);
|
||||
@ -303,6 +304,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
|
||||
setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
|
||||
setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
|
||||
setOperationAction(ISD::SELECT_CC , MVT::f128, Expand);
|
||||
setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
|
||||
setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
|
||||
setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
|
||||
@ -415,12 +417,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
|
||||
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
|
||||
setOperationAction(ISD::SELECT , MVT::f80 , Custom);
|
||||
setOperationAction(ISD::SELECT , MVT::f128 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::f80 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::f128 , Custom);
|
||||
setOperationAction(ISD::SETCCE , MVT::i8 , Custom);
|
||||
setOperationAction(ISD::SETCCE , MVT::i16 , Custom);
|
||||
setOperationAction(ISD::SETCCE , MVT::i32 , Custom);
|
||||
@ -619,8 +623,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||
|
||||
// Long double always uses X87.
|
||||
// Long double always uses X87, except f128 on 64-bit targets with MMX, which uses FR128 (XMM) registers.
|
||||
if (!Subtarget->useSoftFloat()) {
|
||||
if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
|
||||
addRegisterClass(MVT::f128, &X86::FR128RegClass);
|
||||
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
|
||||
setOperationAction(ISD::FABS , MVT::f128, Custom);
|
||||
setOperationAction(ISD::FNEG , MVT::f128, Custom);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
|
||||
}
|
||||
|
||||
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
|
||||
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
|
||||
@ -2363,7 +2375,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
|
||||
EVT CopyVT = VA.getLocVT();
|
||||
|
||||
// If this is x86-64, and we disabled SSE, we can't return FP values
|
||||
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
|
||||
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
|
||||
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
|
||||
report_fatal_error("SSE register return with SSE disabled");
|
||||
}
|
||||
@ -2647,6 +2659,8 @@ SDValue X86TargetLowering::LowerFormalArguments(
|
||||
RC = &X86::FR32RegClass;
|
||||
else if (RegVT == MVT::f64)
|
||||
RC = &X86::FR64RegClass;
|
||||
else if (RegVT == MVT::f128)
|
||||
RC = &X86::FR128RegClass;
|
||||
else if (RegVT.is512BitVector())
|
||||
RC = &X86::VR512RegClass;
|
||||
else if (RegVT.is256BitVector())
|
||||
@ -13410,6 +13424,8 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
|
||||
SDLoc dl(Op);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
|
||||
bool IsF128 = (VT == MVT::f128);
|
||||
|
||||
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
|
||||
// decide if we should generate a 16-byte constant mask when we only need 4 or
|
||||
// 8 bytes for the scalar case.
|
||||
@ -13422,6 +13438,11 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
|
||||
LogicVT = VT;
|
||||
EltVT = VT.getVectorElementType();
|
||||
NumElts = VT.getVectorNumElements();
|
||||
} else if (IsF128) {
|
||||
// SSE instructions are used for optimized f128 logical operations.
|
||||
LogicVT = MVT::f128;
|
||||
EltVT = VT;
|
||||
NumElts = 1;
|
||||
} else {
|
||||
// There are no scalar bitwise logical SSE/AVX instructions, so we
|
||||
// generate a 16-byte vector constant and logic op even for the scalar case.
|
||||
@ -13453,7 +13474,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
|
||||
IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
|
||||
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
|
||||
|
||||
if (VT.isVector())
|
||||
if (VT.isVector() || IsF128)
|
||||
return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
|
||||
|
||||
// For the scalar case extend to a 128-bit vector, perform the logic op,
|
||||
@ -13472,6 +13493,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
|
||||
SDLoc dl(Op);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
MVT SrcVT = Op1.getSimpleValueType();
|
||||
bool IsF128 = (VT == MVT::f128);
|
||||
|
||||
// If second operand is smaller, extend it first.
|
||||
if (SrcVT.bitsLT(VT)) {
|
||||
@ -13486,13 +13508,16 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
|
||||
|
||||
// At this point the operands and the result should have the same
|
||||
// type, and that won't be f80 since that is not custom lowered.
|
||||
assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) &&
|
||||
"Unexpected type in LowerFCOPYSIGN");
|
||||
|
||||
const fltSemantics &Sem =
|
||||
VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle;
|
||||
VT == MVT::f64 ? APFloat::IEEEdouble :
|
||||
(IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle);
|
||||
const unsigned SizeInBits = VT.getSizeInBits();
|
||||
|
||||
SmallVector<Constant *, 4> CV(
|
||||
VT == MVT::f64 ? 2 : 4,
|
||||
VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4),
|
||||
ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0))));
|
||||
|
||||
// First, clear all bits but the sign bit from the second operand (sign).
|
||||
@ -13505,12 +13530,13 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
|
||||
// Perform all logic operations as 16-byte vectors because there are no
|
||||
// scalar FP logic instructions in SSE. This allows load folding of the
|
||||
// constants into the logic instructions.
|
||||
MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
|
||||
MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32);
|
||||
SDValue Mask1 =
|
||||
DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
|
||||
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
|
||||
false, false, false, 16);
|
||||
Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
|
||||
if (!IsF128)
|
||||
Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
|
||||
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1);
|
||||
|
||||
// Next, clear the sign bit from the first operand (magnitude).
|
||||
@ -13519,8 +13545,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
|
||||
APFloat APF = Op0CN->getValueAPF();
|
||||
// If the magnitude is a positive zero, the sign bit alone is enough.
|
||||
if (APF.isPosZero())
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
return IsF128 ? SignBit :
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
APF.clearSign();
|
||||
CV[0] = ConstantFP::get(*Context, APF);
|
||||
} else {
|
||||
@ -13536,13 +13563,15 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
|
||||
false, false, false, 16);
|
||||
// If the magnitude operand wasn't a constant, we need to AND out the sign.
|
||||
if (!isa<ConstantFPSDNode>(Op0)) {
|
||||
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
|
||||
if (!IsF128)
|
||||
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
|
||||
Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val);
|
||||
}
|
||||
// OR the magnitude value with the sign bit.
|
||||
Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit);
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
return IsF128 ? Val :
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
|
||||
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
|
||||
@ -22158,6 +22187,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||
return EmitLoweredTLSCall(MI, BB);
|
||||
case X86::CMOV_FR32:
|
||||
case X86::CMOV_FR64:
|
||||
case X86::CMOV_FR128:
|
||||
case X86::CMOV_GR8:
|
||||
case X86::CMOV_GR16:
|
||||
case X86::CMOV_GR32:
|
||||
@ -23821,7 +23851,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
|
||||
// ignored in unsafe-math mode).
|
||||
// We also try to create v2f32 min/max nodes, which we later widen to v4f32.
|
||||
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
|
||||
VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
|
||||
VT != MVT::f80 && VT != MVT::f128 &&
|
||||
(TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
|
||||
(Subtarget->hasSSE2() ||
|
||||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
|
||||
@ -27946,6 +27977,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
case MVT::f64:
|
||||
case MVT::i64:
|
||||
return std::make_pair(0U, &X86::FR64RegClass);
|
||||
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
|
||||
// Vector types.
|
||||
case MVT::v16i8:
|
||||
case MVT::v8i16:
|
||||
@ -28058,6 +28090,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
// target independent register mapper will just pick the first match it can
|
||||
// find, ignoring the required type.
|
||||
|
||||
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
|
||||
if (VT == MVT::f32 || VT == MVT::i32)
|
||||
Res.second = &X86::FR32RegClass;
|
||||
else if (VT == MVT::f64 || VT == MVT::i64)
|
||||
|
@ -512,6 +512,7 @@ let usesCustomInserter = 1, Uses = [EFLAGS] in {
|
||||
|
||||
defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
|
||||
defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
|
||||
defm _FR128 : CMOVrr_PSEUDO<FR128, f128>;
|
||||
defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
|
||||
defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
|
||||
defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
|
||||
|
@ -955,11 +955,12 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
|
||||
def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
|
||||
def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
|
||||
def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
|
||||
def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
|
||||
def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
|
||||
def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
|
||||
def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
|
||||
def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
|
||||
def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
|
||||
def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;
|
||||
|
||||
def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
|
||||
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
|
||||
|
@ -413,6 +413,8 @@ let Predicates = [HasSSE2] in {
|
||||
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
|
||||
def : Pat<(f128 (bitconvert (i128 FR128:$src))), (f128 FR128:$src)>;
|
||||
def : Pat<(i128 (bitconvert (f128 FR128:$src))), (i128 FR128:$src)>;
|
||||
}
|
||||
|
||||
// Bitcasts between 256-bit vector types. Return the original type since
|
||||
@ -8851,3 +8853,59 @@ let mayLoad = 1, Constraints
|
||||
defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Extra selection patterns for FR128, f128, f128mem
|
||||
|
||||
// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
|
||||
def : Pat<(store (f128 FR128:$src), addr:$dst),
|
||||
(MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>;
|
||||
|
||||
def : Pat<(loadf128 addr:$src),
|
||||
(COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>;
|
||||
|
||||
// andps is shorter than andpd or pand. andps is in SSE and andpd/pand are in SSE2.
|
||||
def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)),
|
||||
(COPY_TO_REGCLASS
|
||||
(ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
|
||||
FR128)>;
|
||||
|
||||
def : Pat<(X86fand FR128:$src1, FR128:$src2),
|
||||
(COPY_TO_REGCLASS
|
||||
(ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
|
||||
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
|
||||
|
||||
def : Pat<(and FR128:$src1, FR128:$src2),
|
||||
(COPY_TO_REGCLASS
|
||||
(ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
|
||||
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
|
||||
|
||||
def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)),
|
||||
(COPY_TO_REGCLASS
|
||||
(ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
|
||||
FR128)>;
|
||||
|
||||
def : Pat<(X86for FR128:$src1, FR128:$src2),
|
||||
(COPY_TO_REGCLASS
|
||||
(ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
|
||||
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
|
||||
|
||||
def : Pat<(or FR128:$src1, FR128:$src2),
|
||||
(COPY_TO_REGCLASS
|
||||
(ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
|
||||
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
|
||||
|
||||
def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)),
|
||||
(COPY_TO_REGCLASS
|
||||
(XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
|
||||
FR128)>;
|
||||
|
||||
def : Pat<(X86fxor FR128:$src1, FR128:$src2),
|
||||
(COPY_TO_REGCLASS
|
||||
(XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
|
||||
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
|
||||
|
||||
def : Pat<(xor FR128:$src1, FR128:$src2),
|
||||
(COPY_TO_REGCLASS
|
||||
(XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
|
||||
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
|
||||
|
@ -423,6 +423,8 @@ def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
|
||||
|
||||
def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
|
||||
|
||||
def FR128 : RegisterClass<"X86", [i128, f128], 128, (add FR32)>;
|
||||
|
||||
|
||||
// FIXME: This sets up the floating point register files as though they are f64
|
||||
// values, though they really are f80 values. This will cause us to spill
|
||||
|
47
test/CodeGen/X86/fp128-calling-conv.ll
Normal file
47
test/CodeGen/X86/fp128-calling-conv.ll
Normal file
@ -0,0 +1,47 @@
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
|
||||
|
||||
; __float128 myFP128 = 1.0L; // x86_64-linux-android
|
||||
@myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16
|
||||
|
||||
; The first few parameters are passed in registers and the others are on the stack.
|
||||
|
||||
define fp128 @TestParam_FP128_0(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
|
||||
entry:
|
||||
ret fp128 %d0
|
||||
; CHECK-LABEL: TestParam_FP128_0:
|
||||
; CHECK-NOT: mov
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define fp128 @TestParam_FP128_1(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
|
||||
entry:
|
||||
ret fp128 %d1
|
||||
; CHECK-LABEL: TestParam_FP128_1:
|
||||
; CHECK: movaps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
}
|
||||
|
||||
define fp128 @TestParam_FP128_7(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
|
||||
entry:
|
||||
ret fp128 %d7
|
||||
; CHECK-LABEL: TestParam_FP128_7:
|
||||
; CHECK: movaps %xmm7, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
}
|
||||
|
||||
define fp128 @TestParam_FP128_8(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
|
||||
entry:
|
||||
ret fp128 %d8
|
||||
; CHECK-LABEL: TestParam_FP128_8:
|
||||
; CHECK: movaps 8(%rsp), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
}
|
||||
|
||||
define fp128 @TestParam_FP128_9(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
|
||||
entry:
|
||||
ret fp128 %d9
|
||||
; CHECK-LABEL: TestParam_FP128_9:
|
||||
; CHECK: movaps 24(%rsp), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
}
|
279
test/CodeGen/X86/fp128-cast.ll
Normal file
279
test/CodeGen/X86/fp128-cast.ll
Normal file
@ -0,0 +1,279 @@
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
|
||||
|
||||
; Check soft floating point conversion function calls.
|
||||
|
||||
@vi32 = common global i32 0, align 4
|
||||
@vi64 = common global i64 0, align 8
|
||||
@vu32 = common global i32 0, align 4
|
||||
@vu64 = common global i64 0, align 8
|
||||
@vf32 = common global float 0.000000e+00, align 4
|
||||
@vf64 = common global double 0.000000e+00, align 8
|
||||
@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16
|
||||
|
||||
define void @TestFPExtF32_F128() {
|
||||
entry:
|
||||
%0 = load float, float* @vf32, align 4
|
||||
%conv = fpext float %0 to fp128
|
||||
store fp128 %conv, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: TestFPExtF32_F128:
|
||||
; CHECK: movss vf32(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __extendsftf2
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @TestFPExtF64_F128() {
|
||||
entry:
|
||||
%0 = load double, double* @vf64, align 8
|
||||
%conv = fpext double %0 to fp128
|
||||
store fp128 %conv, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: TestFPExtF64_F128:
|
||||
; CHECK: movsd vf64(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __extenddftf2
|
||||
; CHECK-NEXT: movapd %xmm0, vf128(%rip)
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
define void @TestFPToSIF128_I32() {
|
||||
entry:
|
||||
%0 = load fp128, fp128* @vf128, align 16
|
||||
%conv = fptosi fp128 %0 to i32
|
||||
store i32 %conv, i32* @vi32, align 4
|
||||
ret void
|
||||
; CHECK-LABEL: TestFPToSIF128_I32:
|
||||
; CHECK: movaps vf128(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __fixtfsi
|
||||
; CHECK-NEXT: movl %eax, vi32(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @TestFPToUIF128_U32() {
|
||||
entry:
|
||||
%0 = load fp128, fp128* @vf128, align 16
|
||||
%conv = fptoui fp128 %0 to i32
|
||||
store i32 %conv, i32* @vu32, align 4
|
||||
ret void
|
||||
; CHECK-LABEL: TestFPToUIF128_U32:
|
||||
; CHECK: movaps vf128(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __fixunstfsi
|
||||
; CHECK-NEXT: movl %eax, vu32(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @TestFPToSIF128_I64() {
|
||||
entry:
|
||||
%0 = load fp128, fp128* @vf128, align 16
|
||||
%conv = fptosi fp128 %0 to i32
|
||||
%conv1 = sext i32 %conv to i64
|
||||
store i64 %conv1, i64* @vi64, align 8
|
||||
ret void
|
||||
; CHECK-LABEL: TestFPToSIF128_I64:
|
||||
; CHECK: movaps vf128(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __fixtfsi
|
||||
; CHECK-NEXT: cltq
|
||||
; CHECK-NEXT: movq %rax, vi64(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @TestFPToUIF128_U64() {
|
||||
entry:
|
||||
%0 = load fp128, fp128* @vf128, align 16
|
||||
%conv = fptoui fp128 %0 to i32
|
||||
%conv1 = zext i32 %conv to i64
|
||||
store i64 %conv1, i64* @vu64, align 8
|
||||
ret void
|
||||
; CHECK-LABEL: TestFPToUIF128_U64:
|
||||
; CHECK: movaps vf128(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __fixunstfsi
|
||||
; CHECK-NEXT: movl %eax, %eax
|
||||
; CHECK-NEXT: movq %rax, vu64(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @TestFPTruncF128_F32() {
|
||||
entry:
|
||||
%0 = load fp128, fp128* @vf128, align 16
|
||||
%conv = fptrunc fp128 %0 to float
|
||||
store float %conv, float* @vf32, align 4
|
||||
ret void
|
||||
; CHECK-LABEL: TestFPTruncF128_F32:
|
||||
; CHECK: movaps vf128(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __trunctfsf2
|
||||
; CHECK-NEXT: movss %xmm0, vf32(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @TestFPTruncF128_F64() {
|
||||
entry:
|
||||
%0 = load fp128, fp128* @vf128, align 16
|
||||
%conv = fptrunc fp128 %0 to double
|
||||
store double %conv, double* @vf64, align 8
|
||||
ret void
|
||||
; CHECK-LABEL: TestFPTruncF128_F64:
|
||||
; CHECK: movapd vf128(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __trunctfdf2
|
||||
; CHECK-NEXT: movsd %xmm0, vf64(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @TestSIToFPI32_F128() {
|
||||
entry:
|
||||
%0 = load i32, i32* @vi32, align 4
|
||||
%conv = sitofp i32 %0 to fp128
|
||||
store fp128 %conv, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: TestSIToFPI32_F128:
|
||||
; CHECK: movl vi32(%rip), %edi
|
||||
; CHECK-NEXT: callq __floatsitf
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @TestUIToFPU32_F128() #2 {
|
||||
entry:
|
||||
%0 = load i32, i32* @vu32, align 4
|
||||
%conv = uitofp i32 %0 to fp128
|
||||
store fp128 %conv, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: TestUIToFPU32_F128:
|
||||
; CHECK: movl vu32(%rip), %edi
|
||||
; CHECK-NEXT: callq __floatunsitf
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @TestSIToFPI64_F128(){
|
||||
entry:
|
||||
%0 = load i64, i64* @vi64, align 8
|
||||
%conv = sitofp i64 %0 to fp128
|
||||
store fp128 %conv, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: TestSIToFPI64_F128:
|
||||
; CHECK: movq vi64(%rip), %rdi
|
||||
; CHECK-NEXT: callq __floatditf
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @TestUIToFPU64_F128() #2 {
|
||||
entry:
|
||||
%0 = load i64, i64* @vu64, align 8
|
||||
%conv = uitofp i64 %0 to fp128
|
||||
store fp128 %conv, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: TestUIToFPU64_F128:
|
||||
; CHECK: movq vu64(%rip), %rdi
|
||||
; CHECK-NEXT: callq __floatunditf
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define i32 @TestConst128(fp128 %v) {
|
||||
entry:
|
||||
%cmp = fcmp ogt fp128 %v, 0xL00000000000000003FFF000000000000
|
||||
%conv = zext i1 %cmp to i32
|
||||
ret i32 %conv
|
||||
; CHECK-LABEL: TestConst128:
|
||||
; CHECK: movaps {{.*}}, %xmm1
|
||||
; CHECK-NEXT: callq __gttf2
|
||||
; CHECK-NEXT: test
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
; C code:
|
||||
; struct TestBits_ieee_ext {
|
||||
; unsigned v1;
|
||||
; unsigned v2;
|
||||
; };
|
||||
; union TestBits_LDU {
|
||||
; FP128 ld;
|
||||
; struct TestBits_ieee_ext bits;
|
||||
; };
|
||||
; int TestBits128(FP128 ld) {
|
||||
; union TestBits_LDU u;
|
||||
; u.ld = ld * ld;
|
||||
; return ((u.bits.v1 | u.bits.v2) == 0);
|
||||
; }
|
||||
define i32 @TestBits128(fp128 %ld) {
|
||||
entry:
|
||||
%mul = fmul fp128 %ld, %ld
|
||||
%0 = bitcast fp128 %mul to i128
|
||||
%shift = lshr i128 %0, 32
|
||||
%or5 = or i128 %shift, %0
|
||||
%or = trunc i128 %or5 to i32
|
||||
%cmp = icmp eq i32 %or, 0
|
||||
%conv = zext i1 %cmp to i32
|
||||
ret i32 %conv
|
||||
; CHECK-LABEL: TestBits128:
|
||||
; CHECK: movaps %xmm0, %xmm1
|
||||
; CHECK-NEXT: callq __multf3
|
||||
; CHECK-NEXT: movaps %xmm0, (%rsp)
|
||||
; CHECK-NEXT: movq (%rsp),
|
||||
; CHECK-NEXT: movq %
|
||||
; CHECK-NEXT: shrq $32,
|
||||
; CHECK: orl
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: movzbl %al, %eax
|
||||
; CHECK: retq
|
||||
;
|
||||
; If TestBits128 fails due to any llvm or clang change,
|
||||
; please make sure the original simplified C code will
|
||||
; be compiled into correct IL and assembly code, not
|
||||
; just this TestBits128 test case. Better yet, try to
|
||||
; test the whole libm and its test cases.
|
||||
}
|
||||
|
||||
; C code: (compiled with -target x86_64-linux-android)
|
||||
; typedef long double __float128;
|
||||
; __float128 TestPair128(unsigned long a, unsigned long b) {
|
||||
; unsigned __int128 n;
|
||||
; unsigned __int128 v1 = ((unsigned __int128)a << 64);
|
||||
; unsigned __int128 v2 = (unsigned __int128)b;
|
||||
; n = (v1 | v2) + 3;
|
||||
; return *(__float128*)&n;
|
||||
; }
|
||||
define fp128 @TestPair128(i64 %a, i64 %b) {
|
||||
entry:
|
||||
%conv = zext i64 %a to i128
|
||||
%shl = shl nuw i128 %conv, 64
|
||||
%conv1 = zext i64 %b to i128
|
||||
%or = or i128 %shl, %conv1
|
||||
%add = add i128 %or, 3
|
||||
%0 = bitcast i128 %add to fp128
|
||||
ret fp128 %0
|
||||
; CHECK-LABEL: TestPair128:
|
||||
; CHECK: addq $3, %rsi
|
||||
; CHECK: movq %rsi, -24(%rsp)
|
||||
; CHECK: movq %rdi, -16(%rsp)
|
||||
; CHECK: movaps -24(%rsp), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
}
|
||||
|
||||
define fp128 @TestTruncCopysign(fp128 %x, i32 %n) {
|
||||
entry:
|
||||
%cmp = icmp sgt i32 %n, 50000
|
||||
br i1 %cmp, label %if.then, label %cleanup
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%conv = fptrunc fp128 %x to double
|
||||
%call = tail call double @copysign(double 0x7FF0000000000000, double %conv) #2
|
||||
%conv1 = fpext double %call to fp128
|
||||
br label %cleanup
|
||||
|
||||
cleanup: ; preds = %entry, %if.then
|
||||
%retval.0 = phi fp128 [ %conv1, %if.then ], [ %x, %entry ]
|
||||
ret fp128 %retval.0
|
||||
; CHECK-LABEL: TestTruncCopysign:
|
||||
; CHECK: callq __trunctfdf2
|
||||
; CHECK-NEXT: andpd {{.*}}, %xmm0
|
||||
; CHECK-NEXT: orpd {{.*}}, %xmm0
|
||||
; CHECK-NEXT: callq __extenddftf2
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
declare double @copysign(double, double) #1
|
||||
|
||||
attributes #2 = { nounwind readnone }
|
96
test/CodeGen/X86/fp128-compare.ll
Normal file
96
test/CodeGen/X86/fp128-compare.ll
Normal file
@ -0,0 +1,96 @@
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
|
||||
|
||||
define i32 @TestComp128GT(fp128 %d1, fp128 %d2) {
|
||||
entry:
|
||||
%cmp = fcmp ogt fp128 %d1, %d2
|
||||
%conv = zext i1 %cmp to i32
|
||||
ret i32 %conv
|
||||
; CHECK-LABEL: TestComp128GT:
|
||||
; CHECK: callq __gttf2
|
||||
; CHECK: setg %al
|
||||
; CHECK: movzbl %al, %eax
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define i32 @TestComp128GE(fp128 %d1, fp128 %d2) {
|
||||
entry:
|
||||
%cmp = fcmp oge fp128 %d1, %d2
|
||||
%conv = zext i1 %cmp to i32
|
||||
ret i32 %conv
|
||||
; CHECK-LABEL: TestComp128GE:
|
||||
; CHECK: callq __getf2
|
||||
; CHECK: testl %eax, %eax
|
||||
; CHECK: setns %al
|
||||
; CHECK: movzbl %al, %eax
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define i32 @TestComp128LT(fp128 %d1, fp128 %d2) {
|
||||
entry:
|
||||
%cmp = fcmp olt fp128 %d1, %d2
|
||||
%conv = zext i1 %cmp to i32
|
||||
ret i32 %conv
|
||||
; CHECK-LABEL: TestComp128LT:
|
||||
; CHECK: callq __lttf2
|
||||
; CHECK-NEXT: shrl $31, %eax
|
||||
; CHECK: retq
|
||||
;
|
||||
; The 'shrl' is a special optimization in llvm to combine
|
||||
; the effect of 'fcmp olt' and 'zext'. The main purpose is
|
||||
; to test the softened call to __lttf2.
|
||||
}
|
||||
|
||||
define i32 @TestComp128LE(fp128 %d1, fp128 %d2) {
|
||||
entry:
|
||||
%cmp = fcmp ole fp128 %d1, %d2
|
||||
%conv = zext i1 %cmp to i32
|
||||
ret i32 %conv
|
||||
; CHECK-LABEL: TestComp128LE:
|
||||
; CHECK: callq __letf2
|
||||
; CHECK-NEXT: testl %eax, %eax
|
||||
; CHECK: setle %al
|
||||
; CHECK: movzbl %al, %eax
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define i32 @TestComp128EQ(fp128 %d1, fp128 %d2) {
|
||||
entry:
|
||||
%cmp = fcmp oeq fp128 %d1, %d2
|
||||
%conv = zext i1 %cmp to i32
|
||||
ret i32 %conv
|
||||
; CHECK-LABEL: TestComp128EQ:
|
||||
; CHECK: callq __eqtf2
|
||||
; CHECK-NEXT: testl %eax, %eax
|
||||
; CHECK: sete %al
|
||||
; CHECK: movzbl %al, %eax
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define i32 @TestComp128NE(fp128 %d1, fp128 %d2) {
|
||||
entry:
|
||||
%cmp = fcmp une fp128 %d1, %d2
|
||||
%conv = zext i1 %cmp to i32
|
||||
ret i32 %conv
|
||||
; CHECK-LABEL: TestComp128NE:
|
||||
; CHECK: callq __netf2
|
||||
; CHECK-NEXT: testl %eax, %eax
|
||||
; CHECK: setne %al
|
||||
; CHECK: movzbl %al, %eax
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define fp128 @TestMax(fp128 %x, fp128 %y) {
|
||||
entry:
|
||||
%cmp = fcmp ogt fp128 %x, %y
|
||||
%cond = select i1 %cmp, fp128 %x, fp128 %y
|
||||
ret fp128 %cond
|
||||
; CHECK-LABEL: TestMax:
|
||||
; CHECK: movaps %xmm1
|
||||
; CHECK: movaps %xmm0
|
||||
; CHECK: callq __gttf2
|
||||
; CHECK: movaps {{.*}}, %xmm0
|
||||
; CHECK: testl %eax, %eax
|
||||
; CHECK: movaps {{.*}}, %xmm0
|
||||
; CHECK: retq
|
||||
}
|
320
test/CodeGen/X86/fp128-i128.ll
Normal file
320
test/CodeGen/X86/fp128-i128.ll
Normal file
@ -0,0 +1,320 @@
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
|
||||
|
||||
; These tests were generated from simplified libm C code.
|
||||
; When compiled for the x86_64-linux-android target,
|
||||
; long double is mapped to f128 type that should be passed
|
||||
; in SSE registers. When the f128 type calling convention
|
||||
; problem was fixed, old llvm code failed to handle f128 values
|
||||
; in several f128/i128 type operations. These unit tests hopefully
|
||||
; will catch regressions in any future change in this area.
|
||||
; To modify or enhance these test cases, please consult libm
|
||||
; code pattern and compile with -target x86_64-linux-android
|
||||
; to generate IL. If the __float128 keyword is not accepted by
|
||||
; clang, just define it as "long double".
|
||||
;
|
||||
|
||||
; typedef long double __float128;
|
||||
; union IEEEl2bits {
|
||||
; __float128 e;
|
||||
; struct {
|
||||
; unsigned long manl :64;
|
||||
; unsigned long manh :48;
|
||||
; unsigned int exp :15;
|
||||
; unsigned int sign :1;
|
||||
; } bits;
|
||||
; struct {
|
||||
; unsigned long manl :64;
|
||||
; unsigned long manh :48;
|
||||
; unsigned int expsign :16;
|
||||
; } xbits;
|
||||
; };
|
||||
|
||||
; C code:
|
||||
; void foo(__float128 x);
|
||||
; void TestUnionLD1(__float128 s, unsigned long n) {
|
||||
; union IEEEl2bits u;
|
||||
; __float128 w;
|
||||
; u.e = s;
|
||||
; u.bits.manh = n;
|
||||
; w = u.e;
|
||||
; foo(w);
|
||||
; }
|
||||
define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
|
||||
entry:
|
||||
%0 = bitcast fp128 %s to i128
|
||||
%1 = zext i64 %n to i128
|
||||
%bf.value = shl nuw i128 %1, 64
|
||||
%bf.shl = and i128 %bf.value, 5192296858534809181786422619668480
|
||||
%bf.clear = and i128 %0, -5192296858534809181786422619668481
|
||||
%bf.set = or i128 %bf.shl, %bf.clear
|
||||
%2 = bitcast i128 %bf.set to fp128
|
||||
tail call void @foo(fp128 %2) #2
|
||||
ret void
|
||||
; CHECK-LABEL: TestUnionLD1:
|
||||
; CHECK: movaps %xmm0, -24(%rsp)
|
||||
; CHECK-NEXT: movq -24(%rsp), %rax
|
||||
; CHECK-NEXT: movabsq $281474976710655, %rcx
|
||||
; CHECK-NEXT: andq %rdi, %rcx
|
||||
; CHECK-NEXT: movabsq $-281474976710656, %rdx
|
||||
; CHECK-NEXT: andq -16(%rsp), %rdx
|
||||
; CHECK-NEXT: movq %rax, -40(%rsp)
|
||||
; CHECK-NEXT: orq %rcx, %rdx
|
||||
; CHECK-NEXT: movq %rdx, -32(%rsp)
|
||||
; CHECK-NEXT: movaps -40(%rsp), %xmm0
|
||||
; CHECK-NEXT: jmp foo
|
||||
}
|
||||
|
||||
; C code:
|
||||
; __float128 TestUnionLD2(__float128 s) {
|
||||
; union IEEEl2bits u;
|
||||
; __float128 w;
|
||||
; u.e = s;
|
||||
; u.bits.manl = 0;
|
||||
; w = u.e;
|
||||
; return w;
|
||||
; }
|
||||
define fp128 @TestUnionLD2(fp128 %s) #0 {
|
||||
entry:
|
||||
%0 = bitcast fp128 %s to i128
|
||||
%bf.clear = and i128 %0, -18446744073709551616
|
||||
%1 = bitcast i128 %bf.clear to fp128
|
||||
ret fp128 %1
|
||||
; CHECK-LABEL: TestUnionLD2:
|
||||
; CHECK: movaps %xmm0, -24(%rsp)
|
||||
; CHECK-NEXT: movq -16(%rsp), %rax
|
||||
; CHECK-NEXT: movq %rax, -32(%rsp)
|
||||
; CHECK-NEXT: movq $0, -40(%rsp)
|
||||
; CHECK-NEXT: movaps -40(%rsp), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
}
|
||||
|
||||
; C code:
|
||||
; __float128 TestI128_1(__float128 x)
|
||||
; {
|
||||
; union IEEEl2bits z;
|
||||
; z.e = x;
|
||||
; z.bits.sign = 0;
|
||||
; return (z.e < 0.1L) ? 1.0L : 2.0L;
|
||||
; }
|
||||
define fp128 @TestI128_1(fp128 %x) #0 {
|
||||
entry:
|
||||
%0 = bitcast fp128 %x to i128
|
||||
%bf.clear = and i128 %0, 170141183460469231731687303715884105727
|
||||
%1 = bitcast i128 %bf.clear to fp128
|
||||
%cmp = fcmp olt fp128 %1, 0xL999999999999999A3FFB999999999999
|
||||
%cond = select i1 %cmp, fp128 0xL00000000000000003FFF000000000000, fp128 0xL00000000000000004000000000000000
|
||||
ret fp128 %cond
|
||||
; CHECK-LABEL: TestI128_1:
|
||||
; CHECK: movaps %xmm0,
|
||||
; CHECK: movabsq $9223372036854775807,
|
||||
; CHECK: callq __lttf2
|
||||
; CHECK: testl %eax, %eax
|
||||
; CHECK: movaps {{.*}}, %xmm0
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
; C code:
|
||||
; __float128 TestI128_2(__float128 x, __float128 y)
|
||||
; {
|
||||
; unsigned short hx;
|
||||
; union IEEEl2bits ge_u;
|
||||
; ge_u.e = x;
|
||||
; hx = ge_u.xbits.expsign;
|
||||
; return (hx & 0x8000) == 0 ? x : y;
|
||||
; }
|
||||
define fp128 @TestI128_2(fp128 %x, fp128 %y) #0 {
|
||||
entry:
|
||||
%0 = bitcast fp128 %x to i128
|
||||
%cmp = icmp sgt i128 %0, -1
|
||||
%cond = select i1 %cmp, fp128 %x, fp128 %y
|
||||
ret fp128 %cond
|
||||
; CHECK-LABEL: TestI128_2:
|
||||
; CHECK: movaps %xmm0, -24(%rsp)
|
||||
; CHECK-NEXT: cmpq $0, -16(%rsp)
|
||||
; CHECK-NEXT: jns
|
||||
; CHECK: movaps %xmm1, %xmm0
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
; C code:
|
||||
; __float128 TestI128_3(__float128 x, int *ex)
|
||||
; {
|
||||
; union IEEEl2bits u;
|
||||
; u.e = x;
|
||||
; if (u.bits.exp == 0) {
|
||||
; u.e *= 0x1.0p514;
|
||||
; u.bits.exp = 0x3ffe;
|
||||
; }
|
||||
; return (u.e);
|
||||
; }
|
||||
define fp128 @TestI128_3(fp128 %x, i32* nocapture readnone %ex) #0 {
|
||||
entry:
|
||||
%0 = bitcast fp128 %x to i128
|
||||
%bf.cast = and i128 %0, 170135991163610696904058773219554885632
|
||||
%cmp = icmp eq i128 %bf.cast, 0
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%mul = fmul fp128 %x, 0xL00000000000000004201000000000000
|
||||
%1 = bitcast fp128 %mul to i128
|
||||
%bf.clear4 = and i128 %1, -170135991163610696904058773219554885633
|
||||
%bf.set = or i128 %bf.clear4, 85060207136517546210586590865283612672
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %entry
|
||||
%u.sroa.0.0 = phi i128 [ %bf.set, %if.then ], [ %0, %entry ]
|
||||
%2 = bitcast i128 %u.sroa.0.0 to fp128
|
||||
ret fp128 %2
|
||||
; CHECK-LABEL: TestI128_3:
|
||||
; CHECK: movaps %xmm0,
|
||||
; CHECK: movabsq $9223090561878065152,
|
||||
; CHECK: testq
|
||||
; CHECK: callq __multf3
|
||||
; CHECK-NEXT: movaps %xmm0
|
||||
; CHECK: movabsq $-9223090561878065153,
|
||||
; CHECK: movabsq $4611123068473966592,
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
; C code:
|
||||
; __float128 TestI128_4(__float128 x)
|
||||
; {
|
||||
; union IEEEl2bits u;
|
||||
; __float128 df;
|
||||
; u.e = x;
|
||||
; u.xbits.manl = 0;
|
||||
; df = u.e;
|
||||
; return x + df;
|
||||
; }
|
||||
define fp128 @TestI128_4(fp128 %x) #0 {
|
||||
entry:
|
||||
%0 = bitcast fp128 %x to i128
|
||||
%bf.clear = and i128 %0, -18446744073709551616
|
||||
%1 = bitcast i128 %bf.clear to fp128
|
||||
%add = fadd fp128 %1, %x
|
||||
ret fp128 %add
|
||||
; CHECK-LABEL: TestI128_4:
|
||||
; CHECK: movaps %xmm0, %xmm1
|
||||
; CHECK-NEXT: movaps %xmm1, 16(%rsp)
|
||||
; CHECK-NEXT: movq 24(%rsp), %rax
|
||||
; CHECK-NEXT: movq %rax, 8(%rsp)
|
||||
; CHECK-NEXT: movq $0, (%rsp)
|
||||
; CHECK-NEXT: movaps (%rsp), %xmm0
|
||||
; CHECK-NEXT: callq __addtf3
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
@v128 = common global i128 0, align 16
|
||||
@v128_2 = common global i128 0, align 16
|
||||
|
||||
; C code:
|
||||
; unsigned __int128 v128, v128_2;
|
||||
; void TestShift128_2() {
|
||||
; v128 = ((v128 << 96) | v128_2);
|
||||
; }
|
||||
define void @TestShift128_2() #2 {
|
||||
entry:
|
||||
%0 = load i128, i128* @v128, align 16
|
||||
%shl = shl i128 %0, 96
|
||||
%1 = load i128, i128* @v128_2, align 16
|
||||
%or = or i128 %shl, %1
|
||||
store i128 %or, i128* @v128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: TestShift128_2:
|
||||
; CHECK: movq v128(%rip), %rax
|
||||
; CHECK-NEXT: shlq $32, %rax
|
||||
; CHECK-NEXT: movq v128_2(%rip), %rcx
|
||||
; CHECK-NEXT: orq v128_2+8(%rip), %rax
|
||||
; CHECK-NEXT: movq %rcx, v128(%rip)
|
||||
; CHECK-NEXT: movq %rax, v128+8(%rip)
|
||||
; CHECK-NEXT: retq
|
||||
}
|
||||
|
||||
define fp128 @acosl(fp128 %x) #0 {
|
||||
entry:
|
||||
%0 = bitcast fp128 %x to i128
|
||||
%bf.clear = and i128 %0, -18446744073709551616
|
||||
%1 = bitcast i128 %bf.clear to fp128
|
||||
%add = fadd fp128 %1, %x
|
||||
ret fp128 %add
|
||||
; CHECK-LABEL: acosl:
|
||||
; CHECK: movaps %xmm0, %xmm1
|
||||
; CHECK-NEXT: movaps %xmm1, 16(%rsp)
|
||||
; CHECK-NEXT: movq 24(%rsp), %rax
|
||||
; CHECK-NEXT: movq %rax, 8(%rsp)
|
||||
; CHECK-NEXT: movq $0, (%rsp)
|
||||
; CHECK-NEXT: movaps (%rsp), %xmm0
|
||||
; CHECK-NEXT: callq __addtf3
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
; Compare i128 values and check i128 constants.
|
||||
define fp128 @TestComp(fp128 %x, fp128 %y) #0 {
|
||||
entry:
|
||||
%0 = bitcast fp128 %x to i128
|
||||
%cmp = icmp sgt i128 %0, -1
|
||||
%cond = select i1 %cmp, fp128 %x, fp128 %y
|
||||
ret fp128 %cond
|
||||
; CHECK-LABEL: TestComp:
|
||||
; CHECK: movaps %xmm0, -24(%rsp)
|
||||
; CHECK-NEXT: cmpq $0, -16(%rsp)
|
||||
; CHECK-NEXT: jns
|
||||
; CHECK: movaps %xmm1, %xmm0
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
declare void @foo(fp128) #1
|
||||
|
||||
; Test logical operations on fp128 values.
|
||||
define fp128 @TestFABS_LD(fp128 %x) #0 {
|
||||
entry:
|
||||
%call = tail call fp128 @fabsl(fp128 %x) #2
|
||||
ret fp128 %call
|
||||
; CHECK-LABEL: TestFABS_LD
|
||||
; CHECK: andps {{.*}}, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
}
|
||||
|
||||
declare fp128 @fabsl(fp128) #1
|
||||
|
||||
declare fp128 @copysignl(fp128, fp128) #1
|
||||
|
||||
; Test more complicated logical operations generated from copysignl.
|
||||
define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, { fp128, fp128 }* byval nocapture readonly align 16 %z) #0 {
|
||||
entry:
|
||||
%z.realp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 0
|
||||
%z.real = load fp128, fp128* %z.realp, align 16
|
||||
%z.imagp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 1
|
||||
%z.imag4 = load fp128, fp128* %z.imagp, align 16
|
||||
%cmp = fcmp ogt fp128 %z.real, %z.imag4
|
||||
%sub = fsub fp128 %z.imag4, %z.imag4
|
||||
br i1 %cmp, label %if.then, label %cleanup
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%call = tail call fp128 @fabsl(fp128 %sub) #2
|
||||
br label %cleanup
|
||||
|
||||
cleanup: ; preds = %entry, %if.then
|
||||
%z.real.sink = phi fp128 [ %z.real, %if.then ], [ %sub, %entry ]
|
||||
%call.sink = phi fp128 [ %call, %if.then ], [ %z.real, %entry ]
|
||||
%call5 = tail call fp128 @copysignl(fp128 %z.real.sink, fp128 %z.imag4) #2
|
||||
%0 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 0
|
||||
%1 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 1
|
||||
store fp128 %call.sink, fp128* %0, align 16
|
||||
store fp128 %call5, fp128* %1, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: TestCopySign
|
||||
; CHECK-NOT: call
|
||||
; CHECK: callq __subtf3
|
||||
; CHECK-NOT: call
|
||||
; CHECK: callq __gttf2
|
||||
; CHECK-NOT: call
|
||||
; CHECK: andps {{.*}}, %xmm0
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
|
||||
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { nounwind readnone }
|
107
test/CodeGen/X86/fp128-libcalls.ll
Normal file
107
test/CodeGen/X86/fp128-libcalls.ll
Normal file
@ -0,0 +1,107 @@
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
|
||||
|
||||
; Check all soft floating point library function calls.
|
||||
|
||||
@vf64 = common global double 0.000000e+00, align 8
|
||||
@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16
|
||||
|
||||
define void @Test128Add(fp128 %d1, fp128 %d2) {
|
||||
entry:
|
||||
%add = fadd fp128 %d1, %d2
|
||||
store fp128 %add, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: Test128Add:
|
||||
; CHECK: callq __addtf3
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @Test128_1Add(fp128 %d1){
|
||||
entry:
|
||||
%0 = load fp128, fp128* @vf128, align 16
|
||||
%add = fadd fp128 %0, %d1
|
||||
store fp128 %add, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: Test128_1Add:
|
||||
; CHECK: movaps %xmm0, %xmm1
|
||||
; CHECK-NEXT: movaps vf128(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __addtf3
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @Test128Sub(fp128 %d1, fp128 %d2){
|
||||
entry:
|
||||
%sub = fsub fp128 %d1, %d2
|
||||
store fp128 %sub, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: Test128Sub:
|
||||
; CHECK: callq __subtf3
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @Test128_1Sub(fp128 %d1){
|
||||
entry:
|
||||
%0 = load fp128, fp128* @vf128, align 16
|
||||
%sub = fsub fp128 %0, %d1
|
||||
store fp128 %sub, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: Test128_1Sub:
|
||||
; CHECK: movaps %xmm0, %xmm1
|
||||
; CHECK-NEXT: movaps vf128(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __subtf3
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @Test128Mul(fp128 %d1, fp128 %d2){
|
||||
entry:
|
||||
%mul = fmul fp128 %d1, %d2
|
||||
store fp128 %mul, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: Test128Mul:
|
||||
; CHECK: callq __multf3
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @Test128_1Mul(fp128 %d1){
|
||||
entry:
|
||||
%0 = load fp128, fp128* @vf128, align 16
|
||||
%mul = fmul fp128 %0, %d1
|
||||
store fp128 %mul, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: Test128_1Mul:
|
||||
; CHECK: movaps %xmm0, %xmm1
|
||||
; CHECK-NEXT: movaps vf128(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __multf3
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @Test128Div(fp128 %d1, fp128 %d2){
|
||||
entry:
|
||||
%div = fdiv fp128 %d1, %d2
|
||||
store fp128 %div, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: Test128Div:
|
||||
; CHECK: callq __divtf3
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @Test128_1Div(fp128 %d1){
|
||||
entry:
|
||||
%0 = load fp128, fp128* @vf128, align 16
|
||||
%div = fdiv fp128 %0, %d1
|
||||
store fp128 %div, fp128* @vf128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: Test128_1Div:
|
||||
; CHECK: movaps %xmm0, %xmm1
|
||||
; CHECK-NEXT: movaps vf128(%rip), %xmm0
|
||||
; CHECK-NEXT: callq __divtf3
|
||||
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
|
||||
; CHECK: retq
|
||||
}
|
35
test/CodeGen/X86/fp128-load.ll
Normal file
35
test/CodeGen/X86/fp128-load.ll
Normal file
@ -0,0 +1,35 @@
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
|
||||
|
||||
; __float128 my_fp128 = 1.0L; // x86_64-linux-android
|
||||
@my_fp128 = global fp128 0xL00000000000000003FFF000000000000, align 16
|
||||
|
||||
define fp128 @get_fp128() {
|
||||
entry:
|
||||
%0 = load fp128, fp128* @my_fp128, align 16
|
||||
ret fp128 %0
|
||||
; CHECK-LABEL: get_fp128:
|
||||
; CHECK: movaps my_fp128(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
}
|
||||
|
||||
@TestLoadExtend.data = internal unnamed_addr constant [2 x float] [float 0x3FB99999A0000000, float 0x3FC99999A0000000], align 4
|
||||
|
||||
define fp128 @TestLoadExtend(fp128 %x, i32 %n) {
|
||||
entry:
|
||||
%idxprom = sext i32 %n to i64
|
||||
%arrayidx = getelementptr inbounds [2 x float], [2 x float]* @TestLoadExtend.data, i64 0, i64 %idxprom
|
||||
%0 = load float, float* %arrayidx, align 4
|
||||
%conv = fpext float %0 to fp128
|
||||
ret fp128 %conv
|
||||
; CHECK-LABEL: TestLoadExtend:
|
||||
; CHECK: movslq %edi, %rax
|
||||
; CHECK-NEXT: movss TestLoadExtend.data(,%rax,4), %xmm0
|
||||
; CHECK-NEXT: callq __extendsftf2
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
; CHECK-LABEL: my_fp128:
|
||||
; CHECK-NEXT: .quad 0
|
||||
; CHECK-NEXT: .quad 4611404543450677248
|
||||
; CHECK-NEXT: .size my_fp128, 16
|
14
test/CodeGen/X86/fp128-store.ll
Normal file
14
test/CodeGen/X86/fp128-store.ll
Normal file
@ -0,0 +1,14 @@
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
|
||||
; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
|
||||
|
||||
; __float128 myFP128 = 1.0L; // x86_64-linux-android
|
||||
@myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16
|
||||
|
||||
define void @set_FP128(fp128 %x) {
|
||||
entry:
|
||||
store fp128 %x, fp128* @myFP128, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: set_FP128:
|
||||
; CHECK: movaps %xmm0, myFP128(%rip)
|
||||
; CHECK-NEXT: retq
|
||||
}
|
@ -1,8 +1,14 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2,+soft-float | FileCheck %s
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+soft-float | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-gnux32 -mattr=+sse2,+soft-float | FileCheck %s
|
||||
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse,+soft-float \
|
||||
; RUN: | FileCheck %s --check-prefix=SOFT1 --check-prefix=CHECK
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2,+soft-float \
|
||||
; RUN: | FileCheck %s --check-prefix=SOFT2 --check-prefix=CHECK
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse \
|
||||
; RUN: | FileCheck %s --check-prefix=SSE1 --check-prefix=CHECK
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 \
|
||||
; RUN: | FileCheck %s --check-prefix=SSE2 --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=x86_64-gnux32 -mattr=+mmx,+sse2,+soft-float | FileCheck %s
|
||||
|
||||
; CHECK-NOT: xmm{[0-9]+}
|
||||
; CHECK-NOT: xmm{{[0-9]+}}
|
||||
|
||||
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
|
||||
|
||||
@ -15,6 +21,8 @@ entry:
|
||||
call void @bar(%struct.__va_list_tag* %va3) nounwind
|
||||
call void @llvm.va_end(i8* %va12)
|
||||
ret i32 undef
|
||||
; CHECK-LABEL: t1:
|
||||
; CHECK: ret{{[lq]}}
|
||||
}
|
||||
|
||||
declare void @llvm.va_start(i8*) nounwind
|
||||
@ -27,4 +35,23 @@ define float @t2(float %a, float %b) nounwind readnone {
|
||||
entry:
|
||||
%0 = fadd float %a, %b ; <float> [#uses=1]
|
||||
ret float %0
|
||||
; CHECK-LABEL: t2:
|
||||
; SOFT1-NOT: xmm{{[0-9]+}}
|
||||
; SOFT2-NOT: xmm{{[0-9]+}}
|
||||
; SSE1: xmm{{[0-9]+}}
|
||||
; SSE2: xmm{{[0-9]+}}
|
||||
; CHECK: ret{{[lq]}}
|
||||
}
|
||||
|
||||
; soft-float means no SSE instructions are used and fp128 is passed as a pair of i64.
|
||||
define fp128 @t3(fp128 %a, fp128 %b) nounwind readnone {
|
||||
entry:
|
||||
%0 = fadd fp128 %b, %a
|
||||
ret fp128 %0
|
||||
; CHECK-LABEL: t3:
|
||||
; SOFT1-NOT: xmm{{[0-9]+}}
|
||||
; SOFT2-NOT: xmm{{[0-9]+}}
|
||||
; SSE1: xmm{{[0-9]+}}
|
||||
; SSE2: xmm{{[0-9]+}}
|
||||
; CHECK: ret{{[lq]}}
|
||||
}
|
||||
|
@ -951,6 +951,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
|
||||
TYPE("f128mem", TYPE_M128)
|
||||
TYPE("f256mem", TYPE_M256)
|
||||
TYPE("f512mem", TYPE_M512)
|
||||
TYPE("FR128", TYPE_XMM128)
|
||||
TYPE("FR64", TYPE_XMM64)
|
||||
TYPE("FR64X", TYPE_XMM64)
|
||||
TYPE("f64mem", TYPE_M64FP)
|
||||
@ -1069,6 +1070,7 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
|
||||
// register IDs in 8-bit immediates nowadays.
|
||||
ENCODING("FR32", ENCODING_IB)
|
||||
ENCODING("FR64", ENCODING_IB)
|
||||
ENCODING("FR128", ENCODING_IB)
|
||||
ENCODING("VR128", ENCODING_IB)
|
||||
ENCODING("VR256", ENCODING_IB)
|
||||
ENCODING("FR32X", ENCODING_IB)
|
||||
@ -1091,6 +1093,7 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
|
||||
ENCODING("GR8", ENCODING_RM)
|
||||
ENCODING("VR128", ENCODING_RM)
|
||||
ENCODING("VR128X", ENCODING_RM)
|
||||
ENCODING("FR128", ENCODING_RM)
|
||||
ENCODING("FR64", ENCODING_RM)
|
||||
ENCODING("FR32", ENCODING_RM)
|
||||
ENCODING("FR64X", ENCODING_RM)
|
||||
@ -1120,6 +1123,7 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s,
|
||||
ENCODING("GR64", ENCODING_REG)
|
||||
ENCODING("GR8", ENCODING_REG)
|
||||
ENCODING("VR128", ENCODING_REG)
|
||||
ENCODING("FR128", ENCODING_REG)
|
||||
ENCODING("FR64", ENCODING_REG)
|
||||
ENCODING("FR32", ENCODING_REG)
|
||||
ENCODING("VR64", ENCODING_REG)
|
||||
@ -1157,6 +1161,7 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
|
||||
ENCODING("GR32", ENCODING_VVVV)
|
||||
ENCODING("GR64", ENCODING_VVVV)
|
||||
ENCODING("FR32", ENCODING_VVVV)
|
||||
ENCODING("FR128", ENCODING_VVVV)
|
||||
ENCODING("FR64", ENCODING_VVVV)
|
||||
ENCODING("VR128", ENCODING_VVVV)
|
||||
ENCODING("VR256", ENCODING_VVVV)
|
||||
|
Loading…
Reference in New Issue
Block a user