[X86] Add custom lowering for lrint/llrint to either cvtss2si/cvtsd2si or fist.
lrint/llrint are defined as rounding using the current rounding mode. Numbers that can't be converted raise FE_INVALID and an implementation-defined value is returned. They may also write to errno. I believe this means we can use cvtss2si/cvtsd2si or fist to convert as long as -fno-math-errno is passed on the command line. Clang will leave them as libcalls if errno is enabled, so they won't become ISD::LRINT/LLRINT in SelectionDAG.

For 64-bit results on a 32-bit target we can't use cvtss2si/cvtsd2si, but we can use fist since it can write to a 64-bit memory location. Though maybe we could consider using vcvtps2qq/vcvtpd2qq on avx512dq targets?

gcc also does this optimization.

I think we might be able to do this with STRICT_LRINT/LLRINT as well, but I've left that for future work.

Differential Revision: https://reviews.llvm.org/D73859
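For context, a minimal sketch of the user-level pattern this lowering targets (not part of the commit; the file and function names are illustrative). With -fno-math-errno, clang emits ISD::LRINT/ISD::LLRINT for these calls, which after this change can select to cvtss2si/cvtsd2si, or to an x87 fistpll for i64 results on 32-bit targets:

// round.cpp -- build with: clang++ -O2 -fno-math-errno -c round.cpp
// Without -fno-math-errno these calls stay libcalls (so they may set errno)
// and this lowering never sees them.
#include <cmath>

long to_long(float x) {
  return std::lrint(x);    // expected: a single cvtss2si on x86-64
}

long long to_llong(double x) {
  return std::llrint(x);   // expected: cvtsd2si on x86-64; fistpll on i686
}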
commit ccc67b1eb8 (parent 65d778ed6e)
@@ -270,6 +270,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
+
+    setOperationAction(ISD::LRINT, MVT::f32, Custom);
+    setOperationAction(ISD::LRINT, MVT::f64, Custom);
+    setOperationAction(ISD::LLRINT, MVT::f32, Custom);
+    setOperationAction(ISD::LLRINT, MVT::f64, Custom);
+
+    if (!Subtarget.is64Bit()) {
+      setOperationAction(ISD::LRINT, MVT::i64, Custom);
+      setOperationAction(ISD::LLRINT, MVT::i64, Custom);
+    }
   }

   // Handle address space casts between mixed sized pointers.
@@ -663,8 +673,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FMA, MVT::f80, Expand);
     setOperationAction(ISD::LROUND, MVT::f80, Expand);
     setOperationAction(ISD::LLROUND, MVT::f80, Expand);
-    setOperationAction(ISD::LRINT, MVT::f80, Expand);
-    setOperationAction(ISD::LLRINT, MVT::f80, Expand);
+    setOperationAction(ISD::LRINT, MVT::f80, Custom);
+    setOperationAction(ISD::LLRINT, MVT::f80, Custom);

     // Handle constrained floating-point operations of scalar.
     setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
@@ -20306,6 +20316,63 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
   llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
 }

+SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  SDValue Src = Op.getOperand(0);
+  MVT SrcVT = Src.getSimpleValueType();
+
+  // If the source is in an SSE register, the node is Legal.
+  if (isScalarFPTypeInSSEReg(SrcVT))
+    return Op;
+
+  return LRINT_LLRINTHelper(Op.getNode(), DAG);
+}
+
+SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
+                                              SelectionDAG &DAG) const {
+  EVT DstVT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+  EVT SrcVT = Src.getValueType();
+
+  if (SrcVT != MVT::f32 && SrcVT != MVT::f64 && SrcVT != MVT::f80) {
+    // f16 must be promoted before using the lowering in this routine.
+    // fp128 does not use this lowering.
+    return SDValue();
+  }
+
+  SDLoc DL(N);
+  SDValue Chain = DAG.getEntryNode();
+
+  bool UseSSE = isScalarFPTypeInSSEReg(SrcVT);
+
+  // If we're converting from SSE, the stack slot needs to hold both types.
+  // Otherwise it only needs to hold the DstVT.
+  EVT OtherVT = UseSSE ? SrcVT : DstVT;
+  SDValue StackPtr = DAG.CreateStackTemporary(DstVT, OtherVT);
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachinePointerInfo MPI =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+  if (UseSSE) {
+    assert(DstVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!");
+    Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI);
+    SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+    SDValue Ops[] = { Chain, StackPtr };
+
+    Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI,
+                                  /*Align*/0, MachineMemOperand::MOLoad);
+    Chain = Src.getValue(1);
+  }
+
+  SDValue StoreOps[] = { Chain, Src, StackPtr };
+  Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL,
+                                  DAG.getVTList(MVT::Other), StoreOps,
+                                  DstVT, MPI, /*Align*/0,
+                                  MachineMemOperand::MOStore);
+
+  return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
+}
+
 SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   bool IsStrict = Op->isStrictFPOpcode();

@@ -28637,6 +28704,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FNEG:               return LowerFABSorFNEG(Op, DAG);
   case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
   case ISD::FGETSIGN:           return LowerFGETSIGN(Op, DAG);
+  case ISD::LRINT:
+  case ISD::LLRINT:             return LowerLRINT_LLRINT(Op, DAG);
   case ISD::SETCC:
   case ISD::STRICT_FSETCC:
   case ISD::STRICT_FSETCCS:     return LowerSETCC(Op, DAG);
@@ -29191,6 +29260,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     }
     return;
   }
+  case ISD::LRINT:
+  case ISD::LLRINT: {
+    if (SDValue V = LRINT_LLRINTHelper(N, DAG))
+      Results.push_back(V);
+    return;
+  }
+
   case ISD::SINT_TO_FP:
   case ISD::STRICT_SINT_TO_FP:
   case ISD::UINT_TO_FP:
@@ -1334,6 +1334,7 @@ namespace llvm {

     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned,
                             SDValue &Chain) const;
+    SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -1357,6 +1358,7 @@ namespace llvm {
     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
@@ -7233,6 +7233,45 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2u
                           X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                           XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
+                        X86VectorVTInfo DstVT, SDNode OpNode,
+                        X86FoldableSchedWrite sched,
+                        string aliasStr> {
+  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
+    let isCodeGenOnly = 1 in {
+      def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
+                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+                      [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
+                      EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+      def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
+                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+                      [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
+                      EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+    }
+  } // Predicates = [HasAVX512]
+}
+
+defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
+                              lrint, WriteCvtSS2I,
+                              "{l}">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
+                                llrint, WriteCvtSS2I,
+                                "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
+                              lrint, WriteCvtSD2I,
+                              "{l}">, XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
+                               llrint, WriteCvtSD2I,
+                               "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+let Predicates = [HasAVX512] in {
+  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
+  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
+
+  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
+  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
+}

 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
 // which produce unnecessary vmovs{s,d} instructions
 let Predicates = [HasAVX512] in {
@@ -74,6 +74,11 @@ def X86fild64 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
 }]>;

+def X86fist32 : PatFrag<(ops node:$val, node:$ptr),
+                        (X86fist node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
 def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
                         (X86fist node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
@@ -525,14 +530,20 @@ def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,

 let mayStore = 1, hasSideEffects = 0 in {
 def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+                    [(X86fist32 RFP32:$src, addr:$op)]>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
+                    [(X86fist64 RFP32:$src, addr:$op)]>;
 def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
+                    [(X86fist32 RFP64:$src, addr:$op)]>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
+                    [(X86fist64 RFP64:$src, addr:$op)]>;
 def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
-def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
-def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
+                    [(X86fist32 RFP80:$src, addr:$op)]>;
+def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
+                    [(X86fist64 RFP80:$src, addr:$op)]>;
 } // mayStore
 } // SchedRW, Uses = [FPCW]

@@ -791,9 +802,6 @@ def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
 def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
 def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;

-// Used to conv. between f80 and i64 for i64 atomic loads.
-def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
-
 // FP extensions map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
 def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
@@ -884,6 +884,23 @@ defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf6
                                 "cvttsd2si", "cvttsd2si",
                                 WriteCvtSD2I, SSEPackedDouble>,
                                 XD, VEX, VEX_W, VEX_LIG;
+
+defm VCVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
+                               "cvtss2si", "cvtss2si",
+                               WriteCvtSS2I, SSEPackedSingle>,
+                               XS, VEX, VEX_LIG;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
+                               "cvtss2si", "cvtss2si",
+                               WriteCvtSS2I, SSEPackedSingle>,
+                               XS, VEX, VEX_W, VEX_LIG;
+defm VCVTSD2SI   : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
+                               "cvtsd2si", "cvtsd2si",
+                               WriteCvtSD2I, SSEPackedDouble>,
+                               XD, VEX, VEX_LIG;
+defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
+                               "cvtsd2si", "cvtsd2si",
+                               WriteCvtSD2I, SSEPackedDouble>,
+                               XD, VEX, VEX_W, VEX_LIG;
 }

 // The assembler can recognize rr 64-bit instructions by seeing a rxx
@@ -923,6 +940,12 @@ let Predicates = [UseAVX] in {
             (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
   def : Pat<(f64 (any_sint_to_fp GR64:$src)),
             (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+
+  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>;
+  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>;
+
+  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>;
+  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>;
 }

 let isCodeGenOnly = 1 in {
@@ -938,6 +961,20 @@ defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
 defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
                                "cvttsd2si", "cvttsd2si",
                                WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
+
+defm CVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
+                              "cvtss2si", "cvtss2si",
+                              WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
+defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
+                              "cvtss2si", "cvtss2si",
+                              WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
+defm CVTSD2SI   : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
+                              "cvtsd2si", "cvtsd2si",
+                              WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
+defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
+                              "cvtsd2si", "cvtsd2si",
+                              WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
+
 defm CVTSI2SS  : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
                              "cvtsi2ss", "cvtsi2ss{l}",
                              WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
@@ -952,6 +989,16 @@ defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
                              WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
 } // isCodeGenOnly = 1

+let Predicates = [UseSSE1] in {
+  def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>;
+  def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>;
+}
+
+let Predicates = [UseSSE2] in {
+  def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>;
+  def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>;
+}
+
 // Conversion Instructions Intrinsics - Match intrinsics which expect MM
 // and/or XMM operand(s).

@@ -1,65 +1,153 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
+; RUN: llc < %s -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX

 define i64 @testmsxs(float %x) {
-; X86-LABEL: testmsxs:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %eax
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    fstps (%esp)
-; X86-NEXT:    calll llrintf
-; X86-NEXT:    popl %ecx
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
+; X86-NOSSE-LABEL: testmsxs:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %ebp
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOSSE-NEXT:    .cfi_offset %ebp, -8
+; X86-NOSSE-NEXT:    movl %esp, %ebp
+; X86-NOSSE-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NOSSE-NEXT:    andl $-8, %esp
+; X86-NOSSE-NEXT:    subl $8, %esp
+; X86-NOSSE-NEXT:    flds 8(%ebp)
+; X86-NOSSE-NEXT:    fistpll (%esp)
+; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl %ebp, %esp
+; X86-NOSSE-NEXT:    popl %ebp
+; X86-NOSSE-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NOSSE-NEXT:    retl
 ;
-; SSE2-LABEL: testmsxs:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pushl %eax
-; SSE2-NEXT:    .cfi_def_cfa_offset 8
-; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT:    movss %xmm0, (%esp)
-; SSE2-NEXT:    calll llrintf
-; SSE2-NEXT:    popl %ecx
-; SSE2-NEXT:    .cfi_def_cfa_offset 4
-; SSE2-NEXT:    retl
+; X86-SSE2-LABEL: testmsxs:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT:    andl $-8, %esp
+; X86-SSE2-NEXT:    subl $8, %esp
+; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT:    movss %xmm0, (%esp)
+; X86-SSE2-NEXT:    flds (%esp)
+; X86-SSE2-NEXT:    fistpll (%esp)
+; X86-SSE2-NEXT:    movl (%esp), %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    movl %ebp, %esp
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT:    retl
 ;
-; X64-LABEL: testmsxs:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    jmp llrintf # TAILCALL
+; X86-AVX-LABEL: testmsxs:
+; X86-AVX:       # %bb.0: # %entry
+; X86-AVX-NEXT:    pushl %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
+; X86-AVX-NEXT:    .cfi_offset %ebp, -8
+; X86-AVX-NEXT:    movl %esp, %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
+; X86-AVX-NEXT:    andl $-8, %esp
+; X86-AVX-NEXT:    subl $8, %esp
+; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
+; X86-AVX-NEXT:    flds (%esp)
+; X86-AVX-NEXT:    fistpll (%esp)
+; X86-AVX-NEXT:    movl (%esp), %eax
+; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX-NEXT:    movl %ebp, %esp
+; X86-AVX-NEXT:    popl %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: testmsxs:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    cvtss2si %xmm0, %rax
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: testmsxs:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX-NEXT:    retq
 entry:
   %0 = tail call i64 @llvm.llrint.f32(float %x)
   ret i64 %0
 }

 define i64 @testmsxd(double %x) {
-; X86-LABEL: testmsxd:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 12
-; X86-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-NEXT:    fstpl (%esp)
-; X86-NEXT:    calll llrint
-; X86-NEXT:    addl $8, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
+; X86-NOSSE-LABEL: testmsxd:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %ebp
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOSSE-NEXT:    .cfi_offset %ebp, -8
+; X86-NOSSE-NEXT:    movl %esp, %ebp
+; X86-NOSSE-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NOSSE-NEXT:    andl $-8, %esp
+; X86-NOSSE-NEXT:    subl $8, %esp
+; X86-NOSSE-NEXT:    fldl 8(%ebp)
+; X86-NOSSE-NEXT:    fistpll (%esp)
+; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl %ebp, %esp
+; X86-NOSSE-NEXT:    popl %ebp
+; X86-NOSSE-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NOSSE-NEXT:    retl
 ;
-; SSE2-LABEL: testmsxd:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    subl $8, %esp
-; SSE2-NEXT:    .cfi_def_cfa_offset 12
-; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT:    movsd %xmm0, (%esp)
-; SSE2-NEXT:    calll llrint
-; SSE2-NEXT:    addl $8, %esp
-; SSE2-NEXT:    .cfi_def_cfa_offset 4
-; SSE2-NEXT:    retl
+; X86-SSE2-LABEL: testmsxd:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT:    andl $-8, %esp
+; X86-SSE2-NEXT:    subl $8, %esp
+; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE2-NEXT:    fldl (%esp)
+; X86-SSE2-NEXT:    fistpll (%esp)
+; X86-SSE2-NEXT:    movl (%esp), %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    movl %ebp, %esp
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT:    retl
 ;
-; X64-LABEL: testmsxd:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    jmp llrint # TAILCALL
+; X86-AVX-LABEL: testmsxd:
+; X86-AVX:       # %bb.0: # %entry
+; X86-AVX-NEXT:    pushl %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
+; X86-AVX-NEXT:    .cfi_offset %ebp, -8
+; X86-AVX-NEXT:    movl %esp, %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
+; X86-AVX-NEXT:    andl $-8, %esp
+; X86-AVX-NEXT:    subl $8, %esp
+; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
+; X86-AVX-NEXT:    fldl (%esp)
+; X86-AVX-NEXT:    fistpll (%esp)
+; X86-AVX-NEXT:    movl (%esp), %eax
+; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX-NEXT:    movl %ebp, %esp
+; X86-AVX-NEXT:    popl %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: testmsxd:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    cvtsd2si %xmm0, %rax
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: testmsxd:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX-NEXT:    retq
 entry:
   %0 = tail call i64 @llvm.llrint.f64(double %x)
   ret i64 %0
@@ -68,29 +156,28 @@ entry:
 define i64 @testmsll(x86_fp80 %x) {
 ; X86-LABEL: testmsll:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 16
-; X86-NEXT:    fldt {{[0-9]+}}(%esp)
-; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    calll llrintl
-; X86-NEXT:    addl $12, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    fldt 8(%ebp)
+; X86-NEXT:    fistpll (%esp)
+; X86-NEXT:    movl (%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl
 ;
-; SSE2-LABEL: testmsll:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    subl $12, %esp
-; SSE2-NEXT:    .cfi_def_cfa_offset 16
-; SSE2-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE2-NEXT:    fstpt (%esp)
-; SSE2-NEXT:    calll llrintl
-; SSE2-NEXT:    addl $12, %esp
-; SSE2-NEXT:    .cfi_def_cfa_offset 4
-; SSE2-NEXT:    retl
-;
 ; X64-LABEL: testmsll:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    jmp llrintl # TAILCALL
+; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; X64-NEXT:    fistpll -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    retq
 entry:
   %0 = tail call i64 @llvm.llrint.f80(x86_fp80 %x)
   ret i64 %0
@@ -1,30 +1,102 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
+; RUN: llc < %s -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX

 define i32 @testmsws(float %x) {
-; CHECK-LABEL: testmsws:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    jmp lrintf # TAILCALL
+; X86-NOSSE-LABEL: testmsws:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fistpl (%esp)
+; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    popl %ecx
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: testmsws:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    cvtss2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: testmsws:
+; X86-AVX:       # %bb.0: # %entry
+; X86-AVX-NEXT:    vcvtss2si {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: testmsws:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    cvtss2si %xmm0, %eax
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: testmsws:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    vcvtss2si %xmm0, %eax
+; X64-AVX-NEXT:    retq
 entry:
   %0 = tail call i32 @llvm.lrint.i32.f32(float %x)
   ret i32 %0
 }

 define i32 @testmswd(double %x) {
-; CHECK-LABEL: testmswd:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    jmp lrint # TAILCALL
+; X86-NOSSE-LABEL: testmswd:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fistpl (%esp)
+; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    popl %ecx
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: testmswd:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    cvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: testmswd:
+; X86-AVX:       # %bb.0: # %entry
+; X86-AVX-NEXT:    vcvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: testmswd:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    cvtsd2si %xmm0, %eax
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: testmswd:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    vcvtsd2si %xmm0, %eax
+; X64-AVX-NEXT:    retq
 entry:
   %0 = tail call i32 @llvm.lrint.i32.f64(double %x)
   ret i32 %0
 }

 define i32 @testmsll(x86_fp80 %x) {
-; CHECK-LABEL: testmsll:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    jmp lrintl # TAILCALL
+; X86-LABEL: testmsll:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    fldt {{[0-9]+}}(%esp)
+; X86-NEXT:    fistpl (%esp)
+; X86-NEXT:    movl (%esp), %eax
+; X86-NEXT:    popl %ecx
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: testmsll:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; X64-NEXT:    fistpl -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    retq
 entry:
   %0 = tail call i32 @llvm.lrint.i32.f80(x86_fp80 %x)
   ret i32 %0
@@ -1,19 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX

 define i64 @testmsxs(float %x) {
-; CHECK-LABEL: testmsxs:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    jmp lrintf # TAILCALL
+; SSE-LABEL: testmsxs:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvtss2si %xmm0, %rax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: testmsxs:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtss2si %xmm0, %rax
+; AVX-NEXT:    retq
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
   ret i64 %0
 }

 define i64 @testmsxd(double %x) {
-; CHECK-LABEL: testmsxd:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    jmp lrint # TAILCALL
+; SSE-LABEL: testmsxd:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvtsd2si %xmm0, %rax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: testmsxd:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtsd2si %xmm0, %rax
+; AVX-NEXT:    retq
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
   ret i64 %0
@@ -22,7 +36,10 @@ entry:
 define i64 @testmsll(x86_fp80 %x) {
 ; CHECK-LABEL: testmsll:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    jmp lrintl # TAILCALL
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fistpll -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    retq
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f80(x86_fp80 %x)
   ret i64 %0