1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[X86] Add custom lowering for lrint/llrint to either cvtss2si/cvtsd2si or fist.

lrint/llrint are defined as rounding using the current rounding
mode. Inputs that can't be converted raise FE_INVALID, and an
implementation-defined value is returned. These functions may also
write to errno.

I believe this means we can use cvtss2si/cvtsd2si or fist to
convert as long as -fno-math-errno is passed on the command line.
Clang will leave them as libcalls if errno is enabled so they
won't become ISD::LRINT/LLRINT in SelectionDAG.

For 64-bit results on a 32-bit target we can't use cvtss2si/cvtsd2si
but we can use fist since it can write to a 64-bit memory location.
Though maybe we could consider using vcvtps2qq/vcvtpd2qq on avx512dq
targets?

gcc also does this optimization.

I think we might be able to do this with STRICT_LRINT/LLRINT as
well, but I've left that for future work.

Differential Revision: https://reviews.llvm.org/D73859
This commit is contained in:
Craig Topper 2020-02-04 15:25:16 -08:00
parent 65d778ed6e
commit ccc67b1eb8
8 changed files with 447 additions and 99 deletions

View File

@ -270,6 +270,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::LRINT, MVT::f32, Custom);
setOperationAction(ISD::LRINT, MVT::f64, Custom);
setOperationAction(ISD::LLRINT, MVT::f32, Custom);
setOperationAction(ISD::LLRINT, MVT::f64, Custom);
if (!Subtarget.is64Bit()) {
setOperationAction(ISD::LRINT, MVT::i64, Custom);
setOperationAction(ISD::LLRINT, MVT::i64, Custom);
}
}
// Handle address space casts between mixed sized pointers.
@ -663,8 +673,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::f80, Expand);
setOperationAction(ISD::LROUND, MVT::f80, Expand);
setOperationAction(ISD::LLROUND, MVT::f80, Expand);
setOperationAction(ISD::LRINT, MVT::f80, Expand);
setOperationAction(ISD::LLRINT, MVT::f80, Expand);
setOperationAction(ISD::LRINT, MVT::f80, Custom);
setOperationAction(ISD::LLRINT, MVT::f80, Custom);
// Handle constrained floating-point operations of scalar.
setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
@ -20306,6 +20316,63 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
/// Custom lowering entry point for ISD::LRINT and ISD::LLRINT.
///
/// \param Op  The LRINT/LLRINT node; operand 0 is the floating-point source.
/// \param DAG The SelectionDAG handed on to the helper when it is needed.
/// \returns   \p Op itself when the source type is a scalar FP type kept in an
///            SSE register, otherwise whatever LRINT_LLRINTHelper produces.
SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op,
                                             SelectionDAG &DAG) const {
  MVT OperandVT = Op.getOperand(0).getSimpleValueType();

  // Sources that are not held in an SSE register are lowered by the helper.
  if (!isScalarFPTypeInSSEReg(OperandVT))
    return LRINT_LLRINTHelper(Op.getNode(), DAG);

  // With the source in an SSE register the node is already Legal, so it is
  // returned untouched.
  return Op;
}
/// Lower an LRINT/LLRINT node through a stack temporary: the value is written
/// to the slot with an X86ISD::FIST memory node and the result is then loaded
/// back from that slot with the node's result type.
///
/// \param N   The node being lowered; operand 0 is the floating-point source
///            and value type 0 is the result type.
/// \param DAG The SelectionDAG in which the replacement nodes are built.
/// \returns   A load of the result type from the stack temporary, or an empty
///            SDValue when the source type is not f32, f64 or f80.
SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
                                              SelectionDAG &DAG) const {
  EVT ResultVT = N->getValueType(0);
  SDValue Value = N->getOperand(0);
  EVT InputVT = Value.getValueType();

  // f16 must be promoted before using the lowering in this routine, and
  // fp128 does not use this lowering, so only f32/f64/f80 get past here.
  bool IsHandledType =
      InputVT == MVT::f32 || InputVT == MVT::f64 || InputVT == MVT::f80;
  if (!IsHandledType)
    return SDValue();

  SDLoc DL(N);
  SDValue Chain = DAG.getEntryNode();
  bool SrcInSSE = isScalarFPTypeInSSEReg(InputVT);

  // The slot always has to hold the result type. When the source comes from
  // SSE it is also written to this slot, so the slot must fit the source type
  // as well.
  EVT CompanionVT = ResultVT;
  if (SrcInSSE)
    CompanionVT = InputVT;
  SDValue Slot = DAG.CreateStackTemporary(ResultVT, CompanionVT);
  int SlotFI = cast<FrameIndexSDNode>(Slot.getNode())->getIndex();
  MachinePointerInfo SlotInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SlotFI);

  if (SrcInSSE) {
    assert(ResultVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!");

    // Store the SSE value to the slot, then reload it as f80 with
    // X86ISD::FLD so that it can be fed to the FIST node below.
    Chain = DAG.getStore(Chain, DL, Value, Slot, SlotInfo);
    SDValue ReloadOps[] = {Chain, Slot};
    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL,
                                    DAG.getVTList(MVT::f80, MVT::Other),
                                    ReloadOps, InputVT, SlotInfo,
                                    /*Align*/0, MachineMemOperand::MOLoad);
    // Continue the chain from the FLD node's second result.
    Chain = Value.getValue(1);
  }

  // FIST writes to the slot using the result type as its memory type; its
  // only result is the chain that the final load depends on.
  SDValue FistOps[] = {Chain, Value, Slot};
  Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, DAG.getVTList(MVT::Other),
                                  FistOps, ResultVT, SlotInfo, /*Align*/0,
                                  MachineMemOperand::MOStore);

  return DAG.getLoad(ResultVT, DL, Chain, Slot, SlotInfo);
}
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
@ -28637,6 +28704,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::LRINT:
case ISD::LLRINT: return LowerLRINT_LLRINT(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG);
@ -29191,6 +29260,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
case ISD::LRINT:
case ISD::LLRINT: {
if (SDValue V = LRINT_LLRINTHelper(N, DAG))
Results.push_back(V);
return;
}
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:

View File

@ -1334,6 +1334,7 @@ namespace llvm {
SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned,
SDValue &Chain) const;
SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
@ -1357,6 +1358,7 @@ namespace llvm {
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;

View File

@ -7233,6 +7233,45 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2u
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Scalar FP -> integer conversion multiclass. It defines two codegen-only
// records, `rr` (source in SrcVT.FRC) and `rm` (source loaded through
// SrcVT.ScalarLdFrag), each carrying a pattern that applies OpNode to the
// source and sets a DstVT.RC destination.
//   opc      - opcode byte passed to AVX512<>.
//   asm      - mnemonic; the operand string is appended with !strconcat.
//   SrcVT    - source vector-type info (supplies FRC, ScalarMemOp,
//              ScalarLdFrag and ExeDomain).
//   DstVT    - destination vector-type info (supplies RC).
//   OpNode   - SDNode matched by both patterns.
//   sched    - scheduling class; the `rm` form uses sched.Folded and
//              sched.ReadAfterFold.
//   aliasStr - NOTE(review): not referenced anywhere in this body; confirm
//              whether it is still needed.
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
X86FoldableSchedWrite sched,
string aliasStr> {
let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
let isCodeGenOnly = 1 in {
// Register-source form.
def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
// Memory-source form: the scalar load is folded into the instruction.
def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
} // Predicates = [HasAVX512]
}
defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
lrint, WriteCvtSS2I,
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
llrint, WriteCvtSS2I,
"{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
lrint, WriteCvtSD2I,
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
llrint, WriteCvtSD2I,
"{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
// lrint with an i64 result is selected to the 64-bit-destination forms
// (VCVTSS2SI64Z* / VCVTSD2SI64Z*), for both a register source and a folded
// scalar load.
let Predicates = [HasAVX512] in {
def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {

View File

@ -74,6 +74,11 @@ def X86fild64 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;
// Matches an X86fist node only when the memory type recorded on its
// MemIntrinsicSDNode is i32.
def X86fist32 : PatFrag<(ops node:$val, node:$ptr),
(X86fist node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
(X86fist node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
@ -525,14 +530,20 @@ def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
let mayStore = 1, hasSideEffects = 0 in {
def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
[(X86fist32 RFP32:$src, addr:$op)]>;
def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
[(X86fist64 RFP32:$src, addr:$op)]>;
def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
[(X86fist32 RFP64:$src, addr:$op)]>;
def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
[(X86fist64 RFP64:$src, addr:$op)]>;
def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
[(X86fist32 RFP80:$src, addr:$op)]>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
[(X86fist64 RFP80:$src, addr:$op)]>;
} // mayStore
} // SchedRW, Uses = [FPCW]
@ -791,9 +802,6 @@ def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
// Used to conv. between f80 and i64 for i64 atomic loads.
def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,

View File

@ -884,6 +884,23 @@ defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf6
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>,
XD, VEX, VEX_W, VEX_LIG;
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
"cvtss2si", "cvtss2si",
WriteCvtSS2I, SSEPackedSingle>,
XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
"cvtss2si", "cvtss2si",
WriteCvtSS2I, SSEPackedSingle>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
"cvtsd2si", "cvtsd2si",
WriteCvtSD2I, SSEPackedDouble>,
XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
"cvtsd2si", "cvtsd2si",
WriteCvtSD2I, SSEPackedDouble>,
XD, VEX, VEX_W, VEX_LIG;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
@ -923,6 +940,12 @@ let Predicates = [UseAVX] in {
(VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
(VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>;
def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>;
def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>;
def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>;
}
let isCodeGenOnly = 1 in {
@ -938,6 +961,20 @@ defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
"cvtss2si", "cvtss2si",
WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
"cvtss2si", "cvtss2si",
WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
defm CVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
"cvtsd2si", "cvtsd2si",
WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
"cvtsd2si", "cvtsd2si",
WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
"cvtsi2ss", "cvtsi2ss{l}",
WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
@ -952,6 +989,16 @@ defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1
// lrint with an i64 result from an f32 source (register or folded load) is
// selected to CVTSS2SI64rr/rm under UseSSE1.
let Predicates = [UseSSE1] in {
def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>;
def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>;
}
// The f64-source equivalents are selected to CVTSD2SI64rr/rm and are
// predicated on UseSSE2.
let Predicates = [UseSSE2] in {
def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>;
def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>;
}
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).

View File

@ -1,65 +1,153 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
define i64 @testmsxs(float %x) {
; X86-LABEL: testmsxs:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: fstps (%esp)
; X86-NEXT: calll llrintf
; X86-NEXT: popl %ecx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
; X86-NOSSE-LABEL: testmsxs:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: .cfi_offset %ebp, -8
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: .cfi_def_cfa_register %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: flds 8(%ebp)
; X86-NOSSE-NEXT: fistpll (%esp)
; X86-NOSSE-NEXT: movl (%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: .cfi_def_cfa %esp, 4
; X86-NOSSE-NEXT: retl
;
; SSE2-LABEL: testmsxs:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pushl %eax
; SSE2-NEXT: .cfi_def_cfa_offset 8
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movss %xmm0, (%esp)
; SSE2-NEXT: calll llrintf
; SSE2-NEXT: popl %ecx
; SSE2-NEXT: .cfi_def_cfa_offset 4
; SSE2-NEXT: retl
; X86-SSE2-LABEL: testmsxs:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: movss %xmm0, (%esp)
; X86-SSE2-NEXT: flds (%esp)
; X86-SSE2-NEXT: fistpll (%esp)
; X86-SSE2-NEXT: movl (%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl
;
; X64-LABEL: testmsxs:
; X64: # %bb.0: # %entry
; X64-NEXT: jmp llrintf # TAILCALL
; X86-AVX-LABEL: testmsxs:
; X86-AVX: # %bb.0: # %entry
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
; X86-AVX-NEXT: .cfi_offset %ebp, -8
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: .cfi_def_cfa_register %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: flds (%esp)
; X86-AVX-NEXT: fistpll (%esp)
; X86-AVX-NEXT: movl (%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: .cfi_def_cfa %esp, 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: testmsxs:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: cvtss2si %xmm0, %rax
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: testmsxs:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: vcvtss2si %xmm0, %rax
; X64-AVX-NEXT: retq
entry:
%0 = tail call i64 @llvm.llrint.f32(float %x)
ret i64 %0
}
define i64 @testmsxd(double %x) {
; X86-LABEL: testmsxd:
; X86: # %bb.0: # %entry
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NEXT: fstpl (%esp)
; X86-NEXT: calll llrint
; X86-NEXT: addl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
; X86-NOSSE-LABEL: testmsxd:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: .cfi_offset %ebp, -8
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: .cfi_def_cfa_register %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: fldl 8(%ebp)
; X86-NOSSE-NEXT: fistpll (%esp)
; X86-NOSSE-NEXT: movl (%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: .cfi_def_cfa %esp, 4
; X86-NOSSE-NEXT: retl
;
; SSE2-LABEL: testmsxd:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: subl $8, %esp
; SSE2-NEXT: .cfi_def_cfa_offset 12
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movsd %xmm0, (%esp)
; SSE2-NEXT: calll llrint
; SSE2-NEXT: addl $8, %esp
; SSE2-NEXT: .cfi_def_cfa_offset 4
; SSE2-NEXT: retl
; X86-SSE2-LABEL: testmsxd:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: fldl (%esp)
; X86-SSE2-NEXT: fistpll (%esp)
; X86-SSE2-NEXT: movl (%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl
;
; X64-LABEL: testmsxd:
; X64: # %bb.0: # %entry
; X64-NEXT: jmp llrint # TAILCALL
; X86-AVX-LABEL: testmsxd:
; X86-AVX: # %bb.0: # %entry
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
; X86-AVX-NEXT: .cfi_offset %ebp, -8
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: .cfi_def_cfa_register %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: fldl (%esp)
; X86-AVX-NEXT: fistpll (%esp)
; X86-AVX-NEXT: movl (%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: .cfi_def_cfa %esp, 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: testmsxd:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: cvtsd2si %xmm0, %rax
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: testmsxd:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax
; X64-AVX-NEXT: retq
entry:
%0 = tail call i64 @llvm.llrint.f64(double %x)
ret i64 %0
@ -68,29 +156,28 @@ entry:
define i64 @testmsll(x86_fp80 %x) {
; X86-LABEL: testmsll:
; X86: # %bb.0: # %entry
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fstpt (%esp)
; X86-NEXT: calll llrintl
; X86-NEXT: addl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: fldt 8(%ebp)
; X86-NEXT: fistpll (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
; SSE2-LABEL: testmsll:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: subl $12, %esp
; SSE2-NEXT: .cfi_def_cfa_offset 16
; SSE2-NEXT: fldt {{[0-9]+}}(%esp)
; SSE2-NEXT: fstpt (%esp)
; SSE2-NEXT: calll llrintl
; SSE2-NEXT: addl $12, %esp
; SSE2-NEXT: .cfi_def_cfa_offset 4
; SSE2-NEXT: retl
;
; X64-LABEL: testmsll:
; X64: # %bb.0: # %entry
; X64-NEXT: jmp llrintl # TAILCALL
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: retq
entry:
%0 = tail call i64 @llvm.llrint.f80(x86_fp80 %x)
ret i64 %0

View File

@ -1,30 +1,102 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,X64
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
define i32 @testmsws(float %x) {
; CHECK-LABEL: testmsws:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: jmp lrintf # TAILCALL
; X86-NOSSE-LABEL: testmsws:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %eax
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpl (%esp)
; X86-NOSSE-NEXT: movl (%esp), %eax
; X86-NOSSE-NEXT: popl %ecx
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: testmsws:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: testmsws:
; X86-AVX: # %bb.0: # %entry
; X86-AVX-NEXT: vcvtss2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: testmsws:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: cvtss2si %xmm0, %eax
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: testmsws:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: vcvtss2si %xmm0, %eax
; X64-AVX-NEXT: retq
entry:
%0 = tail call i32 @llvm.lrint.i32.f32(float %x)
ret i32 %0
}
define i32 @testmswd(double %x) {
; CHECK-LABEL: testmswd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: jmp lrint # TAILCALL
; X86-NOSSE-LABEL: testmswd:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %eax
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpl (%esp)
; X86-NOSSE-NEXT: movl (%esp), %eax
; X86-NOSSE-NEXT: popl %ecx
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X86-SSE2-LABEL: testmswd:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: testmswd:
; X86-AVX: # %bb.0: # %entry
; X86-AVX-NEXT: vcvtsd2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: testmswd:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: cvtsd2si %xmm0, %eax
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: testmswd:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: vcvtsd2si %xmm0, %eax
; X64-AVX-NEXT: retq
entry:
%0 = tail call i32 @llvm.lrint.i32.f64(double %x)
ret i32 %0
}
define i32 @testmsll(x86_fp80 %x) {
; CHECK-LABEL: testmsll:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: jmp lrintl # TAILCALL
; X86-LABEL: testmsll:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fistpl (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: popl %ecx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: testmsll:
; X64: # %bb.0: # %entry
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fistpl -{{[0-9]+}}(%rsp)
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: retq
entry:
%0 = tail call i32 @llvm.lrint.i32.f80(x86_fp80 %x)
ret i32 %0

View File

@ -1,19 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX
define i64 @testmsxs(float %x) {
; CHECK-LABEL: testmsxs:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: jmp lrintf # TAILCALL
; SSE-LABEL: testmsxs:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtss2si %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: testmsxs:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtss2si %xmm0, %rax
; AVX-NEXT: retq
entry:
%0 = tail call i64 @llvm.lrint.i64.f32(float %x)
ret i64 %0
}
define i64 @testmsxd(double %x) {
; CHECK-LABEL: testmsxd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: jmp lrint # TAILCALL
; SSE-LABEL: testmsxd:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsd2si %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: testmsxd:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsd2si %xmm0, %rax
; AVX-NEXT: retq
entry:
%0 = tail call i64 @llvm.lrint.i64.f64(double %x)
ret i64 %0
@ -22,7 +36,10 @@ entry:
define i64 @testmsll(x86_fp80 %x) {
; CHECK-LABEL: testmsll:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: jmp lrintl # TAILCALL
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fistpll -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: retq
entry:
%0 = tail call i64 @llvm.lrint.i64.f80(x86_fp80 %x)
ret i64 %0