mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[TargetLowering] expandFP_TO_UINT - avoid FPE due to out of range conversion (PR17686)
PR17686 demonstrates that for some targets FP exceptions can fire in cases where the FP_TO_UINT is expanded using an FP_TO_SINT instruction. The existing code converts both the in-range and out-of-range cases using FP_TO_SINT and then selects the result; this patch changes this for 'strict' cases to pre-select the FP_TO_SINT input and the offset adjustment. The X87 cases don't need the strict flag but generate much nicer code with it. Differential Revision: https://reviews.llvm.org/D53794 llvm-svn: 348251
This commit is contained in:
parent
5f2f923973
commit
5e25d966f6
@ -1746,6 +1746,16 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Return true if it is more correct/profitable to use strict FP_TO_INT
|
||||
/// conversion operations - canonicalizing the FP source value instead of
|
||||
/// converting all cases and then selecting based on value.
|
||||
/// This may be true if the target throws exceptions for out of bounds
|
||||
/// conversions or has fast FP CMOV.
/// \p FpVT is the floating-point source type, \p IntVT the integer result
/// type, and \p IsSigned selects signed vs. unsigned conversion (the
/// FP_TO_UINT expansion queries this hook with IsSigned == false).
/// The default implementation returns false.
|
||||
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
|
||||
bool IsSigned) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// TargetLowering Configuration Methods - These methods should be invoked by
|
||||
// the derived class constructor to configure this object for the target.
|
||||
|
@ -4200,20 +4200,39 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Expand based on maximum range of FP_TO_SINT:
|
||||
// True = fp_to_sint(Src)
|
||||
// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
|
||||
// Result = select (Src < 0x8000000000000000), True, False
|
||||
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
|
||||
SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
|
||||
|
||||
SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
|
||||
// TODO: Should any fast-math-flags be set for the FSUB?
|
||||
SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
|
||||
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
|
||||
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
|
||||
DAG.getConstant(SignMask, dl, DstVT));
|
||||
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
|
||||
bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
|
||||
if (Strict) {
|
||||
// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
|
||||
// signmask then offset (the result of which should be fully representable).
|
||||
// Sel = Src < 0x8000000000000000
|
||||
// Val = select Sel, Src, Src - 0x8000000000000000
|
||||
// Ofs = select Sel, 0, 0x8000000000000000
|
||||
// Result = fp_to_sint(Val) ^ Ofs
|
||||
|
||||
// TODO: Should any fast-math-flags be set for the FSUB?
|
||||
SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
|
||||
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
|
||||
SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
|
||||
DAG.getConstant(SignMask, dl, DstVT));
|
||||
Result = DAG.getNode(ISD::XOR, dl, DstVT,
|
||||
DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
|
||||
} else {
|
||||
// Expand based on maximum range of FP_TO_SINT:
|
||||
// True = fp_to_sint(Src)
|
||||
// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
|
||||
// Result = select (Src < 0x8000000000000000), True, False
|
||||
|
||||
SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
|
||||
// TODO: Should any fast-math-flags be set for the FSUB?
|
||||
SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
|
||||
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
|
||||
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
|
||||
DAG.getConstant(SignMask, dl, DstVT));
|
||||
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -4812,6 +4812,12 @@ bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
|
||||
(1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
|
||||
}
|
||||
|
||||
/// X86 override of the strict FP_TO_INT hook: returns true only for unsigned
/// conversions whose source type is f80 and only when the subtarget reports
/// CMOV support. \p IntVT is not consulted.
bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
|
||||
bool IsSigned) const {
|
||||
// f80 FP_TO_UINT is more efficient using Strict code if FCMOV is available.
|
||||
return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
|
||||
}
|
||||
|
||||
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
|
||||
unsigned Index) const {
|
||||
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
|
||||
|
@ -1047,6 +1047,9 @@ namespace llvm {
|
||||
|
||||
bool decomposeMulByConstant(EVT VT, SDValue C) const override;
|
||||
|
||||
bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
|
||||
bool IsSigned) const override;
|
||||
|
||||
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
|
||||
/// with this index.
|
||||
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
|
||||
|
@ -483,29 +483,20 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
|
||||
; X64-X87-NEXT: flds {{.*}}(%rip)
|
||||
; X64-X87-NEXT: fld %st(1)
|
||||
; X64-X87-NEXT: fsub %st(1)
|
||||
; X64-X87-NEXT: xorl %eax, %eax
|
||||
; X64-X87-NEXT: fxch %st(1)
|
||||
; X64-X87-NEXT: fucompi %st(2)
|
||||
; X64-X87-NEXT: fcmovnbe %st(1), %st(0)
|
||||
; X64-X87-NEXT: fstp %st(1)
|
||||
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
|
||||
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
|
||||
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
|
||||
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fld %st(1)
|
||||
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fucompi %st(1)
|
||||
; X64-X87-NEXT: fstp %st(0)
|
||||
; X64-X87-NEXT: jbe .LBB10_1
|
||||
; X64-X87-NEXT: # %bb.2:
|
||||
; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax
|
||||
; X64-X87-NEXT: retq
|
||||
; X64-X87-NEXT: .LBB10_1:
|
||||
; X64-X87-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||
; X64-X87-NEXT: setbe %al
|
||||
; X64-X87-NEXT: shlq $63, %rax
|
||||
; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
|
||||
; X64-X87-NEXT: retq
|
||||
;
|
||||
@ -515,17 +506,14 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
|
||||
; X64-SSSE3-NEXT: flds {{.*}}(%rip)
|
||||
; X64-SSSE3-NEXT: fld %st(1)
|
||||
; X64-SSSE3-NEXT: fsub %st(1)
|
||||
; X64-SSSE3-NEXT: xorl %eax, %eax
|
||||
; X64-SSSE3-NEXT: fxch %st(1)
|
||||
; X64-SSSE3-NEXT: fucompi %st(2)
|
||||
; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0)
|
||||
; X64-SSSE3-NEXT: fstp %st(1)
|
||||
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
|
||||
; X64-SSSE3-NEXT: fld %st(1)
|
||||
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
|
||||
; X64-SSSE3-NEXT: fucompi %st(1)
|
||||
; X64-SSSE3-NEXT: fstp %st(0)
|
||||
; X64-SSSE3-NEXT: jbe .LBB10_1
|
||||
; X64-SSSE3-NEXT: # %bb.2:
|
||||
; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax
|
||||
; X64-SSSE3-NEXT: retq
|
||||
; X64-SSSE3-NEXT: .LBB10_1:
|
||||
; X64-SSSE3-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||
; X64-SSSE3-NEXT: setbe %al
|
||||
; X64-SSSE3-NEXT: shlq $63, %rax
|
||||
; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
|
||||
; X64-SSSE3-NEXT: retq
|
||||
%1 = fptoui x86_fp80 %a0 to i64
|
||||
@ -577,29 +565,20 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
|
||||
; X64-X87-NEXT: flds {{.*}}(%rip)
|
||||
; X64-X87-NEXT: fld %st(1)
|
||||
; X64-X87-NEXT: fsub %st(1)
|
||||
; X64-X87-NEXT: xorl %eax, %eax
|
||||
; X64-X87-NEXT: fxch %st(1)
|
||||
; X64-X87-NEXT: fucompi %st(2)
|
||||
; X64-X87-NEXT: fcmovnbe %st(1), %st(0)
|
||||
; X64-X87-NEXT: fstp %st(1)
|
||||
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
|
||||
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
|
||||
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
|
||||
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fld %st(1)
|
||||
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-X87-NEXT: fucompi %st(1)
|
||||
; X64-X87-NEXT: fstp %st(0)
|
||||
; X64-X87-NEXT: jbe .LBB11_1
|
||||
; X64-X87-NEXT: # %bb.2:
|
||||
; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax
|
||||
; X64-X87-NEXT: retq
|
||||
; X64-X87-NEXT: .LBB11_1:
|
||||
; X64-X87-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||
; X64-X87-NEXT: setbe %al
|
||||
; X64-X87-NEXT: shlq $63, %rax
|
||||
; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
|
||||
; X64-X87-NEXT: retq
|
||||
;
|
||||
@ -609,17 +588,14 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
|
||||
; X64-SSSE3-NEXT: flds {{.*}}(%rip)
|
||||
; X64-SSSE3-NEXT: fld %st(1)
|
||||
; X64-SSSE3-NEXT: fsub %st(1)
|
||||
; X64-SSSE3-NEXT: xorl %eax, %eax
|
||||
; X64-SSSE3-NEXT: fxch %st(1)
|
||||
; X64-SSSE3-NEXT: fucompi %st(2)
|
||||
; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0)
|
||||
; X64-SSSE3-NEXT: fstp %st(1)
|
||||
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
|
||||
; X64-SSSE3-NEXT: fld %st(1)
|
||||
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
|
||||
; X64-SSSE3-NEXT: fucompi %st(1)
|
||||
; X64-SSSE3-NEXT: fstp %st(0)
|
||||
; X64-SSSE3-NEXT: jbe .LBB11_1
|
||||
; X64-SSSE3-NEXT: # %bb.2:
|
||||
; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax
|
||||
; X64-SSSE3-NEXT: retq
|
||||
; X64-SSSE3-NEXT: .LBB11_1:
|
||||
; X64-SSSE3-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||
; X64-SSSE3-NEXT: setbe %al
|
||||
; X64-SSSE3-NEXT: shlq $63, %rax
|
||||
; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
|
||||
; X64-SSSE3-NEXT: retq
|
||||
%1 = load x86_fp80, x86_fp80 *%a0
|
||||
|
@ -1147,25 +1147,21 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
|
||||
;
|
||||
; SSE3_64_WIN-LABEL: x_to_u64:
|
||||
; SSE3_64_WIN: # %bb.0:
|
||||
; SSE3_64_WIN-NEXT: subq $16, %rsp
|
||||
; SSE3_64_WIN-NEXT: pushq %rax
|
||||
; SSE3_64_WIN-NEXT: fldt (%rcx)
|
||||
; SSE3_64_WIN-NEXT: flds __real@{{.*}}(%rip)
|
||||
; SSE3_64_WIN-NEXT: fld %st(1)
|
||||
; SSE3_64_WIN-NEXT: fsub %st(1)
|
||||
; SSE3_64_WIN-NEXT: fisttpll {{[0-9]+}}(%rsp)
|
||||
; SSE3_64_WIN-NEXT: fld %st(1)
|
||||
; SSE3_64_WIN-NEXT: xorl %eax, %eax
|
||||
; SSE3_64_WIN-NEXT: fxch %st(1)
|
||||
; SSE3_64_WIN-NEXT: fucompi %st(2)
|
||||
; SSE3_64_WIN-NEXT: fcmovnbe %st(1), %st(0)
|
||||
; SSE3_64_WIN-NEXT: fstp %st(1)
|
||||
; SSE3_64_WIN-NEXT: fisttpll (%rsp)
|
||||
; SSE3_64_WIN-NEXT: fucompi %st(1)
|
||||
; SSE3_64_WIN-NEXT: fstp %st(0)
|
||||
; SSE3_64_WIN-NEXT: jbe .LBB4_1
|
||||
; SSE3_64_WIN-NEXT: # %bb.2:
|
||||
; SSE3_64_WIN-NEXT: movq (%rsp), %rax
|
||||
; SSE3_64_WIN-NEXT: addq $16, %rsp
|
||||
; SSE3_64_WIN-NEXT: retq
|
||||
; SSE3_64_WIN-NEXT: .LBB4_1:
|
||||
; SSE3_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||
; SSE3_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax
|
||||
; SSE3_64_WIN-NEXT: addq $16, %rsp
|
||||
; SSE3_64_WIN-NEXT: setbe %al
|
||||
; SSE3_64_WIN-NEXT: shlq $63, %rax
|
||||
; SSE3_64_WIN-NEXT: xorq (%rsp), %rax
|
||||
; SSE3_64_WIN-NEXT: popq %rcx
|
||||
; SSE3_64_WIN-NEXT: retq
|
||||
;
|
||||
; SSE3_64_LIN-LABEL: x_to_u64:
|
||||
@ -1174,17 +1170,14 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
|
||||
; SSE3_64_LIN-NEXT: flds {{.*}}(%rip)
|
||||
; SSE3_64_LIN-NEXT: fld %st(1)
|
||||
; SSE3_64_LIN-NEXT: fsub %st(1)
|
||||
; SSE3_64_LIN-NEXT: xorl %eax, %eax
|
||||
; SSE3_64_LIN-NEXT: fxch %st(1)
|
||||
; SSE3_64_LIN-NEXT: fucompi %st(2)
|
||||
; SSE3_64_LIN-NEXT: fcmovnbe %st(1), %st(0)
|
||||
; SSE3_64_LIN-NEXT: fstp %st(1)
|
||||
; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp)
|
||||
; SSE3_64_LIN-NEXT: fld %st(1)
|
||||
; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp)
|
||||
; SSE3_64_LIN-NEXT: fucompi %st(1)
|
||||
; SSE3_64_LIN-NEXT: fstp %st(0)
|
||||
; SSE3_64_LIN-NEXT: jbe .LBB4_1
|
||||
; SSE3_64_LIN-NEXT: # %bb.2:
|
||||
; SSE3_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax
|
||||
; SSE3_64_LIN-NEXT: retq
|
||||
; SSE3_64_LIN-NEXT: .LBB4_1:
|
||||
; SSE3_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||
; SSE3_64_LIN-NEXT: setbe %al
|
||||
; SSE3_64_LIN-NEXT: shlq $63, %rax
|
||||
; SSE3_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
|
||||
; SSE3_64_LIN-NEXT: retq
|
||||
;
|
||||
@ -1246,37 +1239,27 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
|
||||
;
|
||||
; SSE2_64_WIN-LABEL: x_to_u64:
|
||||
; SSE2_64_WIN: # %bb.0:
|
||||
; SSE2_64_WIN-NEXT: subq $24, %rsp
|
||||
; SSE2_64_WIN-NEXT: subq $16, %rsp
|
||||
; SSE2_64_WIN-NEXT: fldt (%rcx)
|
||||
; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip)
|
||||
; SSE2_64_WIN-NEXT: fld %st(1)
|
||||
; SSE2_64_WIN-NEXT: fsub %st(1)
|
||||
; SSE2_64_WIN-NEXT: xorl %eax, %eax
|
||||
; SSE2_64_WIN-NEXT: fxch %st(1)
|
||||
; SSE2_64_WIN-NEXT: fucompi %st(2)
|
||||
; SSE2_64_WIN-NEXT: fcmovnbe %st(1), %st(0)
|
||||
; SSE2_64_WIN-NEXT: fstp %st(1)
|
||||
; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
|
||||
; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
|
||||
; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
|
||||
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: movw %cx, {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
|
||||
; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
|
||||
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: fld %st(1)
|
||||
; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
|
||||
; SSE2_64_WIN-NEXT: fucompi %st(1)
|
||||
; SSE2_64_WIN-NEXT: fstp %st(0)
|
||||
; SSE2_64_WIN-NEXT: jbe .LBB4_1
|
||||
; SSE2_64_WIN-NEXT: # %bb.2:
|
||||
; SSE2_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; SSE2_64_WIN-NEXT: addq $24, %rsp
|
||||
; SSE2_64_WIN-NEXT: retq
|
||||
; SSE2_64_WIN-NEXT: .LBB4_1:
|
||||
; SSE2_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||
; SSE2_64_WIN-NEXT: setbe %al
|
||||
; SSE2_64_WIN-NEXT: shlq $63, %rax
|
||||
; SSE2_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax
|
||||
; SSE2_64_WIN-NEXT: addq $24, %rsp
|
||||
; SSE2_64_WIN-NEXT: addq $16, %rsp
|
||||
; SSE2_64_WIN-NEXT: retq
|
||||
;
|
||||
; SSE2_64_LIN-LABEL: x_to_u64:
|
||||
@ -1285,29 +1268,20 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
|
||||
; SSE2_64_LIN-NEXT: flds {{.*}}(%rip)
|
||||
; SSE2_64_LIN-NEXT: fld %st(1)
|
||||
; SSE2_64_LIN-NEXT: fsub %st(1)
|
||||
; SSE2_64_LIN-NEXT: xorl %eax, %eax
|
||||
; SSE2_64_LIN-NEXT: fxch %st(1)
|
||||
; SSE2_64_LIN-NEXT: fucompi %st(2)
|
||||
; SSE2_64_LIN-NEXT: fcmovnbe %st(1), %st(0)
|
||||
; SSE2_64_LIN-NEXT: fstp %st(1)
|
||||
; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
|
||||
; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
|
||||
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
|
||||
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: fld %st(1)
|
||||
; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; SSE2_64_LIN-NEXT: fucompi %st(1)
|
||||
; SSE2_64_LIN-NEXT: fstp %st(0)
|
||||
; SSE2_64_LIN-NEXT: jbe .LBB4_1
|
||||
; SSE2_64_LIN-NEXT: # %bb.2:
|
||||
; SSE2_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax
|
||||
; SSE2_64_LIN-NEXT: retq
|
||||
; SSE2_64_LIN-NEXT: .LBB4_1:
|
||||
; SSE2_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||
; SSE2_64_LIN-NEXT: setbe %al
|
||||
; SSE2_64_LIN-NEXT: shlq $63, %rax
|
||||
; SSE2_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
|
||||
; SSE2_64_LIN-NEXT: retq
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user