mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[AArch64] Optimize floating point materialization
This patch changes isFPImmLegal to also return true when the value can be encoded as the immediate operand of a logical instruction, in addition to checking whether it fits the immediate field of fmov. This optimizes some floating-point materialization, including the values used in isinf lowering. Reviewed By: rengolin, efriedma, evandro Differential Revision: https://reviews.llvm.org/D57044 llvm-svn: 352866
This commit is contained in:
parent
a17796bd63
commit
36b7b3c0fa
@ -405,10 +405,9 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
|
||||
bool Is64Bit = (VT == MVT::f64);
|
||||
// This checks to see if we can use FMOV instructions to materialize
|
||||
// a constant, otherwise we have to materialize via the constant pool.
|
||||
if (TLI.isFPImmLegal(Val, VT)) {
|
||||
int Imm =
|
||||
Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
|
||||
assert((Imm != -1) && "Cannot encode floating-point constant.");
|
||||
int Imm =
|
||||
Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
|
||||
if (Imm != -1) {
|
||||
unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
|
||||
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
|
||||
}
|
||||
|
@ -5424,34 +5424,30 @@ bool AArch64TargetLowering::isOffsetFoldingLegal(
|
||||
}
|
||||
|
||||
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
|
||||
// We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
|
||||
// FIXME: We should be able to handle f128 as well with a clever lowering.
|
||||
if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
|
||||
(VT == MVT::f16 && Subtarget->hasFullFP16()))) {
|
||||
LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IsLegal = false;
|
||||
SmallString<128> ImmStrVal;
|
||||
Imm.toString(ImmStrVal);
|
||||
|
||||
// We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
|
||||
// 16-bit case when target has full fp16 support.
|
||||
// FIXME: We should be able to handle f128 as well with a clever lowering.
|
||||
const APInt ImmInt = Imm.bitcastToAPInt();
|
||||
if (VT == MVT::f64)
|
||||
IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
|
||||
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
|
||||
else if (VT == MVT::f32)
|
||||
IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
|
||||
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
|
||||
else if (VT == MVT::f16 && Subtarget->hasFullFP16())
|
||||
IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
|
||||
IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
|
||||
// TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
|
||||
// generate that fmov.
|
||||
|
||||
if (IsLegal) {
|
||||
LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString()
|
||||
<< " imm value: " << ImmStrVal << "\n");
|
||||
return true;
|
||||
}
|
||||
// If we can not materialize in immediate field for fmov, check if the
|
||||
// value can be encoded as the immediate operand of a logical instruction.
|
||||
// The immediate value will be created with either MOVZ, MOVN, or ORR.
|
||||
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32))
|
||||
IsLegal = AArch64_AM::isAnyMOVWMovAlias(ImmInt.getZExtValue(),
|
||||
VT.getSizeInBits());
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString()
|
||||
<< " imm value: " << ImmStrVal << "\n");
|
||||
return false;
|
||||
LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
|
||||
<< " imm value: "; Imm.dump(););
|
||||
return IsLegal;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -22,11 +22,11 @@ define double @not_fabs(double %x) #0 {
|
||||
define float @still_not_fabs(float %x) #0 {
|
||||
; CHECK-LABEL: still_not_fabs:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI1_0
|
||||
; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI1_0]
|
||||
; CHECK-NEXT: fneg s2, s0
|
||||
; CHECK-NEXT: fcmp s0, s1
|
||||
; CHECK-NEXT: fcsel s0, s0, s2, ge
|
||||
; CHECK-NEXT: orr w8, wzr, #0x80000000
|
||||
; CHECK-NEXT: fmov s2, w8
|
||||
; CHECK-NEXT: fneg s1, s0
|
||||
; CHECK-NEXT: fcmp s0, s2
|
||||
; CHECK-NEXT: fcsel s0, s0, s1, ge
|
||||
; CHECK-NEXT: ret
|
||||
%cmp = fcmp nnan oge float %x, -0.0
|
||||
%sub = fsub nnan float -0.0, %x
|
||||
|
@ -132,13 +132,13 @@ define double @test7(double %a, double %b) nounwind {
|
||||
define float @fadd_const_multiuse_fmf(float %x) {
|
||||
; CHECK-LABEL: fadd_const_multiuse_fmf:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI10_0
|
||||
; CHECK-NEXT: adrp x9, .LCPI10_1
|
||||
; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI10_0]
|
||||
; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI10_1]
|
||||
; CHECK-NEXT: fadd s1, s0, s1
|
||||
; CHECK-NEXT: fadd s0, s0, s2
|
||||
; CHECK-NEXT: fadd s0, s1, s0
|
||||
; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144
|
||||
; CHECK-DAG: mov [[W42:w[0-9]+]], #1109917696
|
||||
; CHECK-DAG: fmov [[FP59:s[0-9]+]], [[W59]]
|
||||
; CHECK-DAG: fmov [[FP42:s[0-9]+]], [[W42]]
|
||||
; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP42]]
|
||||
; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], s0, [[FP59]]
|
||||
; CHECK-NEXT: fadd s0, [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: ret
|
||||
%a1 = fadd float %x, 42.0
|
||||
%a2 = fadd nsz reassoc float %a1, 17.0
|
||||
@ -153,13 +153,13 @@ define float @fadd_const_multiuse_fmf(float %x) {
|
||||
define float @fadd_const_multiuse_attr(float %x) #0 {
|
||||
; CHECK-LABEL: fadd_const_multiuse_attr:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x9, .LCPI11_1
|
||||
; CHECK-NEXT: adrp x8, .LCPI11_0
|
||||
; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI11_1]
|
||||
; CHECK-NEXT: ldr s2, [x8, :lo12:.LCPI11_0]
|
||||
; CHECK-NEXT: fadd s1, s0, s1
|
||||
; CHECK-NEXT: fadd s1, s2, s1
|
||||
; CHECK-NEXT: fadd s0, s0, s1
|
||||
; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144
|
||||
; CHECK-DAG: mov [[W17:w[0-9]+]], #1109917696
|
||||
; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]]
|
||||
; CHECK-NEXT: fmov [[FP17:s[0-9]+]], [[W17]]
|
||||
; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP59]]
|
||||
; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], [[FP17]], [[TMP1]]
|
||||
; CHECK-NEXT: fadd s0, s0, [[TMP2]]
|
||||
; CHECK-NEXT: ret
|
||||
%a1 = fadd float %x, 42.0
|
||||
%a2 = fadd float %a1, 17.0
|
||||
|
@ -18,8 +18,10 @@ define void @check_float() {
|
||||
|
||||
%newval2 = fadd float %val, 128.0
|
||||
store volatile float %newval2, float* @varf32
|
||||
; CHECK-DAG: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.LCPI0_0
|
||||
; TINY-DAG: ldr {{s[0-9]+}}, .LCPI0_0
|
||||
; CHECK-DAG: mov [[W128:w[0-9]+]], #1124073472
|
||||
; CHECK-DAG: fmov {{s[0-9]+}}, [[W128]]
|
||||
; TINY-DAG: mov [[W128:w[0-9]+]], #1124073472
|
||||
; TINY-DAG: fmov {{s[0-9]+}}, [[W128]]
|
||||
|
||||
; CHECK: ret
|
||||
; TINY: ret
|
||||
@ -38,8 +40,10 @@ define void @check_double() {
|
||||
|
||||
%newval2 = fadd double %val, 128.0
|
||||
store volatile double %newval2, double* @varf64
|
||||
; CHECK-DAG: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.LCPI1_0
|
||||
; TINY-DAG: ldr {{d[0-9]+}}, .LCPI1_0
|
||||
; CHECK-DAG: mov [[X128:x[0-9]+]], #4638707616191610880
|
||||
; CHECK-DAG: fmov {{d[0-9]+}}, [[X128]]
|
||||
; TINY-DAG: mov [[X128:x[0-9]+]], #4638707616191610880
|
||||
; TINY-DAG: fmov {{d[0-9]+}}, [[X128]]
|
||||
|
||||
; CHECK: ret
|
||||
; TINY: ret
|
||||
|
62
test/CodeGen/AArch64/isinf.ll
Normal file
62
test/CodeGen/AArch64/isinf.ll
Normal file
@ -0,0 +1,62 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 < %s -o -| FileCheck %s
|
||||
|
||||
declare half @llvm.fabs.f16(half)
|
||||
declare float @llvm.fabs.f32(float)
|
||||
declare double @llvm.fabs.f64(double)
|
||||
declare fp128 @llvm.fabs.f128(fp128)
|
||||
|
||||
; INFINITY requires loading the constant for _Float16
|
||||
define i32 @replace_isinf_call_f16(half %x) {
|
||||
; CHECK-LABEL: replace_isinf_call_f16:
|
||||
; CHECK: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
|
||||
; CHECK: ldr [[INFINITY:h[0-9]+]], {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
|
||||
; CHECK-NEXT: fabs [[ABS:h[0-9]+]], h0
|
||||
; CHECK-NEXT: fcmp [[ABS]], [[INFINITY]]
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
%abs = tail call half @llvm.fabs.f16(half %x)
|
||||
%cmpinf = fcmp oeq half %abs, 0xH7C00
|
||||
%ret = zext i1 %cmpinf to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; Check if INFINITY for float is materialized
|
||||
define i32 @replace_isinf_call_f32(float %x) {
|
||||
; CHECK-LABEL: replace_isinf_call_f32:
|
||||
; CHECK: orr [[INFSCALARREG:w[0-9]+]], wzr, #0x7f800000
|
||||
; CHECK-NEXT: fabs [[ABS:s[0-9]+]], s0
|
||||
; CHECK-NEXT: fmov [[INFREG:s[0-9]+]], [[INFSCALARREG]]
|
||||
; CHECK-NEXT: fcmp [[ABS]], [[INFREG]]
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
%abs = tail call float @llvm.fabs.f32(float %x)
|
||||
%cmpinf = fcmp oeq float %abs, 0x7FF0000000000000
|
||||
%ret = zext i1 %cmpinf to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; Check if INFINITY for double is materialized
|
||||
define i32 @replace_isinf_call_f64(double %x) {
|
||||
; CHECK-LABEL: replace_isinf_call_f64:
|
||||
; CHECK: orr [[INFSCALARREG:x[0-9]+]], xzr, #0x7ff0000000000000
|
||||
; CHECK-NEXT: fabs [[ABS:d[0-9]+]], d0
|
||||
; CHECK-NEXT: fmov [[INFREG:d[0-9]+]], [[INFSCALARREG]]
|
||||
; CHECK-NEXT: fcmp [[ABS]], [[INFREG]]
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
%abs = tail call double @llvm.fabs.f64(double %x)
|
||||
%cmpinf = fcmp oeq double %abs, 0x7FF0000000000000
|
||||
%ret = zext i1 %cmpinf to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; For long double it still requires loading the constant.
|
||||
define i32 @replace_isinf_call_f128(fp128 %x) {
|
||||
; CHECK-LABEL: replace_isinf_call_f128:
|
||||
; CHECK: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
|
||||
; CHECK: ldr q1, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
|
||||
; CHECK: bl __eqtf2
|
||||
; CHECK: cmp w0, #0
|
||||
; CHECK: cset w0, eq
|
||||
%abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
|
||||
%cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
|
||||
%ret = zext i1 %cmpinf to i32
|
||||
ret i32 %ret
|
||||
}
|
@ -28,13 +28,13 @@ define float @fmaxnm(i32 %i1, i32 %i2) #0 {
|
||||
define float @not_fmaxnm_maybe_nan(i32 %i1, i32 %i2) #0 {
|
||||
; CHECK-LABEL: not_fmaxnm_maybe_nan:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI1_0
|
||||
; CHECK-NEXT: ldr s0, [x8, :lo12:.LCPI1_0]
|
||||
; CHECK-NEXT: ucvtf s1, w0
|
||||
; CHECK-NEXT: ucvtf s2, w1
|
||||
; CHECK-NEXT: fmov s3, #17.00000000
|
||||
; CHECK-NEXT: fmul s0, s1, s0
|
||||
; CHECK-NEXT: fadd s1, s2, s3
|
||||
; CHECK-NEXT: orr w8, wzr, #0xff800000
|
||||
; CHECK-NEXT: ucvtf s0, w0
|
||||
; CHECK-NEXT: ucvtf s1, w1
|
||||
; CHECK-NEXT: fmov s2, #17.00000000
|
||||
; CHECK-NEXT: fmov s3, w8
|
||||
; CHECK-NEXT: fmul s0, s0, s3
|
||||
; CHECK-NEXT: fadd s1, s1, s2
|
||||
; CHECK-NEXT: fcmp s0, s1
|
||||
; CHECK-NEXT: fcsel s0, s0, s1, pl
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -13,18 +13,16 @@ define void @floating_lits() {
|
||||
|
||||
%floatval = load float, float* @varfloat
|
||||
%newfloat = fadd float %floatval, 128.0
|
||||
; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
|
||||
; CHECK: ldr [[LIT128:s[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
|
||||
; CHECK: mov [[W128:w[0-9]+]], #1124073472
|
||||
; CHECK: fmov [[LIT128:s[0-9]+]], [[W128]]
|
||||
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
|
||||
|
||||
; CHECK-TINY: ldr [[LIT128:s[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
|
||||
; CHECK-TINY: mov [[W128:w[0-9]+]], #1124073472
|
||||
; CHECK-TINY: fmov [[LIT128:s[0-9]+]], [[W128]]
|
||||
; CHECK-NOFP-TINY-NOT: ldr {{s[0-9]+}},
|
||||
|
||||
; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:.LCPI[0-9]+_[0-9]+]]
|
||||
; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
|
||||
; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
|
||||
; CHECK-LARGE: movk x[[LITADDR]], #:abs_g3:[[CURLIT]]
|
||||
; CHECK-LARGE: ldr {{s[0-9]+}}, [x[[LITADDR]]]
|
||||
; CHECK-LARGE: mov [[W128:w[0-9]+]], #1124073472
|
||||
; CHECK-LARGE: fmov [[LIT128:s[0-9]+]], [[W128]]
|
||||
; CHECK-LARGE: fadd
|
||||
; CHECK-NOFP-LARGE-NOT: ldr {{s[0-9]+}},
|
||||
; CHECK-NOFP-LARGE-NOT: fadd
|
||||
|
@ -2,22 +2,22 @@
|
||||
; RUN: llc < %s -mtriple=aarch64-win32-gnu | FileCheck -check-prefix=MINGW %s
|
||||
|
||||
define double @double() {
|
||||
ret double 0x0000000000800000
|
||||
ret double 0x0000000000800001
|
||||
}
|
||||
; CHECK: .globl __real@0000000000800000
|
||||
; CHECK-NEXT: .section .rdata,"dr",discard,__real@0000000000800000
|
||||
; CHECK: .globl __real@0000000000800001
|
||||
; CHECK-NEXT: .section .rdata,"dr",discard,__real@0000000000800001
|
||||
; CHECK-NEXT: .p2align 3
|
||||
; CHECK-NEXT: __real@0000000000800000:
|
||||
; CHECK-NEXT: .xword 8388608
|
||||
; CHECK-NEXT: __real@0000000000800001:
|
||||
; CHECK-NEXT: .xword 8388609
|
||||
; CHECK: double:
|
||||
; CHECK: adrp x8, __real@0000000000800000
|
||||
; CHECK-NEXT: ldr d0, [x8, __real@0000000000800000]
|
||||
; CHECK: adrp x8, __real@0000000000800001
|
||||
; CHECK-NEXT: ldr d0, [x8, __real@0000000000800001]
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
; MINGW: .section .rdata,"dr"
|
||||
; MINGW-NEXT: .p2align 3
|
||||
; MINGW-NEXT: [[LABEL:\.LC.*]]:
|
||||
; MINGW-NEXT: .xword 8388608
|
||||
; MINGW-NEXT: .xword 8388609
|
||||
; MINGW: double:
|
||||
; MINGW: adrp x8, [[LABEL]]
|
||||
; MINGW-NEXT: ldr d0, [x8, [[LABEL]]]
|
||||
|
Loading…
Reference in New Issue
Block a user