1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

[AArch64] Don't materialize 0 with "fmov h0, .." when FullFP16 is not supported

We were generating "fmov h0, wzr" instructions even when FullFP16 is not enabled.
I have not added a new test, because the problem was already visible in
test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll,
which I had to update. I don't think Cyclone has FullFP16 enabled
by default, so it shouldn't be using this Armv8.2-A instruction.

I've also removed the rdar tags from that test; please shout if there are any objections.

Differential Revision: https://reviews.llvm.org/D43020

llvm-svn: 324581
This commit is contained in:
Sjoerd Meijer 2018-02-08 08:39:05 +00:00
parent f7686eb8f6
commit 58f2da5889
3 changed files with 11 additions and 7 deletions

View File

@ -4924,7 +4924,8 @@ bool AArch64TargetLowering::isOffsetFoldingLegal(
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
// FIXME: We should be able to handle f128 as well with a clever lowering.
if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) {
if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
(VT == MVT::f16 && Subtarget->hasFullFP16()))) {
DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
return true;
}

View File

@ -2717,7 +2717,7 @@ defm FMOV : UnscaledConversion<"fmov">;
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
Sched<[WriteF]>, Requires<[HasFullFP16]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,

View File

@ -1,12 +1,10 @@
; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefixes=ALL,CYCLONE
; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 < %s | FileCheck %s -check-prefixes=CYCLONE-FULLFP16
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m1 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m3 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefixes=ALL,OTHERS
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefixes=ALL,OTHERS
; rdar://11481771
; rdar://13713797
declare void @bar(half, float, double, <2 x double>)
declare void @bari(i32, i32)
declare void @barl(i64, i64)
@ -16,11 +14,14 @@ define void @t1() nounwind ssp {
entry:
; ALL-LABEL: t1:
; ALL-NOT: fmov
; CYCLONE: fmov h0, wzr
; ALL: ldr h0,{{.*}}
; CYCLONE: fmov s1, wzr
; CYCLONE: fmov d2, xzr
; CYCLONE: movi.16b v3, #0
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
; CYCLONE-FULLFP16: fmov h0, wzr
; CYCLONE-FULLFP16: fmov s1, wzr
; CYCLONE-FULLFP16: fmov d2, xzr
; CYCLONE-FULLFP16: movi.16b v3, #0
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
@ -53,6 +54,8 @@ define void @t4() nounwind ssp {
; ALL-NOT: fmov
; CYCLONE: fmov s{{[0-3]+}}, wzr
; CYCLONE: fmov s{{[0-3]+}}, wzr
; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind