From 48ae22e14a13363aafaa02ebc80d27cafb91a575 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 17 Jul 2014 11:27:04 +0000 Subject: [PATCH] ARM: support direct f16 <-> f64 conversions ARMv8 has instructions to handle it, otherwise a libcall is needed. llvm-svn: 213254 --- lib/Target/ARM/ARMISelLowering.cpp | 9 ++++++- lib/Target/ARM/ARMInstrVFP.td | 19 ++++++++++----- test/CodeGen/ARM/fp16.ll | 39 ++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e74d0540eb0..27048f9a10d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -825,7 +825,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } - // Special handling for half-precision FP. + + // v8 adds f64 <-> f16 conversion. Before that it should be expanded. + if (!Subtarget->hasV8Ops()) { + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + } + + // fp16 is a special v7 extension that adds f16 <-> f32 conversions. 
if (!Subtarget->hasFP16()) { setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e709e59b3fa..55a6efcb4c0 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -551,12 +551,6 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : Pat<(fp_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; - -def : Pat<(f16_to_fp GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; - def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; @@ -619,6 +613,19 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, let Inst{5} = Dm{4}; } +def : Pat<(fp_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + +def : Pat<(fp_to_f16 (f64 DPR:$a)), + (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>; + +def : Pat<(f16_to_fp GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : Pat<(f64 (f16_to_fp GPR:$a)), + (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>; + + multiclass vcvt_inst rm> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, diff --git a/test/CodeGen/ARM/fp16.ll b/test/CodeGen/ARM/fp16.ll index 7a99c175751..d3f32556a09 100644 --- a/test/CodeGen/ARM/fp16.ll +++ b/test/CodeGen/ARM/fp16.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s | FileCheck %s ; RUN: llc -mattr=+vfp3,+fp16 < %s | FileCheck --check-prefix=CHECK-FP16 %s +; RUN: llc -mtriple=armv8-eabi < %s | FileCheck --check-prefix=CHECK-ARMV8 %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" target triple = "armv7-eabi" @@ -10,23 +11,61 
@@ target triple = "armv7-eabi" define arm_aapcs_vfpcc void @foo() nounwind { ; CHECK-LABEL: foo: ; CHECK-FP16-LABEL: foo: +; CHECK-ARMV8-LABEL: foo: entry: %0 = load i16* @x, align 2 %1 = load i16* @y, align 2 %2 = tail call float @llvm.convert.from.fp16.f32(i16 %0) ; CHECK: __gnu_h2f_ieee ; CHECK-FP16: vcvtb.f32.f16 +; CHECK-ARMV8: vcvtb.f32.f16 %3 = tail call float @llvm.convert.from.fp16.f32(i16 %1) ; CHECK: __gnu_h2f_ieee ; CHECK-FP16: vcvtb.f32.f16 +; CHECK-ARMV8: vcvtb.f32.f16 %4 = fadd float %2, %3 %5 = tail call i16 @llvm.convert.to.fp16.f32(float %4) ; CHECK: __gnu_f2h_ieee ; CHECK-FP16: vcvtb.f16.f32 +; CHECK-ARMV8: vcvtb.f16.f32 store i16 %5, i16* @x, align 2 ret void } +define arm_aapcs_vfpcc double @test_from_fp16(i16 %in) { +; CHECK-LABEL: test_from_fp16: +; CHECK-FP16-LABEL: test_from_fp16: +; CHECK-ARMV8-LABEL: test_from_fp16: + %val = call double @llvm.convert.from.fp16.f64(i16 %in) +; CHECK: bl __gnu_h2f_ieee +; CHECK: vmov [[TMP:s[0-9]+]], r0 +; CHECK: vcvt.f64.f32 d0, [[TMP]] + +; CHECK-FP16: vmov [[TMP16:s[0-9]+]], r0 +; CHECK-FP16: vcvtb.f32.f16 [[TMP32:s[0-9]+]], [[TMP16]] +; CHECK-FP16: vcvt.f64.f32 d0, [[TMP32]] + +; CHECK-ARMV8: vmov [[TMP:s[0-9]+]], r0 +; CHECK-ARMV8: vcvtb.f64.f16 d0, [[TMP]] + ret double %val +} + +define arm_aapcs_vfpcc i16 @test_to_fp16(double %in) { +; CHECK-LABEL: test_to_fp16: +; CHECK-FP16-LABEL: test_to_fp16: +; CHECK-ARMV8-LABEL: test_to_fp16: + %val = call i16 @llvm.convert.to.fp16.f64(double %in) +; CHECK: bl __truncdfhf2 + +; CHECK-FP16: bl __truncdfhf2 + +; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0 +; CHECK-ARMV8: vmov r0, [[TMP]] + ret i16 %val +} + declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone +declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone +declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone