mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
ARM: support direct f16 <-> f64 conversions
ARMv8 has instructions to handle it, otherwise a libcall is needed. llvm-svn: 213254
This commit is contained in:
parent
a1eab159d8
commit
48ae22e14a
@ -825,7 +825,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
|||||||
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
|
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
|
||||||
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
|
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
|
||||||
}
|
}
|
||||||
// Special handling for half-precision FP.
|
|
||||||
|
// v8 adds f64 <-> f16 conversion. Before that it should be expanded.
|
||||||
|
if (!Subtarget->hasV8Ops()) {
|
||||||
|
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
|
||||||
|
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
|
||||||
|
}
|
||||||
|
|
||||||
|
// fp16 is a special v7 extension that adds f16 <-> f32 conversions.
|
||||||
if (!Subtarget->hasFP16()) {
|
if (!Subtarget->hasFP16()) {
|
||||||
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
|
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
|
||||||
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
|
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
|
||||||
|
@ -551,12 +551,6 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
|||||||
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
|
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
|
||||||
[/* For disassembly only; pattern left blank */]>;
|
[/* For disassembly only; pattern left blank */]>;
|
||||||
|
|
||||||
def : Pat<(fp_to_f16 SPR:$a),
|
|
||||||
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
|
|
||||||
|
|
||||||
def : Pat<(f16_to_fp GPR:$a),
|
|
||||||
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
|
||||||
|
|
||||||
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
||||||
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
|
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
|
||||||
[/* For disassembly only; pattern left blank */]>;
|
[/* For disassembly only; pattern left blank */]>;
|
||||||
@ -619,6 +613,19 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
|
|||||||
let Inst{5} = Dm{4};
|
let Inst{5} = Dm{4};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def : Pat<(fp_to_f16 SPR:$a),
|
||||||
|
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
|
||||||
|
|
||||||
|
def : Pat<(fp_to_f16 (f64 DPR:$a)),
|
||||||
|
(i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
|
||||||
|
|
||||||
|
def : Pat<(f16_to_fp GPR:$a),
|
||||||
|
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
||||||
|
|
||||||
|
def : Pat<(f64 (f16_to_fp GPR:$a)),
|
||||||
|
(VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
||||||
|
|
||||||
|
|
||||||
multiclass vcvt_inst<string opc, bits<2> rm> {
|
multiclass vcvt_inst<string opc, bits<2> rm> {
|
||||||
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
|
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
|
||||||
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
|
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
; RUN: llc < %s | FileCheck %s
|
; RUN: llc < %s | FileCheck %s
|
||||||
; RUN: llc -mattr=+vfp3,+fp16 < %s | FileCheck --check-prefix=CHECK-FP16 %s
|
; RUN: llc -mattr=+vfp3,+fp16 < %s | FileCheck --check-prefix=CHECK-FP16 %s
|
||||||
|
; RUN: llc -mtriple=armv8-eabi < %s | FileCheck --check-prefix=CHECK-ARMV8 %s
|
||||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
|
||||||
target triple = "armv7-eabi"
|
target triple = "armv7-eabi"
|
||||||
|
|
||||||
@ -10,23 +11,61 @@ target triple = "armv7-eabi"
|
|||||||
define arm_aapcs_vfpcc void @foo() nounwind {
|
define arm_aapcs_vfpcc void @foo() nounwind {
|
||||||
; CHECK-LABEL: foo:
|
; CHECK-LABEL: foo:
|
||||||
; CHECK-FP16-LABEL: foo:
|
; CHECK-FP16-LABEL: foo:
|
||||||
|
; CHECK-ARMV8-LABEL: foo:
|
||||||
entry:
|
entry:
|
||||||
%0 = load i16* @x, align 2
|
%0 = load i16* @x, align 2
|
||||||
%1 = load i16* @y, align 2
|
%1 = load i16* @y, align 2
|
||||||
%2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
|
%2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
|
||||||
; CHECK: __gnu_h2f_ieee
|
; CHECK: __gnu_h2f_ieee
|
||||||
; CHECK-FP16: vcvtb.f32.f16
|
; CHECK-FP16: vcvtb.f32.f16
|
||||||
|
; CHECK-ARMV8: vcvtb.f32.f16
|
||||||
%3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
|
%3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
|
||||||
; CHECK: __gnu_h2f_ieee
|
; CHECK: __gnu_h2f_ieee
|
||||||
; CHECK-FP16: vcvtb.f32.f16
|
; CHECK-FP16: vcvtb.f32.f16
|
||||||
|
; CHECK-ARMV8: vcvtb.f32.f16
|
||||||
%4 = fadd float %2, %3
|
%4 = fadd float %2, %3
|
||||||
%5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
|
%5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
|
||||||
; CHECK: __gnu_f2h_ieee
|
; CHECK: __gnu_f2h_ieee
|
||||||
; CHECK-FP16: vcvtb.f16.f32
|
; CHECK-FP16: vcvtb.f16.f32
|
||||||
|
; CHECK-ARMV8: vcvtb.f16.f32
|
||||||
store i16 %5, i16* @x, align 2
|
store i16 %5, i16* @x, align 2
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc double @test_from_fp16(i16 %in) {
|
||||||
|
; CHECK-LABEL: test_from_fp16:
|
||||||
|
; CHECK-FP16-LABEL: test_from_fp16:
|
||||||
|
; CHECK-ARMV8-LABEL: test_from_fp16:
|
||||||
|
%val = call double @llvm.convert.from.fp16.f64(i16 %in)
|
||||||
|
; CHECK: bl __gnu_h2f_ieee
|
||||||
|
; CHECK: vmov [[TMP:s[0-9]+]], r0
|
||||||
|
; CHECK: vcvt.f64.f32 d0, [[TMP]]
|
||||||
|
|
||||||
|
; CHECK-FP16: vmov [[TMP16:s[0-9]+]], r0
|
||||||
|
; CHECK-FP16: vcvtb.f32.f16 [[TMP32:s[0-9]+]], [[TMP16]]
|
||||||
|
; CHECK-FP16: vcvt.f64.f32 d0, [[TMP32]]
|
||||||
|
|
||||||
|
; CHECK-ARMV8: vmov [[TMP:s[0-9]+]], r0
|
||||||
|
; CHECK-ARMV8: vcvtb.f64.f16 d0, [[TMP]]
|
||||||
|
ret double %val
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i16 @test_to_fp16(double %in) {
|
||||||
|
; CHECK-LABEL: test_to_fp16:
|
||||||
|
; CHECK-FP16-LABEL: test_to_fp16:
|
||||||
|
; CHECK-ARMV8-LABEL: test_to_fp16:
|
||||||
|
%val = call i16 @llvm.convert.to.fp16.f64(double %in)
|
||||||
|
; CHECK: bl __truncdfhf2
|
||||||
|
|
||||||
|
; CHECK-FP16: bl __truncdfhf2
|
||||||
|
|
||||||
|
; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0
|
||||||
|
; CHECK-ARMV8: vmov r0, [[TMP]]
|
||||||
|
ret i16 %val
|
||||||
|
}
|
||||||
|
|
||||||
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
|
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
|
||||||
|
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
|
||||||
|
|
||||||
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
|
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
|
||||||
|
declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
|
||||||
|
Loading…
Reference in New Issue
Block a user