1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

[ARM] FP16 VSEL codegen

This is a follow up of rL327695 to instruction select more variants of VSELGT
and VSELGE, for which it is necessary to custom lower SELECT.

More work is required in this area, which will be addressed soon:
- more variants need to be regression tested, but this depends on the next point.
- first LowerConstantFP need to be adjusted for fp16 values.

Differential Revision: https://reviews.llvm.org/D45205

llvm-svn: 329788
This commit is contained in:
Sjoerd Meijer 2018-04-11 09:28:04 +00:00
parent 512991d3bb
commit 10e7e8538f
2 changed files with 152 additions and 16 deletions

View File

@ -1047,16 +1047,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f16, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::SETCC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
}
// Thumb-1 cannot currently select ARMISD::SUBE.
if (!Subtarget->isThumb1Only())
@ -1064,7 +1067,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
if (Subtarget->hasFullFP16())
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
@ -4522,7 +4526,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Normalize the fp compare. If RHS is zero we keep it there so we match
// CMPFPw0 instead of CMPFP.
if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) &&
(TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) {
(TrueVal.getValueType() == MVT::f16 ||
TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
bool swpCmpOps = false;
bool swpVselOps = false;
checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);

View File

@ -4,11 +4,11 @@
; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
; Test fast-isel
@ -703,37 +703,167 @@ define half @select_cc1() {
ret half %2
; CHECK-LABEL: select_cc1:
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-A32: vcmp.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-T32: vcmp.f32 s0, s0
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
}
; FIXME: more tests need to be added for VSELGE and VSELGT.
; That is, more combinations of immediate operands that can or can't
; be encoded as an FP16 immediate need to be added here.
;
; 36. VSELGE
define half @select_cc2() {
define half @select_cc_ge1() {
%1 = fcmp nsz oge half undef, 0xH0001
%2 = select i1 %1, half 0xHC000, half 0xH0002
ret half %2
; CHECK-LABEL: select_cc2:
; CHECK-HARDFP-FULLFP16: vselge.f16 s0, s{{.}}, s{{.}}
; CHECK-LABEL: select_cc_ge1:
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it ge
; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
}
;
; FIXME: add fcmp ole, ult here.
;
define half @select_cc_ge3() {
%1 = fcmp nsz ugt half undef, 0xH0001
%2 = select i1 %1, half 0xHC000, half 0xH0002
ret half %2
; CHECK-LABEL: select_cc_ge3:
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it hi
; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
}
; 37. VSELGT
define half @select_cc3() {
define half @select_cc_gt1() {
%1 = fcmp nsz ogt half undef, 0xH0001
%2 = select i1 %1, half 0xHC000, half 0xH0002
ret half %2
; CHECK-LABEL: select_cc3:
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-LABEL: select_cc_gt1:
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it gt
; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
}
; 38. VSELVS
define half @select_cc4() {
%1 = fcmp nsz ueq half undef, 0xH0001
define half @select_cc_gt2() {
%1 = fcmp nsz uge half undef, 0xH0001
%2 = select i1 %1, half 0xHC000, half 0xH0002
ret half %2
; CHECK-LABEL: select_cc_gt2:
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it pl
; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
}
;
; FIXME: add fcmp ule, olt here.
;
; 38. VSELVS
define float @select_cc4(float %a.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
%2 = fcmp nsz ueq half %1, 0xH0001
%3 = select i1 %2, half 0xHC000, half 0xH0002
%4 = bitcast half %3 to i16
%tmp4.0.insert.ext = zext i16 %4 to i32
%5 = bitcast i32 %tmp4.0.insert.ext to float
ret float %5
; CHECK-LABEL: select_cc4:
; CHECK-HARDFP-FULLFP16: vselvs.f16 s0, s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]
; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]]
; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32: it vs
; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]]
}
; 39. VSQRT - TODO