[ARM] Support for v4f16 and v8f16 vectors
This is the groundwork for adding the Armv8.2-A FP16 vector intrinsics, which use v4f16 and v8f16 vector operands and return values. All the moving parts are tested with two intrinsics: a 1-operand v8f16 intrinsic and a 2-operand v4f16 intrinsic. The rest of the intrinsics and tests will be added in a follow-up patch.

Differential Revision: https://reviews.llvm.org/D44538

llvm-svn: 327839
parent 4898f781f7
commit 4c4a37be92
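As context for the two new tests, here is a minimal sketch of the ACLE-level source they are modelled on, assuming the Armv8.2-A FP16 intrinsic names vabsq_f16 and vpadd_f16 from the planned follow-up Clang work; the function names and build flags are illustrative only and not part of this patch:

// Illustrative only: with a compiler that already exposes the Armv8.2-A FP16
// NEON intrinsics (e.g. built with -march=armv8.2-a+fp16), these two calls
// are expected to reach the backend as the llvm.fabs.v8f16 and
// llvm.arm.neon.vpadd.v4f16 calls exercised by the new tests below.
#include <arm_neon.h>

// 1-operand, v8f16: lane-wise absolute value of a q-register vector.
float16x8_t abs_v8f16(float16x8_t a) { return vabsq_f16(a); }

// 2-operand, v4f16: pairwise add of two d-register vectors.
float16x4_t padd_v4f16(float16x4_t a, float16x4_t b) { return vpadd_f16(a, b); }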
@@ -217,12 +217,15 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
       break;
     }
+  case MVT::f16:
   case MVT::f32:
     RegList = SRegList;
     break;
+  case MVT::v4f16:
   case MVT::f64:
     RegList = DRegList;
     break;
+  case MVT::v8f16:
   case MVT::v2f64:
     RegList = QRegList;
     break;
@@ -160,8 +160,8 @@ def CC_ARM_AAPCS : CallingConv<[
   CCIfNest<CCAssignToReg<[R12]>>,

   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,

   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -176,8 +176,8 @@ def CC_ARM_AAPCS : CallingConv<[

 def RetCC_ARM_AAPCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,

   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -201,8 +201,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
   CCIfByVal<CCPassByVal<4, 4>>,

   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,

   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -222,8 +222,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[

 def RetCC_ARM_AAPCS_VFP : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,

   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -1893,12 +1893,14 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
   default: llvm_unreachable("unhandled vst type");
     // Double-register operations:
   case MVT::v8i8: OpcodeIndex = 0; break;
+  case MVT::v4f16:
   case MVT::v4i16: OpcodeIndex = 1; break;
   case MVT::v2f32:
   case MVT::v2i32: OpcodeIndex = 2; break;
   case MVT::v1i64: OpcodeIndex = 3; break;
     // Quad-register operations:
   case MVT::v16i8: OpcodeIndex = 0; break;
+  case MVT::v8f16:
   case MVT::v8i16: OpcodeIndex = 1; break;
   case MVT::v4f32:
   case MVT::v4i32: OpcodeIndex = 2; break;
@@ -565,6 +565,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     addQRTypeForNEON(MVT::v4i32);
     addQRTypeForNEON(MVT::v2i64);

+    if (Subtarget->hasFullFP16()) {
+      addQRTypeForNEON(MVT::v8f16);
+      addDRTypeForNEON(MVT::v4f16);
+    }
+
     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
     // neither Neon nor VFP support any arithmetic operations on it.
     // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
@@ -3727,9 +3732,9 @@ SDValue ARMTargetLowering::LowerFormalArguments(
         RC = &ARM::HPRRegClass;
       else if (RegVT == MVT::f32)
         RC = &ARM::SPRRegClass;
-      else if (RegVT == MVT::f64)
+      else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
         RC = &ARM::DPRRegClass;
-      else if (RegVT == MVT::v2f64)
+      else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
        RC = &ARM::QPRRegClass;
      else if (RegVT == MVT::i32)
        RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
@@ -6971,9 +6971,11 @@ def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
 let Predicates = [IsLE] in {
   def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
   def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+  def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>;
   def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
   def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
   def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+  def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>;
   def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
 }
 def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
@@ -7002,6 +7004,7 @@ let Predicates = [IsLE] in {
   def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
@@ -7019,6 +7022,7 @@ def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
 let Predicates = [IsLE] in {
   def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
   def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
   def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
   def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
 }
@@ -7044,6 +7048,7 @@ let Predicates = [IsBE] in {
   def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
   def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
   def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+  def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
   def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
   def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
   def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
@@ -7065,6 +7070,7 @@ let Predicates = [IsBE] in {
   def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
@@ -7073,10 +7079,12 @@ let Predicates = [IsBE] in {
   def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
   def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
   def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
   def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
   def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
   def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
   def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
   def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
   def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
 }
test/CodeGen/ARM/fp16-intrinsic-vector-1op.ll (new file, 39 lines)
@@ -0,0 +1,39 @@
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+v8.2a,+fullfp16,+neon -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-HARD
; RUN: llc < %s -mtriple=armeb-none-eabi -mattr=+v8.2a,+fullfp16,+neon -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-HARD-BE
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+v8.2a,+fullfp16,+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP
; RUN: llc < %s -mtriple=armeb-none-eabi -mattr=+v8.2a,+fullfp16,+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-BE

declare <8 x half> @llvm.fabs.v8f16(<8 x half>)

define dso_local <8 x half> @t_vabsq_f16(<8 x half> %a) {
; CHECK-LABEL: t_vabsq_f16:

; CHECK-HARD: vabs.f16 q0, q0
; CHECK-HARD-NEXT: bx lr

; CHECK-HARD-BE: vrev64.16 [[Q8:q[0-9]+]], q0
; CHECK-HARD-BE-NEXT: vabs.f16 [[Q8]], [[Q8]]
; CHECK-HARD-BE-NEXT: vrev64.16 q0, [[Q8]]
; CHECK-HARD-BE-NEXT: bx lr

; CHECK-SOFTFP: vmov d{{.*}}, r2, r3
; CHECK-SOFTFP: vmov d{{.*}}, r0, r1
; CHECK-SOFTFP: vabs.f16 q{{.*}}, q{{.*}}
; CHECK-SOFTFP: vmov r0, r1, d{{.*}}
; CHECK-SOFTFP: vmov r2, r3, d{{.*}}
; CHECK-SOFTFP: bx lr

; CHECK-SOFTFP-BE: vmov [[D17:d[0-9]+]], r3, r2
; CHECK-SOFTFP-BE: vmov [[D16:d[0-9]+]], r1, r0
; CHECK-SOFTFP-BE: vrev64.16 [[Q8:q[0-9]+]], [[Q8]]
; CHECK-SOFTFP-BE: vabs.f16 [[Q8]], [[Q8]]
; CHECK-SOFTFP-BE: vrev64.16 [[Q8]], [[Q8]]
; CHECK-SOFTFP-BE: vmov r1, r0, [[D16]]
; CHECK-SOFTFP-BE: vmov r3, r2, [[D17]]
; CHECK-SOFTFP-BE: bx lr

entry:
  %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
  ret <8 x half> %vabs1.i
}
test/CodeGen/ARM/fp16-intrinsic-vector-2op.ll (new file, 21 lines)
@@ -0,0 +1,21 @@
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+v8.2a,+fullfp16,+neon -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-HARD
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+v8.2a,+fullfp16,+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP

declare <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half>, <4 x half>)

define dso_local <4 x half> @t_vpadd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK: t_vpadd_f16:

; CHECK-HARD: vpadd.f16 d0, d0, d1
; CHECK-HARD-NEXT: bx lr

; CHECK-SOFTFP: vmov [[D1:d[0-9]+]], r2, r3
; CHECK-SOFTFP: vmov [[D2:d[0-9]+]], r0, r1
; CHECK-SOFTFP: vpadd.f16 [[D3:d[0-9]+]], [[D2]], [[D1]]
; CHECK-SOFTFP: vmov r0, r1, [[D3]]
; CHECK-SOFTFP: bx lr

entry:
  %vpadd_v2.i = tail call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vpadd_v2.i
}