mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[AArch64] Implement FLT_ROUNDS macro.
Very similar to ARM implementation, just maps to an MRS. Should fix PR25191. Patch by Michael Brase. llvm-svn: 335118
This commit is contained in:
parent
5d00aadd9a
commit
e031e390d0
@ -583,6 +583,14 @@ def int_aarch64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic;
|
|||||||
def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
|
def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
|
||||||
def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
|
def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
|
||||||
|
|
||||||
|
let TargetPrefix = "aarch64" in {
|
||||||
|
class FPCR_Get_Intrinsic
|
||||||
|
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// FPCR
|
||||||
|
def int_aarch64_get_fpcr : FPCR_Get_Intrinsic;
|
||||||
|
|
||||||
let TargetPrefix = "aarch64" in {
|
let TargetPrefix = "aarch64" in {
|
||||||
class Crypto_AES_DataKey_Intrinsic
|
class Crypto_AES_DataKey_Intrinsic
|
||||||
: Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
|
: Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
|
||||||
|
@ -469,6 +469,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||||||
|
|
||||||
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
|
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
|
||||||
|
|
||||||
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
|
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
|
||||||
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
|
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
|
||||||
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
|
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
|
||||||
@ -2494,6 +2496,26 @@ static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
|
||||||
|
SelectionDAG &DAG) const {
|
||||||
|
// The rounding mode is in bits 23:22 of the FPCR.
|
||||||
|
// The FPCR rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
|
||||||
|
// The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
|
||||||
|
// so that the shift and the AND get folded into a single bitfield extract.
|
||||||
|
SDLoc dl(Op);
|
||||||
|
|
||||||
|
SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
|
||||||
|
DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
|
||||||
|
MVT::i64));
|
||||||
|
SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
|
||||||
|
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
|
||||||
|
DAG.getConstant(1U << 22, dl, MVT::i32));
|
||||||
|
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
|
||||||
|
DAG.getConstant(22, dl, MVT::i32));
|
||||||
|
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
|
||||||
|
DAG.getConstant(3, dl, MVT::i32));
|
||||||
|
}
|
||||||
|
|
||||||
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
|
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
|
||||||
// Multiplications are only custom-lowered for 128-bit vectors so that
|
// Multiplications are only custom-lowered for 128-bit vectors so that
|
||||||
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
|
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
|
||||||
@ -2753,6 +2775,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
|
|||||||
return LowerFP_TO_INT(Op, DAG);
|
return LowerFP_TO_INT(Op, DAG);
|
||||||
case ISD::FSINCOS:
|
case ISD::FSINCOS:
|
||||||
return LowerFSINCOS(Op, DAG);
|
return LowerFSINCOS(Op, DAG);
|
||||||
|
case ISD::FLT_ROUNDS_:
|
||||||
|
return LowerFLT_ROUNDS_(Op, DAG);
|
||||||
case ISD::MUL:
|
case ISD::MUL:
|
||||||
return LowerMUL(Op, DAG);
|
return LowerMUL(Op, DAG);
|
||||||
case ISD::MULHS:
|
case ISD::MULHS:
|
||||||
|
@ -593,6 +593,7 @@ private:
|
|||||||
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
@ -566,6 +566,9 @@ def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
|
|||||||
let Predicates = [HasPerfMon] in
|
let Predicates = [HasPerfMon] in
|
||||||
def : Pat<(readcyclecounter), (MRS 0xdce8)>;
|
def : Pat<(readcyclecounter), (MRS 0xdce8)>;
|
||||||
|
|
||||||
|
// FPCR register
|
||||||
|
def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>;
|
||||||
|
|
||||||
// Generic system instructions
|
// Generic system instructions
|
||||||
def SYSxt : SystemXtI<0, "sys">;
|
def SYSxt : SystemXtI<0, "sys">;
|
||||||
def SYSLxt : SystemLXtI<1, "sysl">;
|
def SYSLxt : SystemLXtI<1, "sysl">;
|
||||||
|
23
test/CodeGen/AArch64/arm64-fpcr.ll
Normal file
23
test/CodeGen/AArch64/arm64-fpcr.ll
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
|
||||||
|
|
||||||
|
define i64 @GetFpcr() {
|
||||||
|
; CHECK-LABEL: GetFpcr
|
||||||
|
; CHECK: mrs x0, FPCR
|
||||||
|
; CHECK: ret
|
||||||
|
%1 = tail call i64 @llvm.aarch64.get.fpcr()
|
||||||
|
ret i64 %1
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i64 @llvm.aarch64.get.fpcr() #0
|
||||||
|
|
||||||
|
define i32 @GetFltRounds() {
|
||||||
|
; CHECK-LABEL: GetFltRounds
|
||||||
|
; CHECK: mrs x8, FPCR
|
||||||
|
; CHECK: add w8, w8, #1024, lsl #12
|
||||||
|
; CHECK: ubfx w0, w8, #22, #2
|
||||||
|
; CHECK: ret
|
||||||
|
%1 = tail call i32 @llvm.flt.rounds()
|
||||||
|
ret i32 %1
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @llvm.flt.rounds() #0
|
Loading…
Reference in New Issue
Block a user