mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[AArch64] Fold a floating-point divide by power of two into fp conversion.
Part of http://reviews.llvm.org/D13442 llvm-svn: 249579
This commit is contained in:
parent
b2e3d5ab9f
commit
1b95669ac4
@ -480,6 +480,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
|
||||
setTargetDAGCombine(ISD::FP_TO_SINT);
|
||||
setTargetDAGCombine(ISD::FP_TO_UINT);
|
||||
setTargetDAGCombine(ISD::FDIV);
|
||||
|
||||
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
|
||||
|
||||
@ -7596,6 +7597,70 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return FixConv;
|
||||
}
|
||||
|
||||
/// Fold a floating-point divide by power of two into fixed-point to
|
||||
/// floating-point conversion.
|
||||
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const AArch64Subtarget *Subtarget) {
|
||||
if (!Subtarget->hasNEON())
|
||||
return SDValue();
|
||||
|
||||
SDValue Op = N->getOperand(0);
|
||||
unsigned Opc = Op->getOpcode();
|
||||
if (!Op.getValueType().isVector() ||
|
||||
(Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
|
||||
return SDValue();
|
||||
|
||||
SDValue ConstVec = N->getOperand(1);
|
||||
if (!isa<BuildVectorSDNode>(ConstVec))
|
||||
return SDValue();
|
||||
|
||||
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
|
||||
int32_t IntBits = IntTy.getSizeInBits();
|
||||
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
|
||||
return SDValue();
|
||||
|
||||
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
|
||||
int32_t FloatBits = FloatTy.getSizeInBits();
|
||||
if (FloatBits != 32 && FloatBits != 64)
|
||||
return SDValue();
|
||||
|
||||
// Avoid conversions where iN is larger than the float (e.g., i64 -> float).
|
||||
if (IntBits > FloatBits)
|
||||
return SDValue();
|
||||
|
||||
BitVector UndefElements;
|
||||
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
|
||||
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
|
||||
if (C == -1 || C == 0 || C > FloatBits)
|
||||
return SDValue();
|
||||
|
||||
MVT ResTy;
|
||||
unsigned NumLanes = Op.getValueType().getVectorNumElements();
|
||||
switch (NumLanes) {
|
||||
default:
|
||||
return SDValue();
|
||||
case 2:
|
||||
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
|
||||
break;
|
||||
case 4:
|
||||
ResTy = MVT::v4i32;
|
||||
break;
|
||||
}
|
||||
|
||||
SDLoc DL(N);
|
||||
SDValue ConvInput = Op.getOperand(0);
|
||||
bool IsSigned = Opc == ISD::SINT_TO_FP;
|
||||
if (IntBits < FloatBits)
|
||||
ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
|
||||
ResTy, ConvInput);
|
||||
|
||||
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
|
||||
: Intrinsic::aarch64_neon_vcvtfxu2fp;
|
||||
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
|
||||
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
|
||||
DAG.getConstant(C, DL, MVT::i32));
|
||||
}
|
||||
|
||||
/// An EXTR instruction is made up of two shifts, ORed together. This helper
|
||||
/// searches for and classifies those shifts.
|
||||
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
|
||||
@ -9470,6 +9535,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT:
|
||||
return performFpToIntCombine(N, DAG, Subtarget);
|
||||
case ISD::FDIV:
|
||||
return performFDivCombine(N, DAG, Subtarget);
|
||||
case ISD::OR:
|
||||
return performORCombine(N, DCI, Subtarget);
|
||||
case ISD::INTRINSIC_WO_CHAIN:
|
||||
|
115
test/CodeGen/AArch64/fdiv_combine.ll
Normal file
115
test/CodeGen/AArch64/fdiv_combine.ll
Normal file
@ -0,0 +1,115 @@
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
; Signed case: sitofp <2 x i32> followed by a divide by a 16.0 splat is
; expected to become one fixed-point scvtf with an immediate of 4.
; CHECK-LABEL: @test1
; CHECK: scvtf.2s v0, v0, #4
; CHECK: ret
define <2 x float> @test1(<2 x i32> %in) {
entry:
  %conv = sitofp <2 x i32> %in to <2 x float>
  %quot = fdiv <2 x float> %conv, <float 16.0, float 16.0>
  ret <2 x float> %quot
}
|
||||
|
||||
; Unsigned case: uitofp <2 x i32> followed by a divide by an 8.0 splat is
; expected to become one fixed-point ucvtf with an immediate of 3.
; CHECK-LABEL: @test2
; CHECK: ucvtf.2s v0, v0, #3
; CHECK: ret
define <2 x float> @test2(<2 x i32> %in) {
entry:
  %conv = uitofp <2 x i32> %in to <2 x float>
  %quot = fdiv <2 x float> %conv, <float 8.0, float 8.0>
  ret <2 x float> %quot
}
|
||||
|
||||
; Negative case: the divisor 9.0 is not a power of two, so the plain convert,
; the constant materialization and the fdiv must all remain.
; CHECK-LABEL: @test3
; CHECK: scvtf.2s v0, v0
; CHECK: fmov.2s v1, #9.00000000
; CHECK: fdiv.2s v0, v0, v1
; CHECK: ret
define <2 x float> @test3(<2 x i32> %in) {
entry:
  %conv = sitofp <2 x i32> %in to <2 x float>
  %quot = fdiv <2 x float> %conv, <float 9.0, float 9.0>
  ret <2 x float> %quot
}
|
||||
|
||||
; Negative case: the divisor is 2^33, a power of two that is too large for a
; 32-bit float lane, so the convert and the fdiv must stay separate.
; CHECK-LABEL: @test4
; CHECK: scvtf.2s v0, v0
; CHECK: movi.2s v1, #0x50, lsl #24
; CHECK: fdiv.2s v0, v0, v1
; CHECK: ret
define <2 x float> @test4(<2 x i32> %in) {
entry:
  %conv = sitofp <2 x i32> %in to <2 x float>
  %quot = fdiv <2 x float> %conv, <float 0x4200000000000000, float 0x4200000000000000>
  ret <2 x float> %quot
}
|
||||
|
||||
; Boundary case: the divisor is 2^32, the largest power of two that still
; folds for 32-bit lanes; the immediate is expected to be 32.
; CHECK-LABEL: @test5
; CHECK: scvtf.2s v0, v0, #32
; CHECK: ret
define <2 x float> @test5(<2 x i32> %in) {
entry:
  %conv = sitofp <2 x i32> %in to <2 x float>
  %quot = fdiv <2 x float> %conv, <float 0x41F0000000000000, float 0x41F0000000000000>
  ret <2 x float> %quot
}
|
||||
|
||||
; Quadword case: the same fold on a four-lane <4 x i32> -> <4 x float>
; conversion divided by a 4.0 splat.
; CHECK-LABEL: @test6
; CHECK: scvtf.4s v0, v0, #2
; CHECK: ret
define <4 x float> @test6(<4 x i32> %in) {
entry:
  %conv = sitofp <4 x i32> %in to <4 x float>
  %quot = fdiv <4 x float> %conv, <float 4.0, float 4.0, float 4.0, float 4.0>
  ret <4 x float> %quot
}
|
||||
|
||||
; Unsigned i16 source: the lanes are first widened (ushll) and then converted
; with a fixed-point ucvtf for the divide by 2.0.
; CHECK-LABEL: @test7
; CHECK: ushll.4s v0, v0, #0
; CHECK: ucvtf.4s v0, v0, #1
; CHECK: ret
define <4 x float> @test7(<4 x i16> %in) {
  %cvt = uitofp <4 x i16> %in to <4 x float>
  %quot = fdiv <4 x float> %cvt, <float 2.0, float 2.0, float 2.0, float 2.0>
  ret <4 x float> %quot
}
|
||||
|
||||
; Signed i16 source: the lanes are first sign-widened (sshll) and then
; converted with a fixed-point scvtf for the divide by 4.0.
; CHECK-LABEL: @test8
; CHECK: sshll.4s v0, v0, #0
; CHECK: scvtf.4s v0, v0, #2
; CHECK: ret
define <4 x float> @test8(<4 x i16> %in) {
  %cvt = sitofp <4 x i16> %in to <4 x float>
  %quot = fdiv <4 x float> %cvt, <float 4.0, float 4.0, float 4.0, float 4.0>
  ret <4 x float> %quot
}
|
||||
|
||||
; Negative case: an i64 source is wider than the float lane, so no fold is
; expected; the convert, narrowing, constant and fdiv all remain.
; CHECK-LABEL: @test9
; CHECK: ucvtf.2d v0, v0
; CHECK: fcvtn v0.2s, v0.2d
; CHECK: movi.2s v1, #0x40, lsl #24
; CHECK: fdiv.2s v0, v0, v1
; CHECK: ret
define <2 x float> @test9(<2 x i64> %in) {
  %cvt = uitofp <2 x i64> %in to <2 x float>
  %quot = fdiv <2 x float> %cvt, <float 2.0, float 2.0>
  ret <2 x float> %quot
}
|
||||
|
||||
; 64-bit lanes: uitofp <2 x i64> to <2 x double> divided by a 2.0 splat is
; expected to fold into a fixed-point ucvtf with an immediate of 1.
; CHECK-LABEL: @test10
; CHECK: ucvtf.2d v0, v0, #1
; CHECK: ret
define <2 x double> @test10(<2 x i64> %in) {
  %cvt = uitofp <2 x i64> %in to <2 x double>
  %quot = fdiv <2 x double> %cvt, <double 2.0, double 2.0>
  ret <2 x double> %quot
}
|
Loading…
Reference in New Issue
Block a user