From 1b95669ac453ccdaf0aed54efe48f62c318ec3fd Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 7 Oct 2015 17:51:37 +0000 Subject: [PATCH] [AArch64] Fold a floating-point divide by power of two into fp conversion. Part of http://reviews.llvm.org/D13442 llvm-svn: 249579 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 67 ++++++++++++ test/CodeGen/AArch64/fdiv_combine.ll | 115 +++++++++++++++++++++ 2 files changed, 182 insertions(+) create mode 100644 test/CodeGen/AArch64/fdiv_combine.ll diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 6af2d9d9d06..fa8cad82795 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -480,6 +480,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); + setTargetDAGCombine(ISD::FDIV); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); @@ -7596,6 +7597,70 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, return FixConv; } +/// Fold a floating-point divide by power of two into fixed-point to +/// floating-point conversion. 
+///
+/// Matches (fdiv (sint_to_fp|uint_to_fp X), ConstVec) on vector types, where
+/// ConstVec is a BUILD_VECTOR for which getConstantFPSplatPow2ToLog2Int yields
+/// a value C in [1, FloatBits], and replaces it with the
+/// aarch64_neon_vcvtfxs2fp / aarch64_neon_vcvtfxu2fp intrinsic applied to X
+/// (sign- or zero-extended to the float element width when X is narrower),
+/// with C passed as the i32 immediate operand.
+///
+/// \returns the replacement node, or an empty SDValue when the pattern does
+/// not apply.
+static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
+                                  const AArch64Subtarget *Subtarget) {
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
+  SDValue Op = N->getOperand(0);
+  unsigned Opc = Op->getOpcode();
+  // getSimpleValueType() is used below on both the float vector and the
+  // integer source; bail out on extended (non-simple) types instead of
+  // tripping its assertion.
+  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
+      !Op.getOperand(0).getValueType().isSimple() ||
+      (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
+    return SDValue();
+
+  SDValue ConstVec = N->getOperand(1);
+  if (!isa<BuildVectorSDNode>(ConstVec))
+    return SDValue();
+
+  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+  int32_t IntBits = IntTy.getSizeInBits();
+  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
+    return SDValue();
+
+  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+  int32_t FloatBits = FloatTy.getSizeInBits();
+  if (FloatBits != 32 && FloatBits != 64)
+    return SDValue();
+
+  // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
+  if (IntBits > FloatBits)
+    return SDValue();
+
+  BitVector UndefElements;
+  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
+  // -1 is treated as "no usable splat" (presumably the helper's failure
+  // value; confirm against its definition). C == 0 would be a divide by 2^0,
+  // and anything above FloatBits is outside the range accepted here.
+  if (C == -1 || C == 0 || C > FloatBits)
+    return SDValue();
+
+  MVT ResTy;
+  unsigned NumLanes = Op.getValueType().getVectorNumElements();
+  switch (NumLanes) {
+  default:
+    return SDValue();
+  case 2:
+    ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
+    break;
+  case 4:
+    // v4i32 only pairs with 32-bit float lanes. With 64-bit float lanes the
+    // integer vector would not match the result width, so do not fold.
+    if (FloatBits != 32)
+      return SDValue();
+    ResTy = MVT::v4i32;
+    break;
+  }
+
+  SDLoc DL(N);
+  SDValue ConvInput = Op.getOperand(0);
+  bool IsSigned = Opc == ISD::SINT_TO_FP;
+  // Widen a narrower integer source to the float element width, keeping the
+  // signedness of the original conversion.
+  if (IntBits < FloatBits)
+    ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
+                            ResTy, ConvInput);
+
+  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
+                                      : Intrinsic::aarch64_neon_vcvtfxu2fp;
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
+                     DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
+                     DAG.getConstant(C, DL, MVT::i32));
+}
+
 /// An EXTR instruction is made up of two shifts, ORed together. 
This helper
 /// searches for and classifies those shifts.
 static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
@@ -9470,6 +9535,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
     return performFpToIntCombine(N, DAG, Subtarget);
+  case ISD::FDIV:
+    return performFDivCombine(N, DAG, Subtarget);
   case ISD::OR:
     return performORCombine(N, DCI, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN:
diff --git a/test/CodeGen/AArch64/fdiv_combine.ll b/test/CodeGen/AArch64/fdiv_combine.ll
new file mode 100644
index 00000000000..6f38a267ec3
--- /dev/null
+++ b/test/CodeGen/AArch64/fdiv_combine.ll
@@ -0,0 +1,115 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
+
+; Test signed conversion.
+; CHECK-LABEL: @test1
+; CHECK: scvtf.2s v0, v0, #4
+; CHECK: ret
+define <2 x float> @test1(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 16.0, float 16.0>
+  ret <2 x float> %div.i
+}
+
+; Test unsigned conversion.
+; CHECK-LABEL: @test2
+; CHECK: ucvtf.2s v0, v0, #3
+; CHECK: ret
+define <2 x float> @test2(<2 x i32> %in) {
+entry:
+  %vcvt.i = uitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 8.0, float 8.0>
+  ret <2 x float> %div.i
+}
+
+; Test which should not fold due to non-power of 2.
+; CHECK-LABEL: @test3
+; CHECK: scvtf.2s v0, v0
+; CHECK: fmov.2s v1, #9.00000000
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test3(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 9.0, float 9.0>
+  ret <2 x float> %div.i
+}
+
+; Test which should not fold due to power of 2 out of range.
+; CHECK-LABEL: @test4
+; CHECK: scvtf.2s v0, v0
+; CHECK: movi.2s v1, #0x50, lsl #24
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test4(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
+  ret <2 x float> %div.i
+}
+
+; Test case where const is max power of 2 (i.e., 2^32).
+; CHECK-LABEL: @test5
+; CHECK: scvtf.2s v0, v0, #32
+; CHECK: ret
+define <2 x float> @test5(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
+  ret <2 x float> %div.i
+}
+
+; Test quadword.
+; CHECK-LABEL: @test6
+; CHECK: scvtf.4s v0, v0, #2
+; CHECK: ret
+define <4 x float> @test6(<4 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 4.0, float 4.0, float 4.0, float 4.0>
+  ret <4 x float> %div.i
+}
+
+; Test unsigned i16 to float
+; CHECK-LABEL: @test7
+; CHECK: ushll.4s v0, v0, #0
+; CHECK: ucvtf.4s v0, v0, #1
+; CHECK: ret
+define <4 x float> @test7(<4 x i16> %in) {
+  %conv = uitofp <4 x i16> %in to <4 x float>
+  %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
+  ret <4 x float> %shift
+}
+
+; Test signed i16 to float
+; CHECK-LABEL: @test8
+; CHECK: sshll.4s v0, v0, #0
+; CHECK: scvtf.4s v0, v0, #2
+; CHECK: ret
+define <4 x float> @test8(<4 x i16> %in) {
+  %conv = sitofp <4 x i16> %in to <4 x float>
+  %shift = fdiv <4 x float> %conv, <float 4.0, float 4.0, float 4.0, float 4.0>
+  ret <4 x float> %shift
+}
+
+; Can't convert i64 to float.
+; CHECK-LABEL: @test9
+; CHECK: ucvtf.2d v0, v0
+; CHECK: fcvtn v0.2s, v0.2d
+; CHECK: movi.2s v1, #0x40, lsl #24
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test9(<2 x i64> %in) {
+  %conv = uitofp <2 x i64> %in to <2 x float>
+  %shift = fdiv <2 x float> %conv, <float 2.0, float 2.0>
+  ret <2 x float> %shift
+}
+
+; CHECK-LABEL: @test10
+; CHECK: ucvtf.2d v0, v0, #1
+; CHECK: ret
+define <2 x double> @test10(<2 x i64> %in) {
+  %conv = uitofp <2 x i64> %in to <2 x double>
+  %shift = fdiv <2 x double> %conv, <double 2.0, double 2.0>
+  ret <2 x double> %shift
+}