Add ISD::FROUND for libm round()

All libm floating-point rounding functions, except for round(), had their own ISD nodes. Recent PowerPC cores have an instruction for round(), and so here I'm adding ISD::FROUND so that round() can be custom lowered as well. For the most part, this is straightforward. I've added an intrinsic and a matching ISD node just like those for nearbyint() and friends. The SelectionDAG pattern I've named frnd (because ISD::FP_ROUND has already claimed fround). This will be used by the PowerPC backend in a follow-up commit. llvm-svn: 187926
2024-11-25 20:23:11 +01:00 · 2013-08-07 22:49:12 +00:00 · 2013-08-07 22:49:12 +00:00 · bdc7aa32c1
commit bdc7aa32c1
parent efa9025062
19 changed files with 168 additions and 3 deletions
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@ -7526,6 +7526,42 @@ Semantics:
 This function returns the same values as the libm ``nearbyint``
 functions would, and handles error conditions in the same way.

+'``llvm.round.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.round`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.round.f32(float  %Val)
+      declare double    @llvm.round.f64(double %Val)
+      declare x86_fp80  @llvm.round.f80(x86_fp80  %Val)
+      declare fp128     @llvm.round.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.round.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.round.*``' intrinsics returns the operand rounded to the
+nearest integer.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``round``
+functions would, and handles error conditions in the same way.
+
 Bit Manipulation Intrinsics
 ---------------------------

--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@ -440,11 +440,11 @@ namespace ISD {

    /// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
    /// FLOG, FLOG2, FLOG10, FEXP, FEXP2,
-    /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR - Perform various unary
+    /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary
    /// floating point operations. These are inspired by libm.
    FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
    FLOG, FLOG2, FLOG10, FEXP, FEXP2,
-    FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR,
+    FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
    
    /// FSINCOS - Compute both fsin and fcos as a single operation.
    FSINCOS,
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@ -188,6 +188,11 @@ namespace RTLIB {
    NEARBYINT_F80,
    NEARBYINT_F128,
    NEARBYINT_PPCF128,
+    ROUND_F32,
+    ROUND_F64,
+    ROUND_F80,
+    ROUND_F128,
+    ROUND_PPCF128,
    FLOOR_F32,
    FLOOR_F64,
    FLOOR_F80,
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@ -298,6 +298,7 @@ let Properties = [IntrReadMem] in {
  def int_trunc : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
  def int_rint  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
  def int_nearbyint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_round : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
 }

 let Properties = [IntrNoMem] in {
--- a/include/llvm/Target/TargetLibraryInfo.h
+++ b/include/llvm/Target/TargetLibraryInfo.h
@ -695,6 +695,7 @@ public:
    case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl:
    case LibFunc::ceil:      case LibFunc::ceilf:      case LibFunc::ceill:
    case LibFunc::rint:      case LibFunc::rintf:      case LibFunc::rintl:
+    case LibFunc::round:     case LibFunc::roundf:     case LibFunc::roundl:
    case LibFunc::trunc:     case LibFunc::truncf:     case LibFunc::truncl:
    case LibFunc::log2:      case LibFunc::log2f:      case LibFunc::log2l:
    case LibFunc::exp2:      case LibFunc::exp2f:      case LibFunc::exp2l:
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@ -383,6 +383,7 @@ def ftrunc     : SDNode<"ISD::FTRUNC"     , SDTFPUnaryOp>;
 def fceil      : SDNode<"ISD::FCEIL"      , SDTFPUnaryOp>;
 def ffloor     : SDNode<"ISD::FFLOOR"     , SDTFPUnaryOp>;
 def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>;
+def frnd       : SDNode<"ISD::FROUND"     , SDTFPUnaryOp>;

 def fround     : SDNode<"ISD::FP_ROUND"   , SDTFPRoundOp>;
 def fextend    : SDNode<"ISD::FP_EXTEND"  , SDTFPExtendOp>;
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@ -2946,6 +2946,7 @@ static bool IsIdempotent(Intrinsic::ID ID) {
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
+  case Intrinsic::round:
    return true;
  }
 }
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@ -449,6 +449,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
  case Intrinsic::nearbyint:
                           ISD = ISD::FNEARBYINT; break;
  case Intrinsic::rint:    ISD = ISD::FRINT;  break;
+  case Intrinsic::round:   ISD = ISD::FROUND; break;
  case Intrinsic::pow:     ISD = ISD::FPOW;   break;
  case Intrinsic::fma:     ISD = ISD::FMA;    break;
  case Intrinsic::fmuladd: ISD = ISD::FMA;    break; // FIXME: mul + add?
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -3231,6 +3231,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                                      RTLIB::NEARBYINT_F128,
                                      RTLIB::NEARBYINT_PPCF128));
    break;
+  case ISD::FROUND:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+                                      RTLIB::ROUND_F64,
+                                      RTLIB::ROUND_F80,
+                                      RTLIB::ROUND_F128,
+                                      RTLIB::ROUND_PPCF128));
+    break;
  case ISD::FPOWI:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
                                      RTLIB::POWI_F80, RTLIB::POWI_F128,
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@ -88,6 +88,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
    case ISD::FPOWI:       R = SoftenFloatRes_FPOWI(N); break;
    case ISD::FREM:        R = SoftenFloatRes_FREM(N); break;
    case ISD::FRINT:       R = SoftenFloatRes_FRINT(N); break;
+    case ISD::FROUND:      R = SoftenFloatRes_FROUND(N); break;
    case ISD::FSIN:        R = SoftenFloatRes_FSIN(N); break;
    case ISD::FSQRT:       R = SoftenFloatRes_FSQRT(N); break;
    case ISD::FSUB:        R = SoftenFloatRes_FSUB(N); break;
@ -444,6 +445,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
                         NVT, &Op, 1, false, SDLoc(N));
 }

+SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+                                           RTLIB::ROUND_F32,
+                                           RTLIB::ROUND_F64,
+                                           RTLIB::ROUND_F80,
+                                           RTLIB::ROUND_F128,
+                                           RTLIB::ROUND_PPCF128),
+                         NVT, &Op, 1, false, SDLoc(N));
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  SDValue Op = GetSoftenedFloat(N->getOperand(0));
@ -817,6 +830,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
  case ISD::FPOW:       ExpandFloatRes_FPOW(N, Lo, Hi); break;
  case ISD::FPOWI:      ExpandFloatRes_FPOWI(N, Lo, Hi); break;
  case ISD::FRINT:      ExpandFloatRes_FRINT(N, Lo, Hi); break;
+  case ISD::FROUND:     ExpandFloatRes_FROUND(N, Lo, Hi); break;
  case ISD::FSIN:       ExpandFloatRes_FSIN(N, Lo, Hi); break;
  case ISD::FSQRT:      ExpandFloatRes_FSQRT(N, Lo, Hi); break;
  case ISD::FSUB:       ExpandFloatRes_FSUB(N, Lo, Hi); break;
@ -1072,6 +1086,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
  GetPairElements(Call, Lo, Hi);
 }

+void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::ROUND_F32,
+                                         RTLIB::ROUND_F64,
+                                         RTLIB::ROUND_F80,
+                                         RTLIB::ROUND_F128,
+                                         RTLIB::ROUND_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
                                           SDValue &Lo, SDValue &Hi) {
  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@ -410,6 +410,7 @@ private:
  SDValue SoftenFloatRes_FPOWI(SDNode *N);
  SDValue SoftenFloatRes_FREM(SDNode *N);
  SDValue SoftenFloatRes_FRINT(SDNode *N);
+  SDValue SoftenFloatRes_FROUND(SDNode *N);
  SDValue SoftenFloatRes_FSIN(SDNode *N);
  SDValue SoftenFloatRes_FSQRT(SDNode *N);
  SDValue SoftenFloatRes_FSUB(SDNode *N);
@ -470,6 +471,7 @@ private:
  void ExpandFloatRes_FPOWI     (SDNode *N, SDValue &Lo, SDValue &Hi);
  void ExpandFloatRes_FREM      (SDNode *N, SDValue &Lo, SDValue &Hi);
  void ExpandFloatRes_FRINT     (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FROUND    (SDNode *N, SDValue &Lo, SDValue &Hi);
  void ExpandFloatRes_FSIN      (SDNode *N, SDValue &Lo, SDValue &Hi);
  void ExpandFloatRes_FSQRT     (SDNode *N, SDValue &Lo, SDValue &Hi);
  void ExpandFloatRes_FSUB      (SDNode *N, SDValue &Lo, SDValue &Hi);
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@ -241,6 +241,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
+  case ISD::FROUND:
  case ISD::FFLOOR:
  case ISD::FP_ROUND:
  case ISD::FP_EXTEND:
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@ -83,6 +83,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FRINT:
+  case ISD::FROUND:
  case ISD::FSIN:
  case ISD::FSQRT:
  case ISD::FTRUNC:
@ -540,6 +541,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FRINT:
+  case ISD::FROUND:
  case ISD::FSIN:
  case ISD::FSQRT:
  case ISD::FTRUNC:
@ -1507,6 +1509,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
  case ISD::FNEARBYINT:
  case ISD::FNEG:
  case ISD::FRINT:
+  case ISD::FROUND:
  case ISD::FSIN:
  case ISD::FSQRT:
  case ISD::FTRUNC:
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -4902,7 +4902,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
-  case Intrinsic::nearbyint: {
+  case Intrinsic::nearbyint:
+  case Intrinsic::round: {
    unsigned Opcode;
    switch (Intrinsic) {
    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
@ -4915,6 +4916,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
    case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
    case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+    case Intrinsic::round:     Opcode = ISD::FROUND;     break;
    }

    setValue(&I, DAG.getNode(Opcode, sdl,
@ -5644,6 +5646,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
        if (visitUnaryFloatCall(I, ISD::FRINT))
          return;
        break;
+      case LibFunc::round:
+      case LibFunc::roundf:
+      case LibFunc::roundl:
+        if (visitUnaryFloatCall(I, ISD::FROUND))
+          return;
+        break;
      case LibFunc::trunc:
      case LibFunc::truncf:
      case LibFunc::truncl:
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@ -142,6 +142,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
  case ISD::FCEIL:                      return "fceil";
  case ISD::FRINT:                      return "frint";
  case ISD::FNEARBYINT:                 return "fnearbyint";
+  case ISD::FROUND:                     return "fround";
  case ISD::FEXP:                       return "fexp";
  case ISD::FEXP2:                      return "fexp2";
  case ISD::FLOG:                       return "flog";
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@ -191,6 +191,11 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
  Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
  Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
  Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+  Names[RTLIB::ROUND_F32] = "roundf";
+  Names[RTLIB::ROUND_F64] = "round";
+  Names[RTLIB::ROUND_F80] = "roundl";
+  Names[RTLIB::ROUND_F128] = "roundl";
+  Names[RTLIB::ROUND_PPCF128] = "roundl";
  Names[RTLIB::FLOOR_F32] = "floorf";
  Names[RTLIB::FLOOR_F64] = "floor";
  Names[RTLIB::FLOOR_F80] = "floorl";
@ -706,6 +711,7 @@ void TargetLoweringBase::initActions() {
  setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
  setOperationAction(ISD::FCEIL,  MVT::f16, Expand);
  setOperationAction(ISD::FRINT,  MVT::f16, Expand);
+  setOperationAction(ISD::FROUND, MVT::f16, Expand);
  setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
  setOperationAction(ISD::FLOG ,  MVT::f32, Expand);
  setOperationAction(ISD::FLOG2,  MVT::f32, Expand);
@ -716,6 +722,7 @@ void TargetLoweringBase::initActions() {
  setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
  setOperationAction(ISD::FCEIL,  MVT::f32, Expand);
  setOperationAction(ISD::FRINT,  MVT::f32, Expand);
+  setOperationAction(ISD::FROUND, MVT::f32, Expand);
  setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
  setOperationAction(ISD::FLOG ,  MVT::f64, Expand);
  setOperationAction(ISD::FLOG2,  MVT::f64, Expand);
@ -726,6 +733,7 @@ void TargetLoweringBase::initActions() {
  setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
  setOperationAction(ISD::FCEIL,  MVT::f64, Expand);
  setOperationAction(ISD::FRINT,  MVT::f64, Expand);
+  setOperationAction(ISD::FROUND, MVT::f64, Expand);
  setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
  setOperationAction(ISD::FLOG ,  MVT::f128, Expand);
  setOperationAction(ISD::FLOG2,  MVT::f128, Expand);
@ -736,6 +744,7 @@ void TargetLoweringBase::initActions() {
  setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::f128, Expand);
  setOperationAction(ISD::FRINT,  MVT::f128, Expand);
+  setOperationAction(ISD::FROUND, MVT::f128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::f128, Expand);

  // Default ISD::TRAP to expand (which turns it into abort).
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@ -259,6 +259,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
          case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
          case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+          case Intrinsic::round:     Opcode = ISD::FROUND;     break;
          }
        }

@ -309,6 +310,10 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
          case LibFunc::rintf:
          case LibFunc::rintl:
            Opcode = ISD::FRINT; break;
+          case LibFunc::round:
+          case LibFunc::roundf:
+          case LibFunc::roundl:
+            Opcode = ISD::FROUND; break;
          case LibFunc::trunc:
          case LibFunc::truncf:
          case LibFunc::truncl:
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -1772,6 +1772,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
    case Intrinsic::trunc:
    case Intrinsic::rint:
    case Intrinsic::nearbyint:
+    case Intrinsic::round:
    case Intrinsic::pow:
    case Intrinsic::fma:
    case Intrinsic::fmuladd:
@ -1850,6 +1851,10 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
  case LibFunc::nearbyintf:
  case LibFunc::nearbyintl:
    return Intrinsic::nearbyint;
+  case LibFunc::round:
+  case LibFunc::roundf:
+  case LibFunc::roundl:
+    return Intrinsic::round;
  case LibFunc::pow:
  case LibFunc::powf:
  case LibFunc::powl:
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@ -728,6 +728,58 @@ for.end:                                          ; preds = %for.body, %entry

 declare double @llvm.nearbyint.f64(double) nounwind readnone

+;CHECK-LABEL: @round_f32(
+;CHECK: llvm.round.v4f32
+;CHECK: ret void
+define void @round_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %call = tail call float @llvm.round.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.round.f32(float) nounwind readnone
+
+;CHECK-LABEL: @round_f64(
+;CHECK: llvm.round.v4f64
+;CHECK: ret void
+define void @round_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8
+  %call = tail call double @llvm.round.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.round.f64(double) nounwind readnone
+
 ;CHECK-LABEL: @fma_f32(
 ;CHECK: llvm.fma.v4f32
 ;CHECK: ret void