From 49686118332c78800e95147e16ceef41145f92e7 Mon Sep 17 00:00:00 2001
From: Diego Caballero
Date: Tue, 21 Apr 2020 22:10:55 +0300
Subject: [PATCH] [LoopFusion] Remove unreachable blocks from DT and LI after fusion

This patch removes FC0.ExitBlock and FC1GuardBlock from DT and LI after fusion
of guarded loops. They become unreachable and LI verification failed when they
happened to be inside another loop.

Reviewed By: kbarton

Differential Revision: https://reviews.llvm.org/D78679
---
Review notes:
 * Line structure of this patch was restored from a re-wrapped copy; the
   indentation of the LoopFuse.cpp hunk and the position of its blank context
   line are reconstructed and should be confirmed against the target tree.
 * In both new tests every FileCheck label variable is now defined once
   ([[NAME:regex]]) and only referenced ([[NAME]]) afterwards, so each branch
   target is tied to the label captured earlier instead of being re-bound.
   The blob ids on the "index" lines of the two tests predate this change.

 lib/Transforms/Scalar/LoopFuse.cpp            |   3 +
 .../double_loop_nest_inner_guard.ll           | 116 +++++++++++++
 .../triple_loop_nest_inner_guard.ll           | 160 ++++++++++++++++++
 3 files changed, 279 insertions(+)
 create mode 100644 test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
 create mode 100644 test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll

diff --git a/lib/Transforms/Scalar/LoopFuse.cpp b/lib/Transforms/Scalar/LoopFuse.cpp
index 8d591d783f2..e2b65f5dfa5 100644
--- a/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1536,7 +1536,10 @@ private:
     // Update DT/PDT
     DTU.applyUpdates(TreeUpdates);
 
+    LI.removeBlock(FC1GuardBlock);
     LI.removeBlock(FC1.Preheader);
+    LI.removeBlock(FC0.ExitBlock);
+    DTU.deleteBB(FC1GuardBlock);
     DTU.deleteBB(FC1.Preheader);
     DTU.deleteBB(FC0.ExitBlock);
     DTU.flush();
diff --git a/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll b/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
new file mode 100644
index 00000000000..d94c2229a0f
--- /dev/null
+++ b/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
@@ -0,0 +1,116 @@
+; RUN: opt -S -loop-fusion < %s 2>&1 | FileCheck %s
+
+; Verify that LoopFusion can fuse two double-loop nests with guarded inner
+; loops. Loops are in canonical form.
+
+@a = common global [10 x [10 x i32]] zeroinitializer
+@b = common global [10 x [10 x i32]] zeroinitializer
+@c = common global [10 x [10 x i32]] zeroinitializer
+
+; CHECK-LABEL: @double_loop_nest_inner_guard
+; CHECK: br i1 %{{.*}}, label %[[OUTER_PH:outer1.ph]], label %[[FUNC_EXIT:func_exit]]
+
+; CHECK: [[OUTER_PH]]:
+; CHECK: br label %[[OUTER_BODY_INNER_GUARD:outer1.body.inner.guard]]
+
+; CHECK: [[OUTER_BODY_INNER_GUARD]]:
+; CHECK: br i1 %{{.*}}, label %[[INNER_PH:inner1.ph]], label %[[OUTER_LATCH:outer2.latch]]
+
+; CHECK: [[INNER_PH]]:
+; CHECK-NEXT: br label %[[INNER_BODY:inner1.body]]
+
+; CHECK: [[INNER_BODY]]:
+; First loop body.
+; CHECK: load
+; CHECK: add
+; CHECK: store
+; Second loop body.
+; CHECK: load
+; CHECK: mul
+; CHECK: store
+; CHECK: br i1 %{{.*}}, label %[[INNER_EXIT:inner2.exit]], label %[[INNER_BODY]]
+
+; CHECK: [[INNER_EXIT]]:
+; CHECK-NEXT: br label %[[OUTER_LATCH]]
+
+; CHECK: [[OUTER_LATCH]]:
+; CHECK: br i1 %{{.*}}, label %[[OUTER_EXIT:outer2.exit]], label %[[OUTER_BODY_INNER_GUARD]]
+
+; CHECK: [[OUTER_EXIT]]:
+; CHECK-NEXT: br label %[[FUNC_EXIT]]
+
+; CHECK: [[FUNC_EXIT]]:
+; CHECK-NEXT: ret
+
+define i32 @double_loop_nest_inner_guard(i32 %m, i32 %n, i32 %M, i32 %N) {
+entry:
+  %cmp63 = icmp sgt i32 %m, 0
+  br i1 %cmp63, label %outer1.ph, label %func_exit
+
+outer1.ph:
+  %cmp261 = icmp sgt i32 %n, 0
+  %wide.trip.count76 = zext i32 %m to i64
+  %wide.trip.count72 = zext i32 %n to i64
+  br label %outer1.body.inner.guard
+
+outer1.body.inner.guard:
+  %iv74 = phi i64 [ 0, %outer1.ph ], [ %iv.next75, %outer1.latch ]
+  br i1 %cmp261, label %inner1.ph, label %outer1.latch
+
+inner1.ph:
+  br label %inner1.body
+
+inner1.body:
+  %iv70 = phi i64 [ %iv.next71, %inner1.body ], [ 0, %inner1.ph ]
+  %idx6 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @a, i64 0, i64 %iv74, i64 %iv70
+  %0 = load i32, i32* %idx6
+  %add = add nsw i32 %0, 2
+  %idx10 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @b, i64 0, i64 %iv74, i64 %iv70
+  store i32 %add, i32* %idx10
+  %iv.next71 = add nuw nsw i64 %iv70, 1
+  %exitcond73 = icmp eq i64 %iv.next71, %wide.trip.count72
+  br i1 %exitcond73, label %inner1.exit, label %inner1.body
+
+inner1.exit:
+  br label %outer1.latch
+
+outer1.latch:
+  %iv.next75 = add nuw nsw i64 %iv74, 1
+  %exitcond77 = icmp eq i64 %iv.next75, %wide.trip.count76
+  br i1 %exitcond77, label %outer2.ph, label %outer1.body.inner.guard
+
+outer2.ph:
+  br label %outer2.body.inner.guard
+
+outer2.body.inner.guard:
+  %iv66 = phi i64 [ %iv.next67, %outer2.latch ], [ 0, %outer2.ph ]
+  br i1 %cmp261, label %inner2.ph, label %outer2.latch
+
+inner2.ph:
+  br label %inner2.body
+
+inner2.body:
+  %iv = phi i64 [ %iv.next, %inner2.body ], [ 0, %inner2.ph ]
+  %idx27 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @a, i64 0, i64 %iv66, i64 %iv
+  %1 = load i32, i32* %idx27
+  %mul = shl nsw i32 %1, 1
+  %idx31 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @c, i64 0, i64 %iv66, i64 %iv
+  store i32 %mul, i32* %idx31
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %wide.trip.count72
+  br i1 %exitcond, label %inner2.exit, label %inner2.body
+
+inner2.exit:
+  br label %outer2.latch
+
+outer2.latch:
+  %iv.next67 = add nuw nsw i64 %iv66, 1
+  %exitcond69 = icmp eq i64 %iv.next67, %wide.trip.count76
+  br i1 %exitcond69, label %outer2.exit, label %outer2.body.inner.guard
+
+outer2.exit:
+  br label %func_exit
+
+func_exit:
+  ret i32 undef
+}
diff --git a/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll b/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll
new file mode 100644
index 00000000000..065b250c0c1
--- /dev/null
+++ b/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll
@@ -0,0 +1,160 @@
+; RUN: opt -S -loop-fusion < %s 2>&1 | FileCheck %s
+
+; Verify that LoopFusion can fuse two triple-loop nests with guarded inner
+; loops. Loops are in canonical form.
+
+@a = common global [10 x [10 x [10 x i32]]] zeroinitializer
+@b = common global [10 x [10 x [10 x i32]]] zeroinitializer
+@c = common global [10 x [10 x [10 x i32]]] zeroinitializer
+
+; CHECK-LABEL: @triple_loop_nest_inner_guard
+; CHECK: br i1 %{{.*}}, label %[[OUTER_PH:outer1.ph]], label %[[FUNC_EXIT:func_exit]]
+
+; CHECK: [[OUTER_PH]]:
+; CHECK: br label %[[OUTER_BODY_MIDDLE_GUARD:outer1.body.middle1.guard]]
+
+; CHECK: [[OUTER_BODY_MIDDLE_GUARD]]:
+; CHECK: br i1 %{{.*}}, label %[[MIDDLE_PH:middle1.ph]], label %[[OUTER_LATCH:outer2.latch]]
+
+; CHECK: [[MIDDLE_PH]]:
+; CHECK-NEXT: br label %[[MIDDLE_BODY_INNER_GUARD:middle1.body.inner1.guard]]
+
+; CHECK: [[MIDDLE_BODY_INNER_GUARD]]:
+; CHECK: br i1 %{{.*}}, label %[[INNER_PH:inner1.ph]], label %[[MIDDLE_LATCH:middle2.latch]]
+
+; CHECK: [[INNER_PH]]:
+; CHECK-NEXT: br label %[[INNER_BODY:inner1.body]]
+
+; CHECK: [[INNER_BODY]]:
+; First loop body.
+; CHECK: load
+; CHECK: add
+; CHECK: store
+; Second loop body.
+; CHECK: load
+; CHECK: mul
+; CHECK: store
+; CHECK: br i1 %{{.*}}, label %[[INNER_EXIT:inner2.exit]], label %[[INNER_BODY]]
+
+; CHECK: [[INNER_EXIT]]:
+; CHECK-NEXT: br label %[[MIDDLE_LATCH]]
+
+; CHECK: [[MIDDLE_LATCH]]:
+; CHECK: br i1 %{{.*}}, label %[[MIDDLE_EXIT:middle2.exit]], label %[[MIDDLE_BODY_INNER_GUARD]]
+
+; CHECK: [[MIDDLE_EXIT]]:
+; CHECK-NEXT: br label %[[OUTER_LATCH]]
+
+; CHECK: [[OUTER_LATCH]]:
+; CHECK: br i1 %{{.*}}, label %[[OUTER_EXIT:outer2.exit]], label %[[OUTER_BODY_MIDDLE_GUARD]]
+
+; CHECK: [[OUTER_EXIT]]:
+; CHECK-NEXT: br label %[[FUNC_EXIT]]
+
+; CHECK: [[FUNC_EXIT]]:
+; CHECK-NEXT: ret
+
+define i32 @triple_loop_nest_inner_guard(i32 %m, i32 %n, i32 %M, i32 %N) {
+entry:
+  %cmp101 = icmp sgt i32 %m, 0
+  br i1 %cmp101, label %outer1.ph, label %func_exit
+
+outer1.ph:
+  %cmp298 = icmp sgt i32 %n, 0
+  %cmp696 = icmp sgt i32 %M, 0
+  %wide.trip.count122 = zext i32 %m to i64
+  %wide.trip.count118 = zext i32 %n to i64
+  %wide.trip.count114 = zext i32 %M to i64
+  br label %outer1.body.middle1.guard
+
+outer1.body.middle1.guard:
+  %iv120 = phi i64 [ 0, %outer1.ph ], [ %iv.next121, %outer1.latch ]
+  br i1 %cmp298, label %middle1.ph, label %outer1.latch
+
+middle1.ph:
+  br label %middle1.body.inner1.guard
+
+middle1.body.inner1.guard:
+  %iv116 = phi i64 [ %iv.next117, %middle1.latch ], [ 0, %middle1.ph ]
+  br i1 %cmp696, label %inner1.ph, label %middle1.latch
+
+inner1.ph:
+  br label %inner1.body
+
+inner1.body:
+  %iv112 = phi i64 [ %iv.next113, %inner1.body ], [ 0, %inner1.ph ]
+  %idx12 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @a, i64 0, i64 %iv120, i64 %iv116, i64 %iv112
+  %0 = load i32, i32* %idx12
+  %add = add nsw i32 %0, 2
+  %idx18 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @b, i64 0, i64 %iv120, i64 %iv116, i64 %iv112
+  store i32 %add, i32* %idx18
+  %iv.next113 = add nuw nsw i64 %iv112, 1
+  %exitcond115 = icmp eq i64 %iv.next113, %wide.trip.count114
+  br i1 %exitcond115, label %inner1.exit, label %inner1.body
+
+inner1.exit:
+  br label %middle1.latch
+
+middle1.latch:
+  %iv.next117 = add nuw nsw i64 %iv116, 1
+  %exitcond119 = icmp eq i64 %iv.next117, %wide.trip.count118
+  br i1 %exitcond119, label %middle1.exit, label %middle1.body.inner1.guard
+
+middle1.exit:
+  br label %outer1.latch
+
+outer1.latch:
+  %iv.next121 = add nuw nsw i64 %iv120, 1
+  %exitcond123 = icmp eq i64 %iv.next121, %wide.trip.count122
+  br i1 %exitcond123, label %outer2.ph, label %outer1.body.middle1.guard
+
+outer2.ph:
+  br label %outer2.middle2.guard
+
+outer2.middle2.guard:
+  %iv108 = phi i64 [ %iv.next109, %outer2.latch ], [ 0, %outer2.ph ]
+  br i1 %cmp298, label %middle2.ph, label %outer2.latch
+
+middle2.ph:
+  br label %middle2.body.inner2.guard
+
+middle2.body.inner2.guard:
+  %iv104 = phi i64 [ %iv.next105, %middle2.latch ], [ 0, %middle2.ph ]
+  br i1 %cmp696, label %inner2.ph, label %middle2.latch
+
+inner2.ph:
+  br label %inner2.body
+
+inner2.body:
+  %iv = phi i64 [ %iv.next, %inner2.body ], [ 0, %inner2.ph ]
+  %idx45 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @a, i64 0, i64 %iv108, i64 %iv104, i64 %iv
+  %1 = load i32, i32* %idx45
+  %mul = shl nsw i32 %1, 1
+  %idx51 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @c, i64 0, i64 %iv108, i64 %iv104, i64 %iv
+  store i32 %mul, i32* %idx51
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %wide.trip.count114
+  br i1 %exitcond, label %inner2.exit, label %inner2.body
+
+inner2.exit:
+  br label %middle2.latch
+
+middle2.latch:
+  %iv.next105 = add nuw nsw i64 %iv104, 1
+  %exitcond107 = icmp eq i64 %iv.next105, %wide.trip.count118
+  br i1 %exitcond107, label %middle2.exit, label %middle2.body.inner2.guard
+
+middle2.exit:
+  br label %outer2.latch
+
+outer2.latch:
+  %iv.next109 = add nuw nsw i64 %iv108, 1
+  %exitcond111 = icmp eq i64 %iv.next109, %wide.trip.count122
+  br i1 %exitcond111, label %outer2.exit, label %outer2.middle2.guard
+
+outer2.exit:
+  br label %func_exit
+
+func_exit:
+  ret i32 undef
+}