From 728dc9361060f96f5fc44efb049bf843ba504478 Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Thu, 21 Dec 2017 01:22:13 +0000 Subject: [PATCH] [SimplifyCFG] Avoid quadratic on a predecessors number behavior in instruction sinking. If a block has N predecessors, then the current algorithm will try to sink common code to this block N times (whenever we visit a predecessor). Every attempt to sink the common code includes going through all predecessors, so the complexity of the algorithm becomes O(N^2). With this patch we try to sink common code only when we visit the block itself. With this, the complexity goes down to O(N). As a side effect, the moment the code is sunk is slightly different than before (the order of simplifications has been changed), that's why I had to adjust two tests (note that neither of the tests is supposed to test SimplifyCFG): * test/CodeGen/AArch64/arm64-jumptable.ll - changes in this test mimic the changes that previous implementation of SimplifyCFG would do. * test/CodeGen/ARM/avoid-cpsr-rmw.ll - in this test I disabled common code sinking by a command line flag. llvm-svn: 321236 --- lib/Transforms/Utils/SimplifyCFG.cpp | 24 ++++++++++-------------- test/CodeGen/AArch64/arm64-jumptable.ll | 16 +++++++--------- test/CodeGen/ARM/avoid-cpsr-rmw.ll | 4 ++-- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index f02f80cc1b7..e7358dbcb62 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1654,14 +1654,11 @@ namespace { } // end anonymous namespace -/// Given an unconditional branch that goes to BBEnd, -/// check whether BBEnd has only two predecessors and the other predecessor -/// ends with an unconditional branch. If it is true, sink any common code -/// in the two predecessors to BBEnd. -static bool SinkThenElseCodeToEnd(BranchInst *BI1) { - assert(BI1->isUnconditional()); - BasicBlock *BBEnd = BI1->getSuccessor(0); - +/// Check whether BB's predecessors end with unconditional branches. If it is +/// true, sink any common code from the predecessors to BB. +/// We also allow one predecessor to end with conditional branch (but no more +/// than one). +static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { // We support two situations: // (1) all incoming arcs are unconditional // (2) one incoming arc is conditional @@ -1705,7 +1702,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // SmallVector UnconditionalPreds; Instruction *Cond = nullptr; - for (auto *B : predecessors(BBEnd)) { + for (auto *B : predecessors(BB)) { auto *T = B->getTerminator(); if (isa(T) && cast(T)->isUnconditional()) UnconditionalPreds.push_back(B); @@ -1773,8 +1770,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { DEBUG(dbgs() << "SINK: Splitting edge\n"); // We have a conditional edge and we're going to sink some instructions. // Insert a new block postdominating all blocks we're going to sink from. - if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds, - ".sink.split")) + if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split")) // Edges couldn't be split. return false; Changed = true; @@ -5728,9 +5724,6 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, BasicBlock *BB = BI->getParent(); BasicBlock *Succ = BI->getSuccessor(0); - if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI)) - return true; - // If the Terminator is the only non-phi instruction, simplify the block. // If LoopHeader is provided, check if the block or its successor is a loop // header. (This is for early invocations before loop simplify and @@ -6008,6 +6001,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { if (MergeBlockIntoPredecessor(BB)) return true; + if (SinkCommon && Options.SinkCommonInsts) + Changed |= SinkCommonCodeFromPredecessors(BB); + IRBuilder<> Builder(BB); // If there is a trivial two-entry PHI node in this basic block, and we can diff --git a/test/CodeGen/AArch64/arm64-jumptable.ll b/test/CodeGen/AArch64/arm64-jumptable.ll index f5c2ee6da0b..fac3e5704d1 100644 --- a/test/CodeGen/AArch64/arm64-jumptable.ll +++ b/test/CodeGen/AArch64/arm64-jumptable.ll @@ -6,22 +6,20 @@ define void @sum(i32 %a, i32* %to, i32 %c) { entry: switch i32 %a, label %exit [ i32 1, label %bb1 - i32 2, label %bb2 + i32 2, label %exit.sink.split i32 3, label %bb3 i32 4, label %bb4 ] bb1: %b = add i32 %c, 1 - store i32 %b, i32* %to - br label %exit -bb2: - store i32 2, i32* %to - br label %exit + br label %exit.sink.split bb3: - store i32 3, i32* %to - br label %exit + br label %exit.sink.split bb4: - store i32 5, i32* %to + br label %exit.sink.split +exit.sink.split: + %.sink = phi i32 [ 5, %bb4 ], [ %b, %bb1 ], [ 3, %bb3 ], [ %a, %entry ] + store i32 %.sink, i32* %to br label %exit exit: ret void diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll index 78d3ebf371a..9373c5d4421 100644 --- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CORTEX -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SWIFT +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 -simplifycfg-sink-common=false | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CORTEX +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift -simplifycfg-sink-common=false | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SWIFT ; Avoid some 's' 16-bit instruction which partially update CPSR (and add false ; dependency) when it isn't dependent on last CPSR defining instruction. ; rdar://8928208