diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 355410ad045..f0ed075cce6 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2539,6 +2539,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { BasicBlock *BB = BI->getParent(); + const unsigned PredCount = pred_size(BB); + Instruction *Cond = nullptr; if (BI->isConditional()) Cond = dyn_cast(BI->getCondition()); @@ -2588,7 +2590,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // too many instructions and these involved instructions can be executed // unconditionally. We denote all involved instructions except the condition // as "bonus instructions", and only allow this transformation when the - // number of the bonus instructions does not exceed a certain threshold. + // number of the bonus instructions we'll need to create when cloning into + // each predecessor does not exceed a certain threshold. unsigned NumBonusInsts = 0; for (auto I = BB->begin(); Cond != &*I; ++I) { // Ignore dbg intrinsics. @@ -2603,7 +2606,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // I is used in the same BB. Since BI uses Cond and doesn't have more slots // to use any other instruction, User must be an instruction between next(I) // and Cond. - ++NumBonusInsts; + + // Account for the cost of duplicating this instruction into each + // predecessor. + NumBonusInsts += PredCount; // Early exits once we reach the limit. if (NumBonusInsts > BonusInstThreshold) return false; diff --git a/test/Transforms/SimplifyCFG/branch-fold-threshold.ll b/test/Transforms/SimplifyCFG/branch-fold-threshold.ll index fad5fce1d4b..7b564955856 100644 --- a/test/Transforms/SimplifyCFG/branch-fold-threshold.ll +++ b/test/Transforms/SimplifyCFG/branch-fold-threshold.ll @@ -1,5 +1,6 @@ ; RUN: opt %s -simplifycfg -S | FileCheck %s --check-prefix=NORMAL ; RUN: opt %s -simplifycfg -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE +; RUN: opt %s -simplifycfg -S -bonus-inst-threshold=4 | FileCheck %s --check-prefix=WAYAGGRESSIVE define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) { ; NORMAL-LABEL: @foo( @@ -26,3 +27,54 @@ cond.end: %cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ], [ 0, %entry ] ret i32 %cond } + +declare void @distinct_a(); +declare void @distinct_b(); + +;; Like foo, but have to duplicate into multiple predecessors +define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) { +; NORMAL-LABEL: @bar( +; AGGRESSIVE-LABEL: @bar( +entry: + %cmp_split = icmp slt i32 %d, %b + %cmp = icmp sgt i32 %d, 3 + br i1 %cmp_split, label %pred_a, label %pred_b +pred_a: +; NORMAL-LABEL: pred_a: +; AGGRESSIVE-LABEL: pred_a: +; WAYAGGRESSIVE-LABEL: pred_a: +; NORMAL: br i1 +; AGGRESSIVE: br i1 +; WAYAGGRESSIVE: br i1 + call void @distinct_a(); + br i1 %cmp, label %cond.end, label %lor.lhs.false +pred_b: +; NORMAL-LABEL: pred_b: +; AGGRESSIVE-LABEL: pred_b: +; WAYAGGRESSIVE-LABEL: pred_b: +; NORMAL: br i1 +; AGGRESSIVE: br i1 +; WAYAGGRESSIVE: br i1 + call void @distinct_b(); + br i1 %cmp, label %cond.end, label %lor.lhs.false + +lor.lhs.false: + %mul = shl i32 %c, 1 + %add = add nsw i32 %mul, %a + %cmp1 = icmp slt i32 %add, %b + br i1 %cmp1, label %cond.false, label %cond.end +; NORMAL-LABEL: lor.lhs.false: +; AGGRESIVE-LABEL: lor.lhs.false: +; WAYAGGRESIVE-LABEL: lor.lhs.false: +; NORMAL: br i1 +; AGGRESSIVE: br i1 +; WAYAGGRESSIVE-NOT: br i1 + +cond.false: + %0 = load i32, i32* %input, align 4 + br label %cond.end + +cond.end: + %cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ],[ 0, %pred_a ],[ 0, %pred_b ] + ret i32 %cond +}