diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index b529637e44c..6190367c649 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -63,6 +63,7 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -2840,31 +2841,53 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, } } -// Determine if the two branches share a common destination, -// and deduce a glue that we need to use to join branch's conditions -// to arrive at the common destination. +/// Determine if the two branches share a common destination and deduce a glue +/// that joins the branches' conditions to arrive at the common destination if +/// that would be profitable. static Optional<std::pair<Instruction::BinaryOps, bool>> -CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) { +shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, + const TargetTransformInfo *TTI) { assert(BI && PBI && BI->isConditional() && PBI->isConditional() && "Both blocks must end with a conditional branches."); assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) && "PredBB must be a predecessor of BB."); - if (PBI->getSuccessor(0) == BI->getSuccessor(0)) - return {{Instruction::Or, false}}; - else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) - return {{Instruction::And, false}}; - else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) - return {{Instruction::And, true}}; - else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) - return {{Instruction::Or, true}}; + // We have the potential to fold the conditions together, but if the + // predecessor branch is predictable, we may not want to merge them. 
+ uint64_t PTWeight, PFWeight; + BranchProbability PBITrueProb, Likely; + if (TTI && PBI->extractProfMetadata(PTWeight, PFWeight) && + (PTWeight + PFWeight) != 0) { + PBITrueProb = + BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight); + Likely = TTI->getPredictableBranchThreshold(); + } + + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { + // Speculate the 2nd condition unless the 1st is probably true. + if (PBITrueProb.isUnknown() || PBITrueProb < Likely) + return {{Instruction::Or, false}}; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { + // Speculate the 2nd condition unless the 1st is probably false. + if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) + return {{Instruction::And, false}}; + } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { + // Speculate the 2nd condition unless the 1st is probably true. + if (PBITrueProb.isUnknown() || PBITrueProb < Likely) + return {{Instruction::And, true}}; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { + // Speculate the 2nd condition unless the 1st is probably false. 
+ if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) + return {{Instruction::Or, true}}; + } return None; } -static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, +static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, - bool PoisonSafe) { + bool PoisonSafe, + const TargetTransformInfo *TTI) { BasicBlock *BB = BI->getParent(); BasicBlock *PredBlock = PBI->getParent(); @@ -2872,7 +2895,7 @@ static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, Instruction::BinaryOps Opc; bool InvertPredCond; std::tie(Opc, InvertPredCond) = - *CheckIfCondBranchesShareCommonDestination(BI, PBI); + *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI); LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); @@ -3070,8 +3093,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond; - if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI)) - std::tie(Opc, InvertPredCond) = *Recepie; + if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI)) + std::tie(Opc, InvertPredCond) = *Recipe; else continue; @@ -3088,7 +3111,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, continue; } - return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, PoisonSafe); + return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, PoisonSafe, + TTI); } return Changed; } diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll index 5cdf8148778..4efc7e96304 100644 --- a/test/Transforms/LoopSimplify/merge-exits.ll +++ b/test/Transforms/LoopSimplify/merge-exits.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S 
-verify-loop-info -verify-dom-info | FileCheck %s ; Loopsimplify should be able to merge the two loop exits @@ -7,42 +8,143 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64" -; CHECK-LABEL: @test1 -; CHECK: bb: -; CHECK: phi i64 -; CHECK-NOT: phi i64 -; CHECK-NOT: sext - define float @test1(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[PEAKWEIGHT:%.*]], align 4 +; CHECK-NEXT: [[T11:%.*]] = add i32 [[BANDEDGEINDEX:%.*]], -1 +; CHECK-NEXT: [[T121:%.*]] = icmp sgt i32 [[T11]], 0 +; CHECK-NEXT: br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]] +; CHECK: bb.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T11]] to i64 +; CHECK-NEXT: br label [[BB:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[BB_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BB]] ] +; CHECK-NEXT: [[DISTERBHI_04:%.*]] = phi float [ 0.000000e+00, [[BB_LR_PH]] ], [ [[T4:%.*]], [[BB]] ] +; CHECK-NEXT: [[PEAKCOUNT_02:%.*]] = phi float [ [[T0]], [[BB_LR_PH]] ], [ [[T9:%.*]], [[BB]] ] +; CHECK-NEXT: [[T2:%.*]] = getelementptr float, float* [[PTMP1:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[T2]], align 4 +; CHECK-NEXT: [[T4]] = fadd float [[T3]], [[DISTERBHI_04]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[T7:%.*]] = getelementptr float, float* [[PEAKWEIGHT]], i64 [[INDVARS_IV_NEXT]] +; CHECK-NEXT: [[T8:%.*]] = load float, float* [[T7]], align 4 +; CHECK-NEXT: [[T9]] = fadd float [[T8]], [[PEAKCOUNT_02]] +; CHECK-NEXT: [[T10:%.*]] = fcmp olt float [[T4]], 2.500000e+00 +; CHECK-NEXT: [[T12:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]] +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[T10]], [[T12]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[BB]], label [[BB1_BB3_CRIT_EDGE:%.*]] +; CHECK: 
bb1.bb3_crit_edge: +; CHECK-NEXT: [[T4_LCSSA:%.*]] = phi float [ [[T4]], [[BB]] ] +; CHECK-NEXT: [[T9_LCSSA:%.*]] = phi float [ [[T9]], [[BB]] ] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[PEAKCOUNT_0_LCSSA:%.*]] = phi float [ [[T9_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ [[T0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[DISTERBHI_0_LCSSA:%.*]] = phi float [ [[T4_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[T13:%.*]] = fdiv float [[PEAKCOUNT_0_LCSSA]], [[DISTERBHI_0_LCSSA]] +; CHECK-NEXT: ret float [[T13]] +; entry: - %t0 = load float, float* %peakWeight, align 4 - br label %bb1 + %t0 = load float, float* %peakWeight, align 4 + br label %bb1 bb: ; preds = %bb2 - %t1 = sext i32 %hiPart.0 to i64 - %t2 = getelementptr float, float* %pTmp1, i64 %t1 - %t3 = load float, float* %t2, align 4 - %t4 = fadd float %t3, %distERBhi.0 - %t5 = add i32 %hiPart.0, 1 - %t6 = sext i32 %t5 to i64 - %t7 = getelementptr float, float* %peakWeight, i64 %t6 - %t8 = load float, float* %t7, align 4 - %t9 = fadd float %t8, %peakCount.0 - br label %bb1 + %t1 = sext i32 %hiPart.0 to i64 + %t2 = getelementptr float, float* %pTmp1, i64 %t1 + %t3 = load float, float* %t2, align 4 + %t4 = fadd float %t3, %distERBhi.0 + %t5 = add i32 %hiPart.0, 1 + %t6 = sext i32 %t5 to i64 + %t7 = getelementptr float, float* %peakWeight, i64 %t6 + %t8 = load float, float* %t7, align 4 + %t9 = fadd float %t8, %peakCount.0 + br label %bb1 bb1: ; preds = %bb, %entry - %peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ] - %hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ] - %distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ] - %t10 = fcmp uge float %distERBhi.0, 2.500000e+00 - br i1 %t10, label %bb3, label %bb2 + %peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ] + %hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ] + %distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ] + %t10 = fcmp uge float %distERBhi.0, 2.500000e+00 + br i1 %t10, label %bb3, label %bb2 
bb2: ; preds = %bb1 - %t11 = add i32 %bandEdgeIndex, -1 - %t12 = icmp sgt i32 %t11, %hiPart.0 - br i1 %t12, label %bb, label %bb3 + %t11 = add i32 %bandEdgeIndex, -1 + %t12 = icmp sgt i32 %t11, %hiPart.0 + br i1 %t12, label %bb, label %bb3 bb3: ; preds = %bb2, %bb1 - %t13 = fdiv float %peakCount.0, %distERBhi.0 - ret float %t13 + %t13 = fdiv float %peakCount.0, %distERBhi.0 + ret float %t13 } + +; Same test as above. +; This would crash because we assumed TTI was available to process the metadata. + +define float @merge_branches_profile_metadata(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind { +; CHECK-LABEL: @merge_branches_profile_metadata( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[PEAKWEIGHT:%.*]], align 4 +; CHECK-NEXT: [[T11:%.*]] = add i32 [[BANDEDGEINDEX:%.*]], -1 +; CHECK-NEXT: [[T121:%.*]] = icmp sgt i32 [[T11]], 0 +; CHECK-NEXT: br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]], !prof !0 +; CHECK: bb.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T11]] to i64 +; CHECK-NEXT: br label [[BB:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[BB_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BB]] ] +; CHECK-NEXT: [[DISTERBHI_04:%.*]] = phi float [ 0.000000e+00, [[BB_LR_PH]] ], [ [[T4:%.*]], [[BB]] ] +; CHECK-NEXT: [[PEAKCOUNT_02:%.*]] = phi float [ [[T0]], [[BB_LR_PH]] ], [ [[T9:%.*]], [[BB]] ] +; CHECK-NEXT: [[T2:%.*]] = getelementptr float, float* [[PTMP1:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[T2]], align 4 +; CHECK-NEXT: [[T4]] = fadd float [[T3]], [[DISTERBHI_04]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[T7:%.*]] = getelementptr float, float* [[PEAKWEIGHT]], i64 [[INDVARS_IV_NEXT]] +; CHECK-NEXT: [[T8:%.*]] = load float, float* [[T7]], align 4 +; CHECK-NEXT: [[T9]] = fadd float [[T8]], [[PEAKCOUNT_02]] +; CHECK-NEXT: [[T10:%.*]] = fcmp olt float [[T4]], 2.500000e+00 +; CHECK-NEXT: [[T12:%.*]] = icmp sgt i64 
[[TMP0]], [[INDVARS_IV_NEXT]] +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[T10]], [[T12]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[BB]], label [[BB1_BB3_CRIT_EDGE:%.*]], !prof !0 +; CHECK: bb1.bb3_crit_edge: +; CHECK-NEXT: [[T4_LCSSA:%.*]] = phi float [ [[T4]], [[BB]] ] +; CHECK-NEXT: [[T9_LCSSA:%.*]] = phi float [ [[T9]], [[BB]] ] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[PEAKCOUNT_0_LCSSA:%.*]] = phi float [ [[T9_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ [[T0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[DISTERBHI_0_LCSSA:%.*]] = phi float [ [[T4_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[T13:%.*]] = fdiv float [[PEAKCOUNT_0_LCSSA]], [[DISTERBHI_0_LCSSA]] +; CHECK-NEXT: ret float [[T13]] +; +entry: + %t0 = load float, float* %peakWeight, align 4 + br label %bb1 + +bb: ; preds = %bb2 + %t1 = sext i32 %hiPart.0 to i64 + %t2 = getelementptr float, float* %pTmp1, i64 %t1 + %t3 = load float, float* %t2, align 4 + %t4 = fadd float %t3, %distERBhi.0 + %t5 = add i32 %hiPart.0, 1 + %t6 = sext i32 %t5 to i64 + %t7 = getelementptr float, float* %peakWeight, i64 %t6 + %t8 = load float, float* %t7, align 4 + %t9 = fadd float %t8, %peakCount.0 + br label %bb1 + +bb1: ; preds = %bb, %entry + %peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ] + %hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ] + %distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ] + %t10 = fcmp uge float %distERBhi.0, 2.500000e+00 + br i1 %t10, label %bb3, label %bb2, !prof !0 + +bb2: ; preds = %bb1 + %t11 = add i32 %bandEdgeIndex, -1 + %t12 = icmp sgt i32 %t11, %hiPart.0 + br i1 %t12, label %bb, label %bb3 + +bb3: ; preds = %bb2, %bb1 + %t13 = fdiv float %peakCount.0, %distERBhi.0 + ret float %t13 +} + +!0 = !{!"branch_weights", i32 2000, i32 1} diff --git a/test/Transforms/PGOProfile/chr.ll b/test/Transforms/PGOProfile/chr.ll index ff3a6b1b5e4..ddf4811a036 100644 --- a/test/Transforms/PGOProfile/chr.ll +++ b/test/Transforms/PGOProfile/chr.ll @@ -1277,11 
+1277,12 @@ define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 ; CHECK-LABEL: @test_chr_14( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 -; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1 +; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[Z:%.*]], 1 +; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: entry.split.nonchr: ; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0 ; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[V1]], i1 [[V3_NONCHR]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19 +; CHECK-NEXT: br i1 [[V3_NONCHR]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof !16 ; CHECK: bb0.nonchr: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB1]] @@ -1912,7 +1913,7 @@ define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 { ; CHECK-NEXT: switch i64 [[I]], label [[BB2:%.*]] [ ; CHECK-NEXT: i64 2, label [[BB3_NONCHR2:%.*]] ; CHECK-NEXT: i64 86, label [[BB2_NONCHR1:%.*]] -; CHECK-NEXT: ], !prof !20 +; CHECK-NEXT: ], !prof !19 ; CHECK: bb2: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: call void @foo() @@ -2489,14 +2490,14 @@ define void @test_chr_24(i32* %i) !prof !14 { ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !21 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !20 ; CHECK: bb0: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !21 +; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !20 ; CHECK: bb2: ; 
CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3]] @@ -2550,4 +2551,3 @@ bb3: ; CHECK: !16 = !{!"branch_weights", i32 0, i32 1} ; CHECK: !17 = !{!"branch_weights", i32 1, i32 1} ; CHECK: !18 = !{!"branch_weights", i32 1, i32 0} -; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000} diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/test/Transforms/SimplifyCFG/preserve-branchweights.ll index 3d5fe94ade4..38d82bb9448 100644 --- a/test/Transforms/SimplifyCFG/preserve-branchweights.ll +++ b/test/Transforms/SimplifyCFG/preserve-branchweights.ll @@ -636,16 +636,17 @@ exit: ret i32 %outval } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. define void @or_icmps_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_harmful( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -668,16 +669,17 @@ exit: ret void } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. 
define void @or_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_harmful_inverted( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1 +; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -700,7 +702,8 @@ exit: ret void } -; The probability threshold is set by a builtin_expect setting. +; The probability threshold is determined by a TTI setting. +; In this example, we are just short of strongly expected, so speculate. define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_not_that_harmful( @@ -708,7 +711,7 @@ define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !20 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -731,13 +734,16 @@ exit: ret void } +; The probability threshold is determined by a TTI setting. +; In this example, we are just short of strongly expected, so speculate. 
+ define void @or_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_not_that_harmful_inverted( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -760,13 +766,15 @@ exit: ret void } +; The 1st cmp is probably true, so speculating the 2nd is probably a win. + define void @or_icmps_useful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_useful( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -789,13 +797,15 @@ exit: ret void } +; The 1st cmp is probably false, so speculating the 2nd is probably a win. 
+ define void @or_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_useful_inverted( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -849,16 +859,17 @@ exit: ret void } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. define void @and_icmps_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_harmful( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -881,16 +892,17 @@ exit: ret void } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. 
define void @and_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_harmful_inverted( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1 +; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -913,6 +925,9 @@ exit: ret void } +; The probability threshold is determined by a TTI setting. +; In this example, we are just short of strongly expected, so speculate. + define void @and_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_not_that_harmful( ; CHECK-NEXT: entry: @@ -942,6 +957,9 @@ exit: ret void } +; The probability threshold is determined by a TTI setting. +; In this example, we are just short of strongly expected, so speculate. + define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_not_that_harmful_inverted( ; CHECK-NEXT: entry: @@ -971,6 +989,8 @@ exit: ret void } +; The 1st cmp is probably true, so speculating the 2nd is probably a win. + define void @and_icmps_useful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_useful( ; CHECK-NEXT: entry: @@ -1000,6 +1020,8 @@ exit: ret void } +; The 1st cmp is probably false, so speculating the 2nd is probably a win. + define void @and_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_useful_inverted( ; CHECK-NEXT: entry: