mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[SimplifyCFG] use profile metadata to refine merging branch conditions
2nd try (original: 27ae17a6b014), now with a fix and a regression test for the crash. We must make sure that TTI is available before trying to use it, because it is not required (which might be another bug). Original commit message: This is one step towards solving https://llvm.org/PR49336. In that example, we disregard the recommended usage of builtin_expect, so an expensive (unpredictable) branch is folded into another branch that is guarding it. Here, we read the profile metadata to see if the 1st (predecessor) condition is likely to cause execution to bypass the 2nd (successor) condition before merging conditions by using logic ops. Differential Revision: https://reviews.llvm.org/D98898
This commit is contained in:
parent
66720927f3
commit
1e54216c88
@ -63,6 +63,7 @@
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
#include "llvm/Support/BranchProbability.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
@ -2840,31 +2841,53 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
|
||||
}
|
||||
}
|
||||
|
||||
// Determine if the two branches share a common destination,
|
||||
// and deduce a glue that we need to use to join branch's conditions
|
||||
// to arrive at the common destination.
|
||||
/// Determine if the two branches share a common destination and deduce a glue
|
||||
/// that joins the branches' conditions to arrive at the common destination if
|
||||
/// that would be profitable.
|
||||
static Optional<std::pair<Instruction::BinaryOps, bool>>
|
||||
CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) {
|
||||
shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
|
||||
const TargetTransformInfo *TTI) {
|
||||
assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
|
||||
"Both blocks must end with a conditional branches.");
|
||||
assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
|
||||
"PredBB must be a predecessor of BB.");
|
||||
|
||||
if (PBI->getSuccessor(0) == BI->getSuccessor(0))
|
||||
return {{Instruction::Or, false}};
|
||||
else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
|
||||
return {{Instruction::And, false}};
|
||||
else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
|
||||
return {{Instruction::And, true}};
|
||||
else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
|
||||
return {{Instruction::Or, true}};
|
||||
// We have the potential to fold the conditions together, but if the
|
||||
// predecessor branch is predictable, we may not want to merge them.
|
||||
uint64_t PTWeight, PFWeight;
|
||||
BranchProbability PBITrueProb, Likely;
|
||||
if (TTI && PBI->extractProfMetadata(PTWeight, PFWeight) &&
|
||||
(PTWeight + PFWeight) != 0) {
|
||||
PBITrueProb =
|
||||
BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
|
||||
Likely = TTI->getPredictableBranchThreshold();
|
||||
}
|
||||
|
||||
if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
|
||||
// Speculate the 2nd condition unless the 1st is probably true.
|
||||
if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
|
||||
return {{Instruction::Or, false}};
|
||||
} else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
|
||||
// Speculate the 2nd condition unless the 1st is probably false.
|
||||
if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
|
||||
return {{Instruction::And, false}};
|
||||
} else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
|
||||
// Speculate the 2nd condition unless the 1st is probably true.
|
||||
if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
|
||||
return {{Instruction::And, true}};
|
||||
} else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
|
||||
// Speculate the 2nd condition unless the 1st is probably false.
|
||||
if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
|
||||
return {{Instruction::Or, true}};
|
||||
}
|
||||
return None;
|
||||
}
|
||||
|
||||
static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
|
||||
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
|
||||
DomTreeUpdater *DTU,
|
||||
MemorySSAUpdater *MSSAU,
|
||||
bool PoisonSafe) {
|
||||
bool PoisonSafe,
|
||||
const TargetTransformInfo *TTI) {
|
||||
BasicBlock *BB = BI->getParent();
|
||||
BasicBlock *PredBlock = PBI->getParent();
|
||||
|
||||
@ -2872,7 +2895,7 @@ static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
|
||||
Instruction::BinaryOps Opc;
|
||||
bool InvertPredCond;
|
||||
std::tie(Opc, InvertPredCond) =
|
||||
*CheckIfCondBranchesShareCommonDestination(BI, PBI);
|
||||
*shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
|
||||
|
||||
LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
|
||||
|
||||
@ -3070,8 +3093,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
|
||||
// Determine if the two branches share a common destination.
|
||||
Instruction::BinaryOps Opc;
|
||||
bool InvertPredCond;
|
||||
if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI))
|
||||
std::tie(Opc, InvertPredCond) = *Recepie;
|
||||
if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
|
||||
std::tie(Opc, InvertPredCond) = *Recipe;
|
||||
else
|
||||
continue;
|
||||
|
||||
@ -3088,7 +3111,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
|
||||
continue;
|
||||
}
|
||||
|
||||
return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, PoisonSafe);
|
||||
return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, PoisonSafe,
|
||||
TTI);
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info | FileCheck %s
|
||||
|
||||
; Loopsimplify should be able to merge the two loop exits
|
||||
@ -7,42 +8,143 @@
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
|
||||
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: bb:
|
||||
; CHECK: phi i64
|
||||
; CHECK-NOT: phi i64
|
||||
; CHECK-NOT: sext
|
||||
|
||||
define float @test1(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
|
||||
; CHECK-LABEL: @test1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[T0:%.*]] = load float, float* [[PEAKWEIGHT:%.*]], align 4
|
||||
; CHECK-NEXT: [[T11:%.*]] = add i32 [[BANDEDGEINDEX:%.*]], -1
|
||||
; CHECK-NEXT: [[T121:%.*]] = icmp sgt i32 [[T11]], 0
|
||||
; CHECK-NEXT: br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]]
|
||||
; CHECK: bb.lr.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T11]] to i64
|
||||
; CHECK-NEXT: br label [[BB:%.*]]
|
||||
; CHECK: bb:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[BB_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BB]] ]
|
||||
; CHECK-NEXT: [[DISTERBHI_04:%.*]] = phi float [ 0.000000e+00, [[BB_LR_PH]] ], [ [[T4:%.*]], [[BB]] ]
|
||||
; CHECK-NEXT: [[PEAKCOUNT_02:%.*]] = phi float [ [[T0]], [[BB_LR_PH]] ], [ [[T9:%.*]], [[BB]] ]
|
||||
; CHECK-NEXT: [[T2:%.*]] = getelementptr float, float* [[PTMP1:%.*]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[T3:%.*]] = load float, float* [[T2]], align 4
|
||||
; CHECK-NEXT: [[T4]] = fadd float [[T3]], [[DISTERBHI_04]]
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[T7:%.*]] = getelementptr float, float* [[PEAKWEIGHT]], i64 [[INDVARS_IV_NEXT]]
|
||||
; CHECK-NEXT: [[T8:%.*]] = load float, float* [[T7]], align 4
|
||||
; CHECK-NEXT: [[T9]] = fadd float [[T8]], [[PEAKCOUNT_02]]
|
||||
; CHECK-NEXT: [[T10:%.*]] = fcmp olt float [[T4]], 2.500000e+00
|
||||
; CHECK-NEXT: [[T12:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[T10]], [[T12]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[BB]], label [[BB1_BB3_CRIT_EDGE:%.*]]
|
||||
; CHECK: bb1.bb3_crit_edge:
|
||||
; CHECK-NEXT: [[T4_LCSSA:%.*]] = phi float [ [[T4]], [[BB]] ]
|
||||
; CHECK-NEXT: [[T9_LCSSA:%.*]] = phi float [ [[T9]], [[BB]] ]
|
||||
; CHECK-NEXT: br label [[BB3]]
|
||||
; CHECK: bb3:
|
||||
; CHECK-NEXT: [[PEAKCOUNT_0_LCSSA:%.*]] = phi float [ [[T9_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ [[T0]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[DISTERBHI_0_LCSSA:%.*]] = phi float [ [[T4_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ 0.000000e+00, [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[T13:%.*]] = fdiv float [[PEAKCOUNT_0_LCSSA]], [[DISTERBHI_0_LCSSA]]
|
||||
; CHECK-NEXT: ret float [[T13]]
|
||||
;
|
||||
entry:
|
||||
%t0 = load float, float* %peakWeight, align 4
|
||||
br label %bb1
|
||||
%t0 = load float, float* %peakWeight, align 4
|
||||
br label %bb1
|
||||
|
||||
bb: ; preds = %bb2
|
||||
%t1 = sext i32 %hiPart.0 to i64
|
||||
%t2 = getelementptr float, float* %pTmp1, i64 %t1
|
||||
%t3 = load float, float* %t2, align 4
|
||||
%t4 = fadd float %t3, %distERBhi.0
|
||||
%t5 = add i32 %hiPart.0, 1
|
||||
%t6 = sext i32 %t5 to i64
|
||||
%t7 = getelementptr float, float* %peakWeight, i64 %t6
|
||||
%t8 = load float, float* %t7, align 4
|
||||
%t9 = fadd float %t8, %peakCount.0
|
||||
br label %bb1
|
||||
%t1 = sext i32 %hiPart.0 to i64
|
||||
%t2 = getelementptr float, float* %pTmp1, i64 %t1
|
||||
%t3 = load float, float* %t2, align 4
|
||||
%t4 = fadd float %t3, %distERBhi.0
|
||||
%t5 = add i32 %hiPart.0, 1
|
||||
%t6 = sext i32 %t5 to i64
|
||||
%t7 = getelementptr float, float* %peakWeight, i64 %t6
|
||||
%t8 = load float, float* %t7, align 4
|
||||
%t9 = fadd float %t8, %peakCount.0
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb, %entry
|
||||
%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
|
||||
%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
|
||||
%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
|
||||
%t10 = fcmp uge float %distERBhi.0, 2.500000e+00
|
||||
br i1 %t10, label %bb3, label %bb2
|
||||
%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
|
||||
%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
|
||||
%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
|
||||
%t10 = fcmp uge float %distERBhi.0, 2.500000e+00
|
||||
br i1 %t10, label %bb3, label %bb2
|
||||
|
||||
bb2: ; preds = %bb1
|
||||
%t11 = add i32 %bandEdgeIndex, -1
|
||||
%t12 = icmp sgt i32 %t11, %hiPart.0
|
||||
br i1 %t12, label %bb, label %bb3
|
||||
%t11 = add i32 %bandEdgeIndex, -1
|
||||
%t12 = icmp sgt i32 %t11, %hiPart.0
|
||||
br i1 %t12, label %bb, label %bb3
|
||||
|
||||
bb3: ; preds = %bb2, %bb1
|
||||
%t13 = fdiv float %peakCount.0, %distERBhi.0
|
||||
ret float %t13
|
||||
%t13 = fdiv float %peakCount.0, %distERBhi.0
|
||||
ret float %t13
|
||||
}
|
||||
|
||||
; Same test as above, except that the branch in %bb1 carries !prof branch_weights metadata.
|
||||
; This would crash because we assumed TTI was available to process the metadata.
|
||||
|
||||
define float @merge_branches_profile_metadata(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
|
||||
; CHECK-LABEL: @merge_branches_profile_metadata(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[T0:%.*]] = load float, float* [[PEAKWEIGHT:%.*]], align 4
|
||||
; CHECK-NEXT: [[T11:%.*]] = add i32 [[BANDEDGEINDEX:%.*]], -1
|
||||
; CHECK-NEXT: [[T121:%.*]] = icmp sgt i32 [[T11]], 0
|
||||
; CHECK-NEXT: br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]], !prof !0
|
||||
; CHECK: bb.lr.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T11]] to i64
|
||||
; CHECK-NEXT: br label [[BB:%.*]]
|
||||
; CHECK: bb:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[BB_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BB]] ]
|
||||
; CHECK-NEXT: [[DISTERBHI_04:%.*]] = phi float [ 0.000000e+00, [[BB_LR_PH]] ], [ [[T4:%.*]], [[BB]] ]
|
||||
; CHECK-NEXT: [[PEAKCOUNT_02:%.*]] = phi float [ [[T0]], [[BB_LR_PH]] ], [ [[T9:%.*]], [[BB]] ]
|
||||
; CHECK-NEXT: [[T2:%.*]] = getelementptr float, float* [[PTMP1:%.*]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[T3:%.*]] = load float, float* [[T2]], align 4
|
||||
; CHECK-NEXT: [[T4]] = fadd float [[T3]], [[DISTERBHI_04]]
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[T7:%.*]] = getelementptr float, float* [[PEAKWEIGHT]], i64 [[INDVARS_IV_NEXT]]
|
||||
; CHECK-NEXT: [[T8:%.*]] = load float, float* [[T7]], align 4
|
||||
; CHECK-NEXT: [[T9]] = fadd float [[T8]], [[PEAKCOUNT_02]]
|
||||
; CHECK-NEXT: [[T10:%.*]] = fcmp olt float [[T4]], 2.500000e+00
|
||||
; CHECK-NEXT: [[T12:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[T10]], [[T12]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[BB]], label [[BB1_BB3_CRIT_EDGE:%.*]], !prof !0
|
||||
; CHECK: bb1.bb3_crit_edge:
|
||||
; CHECK-NEXT: [[T4_LCSSA:%.*]] = phi float [ [[T4]], [[BB]] ]
|
||||
; CHECK-NEXT: [[T9_LCSSA:%.*]] = phi float [ [[T9]], [[BB]] ]
|
||||
; CHECK-NEXT: br label [[BB3]]
|
||||
; CHECK: bb3:
|
||||
; CHECK-NEXT: [[PEAKCOUNT_0_LCSSA:%.*]] = phi float [ [[T9_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ [[T0]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[DISTERBHI_0_LCSSA:%.*]] = phi float [ [[T4_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ 0.000000e+00, [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[T13:%.*]] = fdiv float [[PEAKCOUNT_0_LCSSA]], [[DISTERBHI_0_LCSSA]]
|
||||
; CHECK-NEXT: ret float [[T13]]
|
||||
;
|
||||
entry:
|
||||
%t0 = load float, float* %peakWeight, align 4
|
||||
br label %bb1
|
||||
|
||||
bb: ; preds = %bb2
|
||||
%t1 = sext i32 %hiPart.0 to i64
|
||||
%t2 = getelementptr float, float* %pTmp1, i64 %t1
|
||||
%t3 = load float, float* %t2, align 4
|
||||
%t4 = fadd float %t3, %distERBhi.0
|
||||
%t5 = add i32 %hiPart.0, 1
|
||||
%t6 = sext i32 %t5 to i64
|
||||
%t7 = getelementptr float, float* %peakWeight, i64 %t6
|
||||
%t8 = load float, float* %t7, align 4
|
||||
%t9 = fadd float %t8, %peakCount.0
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb, %entry
|
||||
%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
|
||||
%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
|
||||
%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
|
||||
%t10 = fcmp uge float %distERBhi.0, 2.500000e+00
|
||||
br i1 %t10, label %bb3, label %bb2, !prof !0
|
||||
|
||||
bb2: ; preds = %bb1
|
||||
%t11 = add i32 %bandEdgeIndex, -1
|
||||
%t12 = icmp sgt i32 %t11, %hiPart.0
|
||||
br i1 %t12, label %bb, label %bb3
|
||||
|
||||
bb3: ; preds = %bb2, %bb1
|
||||
%t13 = fdiv float %peakCount.0, %distERBhi.0
|
||||
ret float %t13
|
||||
}
|
||||
|
||||
!0 = !{!"branch_weights", i32 2000, i32 1}
|
||||
|
@ -1277,11 +1277,12 @@ define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
|
||||
; CHECK-LABEL: @test_chr_14(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
||||
; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1
|
||||
; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[Z:%.*]], 1
|
||||
; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
||||
; CHECK: entry.split.nonchr:
|
||||
; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0
|
||||
; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]]
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[V1]], i1 [[V3_NONCHR]], i1 false
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19
|
||||
; CHECK-NEXT: br i1 [[V3_NONCHR]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof !16
|
||||
; CHECK: bb0.nonchr:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB1]]
|
||||
@ -1912,7 +1913,7 @@ define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 {
|
||||
; CHECK-NEXT: switch i64 [[I]], label [[BB2:%.*]] [
|
||||
; CHECK-NEXT: i64 2, label [[BB3_NONCHR2:%.*]]
|
||||
; CHECK-NEXT: i64 86, label [[BB2_NONCHR1:%.*]]
|
||||
; CHECK-NEXT: ], !prof !20
|
||||
; CHECK-NEXT: ], !prof !19
|
||||
; CHECK: bb2:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: call void @foo()
|
||||
@ -2489,14 +2490,14 @@ define void @test_chr_24(i32* %i) !prof !14 {
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !21
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !20
|
||||
; CHECK: bb0:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB1]]
|
||||
; CHECK: bb1:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !21
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !20
|
||||
; CHECK: bb2:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB3]]
|
||||
@ -2550,4 +2551,3 @@ bb3:
|
||||
; CHECK: !16 = !{!"branch_weights", i32 0, i32 1}
|
||||
; CHECK: !17 = !{!"branch_weights", i32 1, i32 1}
|
||||
; CHECK: !18 = !{!"branch_weights", i32 1, i32 0}
|
||||
; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000}
|
||||
|
@ -636,16 +636,17 @@ exit:
|
||||
ret i32 %outval
|
||||
}
|
||||
|
||||
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
|
||||
; Merging the icmps with logic-op defeats the purpose of the metadata.
|
||||
; We can't tell which condition is expensive if they are combined.
|
||||
|
||||
define void @or_icmps_harmful(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @or_icmps_harmful(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
|
||||
; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19
|
||||
; CHECK: rare:
|
||||
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19
|
||||
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]]
|
||||
; CHECK: false:
|
||||
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
@ -668,16 +669,17 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
|
||||
; Merging the icmps with logic-op defeats the purpose of the metadata.
|
||||
; We can't tell which condition is expensive if they are combined.
|
||||
|
||||
define void @or_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @or_icmps_harmful_inverted(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1
|
||||
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
|
||||
; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20
|
||||
; CHECK: rare:
|
||||
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19
|
||||
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]]
|
||||
; CHECK: false:
|
||||
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
@ -700,7 +702,8 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The probability threshold is set by a builtin_expect setting.
|
||||
; The probability threshold is determined by a TTI setting.
|
||||
; In this example, we are just short of strongly expected, so speculate.
|
||||
|
||||
define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @or_icmps_not_that_harmful(
|
||||
@ -708,7 +711,7 @@ define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
|
||||
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !20
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21
|
||||
; CHECK: false:
|
||||
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
@ -731,13 +734,16 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The probability threshold is determined by a TTI setting.
|
||||
; In this example, we are just short of strongly expected, so speculate.
|
||||
|
||||
define void @or_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @or_icmps_not_that_harmful_inverted(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
|
||||
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
|
||||
; CHECK: false:
|
||||
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
@ -760,13 +766,15 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The 1st cmp is probably true, so speculating the 2nd is probably a win.
|
||||
|
||||
define void @or_icmps_useful(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @or_icmps_useful(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
|
||||
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23
|
||||
; CHECK: false:
|
||||
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
@ -789,13 +797,15 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The 1st cmp is probably false, so speculating the 2nd is probably a win.
|
||||
|
||||
define void @or_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @or_icmps_useful_inverted(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
|
||||
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23
|
||||
; CHECK: false:
|
||||
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
@ -849,16 +859,17 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
|
||||
; Merging the icmps with logic-op defeats the purpose of the metadata.
|
||||
; We can't tell which condition is expensive if they are combined.
|
||||
|
||||
define void @and_icmps_harmful(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @and_icmps_harmful(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
|
||||
; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20
|
||||
; CHECK: rare:
|
||||
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23
|
||||
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]]
|
||||
; CHECK: false:
|
||||
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
@ -881,16 +892,17 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
|
||||
; Merging the icmps with logic-op defeats the purpose of the metadata.
|
||||
; We can't tell which condition is expensive if they are combined.
|
||||
|
||||
define void @and_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @and_icmps_harmful_inverted(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
|
||||
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
|
||||
; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19
|
||||
; CHECK: rare:
|
||||
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23
|
||||
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]]
|
||||
; CHECK: false:
|
||||
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
@ -913,6 +925,9 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The probability threshold is determined by a TTI setting.
|
||||
; In this example, we are just short of strongly expected, so speculate.
|
||||
|
||||
define void @and_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @and_icmps_not_that_harmful(
|
||||
; CHECK-NEXT: entry:
|
||||
@ -942,6 +957,9 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The probability threshold is determined by a TTI setting.
|
||||
; In this example, we are just short of strongly expected, so speculate.
|
||||
|
||||
define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @and_icmps_not_that_harmful_inverted(
|
||||
; CHECK-NEXT: entry:
|
||||
@ -971,6 +989,8 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The 1st cmp is probably true, so speculating the 2nd is probably a win.
|
||||
|
||||
define void @and_icmps_useful(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @and_icmps_useful(
|
||||
; CHECK-NEXT: entry:
|
||||
@ -1000,6 +1020,8 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The 1st cmp is probably false, so speculating the 2nd is probably a win.
|
||||
|
||||
define void @and_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) {
|
||||
; CHECK-LABEL: @and_icmps_useful_inverted(
|
||||
; CHECK-NEXT: entry:
|
||||
|
Loading…
Reference in New Issue
Block a user