[CodeGenPrepare] use branch weight metadata to decide if a select should be turned into a branch

This is part of solving PR27344: https://llvm.org/bugs/show_bug.cgi?id=27344 CGP should undo the SimplifyCFG transform for the same reason that earlier patches have used this same mechanism: it's possible that passes between SimplifyCFG and CGP may be able to optimize the IR further with a select in place. For the TLI hook default, >99% taken or not taken is chosen as the default threshold for a highly predictable branch. Even the most limited HW branch predictors will be correct on this branch almost all the time, so even a massive mispredict penalty perf loss would be overcome by the win from all the times the branch was predicted correctly. As a follow-up, we could make the default target hook less conservative by using the SchedMachineModel's MispredictPenalty. Or we could just let targets override the default by implementing the hook with that and other target-specific options. Note that trying to statically determine mispredict rates for close-to-balanced profile weight data is generally impossible if the HW is sufficiently advanced. Ie, 50/50 taken/not-taken might still be 100% predictable. Finally, note that this patch as-is will not solve PR27344 because the current __builtin_unpredictable() branch weight default values are 4 and 64. A proposal to change that is in D19435. Differential Revision: http://reviews.llvm.org/D19488 llvm-svn: 267572
2024-10-19 02:52:53 +02:00 · 2016-04-26 17:11:17 +00:00 · 2016-04-26 17:11:17 +00:00 · 7177ce0576
commit 7177ce0576
parent 7fe33892f2
8 changed files with 96 additions and 41 deletions
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@ -217,6 +217,11 @@ public:
  /// Sets the metadata on this instruction from the AAMDNodes structure.
  void setAAMetadata(const AAMDNodes &N);

+  /// Retrieve the raw weight values of a conditional branch or select.
+  /// Returns true on success with profile weights filled in.
+  /// Returns false if no metadata or invalid metadata was found.
+  bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal);
+
  /// Set the debug location information for this instruction.
  void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc); }

--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@ -2906,11 +2906,6 @@ public:
  /// continues to map correctly to each operand.
  void swapSuccessors();

-  /// Retrieve the raw weight values of a conditional branch.
-  /// Returns true on success with profile weights filled in.
-  /// Returns false if no metadata or invalid metadata was found.
-  bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal);
-
  // Methods for support type inquiry through isa, cast, and dyn_cast:
  static inline bool classof(const Instruction *I) {
    return (I->getOpcode() == Instruction::Br);
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -41,6 +41,7 @@
 #include <vector>

 namespace llvm {
+  class BranchProbability;
  class CallInst;
  class CCState;
  class CCValAssign;
@ -261,6 +262,10 @@ public:
    return PredictableSelectIsExpensive;
  }

+  /// If a branch or a select condition is skewed in one direction by more than
+  /// this factor, it is very likely to be predicted correctly.
+  virtual BranchProbability getPredictableBranchThreshold() const;
+
  /// isLoadBitCastBeneficial() - Return true if the following transform
  /// is beneficial.
  /// fold (conv (load x)) -> (load (conv*)x)
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@ -40,6 +40,7 @@
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@ -4538,11 +4539,25 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {

 /// Returns true if a SelectInst should be turned into an explicit branch.
 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
+                                                const TargetLowering *TLI,
                                                SelectInst *SI) {
+  // If even a predictable select is cheap, then a branch can't be cheaper.
+  if (!TLI->isPredictableSelectExpensive())
+    return false;
+
  // FIXME: This should use the same heuristics as IfConversion to determine
-  // whether a select is better represented as a branch.  This requires that
-  // branch probability metadata is preserved for the select, which is not the
-  // case currently.
+  // whether a select is better represented as a branch.
+
+  // If metadata tells us that the select condition is obviously predictable,
+  // then we want to replace the select with a branch.
+  uint64_t TrueWeight, FalseWeight;
+  if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+    uint64_t Max = std::max(TrueWeight, FalseWeight);
+    uint64_t Sum = TrueWeight + FalseWeight;
+    auto Probability = BranchProbability::getBranchProbability(Max, Sum);
+    if (Probability > TLI->getPredictableBranchThreshold())
+      return true;
+  }

  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());

@ -4580,14 +4595,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
  else
    SelectKind = TargetLowering::ScalarValSelect;

-  // Do we have efficient codegen support for this kind of 'selects' ?
-  if (TLI->isSelectSupported(SelectKind)) {
-    // We have efficient codegen support for the select instruction.
-    // Check if it is profitable to keep this 'select'.
-    if (!TLI->isPredictableSelectExpensive() ||
-        !isFormingBranchFromSelectProfitable(TTI, SI))
-      return false;
-  }
+  if (TLI->isSelectSupported(SelectKind) &&
+      !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
+    return false;

  ModifiedDT = true;

--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@ -28,6 +28,7 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@ -43,6 +44,17 @@ static cl::opt<bool> JumpIsExpensiveOverride(
    cl::desc("Do not create extra branches to split comparison logic."),
    cl::Hidden);

+// Although this default value is arbitrary, it is not random. It is assumed
+// that a condition that evaluates the same way by a higher percentage than this
+// is best represented as control flow. Therefore, the default value N should be
+// set such that the win from N% correct executions is greater than the loss
+// from (100 - N)% mispredicted executions for the majority of intended targets.
+static cl::opt<int> MinPercentageForPredictableBranch(
+    "min-predictable-branch", cl::init(99),
+    cl::desc("Minimum percentage (0-100) that a condition must be either true "
+             "or false to assume that the condition is predictable"),
+    cl::Hidden);
+
 /// InitLibcallNames - Set default libcall names.
 ///
 static void InitLibcallNames(const char **Names, const Triple &TT) {
@ -1625,6 +1637,9 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
  return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
 }

+BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
+  return BranchProbability(MinPercentageForPredictableBranch, 100);
+}

 //===----------------------------------------------------------------------===//
 //  TargetTransformInfo Helpers
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@ -1120,28 +1120,6 @@ void BranchInst::swapSuccessors() {
              MDNode::get(ProfileData->getContext(), Ops));
 }

-bool BranchInst::extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) {
-  assert(isConditional() &&
-         "Looking for probabilities on unconditional branch?");
-  auto *ProfileData = getMetadata(LLVMContext::MD_prof);
-  if (!ProfileData || ProfileData->getNumOperands() != 3)
-    return false;
-
-  auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
-  if (!ProfDataName || !ProfDataName->getString().equals("branch_weights"))
-    return false;
-
-  auto *CITrue = mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
-  auto *CIFalse = mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
-  if (!CITrue || !CIFalse)
-    return false;
-
-  TrueVal = CITrue->getValue().getZExtValue();
-  FalseVal = CIFalse->getValue().getZExtValue();
-
-  return true;
-}
-
 BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
  return getSuccessor(idx);
 }
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@ -1251,6 +1251,30 @@ void Instruction::getAllMetadataOtherThanDebugLocImpl(
  Info.getAll(Result);
 }

+bool Instruction::extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) {
+  assert((getOpcode() == Instruction::Br ||
+          getOpcode() == Instruction::Select) &&
+         "Looking for branch weights on something besides branch or select");
+
+  auto *ProfileData = getMetadata(LLVMContext::MD_prof);
+  if (!ProfileData || ProfileData->getNumOperands() != 3)
+    return false;
+
+  auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
+  if (!ProfDataName || !ProfDataName->getString().equals("branch_weights"))
+    return false;
+
+  auto *CITrue = mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
+  auto *CIFalse = mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
+  if (!CITrue || !CIFalse)
+    return false;
+
+  TrueVal = CITrue->getValue().getZExtValue();
+  FalseVal = CIFalse->getValue().getZExtValue();
+
+  return true;
+}
+
 void Instruction::clearMetadataHashEntries() {
  assert(hasMetadataHashEntry() && "Caller should check");
  getContext().pImpl->InstructionMetadata.erase(this);
--- a/test/CodeGen/X86/cmov-into-branch.ll
+++ b/test/CodeGen/X86/cmov-into-branch.ll
@ -79,13 +79,15 @@ define i32 @weighted_select1(i32 %a, i32 %b) {
  ret i32 %sel
 }

-; TODO: If a select is obviously predictable, turn it into a branch.
+; If a select is obviously predictable, turn it into a branch.
 define i32 @weighted_select2(i32 %a, i32 %b) {
 ; CHECK-LABEL: weighted_select2:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    cmovnel %edi, %esi
-; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    jne [[LABEL_BB5:.*]]
+; CHECK:         movl %esi, %edi
+; CHECK-NEXT:  [[LABEL_BB5]]
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
 ;
  %cmp = icmp ne i32 %a, 0
@ -93,6 +95,27 @@ define i32 @weighted_select2(i32 %a, i32 %b) {
  ret i32 %sel
 }

+; Note the reversed profile weights: it doesn't matter if it's
+; obviously true or obviously false.
+; Either one should become a branch rather than conditional move.
+; TODO: But likely true vs. likely false should affect basic block placement?
+define i32 @weighted_select3(i32 %a, i32 %b) {
+; CHECK-LABEL: weighted_select3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jne [[LABEL_BB6:.*]]
+; CHECK:         movl %esi, %edi
+; CHECK-NEXT:  [[LABEL_BB6]]
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    retq
+;
+  %cmp = icmp ne i32 %a, 0
+  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !2
+  ret i32 %sel
+}
+
+
 !0 = !{!"branch_weights", i32 1, i32 99}
 !1 = !{!"branch_weights", i32 1, i32 100}
+!2 = !{!"branch_weights", i32 100, i32 1}