[WC] Fix a subtle bug in our definition of widenable branch

We had a subtle, but nasty bug in our definition of a widenable branch, and thus in the transforms which used that utility. Specifically, we returned true for any branch which included a widenable condition within it's condition, regardless of whether that widenable condition also had other uses. The problem is that the result of the WC() call is defined to be one particular value. As such, all users must agree as to what that value is. If we widen a branch without also updating *all other users* of the WC in the same way, we have broken the required semantics. Most of the textual diff is updating existing transforms not to leave dead uses hanging around. They're largely NFC as the dead instructions would be immediately deleted by other passes. The reason to make these changes is so that the transforms preserve the widenable branch form. In practice, we don't get bitten by this only because it isn't profitable to CSE WC() calls and the lowering pass from guards uses distinct WC calls per branch. Differential Revision: https://reviews.llvm.org/D69916
2024-10-19 02:52:53 +02:00 · 2019-11-06 14:05:59 -08:00 · 2019-11-06 14:05:59 -08:00 · e77f587e1a
commit e77f587e1a
parent 0ed7a94cab
6 changed files with 78 additions and 23 deletions
--- a/lib/Analysis/GuardUtils.cpp
+++ b/lib/Analysis/GuardUtils.cpp
@ -42,6 +42,11 @@ bool llvm::parseWidenableBranch(const User *U, Value *&Condition,
  if (!match(U, m_Br(m_And(m_Value(Condition), m_Value(WidenableCondition)),
                     IfTrueBB, IfFalseBB)))
    return false;
+  // For the branch to be (easily) widenable, it must not correlate with other
+  // branches.  Thus, the widenable condition must have a single use.
+  if (!WidenableCondition->hasOneUse() ||
+      !cast<BranchInst>(U)->getCondition()->hasOneUse())
+    return false;
  // TODO: At the moment, we only recognize the branch if the WC call in this
  // specific position.  We should generalize!
  return match(WidenableCondition,
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@ -271,16 +271,17 @@ class GuardWideningImpl {
  void widenGuard(Instruction *ToWiden, Value *NewCondition,
                  bool InvertCondition) {
    Value *Result;
+    
    widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
                    InvertCondition);
-    Value *WidenableCondition = nullptr;
    if (isGuardAsWidenableBranch(ToWiden)) {
-      auto *Cond = cast<BranchInst>(ToWiden)->getCondition();
-      WidenableCondition = cast<BinaryOperator>(Cond)->getOperand(1);
+      auto *BI = cast<BranchInst>(ToWiden);
+      auto *And = cast<Instruction>(BI->getCondition());
+      And->setOperand(0, Result);
+      And->moveBefore(ToWiden);
+      assert(isGuardAsWidenableBranch(ToWiden) && "still widenable?");
+      return;
    }
-    if (WidenableCondition)
-      Result = BinaryOperator::CreateAnd(Result, WidenableCondition,
-                                         "guard.chk", ToWiden);
    setCondition(ToWiden, Result);
  }

--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@ -823,9 +823,9 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
  Value *AllChecks = Builder.CreateAnd(Checks);
  auto *OldCond = BI->getCondition();
  BI->setCondition(AllChecks);
+  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  assert(isGuardAsWidenableBranch(BI) &&
         "Stopped being a guard after transform?");
-  RecursivelyDeleteTriviallyDeadInstructions(OldCond);

  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
  return true;
--- a/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll
+++ b/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll
@ -8,8 +8,7 @@ define void @f_0(i1 %cond_0, i1 %cond_1) {
 ; CHECK-LABEL: @f_0(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
 ; CHECK:       deopt:
@ -52,8 +51,7 @@ define void @f_1(i1 %cond_0, i1 %cond_1) {
 ; CHECK-LABEL: @f_1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
 ; CHECK:       deopt:
@ -113,7 +111,6 @@ define void @f_2(i32 %a, i32 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 10
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp ult i32 [[B:%.*]], 10
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
@ -235,7 +232,6 @@ define void @f_4(i32 %a, i32 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 10
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp ult i32 [[B:%.*]], 10
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
@ -294,7 +290,6 @@ define void @f_5(i32 %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND_0:%.*]] = icmp ugt i32 [[A:%.*]], 7
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = icmp uge i32 [[A]], 11
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
@ -403,7 +398,6 @@ define void @f_7(i32 %a, i1* %cond_buf) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND_1:%.*]] = load volatile i1, i1* [[COND_BUF:%.*]]
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_1]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[COND_3:%.*]] = icmp ult i32 [[A:%.*]], 7
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_1]], [[COND_3]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
@ -491,9 +485,8 @@ define void @f_8(i32 %a, i1 %cond_1, i1 %cond_2) {
 ; CHECK-NEXT:    br i1 undef, label [[LOOP]], label [[LEAVE:%.*]]
 ; CHECK:       leave:
 ; CHECK-NEXT:    [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[COND_2:%.*]], [[WIDENABLE_COND3]]
 ; CHECK-NEXT:    [[COND_3:%.*]] = icmp ult i32 [[A:%.*]], 7
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_2]], [[COND_3]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_2:%.*]], [[COND_3]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND3]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof !0
 ; CHECK:       deopt2:
@ -671,8 +664,7 @@ define void @f_11(i32 %a, i1 %cond_0, i1 %cond_1) {
 ; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
 ; CHECK:       outer_header:
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
 ; CHECK:       deopt:
@ -734,7 +726,6 @@ define void @f_12(i32 %a0) {
 ; CHECK-LABEL: @f_12(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 true, [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[A1:%.*]] = mul i32 [[A0:%.*]], [[A0]]
 ; CHECK-NEXT:    [[A2:%.*]] = mul i32 [[A1]], [[A1]]
 ; CHECK-NEXT:    [[A3:%.*]] = mul i32 [[A2]], [[A2]]
@ -840,7 +831,6 @@ define void @f_13(i32 %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 14
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = icmp ult i32 [[A]], 10
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
--- a/test/Transforms/GuardWidening/mixed_guards.ll
+++ b/test/Transforms/GuardWidening/mixed_guards.ll
@ -45,8 +45,7 @@ define void @test_02(i1 %cond_0, i1 %cond_1) {
 ; CHECK-LABEL: @test_02(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
 ; CHECK:       deopt:
--- a/test/Transforms/SimplifyCFG/wc-widen-block.ll
+++ b/test/Transforms/SimplifyCFG/wc-widen-block.ll
@ -305,6 +305,66 @@ deopt2:
  ret i32 %deoptret2
 }

+; This one is subtle - We can't widen only one branch use of the
+; widenable condition as two branches are correlated.  We'd have to
+; widen them *both*.
+define i32 @neg_correlated(i1 %cond_0, i1 %cond_1, i32* %p) {
+; CHECK-LABEL: @neg_correlated(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK:       deopt:
+; CHECK-NEXT:    [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT:    ret i32 [[DEOPTRET]]
+; CHECK:       guarded:
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND2:%.*]] = and i1 [[COND_1:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND2]], label [[GUARDED2:%.*]], label [[DEOPT2:%.*]], !prof !0
+; CHECK:       deopt2:
+; CHECK-NEXT:    [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT:    ret i32 [[DEOPTRET2]]
+; CHECK:       guarded2:
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]]
+; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    br i1 [[COND_2]], label [[RETURN:%.*]], label [[DEOPT3:%.*]], !prof !0
+; CHECK:       deopt3:
+; CHECK-NEXT:    [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT:    ret i32 [[DEOPTRET3]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+  %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+  br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0
+
+deopt:
+  %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+  ret i32 %deoptret
+
+guarded:
+  %exiplicit_guard_cond2 = and i1 %cond_1, %widenable_cond
+  br i1 %exiplicit_guard_cond2, label %guarded2, label %deopt2, !prof !0
+
+deopt2:
+  %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+  ret i32 %deoptret2
+
+guarded2:
+  %v = load i32, i32* %p
+  %cond_2 = icmp eq i32 %v, 0
+  br i1 %cond_2, label %return, label %deopt3, !prof !0
+
+deopt3:
+  %deoptret3 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+  ret i32 %deoptret3
+
+return:
+  ret i32 0
+}
+
+
+

 declare void @unknown()
 declare i32 @unknown_i32()