[BranchFolding] Fix PR43964 about branch folder not being debug invariant

Summary: The fix in BranchFolder related to non debug invariant problems done in commit ec32dff0b075055 actually introduced some new problems with debug invariance. Before that patch ComputeCommonTailLength would move iterators back, past debug instructions, in order to make ProfitableToMerge make consistent answers "when one block differs from the other only by whether debugging pseudos are present at the beginning". But the changes in ec32dff0b075055 undid that by moving the iterators forward again. This patch refactors ComputeCommonTailLength. The function was really complex, considering that the SkipTopCFIAndReturn part always moved the iterators forward to the first "real" instruction in the found tail after ec32dff0b075055. The patch also restores the logic to "back past possible debugging pseudos at beginning of block" to make sure ProfitableToMerge gives consistent answers independent of DBG_VALUE instructions before the tail. That is now done by ProfitableToMerge instead of being hidden as a side-effect in ComputeCommonTailLength. Reviewers: probinson, yechunliang, jmorse Reviewed By: jmorse Subscribers: Orlando, mehdi_amini, dexonsmith, aprantl, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70091
2024-11-22 18:54:02 +01:00 · 2019-11-11 18:20:40 +01:00 · 2019-11-11 18:20:40 +01:00 · 420f00e2a9
commit 420f00e2a9
parent 402e6db3f5
2 changed files with 183 additions and 95 deletions
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@ -302,114 +302,56 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
  return HashMachineInstr(*I);
 }

-///  Whether MI should be counted as an instruction when calculating common tail.
+/// Whether MI should be counted as an instruction when calculating common tail.
 static bool countsAsInstruction(const MachineInstr &MI) {
  return !(MI.isDebugInstr() || MI.isCFIInstruction());
 }

-/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
-/// of instructions they actually have in common together at their end.  Return
-/// iterators for the first shared instruction in each block.
+/// Iterate backwards from the given iterator \p I, towards the beginning of the
+/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator
+/// pointing to that MI. If no such MI is found, return the end iterator.
+static MachineBasicBlock::iterator
+skipBackwardPastNonInstructions(MachineBasicBlock::iterator I,
+                                MachineBasicBlock *MBB) {
+  while (I != MBB->begin()) {
+    --I;
+    if (countsAsInstruction(*I))
+      return I;
+  }
+  return MBB->end();
+}
+
+/// Given two machine basic blocks, return the number of instructions they
+/// actually have in common together at their end. If a common tail is found (at
+/// least by one instruction), then iterators for the first shared instruction
+/// in each block are returned as well.
+///
+/// Non-instructions according to countsAsInstruction are ignored.
 static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
                                        MachineBasicBlock *MBB2,
                                        MachineBasicBlock::iterator &I1,
                                        MachineBasicBlock::iterator &I2) {
-  I1 = MBB1->end();
-  I2 = MBB2->end();
+  MachineBasicBlock::iterator MBBI1 = MBB1->end();
+  MachineBasicBlock::iterator MBBI2 = MBB2->end();

  unsigned TailLen = 0;
-  while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
-    --I1; --I2;
-    // Skip debugging pseudos; necessary to avoid changing the code.
-    while (!countsAsInstruction(*I1)) {
-      if (I1==MBB1->begin()) {
-        while (!countsAsInstruction(*I2)) {
-          if (I2==MBB2->begin()) {
-            // I1==DBG at begin; I2==DBG at begin
-            goto SkipTopCFIAndReturn;
-          }
-          --I2;
-        }
-        ++I2;
-        // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
-        goto SkipTopCFIAndReturn;
-      }
-      --I1;
-    }
-    // I1==first (untested) non-DBG preceding known match
-    while (!countsAsInstruction(*I2)) {
-      if (I2==MBB2->begin()) {
-        ++I1;
-        // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
-        goto SkipTopCFIAndReturn;
-      }
-      --I2;
-    }
-    // I1, I2==first (untested) non-DBGs preceding known match
-    if (!I1->isIdenticalTo(*I2) ||
+  while (true) {
+    MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1);
+    MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2);
+    if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end())
+      break;
+    if (!MBBI1->isIdenticalTo(*MBBI2) ||
        // FIXME: This check is dubious. It's used to get around a problem where
        // people incorrectly expect inline asm directives to remain in the same
        // relative order. This is untenable because normal compiler
        // optimizations (like this one) may reorder and/or merge these
        // directives.
-        I1->isInlineAsm()) {
-      ++I1; ++I2;
+        MBBI1->isInlineAsm()) {
      break;
    }
    ++TailLen;
-  }
-  // Back past possible debugging pseudos at beginning of block.  This matters
-  // when one block differs from the other only by whether debugging pseudos
-  // are present at the beginning. (This way, the various checks later for
-  // I1==MBB1->begin() work as expected.)
-  if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
-    --I2;
-    while (I2->isDebugInstr()) {
-      if (I2 == MBB2->begin())
-        return TailLen;
-      --I2;
-    }
-    ++I2;
-  }
-  if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
-    --I1;
-    while (I1->isDebugInstr()) {
-      if (I1 == MBB1->begin())
-        return TailLen;
-      --I1;
-    }
-    ++I1;
-  }
-
-SkipTopCFIAndReturn:
-  // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if
-  // I1 and I2 are non-identical when compared and then one or both of them ends
-  // up pointing to a CFI instruction after being incremented. For example:
-  /*
-    BB1:
-    ...
-    INSTRUCTION_A
-    ADD32ri8  <- last common instruction
-    ...
-    BB2:
-    ...
-    INSTRUCTION_B
-    CFI_INSTRUCTION
-    ADD32ri8  <- last common instruction
-    ...
-  */
-  // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after
-  // incrementing the iterators, I1 will point to ADD, however I2 will point to
-  // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the
-  // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI
-  // instruction.
-  // Skip CFI_INSTRUCTION and debugging instruction; necessary to avoid changing the code.
-  while (I1 != MBB1->end() && !countsAsInstruction(*I1)) {
-    ++I1;
-  }
-
-  while (I2 != MBB2->end() && !countsAsInstruction(*I2)) {
-    ++I2;
+    I1 = MBBI1;
+    I2 = MBBI2;
  }

  return TailLen;
@ -661,6 +603,17 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
                    << " and " << printMBBReference(*MBB2) << " is "
                    << CommonTailLen << '\n');

+  // Move the iterators to the beginning of the MBB if we only got debug
+  // instructions before the tail. This is to avoid splitting a block when we
+  // only got debug instructions before the tail (to be invariant on -g).
+  if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1)
+    I1 = MBB1->begin();
+  if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2)
+    I2 = MBB2->begin();
+
+  bool FullBlockTail1 = I1 == MBB1->begin();
+  bool FullBlockTail2 = I2 == MBB2->begin();
+
  // It's almost always profitable to merge any number of non-terminator
  // instructions with the block that falls through into the common successor.
  // This is true only for a single successor. For multiple successors, we are
@ -679,7 +632,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
  // are unlikely to become a fallthrough target after machine block placement.
  // Tail merging these blocks is unlikely to create additional unconditional
  // branches, and will reduce the size of this cold code.
-  if (I1 == MBB1->begin() && I2 == MBB2->begin() &&
+  if (FullBlockTail1 && FullBlockTail2 &&
      blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
    return true;

@ -687,16 +640,16 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
  // a position where the other could fall through into it, merge any number
  // of instructions, because it can be done without a branch.
  // TODO: If the blocks are not adjacent, move one of them so that they are?
-  if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+  if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2)
    return true;
-  if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+  if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1)
    return true;

  // If both blocks are identical and end in a branch, merge them unless they
  // both have a fallthrough predecessor and successor.
  // We can only do this after block placement because it depends on whether
  // there are fallthroughs, and we don't know until after layout.
-  if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) {
+  if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
    auto BothFallThrough = [](MachineBasicBlock *MBB) {
      if (MBB->succ_size() != 0 && !MBB->canFallThrough())
        return false;
@ -730,7 +683,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
  // instructions that would be deleted in the merge.
  MachineFunction *MF = MBB1->getParent();
  return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
-         (I1 == MBB1->begin() || I2 == MBB2->begin());
+         (FullBlockTail1 || FullBlockTail2);
 }

 unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
--- a/test/CodeGen/X86/branchfolding-debug-invariant.mir
+++ b/test/CodeGen/X86/branchfolding-debug-invariant.mir
@ -0,0 +1,135 @@
+# RUN: llc -mtriple=x86_64-- -run-pass branch-folder -O3 -o - %s | FileCheck %s
+
+---
+name:            test1a
+body:             |
+  ; CHECK-LABEL: name: test1a
+  ; CHECK: bb.0:
+  ; CHECK:   TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+  ; CHECK:   JCC_1 %bb.2, 5, implicit $eflags
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+  ; CHECK-NOT: RET
+  ; CHECK: bb.2:
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  bb.0:
+    TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+    JCC_1 %bb.2, 5, implicit killed $eflags
+
+  bb.1:
+    MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+
+  bb.2:
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+...
+
+---
+name:            test1b
+body:             |
+
+  ; Verify that we get the same rewrites as in test1a when adding some
+  ; DBG_VALUE instructions in the mix.
+  ;
+  ; CHECK-LABEL: name: test1b
+  ; CHECK: bb.0:
+  ; CHECK:   TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+  ; CHECK:   JCC_1 %bb.2, 5, implicit $eflags
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+  ; CHECK-NOT: RET
+  ; CHECK: bb.2:
+  ; CHECK:   DBG_VALUE
+  ; CHECK:   DBG_VALUE
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  bb.0:
+    TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+    JCC_1 %bb.2, 5, implicit killed $eflags
+
+  bb.1:
+    MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+
+  bb.2:
+    DBG_VALUE
+    DBG_VALUE
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+...
+
+---
+name:            test2a
+body:             |
+  ; CFI instruction currently prevents the rewrite here (although technically
+  ; I suppose that branch folding could let bb.1 fallthrough into bb.2 here).
+  ;
+  ; CHECK-LABEL: name: test2a
+  ; CHECK: bb.0:
+  ; CHECK:   TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+  ; CHECK:   JCC_1 %bb.2, 5, implicit killed $eflags
+  ; CHECK: bb.1:
+  ; CHECK:   MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  ; CHECK: bb.2:
+  ; CHECK:   CFI_INSTRUCTION def_cfa_offset 8
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  bb.0:
+    TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+    JCC_1 %bb.2, 5, implicit killed $eflags
+
+  bb.1:
+    MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+
+  bb.2:
+    CFI_INSTRUCTION def_cfa_offset 8
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+...
+
+---
+name:            test2b
+body:             |
+  ; Verify that we get the same rewrites as in test1a when adding some
+  ; DBG_VALUE instructions in the mix.
+  ;
+  ; CHECK-LABEL: name: test2b
+  ; CHECK: bb.0:
+  ; CHECK:   TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+  ; CHECK:   JCC_1 %bb.2, 5, implicit killed $eflags
+  ; CHECK: bb.1:
+  ; CHECK:   MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  ; CHECK: bb.2:
+  ; CHECK:   DBG_VALUE
+  ; CHECK:   CFI_INSTRUCTION def_cfa_offset 8
+  ; CHECK:   DBG_VALUE
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  bb.0:
+    TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+    JCC_1 %bb.2, 5, implicit killed $eflags
+
+  bb.1:
+    MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+
+  bb.2:
+    DBG_VALUE
+    CFI_INSTRUCTION def_cfa_offset 8
+    DBG_VALUE
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+...