1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

Remove redundant copy in recurrences

Summary:
If there is a chain of instructions formulating a recurrence, commuting operands can help remove a redundant copy. Consider the following example code:

```
BB#1: ; Loop Header
  %vreg0<def> = COPY %vreg13<kill>; GR32:%vreg0,%vreg13
  ...

BB#6: ; Loop Latch
  %vreg2<def> = COPY %vreg15<kill>; GR32:%vreg2,%vreg15
  %vreg10<def,tied1> = ADD32rr %vreg1<kill,tied0>, %vreg0<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg10,%vreg1,%vreg0
  %vreg3<def,tied1> = ADD32rr %vreg2<kill,tied0>, %vreg10<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg3,%vreg2,%vreg10
  CMP32ri8 %vreg3, 10, %EFLAGS<imp-def>; GR32:%vreg3
  %vreg13<def> = COPY %vreg3<kill>; GR32:%vreg13,%vreg3
  JL_1 <BB#1>, %EFLAGS<imp-use,kill>
```

The existing two-address generation pass generates the following code:

```
BB#1:
  %vreg0<def> = COPY %vreg13<kill>; GR32:%vreg0,%vreg13
  ...

BB#6:
    Predecessors according to CFG: BB#5 BB#4
  %vreg2<def> = COPY %vreg15<kill>; GR32:%vreg2,%vreg15
  %vreg10<def> = COPY %vreg1<kill>; GR32:%vreg10,%vreg1
  %vreg10<def,tied1> = ADD32rr %vreg10<tied0>, %vreg0<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg10,%vreg0
  %vreg3<def> = COPY %vreg10<kill>; GR32:%vreg3,%vreg10
  %vreg3<def,tied1> = ADD32rr %vreg3<tied0>, %vreg2<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg3,%vreg2
  CMP32ri8 %vreg3, 10, %EFLAGS<imp-def>; GR32:%vreg3
  %vreg13<def> = COPY %vreg3<kill>; GR32:%vreg13,%vreg3
  JL_1 <BB#1>, %EFLAGS<imp-use,kill>
  JMP_1 <BB#7>
```

This is suboptimal because the generated assembly code has a redundant copy at the end of BB#6 to feed %vreg13 to BB#1:

```
.LBB0_6:
  addl  %esi, %edi
  addl  %ebx, %edi
  cmpl  $10, %edi
  movl  %edi, %esi
  jl  .LBB0_1
```

This redundant copy can be eliminated by making the instructions in the recurrence chain compute the value "into" the register that actually holds the feedback value. In this example, this can be achieved by commuting %vreg0 and %vreg1 in the instruction that computes %vreg10. With that change, the code after two-address generation becomes

```
BB#1:
  %vreg0<def> = COPY %vreg13<kill>; GR32:%vreg0,%vreg13
  ...

BB#6: derived from LLVM BB %bb7
    Predecessors according to CFG: BB#5 BB#4
  %vreg2<def> = COPY %vreg15<kill>; GR32:%vreg2,%vreg15
  %vreg10<def> = COPY %vreg0<kill>; GR32:%vreg10,%vreg0
  %vreg10<def,tied1> = ADD32rr %vreg10<tied0>, %vreg1<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg10,%vreg1
  %vreg3<def> = COPY %vreg10<kill>; GR32:%vreg3,%vreg10
  %vreg3<def,tied1> = ADD32rr %vreg3<tied0>, %vreg2<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg3,%vreg2
  CMP32ri8 %vreg3, 10, %EFLAGS<imp-def>; GR32:%vreg3
  %vreg13<def> = COPY %vreg3<kill>; GR32:%vreg13,%vreg3
  JL_1 <BB#1>, %EFLAGS<imp-use,kill>
  JMP_1 <BB#7>
```

and the final assembly does not have the redundant copy:

```
.LBB0_6:
  addl  %edi, %eax
  addl  %ebx, %eax
  cmpl  $10, %eax
  jl  .LBB0_1
```

Reviewers: qcolombet, MatzeB, wmi

Reviewed By: wmi

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D31821

llvm-svn: 306758
This commit is contained in:
Taewook Oh 2017-06-29 23:11:24 +00:00
parent 8cb64ce02c
commit 7e3c6fd16c
4 changed files with 410 additions and 6 deletions

View File

@ -76,6 +76,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@ -119,6 +120,14 @@ static cl::opt<unsigned> RewritePHILimit(
"rewrite-phi-limit", cl::Hidden, cl::init(10),
cl::desc("Limit the length of PHI chains to lookup"));
// Upper bound on the number of instructions collected into a recurrence chain
// when evaluating the benefit of commuting operands. findTargetRecurrence()
// stops extending a chain once it already holds this many instructions.
// Hidden command-line option ("recurrence-chain-limit"); defaults to 3.
static cl::opt<unsigned> MaxRecurrenceChain(
"recurrence-chain-limit", cl::Hidden, cl::init(3),
cl::desc("Maximum length of recurrence chain when evaluating the benefit "
"of commuting operands"));
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
@ -131,12 +140,14 @@ STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
namespace {
class ValueTrackerResult;
class RecurrenceInstr;
class PeepholeOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
MachineDominatorTree *DT; // Machine dominator tree
MachineLoopInfo *MLI;
public:
static char ID; // Pass identification
@ -150,6 +161,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
if (Aggressive) {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@ -160,6 +173,9 @@ namespace {
typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
RewriteMapTy;
/// \brief Sequence of instructions that formulate recurrence cycle.
typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle;
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@ -170,6 +186,7 @@ namespace {
bool optimizeCoalescableCopy(MachineInstr *MI);
bool optimizeUncoalescableCopy(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
bool optimizeRecurrence(MachineInstr &PHI);
bool findNextSource(unsigned Reg, unsigned SubReg,
RewriteMapTy &RewriteMap);
bool isMoveImmediate(MachineInstr *MI,
@ -178,6 +195,13 @@ namespace {
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
/// \brief Finds recurrence cycles, but only ones that are formulated around
/// a def operand and a use operand that are tied. If there is a use
/// operand commutable with the tied use operand, the recurrence cycle
/// along that operand is searched for as well.
bool findTargetRecurrence(unsigned Reg,
const SmallSet<unsigned, 2> &TargetReg,
RecurrenceCycle &RC);
/// \brief If copy instruction \p MI is a virtual register copy, track it in
/// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
@ -222,6 +246,28 @@ namespace {
}
};
/// \brief Wrapper around one instruction that belongs to a recurrence cycle.
///
/// An instruction takes part in a recurrence cycle in one of two ways:
///   1) through its def operand and the use operand tied to that def, or
///   2) through its def operand and a use operand that is commutable with the
///      use operand tied to the def.
/// Only in case 2) is a CommutePair recorded; it stores the two operand
/// indices that were passed to the three-argument constructor.
class RecurrenceInstr {
public:
  using IndexPair = std::pair<unsigned, unsigned>;

  /// Case 1): the recurrence already flows through the tied use operand, so
  /// no commute pair is stored.
  RecurrenceInstr(MachineInstr *MI) : MI(MI) {}

  /// Case 2): remember the operand indices \p Idx1 and \p Idx2 so that they
  /// can be handed to the commute routine later.
  RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
      : MI(MI), CommutePair(IndexPair(Idx1, Idx2)) {}

  /// The wrapped instruction.
  MachineInstr *getMI() const { return MI; }

  /// The operand indices to commute, or an empty Optional when the
  /// instruction was constructed without a pair.
  Optional<IndexPair> getCommutePair() const { return CommutePair; }

private:
  MachineInstr *MI;
  Optional<IndexPair> CommutePair;
};
/// \brief Helper class to hold a reply for ValueTracker queries. Contains the
/// returned sources for a given search and the instructions where the sources
/// were tracked from.
@ -412,6 +458,7 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
@ -1487,6 +1534,113 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
return false;
}
/// \brief Returns true if \p MO is a register operand whose register is a
/// virtual register; returns false for non-register operands.
static bool isVirtualRegisterOperand(MachineOperand &MO) {
  return MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg());
}
bool PeepholeOptimizer::findTargetRecurrence(
unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
RecurrenceCycle &RC) {
// Recurrence found if Reg is in TargetRegs.
if (TargetRegs.count(Reg))
return true;
// TODO: Curerntly, we only allow the last instruction of the recurrence
// cycle (the instruction that feeds the PHI instruction) to have more than
// one uses to guarantee that commuting operands does not tie registers
// with overlapping live range. Once we have actual live range info of
// each register, this constraint can be relaxed.
if (!MRI->hasOneNonDBGUse(Reg))
return false;
// Give up if the reccurrence chain length is longer than the limit.
if (RC.size() >= MaxRecurrenceChain)
return false;
MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
// Only interested in recurrences whose instructions have only one def, which
// is a virtual register.
if (MI.getDesc().getNumDefs() != 1)
return false;
MachineOperand &DefOp = MI.getOperand(0);
if (!isVirtualRegisterOperand(DefOp))
return false;
// Check if def operand of MI is tied to any use operand. We are only
// interested in the case that all the instructions in the recurrence chain
// have there def operand tied with one of the use operand.
unsigned TiedUseIdx;
if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
return false;
if (Idx == TiedUseIdx) {
RC.push_back(RecurrenceInstr(&MI));
return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
} else {
// If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
}
}
return false;
}
/// \brief Commute operands along a recurrence so that the copy produced when
/// \p PHI is lowered can be coalesced.
///
/// Phi instructions will eventually be lowered to copy instructions. If the
/// phi is in a loop header, a recurrence may be formulated around the source
/// and destination of the phi. In that case, commuting operands of the
/// instructions in the recurrence may enable coalescing of the copy generated
/// from the phi. For example, given the recurrence
///
///   LoopHeader:
///     %vreg1 = phi(%vreg0, %vreg100)
///   LoopLatch:
///     %vreg0<def, tied1> = ADD %vreg2<tied0>, %vreg1
///
/// %vreg0 and %vreg2 are in the same tied operand set, which makes coalescing
/// the copy generated from the phi in LoopHeader (i.e. %vreg1 = COPY %vreg0)
/// impossible, because %vreg1 and %vreg2 have overlapping live ranges. This
/// introduces an additional move instruction in the final assembly. If
/// %vreg2 and %vreg1 of the ADD instruction are commuted, the redundant move
/// can be avoided.
///
/// \returns true if at least one instruction in the recurrence had a commute
/// pair recorded and commuteInstruction was invoked on it.
bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
  // The incoming values of the PHI (operands 1, 3, 5, ...) are the registers
  // the recurrence has to reach.
  SmallSet<unsigned, 2> TargetRegs;
  for (unsigned OpIdx = 1, NumOps = PHI.getNumOperands(); OpIdx < NumOps;
       OpIdx += 2) {
    MachineOperand &Incoming = PHI.getOperand(OpIdx);
    assert(isVirtualRegisterOperand(Incoming) && "Invalid PHI instruction");
    TargetRegs.insert(Incoming.getReg());
  }

  // Follow the value defined by the PHI; bail out if no recurrence is found.
  RecurrenceCycle RC;
  if (!findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC))
    return false;

  // Commute operands of the instructions in RC where a commute pair was
  // recorded, so that the copy to be generated from PHI can be coalesced.
  DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
  bool Changed = false;
  for (auto &RI : RC) {
    MachineInstr *ChainMI = RI.getMI();
    DEBUG(dbgs() << "\tInst: " << *ChainMI);
    auto CP = RI.getCommutePair();
    if (!CP)
      continue;
    Changed = true;
    TII->commuteInstruction(*ChainMI, false, (*CP).first, (*CP).second);
    DEBUG(dbgs() << "\t\tCommuted: " << *ChainMI);
  }

  return Changed;
}
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@ -1501,6 +1655,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
MLI = &getAnalysis<MachineLoopInfo>();
bool Changed = false;
@ -1529,6 +1684,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallSet<unsigned, 4> CopySrcRegs;
DenseMap<unsigned, MachineInstr *> CopySrcMIs;
bool IsLoopHeader = MLI->isLoopHeader(&MBB);
for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
MII != MIE; ) {
MachineInstr *MI = &*MII;
@ -1540,9 +1697,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->isDebugValue())
continue;
if (MI->isPosition() || MI->isPHI())
if (MI->isPosition())
continue;
if (IsLoopHeader && MI->isPHI()) {
if (optimizeRecurrence(*MI)) {
Changed = true;
continue;
}
}
if (!MI->isCopy()) {
for (const auto &Op : MI->operands()) {
// Visit all operands: definitions can be implicit or explicit.

View File

@ -68,6 +68,13 @@ EnableRescheduling("twoaddr-reschedule",
cl::desc("Coalesce copies by rescheduling (default=true)"),
cl::init(true), cl::Hidden);
// Upper bound on the number of dataflow edges to traverse when evaluating the
// benefit of commuting operands. Passed as the limit argument to
// isRevCopyChain(), where it replaces the literal 3.
// Hidden command-line option ("dataflow-edge-limit"); defaults to 3.
static cl::opt<unsigned> MaxDataFlowEdge(
"dataflow-edge-limit", cl::Hidden, cl::init(3),
cl::desc("Maximum number of dataflow edges to traverse when evaluating "
"the benefit of commuting operands"));
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
MachineFunction *MF;
@ -637,10 +644,10 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// To more generally minimize register copies, ideally the logic of two addr
// instruction pass should be integrated with register allocation pass where
// interference graph is available.
if (isRevCopyChain(regC, regA, 3))
if (isRevCopyChain(regC, regA, MaxDataFlowEdge))
return true;
if (isRevCopyChain(regB, regA, 3))
if (isRevCopyChain(regB, regA, MaxDataFlowEdge))
return false;
// Since there are no intervening uses for both registers, then commute

View File

@ -7,7 +7,8 @@
# This test ensures that the command line accepts
# several run passes on the same command line and
# actually create the proper pipeline for it.
# PSEUDO_PEEPHOLE: -expand-isel-pseudos {{(-machineverifier )?}}-peephole-opt
# PSEUDO_PEEPHOLE: -expand-isel-pseudos
# PSEUDO_PEEPHOLE-SAME: {{(-machineverifier )?}}-peephole-opt
# PEEPHOLE_PSEUDO: -peephole-opt {{(-machineverifier )?}}-expand-isel-pseudos
# Make sure there are no other passes happening after what we asked.

View File

@ -0,0 +1,232 @@
# RUN: llc -mtriple=x86_64-- -run-pass=peephole-opt -o - %s | FileCheck %s
--- |
define i32 @foo(i32 %a) {
bb0:
br label %bb1
bb1: ; preds = %bb7, %bb0
%vreg0 = phi i32 [ 0, %bb0 ], [ %vreg3, %bb7 ]
%cond0 = icmp eq i32 %a, 0
br i1 %cond0, label %bb4, label %bb3
bb3: ; preds = %bb1
br label %bb4
bb4: ; preds = %bb1, %bb3
%vreg5 = phi i32 [ 2, %bb3 ], [ 1, %bb1 ]
%cond1 = icmp eq i32 %vreg5, 0
br i1 %cond1, label %bb7, label %bb6
bb6: ; preds = %bb4
br label %bb7
bb7: ; preds = %bb4, %bb6
%vreg1 = phi i32 [ 2, %bb6 ], [ 1, %bb4 ]
%vreg2 = add i32 %vreg5, %vreg0
%vreg3 = add i32 %vreg1, %vreg2
%cond2 = icmp slt i32 %vreg3, 10
br i1 %cond2, label %bb1, label %bb8
bb8: ; preds = %bb7
ret i32 0
}
define i32 @bar(i32 %a, i32* %p) {
bb0:
br label %bb1
bb1: ; preds = %bb7, %bb0
%vreg0 = phi i32 [ 0, %bb0 ], [ %vreg3, %bb7 ]
%cond0 = icmp eq i32 %a, 0
br i1 %cond0, label %bb4, label %bb3
bb3: ; preds = %bb1
br label %bb4
bb4: ; preds = %bb1, %bb3
%vreg5 = phi i32 [ 2, %bb3 ], [ 1, %bb1 ]
%cond1 = icmp eq i32 %vreg5, 0
br i1 %cond1, label %bb7, label %bb6
bb6: ; preds = %bb4
br label %bb7
bb7: ; preds = %bb4, %bb6
%vreg1 = phi i32 [ 2, %bb6 ], [ 1, %bb4 ]
%vreg2 = add i32 %vreg5, %vreg0
store i32 %vreg0, i32* %p
%vreg3 = add i32 %vreg1, %vreg2
%cond2 = icmp slt i32 %vreg3, 10
br i1 %cond2, label %bb1, label %bb8
bb8: ; preds = %bb7
ret i32 0
}
...
---
# There is a recurrence formulated around %0, %10, and %3. Check that operands
# are commuted for ADD instructions in bb.5.bb7 so that the values involved in
# the recurrence are tied. This will remove redundant copy instruction.
name: foo
tracksRegLiveness: true
registers:
- { id: 0, class: gr32, preferred-register: '' }
- { id: 1, class: gr32, preferred-register: '' }
- { id: 2, class: gr32, preferred-register: '' }
- { id: 3, class: gr32, preferred-register: '' }
- { id: 4, class: gr32, preferred-register: '' }
- { id: 5, class: gr32, preferred-register: '' }
- { id: 6, class: gr32, preferred-register: '' }
- { id: 7, class: gr32, preferred-register: '' }
- { id: 8, class: gr32, preferred-register: '' }
- { id: 9, class: gr32, preferred-register: '' }
- { id: 10, class: gr32, preferred-register: '' }
- { id: 11, class: gr32, preferred-register: '' }
- { id: 12, class: gr32, preferred-register: '' }
liveins:
- { reg: '%edi', virtual-reg: '%4' }
body: |
bb.0.bb0:
successors: %bb.1.bb1(0x80000000)
liveins: %edi
%4 = COPY %edi
%5 = MOV32r0 implicit-def dead %eflags
bb.1.bb1:
successors: %bb.3.bb4(0x30000000), %bb.2.bb3(0x50000000)
; CHECK: %0 = PHI %5, %bb.0.bb0, %3, %bb.5.bb7
%0 = PHI %5, %bb.0.bb0, %3, %bb.5.bb7
%6 = MOV32ri 1
TEST32rr %4, %4, implicit-def %eflags
JE_1 %bb.3.bb4, implicit %eflags
JMP_1 %bb.2.bb3
bb.2.bb3:
successors: %bb.3.bb4(0x80000000)
%7 = MOV32ri 2
bb.3.bb4:
successors: %bb.5.bb7(0x30000000), %bb.4.bb6(0x50000000)
%1 = PHI %6, %bb.1.bb1, %7, %bb.2.bb3
TEST32rr %1, %1, implicit-def %eflags
JE_1 %bb.5.bb7, implicit %eflags
JMP_1 %bb.4.bb6
bb.4.bb6:
successors: %bb.5.bb7(0x80000000)
%9 = MOV32ri 2
bb.5.bb7:
successors: %bb.1.bb1(0x7c000000), %bb.6.bb8(0x04000000)
%2 = PHI %6, %bb.3.bb4, %9, %bb.4.bb6
%10 = ADD32rr %1, %0, implicit-def dead %eflags
; CHECK: %10 = ADD32rr
; CHECK-SAME: %0,
; CHECK-SAME: %1,
%3 = ADD32rr %2, killed %10, implicit-def dead %eflags
; CHECK: %3 = ADD32rr
; CHECK-SAME: %10,
; CHECK-SAME: %2,
%11 = SUB32ri8 %3, 10, implicit-def %eflags
JL_1 %bb.1.bb1, implicit %eflags
JMP_1 %bb.6.bb8
bb.6.bb8:
%12 = MOV32r0 implicit-def dead %eflags
%eax = COPY %12
RET 0, %eax
...
---
# Here a recurrence is formulated around %0, %11, and %3, but operands should
# not be commuted because %0 has a use outside of the recurrence. This
# prevents commuting operands from tying values with overlapping live
# ranges.
name: bar
tracksRegLiveness: true
registers:
- { id: 0, class: gr32, preferred-register: '' }
- { id: 1, class: gr32, preferred-register: '' }
- { id: 2, class: gr32, preferred-register: '' }
- { id: 3, class: gr32, preferred-register: '' }
- { id: 4, class: gr32, preferred-register: '' }
- { id: 5, class: gr64, preferred-register: '' }
- { id: 6, class: gr32, preferred-register: '' }
- { id: 7, class: gr32, preferred-register: '' }
- { id: 8, class: gr32, preferred-register: '' }
- { id: 9, class: gr32, preferred-register: '' }
- { id: 10, class: gr32, preferred-register: '' }
- { id: 11, class: gr32, preferred-register: '' }
- { id: 12, class: gr32, preferred-register: '' }
- { id: 13, class: gr32, preferred-register: '' }
liveins:
- { reg: '%edi', virtual-reg: '%4' }
- { reg: '%rsi', virtual-reg: '%5' }
body: |
bb.0.bb0:
successors: %bb.1.bb1(0x80000000)
liveins: %edi, %rsi
%5 = COPY %rsi
%4 = COPY %edi
%6 = MOV32r0 implicit-def dead %eflags
bb.1.bb1:
successors: %bb.3.bb4(0x30000000), %bb.2.bb3(0x50000000)
%0 = PHI %6, %bb.0.bb0, %3, %bb.5.bb7
; CHECK: %0 = PHI %6, %bb.0.bb0, %3, %bb.5.bb7
%7 = MOV32ri 1
TEST32rr %4, %4, implicit-def %eflags
JE_1 %bb.3.bb4, implicit %eflags
JMP_1 %bb.2.bb3
bb.2.bb3:
successors: %bb.3.bb4(0x80000000)
%8 = MOV32ri 2
bb.3.bb4:
successors: %bb.5.bb7(0x30000000), %bb.4.bb6(0x50000000)
%1 = PHI %7, %bb.1.bb1, %8, %bb.2.bb3
TEST32rr %1, %1, implicit-def %eflags
JE_1 %bb.5.bb7, implicit %eflags
JMP_1 %bb.4.bb6
bb.4.bb6:
successors: %bb.5.bb7(0x80000000)
%10 = MOV32ri 2
bb.5.bb7:
successors: %bb.1.bb1(0x7c000000), %bb.6.bb8(0x04000000)
%2 = PHI %7, %bb.3.bb4, %10, %bb.4.bb6
%11 = ADD32rr %1, %0, implicit-def dead %eflags
; CHECK: %11 = ADD32rr
; CHECK-SAME: %1,
; CHECK-SAME: %0,
MOV32mr %5, 1, _, 0, _, %0 :: (store 4 into %ir.p)
%3 = ADD32rr %2, killed %11, implicit-def dead %eflags
; CHECK: %3 = ADD32rr
; CHECK-SAME: %2,
; CHECK-SAME: %11,
%12 = SUB32ri8 %3, 10, implicit-def %eflags
JL_1 %bb.1.bb1, implicit %eflags
JMP_1 %bb.6.bb8
bb.6.bb8:
%13 = MOV32r0 implicit-def dead %eflags
%eax = COPY %13
RET 0, %eax
...