1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[ARM] LE support in ConstantIslands

The low-overhead branch extension provides a loop-end 'LE' instruction
that performs neither a decrement nor a compare; it just jumps backwards.
This patch modifies the constant islands pass to try to insert LE
instructions in place of a Thumb2 conditional branch, instead of
shrinking it. This only happens if a cmp can be converted to a cbz/cbnz
and used to exit the loop.

Differential Revision: https://reviews.llvm.org/D67404

llvm-svn: 372085
This commit is contained in:
Sam Parker 2019-09-17 09:08:05 +00:00
parent 9e1c9bd69d
commit 7b81fc68ca
6 changed files with 857 additions and 28 deletions

View File

@ -29,6 +29,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@ -70,6 +71,7 @@ STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
STATISTIC(NumLEInserted, "Number of LE backwards branches inserted");
static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
@ -213,6 +215,7 @@ namespace {
const ARMBaseInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
MachineDominatorTree *DT = nullptr;
bool isThumb;
bool isThumb1;
bool isThumb2;
@ -225,6 +228,12 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
/// Declare the analyses this pass depends on and what it preserves.
/// The pass marks the CFG as preserved and requires MachineDominatorTree,
/// which runOnMachineFunction fetches via getAnalysis and stores in DT.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
// Dominance information is queried when deciding whether a backwards
// conditional branch may be replaced by an LE instruction.
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@ -350,6 +359,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
isPositionIndependentOrROPI =
STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
AFI = MF->getInfo<ARMFunctionInfo>();
DT = &getAnalysis<MachineDominatorTree>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
@ -1809,16 +1819,10 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {
return MadeChange;
}
bool ARMConstantIslands::optimizeThumb2Branches() {
bool MadeChange = false;
// The order in which branches appear in ImmBranches is approximately their
// order within the function body. By visiting later branches first, we reduce
// the distance between earlier forward branches and their targets, making it
// more likely that the cbn?z optimization, which can only apply to forward
// branches, will succeed.
for (unsigned i = ImmBranches.size(); i != 0; --i) {
ImmBranch &Br = ImmBranches[i-1];
bool ARMConstantIslands::optimizeThumb2Branches() {
auto TryShrinkBranch = [this](ImmBranch &Br) {
unsigned Opcode = Br.MI->getOpcode();
unsigned NewOpc = 0;
unsigned Scale = 1;
@ -1846,47 +1850,115 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
BBUtils->adjustBBSize(MBB, -2);
BBUtils->adjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
MadeChange = true;
return true;
}
}
return false;
};
Opcode = Br.MI->getOpcode();
if (Opcode != ARM::tBcc)
continue;
// Result slot filled in by FindCmpForCBZ: the compare instruction that can
// be folded into a compare-and-branch, and the opcode to fold it into.
struct ImmCompare {
// The compare found for the branch, or nullptr when there is none.
MachineInstr* MI = nullptr;
// ARM::tCBZ or ARM::tCBNZ; 0 means no candidate was found.
unsigned NewOpc = 0;
};
auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp,
MachineBasicBlock *DestBB) {
ImmCmp.MI = nullptr;
ImmCmp.NewOpc = 0;
// If the conditional branch doesn't kill CPSR, then CPSR can be liveout
// so this transformation is not safe.
if (!Br.MI->killsRegister(ARM::CPSR))
continue;
return false;
NewOpc = 0;
unsigned PredReg = 0;
unsigned NewOpc = 0;
ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);
if (Pred == ARMCC::EQ)
NewOpc = ARM::tCBZ;
else if (Pred == ARMCC::NE)
NewOpc = ARM::tCBNZ;
if (!NewOpc)
continue;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
else
return false;
// Check if the distance is within 126. Subtract starting offset by 2
// because the cmp will be eliminated.
unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2;
BBInfoVector &BBInfo = BBUtils->getBBInfo();
unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126)
continue;
return false;
// Search backwards to find a tCMPi8
auto *TRI = STI->getRegisterInfo();
MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI);
if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8)
return false;
ImmCmp.MI = CmpMI;
ImmCmp.NewOpc = NewOpc;
return true;
};
// Try to replace the conditional branch Br with a t2LE to the same
// destination. On success Br.MI points at the new t2LE, Cmp.NewOpc has been
// flipped (tCBZ <-> tCBNZ) and true is returned; otherwise nothing is
// modified and false is returned.
auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) {
// Only 32-bit conditional branches are candidates, and only when the
// subtarget has the low-overhead-branch extension and is not optimising
// for minimum size.
if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() ||
STI->hasMinSize())
return false;
MachineBasicBlock *MBB = Br.MI->getParent();
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
// Reject forward branches (destination placed after this block) and
// destinations that isBBInRange reports as out of range for 4094.
// NOTE(review): 4094 is presumably the maximum reach of LE - confirm.
if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) ||
!BBUtils->isBBInRange(Br.MI, DestBB, 4094))
return false;
// The destination must dominate the branching block.
// NOTE(review): presumably this establishes that the branch is a loop
// back-edge - confirm.
if (!DT->dominates(DestBB, MBB))
return false;
// We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite
// target of Br. So now we need to reverse the condition.
Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? ARM::tCBNZ : ARM::tCBZ;
// Insert the t2LE immediately before the old branch, reusing its debug
// location and its target operand.
MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(),
TII->get(ARM::t2LE));
MIB.add(Br.MI->getOperand(0));
// The target operand has been copied above, so the old branch can go; the
// ImmBranch record is then re-pointed at the replacement instruction.
Br.MI->eraseFromParent();
Br.MI = MIB;
++NumLEInserted;
return true;
};
bool MadeChange = false;
// The order in which branches appear in ImmBranches is approximately their
// order within the function body. By visiting later branches first, we reduce
// the distance between earlier forward branches and their targets, making it
// more likely that the cbn?z optimization, which can only apply to forward
// branches, will succeed.
for (ImmBranch &Br : reverse(ImmBranches)) {
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
MachineBasicBlock *MBB = Br.MI->getParent();
MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ?
MBB->getFallThrough() :
MBB->back().getOperand(0).getMBB();
ImmCompare Cmp;
if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) {
DestBB = ExitBB;
MadeChange = true;
} else {
FindCmpForCBZ(Br, Cmp, DestBB);
MadeChange |= TryShrinkBranch(Br);
}
unsigned Opcode = Br.MI->getOpcode();
if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc)
continue;
Register Reg = CmpMI->getOperand(0).getReg();
Register Reg = Cmp.MI->getOperand(0).getReg();
// Check for Kill flags on Reg. If they are present remove them and set kill
// on the new CBZ.
auto *TRI = STI->getRegisterInfo();
MachineBasicBlock::iterator KillMI = Br.MI;
bool RegKilled = false;
do {
@ -1896,19 +1968,32 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
RegKilled = true;
break;
}
} while (KillMI != CmpMI);
} while (KillMI != Cmp.MI);
// Create the new CBZ/CBNZ
MachineBasicBlock *MBB = Br.MI->getParent();
LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI);
MachineInstr *NewBR =
BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc))
BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc))
.addReg(Reg, getKillRegState(RegKilled))
.addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
CmpMI->eraseFromParent();
Br.MI->eraseFromParent();
Br.MI = NewBR;
Cmp.MI->eraseFromParent();
BBInfoVector &BBInfo = BBUtils->getBBInfo();
BBInfo[MBB->getNumber()].Size -= 2;
if (Br.MI->getOpcode() == ARM::tBcc) {
Br.MI->eraseFromParent();
Br.MI = NewBR;
} else if (&MBB->back() != Br.MI) {
// We've generated an LE and already erased the original conditional
// branch. The CBN?Z is now used to branch to the other successor, so an
// unconditional branch terminator is now redundant.
MachineInstr *LastMI = &MBB->back();
if (LastMI != Br.MI) {
BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize();
LastMI->eraseFromParent();
}
}
BBUtils->adjustBBOffsetsAfter(MBB);
++NumCBZ;
MadeChange = true;

View File

@ -143,8 +143,8 @@
; CHECK-NEXT: Thumb2 instruction size reduce pass
; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: optimise barriers pass
; CHECK-NEXT: ARM constant island placement and branch shortening pass
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: ARM constant island placement and branch shortening pass
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: ARM Low Overhead Loops pass
; CHECK-NEXT: Contiguously Lay Out Funclets

View File

@ -0,0 +1,201 @@
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-LOB
# RUN: llc -mtriple=thumbv8.1m.main -mattr=-lob %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-NOLOB
# CHECK-NOLOB-NOT: t2LE
# CHECK-LOB: bb.3.land.rhs:
# CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
# CHECK-LOB: tCBNZ $r0, %bb.8
# CHECK-LOB: t2LE %bb.3
# CHECK-LOB: bb.7.while.body19:
# CHECK-LOB: tCBZ $r0, %bb.8
# CHECK-LOB: t2LE %bb.6
# CHECK-LOB: bb.8:
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
%struct.head_s = type { %struct.head_s*, %struct.data_s* }
%struct.data_s = type { i16, i16 }
; Function Attrs: norecurse nounwind readonly
define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 {
entry:
%idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1
%0 = load i16, i16* %idx, align 2
%cmp = icmp sgt i16 %0, -1
br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader
while.cond9.preheader: ; preds = %entry
%1 = icmp eq %struct.head_s* %list, null
br i1 %1, label %return, label %land.rhs11.lr.ph
land.rhs11.lr.ph: ; preds = %while.cond9.preheader
%data16143 = bitcast %struct.data_s* %info to i16*
%2 = load i16, i16* %data16143, align 2
%conv15 = sext i16 %2 to i32
br label %land.rhs11
while.cond.preheader: ; preds = %entry
%3 = icmp eq %struct.head_s* %list, null
br i1 %3, label %return, label %land.rhs.preheader
land.rhs.preheader: ; preds = %while.cond.preheader
br label %land.rhs
land.rhs: ; preds = %land.rhs.preheader, %while.body
%list.addr.033 = phi %struct.head_s* [ %6, %while.body ], [ %list, %land.rhs.preheader ]
%info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1
%4 = load %struct.data_s*, %struct.data_s** %info2, align 4
%idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %4, i32 0, i32 1
%5 = load i16, i16* %idx3, align 2
%cmp7 = icmp eq i16 %5, %0
br i1 %cmp7, label %return, label %while.body
while.body: ; preds = %land.rhs
%next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s**
%6 = load %struct.head_s*, %struct.head_s** %next4, align 4
%tobool = icmp ne %struct.head_s* %6, null
br i1 %tobool, label %return, label %land.rhs
land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph
%list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ]
%info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1
%7 = load %struct.data_s*, %struct.data_s** %info12, align 4
%data165 = bitcast %struct.data_s* %7 to i16*
%8 = load i16, i16* %data165, align 2
%9 = and i16 %8, 255
%and = zext i16 %9 to i32
%cmp16 = icmp eq i32 %and, %conv15
br i1 %cmp16, label %return, label %while.body19
while.body19: ; preds = %land.rhs11
%next206 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s**
%10 = load %struct.head_s*, %struct.head_s** %next206, align 4
%tobool10 = icmp eq %struct.head_s* %10, null
br i1 %tobool10, label %return, label %land.rhs11
return: ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader
%retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ]
ret %struct.head_s* %retval.0
}
attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
...
---
name: search
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.5(0x50000000), %bb.1(0x30000000)
liveins: $r0, $r1
renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx)
t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.1, 13, killed $cpsr
bb.5.while.cond.preheader:
successors: %bb.8(0x30000000), %bb.6(0x50000000)
liveins: $r0, $r2
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.8, 0, killed $cpsr
bb.6.land.rhs.preheader:
successors: %bb.7(0x80000000)
liveins: $r0, $r2
renamable $r1 = tUXTH killed renamable $r2, 14, $noreg
bb.7.land.rhs:
successors: %bb.8(0x04000000), %bb.7(0x7c000000)
liveins: $r0, $r1
renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2)
renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3)
tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr
t2IT 0, 8, implicit-def $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4)
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.7, 0, killed $cpsr
t2B %bb.8, 14, $noreg
bb.1.while.cond9.preheader:
successors: %bb.8(0x30000000), %bb.2(0x50000000)
liveins: $r0, $r1
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.8, 0, killed $cpsr
bb.2.land.rhs11.lr.ph:
successors: %bb.3(0x80000000)
liveins: $r0, $r1
renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143)
bb.3.land.rhs11:
successors: %bb.9(0x04000000), %bb.4(0x7c000000)
liveins: $r0, $r1
renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12)
renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data165, align 2)
tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.9, 0, killed $cpsr
bb.4.while.body19:
successors: %bb.8(0x04000000), %bb.3(0x7c000000)
liveins: $r0, $r1
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next206)
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.3, 1, killed $cpsr
bb.8:
successors: %bb.9(0x80000000)
renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg
bb.9.return:
liveins: $r0
tBX_RET 14, $noreg, implicit killed $r0
...

View File

@ -0,0 +1,158 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main %s -o - | FileCheck %s
; Single-block loop that exits when the loaded value is zero. The expected
; output leaves the loop with a cbz and uses le for the backwards branch.
define void @cbz_exit(i32* %in, i32* %res) {
; CHECK-LABEL: cbz_exit:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: subs r2, r0, #4
; CHECK-NEXT: mov.w r0, #-1
; CHECK-NEXT: .LBB0_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r3, [r2, #4]!
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: cbz r3, .LBB0_2
; CHECK-NEXT: le .LBB0_1
; CHECK-NEXT: .LBB0_2: @ %exit
; CHECK-NEXT: str r0, [r1]
; CHECK-NEXT: bx lr
entry:
br label %loop
loop:
%offset = phi i32 [ 0, %entry ], [ %next, %loop ]
%ptr = getelementptr i32, i32* %in, i32 %offset
%val = load i32, i32* %ptr
%next = add i32 %offset, 1
%cmp = icmp eq i32 %val, 0
br i1 %cmp, label %exit, label %loop
exit:
store i32 %offset, i32* %res
ret void
}
; Single-block loop that exits when the loaded value is non-zero. The expected
; output leaves the loop with a cbnz and uses le for the backwards branch.
define void @cbnz_exit(i32* %in, i32* %res) {
; CHECK-LABEL: cbnz_exit:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: subs r2, r0, #4
; CHECK-NEXT: mov.w r0, #-1
; CHECK-NEXT: .LBB1_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r3, [r2, #4]!
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: cbnz r3, .LBB1_2
; CHECK-NEXT: le .LBB1_1
; CHECK-NEXT: .LBB1_2: @ %exit
; CHECK-NEXT: str r0, [r1]
; CHECK-NEXT: bx lr
entry:
br label %loop
loop:
%offset = phi i32 [ 0, %entry ], [ %next, %loop ]
%ptr = getelementptr i32, i32* %in, i32 %offset
%val = load i32, i32* %ptr
%next = add i32 %offset, 1
%cmp = icmp ne i32 %val, 0
br i1 %cmp, label %exit, label %loop
exit:
store i32 %offset, i32* %res
ret void
}
; Same loop as cbnz_exit, but with 4090 bytes of llvm.arm.space padding inside
; the loop body. The expected output keeps the cmp.w / beq.w pair and contains
; no le.
; NOTE(review): presumably the padding pushes the backwards branch beyond the
; range accepted for LE - confirm against the constant islands pass.
define void @cbnz_exit_too_large(i32* %in, i32* %res) {
; CHECK-LABEL: cbnz_exit_too_large:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: subs r2, r0, #4
; CHECK-NEXT: mov.w r0, #-1
; CHECK-NEXT: .LBB2_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r12, [r2, #4]!
; CHECK-NEXT: .zero 4090
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: beq.w .LBB2_1
; CHECK-NEXT: @ %bb.2: @ %exit
; CHECK-NEXT: str r0, [r1]
; CHECK-NEXT: bx lr
entry:
br label %loop
loop:
%offset = phi i32 [ 0, %entry ], [ %next, %loop ]
%ptr = getelementptr i32, i32* %in, i32 %offset
%val = load i32, i32* %ptr
%next = add i32 %offset, 1
%cmp = icmp ne i32 %val, 0
%size = call i32 @llvm.arm.space(i32 4090, i32 undef)
br i1 %cmp, label %exit, label %loop
exit:
store i32 %offset, i32* %res
ret void
}
; Same loop as cbz_exit, but carrying attribute group #0 (minsize optsize).
; The expected output keeps a cmp / bne pair and contains no le.
; NOTE(review): presumably because LE is not formed when optimising for
; minimum size - confirm against the constant islands pass.
define void @cbz_exit_minsize(i32* %in, i32* %res) #0 {
; CHECK-LABEL: cbz_exit_minsize:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .LBB3_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2]
; CHECK-NEXT: adds r2, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: bne .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %exit
; CHECK-NEXT: subs r0, r2, #1
; CHECK-NEXT: str r0, [r1]
; CHECK-NEXT: bx lr
entry:
br label %loop
loop:
%offset = phi i32 [ 0, %entry ], [ %next, %loop ]
%ptr = getelementptr i32, i32* %in, i32 %offset
%val = load i32, i32* %ptr
%next = add i32 %offset, 1
%cmp = icmp eq i32 %val, 0
br i1 %cmp, label %exit, label %loop
exit:
store i32 %offset, i32* %res
ret void
}
; Same loop as cbnz_exit, but carrying attribute group #0 (minsize optsize).
; The expected output keeps a cmp / beq pair and contains no le.
; NOTE(review): presumably because LE is not formed when optimising for
; minimum size - confirm against the constant islands pass.
define void @cbnz_exit_minsize(i32* %in, i32* %res) #0 {
; CHECK-LABEL: cbnz_exit_minsize:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .LBB4_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2]
; CHECK-NEXT: adds r2, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %exit
; CHECK-NEXT: subs r0, r2, #1
; CHECK-NEXT: str r0, [r1]
; CHECK-NEXT: bx lr
entry:
br label %loop
loop:
%offset = phi i32 [ 0, %entry ], [ %next, %loop ]
%ptr = getelementptr i32, i32* %in, i32 %offset
%val = load i32, i32* %ptr
%next = add i32 %offset, 1
%cmp = icmp ne i32 %val, 0
br i1 %cmp, label %exit, label %loop
exit:
store i32 %offset, i32* %res
ret void
}
attributes #0 = { minsize optsize }
declare i32 @llvm.arm.space(i32 immarg, i32);

View File

@ -0,0 +1,184 @@
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s
# CHECK-NOT: t2LE
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-unknown-unknown"
%struct.head_s = type { %struct.head_s*, %struct.data_s* }
%struct.data_s = type { i16, i16 }
; Function Attrs: norecurse nounwind readonly
define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 {
entry:
%idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1
%tmp = load i16, i16* %idx, align 2
%cmp = icmp sgt i16 %tmp, -1
br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader
while.cond9.preheader: ; preds = %entry
%0 = icmp eq %struct.head_s* %list, null
br i1 %0, label %return, label %land.rhs11.lr.ph
land.rhs11.lr.ph: ; preds = %while.cond9.preheader
%data16143 = bitcast %struct.data_s* %info to i16*
%tmp1 = load i16, i16* %data16143, align 2
%conv15 = sext i16 %tmp1 to i32
br label %land.rhs11
while.cond.preheader: ; preds = %entry
%1 = icmp eq %struct.head_s* %list, null
br i1 %1, label %return, label %land.rhs.preheader
land.rhs.preheader: ; preds = %while.cond.preheader
br label %land.rhs
while.body: ; preds = %land.rhs
%next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s**
%tmp4 = load %struct.head_s*, %struct.head_s** %next4, align 4
%tobool = icmp eq %struct.head_s* %tmp4, null
br i1 %tobool, label %return, label %land.rhs
land.rhs: ; preds = %land.rhs.preheader, %while.body
%list.addr.033 = phi %struct.head_s* [ %tmp4, %while.body ], [ %list, %land.rhs.preheader ]
%info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1
%tmp2 = load %struct.data_s*, %struct.data_s** %info2, align 4
%idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %tmp2, i32 0, i32 1
%tmp3 = load i16, i16* %idx3, align 2
%cmp7 = icmp eq i16 %tmp3, %tmp
br i1 %cmp7, label %return, label %while.body
while.body19: ; preds = %land.rhs11
%next205 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s**
%tmp8 = load %struct.head_s*, %struct.head_s** %next205, align 4
%tobool10 = icmp eq %struct.head_s* %tmp8, null
br i1 %tobool10, label %return, label %land.rhs11
land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph
%list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %tmp8, %while.body19 ]
%info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1
%tmp5 = load %struct.data_s*, %struct.data_s** %info12, align 4
%data166 = bitcast %struct.data_s* %tmp5 to i16*
%tmp6 = load i16, i16* %data166, align 2
%2 = and i16 %tmp6, 255
%and = zext i16 %2 to i32
%cmp16 = icmp eq i32 %and, %conv15
br i1 %cmp16, label %return, label %while.body19
return: ; preds = %land.rhs11, %while.body19, %land.rhs, %while.body, %while.cond.preheader, %while.cond9.preheader
%retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ]
ret %struct.head_s* %retval.0
}
attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
...
---
name: search
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.3(0x50000000), %bb.1(0x30000000)
liveins: $r0, $r1
renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx)
t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.1, 13, killed $cpsr
bb.3.while.cond.preheader:
successors: %bb.4(0x80000000)
liveins: $r0, $r2
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2IT 0, 4, implicit-def $itstate
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
renamable $r1 = tUXTH killed renamable $r2, 14, $noreg
bb.4.land.rhs:
successors: %bb.6(0x04000000), %bb.5(0x7c000000)
liveins: $r0, $r1
renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2)
renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3)
tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.6, 0, killed $cpsr
bb.5.while.body:
successors: %bb.4(0x7c000000)
liveins: $r0, $r1
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4)
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2IT 0, 4, implicit-def $itstate
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
t2B %bb.4, 14, $noreg
bb.6.return:
liveins: $r0
tBX_RET 14, $noreg, implicit $r0
bb.1.while.cond9.preheader:
successors: %bb.2(0x80000000)
liveins: $r0, $r1
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2IT 0, 4, implicit-def $itstate
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143)
bb.2.land.rhs11:
successors: %bb.2(0x7c000000)
liveins: $r0, $r1
renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12)
renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data166, align 2)
tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr
t2IT 0, 8, implicit-def $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next205)
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2IT 0, 4, implicit-def $itstate
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
t2B %bb.2, 14, $noreg
...

View File

@ -0,0 +1,201 @@
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-LOB
# RUN: llc -mtriple=thumbv8.1m.main -mattr=-lob %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-NOLOB
# CHECK-NOLOB-NOT: t2LE
# CHECK-LOB: bb.3.land.rhs:
# CHECK-LOB: tCBZ $r0, %bb.8
# CHECK-LOB: t2LE %bb.3
# CHECK-LOB: bb.6.land.rhs11:
# CHECK-LOB: bb.7.while.body19:
# CHECK-LOB: tCBZ $r0, %bb.8
# CHECK-LOB: t2LE %bb.6
# CHECK-LOB: bb.8:
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-unknown-unknown"
%struct.head_s = type { %struct.head_s*, %struct.data_s* }
%struct.data_s = type { i16, i16 }
; Function Attrs: norecurse nounwind readonly
define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 {
entry:
%idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1
%0 = load i16, i16* %idx, align 2
%cmp = icmp sgt i16 %0, -1
br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader
while.cond9.preheader: ; preds = %entry
%1 = icmp eq %struct.head_s* %list, null
br i1 %1, label %return, label %land.rhs11.lr.ph
land.rhs11.lr.ph: ; preds = %while.cond9.preheader
%data16143 = bitcast %struct.data_s* %info to i16*
%2 = load i16, i16* %data16143, align 2
%conv15 = sext i16 %2 to i32
br label %land.rhs11
while.cond.preheader: ; preds = %entry
%3 = icmp eq %struct.head_s* %list, null
br i1 %3, label %return, label %land.rhs.preheader
land.rhs.preheader: ; preds = %while.cond.preheader
br label %land.rhs
land.rhs: ; preds = %land.rhs.preheader, %while.body
%list.addr.033 = phi %struct.head_s* [ %6, %while.body ], [ %list, %land.rhs.preheader ]
%info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1
%4 = load %struct.data_s*, %struct.data_s** %info2, align 4
%idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %4, i32 0, i32 1
%5 = load i16, i16* %idx3, align 2
%cmp7 = icmp eq i16 %5, %0
br i1 %cmp7, label %return, label %while.body
while.body: ; preds = %land.rhs
%next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s**
%6 = load %struct.head_s*, %struct.head_s** %next4, align 4
%tobool = icmp eq %struct.head_s* %6, null
br i1 %tobool, label %return, label %land.rhs
land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph
%list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ]
%info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1
%7 = load %struct.data_s*, %struct.data_s** %info12, align 4
%data165 = bitcast %struct.data_s* %7 to i16*
%8 = load i16, i16* %data165, align 2
%9 = and i16 %8, 255
%and = zext i16 %9 to i32
%cmp16 = icmp eq i32 %and, %conv15
br i1 %cmp16, label %return, label %while.body19
while.body19: ; preds = %land.rhs11
%next206 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s**
%10 = load %struct.head_s*, %struct.head_s** %next206, align 4
%tobool10 = icmp eq %struct.head_s* %10, null
br i1 %tobool10, label %return, label %land.rhs11
return: ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader
%retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ]
ret %struct.head_s* %retval.0
}
attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
...
---
name: search
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.5(0x50000000), %bb.1(0x30000000)
liveins: $r0, $r1
renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx)
t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.1, 13, killed $cpsr
bb.5.while.cond.preheader:
successors: %bb.8(0x30000000), %bb.6(0x50000000)
liveins: $r0, $r2
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.8, 0, killed $cpsr
bb.6.land.rhs.preheader:
successors: %bb.7(0x80000000)
liveins: $r0, $r2
renamable $r1 = tUXTH killed renamable $r2, 14, $noreg
bb.7.land.rhs:
successors: %bb.8(0x04000000), %bb.7(0x7c000000)
liveins: $r0, $r1
renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2)
renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3)
tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr
t2IT 0, 8, implicit-def $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4)
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.7, 1, killed $cpsr
t2B %bb.8, 14, $noreg
bb.1.while.cond9.preheader:
successors: %bb.8(0x30000000), %bb.2(0x50000000)
liveins: $r0, $r1
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.8, 0, killed $cpsr
bb.2.land.rhs11.lr.ph:
successors: %bb.3(0x80000000)
liveins: $r0, $r1
renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143)
bb.3.land.rhs11:
successors: %bb.9(0x04000000), %bb.4(0x7c000000)
liveins: $r0, $r1
renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12)
renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data165, align 2)
tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.9, 0, killed $cpsr
bb.4.while.body19:
successors: %bb.8(0x04000000), %bb.3(0x7c000000)
liveins: $r0, $r1
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next206)
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.3, 1, killed $cpsr
bb.8:
successors: %bb.9(0x80000000)
renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg
bb.9.return:
liveins: $r0
tBX_RET 14, $noreg, implicit killed $r0
...