1
0
Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2025-01-31 12:41:49 +01:00)

[AArch64LoadStoreOptimizer] Skip debug insts during pattern matching [12/14]

Do not count the presence of debug insts against the limit set by
LdStLimit, and allow the optimizer to find matching insts by skipping
over debug insts.

Differential Revision: https://reviews.llvm.org/D78411
This commit is contained in:
Vedant Kumar 2020-04-17 18:11:46 -07:00
parent 3b1d3cce73
commit d36c3656f3
3 changed files with 402 additions and 22 deletions

View File

@ -678,14 +678,14 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
"Expected promotable zero stores.");
MachineBasicBlock::iterator NextI = I;
++NextI;
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way we merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == MergeMI)
++NextI;
NextI = next_nodbg(NextI, E);
unsigned Opc = I->getOpcode();
bool IsScaled = !TII->isUnscaledLdSt(Opc);
@ -748,18 +748,17 @@ static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
const TargetRegisterInfo *TRI, unsigned Limit,
std::function<bool(MachineInstr &, bool)> &Fn) {
auto MBB = MI.getParent();
for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
E = MBB->rend();
I != E; I++) {
for (MachineInstr &I :
instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
if (!Limit)
return false;
--Limit;
bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) {
bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
TRI->regsOverlap(MOP.getReg(), DefReg);
});
if (!Fn(*I, isDef))
if (!Fn(I, isDef))
return false;
if (isDef)
break;
@ -783,14 +782,14 @@ MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
const LdStPairFlags &Flags) {
MachineBasicBlock::iterator NextI = I;
++NextI;
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way we merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == Paired)
++NextI;
NextI = next_nodbg(NextI, E);
int SExtIdx = Flags.getSExtIdx();
unsigned Opc =
@ -1009,8 +1008,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
MachineBasicBlock::iterator StoreI) {
MachineBasicBlock::iterator NextI = LoadI;
++NextI;
MachineBasicBlock::iterator NextI =
next_nodbg(LoadI, LoadI->getParent()->end());
int LoadSize = TII->getMemScale(*LoadI);
int StoreSize = TII->getMemScale(*StoreI);
@ -1188,7 +1187,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
unsigned Count = 0;
do {
--MBBI;
MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
@ -1440,7 +1439,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator MBBI = I;
MachineBasicBlock::iterator MBBIWithRenameReg;
MachineInstr &FirstMI = *I;
++MBBI;
MBBI = next_nodbg(MBBI, E);
bool MayLoad = FirstMI.mayLoad();
bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
@ -1468,7 +1467,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Remember any instructions that read/write memory between FirstMI and MI.
SmallVector<MachineInstr *, 4> MemInsns;
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
for (unsigned Count = 0; MBBI != E && Count < Limit;
MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
UsedInBetween.accumulate(MI);
@ -1637,12 +1637,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
assert((Update->getOpcode() == AArch64::ADDXri ||
Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
MachineBasicBlock::iterator NextI = I;
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
// instruction we're merging, in which case it's the one after that.
if (++NextI == Update)
++NextI;
if (NextI == Update)
NextI = next_nodbg(NextI, E);
int Value = Update->getOperand(2).getImm();
assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
@ -1780,7 +1781,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
++MBBI;
MBBI = next_nodbg(MBBI, E);
// We can't post-increment the stack pointer if any instruction between
// the memory access (I) and the increment (MBBI) can access the memory
@ -1796,7 +1797,8 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
return E;
}
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
for (unsigned Count = 0; MBBI != E && Count < Limit;
MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
@ -1854,7 +1856,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
UsedRegUnits.clear();
unsigned Count = 0;
do {
--MBBI;
MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there

View File

@ -0,0 +1,377 @@
# Strip out debug info, then run ldst-opt with limit=1.
# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass mir-strip-debug,aarch64-ldst-opt -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
#
# Run ldst-opt with limit=1, then strip out debug info.
# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt,mir-strip-debug -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
---
### STG and its offset limits
# NOTE: every test below interleaves DBG_VALUEs around the tag store and
# its base-register update. With -aarch64-load-store-scan-limit=1 the
# merge can only succeed if the optimizer skips debug instructions and
# does not count them against the scan limit; both RUN lines must also
# produce identical output whether debug info is stripped before or
# after the pass runs.
# CHECK-LABEL: name: test_STG_post
# CHECK: STGPostIndex $x0, $x0, 7
# STGOffset + ADDXri #112 merge into a post-indexed STG; 112 bytes scale
# to immediate 7 (16-byte granules).
name: test_STG_post
body: |
bb.0.entry:
liveins: $x0
STGOffset $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 112, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STG_post_same_reg
# CHECK: STGPostIndex $x1, $x0, 7
# Same merge as above with a distinct source register ($x1).
name: test_STG_post_same_reg
body: |
bb.0.entry:
liveins: $x0, $x1
STGOffset $x1, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 112, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STG_post_unaligned
# CHECK: STGOffset $x0, $x0, 0
# CHECK-NEXT: ADDXri $x0, 8, 0
# Increment of 8 is not a multiple of the 16-byte granule: no merge.
name: test_STG_post_unaligned
body: |
bb.0.entry:
liveins: $x0
STGOffset $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 8, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STG_post2
# CHECK: STGPostIndex $x0, $x0, -256
# -4096 scales to -256, the smallest immediate that still merges.
name: test_STG_post2
body: |
bb.0.entry:
liveins: $x0
STGOffset $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = SUBXri $x0, 4096, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STG_post3
# CHECK: STGOffset $x0, $x0, 0
# CHECK-NEXT: SUBXri $x0, 4112, 0
# -4112 would scale to -257, just below the minimum: no merge.
name: test_STG_post3
body: |
bb.0.entry:
liveins: $x0
STGOffset $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = SUBXri $x0, 4112, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STG_post4
# CHECK: STGPostIndex $x0, $x0, 255
# 4080 scales to 255, the largest immediate that still merges.
name: test_STG_post4
body: |
bb.0.entry:
liveins: $x0
STGOffset $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 4080, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STG_post5
# CHECK: STGOffset $x0, $x0, 0
# CHECK-NEXT: ADDXri $x0, 4096, 0
# 4096 would scale to 256, just above the maximum: no merge.
name: test_STG_post5
body: |
bb.0.entry:
liveins: $x0
STGOffset $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 4096, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
### The rest of ST*G variants.
# Same post-index merge as test_STG_post, repeated for each remaining
# tag-store opcode; the interleaved DBG_VALUEs again must be skipped.
# CHECK-LABEL: name: test_STZG_post
# CHECK: STZGPostIndex $x0, $x0, 7
name: test_STZG_post
body: |
bb.0.entry:
liveins: $x0
STZGOffset $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 112, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_ST2G_post
# CHECK: ST2GPostIndex $x0, $x0, 7
name: test_ST2G_post
body: |
bb.0.entry:
liveins: $x0
ST2GOffset $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 112, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STZ2G_post
# CHECK: STZ2GPostIndex $x0, $x0, 7
name: test_STZ2G_post
body: |
bb.0.entry:
liveins: $x0
STZ2GOffset $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 112, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
### STGP and its offset limits
# STGP also scales by 16 but (per the boundary tests below) its merged
# immediate range is narrower: [-64, 63].
# CHECK-LABEL: name: test_STGP_post
# CHECK: STGPpost $x1, $x2, $x0, 7
name: test_STGP_post
body: |
bb.0.entry:
liveins: $x0, $x1, $x2
STGPi $x1, $x2, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 112, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STGP_post2
# CHECK: STGPpost $x1, $x2, $x0, -64
# -1024 scales to -64, the smallest immediate that still merges.
name: test_STGP_post2
body: |
bb.0.entry:
liveins: $x0, $x1, $x2
STGPi $x1, $x2, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = SUBXri $x0, 1024, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STGP_post3
# CHECK: STGPi $x1, $x2, $x0, 0
# CHECK-NEXT: SUBXri $x0, 1040, 0
# -1040 would scale to -65, just below the minimum: no merge.
name: test_STGP_post3
body: |
bb.0.entry:
liveins: $x0, $x1, $x2
STGPi $x1, $x2, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = SUBXri $x0, 1040, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STGP_post4
# CHECK: STGPpost $x1, $x2, $x0, 63
# 1008 scales to 63, the largest immediate that still merges.
name: test_STGP_post4
body: |
bb.0.entry:
liveins: $x0, $x1, $x2
STGPi $x1, $x2, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 1008, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STGP_post5
# CHECK: STGPi $x1, $x2, $x0, 0
# CHECK-NEXT: ADDXri $x0, 1024, 0
# 1024 would scale to 64, just above the maximum: no merge.
name: test_STGP_post5
body: |
bb.0.entry:
liveins: $x0, $x1, $x2
STGPi $x1, $x2, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 1024, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
### Pre-indexed forms
# The store already carries a nonzero scaled offset that matches the
# base increment (160 bytes = 10 granules), so the update folds into a
# pre-indexed store instead of a post-indexed one.
# CHECK-LABEL: name: test_STG_pre
# CHECK: STGPreIndex $x0, $x0, 10
name: test_STG_pre
body: |
bb.0.entry:
liveins: $x0
STGOffset $x0, $x0, 10
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 160, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STGP_pre
# CHECK: STGPpre $x1, $x2, $x0, 10
name: test_STGP_pre
body: |
bb.0.entry:
liveins: $x0, $x1, $x2
STGPi $x1, $x2, $x0, 10
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 160, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
### Pre-indexed forms with add/sub coming before the store.
# Here the base update precedes the store (backward search direction);
# the DBG_VALUEs between them must be skipped for the fold to happen.
# CHECK-LABEL: name: test_STG_pre_back
# CHECK: STGPreIndex $x0, $x0, 2
# ADDXri #32 (2 granules) folds into a pre-indexed STG.
name: test_STG_pre_back
body: |
bb.0.entry:
liveins: $x0
$x0 = ADDXri $x0, 32, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
STGOffset $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STGP_pre_back
# CHECK: STGPpre $x1, $x2, $x0, -3
# SUBXri #48 (-3 granules) folds into a pre-indexed STGP.
name: test_STGP_pre_back
body: |
bb.0.entry:
liveins: $x0, $x1, $x2
$x0 = SUBXri $x0, 48, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
STGPi $x1, $x2, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
### STGP with source register == address register
# When the update follows the store, the stored values are read before
# the base writeback, so source == base is still mergeable.
# CHECK-LABEL: name: test_STGP_post_same_reg
# CHECK: STGPpost $x0, $x0, $x0, 7
name: test_STGP_post_same_reg
body: |
bb.0.entry:
liveins: $x0
STGPi $x0, $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 112, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# CHECK-LABEL: name: test_STGP_pre_same_reg
# CHECK: STGPpre $x0, $x0, $x0, 7
name: test_STGP_pre_same_reg
body: |
bb.0.entry:
liveins: $x0
STGPi $x0, $x0, $x0, 7
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
$x0 = ADDXri $x0, 112, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...
# This case cannot be merged because the source register is always read before writeback.
# CHECK-LABEL: name: test_STGP_pre_back_same_reg
# CHECK: SUBXri $x0, 48, 0
# CHECK-NEXT: STGPi $x0, $x0, $x0, 0
name: test_STGP_pre_back_same_reg
body: |
bb.0.entry:
liveins: $x0
$x0 = SUBXri $x0, 48, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
STGPi $x0, $x0, $x0, 0
DBG_VALUE $x0, 0
DBG_VALUE $x0, 0
RET_ReallyLR implicit $x0
...

View File

@ -1,4 +1,5 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -debugify-and-strip-all-safe -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
---
### STG and its offset limits