1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[PowerPC] Fold redundant load immediates of zero and delete if possible

This patch folds redundant load immediates of zero into PPC::ZERO for
instructions whose operand treats zero as the immediate value zero rather
than as a register. If the load immediate then has no remaining uses, it is
deleted.

This is already done in earlier passes but the ppc-mi-peephole allows for
a more general implementation.

Differential Revision: https://reviews.llvm.org/D69168
This commit is contained in:
Kamau Bridgeman 2020-05-12 13:15:06 -05:00
parent 5e4512f721
commit 563aac5470
6 changed files with 82 additions and 13 deletions

View File

@ -1350,9 +1350,11 @@ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Register Reg, MachineRegisterInfo *MRI) const {
// For some instructions, it is legal to fold ZERO into the RA register field.
// For some instructions, it is legal to fold ZERO into the RA register field.
// This function performs that fold by replacing the operand with PPC::ZERO;
// it does not consider whether the load immediate zero is no longer in use.
bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Register Reg) const {
// A zero immediate should always be loaded with a single li.
unsigned DefOpc = DefMI.getOpcode();
if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
@ -1372,6 +1374,8 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (UseMCID.isPseudo())
return false;
// We need to find which of the User's operands is to be folded, that will be
// the operand that matches the given register ID.
unsigned UseIdx;
for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
if (UseMI.getOperand(UseIdx).isReg() &&
@ -1409,15 +1413,21 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
PPC::ZERO8 : PPC::ZERO;
}
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
UseMI.getOperand(UseIdx).setReg(ZeroReg);
if (DeleteDef)
DefMI.eraseFromParent();
return true;
}
// Folds a load immediate of zero (DefMI, defining Reg) into UseMI when UseMI
// accepts zero as an immediate zero in that operand position. The operand
// rewrite itself is delegated to onlyFoldImmediate; once that attempt has been
// made, DefMI is erased if Reg has no non-debug uses left.
// Returns the result reported by onlyFoldImmediate.
bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                 Register Reg, MachineRegisterInfo *MRI) const {
  const bool Folded = onlyFoldImmediate(UseMI, DefMI, Reg);

  // The definition is still needed while any non-debug use of Reg remains.
  if (!MRI->use_nodbg_empty(Reg))
    return Folded;

  DefMI.eraseFromParent();
  return Folded;
}
static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE; ++I)

View File

@ -336,6 +336,9 @@ public:
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const override;
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Register Reg) const;
// If conversion by predication (only supported by some branch instructions).
// All of the profitability checks always return true; it is always
// profitable to use the predicated branches.

View File

@ -57,6 +57,8 @@ STATISTIC(NumRotatesCollapsed,
"Number of pairs of rotate left, clear left/right collapsed");
STATISTIC(NumEXTSWAndSLDICombined,
"Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
STATISTIC(NumLoadImmZeroFoldedAndRemoved,
"Number of LI(8) reg, 0 that are folded to r0 and removed");
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@ -319,7 +321,22 @@ bool PPCMIPeephole::simplifyCode(void) {
default:
break;
case PPC::LI:
case PPC::LI8: {
// If we are materializing a zero, look for any use operands for which
// zero means immediate zero. All such operands can be replaced with
// PPC::ZERO.
if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
break;
unsigned MIDestReg = MI.getOperand(0).getReg();
for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
if (MRI->use_nodbg_empty(MIDestReg)) {
++NumLoadImmZeroFoldedAndRemoved;
ToErase = &MI;
}
break;
}
case PPC::STD: {
MachineFrameInfo &MFI = MF->getFrameInfo();
if (MFI.hasVarSizedObjects() ||

View File

@ -1615,7 +1615,7 @@ body: |
%0 = LI8 89
%2 = CMPDI %0, 87
%4 = ISEL8 $zero8, %0, %2.sub_gt
; CHECK: LI8 0
; CHECK: ADDI8 %1, 0
%5 = ADD8 killed %4, %1
$x3 = COPY %5
BLR8 implicit $lr8, implicit $rm, implicit $x3
@ -2017,7 +2017,7 @@ body: |
%3 = LI -3
%4 = CMPLWI %3, 87
%6 = ISEL $zero, %3, %4.sub_gt
; CHECK: LI 0
; CHECK: ADDI killed %2, 0
%7 = ADD4 killed %6, killed %2
%9 = IMPLICIT_DEF
%8 = INSERT_SUBREG %9, killed %7, 1

View File

@ -0,0 +1,40 @@
; NOTE: This test verifies that a redundant load immediate of zero is folded
; NOTE: into its use in an isel and then deleted because it is no longer in use.
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
%0 = type { i32, i16 }
@val = common dso_local local_unnamed_addr global %0* null, align 8
; Test function: takes a pointer to a { i32, i16 } struct (%0) and returns a
; sign-extended i32. The loop in bb3 contains a select whose true value is null.
define dso_local signext i32 @redunLoadImm(%0* %arg) {
; CHECK-LABEL: redunLoadImm:
; verify that the load immediate has been folded into the isel and deleted
; CHECK-NOT: li r[[REG1:[0-9]+]], 0
; CHECK: isel r[[REG2:[0-9]+]], 0, r[[REG3:[0-9]+]], eq
; Entry: branch to bb9 when %arg is null, otherwise continue to bb1.
bb:
%tmp = icmp eq %0* %arg, null
br i1 %tmp, label %bb9, label %bb1
; Computed once before the loop: address of the i16 field (index 1) of %arg.
bb1: ; preds = %bb
%tmp2 = getelementptr inbounds %0, %0* %arg, i64 0, i32 1
br label %bb3
; Loop body. It branches back to itself unconditionally, so there is no exit.
; Each iteration loads the i16, and stores to @val either null (when the
; loaded value is zero) or %arg indexed by the sign-extended value.
; NOTE(review): presumably this select of null is what lowers to the isel with
; a literal 0 operand matched above -- confirm against the generated assembly.
bb3: ; preds = %bb3, %bb1
%tmp4 = load i16, i16* %tmp2, align 4
%tmp5 = sext i16 %tmp4 to i64
%tmp6 = getelementptr inbounds %0, %0* %arg, i64 %tmp5
%tmp7 = icmp eq i16 %tmp4, 0
%tmp8 = select i1 %tmp7, %0* null, %0* %tmp6
store %0* %tmp8, %0** @val, align 8
br label %bb3
; Null-argument path: load the pointer held in @val and return the i32 field
; (index 0) of the struct it points to.
bb9: ; preds = %bb
%tmp10 = load %0*, %0** @val, align 8
%tmp11 = getelementptr inbounds %0, %0* %tmp10, i64 0, i32 0
%tmp12 = load i32, i32* %tmp11, align 4
ret i32 %tmp12
}

View File

@ -13,8 +13,7 @@
; CHECK: addic 29, 0, 20
; Save CR through R12 using R29 as the stack pointer (aligned base pointer).
; CHECK: mfcr 12
; CHECK: stw 28, -24(29)
; CHECK: stw 12, -28(29)
; CHECK: stw 12, -24(29)
target datalayout = "E-m:e-p:32:32-i64:64-n32"
target triple = "powerpc-unknown-freebsd"