1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[PowerPC] Add peephole to remove redundant accumulator prime/unprime instructions

In some situations, the compiler may insert an accumulator prime instruction and
an accumulator unprime instruction with no use of that accumulator between the two.
This happens, for example, when we store an accumulator right after assembling or
restoring it. This patch adds a peephole to remove these prime and unprime instructions.

Differential Revision: https://reviews.llvm.org/D91386
This commit is contained in:
Baptiste Saleil 2020-11-18 14:58:28 -06:00
parent 78115bb465
commit fdfa32d644
3 changed files with 59 additions and 34 deletions

View File

@ -349,6 +349,64 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
return MadeChange;
}
// Erase redundant accumulator prime/unprime pairs from a basic block.
//
// The compiler can emit an accumulator prime (XXMTACC) followed by an
// unprime (XXMFACC) of the same accumulator with nothing using it in
// between, e.g. when an accumulator is stored right after being restored
// from a spill. Such a pair can be dropped.
//
// The block is scanned once, front to back, keeping one slot per
// accumulator register:
//   - XXMTACC records itself in the slot of the register it primes,
//     replacing whatever was recorded there before.
//   - XXMFACC of a register whose slot is occupied marks both itself and
//     the recorded XXMTACC for removal. The slot is not cleared, so a
//     further XXMFACC of the same register seen before any other use is
//     marked as well.
//   - Any other instruction with an accumulator register operand empties
//     the slot of each accumulator it mentions.
// The marked instructions are erased only after the scan is complete.
//
// Returns true if at least one instruction was erased from MBB.
bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
  // PendingPrime[I] is the XXMTACC currently eligible for removal for
  // accumulator ACC0 + I, or nullptr when there is none.
  SmallVector<MachineInstr *, 8> PendingPrime(
      PPC::UACCRCRegClass.getNumRegs(), nullptr);
  DenseSet<MachineInstr *> DeadInstrs;

  for (MachineInstr &MI : MBB.instrs()) {
    switch (MI.getOpcode()) {
    case PPC::XXMTACC: {
      // A prime: remember it as the removal candidate for its register.
      Register PrimedAcc = MI.getOperand(0).getReg();
      assert(PPC::ACCRCRegClass.contains(PrimedAcc) &&
             "Unexpected register for XXMTACC");
      PendingPrime[PrimedAcc - PPC::ACC0] = &MI;
      break;
    }
    case PPC::XXMFACC: {
      // An unprime: if a prime of the same register is pending, both go.
      Register UnprimedAcc = MI.getOperand(0).getReg();
      assert(PPC::ACCRCRegClass.contains(UnprimedAcc) &&
             "Unexpected register for XXMFACC");
      if (MachineInstr *MatchingPrime = PendingPrime[UnprimedAcc - PPC::ACC0]) {
        DeadInstrs.insert(&MI);
        DeadInstrs.insert(MatchingPrime);
      }
      break;
    }
    default:
      // Anything else that names an accumulator counts as a use and
      // disqualifies the pending prime for that accumulator.
      for (MachineOperand &MO : MI.operands()) {
        if (MO.isReg() && PPC::ACCRCRegClass.contains(MO.getReg())) {
          Register UsedAcc = MO.getReg();
          PendingPrime[UsedAcc - PPC::ACC0] = nullptr;
        }
      }
      break;
    }
  }

  const bool MadeChange = !DeadInstrs.empty();
  NumRemovedInPreEmit += DeadInstrs.size();
  // Erasure is deferred until here so the scan above never walks over an
  // instruction that has already been removed from the block.
  for (MachineInstr *Dead : DeadInstrs)
    Dead->eraseFromParent();
  return MadeChange;
}
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
// Remove UNENCODED_NOP even when this pass is disabled.
@ -370,6 +428,7 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
for (MachineBasicBlock &MBB : MF) {
Changed |= removeRedundantLIs(MBB, TRI);
Changed |= addLinkerOpt(MBB, TRI);
Changed |= removeAccPrimeUnprime(MBB);
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
if (Opc == PPC::UNENCODED_NOP) {

View File

@ -16,8 +16,6 @@ define void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
; LE-PAIRED-NEXT: plxv vs0, f@PCREL+112(0), 1
; LE-PAIRED-NEXT: plxv vs3, f@PCREL+64(0), 1
; LE-PAIRED-NEXT: plxv vs2, f@PCREL+80(0), 1
; LE-PAIRED-NEXT: xxmtacc acc0
; LE-PAIRED-NEXT: xxmfacc acc0
; LE-PAIRED-NEXT: pstxv vs0, f@PCREL+176(0), 1
; LE-PAIRED-NEXT: pstxv vs1, f@PCREL+160(0), 1
; LE-PAIRED-NEXT: pstxv vs2, f@PCREL+144(0), 1
@ -32,8 +30,6 @@ define void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: lxv vs0, 64(r3)
; BE-PAIRED-NEXT: lxv vs3, 112(r3)
; BE-PAIRED-NEXT: lxv vs2, 96(r3)
; BE-PAIRED-NEXT: xxmtacc acc0
; BE-PAIRED-NEXT: xxmfacc acc0
; BE-PAIRED-NEXT: stxv vs1, 144(r3)
; BE-PAIRED-NEXT: stxv vs0, 128(r3)
; BE-PAIRED-NEXT: stxv vs3, 176(r3)
@ -58,8 +54,6 @@ define void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
; LE-PAIRED-NEXT: lxvx vs3, r5, r3
; LE-PAIRED-NEXT: lxv vs2, 16(r6)
; LE-PAIRED-NEXT: sldi r3, r4, 6
; LE-PAIRED-NEXT: xxmtacc acc0
; LE-PAIRED-NEXT: xxmfacc acc0
; LE-PAIRED-NEXT: stxvx vs3, r5, r3
; LE-PAIRED-NEXT: add r3, r5, r3
; LE-PAIRED-NEXT: stxv vs0, 48(r3)
@ -78,8 +72,6 @@ define void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: lxv vs1, 16(r6)
; BE-PAIRED-NEXT: lxv vs3, 48(r6)
; BE-PAIRED-NEXT: lxv vs2, 32(r6)
; BE-PAIRED-NEXT: xxmtacc acc0
; BE-PAIRED-NEXT: xxmfacc acc0
; BE-PAIRED-NEXT: stxvx vs0, r5, r3
; BE-PAIRED-NEXT: add r3, r5, r3
; BE-PAIRED-NEXT: stxv vs1, 16(r3)
@ -101,8 +93,6 @@ define void @testUnalignedLdSt() {
; LE-PAIRED-NEXT: plxv vs0, f@PCREL+59(0), 1
; LE-PAIRED-NEXT: plxv vs3, f@PCREL+11(0), 1
; LE-PAIRED-NEXT: plxv vs2, f@PCREL+27(0), 1
; LE-PAIRED-NEXT: xxmtacc acc0
; LE-PAIRED-NEXT: xxmfacc acc0
; LE-PAIRED-NEXT: pstxv vs0, f@PCREL+67(0), 1
; LE-PAIRED-NEXT: pstxv vs1, f@PCREL+51(0), 1
; LE-PAIRED-NEXT: pstxv vs2, f@PCREL+35(0), 1
@ -122,8 +112,6 @@ define void @testUnalignedLdSt() {
; BE-PAIRED-NEXT: li r4, 59
; BE-PAIRED-NEXT: lxvx vs3, r3, r4
; BE-PAIRED-NEXT: li r4, 35
; BE-PAIRED-NEXT: xxmtacc acc0
; BE-PAIRED-NEXT: xxmfacc acc0
; BE-PAIRED-NEXT: stxvx vs1, r3, r4
; BE-PAIRED-NEXT: li r4, 19
; BE-PAIRED-NEXT: stxvx vs0, r3, r4

View File

@ -16,8 +16,6 @@ define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: xxlor vs2, v2, v2
; CHECK-NEXT: xxlor vs3, v3, v3
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
@ -31,8 +29,6 @@ define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: xxlor vs2, v2, v2
; CHECK-BE-NEXT: xxlor vs3, v3, v3
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
@ -77,8 +73,6 @@ define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-NEXT: xxlor vs2, v2, v2
; CHECK-NEXT: xxlor vs3, v3, v3
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
@ -93,8 +87,6 @@ define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-BE-NEXT: xxlor vs2, v2, v2
; CHECK-BE-NEXT: xxlor vs3, v3, v3
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
@ -119,9 +111,6 @@ define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: xxlor vs2, v2, v2
; CHECK-NEXT: xxlor vs3, v3, v3
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
@ -135,9 +124,6 @@ define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: xxlor vs2, v2, v2
; CHECK-BE-NEXT: xxlor vs3, v3, v3
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
@ -262,8 +248,6 @@ define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-NEXT: xvi4ger8pp acc0, v2, v2
; CHECK-NEXT: .LBB7_3: # %if.end
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
@ -286,8 +270,6 @@ define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2
; CHECK-BE-NEXT: .LBB7_3: # %if.end
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
@ -637,8 +619,6 @@ define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind
; CHECK-NEXT: lxvp vsp0, r1(r3)
; CHECK-NEXT: li r3, 32
; CHECK-NEXT: lxvp vsp2, r1(r3)
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 112(r30)
; CHECK-NEXT: stxv vs1, 96(r30)
; CHECK-NEXT: stxv vs2, 80(r30)
@ -675,8 +655,6 @@ define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind
; CHECK-BE-NEXT: lxvp vsp0, r1(r3)
; CHECK-BE-NEXT: li r3, 144
; CHECK-BE-NEXT: lxvp vsp2, r1(r3)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs3, 112(r30)
; CHECK-BE-NEXT: stxv vs2, 96(r30)
; CHECK-BE-NEXT: stxv vs1, 80(r30)