mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
commit 4b0ec23e84
This change allows 64-bit constant rematerialization. If a constant is split into two separate moves initializing sub0 and sub1, as before, RA cannot rematerialize a 64-bit register. This gives a 10-20% uplift in a set of huge apps heavily using double-precision math.
Fixes: SWDEV-292645
Differential Revision: https://reviews.llvm.org/D104874
163 lines | 4.8 KiB | C++
//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
/// undef %0.sub1:sreg_64 = S_MOV_B32 1
/// %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
/// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to be done after register coalescer to allow it to do its job and
/// before actual register allocation to allow rematerialization.
///
/// Right now the pass only handles 64 bit SGPRs with immediate initializers,
/// although the same shall be possible with other register classes and
/// instructions if necessary.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
|
|
#include "GCNSubtarget.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "llvm/CodeGen/LiveIntervals.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/InitializePasses.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
|
|
|
|
namespace {

/// Machine function pass that merges a split 64-bit SGPR immediate
/// initialization (two S_MOV_B32 writing sub0 and sub1) into a single
/// S_MOV_B64_IMM_PSEUDO carrying the combined 64-bit immediate.
class GCNPreRAOptimizations : public MachineFunctionPass {
private:
  const SIInstrInfo *TII;   // Instruction info, set in runOnMachineFunction.
  MachineRegisterInfo *MRI; // Virtual register info of the current function.
  LiveIntervals *LIS;       // Live intervals; updated in place by processReg.

  /// Try to rewrite the split immediate initialization of \p Reg into one
  /// S_MOV_B64_IMM_PSEUDO. Returns true if the function was changed.
  bool processReg(Register Reg);

public:
  static char ID;

  GCNPreRAOptimizations() : MachineFunctionPass(ID) {
    initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Pre-RA optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // LiveIntervals is required and kept up to date, so everything is
    // preserved.
    AU.addRequired<LiveIntervals>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.
|
INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
|
|
"AMDGPU Pre-RA optimizations", false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
|
|
INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations",
|
|
false, false)
|
|
|
|
char GCNPreRAOptimizations::ID = 0;
|
|
|
|
char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;
|
|
|
|
/// Factory used by the AMDGPU target pass pipeline; ownership of the
/// returned pass transfers to the caller (the pass manager).
FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
  auto *Pass = new GCNPreRAOptimizations();
  return Pass;
}
|
// Attempts to combine the two S_MOV_B32 instructions that initialize the sub0
// and sub1 halves of \p Reg into one S_MOV_B64_IMM_PSEUDO with the merged
// 64-bit immediate. Returns true if the rewrite was performed.
bool GCNPreRAOptimizations::processReg(Register Reg) {
  MachineInstr *Def0 = nullptr; // S_MOV_B32 defining sub0.
  MachineInstr *Def1 = nullptr; // S_MOV_B32 defining sub1.
  uint64_t Init = 0;            // Combined 64-bit immediate value.

  for (MachineInstr &I : MRI->def_instructions(Reg)) {
    // Every def must be a plain "Reg.subN = S_MOV_B32 <imm>" with no extra
    // operands; otherwise the register does not match the pattern.
    if (I.getOpcode() != AMDGPU::S_MOV_B32 || I.getOperand(0).getReg() != Reg ||
        !I.getOperand(1).isImm() || I.getNumOperands() != 2)
      return false;

    switch (I.getOperand(0).getSubReg()) {
    default:
      return false;
    case AMDGPU::sub0:
      if (Def0) // A half defined more than once — give up.
        return false;
      Def0 = &I;
      // Low 32 bits of the combined immediate.
      Init |= I.getOperand(1).getImm() & 0xffffffff;
      break;
    case AMDGPU::sub1:
      if (Def1)
        return false;
      Def1 = &I;
      // High 32 bits; widen before shifting to avoid losing them.
      Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
      break;
    }
  }

  // Both halves must be initialized, and in the same basic block, so a
  // single replacement instruction can be placed at one point.
  if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
    return false;

  LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1
                    << " =>\n");

  // Make Def0 the earlier of the two instructions; the merged move is
  // inserted at its position below.
  if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
                                LIS->getInstructionIndex(*Def0)))
    std::swap(Def0, Def1);

  // Unmap the old defs from the slot indexes before mutating the block.
  LIS->RemoveMachineInstrFromMaps(*Def0);
  LIS->RemoveMachineInstrFromMaps(*Def1);
  auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
                      TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
                  .addImm(Init);

  Def0->eraseFromParent();
  Def1->eraseFromParent();
  // Index the new instruction and rebuild Reg's live interval from scratch
  // around the single full-width definition.
  LIS->InsertMachineInstrInMaps(*NewI);
  LIS->removeInterval(Reg);
  LIS->createAndComputeVirtRegInterval(Reg);

  LLVM_DEBUG(dbgs() << " " << *NewI);

  return true;
}
|
// Entry point: walk every virtual register of the function and try to merge
// split 64-bit SGPR immediate initializations into a single pseudo move.
bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  bool Changed = false;

  for (unsigned Idx = 0, NumVRegs = MRI->getNumVirtRegs(); Idx != NumVRegs;
       ++Idx) {
    Register Reg = Register::index2VirtReg(Idx);
    if (!LIS->hasInterval(Reg))
      continue;
    // Only 64-bit SGPR register classes are handled at the moment.
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    if (!TRI->isSGPRClass(RC) || RC->MC->getSizeInBits() != 64)
      continue;
    if (processReg(Reg))
      Changed = true;
  }

  return Changed;
}
|