1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00
llvm-mirror/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
Stanislav Mekhanoshin 4b0ec23e84 [AMDGPU] Add S_MOV_B64_IMM_PSEUDO for wide constants
This is to allow 64 bit constant rematerialization. If a constant
is split into two separate moves initializing sub0 and sub1 like
now, RA cannot rematerialize a 64 bit register.

This gives 10-20% uplift in a set of huge apps heavily using double
precision math.

Fixes: SWDEV-292645

Differential Revision: https://reviews.llvm.org/D104874
2021-06-30 11:45:38 -07:00

163 lines
4.8 KiB
C++

//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
/// undef %0.sub1:sreg_64 = S_MOV_B32 1
/// %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
/// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to be done after register coalescer to allow it to do its job and
/// before actual register allocation to allow rematerialization.
///
/// Right now the pass only handles 64 bit SGPRs with immediate initializers,
/// although the same shall be possible with other register classes and
/// instructions if necessary.
///
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
namespace {
/// Pre-RA machine function pass that merges a pair of S_MOV_B32 defs of the
/// sub0/sub1 halves of a 64-bit SGPR into one S_MOV_B64_IMM_PSEUDO, so the
/// register allocator can rematerialize the 64-bit constant instead of
/// spilling it. Runs after register coalescing and before allocation.
class GCNPreRAOptimizations : public MachineFunctionPass {
private:
// Cached per-function state, initialized in runOnMachineFunction.
const SIInstrInfo *TII;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
// Try to combine the split immediate initialization of \p Reg.
// Returns true if the function was changed.
bool processReg(Register Reg);
public:
static char ID;
GCNPreRAOptimizations() : MachineFunctionPass(ID) {
initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
return "AMDGPU Pre-RA optimizations";
}
// Requires LiveIntervals (the combine updates intervals in place) and
// preserves all analyses: it only rewrites instructions.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
} // End anonymous namespace.
// Register the pass. The descriptive name passed to INITIALIZE_PASS_END must
// match the one in INITIALIZE_PASS_BEGIN (and getPassName()); the original
// code used a truncated "Pre-RA optimizations" in the END macro.
INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
                      "AMDGPU Pre-RA optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE,
                    "AMDGPU Pre-RA optimizations", false, false)

char GCNPreRAOptimizations::ID = 0;

char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;

/// Factory used by the AMDGPU target pass pipeline.
FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
  return new GCNPreRAOptimizations();
}
// Combine two S_MOV_B32 immediates defining Reg's sub0 and sub1 into a single
// S_MOV_B64_IMM_PSEUDO. Returns true if the rewrite was performed.
bool GCNPreRAOptimizations::processReg(Register Reg) {
MachineInstr *Def0 = nullptr; // S_MOV_B32 defining sub0, if found.
MachineInstr *Def1 = nullptr; // S_MOV_B32 defining sub1, if found.
uint64_t Init = 0;            // Combined 64-bit immediate being assembled.
// Every def of Reg must be an S_MOV_B32 of an immediate into exactly one of
// sub0/sub1, each defined exactly once; otherwise bail out.
for (MachineInstr &I : MRI->def_instructions(Reg)) {
if (I.getOpcode() != AMDGPU::S_MOV_B32 || I.getOperand(0).getReg() != Reg ||
!I.getOperand(1).isImm() || I.getNumOperands() != 2)
return false;
switch (I.getOperand(0).getSubReg()) {
default:
return false;
case AMDGPU::sub0:
if (Def0)
return false;
Def0 = &I;
// Mask to the low 32 bits: getImm() may carry a sign extension.
Init |= I.getOperand(1).getImm() & 0xffffffff;
break;
case AMDGPU::sub1:
if (Def1)
return false;
Def1 = &I;
// Cast before shifting so the immediate lands in the high 32 bits.
Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
break;
}
}
// Both halves must be initialized, and in the same block so a single
// replacement instruction can dominate all former uses.
if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
return false;
LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1
<< " =>\n");
// Make Def0 the earlier of the two; the combined move is inserted there.
if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
LIS->getInstructionIndex(*Def0)))
std::swap(Def0, Def1);
// Detach both old defs from the slot index maps before erasing them.
LIS->RemoveMachineInstrFromMaps(*Def0);
LIS->RemoveMachineInstrFromMaps(*Def1);
auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
.addImm(Init);
Def0->eraseFromParent();
Def1->eraseFromParent();
// Index the new instruction, then rebuild Reg's live interval from scratch
// since its def structure changed (two subreg defs -> one full def).
LIS->InsertMachineInstrInMaps(*NewI);
LIS->removeInterval(Reg);
LIS->createAndComputeVirtRegInterval(Reg);
LLVM_DEBUG(dbgs() << " " << *NewI);
return true;
}
/// Pass entry point: walk every virtual register and, for 64-bit SGPR
/// classes, attempt to fold split immediate initializations via processReg().
/// Returns true if any instruction was rewritten.
bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  // Cache subtarget info and the analyses used by processReg().
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  bool Changed = false;

  for (unsigned Idx = 0, NumVRegs = MRI->getNumVirtRegs(); Idx != NumVRegs;
       ++Idx) {
    Register Reg = Register::index2VirtReg(Idx);
    // Registers with no live interval were never computed; skip them.
    if (!LIS->hasInterval(Reg))
      continue;
    // Only 64-bit SGPR tuples are candidates for the combine.
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    if (!TRI->isSGPRClass(RC) || RC->MC->getSizeInBits() != 64)
      continue;
    if (processReg(Reg))
      Changed = true;
  }

  return Changed;
}