//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access
/// failure. The required code is also added to tie this init code to the
/// result of the img instruction.
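///
/// For example (illustrative MIR; opcode and register names are made up), a
/// tfe load such as
///   %v:vreg_96 = IMAGE_LOAD_V3_V2 ... tfe:1 ...
/// gets a preceding zero-initialization of its result dwords and an
/// implicit use, tied to the def, carrying that initial value.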
///
//===----------------------------------------------------------------------===//
//
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-img-init"

using namespace llvm;

namespace {

class SIAddIMGInit : public MachineFunctionPass {
public:
  static char ID;

public:
  SIAddIMGInit() : MachineFunctionPass(ID) {
    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

char SIAddIMGInit::ID = 0;

char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
       ++BI) {
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      auto Opcode = MI.getOpcode();
      if (TII->isMIMG(Opcode) && !MI.mayStore()) {
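        // Stores return no data, so only loads and samples need the init;
        // mayStore also filters out image atomics here.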
        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);

        // Check for instructions that don't have tfe or lwe fields.
        // There shouldn't be any at this point.
        assert((TFE && LWE) && "Expected tfe and lwe operands in instruction");

        unsigned TFEVal = TFE->getImm();
        unsigned LWEVal = LWE->getImm();
        unsigned D16Val = D16 ? D16->getImm() : 0;

        if (TFEVal || LWEVal) {
          // At least one of TFE or LWE is non-zero, so we have to insert a
          // suitable initialization of the result value and tie it to the
          // dest of the image instruction.
          const DebugLoc &DL = MI.getDebugLoc();

          int DstIdx =
              AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);

          // Calculate which dwords we have to initialize to 0.
          MachineOperand *MO_Dmask =
              TII->getNamedOperand(MI, AMDGPU::OpName::dmask);

          // Check that the dmask operand is found.
          assert(MO_Dmask && "Expected dmask operand in instruction");

          unsigned dmask = MO_Dmask->getImm();
          // Determine the number of active lanes taking into account the
          // Gather4 special case.
          unsigned ActiveLanes =
              TII->isGather4(Opcode) ? 4 : countPopulation(dmask);

          bool Packed = !ST.hasUnpackedD16VMem();

          unsigned InitIdx =
              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
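          // E.g. dmask = 0xb gives ActiveLanes = 3: non-D16 (or unpacked)
          // needs 3 data dwords + 1 status dword = 4, while packed D16 needs
          // ((3 + 1) >> 1) + 1 = 3 dwords (two half-sized lanes per dword,
          // plus the status dword).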
          // Abandon the attempt if the dst size isn't large enough; this is
          // in fact an error, but it is picked up elsewhere and reported
          // correctly.
          uint32_t DstSize =
              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
          if (DstSize < InitIdx)
            continue;

          // Create a register for the initialization value.
          Register PrevDst =
              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
          unsigned NewDst = 0; // Final initialized value will be in here.

          // If the PRTStrictNull feature is enabled (the default), initialize
          // all the result registers to 0; otherwise initialize only the
          // error-indication register (VGPRn+1).
          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
          unsigned CurrIdx = ST.usePRTStrictNull() ? 0 : (InitIdx - 1);
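          // E.g. with InitIdx = 4, strict-null zeroes dwords 0..3, while the
          // non-strict case zeroes only dword 3, the TFE/LWE status dword.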
          if (DstSize == 1) {
            // In this case we can just initialize the result directly.
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
                .addImm(0);
            NewDst = PrevDst;
          } else {
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
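            // The loop below threads each zeroed dword into the wide result
            // through a chain of INSERT_SUBREGs, e.g. (illustrative MIR,
            // register names made up):
            //   %prev = IMPLICIT_DEF
            //   %zero = V_MOV_B32_e32 0
            //   %next = INSERT_SUBREG %prev, %zero, %subreg.sub0
            //   ... and so on for sub1, sub2, ...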
            for (; SizeLeft; SizeLeft--, CurrIdx++) {
              NewDst =
                  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
              // Initialize dword.
              Register SubReg =
                  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
              BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
                  .addImm(0);
              // Insert into the super-reg.
              BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
                  .addReg(PrevDst)
                  .addReg(SubReg)
                  .addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx));

              PrevDst = NewDst;
            }
          }
          // Add the initialized value as an implicit operand.
          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);

          // Tie the just-added implicit operand to the dst.
          MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
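          // Tying makes the register allocator place the init value in the
          // same registers the image instruction writes, so the zeroes
          // survive in the result when the hardware skips the write.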
          Changed = true;
        }
      }
    }
  }

  return Changed;
}