llvm-mirror/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Matt Arsenault 4f44d0e3e0 AMDGPU: Remove tfe bit from flat instruction definitions
We don't use it, it was removed in gfx9, and its encoding bit was
repurposed.

Additionally, actually using it would require changing the output register
class, which wasn't done anyway.

llvm-svn: 302814
2017-05-11 17:38:33 +00:00

//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}

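// Extract the 32-bit half (sub0 or sub1) of a 64-bit operand. Register
// operands are split with a COPY of the composed subregister; immediate
// operands are split into their low and high 32 bits.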
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {
  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());
  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

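// Only 64-bit scalar adds are handled here: the low halves are added with
// S_ADD_U32, the high halves with S_ADDC_U32 (consuming the carry), and the
// two results are recombined with a REG_SEQUENCE.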
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
      .addReg(DstLo)
      .addImm(AMDGPU::sub0)
      .addReg(DstHi)
      .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

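// G_GEP is plain pointer arithmetic on AMDGPU, so it reuses the G_ADD path.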
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

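// Stores are currently always selected to FLAT_STORE_DWORD: operand 1 is the
// address, operand 0 the value to store, followed by two zero immediates for
// the glc/slc bits.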
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  DebugLoc DL = I.getDebugLoc();

  // FIXME: Select store instruction based on address space
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
      .add(I.getOperand(1))
      .add(I.getOperand(0))
      .addImm(0)
      .addImm(0);

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

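// Materialize scalar constants: a 32-bit value becomes a single S_MOV_B32,
// while a 64-bit value is built from two S_MOV_B32s joined by a REG_SEQUENCE.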
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (Size == 32) {
    I.setDesc(TII.get(AMDGPU::S_MOV_B32));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  assert(Size == 64);

  DebugLoc DL = I.getDebugLoc();
  unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  const APInt &Imm = I.getOperand(1).getCImm()->getValue();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
      .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
      .addImm(Imm.ashr(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(HiReg)
      .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

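// Walk the chain of G_GEPs feeding a load and record, for each one, the
// constant offset and which base registers live in the SGPR or VGPR bank.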
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

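// A load is treated as uniform if its pointer is known to be scalar (kernel
// argument, constant, global) or is explicitly tagged with amdgpu.uniform
// metadata.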
static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

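// Widen the base dword SMRD opcode to the X2/X4/X8/X16 variant that matches
// the load size in bits.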
static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

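// Try to select a uniform constant-address load as an SMRD instruction.
// Preference order: an encodable immediate offset, the CI-only 32-bit literal
// offset form, then a 32-bit offset materialized into an SGPR; otherwise fall
// back to a zero-offset load from the pointer operand itself.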
bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {
  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addImm(EncodedImm)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addImm(EncodedImm)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addReg(OffsetReg)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
      .addReg(PtrReg)
      .addImm(0)
      .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}

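// Loads first try the SMRD path; anything else is selected to a FLAT load,
// with only 32- and 64-bit sizes supported so far.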
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
      .add(I.getOperand(0))
      .addReg(PtrReg)
      .addImm(0)
      .addImm(0);

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

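// Main entry point: pre-ISel generic opcodes are dispatched to the handlers
// above; anything already target-specific is reported as selected.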
bool AMDGPUInstructionSelector::select(MachineInstr &I) const {
  if (!isPreISelGenericOpcode(I.getOpcode()))
    return true;

  switch (I.getOpcode()) {
  default:
    break;
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_CONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}