1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

AMDGPU/GlobalISel: Refactor special argument management

This commit is contained in:
Matt Arsenault 2020-07-20 15:56:39 -04:00
parent 72b7484d1d
commit e11e91bb11
3 changed files with 46 additions and 60 deletions

View File

@ -993,7 +993,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
Register InputReg = MRI.createGenericVirtualRegister(ArgTy);
if (IncomingArg) {
LI->loadInputValue(InputReg, MIRBuilder, IncomingArg);
LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
} else {
assert(InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
@ -1026,13 +1026,16 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
if (!OutgoingArg)
return false;
const ArgDescriptor *IncomingArgX = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X));
const ArgDescriptor *IncomingArgY = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y));
const ArgDescriptor *IncomingArgZ = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z));
auto WorkitemIDX =
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
auto WorkitemIDY =
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
auto WorkitemIDZ =
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
const LLT S32 = LLT::scalar(32);
// If incoming ids are not packed we need to pack them.
@ -1040,12 +1043,14 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
Register InputReg;
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX) {
InputReg = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX);
LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
}
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
Register Y = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Y, MIRBuilder, IncomingArgY);
LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
std::get<2>(WorkitemIDY));
Y = MIRBuilder.buildShl(S32, Y, MIRBuilder.buildConstant(S32, 10)).getReg(0);
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
@ -1053,7 +1058,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
Register Z = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Z, MIRBuilder, IncomingArgZ);
LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
std::get<2>(WorkitemIDZ));
Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0);
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
@ -1067,7 +1073,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
ArgDescriptor IncomingArg = ArgDescriptor::createArg(
IncomingArgX ? *IncomingArgX :
IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg);
LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
&AMDGPU::VGPR_32RegClass, S32);
}
if (OutgoingArg->isRegister()) {

View File

@ -1630,8 +1630,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
Register QueuePtr = MRI.createGenericVirtualRegister(
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
if (!loadInputValue(QueuePtr, B, &MFI->getArgInfo().QueuePtr))
if (!loadInputValue(QueuePtr, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
return Register();
// Offset into amd_queue_t for group_segment_aperture_base_hi /
@ -2496,33 +2495,16 @@ Register AMDGPULegalizerInfo::getLiveInRegister(MachineIRBuilder &B,
return insertLiveInCopy(B, MRI, LiveIn, PhyReg);
}
/// Look up the argument descriptor recorded in the current function's
/// SIMachineFunctionInfo for the preloaded value \p ArgType.
///
/// \returns the descriptor, or nullptr (after printing a debug message) when
/// getPreloadedValue reports no descriptor for \p ArgType.
const ArgDescriptor *AMDGPULegalizerInfo::getArgDescriptor(
MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
const ArgDescriptor *Arg;
const TargetRegisterClass *RC;
LLT ArgTy;
// getPreloadedValue yields a (descriptor, register class, type) tuple; only
// the descriptor is returned to the caller, RC and ArgTy are discarded.
std::tie(Arg, RC, ArgTy) = MFI->getPreloadedValue(ArgType);
if (!Arg) {
LLVM_DEBUG(dbgs() << "Required arg register missing\n");
return nullptr;
}
return Arg;
}
bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
const ArgDescriptor *Arg) const {
if (!Arg->isRegister() || !Arg->getRegister().isValid())
return false; // TODO: Handle these
Register SrcReg = Arg->getRegister();
const ArgDescriptor *Arg,
const TargetRegisterClass *ArgRC,
LLT ArgTy) const {
MCRegister SrcReg = Arg->getRegister();
assert(SrcReg.isPhysical() && "Physical register expected");
assert(DstReg.isVirtual() && "Virtual register expected");
MachineRegisterInfo &MRI = *B.getMRI();
LLT Ty = MRI.getType(DstReg);
Register LiveIn = getLiveInRegister(B, MRI, SrcReg, Ty);
Register LiveIn = getLiveInRegister(B, MRI, SrcReg, ArgTy);
if (Arg->isMasked()) {
// TODO: Should we try to emit this once in the entry block?
@ -2545,15 +2527,24 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
return true;
}
/// Load the preloaded function input identified by \p ArgType into \p DstReg.
///
/// Queries the current function's SIMachineFunctionInfo for the argument
/// descriptor, register class and type of \p ArgType and forwards them to the
/// descriptor-based loadInputValue overload.
///
/// \returns false when no descriptor is available for \p ArgType, or when the
/// descriptor does not name a valid register; otherwise the result of the
/// descriptor-based overload.
bool AMDGPULegalizerInfo::loadInputValue(
    Register DstReg, MachineIRBuilder &B,
    AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
  const ArgDescriptor *Arg;
  const TargetRegisterClass *ArgRC;
  LLT ArgTy;
  std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);

  // getPreloadedValue may report no descriptor at all for this input. Callers
  // used to get an early failure in that case, so report failure here rather
  // than dereferencing a null pointer below.
  if (!Arg)
    return false;

  if (!Arg->isRegister() || !Arg->getRegister().isValid())
    return false; // TODO: Handle these

  return loadInputValue(DstReg, B, Arg, ArgRC, ArgTy);
}
bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
const ArgDescriptor *Arg = getArgDescriptor(B, ArgType);
if (!Arg)
return false;
if (!loadInputValue(MI.getOperand(0).getReg(), B, Arg))
if (!loadInputValue(MI.getOperand(0).getReg(), B, ArgType))
return false;
MI.eraseFromParent();
@ -3165,23 +3156,15 @@ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
uint64_t Offset =
ST.getTargetLowering()->getImplicitParameterOffset(
B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
LLT DstTy = MRI.getType(DstReg);
LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());
const ArgDescriptor *Arg;
const TargetRegisterClass *RC;
LLT ArgTy;
std::tie(Arg, RC, ArgTy) =
MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
if (!Arg)
return false;
Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
if (!loadInputValue(KernargPtrReg, B, Arg))
if (!loadInputValue(KernargPtrReg, B,
AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
return false;
// FIXME: This should be nuw
@ -4169,16 +4152,12 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
} else {
// Pass queue pointer to trap handler as input, and insert trap instruction
// Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
const ArgDescriptor *Arg =
getArgDescriptor(B, AMDGPUFunctionArgInfo::QUEUE_PTR);
if (!Arg)
return false;
MachineRegisterInfo &MRI = *B.getMRI();
Register SGPR01(AMDGPU::SGPR0_SGPR1);
Register LiveIn = getLiveInRegister(
B, MRI, SGPR01, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64),
/*InsertLiveInCopy=*/false);
if (!loadInputValue(LiveIn, B, Arg))
if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
return false;
B.buildCopy(SGPR01, LiveIn);
B.buildInstr(AMDGPU::S_TRAP)

View File

@ -91,11 +91,11 @@ public:
bool InsertLiveInCopy = true) const;
Register insertLiveInCopy(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register LiveIn, Register PhyReg) const;
const ArgDescriptor *
getArgDescriptor(MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
const ArgDescriptor *Arg) const;
const ArgDescriptor *Arg,
const TargetRegisterClass *ArgRC, LLT ArgTy) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
bool legalizePreloadedArgIntrin(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;