1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

AMDGPU: Refactor kernel argument lowering

Summary:
The main challenge in lowering kernel arguments for AMDGPU is determing the
memory type of the argument.  The generic calling convention code assumes
that only legal register types can be stored in memory, but this is not the
case for AMDGPU.

This consolidates all the logic AMDGPU uses for deducing memory types into a single
function.  This will make it much easier to support different ABIs in the future.

Reviewers: arsenm

Subscribers: arsenm, wdng, nhaehnle, llvm-commits, yaxunl

Differential Revision: https://reviews.llvm.org/D24614

llvm-svn: 281781
This commit is contained in:
Tom Stellard 2016-09-16 21:53:00 +00:00
parent 6fa6edbc52
commit d9bf037744
5 changed files with 110 additions and 55 deletions

View File

@ -37,7 +37,7 @@ static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
MachineFunction &MF = State.getMachineFunction();
AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
uint64_t Offset = MFI->allocateKernArg(ValVT.getStoreSize(),
uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
ArgFlags.getOrigAlign());
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
@ -626,9 +626,104 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//
void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const {
/// The SelectionDAGBuilder will automatically promote function arguments
/// with illegal types. However, this does not work for the AMDGPU targets
/// since the function arguments are stored in memory as these illegal types.
/// In order to handle this properly we need to get the original types sizes
/// from the LLVM IR Function and fixup the ISD:InputArg values before
/// passing them to AnalyzeFormalArguments()
/// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
/// input values across multiple registers. Each item in the Ins array
/// represents a single value that will be stored in regsters. Ins[x].VT is
/// the value type of the value that will be stored in the register, so
/// whatever SDNode we lower the argument to needs to be this type.
///
/// In order to correctly lower the arguments we need to know the size of each
/// argument. Since Ins[x].VT gives us the size of the register that will
/// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
/// for the orignal function argument so that we can deduce the correct memory
/// type to use for Ins[x]. In most cases the correct memory type will be
/// Ins[x].ArgVT. However, this will not always be the case. If, for example,
/// we have a kernel argument of type v8i8, this argument will be split into
/// 8 parts and each part will be represented by its own item in the Ins array.
/// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of
/// the argument before it was split. From this, we deduce that the memory type
/// for each individual part is i8. We pass the memory type as LocVT to the
/// calling convention analysis function and the register type (Ins[x].VT) as
/// the ValVT.
void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const {
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
const ISD::InputArg &In = Ins[i];
EVT MemVT;
unsigned NumRegs = getNumRegisters(State.getContext(), In.ArgVT);
if (In.ArgVT == MVT::i16 || In.ArgVT == MVT::i8 || In.ArgVT == MVT::f16) {
// The ABI says the caller will extend these values to 32-bits.
MemVT = In.ArgVT.isInteger() ? MVT::i32 : MVT::f32;
} else if (NumRegs == 1) {
// This argument is not split, so the IR type is the memory type.
assert(!In.Flags.isSplit());
if (In.ArgVT.isExtended()) {
// We have an extended type, like i24, so we should just use the register type
MemVT = In.VT;
} else {
MemVT = In.ArgVT;
}
} else if (In.ArgVT.isVector() && In.VT.isVector() &&
In.ArgVT.getScalarType() == In.VT.getScalarType()) {
assert(In.ArgVT.getVectorNumElements() > In.VT.getVectorNumElements());
// We have a vector value which has been split into a vector with
// the same scalar type, but fewer elements. This should handle
// all the floating-point vector types.
MemVT = In.VT;
} else if (In.ArgVT.isVector() &&
In.ArgVT.getVectorNumElements() == NumRegs) {
// This arg has been split so that each element is stored in a separate
// register.
MemVT = In.ArgVT.getScalarType();
} else if (In.ArgVT.isExtended()) {
// We have an extended type, like i65.
MemVT = In.VT;
} else {
unsigned MemoryBits = In.ArgVT.getStoreSizeInBits() / NumRegs;
assert(In.ArgVT.getStoreSizeInBits() % NumRegs == 0);
if (In.VT.isInteger()) {
MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
} else if (In.VT.isVector()) {
assert(!In.VT.getScalarType().isFloatingPoint());
unsigned NumElements = In.VT.getVectorNumElements();
assert(MemoryBits % NumElements == 0);
// This vector type has been split into another vector type with
// a different elements size.
EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
MemoryBits / NumElements);
MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
} else {
llvm_unreachable("cannot deduce memory type.");
}
}
// Convert one element vectors to scalar.
if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
MemVT = MemVT.getScalarType();
if (MemVT.isExtended()) {
// This should really only happen if we have vec3 arguments
assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3);
MemVT = MemVT.getPow2VectorType(State.getContext());
}
assert(MemVT.isSimple());
allocateKernArg(i, In.VT, MemVT.getSimpleVT(), CCValAssign::Full, In.Flags,
State);
}
}
void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const {
State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}
@ -2617,38 +2712,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
// Helper functions
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::getOriginalFunctionArgs(
SelectionDAG &DAG,
const Function *F,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<ISD::InputArg> &OrigIns) const {
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
if (Ins[i].ArgVT == Ins[i].VT) {
OrigIns.push_back(Ins[i]);
continue;
}
EVT VT;
if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
// Vector has been split into scalars.
VT = Ins[i].ArgVT.getVectorElementType();
} else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
Ins[i].ArgVT.getVectorElementType() !=
Ins[i].VT.getVectorElementType()) {
// Vector elements have been promoted
VT = Ins[i].ArgVT;
} else {
// Vector has been spilt into smaller vectors.
VT = Ins[i].VT;
}
ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
Ins[i].OrigArgIndex, Ins[i].PartOffset);
OrigIns.push_back(Arg);
}
}
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {

View File

@ -100,16 +100,8 @@ protected:
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const;
/// The SelectionDAGBuilder will automatically promote function arguments
/// with illegal types. However, this does not work for the AMDGPU targets
/// since the function arguments are stored in memory as these illegal types.
/// In order to handle this properly we need to get the origianl types sizes
/// from the LLVM IR Function and fixup the ISD:InputArg values before
/// passing them to AnalyzeFormalArguments()
void getOriginalFunctionArgs(SelectionDAG &DAG,
const Function *F,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<ISD::InputArg> &OrigIns) const;
void analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
void AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
void AnalyzeReturn(CCState &State,

View File

@ -1512,9 +1512,11 @@ SDValue R600TargetLowering::LowerFormalArguments(
SmallVector<ISD::InputArg, 8> LocalIns;
getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
AnalyzeFormalArguments(CCInfo, LocalIns);
if (AMDGPU::isShader(CallConv)) {
AnalyzeFormalArguments(CCInfo, Ins);
} else {
analyzeFormalArgumentsCompute(CCInfo, Ins);
}
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
CCValAssign &VA = ArgLocs[i];

View File

@ -685,9 +685,6 @@ SDValue SITargetLowering::LowerFormalArguments(
}
if (!AMDGPU::isShader(CallConv)) {
getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
Splits);
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
@ -735,7 +732,10 @@ SDValue SITargetLowering::LowerFormalArguments(
CCInfo.AllocateReg(FlatScratchInitReg);
}
AnalyzeFormalArguments(CCInfo, Splits);
if (!AMDGPU::isShader(CallConv))
analyzeFormalArgumentsCompute(CCInfo, Ins);
else
AnalyzeFormalArguments(CCInfo, Splits);
SmallVector<SDValue, 16> Chains;
@ -752,7 +752,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (VA.isMemLoc()) {
VT = Ins[i].VT;
EVT MemVT = Splits[i].VT;
EVT MemVT = VA.getLocVT();
const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about

View File

@ -1,5 +1,3 @@
; REQUIRES: asserts
; XFAIL: *
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s
; CHECK-LABEL: {{^}}kernel_arg_i64: