llvm-mirror/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Commit ae65e281f3 by Chandler Carruth: Update the file headers across all of the LLVM projects in the monorepo to reflect the new license.

We understand that people may be surprised that we're moving the header
entirely to discuss the new license. We checked this carefully with the
Foundation's lawyer and we believe this is the correct approach.

Essentially, all code in the project is now made available by the LLVM
project under our new license, so you will see that the license headers
include that license only. Some of our contributors have contributed
code under our old license, and accordingly, we have retained a copy of
our old license notice in the top-level files in each project and
repository.

llvm-svn: 351636
2019-01-19 08:50:56 +00:00


//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//
#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<unsigned> VRegs) const {
  // FIXME: Add support for non-void returns.
  if (Val)
    return false;

  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
  return true;
}
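
// Return a register holding the address of the kernel argument located
// \p Offset bytes past the preloaded kernarg segment pointer.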
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {

  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
  unsigned KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}
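
// Load the kernel argument of type \p ParamTy found \p Offset bytes into the
// kernarg segment into \p DstReg, using the pointer built by
// lowerParameterPtr. The load is marked invariant and non-temporal.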
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        unsigned DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}
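
// Lower the IR arguments of \p F into the virtual registers in \p VRegs.
// Kernel arguments are loaded from the kernarg segment; AMDGPU_VS and
// AMDGPU_PS arguments are assigned by the calling convention and copied from
// their incoming physical registers. Other calling conventions are not
// handled yet.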
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {
  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
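
  // Reserve any preloaded system SGPR inputs this function uses (private
  // segment buffer, dispatch ptr, queue ptr, kernarg segment ptr, dispatch
  // id, flat scratch init) so the calling convention does not assign them to
  // user arguments.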
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info->hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info->hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info->hasQueuePtr()) {
    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info->hasKernargSegmentPtr()) {
    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
    const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    unsigned VReg = MRI.createGenericVirtualRegister(P2);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info->hasDispatchID()) {
    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info->hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(FlatScratchInitReg);
  }
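
  // Each kernel argument is loaded directly from the kernarg segment at the
  // next ABI-aligned offset; the load alignment is what can be proven from
  // the 16 byte kernarg base alignment and that offset.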
  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
    unsigned i = 0;
    const unsigned KernArgBaseAlign = 16;
    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
    uint64_t ExplicitArgOffset = 0;

    // TODO: Align down to dword alignment and extract bits for extending loads.
    for (auto &Arg : F.args()) {
      Type *ArgTy = Arg.getType();
      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
      if (AllocSize == 0)
        continue;

      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
      ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
      ++i;
    }

    return true;
  }
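
  // For the remaining (shader) calling conventions, let the calling
  // convention's assignment function pick a location for each argument.
  // AMDGPU_PS inputs that have no uses and have not already been allocated
  // are skipped and never assigned a location.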
  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    // We can only handle simple value types at the moment.
    ISD::ArgFlagsTy Flags;
    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);

    if (ValEVT.isVector()) {
      EVT ElemVT = ValEVT.getVectorElementType();
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ElemVT.getSimpleVT();
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);
      if (!Res)
        return false;
    } else {
      MVT ValVT = ValEVT.getSimpleVT();
      if (!ValEVT.isSimple())
        return false;
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }
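
  // Copy each assigned physical register into the corresponding incoming
  // virtual register; arguments skipped above get no copy.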
  Function::const_arg_iterator Arg = F.arg_begin();

  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
    }

    return true;
  }

  return false;
}