ac11cfc716
This also adds new interfaces for the fixed- and scalable-vector cases:

* LLT::fixed_vector
* LLT::scalable_vector

The strategy for migrating to the new interfaces was as follows:

* If the new LLT is a (modified) clone of another LLT, taking the same
  number of elements, then use LLT::vector(OtherTy.getElementCount()); if
  the number of elements is halved or doubled, use .divideCoefficientBy(2)
  or operator*. That is because there is no reason to specifically restrict
  those types to 'fixed_vector'.
* If the algorithm works on the number of elements (as unsigned), then just
  use fixed_vector. This will need to be fixed up in the future when the
  algorithm is extended to scalable vectors, and will then need additional
  tests to confirm that the behaviour is the same for scalable vectors.
* If the test used the /*Scalable=*/true flag of LLT::vector, it is
  replaced by LLT::scalable_vector.

Reviewed By: aemerson

Differential Revision: https://reviews.llvm.org/D104451
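As a minimal sketch (not part of the commit itself), the three migration patterns above map onto the new interface like this, assuming a hypothetical pre-existing vector type OtherTy and 32-bit elements:

    // Pattern 1: clone another LLT's element count, so fixed vs. scalable is
    // inherited rather than hardcoded.
    LLT Same    = LLT::vector(OtherTy.getElementCount(), 32);
    LLT Halved  = LLT::vector(OtherTy.getElementCount().divideCoefficientBy(2), 32);
    LLT Doubled = LLT::vector(OtherTy.getElementCount() * 2, 32);

    // Pattern 2: the surrounding algorithm reasons about a plain unsigned
    // element count, so the type stays pinned to a fixed-width vector.
    LLT V4S32 = LLT::fixed_vector(4, 32);        // <4 x s32>

    // Pattern 3: replaces the old LLT::vector(..., /*Scalable=*/true) form.
    LLT NxV4S32 = LLT::scalable_vector(4, 32);   // <vscale x 4 x s32>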
187 lines · 7.7 KiB · C++
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUArgumentUsageInfo.h"
|
|
#include "AMDGPU.h"
|
|
#include "AMDGPUTargetMachine.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "SIRegisterInfo.h"
|
|
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/Support/NativeFormatting.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
using namespace llvm;

#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"

INITIALIZE_PASS(AMDGPUArgumentUsageInfo, DEBUG_TYPE,
                "Argument Register Usage Information Storage", false, true)

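// AMDGPUArgumentUsageInfo is an immutable analysis: it only stores, per
// function, which preloaded argument registers that function uses, and hands
// the data back out through lookupFuncArgInfo().
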
void ArgDescriptor::print(raw_ostream &OS,
                          const TargetRegisterInfo *TRI) const {
  if (!isSet()) {
    OS << "<not set>\n";
    return;
  }

  if (isRegister())
    OS << "Reg " << printReg(getRegister(), TRI);
  else
    OS << "Stack offset " << getStackOffset();

  if (isMasked()) {
    OS << " & ";
    llvm::write_hex(OS, Mask, llvm::HexPrintStyle::PrefixLower);
  }

  OS << '\n';
}

char AMDGPUArgumentUsageInfo::ID = 0;

const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};

// Hardcoded registers from fixed function ABI
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo =
    AMDGPUFunctionArgInfo::fixedABILayout();

bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) {
  return false;
}

bool AMDGPUArgumentUsageInfo::doFinalization(Module &M) {
  ArgInfoMap.clear();
  return false;
}

void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
  for (const auto &FI : ArgInfoMap) {
    OS << "Arguments for " << FI.first->getName() << '\n'
       << "  PrivateSegmentBuffer: " << FI.second.PrivateSegmentBuffer
       << "  DispatchPtr: " << FI.second.DispatchPtr
       << "  QueuePtr: " << FI.second.QueuePtr
       << "  KernargSegmentPtr: " << FI.second.KernargSegmentPtr
       << "  DispatchID: " << FI.second.DispatchID
       << "  FlatScratchInit: " << FI.second.FlatScratchInit
       << "  PrivateSegmentSize: " << FI.second.PrivateSegmentSize
       << "  WorkGroupIDX: " << FI.second.WorkGroupIDX
       << "  WorkGroupIDY: " << FI.second.WorkGroupIDY
       << "  WorkGroupIDZ: " << FI.second.WorkGroupIDZ
       << "  WorkGroupInfo: " << FI.second.WorkGroupInfo
       << "  PrivateSegmentWaveByteOffset: "
       << FI.second.PrivateSegmentWaveByteOffset
       << "  ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr
       << "  ImplicitArgPtr: " << FI.second.ImplicitArgPtr
       << "  WorkItemIDX: " << FI.second.WorkItemIDX
       << "  WorkItemIDY: " << FI.second.WorkItemIDY
       << "  WorkItemIDZ: " << FI.second.WorkItemIDZ
       << '\n';
  }
}

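// Map a preloaded-value kind to three pieces of information: the argument
// descriptor (null when this function does not receive that input), the
// register class the value is preloaded into, and its low-level type.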
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
AMDGPUFunctionArgInfo::getPreloadedValue(
    AMDGPUFunctionArgInfo::PreloadedValue Value) const {
  switch (Value) {
  case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
    return std::make_tuple(PrivateSegmentBuffer ? &PrivateSegmentBuffer
                                                : nullptr,
                           &AMDGPU::SGPR_128RegClass,
                           LLT::fixed_vector(4, 32));
  }
  case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
    return std::make_tuple(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
                           &AMDGPU::SGPR_64RegClass,
                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
  case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
    return std::make_tuple(WorkGroupIDX ? &WorkGroupIDX : nullptr,
                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
    return std::make_tuple(WorkGroupIDY ? &WorkGroupIDY : nullptr,
                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
    return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
    return std::make_tuple(
        PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
        &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
    return std::make_tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
                           &AMDGPU::SGPR_64RegClass,
                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
  case AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR:
    return std::make_tuple(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
                           &AMDGPU::SGPR_64RegClass,
                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
  case AMDGPUFunctionArgInfo::DISPATCH_ID:
    return std::make_tuple(DispatchID ? &DispatchID : nullptr,
                           &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
  case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
    return std::make_tuple(FlatScratchInit ? &FlatScratchInit : nullptr,
                           &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
  case AMDGPUFunctionArgInfo::DISPATCH_PTR:
    return std::make_tuple(DispatchPtr ? &DispatchPtr : nullptr,
                           &AMDGPU::SGPR_64RegClass,
                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
  case AMDGPUFunctionArgInfo::QUEUE_PTR:
    return std::make_tuple(QueuePtr ? &QueuePtr : nullptr,
                           &AMDGPU::SGPR_64RegClass,
                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
  case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
    return std::make_tuple(WorkItemIDX ? &WorkItemIDX : nullptr,
                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
    return std::make_tuple(WorkItemIDY ? &WorkItemIDY : nullptr,
                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
    return std::make_tuple(WorkItemIDZ ? &WorkItemIDZ : nullptr,
                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
  }
  llvm_unreachable("unexpected preloaded value type");
}

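// A hedged usage sketch (hypothetical caller, not part of the original file):
//
//   const ArgDescriptor *Arg;
//   const TargetRegisterClass *RC;
//   LLT ArgTy;
//   std::tie(Arg, RC, ArgTy) =
//       ArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::DISPATCH_PTR);
//   if (Arg && Arg->isRegister())
//     /* materialize a copy from Arg->getRegister() with class RC, type ArgTy */;
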
constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
  AMDGPUFunctionArgInfo AI;
  AI.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3);
  AI.DispatchPtr = ArgDescriptor::createRegister(AMDGPU::SGPR4_SGPR5);
  AI.QueuePtr = ArgDescriptor::createRegister(AMDGPU::SGPR6_SGPR7);

  // Do not pass the kernarg segment pointer itself; only its incremented
  // version (the implicit argument pointer) is passed in its place.
  AI.ImplicitArgPtr = ArgDescriptor::createRegister(AMDGPU::SGPR8_SGPR9);
  AI.DispatchID = ArgDescriptor::createRegister(AMDGPU::SGPR10_SGPR11);

  // Skip FlatScratchInit/PrivateSegmentSize.
  AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12);
  AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13);
  AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14);

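  // The three work-item IDs are packed into a single VGPR as 10-bit fields:
  // X in bits 0-9, Y in bits 10-19, Z in bits 20-29. The per-descriptor mask
  // selects which field to read.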
  const unsigned Mask = 0x3ff;
  AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask);
  AI.WorkItemIDY = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 10);
  AI.WorkItemIDZ = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 20);
  return AI;
}

const AMDGPUFunctionArgInfo &
AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
  auto I = ArgInfoMap.find(&F);
  if (I == ArgInfoMap.end()) {
    if (AMDGPUTargetMachine::EnableFixedFunctionABI)
      return FixedABIFunctionInfo;

    // Without the fixed ABI, we assume no function has special inputs.
    assert(F.isDeclaration());
    return ExternFunctionInfo;
  }

  return I->second;
}