llvm-mirror/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
Sander de Smalen ac11cfc716 [GlobalISel] NFC: Change LLT::vector to take ElementCount.
This also adds new interfaces for the fixed- and scalable case:
* LLT::fixed_vector
* LLT::scalable_vector

The strategy for migrating to the new interfaces was as follows (an
illustrative sketch follows the list):
* If the new LLT is a (modified) clone of another LLT with the same
  number of elements, use LLT::vector(OtherTy.getElementCount()); if the
  number of elements is halved or doubled, use .divideCoefficientBy(2)
  or operator*. There is no reason to restrict these types to
  'fixed_vector'.
* If the algorithm works on the number of elements (as unsigned), just
  use fixed_vector. This will need to be fixed up when the algorithm is
  later extended to scalable vectors, and will then need additional
  tests to confirm that the behaviour is the same for scalable vectors.
* If the test used the `/*Scalable=*/true` flag of LLT::vector, it is
  replaced by LLT::scalable_vector.
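
For illustration only (not part of the original commit message), here is a
minimal sketch of the three construction paths described above. It assumes an
LLVM tree from around the time of this change, where LLT is declared in
llvm/Support/LowLevelTypeImpl.h; the element counts, scalar sizes, and the
helper function name are made up.

  // Illustrative sketch: LLT::vector taking an ElementCount, next to the new
  // fixed_vector/scalable_vector helpers.
  #include "llvm/Support/LowLevelTypeImpl.h"
  #include <cassert>
  using namespace llvm;

  static void lltMigrationExamples(LLT OtherTy) {
    assert(OtherTy.isVector() && "expected a vector type");
    // Clone another type's element count without caring whether it is a
    // fixed or a scalable vector.
    LLT Same = LLT::vector(OtherTy.getElementCount(),
                           OtherTy.getScalarSizeInBits());
    // Halve or double the element count, still shape-agnostic.
    LLT Half = LLT::vector(OtherTy.getElementCount().divideCoefficientBy(2),
                           OtherTy.getScalarSizeInBits());
    LLT Twice = LLT::vector(OtherTy.getElementCount() * 2,
                            OtherTy.getScalarSizeInBits());
    // Algorithms written against a plain unsigned element count use the
    // explicitly fixed form, e.g. <4 x s32>.
    LLT V4S32 = LLT::fixed_vector(4, 32);
    // Code that used to pass /*Scalable=*/true now uses the scalable form,
    // e.g. <vscale x 2 x s64>.
    LLT NxV2S64 = LLT::scalable_vector(2, 64);
    (void)Same; (void)Half; (void)Twice; (void)V4S32; (void)NxV2S64;
  }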

Reviewed By: aemerson

Differential Revision: https://reviews.llvm.org/D104451
2021-06-24 11:26:12 +01:00

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
INITIALIZE_PASS(AMDGPUArgumentUsageInfo, DEBUG_TYPE,
"Argument Register Usage Information Storage", false, true)

void ArgDescriptor::print(raw_ostream &OS,
                          const TargetRegisterInfo *TRI) const {
  if (!isSet()) {
    OS << "<not set>\n";
    return;
  }

  if (isRegister())
    OS << "Reg " << printReg(getRegister(), TRI);
  else
    OS << "Stack offset " << getStackOffset();

  if (isMasked()) {
    OS << " & ";
    llvm::write_hex(OS, Mask, llvm::HexPrintStyle::PrefixLower);
  }

  OS << '\n';
}

char AMDGPUArgumentUsageInfo::ID = 0;

const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};

// Hardcoded registers from fixed function ABI
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
    = AMDGPUFunctionArgInfo::fixedABILayout();

bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) {
  return false;
}

bool AMDGPUArgumentUsageInfo::doFinalization(Module &M) {
  ArgInfoMap.clear();
  return false;
}

void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
  for (const auto &FI : ArgInfoMap) {
    OS << "Arguments for " << FI.first->getName() << '\n'
       << " PrivateSegmentBuffer: " << FI.second.PrivateSegmentBuffer
       << " DispatchPtr: " << FI.second.DispatchPtr
       << " QueuePtr: " << FI.second.QueuePtr
       << " KernargSegmentPtr: " << FI.second.KernargSegmentPtr
       << " DispatchID: " << FI.second.DispatchID
       << " FlatScratchInit: " << FI.second.FlatScratchInit
       << " PrivateSegmentSize: " << FI.second.PrivateSegmentSize
       << " WorkGroupIDX: " << FI.second.WorkGroupIDX
       << " WorkGroupIDY: " << FI.second.WorkGroupIDY
       << " WorkGroupIDZ: " << FI.second.WorkGroupIDZ
       << " WorkGroupInfo: " << FI.second.WorkGroupInfo
       << " PrivateSegmentWaveByteOffset: "
       << FI.second.PrivateSegmentWaveByteOffset
       << " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr
       << " ImplicitArgPtr: " << FI.second.ImplicitArgPtr
       << " WorkItemIDX " << FI.second.WorkItemIDX
       << " WorkItemIDY " << FI.second.WorkItemIDY
       << " WorkItemIDZ " << FI.second.WorkItemIDZ
       << '\n';
  }
}

std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
AMDGPUFunctionArgInfo::getPreloadedValue(
    AMDGPUFunctionArgInfo::PreloadedValue Value) const {
  switch (Value) {
  case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
    return std::make_tuple(PrivateSegmentBuffer ? &PrivateSegmentBuffer
                                                : nullptr,
                           &AMDGPU::SGPR_128RegClass, LLT::fixed_vector(4, 32));
  }
  case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
    return std::make_tuple(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
                           &AMDGPU::SGPR_64RegClass,
                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
  case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
    return std::make_tuple(WorkGroupIDX ? &WorkGroupIDX : nullptr,
                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
    return std::make_tuple(WorkGroupIDY ? &WorkGroupIDY : nullptr,
                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
    return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
    return std::make_tuple(
        PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
        &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
    return std::make_tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
                           &AMDGPU::SGPR_64RegClass,
                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
  case AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR:
    return std::make_tuple(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
                           &AMDGPU::SGPR_64RegClass,
                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
  case AMDGPUFunctionArgInfo::DISPATCH_ID:
    return std::make_tuple(DispatchID ? &DispatchID : nullptr,
                           &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
  case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
    return std::make_tuple(FlatScratchInit ? &FlatScratchInit : nullptr,
                           &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
  case AMDGPUFunctionArgInfo::DISPATCH_PTR:
    return std::make_tuple(DispatchPtr ? &DispatchPtr : nullptr,
                           &AMDGPU::SGPR_64RegClass,
                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
  case AMDGPUFunctionArgInfo::QUEUE_PTR:
    return std::make_tuple(QueuePtr ? &QueuePtr : nullptr,
                           &AMDGPU::SGPR_64RegClass,
                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
  case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
    return std::make_tuple(WorkItemIDX ? &WorkItemIDX : nullptr,
                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
    return std::make_tuple(WorkItemIDY ? &WorkItemIDY : nullptr,
                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
  case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
    return std::make_tuple(WorkItemIDZ ? &WorkItemIDZ : nullptr,
                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
  }
  llvm_unreachable("unexpected preloaded value type");
}

constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
  AMDGPUFunctionArgInfo AI;
  AI.PrivateSegmentBuffer
    = ArgDescriptor::createRegister(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3);
  AI.DispatchPtr = ArgDescriptor::createRegister(AMDGPU::SGPR4_SGPR5);
  AI.QueuePtr = ArgDescriptor::createRegister(AMDGPU::SGPR6_SGPR7);

  // Do not pass kernarg segment pointer, only pass increment version in its
  // place.
  AI.ImplicitArgPtr = ArgDescriptor::createRegister(AMDGPU::SGPR8_SGPR9);
  AI.DispatchID = ArgDescriptor::createRegister(AMDGPU::SGPR10_SGPR11);

  // Skip FlatScratchInit/PrivateSegmentSize
  AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12);
  AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13);
  AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14);

  const unsigned Mask = 0x3ff;
  AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask);
  AI.WorkItemIDY = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 10);
  AI.WorkItemIDZ = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 20);
  return AI;
}

const AMDGPUFunctionArgInfo &
AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
  auto I = ArgInfoMap.find(&F);
  if (I == ArgInfoMap.end()) {
    if (AMDGPUTargetMachine::EnableFixedFunctionABI)
      return FixedABIFunctionInfo;

    // Without the fixed ABI, we assume no function has special inputs.
    assert(F.isDeclaration());
    return ExternFunctionInfo;
  }

  return I->second;
}