2012-12-11 22:25:42 +01:00
|
|
|
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
/// \file
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Implementation of the TargetInstrInfo class that is common to all
|
2012-12-11 22:25:42 +01:00
|
|
|
/// AMD GPUs.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "AMDGPUInstrInfo.h"
|
|
|
|
#include "AMDGPURegisterInfo.h"
|
|
|
|
#include "AMDGPUTargetMachine.h"
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instuction
and register defintions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 04:03:23 +02:00
|
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
2012-12-11 22:25:42 +01:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
|
|
|
2014-04-22 04:03:14 +02:00
|
|
|
using namespace llvm;

// Pull in the tablegen-erated constructor and destructor definitions for
// AMDGPUGenInstrInfo (the generated base class of AMDGPUInstrInfo).
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"
|
|
|
|
|
AMDGPU: Make getTgtMemIntrinsic table-driven for resource-based intrinsics
Summary:
Avoids having to list all intrinsics manually.
This is in preparation for the new dimension-aware image intrinsics,
which I'd rather not have to list here by hand.
Change-Id: If7ced04998397ef68c4cb8f7de66b5050fb767e5
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44937
llvm-svn: 328938
2018-04-01 19:09:07 +02:00
|
|
|
namespace llvm {
namespace AMDGPU {

// Instantiate the tablegen-erated lookup tables for resource-based
// intrinsics (used to drive getTgtMemIntrinsic without listing each
// intrinsic by hand).
#define GET_RSRCINTRINSIC_IMPL
#include "AMDGPUGenSearchableTables.inc"

// Instantiate the lookup tables for the D16 image-dimension intrinsics.
// Note: the same .inc file is included twice on purpose — each GET_* macro
// selects a different generated table from it.
#define GET_D16IMAGEDIMINTRINSIC_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // namespace AMDGPU
} // namespace llvm
|
|
|
|
|
2013-11-19 01:57:56 +01:00
|
|
|
// Pin the vtable to this file. Defining one out-of-line virtual method here
// gives the class a "home" translation unit so the vtable is emitted once.
void AMDGPUInstrInfo::anchor() {}
|
|
|
|
|
2016-06-24 08:30:11 +02:00
|
|
|
// Construct the instruction info shared by all AMD GPU subtargets, caching
// the subtarget and its address-space mapping.
// NOTE(review): the two opcodes forwarded to the generated base class are
// presumably the call-frame setup/destroy pseudos — confirm against
// AMDGPUGenInstrInfo's constructor signature.
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
    : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
      ST(ST),
      AMDGPUASI(ST.getAMDGPUAS()) {}
|
2012-12-11 22:25:42 +01:00
|
|
|
|
2014-08-06 02:29:49 +02:00
|
|
|
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
|
|
|
|
// the first 16 loads will be interleaved with the stores, and the next 16 will
|
|
|
|
// be clustered as expected. It should really split into 2 16 store batches.
|
|
|
|
//
|
|
|
|
// Loads are clustered until this returns false, rather than trying to schedule
|
|
|
|
// groups of stores. This also means we have to deal with saying different
|
|
|
|
// address space loads should be clustered, and ones which might cause bank
|
|
|
|
// conflicts.
|
|
|
|
//
|
|
|
|
// This might be deprecated so it might not be worth that much effort to fix.
|
|
|
|
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
|
|
|
|
int64_t Offset0, int64_t Offset1,
|
|
|
|
unsigned NumLoads) const {
|
|
|
|
assert(Offset1 > Offset0 &&
|
|
|
|
"Second offset should be larger than first offset!");
|
|
|
|
// If we have less than 16 loads in a row, and the offsets are within 64
|
|
|
|
// bytes, then schedule together.
|
|
|
|
|
|
|
|
// A cacheline is 64 bytes (for global memory).
|
|
|
|
return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
|
2012-12-11 22:25:42 +01:00
|
|
|
}
|
|
|
|
|
2016-06-24 08:30:11 +02:00
|
|
|
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,     // SOUTHERN_ISLANDS / SEA_ISLANDS (see subtargetEncodingFamily).
  VI = 1,     // VOLCANIC_ISLANDS and the GFX9 base encoding.
  SDWA = 2,   // SDWA instructions on pre-GFX9 subtargets.
  SDWA9 = 3,  // SDWA instructions on GFX9 (see pseudoToMCOpcode).
  GFX80 = 4,  // Unpacked-D16 memory instructions (UnpackedD16VMem feature).
  GFX9 = 5    // Instructions renamed in GFX9 (SIInstrFlags::renamedInGFX9).
};
|
|
|
|
|
|
|
|
/// Map a subtarget's generation to the encoding family used to translate
/// pseudo opcodes into real machine opcodes.
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
  case AMDGPUSubtarget::GFX9:
    return SIEncodingFamily::VI;

  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
  // FIXME: This should never be called for r600 GPUs; the r600 generations
  // fall through to the SI family only so every enumerator is covered.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}
|
|
|
|
|
|
|
|
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
|
2017-06-21 10:53:38 +02:00
|
|
|
SIEncodingFamily Gen = subtargetEncodingFamily(ST);
|
2017-11-20 19:24:21 +01:00
|
|
|
|
|
|
|
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
|
|
|
|
ST.getGeneration() >= AMDGPUSubtarget::GFX9)
|
|
|
|
Gen = SIEncodingFamily::GFX9;
|
|
|
|
|
2017-06-21 10:53:38 +02:00
|
|
|
if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
|
|
|
|
Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
|
|
|
|
: SIEncodingFamily::SDWA;
|
2018-02-01 19:41:33 +01:00
|
|
|
// Adjust the encoding family to GFX80 for D16 buffer instructions when the
|
|
|
|
// subtarget has UnpackedD16VMem feature.
|
|
|
|
// TODO: remove this when we discard GFX80 encoding.
|
|
|
|
if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
|
|
|
|
&& !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
|
|
|
|
Gen = SIEncodingFamily::GFX80;
|
2017-06-21 10:53:38 +02:00
|
|
|
|
|
|
|
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
|
2015-01-15 19:42:51 +01:00
|
|
|
|
|
|
|
// -1 means that Opcode is already a native instruction.
|
|
|
|
if (MCOp == -1)
|
|
|
|
return Opcode;
|
|
|
|
|
|
|
|
// (uint16_t)-1 means that Opcode is a pseudo instruction that has
|
|
|
|
// no encoding in the given subtarget generation.
|
|
|
|
if (MCOp == (uint16_t)-1)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
return MCOp;
|
|
|
|
}
|
2018-02-09 17:57:48 +01:00
|
|
|
|
|
|
|
// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
|
|
|
|
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
|
|
|
|
const Value *Ptr = MMO->getValue();
|
|
|
|
// UndefValue means this is a load of a kernel input. These are uniform.
|
|
|
|
// Sometimes LDS instructions have constant pointers.
|
|
|
|
// If Ptr is null, then that means this mem operand contains a
|
|
|
|
// PseudoSourceValue like GOT.
|
|
|
|
if (!Ptr || isa<UndefValue>(Ptr) ||
|
|
|
|
isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
|
|
|
|
return true;
|
|
|
|
|
2018-02-09 17:57:57 +01:00
|
|
|
if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
|
|
|
|
return true;
|
|
|
|
|
2018-02-09 17:57:48 +01:00
|
|
|
if (const Argument *Arg = dyn_cast<Argument>(Ptr))
|
|
|
|
return AMDGPU::isArgPassedInSGPR(Arg);
|
|
|
|
|
|
|
|
const Instruction *I = dyn_cast<Instruction>(Ptr);
|
|
|
|
return I && I->getMetadata("amdgpu.uniform");
|
|
|
|
}
|