//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Memory legalizer - implements memory model. More information can be
/// found here:
/// http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

static cl::opt<bool> AmdgcnSkipCacheInvalidations(
    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
    cl::desc("Use this to skip inserting cache invalidating instructions."));
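
// Note: this is a hidden codegen flag; passing -amdgcn-skip-cache-invalidations
// (for example via llc) suppresses the cache invalidating instructions that
// insertAcquire() would otherwise emit. It is intended as a temporary escape
// hatch while alternative memory models are developed, not as a general
// tuning knob.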

namespace {

LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

/// Memory operation flags. Can be ORed together.
enum class SIMemOp {
  NONE = 0u,
  LOAD = 1u << 0,
  STORE = 1u << 1,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
};

/// Position to insert a new instruction relative to an existing
/// instruction.
enum class Position {
  BEFORE,
  AFTER
};

/// The atomic synchronization scopes supported by the AMDGPU target.
enum class SIAtomicScope {
  NONE,
  SINGLETHREAD,
  WAVEFRONT,
  WORKGROUP,
  AGENT,
  SYSTEM
};

/// The distinct address spaces supported by the AMDGPU target for
/// atomic memory operations. Can be ORed together.
enum class SIAtomicAddrSpace {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
  GDS = 1u << 3,
  OTHER = 1u << 4,

  /// The address spaces that can be accessed by a FLAT instruction.
  FLAT = GLOBAL | LDS | SCRATCH,

  /// The address spaces that support atomic instructions.
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,

  /// All address spaces.
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
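
// Illustrative example: a flat atomic that is known to access only global and
// LDS memory is summarized as
//   SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::LDS
// and membership tests below use the bitmask-enum operators, e.g.
//   (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE.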

/// Sets named bit \p BitName to "true" if present in instruction \p MI.
/// \returns Returns true if \p MI is modified, false otherwise.
template <uint16_t BitName>
bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
  int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
  if (BitIdx == -1)
    return false;

  MachineOperand &Bit = MI->getOperand(BitIdx);
  if (Bit.getImm() != 0)
    return false;

  Bit.setImm(1);
  return true;
}
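
// The SICacheControl subclasses below wrap this helper as enableGLCBit(),
// enableSLCBit() and enableDLCBit(); it returns false (no change) both when
// the opcode has no such operand and when the bit is already set.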

class SIMemOpInfo final {
private:

  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsNonTemporal = false;

  SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
              SIAtomicScope Scope = SIAtomicScope::SYSTEM,
              SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
              SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
              bool IsCrossAddressSpaceOrdering = true,
              AtomicOrdering FailureOrdering =
                AtomicOrdering::SequentiallyConsistent,
              bool IsNonTemporal = false)
    : Ordering(Ordering), FailureOrdering(FailureOrdering),
      Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
      InstrAddrSpace(InstrAddrSpace),
      IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
      IsNonTemporal(IsNonTemporal) {
    // There is also no cross address space ordering if the ordering
    // address space is the same as the instruction address space and
    // only contains a single address space.
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      this->IsCrossAddressSpaceOrdering = false;
  }

public:
  /// \returns Atomic synchronization scope of the machine instruction used to
  /// create this SIMemOpInfo.
  SIAtomicScope getScope() const {
    return Scope;
  }

  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }

  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }

  /// \returns The address spaces accessed by the machine
  /// instruction used to create this SIMemOpInfo.
  SIAtomicAddrSpace getInstrAddrSpace() const {
    return InstrAddrSpace;
  }

  /// \returns The address spaces that must be ordered by the machine
  /// instruction used to create this SIMemOpInfo.
  SIAtomicAddrSpace getOrderingAddrSpace() const {
    return OrderingAddrSpace;
  }

  /// \returns Return true iff memory ordering of operations on
  /// different address spaces is required.
  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  /// \returns True if memory access of the machine instruction used to
  /// create this SIMemOpInfo is non-temporal, false otherwise.
  bool isNonTemporal() const {
    return IsNonTemporal;
  }

  /// \returns True if ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo is unordered or higher, false otherwise.
  bool isAtomic() const {
    return Ordering != AtomicOrdering::NotAtomic;
  }

};
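
// Note that the default-constructed SIMemOpInfo is deliberately the most
// conservative summary (sequentially consistent, system scope, all atomic
// address spaces, cross-address-space ordering); it is what the accessors
// below return when an instruction carries no memory operands.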

class SIMemOpAccess final {
private:
  AMDGPUMachineModuleInfo *MMI = nullptr;

  /// Reports unsupported message \p Msg for \p MI to LLVM context.
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  /// Inspects the target synchronization scope \p SSID and determines
  /// the SI atomic scope it corresponds to, the address spaces it
  /// covers, and whether the memory ordering applies between address
  /// spaces.
  Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;

  /// \return Return a bit set of the address spaces accessed by \p AS.
  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  /// \returns Info constructed from \p MI, which has at least one machine
  /// memory operand.
  Optional<SIMemOpInfo> constructFromMIWithMMO(
      const MachineBasicBlock::iterator &MI) const;

public:
  /// Construct class to support accessing the machine memory operands
  /// of instructions in the machine function \p MF.
  SIMemOpAccess(MachineFunction &MF);

  /// \returns Load info if \p MI is a load operation, "None" otherwise.
  Optional<SIMemOpInfo> getLoadInfo(
      const MachineBasicBlock::iterator &MI) const;

  /// \returns Store info if \p MI is a store operation, "None" otherwise.
  Optional<SIMemOpInfo> getStoreInfo(
      const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  Optional<SIMemOpInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
  /// rmw operation, "None" otherwise.
  Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
      const MachineBasicBlock::iterator &MI) const;
};

class SICacheControl {
protected:

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  IsaVersion IV;

  /// Whether to insert cache invalidating instructions.
  bool InsertCacheInv;

  SICacheControl(const GCNSubtarget &ST);

public:

  /// Create a cache control for the subtarget \p ST.
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  /// Update \p MI memory load instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p
  /// AddrSpace. Return true iff the instruction was modified.
  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory instruction to indicate it is
  /// nontemporal. Return true iff the instruction was modified.
  virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
    const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure any subsequent memory instructions of this
  /// thread with address spaces \p AddrSpace will observe the previous memory
  /// operations by any thread for memory scopes up to memory scope \p Scope.
  /// Returns true iff any instructions inserted.
  virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             Position Pos) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative
  /// to instruction \p MI to ensure memory instructions before \p Pos of kind
  /// \p Op associated with address spaces \p AddrSpace have completed. Used
  /// between memory instructions to enforce the order they become visible as
  /// observed by other memory instructions executing in memory scope \p Scope.
  /// \p IsCrossAddrSpaceOrdering indicates if the memory ordering is between
  /// address spaces. Returns true iff any instructions inserted.
  virtual bool insertWait(MachineBasicBlock::iterator &MI,
                          SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace,
                          SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering,
                          Position Pos) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure previous memory instructions by this thread
  /// with address spaces \p AddrSpace have completed and can be observed by
  /// subsequent memory instructions by any thread executing in memory scope \p
  /// Scope. \p IsCrossAddrSpaceOrdering indicates if the memory ordering is
  /// between address spaces. Returns true iff any instructions inserted.
  virtual bool insertRelease(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;

  /// Virtual destructor to allow derivations to be deleted.
  virtual ~SICacheControl() = default;

};
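
// In broad terms, and following the model documented in AMDGPUUsage, an
// acquire operation is realized as a wait (insertWait) followed by a cache
// invalidate (insertAcquire), and a release operation as a wait that makes
// prior writes visible (insertRelease) placed before the releasing
// instruction; the expand* routines at the end of this file combine these
// hooks per atomic ordering.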

class SIGfx6CacheControl : public SICacheControl {
protected:

  /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::glc>(MI);
  }

  /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::slc>(MI);
  }

public:

  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {};

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;
};

class SIGfx7CacheControl : public SIGfx6CacheControl {
public:

  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;

};

class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:
  bool CuMode = false;

  /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::dlc>(MI);
  }

public:

  SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
    SIGfx7CacheControl(ST), CuMode(CuMode) {};

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;
};
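
// GFX10 note: each CU has its own L0 vector cache, and work-groups may run in
// WGP mode (waves spread over both CUs of a WGP) or CU mode (all waves on one
// CU), which is why CuMode threads through the methods above; the dlc bit is
// additionally set to bypass the shared GL1 cache for agent/system scope
// accesses (see enableLoadCacheBypass below).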

class SIMemoryLegalizer final : public MachineFunctionPass {
private:

  /// Cache Control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Return true iff instruction \p MI is an atomic instruction that
  /// returns a result.
  bool isAtomicRet(const MachineInstr &MI) const {
    return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
  }

  /// Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end namespace anonymous

void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
  Func.getContext().diagnose(Diag);
}

Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrScope) const {
  if (SSID == SyncScope::System)
    return std::make_tuple(SIAtomicScope::SYSTEM,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == MMI->getAgentSSID())
    return std::make_tuple(SIAtomicScope::AGENT,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == MMI->getWavefrontSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == SyncScope::SingleThread)
    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::SYSTEM,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::AGENT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  return None;
}
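
// The returned tuple is (scope, address spaces the ordering applies to,
// cross-address-space ordering). The plain sync scopes order all atomic
// address spaces and do so across address spaces, while the "one-as" variants
// restrict the ordering to the address spaces the instruction itself accesses
// and do not order across address spaces; an unrecognized scope yields None,
// which callers report as unsupported.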

SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  if (AS == AMDGPUAS::FLAT_ADDRESS)
    return SIAtomicAddrSpace::FLAT;
  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
    return SIAtomicAddrSpace::GLOBAL;
  if (AS == AMDGPUAS::LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;

  return SIAtomicAddrSpace::OTHER;
}

SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
}

Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;

  // Validator should check whether or not MMOs cover the entire set of
  // locations accessed by the memory instruction.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    InstrAddrSpace |=
      toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(MI,
          "Unsupported non-inclusive atomic synchronization scope");
        return None;
      }

      SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
      Ordering =
          isStrongerThan(Ordering, OpOrdering) ?
              Ordering : MMO->getOrdering();
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
              FailureOrdering : MMO->getFailureOrdering();
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return None;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      ScopeOrNone.getValue();
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return None;
    }
  }
  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsNonTemporal);
}
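
// When an instruction carries several memory operands, the summary merges
// them conservatively: the access is non-temporal only if every MMO is, the
// accessed address spaces are unioned, the strongest success and failure
// orderings win, and the sync scopes must be related by inclusion or the
// instruction is rejected as unsupported.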

Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}

Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}

Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  AtomicOrdering Ordering =
    static_cast<AtomicOrdering>(MI->getOperand(0).getImm());

  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return None;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
    ScopeOrNone.getValue();

  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return None;
  }

  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
                     IsCrossAddressSpaceOrdering);
}

Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}

SICacheControl::SICacheControl(const GCNSubtarget &ST) {
  TII = ST.getInstrInfo();
  IV = getIsaVersion(ST.getCPU());
  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}

/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx7CacheControl>(ST);
  return std::make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
}
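
// Factory choice, in short: Southern Islands uses SIGfx6CacheControl, every
// generation up to but excluding GFX10 falls back to SIGfx7CacheControl, and
// GFX10 gets SIGfx10CacheControl parameterized by whether the subtarget runs
// work-groups in CU mode. The pass is expected to hold the result in its CC
// member (e.g. CC = SICacheControl::create(ST)).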

bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory caches
  /// to be bypassed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  return Changed;
}

bool SIGfx6CacheControl::enableNonTemporal(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->mayLoad() ^ MI->mayStore());
  bool Changed = false;

  /// TODO: Do not enableGLCBit if rmw atomic.
  Changed |= enableGLCBit(MI);
  Changed |= enableSLCBit(MI);

  return Changed;
}

bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace,
                                    SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering,
                                    Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L1 cache keeps all memory operations in order for
      // wavefronts in the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
      // not needed as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/GDS memory as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If no cross address space ordering then a GDS "S_WAITCNT lgkmcnt(0)"
      // is not needed as GDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/LDS memory as GDS operations could be
      // reordered with respect to later global/LDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for
      // the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
      AMDGPU::encodeWaitcnt(IV,
                            VMCnt ? 0 : getVmcntBitMask(IV),
                            getExpcntBitMask(IV),
                            LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
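
// For example, an agent-scope ordering over global memory requests vmcnt(0)
// while passing the full bit masks for expcnt and lgkmcnt (meaning "do not
// wait" on those counters), so the emitted instruction is effectively
// "S_WAITCNT vmcnt(0)".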

bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       bool IsCrossAddrSpaceOrdering,
                                       Position Pos) const {
  return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                    IsCrossAddrSpaceOrdering, Pos);
}

bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();

  const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
                                    ? AMDGPU::BUFFER_WBINVL1
                                    : AMDGPU::BUFFER_WBINVL1_VOL;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    /// TODO Do not set glc for rmw atomic operations as they
    /// implicitly bypass the L0/L1 caches.

    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Changed |= enableGLCBit(MI);
      Changed |= enableDLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU of
      // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
      // CU mode all waves of a work-group are on the same CU, and so the L0
      // does not need to be bypassed.
      if (!CuMode) Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory caches
  /// to be bypassed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  return Changed;
}

bool SIGfx10CacheControl::enableNonTemporal(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->mayLoad() ^ MI->mayStore());
  bool Changed = false;

  Changed |= enableSLCBit(MI);
  /// TODO for store (non-rmw atomic) instructions also enableGLCBit(MI)

  return Changed;
}

bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU of
      // the WGP. Therefore need to invalidate the L0 which is per CU. Otherwise
      // in CU mode all waves of a work-group are on the same CU, and so the
      // L0 does not need to be invalidated.
      if (!CuMode) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace,
                                     SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU of
      // the WGP. Therefore need to wait for operations to complete to ensure
      // they are visible to waves in the other CU as the L0 is per CU.
      // Otherwise in CU mode all waves of a work-group are on the same CU
      // which shares the same L0.
      if (!CuMode) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          VMCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          VSCnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations in order for
      // work-items in the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
      // not needed as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/GDS memory as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If no cross address space ordering then a GDS "S_WAITCNT lgkmcnt(0)"
      // is not needed as GDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/LDS memory as GDS operations could be
      // reordered with respect to later global/LDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for
      // the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
      AMDGPU::encodeWaitcnt(IV,
                            VMCnt ? 0 : getVmcntBitMask(IV),
                            getExpcntBitMask(IV),
                            LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
    Changed = true;
  }

  if (VSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
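
// Unlike GFX6-9, GFX10 tracks outstanding stores with a separate VSCNT
// counter, so a wait that must cover stores emits an extra
// "S_WAITCNT_VSCNT null, 0" alongside the regular S_WAITCNT.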

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());
    }

    if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getInstrAddrSpace(),
                                SIMemOp::LOAD,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  // Atomic instructions do not have the nontemporal attribute.
  if (MOI.isNonTemporal()) {
    Changed |= CC->enableNonTemporal(MI);
    return Changed;
  }

  return Changed;
}
|
|
|
|
|
2017-09-05 18:18:05 +02:00
|
|
|
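/// Expands a store: for atomic stores, inserts the release operations
/// required by the store's ordering and scope before it; for nontemporal
/// stores, applies the nontemporal handling. Returns true if anything was
/// changed.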
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    return Changed;
  }

  // Atomic instructions do not have the nontemporal attribute.
  if (MOI.isNonTemporal()) {
    Changed |= CC->enableNonTemporal(MI);
    return Changed;
  }

  return Changed;
}

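/// Expands an ATOMIC_FENCE pseudo into the release and/or acquire operations
/// required by its ordering and scope. The pseudo itself is queued for
/// removal by removeAtomicPseudoMIs(). Returns true if anything was changed.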
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      /// TODO: This relies on a barrier always generating a waitcnt
      /// for LDS to ensure it is not reordered with the completion of
      /// the preceding LDS operations. If the barrier had a memory
      /// ordering and memory scope, then the library would not need to
      /// generate a fence. Could add support in this file for
      /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
      /// adding S_WAITCNT before an S_BARRIER.
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    // TODO: If both release and invalidate are happening they could be
    // combined to use the single "BUFFER_WBL2" instruction. This could be
    // done by reorganizing this code or as part of optimizing the
    // SIInsertWaitcnt pass to track cache invalidate and write back
    // instructions.

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::BEFORE);

    return Changed;
  }

  return Changed;
}

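/// Expands an atomic cmpxchg or read-modify-write instruction: inserts the
/// release operations required before it and the waits and cache invalidates
/// required after it, based on its success and failure orderings. Returns
/// true if anything was changed.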
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
  MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

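    // After the atomic operation, wait for it to complete and invalidate the
    // caches; a returning atomic is waited on as a load, a non-returning one
    // as a store.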
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                isAtomicRet(*MI) ? SIMemOp::LOAD :
                                                   SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  return Changed;
}

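/// Walks every instruction in the function and expands those that access
/// memory according to the AMDGPU memory model.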
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  SIMemOpAccess MOA(MF);
  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {

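      // Expand load/store bundles so that each memory instruction can be
      // examined and legalized individually.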
      if (MI->getOpcode() == TargetOpcode::BUNDLE && MI->mayLoadOrStore()) {
        MachineBasicBlock::instr_iterator II(MI->getIterator());
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }

        MI->eraseFromParent();
        MI = II->getIterator();
      }

      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

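      // Expand the instruction according to the kind of memory operation it
      // performs.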
      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}