mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[AMDGPU] Remove -amdgpu-spill-sgpr-to-smem.
Summary: The implementation was never completed and never used except in tests. Reviewers: arsenm, mareko Subscribers: qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D69163 llvm-svn: 375293
This commit is contained in:
parent
46e0f8f422
commit
6298616030
@ -48,11 +48,6 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static cl::opt<bool> EnableSpillSGPRToSMEM(
|
|
||||||
"amdgpu-spill-sgpr-to-smem",
|
|
||||||
cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
|
|
||||||
cl::init(false));
|
|
||||||
|
|
||||||
static cl::opt<bool> EnableSpillSGPRToVGPR(
|
static cl::opt<bool> EnableSpillSGPRToVGPR(
|
||||||
"amdgpu-spill-sgpr-to-vgpr",
|
"amdgpu-spill-sgpr-to-vgpr",
|
||||||
cl::desc("Enable spilling VGPRs to SGPRs"),
|
cl::desc("Enable spilling VGPRs to SGPRs"),
|
||||||
@ -65,14 +60,8 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
|
|||||||
SGPRPressureSets(getNumRegPressureSets()),
|
SGPRPressureSets(getNumRegPressureSets()),
|
||||||
VGPRPressureSets(getNumRegPressureSets()),
|
VGPRPressureSets(getNumRegPressureSets()),
|
||||||
AGPRPressureSets(getNumRegPressureSets()),
|
AGPRPressureSets(getNumRegPressureSets()),
|
||||||
SpillSGPRToVGPR(false),
|
SpillSGPRToVGPR(EnableSpillSGPRToVGPR),
|
||||||
SpillSGPRToSMEM(false),
|
|
||||||
isWave32(ST.isWave32()) {
|
isWave32(ST.isWave32()) {
|
||||||
if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
|
|
||||||
SpillSGPRToSMEM = true;
|
|
||||||
else if (EnableSpillSGPRToVGPR)
|
|
||||||
SpillSGPRToVGPR = true;
|
|
||||||
|
|
||||||
unsigned NumRegPressureSets = getNumRegPressureSets();
|
unsigned NumRegPressureSets = getNumRegPressureSets();
|
||||||
|
|
||||||
SGPRSetID = NumRegPressureSets;
|
SGPRSetID = NumRegPressureSets;
|
||||||
@ -759,22 +748,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
|
|
||||||
bool Store) {
|
|
||||||
if (SuperRegSize % 16 == 0) {
|
|
||||||
return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
|
|
||||||
AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
|
|
||||||
}
|
|
||||||
|
|
||||||
if (SuperRegSize % 8 == 0) {
|
|
||||||
return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
|
|
||||||
AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
|
|
||||||
}
|
|
||||||
|
|
||||||
return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
|
|
||||||
AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
|
|
||||||
}
|
|
||||||
|
|
||||||
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
||||||
int Index,
|
int Index,
|
||||||
RegScavenger *RS,
|
RegScavenger *RS,
|
||||||
@ -799,38 +772,16 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
|||||||
|
|
||||||
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
|
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
|
||||||
|
|
||||||
bool SpillToSMEM = spillSGPRToSMEM();
|
|
||||||
if (SpillToSMEM && OnlyToVGPR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
Register FrameReg = getFrameRegister(*MF);
|
|
||||||
|
|
||||||
assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
|
assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
|
||||||
SuperReg != MFI->getFrameOffsetReg() &&
|
SuperReg != MFI->getFrameOffsetReg() &&
|
||||||
SuperReg != MFI->getScratchWaveOffsetReg()));
|
SuperReg != MFI->getScratchWaveOffsetReg()));
|
||||||
|
|
||||||
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
|
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
|
||||||
|
|
||||||
unsigned OffsetReg = AMDGPU::M0;
|
|
||||||
unsigned M0CopyReg = AMDGPU::NoRegister;
|
unsigned M0CopyReg = AMDGPU::NoRegister;
|
||||||
|
|
||||||
if (SpillToSMEM) {
|
|
||||||
if (RS->isRegUsed(AMDGPU::M0)) {
|
|
||||||
M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
|
|
||||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
|
|
||||||
.addReg(AMDGPU::M0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned ScalarStoreOp;
|
|
||||||
unsigned EltSize = 4;
|
unsigned EltSize = 4;
|
||||||
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
|
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
|
||||||
if (SpillToSMEM && isSGPRClass(RC)) {
|
|
||||||
// XXX - if private_element_size is larger than 4 it might be useful to be
|
|
||||||
// able to spill wider vmem spills.
|
|
||||||
std::tie(EltSize, ScalarStoreOp) =
|
|
||||||
getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
|
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
|
||||||
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
|
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
|
||||||
@ -845,47 +796,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
|||||||
Register SubReg =
|
Register SubReg =
|
||||||
NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
|
NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
|
||||||
|
|
||||||
if (SpillToSMEM) {
|
|
||||||
int64_t FrOffset = FrameInfo.getObjectOffset(Index);
|
|
||||||
|
|
||||||
// The allocated memory size is really the wavefront size * the frame
|
|
||||||
// index size. The widest register class is 64 bytes, so a 4-byte scratch
|
|
||||||
// allocation is enough to spill this in a single stack object.
|
|
||||||
//
|
|
||||||
// FIXME: Frame size/offsets are computed earlier than this, so the extra
|
|
||||||
// space is still unnecessarily allocated.
|
|
||||||
|
|
||||||
unsigned Align = FrameInfo.getObjectAlignment(Index);
|
|
||||||
MachinePointerInfo PtrInfo
|
|
||||||
= MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
|
|
||||||
MachineMemOperand *MMO
|
|
||||||
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
|
|
||||||
EltSize, MinAlign(Align, EltSize * i));
|
|
||||||
|
|
||||||
// SMEM instructions only support a single offset, so increment the wave
|
|
||||||
// offset.
|
|
||||||
|
|
||||||
int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
|
|
||||||
if (Offset != 0) {
|
|
||||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
|
|
||||||
.addReg(FrameReg)
|
|
||||||
.addImm(Offset);
|
|
||||||
} else {
|
|
||||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
|
|
||||||
.addReg(FrameReg);
|
|
||||||
}
|
|
||||||
|
|
||||||
BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
|
|
||||||
.addReg(SubReg, getKillRegState(IsKill)) // sdata
|
|
||||||
.addReg(MFI->getScratchRSrcReg()) // sbase
|
|
||||||
.addReg(OffsetReg, RegState::Kill) // soff
|
|
||||||
.addImm(0) // glc
|
|
||||||
.addImm(0) // dlc
|
|
||||||
.addMemOperand(MMO);
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (SpillToVGPR) {
|
if (SpillToVGPR) {
|
||||||
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
|
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
|
||||||
|
|
||||||
@ -914,10 +824,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Spill SGPR to a frame index.
|
// Spill SGPR to a frame index.
|
||||||
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
|
|
||||||
if (!TmpVGPR.isValid())
|
if (!TmpVGPR.isValid())
|
||||||
TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
|
TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
|
||||||
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
|
|
||||||
|
|
||||||
MachineInstrBuilder Mov
|
MachineInstrBuilder Mov
|
||||||
= BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
|
= BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
|
||||||
@ -979,82 +887,24 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
|
|||||||
const DebugLoc &DL = MI->getDebugLoc();
|
const DebugLoc &DL = MI->getDebugLoc();
|
||||||
|
|
||||||
Register SuperReg = MI->getOperand(0).getReg();
|
Register SuperReg = MI->getOperand(0).getReg();
|
||||||
bool SpillToSMEM = spillSGPRToSMEM();
|
|
||||||
if (SpillToSMEM && OnlyToVGPR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
|
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
|
||||||
|
|
||||||
unsigned OffsetReg = AMDGPU::M0;
|
|
||||||
unsigned M0CopyReg = AMDGPU::NoRegister;
|
unsigned M0CopyReg = AMDGPU::NoRegister;
|
||||||
|
|
||||||
if (SpillToSMEM) {
|
|
||||||
if (RS->isRegUsed(AMDGPU::M0)) {
|
|
||||||
M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
|
|
||||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
|
|
||||||
.addReg(AMDGPU::M0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned EltSize = 4;
|
unsigned EltSize = 4;
|
||||||
unsigned ScalarLoadOp;
|
|
||||||
|
|
||||||
Register FrameReg = getFrameRegister(*MF);
|
|
||||||
|
|
||||||
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
|
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
|
||||||
if (SpillToSMEM && isSGPRClass(RC)) {
|
|
||||||
// XXX - if private_element_size is larger than 4 it might be useful to be
|
|
||||||
// able to spill wider vmem spills.
|
|
||||||
std::tie(EltSize, ScalarLoadOp) =
|
|
||||||
getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
|
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
|
||||||
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
|
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
|
||||||
|
|
||||||
// SubReg carries the "Kill" flag when SubReg == SuperReg.
|
|
||||||
int64_t FrOffset = FrameInfo.getObjectOffset(Index);
|
|
||||||
|
|
||||||
Register TmpVGPR;
|
Register TmpVGPR;
|
||||||
|
|
||||||
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
|
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
|
||||||
Register SubReg =
|
Register SubReg =
|
||||||
NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
|
NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
|
||||||
|
|
||||||
if (SpillToSMEM) {
|
|
||||||
// FIXME: Size may be > 4 but extra bytes wasted.
|
|
||||||
unsigned Align = FrameInfo.getObjectAlignment(Index);
|
|
||||||
MachinePointerInfo PtrInfo
|
|
||||||
= MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
|
|
||||||
MachineMemOperand *MMO
|
|
||||||
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
|
|
||||||
EltSize, MinAlign(Align, EltSize * i));
|
|
||||||
|
|
||||||
// Add i * 4 offset
|
|
||||||
int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
|
|
||||||
if (Offset != 0) {
|
|
||||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
|
|
||||||
.addReg(FrameReg)
|
|
||||||
.addImm(Offset);
|
|
||||||
} else {
|
|
||||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
|
|
||||||
.addReg(FrameReg);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto MIB =
|
|
||||||
BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
|
|
||||||
.addReg(MFI->getScratchRSrcReg()) // sbase
|
|
||||||
.addReg(OffsetReg, RegState::Kill) // soff
|
|
||||||
.addImm(0) // glc
|
|
||||||
.addImm(0) // dlc
|
|
||||||
.addMemOperand(MMO);
|
|
||||||
|
|
||||||
if (NumSubRegs > 1 && i == 0)
|
|
||||||
MIB.addReg(SuperReg, RegState::ImplicitDefine);
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (SpillToVGPR) {
|
if (SpillToVGPR) {
|
||||||
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
|
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
|
||||||
auto MIB =
|
auto MIB =
|
||||||
|
@ -35,7 +35,6 @@ private:
|
|||||||
BitVector VGPRPressureSets;
|
BitVector VGPRPressureSets;
|
||||||
BitVector AGPRPressureSets;
|
BitVector AGPRPressureSets;
|
||||||
bool SpillSGPRToVGPR;
|
bool SpillSGPRToVGPR;
|
||||||
bool SpillSGPRToSMEM;
|
|
||||||
bool isWave32;
|
bool isWave32;
|
||||||
|
|
||||||
void classifyPressureSet(unsigned PSetID, unsigned Reg,
|
void classifyPressureSet(unsigned PSetID, unsigned Reg,
|
||||||
@ -47,10 +46,6 @@ public:
|
|||||||
return SpillSGPRToVGPR;
|
return SpillSGPRToVGPR;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool spillSGPRToSMEM() const {
|
|
||||||
return SpillSGPRToSMEM;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the end register initially reserved for the scratch buffer in case
|
/// Return the end register initially reserved for the scratch buffer in case
|
||||||
/// spilling is needed.
|
/// spilling is needed.
|
||||||
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
|
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
|
||||||
|
@ -1,33 +0,0 @@
|
|||||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s
|
|
||||||
|
|
||||||
; FIXME: SGPR-to-SMEM requires an additional SGPR always to scavenge m0
|
|
||||||
|
|
||||||
; ALL-LABEL: {{^}}max_9_sgprs:
|
|
||||||
; ALL: SGPRBlocks: 1
|
|
||||||
; ALL: NumSGPRsForWavesPerEU: 9
|
|
||||||
define amdgpu_kernel void @max_9_sgprs() #0 {
|
|
||||||
%one = load volatile i32, i32 addrspace(4)* undef
|
|
||||||
%two = load volatile i32, i32 addrspace(4)* undef
|
|
||||||
%three = load volatile i32, i32 addrspace(4)* undef
|
|
||||||
%four = load volatile i32, i32 addrspace(4)* undef
|
|
||||||
%five = load volatile i32, i32 addrspace(4)* undef
|
|
||||||
%six = load volatile i32, i32 addrspace(4)* undef
|
|
||||||
%seven = load volatile i32, i32 addrspace(4)* undef
|
|
||||||
%eight = load volatile i32, i32 addrspace(4)* undef
|
|
||||||
%nine = load volatile i32, i32 addrspace(4)* undef
|
|
||||||
%ten = load volatile i32, i32 addrspace(4)* undef
|
|
||||||
call void asm sideeffect "", "s,s,s,s,s,s,s,s"(i32 %one, i32 %two, i32 %three, i32 %four, i32 %five, i32 %six, i32 %seven, i32 %eight)
|
|
||||||
store volatile i32 %one, i32 addrspace(1)* undef
|
|
||||||
store volatile i32 %two, i32 addrspace(1)* undef
|
|
||||||
store volatile i32 %three, i32 addrspace(1)* undef
|
|
||||||
store volatile i32 %four, i32 addrspace(1)* undef
|
|
||||||
store volatile i32 %five, i32 addrspace(1)* undef
|
|
||||||
store volatile i32 %six, i32 addrspace(1)* undef
|
|
||||||
store volatile i32 %seven, i32 addrspace(1)* undef
|
|
||||||
store volatile i32 %eight, i32 addrspace(1)* undef
|
|
||||||
store volatile i32 %nine, i32 addrspace(1)* undef
|
|
||||||
store volatile i32 %ten, i32 addrspace(1)* undef
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
attributes #0 = { nounwind "amdgpu-num-sgpr"="14" }
|
|
@ -1,10 +1,6 @@
|
|||||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s
|
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s
|
||||||
|
|
||||||
; If spilling to smem, additional registers are used for the resource
|
|
||||||
; descriptor.
|
|
||||||
|
|
||||||
; FIXME: Vectorization can increase required SGPR count beyond limit.
|
; FIXME: Vectorization can increase required SGPR count beyond limit.
|
||||||
; FIXME: SGPR-to-SMEM requires an additional SGPR always to scavenge m0
|
|
||||||
|
|
||||||
; ALL-LABEL: {{^}}max_9_sgprs:
|
; ALL-LABEL: {{^}}max_9_sgprs:
|
||||||
|
|
||||||
@ -55,13 +51,6 @@ define amdgpu_kernel void @max_9_sgprs() #0 {
|
|||||||
; XTOSGPR: SGPRBlocks: 1
|
; XTOSGPR: SGPRBlocks: 1
|
||||||
; XTOSGPR: NumSGPRsForWavesPerEU: 16
|
; XTOSGPR: NumSGPRsForWavesPerEU: 16
|
||||||
|
|
||||||
; XTOSMEM: s_mov_b64 s[10:11], s[2:3]
|
|
||||||
; XTOSMEM: s_mov_b64 s[8:9], s[0:1]
|
|
||||||
; XTOSMEM: s_mov_b32 s7, s13
|
|
||||||
|
|
||||||
; XTOSMEM: SGPRBlocks: 1
|
|
||||||
; XTOSMEM: NumSGPRsForWavesPerEU: 16
|
|
||||||
;
|
|
||||||
; This test case is disabled: When calculating the spillslot addresses AMDGPU
|
; This test case is disabled: When calculating the spillslot addresses AMDGPU
|
||||||
; creates an extra vreg to save/restore m0 which in a point of maximum register
|
; creates an extra vreg to save/restore m0 which in a point of maximum register
|
||||||
; pressure would trigger an endless loop; the compiler aborts earlier with
|
; pressure would trigger an endless loop; the compiler aborts earlier with
|
||||||
@ -101,10 +90,6 @@ define amdgpu_kernel void @max_9_sgprs() #0 {
|
|||||||
; ; swapping the order the registers are copied from what normally
|
; ; swapping the order the registers are copied from what normally
|
||||||
; ; happens.
|
; ; happens.
|
||||||
|
|
||||||
; XTOSMEM: s_mov_b32 s5, s11
|
|
||||||
; XTOSMEM: s_add_u32 m0, s5,
|
|
||||||
; XTOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0
|
|
||||||
|
|
||||||
; XALL: SGPRBlocks: 2
|
; XALL: SGPRBlocks: 2
|
||||||
; XALL: NumSGPRsForWavesPerEU: 18
|
; XALL: NumSGPRsForWavesPerEU: 18
|
||||||
;define amdgpu_kernel void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
|
;define amdgpu_kernel void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
||||||
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
||||||
; RUN: llc -O0 -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global,-WavefrontSize32,+WavefrontSize64 -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
; RUN: llc -O0 -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global,-WavefrontSize32,+WavefrontSize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
||||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNOPT -check-prefix=GCN %s
|
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNOPT -check-prefix=GCN %s
|
||||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNOPT -check-prefix=GCN %s
|
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNOPT -check-prefix=GCN %s
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s
|
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s
|
||||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s
|
|
||||||
|
|
||||||
; Make sure this doesn't crash.
|
; Make sure this doesn't crash.
|
||||||
; ALL-LABEL: {{^}}test:
|
; ALL-LABEL: {{^}}test:
|
||||||
@ -14,15 +13,6 @@
|
|||||||
; SGPR-NEXT: s_nop 4
|
; SGPR-NEXT: s_nop 4
|
||||||
; SGPR-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0
|
; SGPR-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0
|
||||||
|
|
||||||
; Make sure scratch wave offset register is correctly incremented and
|
|
||||||
; then restored.
|
|
||||||
; SMEM: s_add_u32 m0, s[[OFF]], 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 16-byte Folded Spill
|
|
||||||
|
|
||||||
; SMEM: s_add_u32 m0, s[[OFF]], 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 16-byte Folded Reload
|
|
||||||
|
|
||||||
; SMEM: s_dcache_wb
|
|
||||||
; ALL: s_endpgm
|
; ALL: s_endpgm
|
||||||
define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
|
define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
|
||||||
call void asm sideeffect "", "~{s[0:7]}" ()
|
call void asm sideeffect "", "~{s[0:7]}" ()
|
||||||
|
@ -1,14 +1,11 @@
|
|||||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
|
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
|
||||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
|
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
|
||||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
|
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
|
||||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
|
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
|
||||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=GCN %s
|
|
||||||
|
|
||||||
; XXX - Why does it like to use vcc?
|
; XXX - Why does it like to use vcc?
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}spill_m0:
|
; GCN-LABEL: {{^}}spill_m0:
|
||||||
; TOSMEM: s_mov_b32 s[[LO:[0-9]+]], SCRATCH_RSRC_DWORD0
|
|
||||||
; TOSMEM: s_mov_b32 s[[HI:[0-9]+]], 0xe80000
|
|
||||||
|
|
||||||
; GCN-DAG: s_cmp_lg_u32
|
; GCN-DAG: s_cmp_lg_u32
|
||||||
|
|
||||||
@ -19,11 +16,6 @@
|
|||||||
; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]]
|
; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]]
|
||||||
; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12 ; 4-byte Folded Spill
|
; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12 ; 4-byte Folded Spill
|
||||||
|
|
||||||
; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
|
|
||||||
; TOSMEM: s_add_u32 m0, s3, 0x300{{$}}
|
|
||||||
; TOSMEM-NOT: [[M0_COPY]]
|
|
||||||
; TOSMEM: s_buffer_store_dword [[M0_COPY]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill
|
|
||||||
|
|
||||||
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
|
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||||
|
|
||||||
; GCN: [[ENDIF]]:
|
; GCN: [[ENDIF]]:
|
||||||
@ -35,11 +27,6 @@
|
|||||||
; TOVMEM: v_readfirstlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]]
|
; TOVMEM: v_readfirstlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]]
|
||||||
; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
|
; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
|
||||||
|
|
||||||
; TOSMEM: s_add_u32 m0, s3, 0x300{{$}}
|
|
||||||
; TOSMEM: s_buffer_load_dword [[M0_RESTORE:s[0-9]+]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Reload
|
|
||||||
; TOSMEM-NOT: [[M0_RESTORE]]
|
|
||||||
; TOSMEM: s_mov_b32 m0, [[M0_RESTORE]]
|
|
||||||
|
|
||||||
; GCN: s_add_i32 s{{[0-9]+}}, m0, 1
|
; GCN: s_add_i32 s{{[0-9]+}}, m0, 1
|
||||||
define amdgpu_kernel void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
|
define amdgpu_kernel void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
|
||||||
entry:
|
entry:
|
||||||
@ -64,26 +51,6 @@ endif:
|
|||||||
; GCN: s_mov_b32 m0, s6
|
; GCN: s_mov_b32 m0, s6
|
||||||
; GCN: v_interp_mov_f32
|
; GCN: v_interp_mov_f32
|
||||||
|
|
||||||
; TOSMEM-NOT: s_m0
|
|
||||||
; TOSMEM: s_add_u32 m0, s7, 0x100
|
|
||||||
; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill
|
|
||||||
; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it
|
|
||||||
; FIXME-TOSMEM-NOT: m0
|
|
||||||
|
|
||||||
; FIXME-TOSMEM-NOT: m0
|
|
||||||
; TOSMEM: s_add_u32 m0, s7, 0x300
|
|
||||||
; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 4-byte Folded Spill
|
|
||||||
; FIXME-TOSMEM-NOT: m0
|
|
||||||
|
|
||||||
; TOSMEM: s_mov_b64 exec,
|
|
||||||
; TOSMEM: s_cbranch_execz
|
|
||||||
; TOSMEM: s_branch
|
|
||||||
|
|
||||||
; TOSMEM: BB{{[0-9]+_[0-9]+}}:
|
|
||||||
; TOSMEM: s_add_u32 m0, s7, 0x500
|
|
||||||
; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload
|
|
||||||
|
|
||||||
|
|
||||||
; GCN-NOT: v_readlane_b32 m0
|
; GCN-NOT: v_readlane_b32 m0
|
||||||
; GCN-NOT: s_buffer_store_dword m0
|
; GCN-NOT: s_buffer_store_dword m0
|
||||||
; GCN-NOT: s_buffer_load_dword m0
|
; GCN-NOT: s_buffer_load_dword m0
|
||||||
@ -110,101 +77,6 @@ endif: ; preds = %else, %if
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Force save and restore of m0 during SMEM spill
|
|
||||||
; GCN-LABEL: {{^}}m0_unavailable_spill:
|
|
||||||
|
|
||||||
; GCN: ; def m0, 1
|
|
||||||
|
|
||||||
; GCN: s_mov_b32 m0, s2
|
|
||||||
; GCN: v_interp_mov_f32
|
|
||||||
|
|
||||||
; GCN: ; clobber m0
|
|
||||||
|
|
||||||
; TOSMEM: s_mov_b32 vcc_hi, m0
|
|
||||||
; TOSMEM: s_add_u32 m0, s3, 0x100
|
|
||||||
; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill
|
|
||||||
; TOSMEM: s_mov_b32 m0, vcc_hi
|
|
||||||
|
|
||||||
; TOSMEM: s_mov_b64 exec,
|
|
||||||
; TOSMEM: s_cbranch_execz
|
|
||||||
; TOSMEM: s_branch
|
|
||||||
|
|
||||||
; TOSMEM: BB{{[0-9]+_[0-9]+}}:
|
|
||||||
; TOSMEM: s_add_u32 m0, s3, 0x100
|
|
||||||
; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload
|
|
||||||
|
|
||||||
; GCN-NOT: v_readlane_b32 m0
|
|
||||||
; GCN-NOT: s_buffer_store_dword m0
|
|
||||||
; GCN-NOT: s_buffer_load_dword m0
|
|
||||||
define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) #0 {
|
|
||||||
main_body:
|
|
||||||
%m0 = call i32 asm sideeffect "; def $0, 1", "={m0}"() #0
|
|
||||||
%tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0.arg)
|
|
||||||
call void asm sideeffect "; clobber $0", "~{m0}"() #0
|
|
||||||
%cmp = fcmp ueq float 0.000000e+00, %tmp
|
|
||||||
br i1 %cmp, label %if, label %else
|
|
||||||
|
|
||||||
if: ; preds = %main_body
|
|
||||||
store volatile i32 8, i32 addrspace(1)* undef
|
|
||||||
br label %endif
|
|
||||||
|
|
||||||
else: ; preds = %main_body
|
|
||||||
store volatile i32 11, i32 addrspace(1)* undef
|
|
||||||
br label %endif
|
|
||||||
|
|
||||||
endif:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}restore_m0_lds:
|
|
||||||
; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]]
|
|
||||||
; TOSMEM: s_cmp_eq_u32
|
|
||||||
; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it
|
|
||||||
; FIXME-TOSMEM-NOT: m0
|
|
||||||
; TOSMEM: s_add_u32 m0, s3, 0x100
|
|
||||||
; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill
|
|
||||||
; FIXME-TOSMEM-NOT: m0
|
|
||||||
; TOSMEM: s_add_u32 m0, s3, 0x200
|
|
||||||
; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill
|
|
||||||
; FIXME-TOSMEM-NOT: m0
|
|
||||||
; TOSMEM: s_cbranch_scc1
|
|
||||||
|
|
||||||
; TOSMEM: s_mov_b32 m0, -1
|
|
||||||
|
|
||||||
; TOSMEM: s_mov_b32 vcc_hi, m0
|
|
||||||
; TOSMEM: s_add_u32 m0, s3, 0x200
|
|
||||||
; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload
|
|
||||||
; TOSMEM: s_mov_b32 m0, vcc_hi
|
|
||||||
; TOSMEM: s_waitcnt lgkmcnt(0)
|
|
||||||
|
|
||||||
; TOSMEM: ds_write_b64
|
|
||||||
|
|
||||||
; FIXME-TOSMEM-NOT: m0
|
|
||||||
; TOSMEM: s_add_u32 m0, s3, 0x100
|
|
||||||
; TOSMEM: s_buffer_load_dword s0, s[88:91], m0 ; 4-byte Folded Reload
|
|
||||||
; FIXME-TOSMEM-NOT: m0
|
|
||||||
; TOSMEM: s_waitcnt lgkmcnt(0)
|
|
||||||
; TOSMEM-NOT: m0
|
|
||||||
; TOSMEM: s_mov_b32 m0, s0
|
|
||||||
; TOSMEM: ; use m0
|
|
||||||
|
|
||||||
; TOSMEM: s_dcache_wb
|
|
||||||
; TOSMEM: s_endpgm
|
|
||||||
define amdgpu_kernel void @restore_m0_lds(i32 %arg) {
|
|
||||||
%m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
|
|
||||||
%sval = load volatile i64, i64 addrspace(4)* undef
|
|
||||||
%cmp = icmp eq i32 %arg, 0
|
|
||||||
br i1 %cmp, label %ret, label %bb
|
|
||||||
|
|
||||||
bb:
|
|
||||||
store volatile i64 %sval, i64 addrspace(3)* undef
|
|
||||||
call void asm sideeffect "; use $0", "{m0}"(i32 %m0) #0
|
|
||||||
br label %ret
|
|
||||||
|
|
||||||
ret:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
|
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
|
||||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||||
|
@ -1,22 +0,0 @@
|
|||||||
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs -stop-before=prologepilog < %s
|
|
||||||
|
|
||||||
; Spill to SMEM clobbers M0. Check that the implicit-def dead operand is present
|
|
||||||
; in the pseudo instructions.
|
|
||||||
|
|
||||||
; CHECK-LABEL: {{^}}spill_sgpr:
|
|
||||||
; CHECK: SI_SPILL_S32_SAVE {{.*}}, implicit-def dead %m0
|
|
||||||
; CHECK: SI_SPILL_S32_RESTORE {{.*}}, implicit-def dead %m0
|
|
||||||
define amdgpu_kernel void @spill_sgpr(i32 addrspace(1)* %out, i32 %in) #0 {
|
|
||||||
%sgpr = call i32 asm sideeffect "; def $0", "=s" () #0
|
|
||||||
%cmp = icmp eq i32 %in, 0
|
|
||||||
br i1 %cmp, label %bb0, label %ret
|
|
||||||
|
|
||||||
bb0:
|
|
||||||
call void asm sideeffect "; use $0", "s"(i32 %sgpr) #0
|
|
||||||
br label %ret
|
|
||||||
|
|
||||||
ret:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
attributes #0 = { nounwind }
|
|
@ -1,21 +1,7 @@
|
|||||||
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR %s
|
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR %s
|
||||||
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s
|
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VMEM %s
|
||||||
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VMEM %s
|
|
||||||
|
|
||||||
; ALL-LABEL: {{^}}spill_sgpr_x2:
|
; ALL-LABEL: {{^}}spill_sgpr_x2:
|
||||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Spill
|
|
||||||
; SMEM: s_cbranch_scc1
|
|
||||||
|
|
||||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Reload
|
|
||||||
|
|
||||||
; SMEM: s_dcache_wb
|
|
||||||
; SMEM: s_endpgm
|
|
||||||
|
|
||||||
; FIXME: Should only need 4 bytes
|
|
||||||
; SMEM: ScratchSize: 12
|
|
||||||
|
|
||||||
|
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
||||||
@ -24,6 +10,7 @@
|
|||||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
|
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
|
||||||
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
|
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
|
||||||
|
|
||||||
|
|
||||||
; VMEM: buffer_store_dword
|
; VMEM: buffer_store_dword
|
||||||
; VMEM: buffer_store_dword
|
; VMEM: buffer_store_dword
|
||||||
; VMEM: s_cbranch_scc1
|
; VMEM: s_cbranch_scc1
|
||||||
@ -44,21 +31,6 @@ ret:
|
|||||||
}
|
}
|
||||||
|
|
||||||
; ALL-LABEL: {{^}}spill_sgpr_x3:
|
; ALL-LABEL: {{^}}spill_sgpr_x3:
|
||||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_store_dword s
|
|
||||||
; SMEM: s_buffer_store_dword s
|
|
||||||
; SMEM: s_buffer_store_dword s
|
|
||||||
; SMEM: s_cbranch_scc1
|
|
||||||
|
|
||||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_load_dword s
|
|
||||||
; SMEM: s_buffer_load_dword s
|
|
||||||
; SMEM: s_buffer_load_dword s
|
|
||||||
; SMEM: s_dcache_wb
|
|
||||||
; SMEM: s_endpgm
|
|
||||||
|
|
||||||
; FIXME: Should only need 4 bytes
|
|
||||||
; SMEM: ScratchSize: 16
|
|
||||||
|
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
||||||
@ -92,17 +64,6 @@ ret:
|
|||||||
}
|
}
|
||||||
|
|
||||||
; ALL-LABEL: {{^}}spill_sgpr_x4:
|
; ALL-LABEL: {{^}}spill_sgpr_x4:
|
||||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[VALS:[0-9]+:[0-9]+]]{{\]}}, m0 ; 16-byte Folded Spill
|
|
||||||
; SMEM: s_cbranch_scc1
|
|
||||||
|
|
||||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[VALS]]{{\]}}, m0 ; 16-byte Folded Reload
|
|
||||||
; SMEM: s_dcache_wb
|
|
||||||
; SMEM: s_endpgm
|
|
||||||
|
|
||||||
; FIXME: Should only need 4 bytes
|
|
||||||
; SMEM: ScratchSize: 20
|
|
||||||
|
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
||||||
@ -140,25 +101,6 @@ ret:
|
|||||||
}
|
}
|
||||||
|
|
||||||
; ALL-LABEL: {{^}}spill_sgpr_x5:
|
; ALL-LABEL: {{^}}spill_sgpr_x5:
|
||||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_store_dword s
|
|
||||||
; SMEM: s_buffer_store_dword s
|
|
||||||
; SMEM: s_buffer_store_dword s
|
|
||||||
; SMEM: s_buffer_store_dword s
|
|
||||||
; SMEM: s_buffer_store_dword s
|
|
||||||
; SMEM: s_cbranch_scc1
|
|
||||||
|
|
||||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_load_dword s
|
|
||||||
; SMEM: s_buffer_load_dword s
|
|
||||||
; SMEM: s_buffer_load_dword s
|
|
||||||
; SMEM: s_buffer_load_dword s
|
|
||||||
; SMEM: s_buffer_load_dword s
|
|
||||||
; SMEM: s_dcache_wb
|
|
||||||
; SMEM: s_endpgm
|
|
||||||
|
|
||||||
; FIXME: Should only need 4 bytes
|
|
||||||
; SMEM: ScratchSize: 24
|
|
||||||
|
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
||||||
@ -201,22 +143,6 @@ ret:
|
|||||||
|
|
||||||
; ALL-LABEL: {{^}}spill_sgpr_x8:
|
; ALL-LABEL: {{^}}spill_sgpr_x8:
|
||||||
|
|
||||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[VALS:[0-9]+:[0-9]+]]{{\]}}, m0 ; 16-byte Folded Spill
|
|
||||||
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
|
|
||||||
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[VALS]]{{\]}}, m0 ; 16-byte Folded Spill
|
|
||||||
; SMEM: s_cbranch_scc1
|
|
||||||
|
|
||||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
|
||||||
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[VALS]]{{\]}}, m0 ; 16-byte Folded Reload
|
|
||||||
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
|
|
||||||
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[VALS]]{{\]}}, m0 ; 16-byte Folded Reload
|
|
||||||
|
|
||||||
; SMEM: s_dcache_wb
|
|
||||||
; SMEM: s_endpgm
|
|
||||||
|
|
||||||
; SMEM: ScratchSize: 36
|
|
||||||
|
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
|
||||||
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
|
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
|
||||||
|
Loading…
Reference in New Issue
Block a user