mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
[AMDGPU] Avoid use of V_READLANE into EXEC in SGPR spills
Always prefer to clobber input SGPRs and restore them after the spill. This applies to both spills to VGPRs and scratch. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D81914
This commit is contained in:
parent
8f7a8fd1c3
commit
f2504628bd
@ -1269,6 +1269,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
if (RI.isSGPRClass(RC)) {
|
||||
MFI->setHasSpilledSGPRs();
|
||||
assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
|
||||
assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
|
||||
SrcReg != AMDGPU::EXEC && "exec should not be spilled");
|
||||
|
||||
// We are only allowed to create one new instruction when spilling
|
||||
// registers, so we need to use pseudo instruction for spilling SGPRs.
|
||||
@ -1278,7 +1280,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
// to make sure we are using the correct register class.
|
||||
if (Register::isVirtualRegister(SrcReg) && SpillSize == 4) {
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
|
||||
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
|
||||
}
|
||||
|
||||
BuildMI(MBB, MI, DL, OpDesc)
|
||||
@ -1401,13 +1403,15 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
if (RI.isSGPRClass(RC)) {
|
||||
MFI->setHasSpilledSGPRs();
|
||||
assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
|
||||
assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
|
||||
DestReg != AMDGPU::EXEC && "exec should not be spilled");
|
||||
|
||||
// FIXME: Maybe this should not include a memoperand because it will be
|
||||
// lowered to non-memory instructions.
|
||||
const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
|
||||
if (DestReg.isVirtual() && SpillSize == 4) {
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
|
||||
MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
|
||||
}
|
||||
|
||||
if (RI.spillSGPRToVGPR())
|
||||
@ -7021,20 +7025,24 @@ MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
|
||||
// %0 may even spill. We can't spill $m0 normally (it would require copying to
|
||||
// a numbered SGPR anyway), and since it is in the SReg_32 register class,
|
||||
// TargetInstrInfo::foldMemoryOperand() is going to try.
|
||||
// A similar issue also exists with spilling and reloading $exec registers.
|
||||
//
|
||||
// To prevent that, constrain the %0 register class here.
|
||||
if (MI.isFullCopy()) {
|
||||
Register DstReg = MI.getOperand(0).getReg();
|
||||
Register SrcReg = MI.getOperand(1).getReg();
|
||||
|
||||
if (DstReg == AMDGPU::M0 && SrcReg.isVirtual()) {
|
||||
MF.getRegInfo().constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (SrcReg == AMDGPU::M0 && DstReg.isVirtual()) {
|
||||
MF.getRegInfo().constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
|
||||
return nullptr;
|
||||
if ((DstReg.isVirtual() || SrcReg.isVirtual()) &&
|
||||
(DstReg.isVirtual() != SrcReg.isVirtual())) {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
Register VirtReg = DstReg.isVirtual() ? DstReg : SrcReg;
|
||||
const TargetRegisterClass *RC = MRI.getRegClass(VirtReg);
|
||||
if (RC->hasSuperClassEq(&AMDGPU::SReg_32RegClass)) {
|
||||
MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
|
||||
return nullptr;
|
||||
} else if (RC->hasSuperClassEq(&AMDGPU::SReg_64RegClass)) {
|
||||
MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -887,68 +887,41 @@ void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI,
|
||||
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
|
||||
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
|
||||
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
|
||||
unsigned FirstPart = isWave32 ? Offset * 16 : Offset * 32;
|
||||
unsigned FirstPart = Offset * 32;
|
||||
unsigned ExecLane = 0;
|
||||
|
||||
bool IsKill = MI->getOperand(0).isKill();
|
||||
const DebugLoc &DL = MI->getDebugLoc();
|
||||
|
||||
const bool SuperRegIsExec =
|
||||
SuperReg == AMDGPU::EXEC || SuperReg == AMDGPU::EXEC_LO;
|
||||
|
||||
// If exec mask is stored in the VGPR, make sure it is stored after
|
||||
// any lanes used by the spill (16 lanes on Wave32, 32 lanes on Wave64).
|
||||
const unsigned ExecLoLane = SuperRegIsExec ? 0 : (isWave32 ? 16 : 32);
|
||||
const unsigned ExecHiLane = SuperRegIsExec ? 1 : (isWave32 ? 17 : 33);
|
||||
|
||||
// Try to use the src/dst SGPRs to hold a copy of the exec mask.
|
||||
// Use VGPR lanes when this is not possible, i.e. the src value
|
||||
// must be valid after the spill or src is smaller than exec mask.
|
||||
bool StoreExecInVGPR = !IsLoad && (SuperRegIsExec || !IsKill);
|
||||
// Cannot handle load/store to EXEC
|
||||
assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
|
||||
SuperReg != AMDGPU::EXEC && "exec should never spill");
|
||||
|
||||
// On Wave32 only handle EXEC_LO.
|
||||
// On Wave64 only update EXEC_HI if there is sufficient space for a copy.
|
||||
bool OnlyExecLo = isWave32 || NumSubRegs == 1;
|
||||
bool OnlyExecLo = isWave32 || NumSubRegs == 1 || SuperReg == AMDGPU::EXEC_HI;
|
||||
|
||||
unsigned ExecMovOpc = OnlyExecLo ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
|
||||
Register ExecReg = OnlyExecLo ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
|
||||
Register SavedExecReg;
|
||||
|
||||
// Backup EXEC
|
||||
if (SuperRegIsExec) {
|
||||
// Do nothing; exec is already stored in VGPR or will be overwritten
|
||||
} else if (StoreExecInVGPR) {
|
||||
BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
|
||||
VGPR)
|
||||
.addReg(AMDGPU::EXEC_LO)
|
||||
.addImm(ExecLoLane)
|
||||
.addReg(VGPR, getUndefRegState(IsLoad));
|
||||
|
||||
if (!isWave32) {
|
||||
BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
|
||||
VGPR)
|
||||
.addReg(AMDGPU::EXEC_HI)
|
||||
.addImm(ExecHiLane)
|
||||
.addReg(VGPR);
|
||||
}
|
||||
if (OnlyExecLo) {
|
||||
SavedExecReg = NumSubRegs == 1
|
||||
? SuperReg
|
||||
: getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]);
|
||||
} else {
|
||||
if (OnlyExecLo) {
|
||||
SavedExecReg = NumSubRegs == 1
|
||||
? SuperReg
|
||||
: getSubReg(SuperReg, SplitParts[FirstPart]);
|
||||
} else {
|
||||
SavedExecReg =
|
||||
getMatchingSuperReg(getSubReg(SuperReg, SplitParts[FirstPart]),
|
||||
AMDGPU::sub0, &AMDGPU::SReg_64_XEXECRegClass);
|
||||
// If src/dst is an odd size it is possible subreg0 is not aligned.
|
||||
if (!SavedExecReg && NumSubRegs > 2)
|
||||
SavedExecReg =
|
||||
getMatchingSuperReg(getSubReg(SuperReg, SplitParts[FirstPart + 1]),
|
||||
AMDGPU::sub0, &AMDGPU::SReg_64_XEXECRegClass);
|
||||
// If src/dst is an odd size it is possible subreg0 is not aligned.
|
||||
for (; ExecLane < (NumSubRegs - 1); ++ExecLane) {
|
||||
SavedExecReg = getMatchingSuperReg(
|
||||
getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]), AMDGPU::sub0,
|
||||
&AMDGPU::SReg_64_XEXECRegClass);
|
||||
if (SavedExecReg)
|
||||
break;
|
||||
}
|
||||
|
||||
assert(SavedExecReg);
|
||||
BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), SavedExecReg).addReg(ExecReg);
|
||||
}
|
||||
assert(SavedExecReg);
|
||||
BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), SavedExecReg).addReg(ExecReg);
|
||||
|
||||
// Setup EXEC
|
||||
BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg).addImm(VGPRLanes);
|
||||
@ -976,34 +949,34 @@ void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI,
|
||||
Offset * EltSize, MMO,
|
||||
RS);
|
||||
} else {
|
||||
buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
|
||||
Index,
|
||||
VGPR, !StoreExecInVGPR,
|
||||
MFI->getScratchRSrcReg(), FrameReg,
|
||||
Offset * EltSize, MMO,
|
||||
RS);
|
||||
buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, Index, VGPR,
|
||||
IsKill, MFI->getScratchRSrcReg(), FrameReg,
|
||||
Offset * EltSize, MMO, RS);
|
||||
// This only ever adds one VGPR spill
|
||||
MFI->addToSpilledVGPRs(1);
|
||||
}
|
||||
|
||||
// Restore EXEC
|
||||
if (SuperRegIsExec && IsLoad) {
|
||||
// Do nothing; exec will be overwritten
|
||||
} else if (StoreExecInVGPR) {
|
||||
BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
|
||||
AMDGPU::EXEC_LO)
|
||||
.addReg(VGPR, getKillRegState(!IsLoad && isWave32))
|
||||
.addImm(ExecLoLane);
|
||||
if (!isWave32) {
|
||||
BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg)
|
||||
.addReg(SavedExecReg, getKillRegState(IsLoad || IsKill));
|
||||
|
||||
// Restore clobbered SGPRs
|
||||
if (IsLoad) {
|
||||
// Nothing to do; register will be overwritten
|
||||
} else if (!IsKill) {
|
||||
// Restore SGPRs from appropriate VGPR lanes
|
||||
if (!OnlyExecLo) {
|
||||
BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
|
||||
AMDGPU::EXEC_HI)
|
||||
.addReg(VGPR, getKillRegState(!IsLoad))
|
||||
.addImm(ExecHiLane);
|
||||
getSubReg(SuperReg, SplitParts[FirstPart + ExecLane + 1]))
|
||||
.addReg(VGPR)
|
||||
.addImm(ExecLane + 1);
|
||||
}
|
||||
} else {
|
||||
assert(SavedExecReg);
|
||||
BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg)
|
||||
.addReg(SavedExecReg, RegState::Kill);
|
||||
BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
|
||||
NumSubRegs == 1
|
||||
? SavedExecReg
|
||||
: getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]))
|
||||
.addReg(VGPR, RegState::Kill)
|
||||
.addImm(ExecLane);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1032,6 +1005,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
||||
SuperReg != MFI->getFrameOffsetReg()));
|
||||
|
||||
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
|
||||
assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
|
||||
SuperReg != AMDGPU::EXEC && "exec should never spill");
|
||||
|
||||
unsigned EltSize = 4;
|
||||
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
|
||||
@ -1069,11 +1044,12 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
||||
// Scavenged temporary VGPR to use. It must be scavenged once for any number
|
||||
// of spilled subregs.
|
||||
Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
|
||||
RS->setRegUsed(TmpVGPR);
|
||||
|
||||
// SubReg carries the "Kill" flag when SubReg == SuperReg.
|
||||
unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
|
||||
|
||||
unsigned PerVGPR = isWave32 ? 16 : 32;
|
||||
unsigned PerVGPR = 32;
|
||||
unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR;
|
||||
int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
|
||||
|
||||
@ -1138,6 +1114,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
|
||||
Register SuperReg = MI->getOperand(0).getReg();
|
||||
|
||||
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
|
||||
assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
|
||||
SuperReg != AMDGPU::EXEC && "exec should never spill");
|
||||
|
||||
unsigned EltSize = 4;
|
||||
|
||||
@ -1157,14 +1135,14 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
|
||||
SubReg)
|
||||
.addReg(Spill.VGPR)
|
||||
.addImm(Spill.Lane);
|
||||
|
||||
if (NumSubRegs > 1 && i == 0)
|
||||
MIB.addReg(SuperReg, RegState::ImplicitDefine);
|
||||
}
|
||||
} else {
|
||||
Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
|
||||
RS->setRegUsed(TmpVGPR);
|
||||
|
||||
unsigned PerVGPR = isWave32 ? 16 : 32;
|
||||
unsigned PerVGPR = 32;
|
||||
unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR;
|
||||
int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
|
||||
|
||||
@ -1186,7 +1164,6 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
|
||||
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
|
||||
.addReg(TmpVGPR, getKillRegState(LastSubReg))
|
||||
.addImm(i);
|
||||
|
||||
if (NumSubRegs > 1 && i == 0)
|
||||
MIB.addReg(SuperReg, RegState::ImplicitDefine);
|
||||
}
|
||||
|
152
test/CodeGen/AMDGPU/fold-reload-into-exec.mir
Normal file
152
test/CodeGen/AMDGPU/fold-reload-into-exec.mir
Normal file
@ -0,0 +1,152 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stress-regalloc=2 -start-before=greedy -stop-after=virtregmap -o - %s | FileCheck %s
|
||||
|
||||
# Test that a spill of a copy of exec is not folded to be a spill of exec directly.
|
||||
|
||||
---
|
||||
|
||||
name: merge_sgpr_spill_into_copy_from_exec_lo
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_lo
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: S_WAITCNT 0
|
||||
; CHECK: S_NOP 0, implicit-def $exec_lo
|
||||
; CHECK: $sgpr0 = S_MOV_B32 $exec_lo
|
||||
; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0
|
||||
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0
|
||||
; CHECK: $exec_lo = S_MOV_B32 killed $sgpr0
|
||||
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
S_NOP 0, implicit-def $exec_lo
|
||||
%0:sreg_32 = COPY $exec_lo
|
||||
S_NOP 0, implicit-def %1:sreg_32, implicit-def %2:sreg_32, implicit %0
|
||||
$exec_lo = COPY %0
|
||||
S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_sgpr_spill_into_copy_from_exec_hi
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_hi
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: S_WAITCNT 0
|
||||
; CHECK: S_NOP 0, implicit-def $exec_hi
|
||||
; CHECK: $sgpr0 = S_MOV_B32 $exec_hi
|
||||
; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0
|
||||
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0
|
||||
; CHECK: $exec_hi = S_MOV_B32 killed $sgpr0
|
||||
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
S_NOP 0, implicit-def $exec_hi
|
||||
%0:sreg_32 = COPY $exec_hi
|
||||
S_NOP 0, implicit-def %1:sreg_32, implicit-def %2:sreg_32, implicit %0
|
||||
$exec_hi = COPY %0
|
||||
S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_sgpr_spill_into_copy_from_exec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: S_WAITCNT 0
|
||||
; CHECK: S_NOP 0, implicit-def $exec
|
||||
; CHECK: $sgpr0_sgpr1 = S_MOV_B64 $exec
|
||||
; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
|
||||
; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr1, 1, killed $vgpr0
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0, implicit-def $sgpr0_sgpr1
|
||||
; CHECK: $sgpr1 = V_READLANE_B32_vi $vgpr0, 1
|
||||
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0, implicit-def $sgpr0_sgpr1
|
||||
; CHECK: $sgpr1 = V_READLANE_B32_vi killed $vgpr0, 1
|
||||
; CHECK: $exec = S_MOV_B64 killed $sgpr0_sgpr1
|
||||
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
S_NOP 0, implicit-def $exec
|
||||
%0:sreg_64 = COPY $exec
|
||||
S_NOP 0, implicit-def %1:sreg_64, implicit-def %2:sreg_64, implicit %0
|
||||
$exec = COPY %0
|
||||
S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
|
||||
...
|
||||
|
||||
# Test that a reload into a copy of exec is not folded to be a reload of exec directly.
|
||||
|
||||
---
|
||||
|
||||
name: reload_sgpr_spill_into_copy_to_exec_lo
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_lo
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: S_WAITCNT 0
|
||||
; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo
|
||||
; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0
|
||||
; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0
|
||||
; CHECK: $exec_lo = S_MOV_B32 killed $sgpr0
|
||||
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_lo
|
||||
S_NOP 0, implicit %0, implicit-def %3:sreg_32, implicit-def %4:sreg_32
|
||||
$exec_lo = COPY %0
|
||||
S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
name: reload_sgpr_spill_into_copy_to_exec_hi
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_hi
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: S_WAITCNT 0
|
||||
; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi
|
||||
; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0
|
||||
; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0
|
||||
; CHECK: $exec_hi = S_MOV_B32 killed $sgpr0
|
||||
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_hi
|
||||
S_NOP 0, implicit %0, implicit-def %3:sreg_32, implicit-def %4:sreg_32
|
||||
$exec_hi = COPY %0
|
||||
S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
name: reload_sgpr_spill_into_copy_to_exec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: S_WAITCNT 0
|
||||
; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
|
||||
; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
|
||||
; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr1, 1, killed $vgpr0
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0, implicit-def $sgpr0_sgpr1
|
||||
; CHECK: $sgpr1 = V_READLANE_B32_vi $vgpr0, 1
|
||||
; CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1
|
||||
; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0, implicit-def $sgpr0_sgpr1
|
||||
; CHECK: $sgpr1 = V_READLANE_B32_vi killed $vgpr0, 1
|
||||
; CHECK: $exec = S_MOV_B64 killed $sgpr0_sgpr1
|
||||
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
S_NOP 0, implicit-def %0:sreg_64, implicit-def %1:sreg_64, implicit-def $exec
|
||||
S_NOP 0, implicit %0, implicit-def %3:sreg_64, implicit-def %4:sreg_64
|
||||
$exec = COPY %0
|
||||
S_SENDMSG 0, implicit $m0, implicit $exec
|
||||
|
||||
...
|
@ -8,15 +8,15 @@
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
|
||||
# CHECK: $exec_lo = S_MOV_B32 1
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
|
||||
# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12
|
||||
|
||||
# S32 without kill
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
|
||||
# CHECK: $exec_lo = S_MOV_B32 1
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
|
||||
# CHECK: $exec_lo = V_READLANE
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
|
||||
# CHECK: $sgpr12 = V_READLANE
|
||||
|
||||
# S64 with kill
|
||||
# CHECK: V_WRITELANE
|
||||
@ -25,20 +25,22 @@
|
||||
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
||||
# GCN32: $exec_lo = S_MOV_B32 3
|
||||
# GCN64: $exec = S_MOV_B64 3
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
|
||||
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
||||
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
||||
|
||||
# S64 without kill
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# GCN64: V_WRITELANE
|
||||
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
||||
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
||||
# GCN32: $exec_lo = S_MOV_B32 3
|
||||
# GCN64: $exec = S_MOV_B64 3
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
|
||||
# CHECK: $exec_lo = V_READLANE
|
||||
# GCN64: $exec_hi = V_READLANE
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
|
||||
# GCN32: $exec_lo = S_MOV_B32 $sgpr12
|
||||
# GCN64: $exec = S_MOV_B64 $sgpr12_sgpr13
|
||||
# GCN64: $sgpr13 = V_READLANE
|
||||
# CHECK: $sgpr12 = V_READLANE
|
||||
|
||||
# S96
|
||||
# CHECK: V_WRITELANE
|
||||
@ -48,7 +50,7 @@
|
||||
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
||||
# GCN32: $exec_lo = S_MOV_B32 7
|
||||
# GCN64: $exec = S_MOV_B64 7
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 16
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 16
|
||||
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
||||
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
||||
|
||||
@ -61,7 +63,7 @@
|
||||
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
||||
# GCN32: $exec_lo = S_MOV_B32 15
|
||||
# GCN64: $exec = S_MOV_B64 15
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 28
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 28
|
||||
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
||||
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
||||
|
||||
@ -138,33 +140,28 @@
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# GCN32: $sgpr64 = S_MOV_B32 $exec_lo
|
||||
# GCN32: $exec_lo = S_MOV_B32 65535
|
||||
# GCN32: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160
|
||||
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# CHECK: V_WRITELANE
|
||||
# GCN32: $sgpr80 = S_MOV_B32 $exec_lo
|
||||
# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec
|
||||
# GCN32: $exec_lo = S_MOV_B32 65535
|
||||
# GCN32: $exec_lo = S_MOV_B32 4294967295
|
||||
# GCN64: $exec = S_MOV_B64 4294967295
|
||||
# GCN32: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 164
|
||||
# GCN64: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160
|
||||
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr80
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160
|
||||
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64
|
||||
# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65
|
||||
|
||||
--- |
|
||||
@ -350,7 +347,7 @@ body: |
|
||||
# S1024
|
||||
# GCN32: $sgpr64 = S_MOV_B32 $exec_lo
|
||||
# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec
|
||||
# GCN32: $exec_lo = S_MOV_B32 65535
|
||||
# GCN32: $exec_lo = S_MOV_B32 4294967295
|
||||
# GCN64: $exec = S_MOV_B64 4294967295
|
||||
# CHECK: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 160
|
||||
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64
|
||||
@ -371,10 +368,6 @@ body: |
|
||||
# CHECK: $sgpr77 = V_READLANE
|
||||
# CHECK: $sgpr78 = V_READLANE
|
||||
# CHECK: $sgpr79 = V_READLANE
|
||||
# GCN32: $sgpr80 = S_MOV_B32 $exec_lo
|
||||
# GCN32: $exec_lo = S_MOV_B32 65535
|
||||
# GCN32: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 164
|
||||
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr80
|
||||
# CHECK: $sgpr80 = V_READLANE
|
||||
# CHECK: $sgpr81 = V_READLANE
|
||||
# CHECK: $sgpr82 = V_READLANE
|
||||
|
@ -1,10 +1,15 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=TOVGPR %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; These tests check that the compiler won't crash when it needs to spill
|
||||
; SGPRs.
|
||||
|
||||
; GCN-LABEL: {{^}}main:
|
||||
|
||||
; Make sure there are no direct spills for EXEC registers before WQM
|
||||
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, exec_lo
|
||||
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, exec_hi
|
||||
|
||||
; GCN: s_wqm
|
||||
|
||||
; Make sure not emitting unused scratch resource descriptor setup
|
||||
@ -16,6 +21,13 @@
|
||||
|
||||
; Writing to M0 from an SMRD instruction will hang the GPU.
|
||||
; GCN-NOT: s_buffer_load_dword m0
|
||||
|
||||
; Make sure there are no direct spills/reloads for EXEC registers
|
||||
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, exec_lo
|
||||
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, exec_hi
|
||||
; GCN-NOT: v_readlane_b32 exec_lo
|
||||
; GCN-NOT: v_readlane_b32 exec_hi
|
||||
|
||||
; GCN: s_endpgm
|
||||
|
||||
; TOVGPR: ScratchSize: 0{{$}}
|
||||
|
@ -7,10 +7,6 @@
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @check_exec() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "frame-pointer"="all" }
|
||||
...
|
||||
---
|
||||
@ -53,12 +49,12 @@ body: |
|
||||
; GFX9: $vcc = IMPLICIT_DEF
|
||||
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_lo, 0, undef $vgpr0, implicit $vcc
|
||||
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_hi, 1, $vgpr0, implicit $vcc
|
||||
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_lo, 32, $vgpr0
|
||||
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 33, $vgpr0
|
||||
; GFX9: $vcc = S_MOV_B64 $exec
|
||||
; GFX9: $exec = S_MOV_B64 3
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
|
||||
; GFX9: $exec_lo = V_READLANE_B32_vi $vgpr0, 32
|
||||
; GFX9: $exec_hi = V_READLANE_B32_vi killed $vgpr0, 33
|
||||
; GFX9: $exec = S_MOV_B64 $vcc
|
||||
; GFX9: $vcc_hi = V_READLANE_B32_vi $vgpr0, 1
|
||||
; GFX9: $vcc_lo = V_READLANE_B32_vi killed $vgpr0, 0
|
||||
; GFX9: $vcc = IMPLICIT_DEF
|
||||
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_lo, 0, undef $vgpr0, implicit $vcc
|
||||
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_hi, 1, $vgpr0, implicit killed $vcc
|
||||
@ -83,12 +79,12 @@ body: |
|
||||
; GFX10: $vcc = IMPLICIT_DEF
|
||||
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_lo, 0, undef $vgpr0, implicit $vcc
|
||||
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_hi, 1, $vgpr0, implicit $vcc
|
||||
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_lo, 32, $vgpr0
|
||||
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 33, $vgpr0
|
||||
; GFX10: $vcc = S_MOV_B64 $exec
|
||||
; GFX10: $exec = S_MOV_B64 3
|
||||
; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
|
||||
; GFX10: $exec_lo = V_READLANE_B32_gfx10 $vgpr0, 32
|
||||
; GFX10: $exec_hi = V_READLANE_B32_gfx10 killed $vgpr0, 33
|
||||
; GFX10: $exec = S_MOV_B64 $vcc
|
||||
; GFX10: $vcc_hi = V_READLANE_B32_gfx10 $vgpr0, 1
|
||||
; GFX10: $vcc_lo = V_READLANE_B32_gfx10 killed $vgpr0, 0
|
||||
; GFX10: $vcc = IMPLICIT_DEF
|
||||
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_lo, 0, undef $vgpr0, implicit $vcc
|
||||
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_hi, 1, $vgpr0, implicit killed $vcc
|
||||
@ -110,72 +106,3 @@ body: |
|
||||
|
||||
$vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
|
||||
...
|
||||
---
|
||||
name: check_exec
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$sgpr4_sgpr5' }
|
||||
- { reg: '$sgpr6_sgpr7' }
|
||||
- { reg: '$sgpr8' }
|
||||
frameInfo:
|
||||
maxAlignment: 4
|
||||
stack:
|
||||
- { id: 0, type: spill-slot, size: 8, alignment: 4 }
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
waveLimiter: true
|
||||
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
frameOffsetReg: '$sgpr33'
|
||||
argumentInfo:
|
||||
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
|
||||
dispatchPtr: { reg: '$sgpr4_sgpr5' }
|
||||
kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
|
||||
workGroupIDX: { reg: '$sgpr8' }
|
||||
privateSegmentWaveByteOffset: { reg: '$sgpr9' }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7
|
||||
|
||||
; CHECK-LABEL: name: check_exec
|
||||
; CHECK: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9
|
||||
|
||||
; GFX9: $sgpr33 = S_MOV_B32 0
|
||||
; GFX9: $sgpr12 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX9: $sgpr13 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX9: $sgpr14 = S_MOV_B32 4294967295, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX9: $sgpr15 = S_MOV_B32 14680064, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX9: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX9: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_lo, 0, undef $vgpr0, implicit $exec
|
||||
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 1, $vgpr0, implicit $exec
|
||||
; GFX9: $exec = S_MOV_B64 3
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
|
||||
; GFX9: $exec_lo = V_READLANE_B32_vi $vgpr0, 0
|
||||
; GFX9: $exec_hi = V_READLANE_B32_vi killed $vgpr0, 1
|
||||
; GFX9: $exec = S_MOV_B64 3
|
||||
; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
|
||||
; GFX9: $exec_lo = V_READLANE_B32_vi $vgpr0, 0, implicit-def $exec
|
||||
; GFX9: $exec_hi = V_READLANE_B32_vi killed $vgpr0, 1
|
||||
|
||||
; GFX10: $sgpr33 = S_MOV_B32 0
|
||||
; GFX10: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
; GFX10: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
; GFX10: $sgpr98 = S_MOV_B32 4294967295, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
; GFX10: $sgpr99 = S_MOV_B32 836853760, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
; GFX10: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr9, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
; GFX10: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_lo, 0, undef $vgpr0, implicit $exec
|
||||
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 1, $vgpr0, implicit $exec
|
||||
; GFX10: $exec = S_MOV_B64 3
|
||||
; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
|
||||
; GFX10: $exec_lo = V_READLANE_B32_gfx10 $vgpr0, 0
|
||||
; GFX10: $exec_hi = V_READLANE_B32_gfx10 killed $vgpr0, 1
|
||||
; GFX10: $exec = S_MOV_B64 3
|
||||
; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
|
||||
; GFX10: $exec_lo = V_READLANE_B32_gfx10 $vgpr0, 0, implicit-def $exec
|
||||
; GFX10: $exec_hi = V_READLANE_B32_gfx10 killed $vgpr0, 1
|
||||
SI_SPILL_S64_SAVE $exec, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
|
||||
|
||||
$exec = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user