mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[AMDGPU] Reserve VGPRs for trap handler usage if instructed
Differential Revision: http://reviews.llvm.org/D19235 llvm-svn: 267563
This commit is contained in:
parent
635064e42f
commit
a8b24aaab2
@ -328,6 +328,13 @@ def FeatureDebuggerInsertNops : SubtargetFeature<
|
||||
"Insert two nop instructions for each high level source statement"
|
||||
>;
|
||||
|
||||
def FeatureDebuggerReserveTrapRegs : SubtargetFeature<
|
||||
"amdgpu-debugger-reserve-trap-regs",
|
||||
"DebuggerReserveTrapVGPRs",
|
||||
"true",
|
||||
"Reserve VGPRs for trap handler usage"
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def AMDGPUInstrInfo : InstrInfo {
|
||||
|
@ -235,6 +235,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
|
||||
" bytes/workgroup (compile time only)", false);
|
||||
|
||||
OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst),
|
||||
false);
|
||||
OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
|
||||
false);
|
||||
|
||||
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
|
||||
Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
|
||||
false);
|
||||
@ -472,6 +477,14 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
|
||||
MaxSGPR += ExtraSGPRs;
|
||||
|
||||
// Update necessary Reserved* fields and max VGPRs used if
|
||||
// "amdgpu-debugger-reserve-trap-regs" was specified.
|
||||
if (STM.debuggerReserveTrapVGPRs()) {
|
||||
ProgInfo.ReservedVGPRFirst = MaxVGPR + 1;
|
||||
ProgInfo.ReservedVGPRCount = STM.debuggerReserveTrapVGPRCount();
|
||||
MaxVGPR += STM.debuggerReserveTrapVGPRCount();
|
||||
}
|
||||
|
||||
// We found the maximum register index. They start at 0, so add one to get the
|
||||
// number of registers.
|
||||
ProgInfo.NumVGPR = MaxVGPR + 1;
|
||||
@ -694,6 +707,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
header.workitem_vgpr_count = KernelInfo.NumVGPR;
|
||||
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
|
||||
header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
|
||||
header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
|
||||
header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
|
||||
|
||||
AMDGPUTargetStreamer *TS =
|
||||
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
|
||||
|
@ -40,6 +40,8 @@ private:
|
||||
NumVGPR(0),
|
||||
NumSGPR(0),
|
||||
FlatUsed(false),
|
||||
ReservedVGPRFirst(0),
|
||||
ReservedVGPRCount(0),
|
||||
VCCUsed(false),
|
||||
CodeLen(0) {}
|
||||
|
||||
@ -67,6 +69,9 @@ private:
|
||||
uint32_t LDSSize;
|
||||
bool FlatUsed;
|
||||
|
||||
uint16_t ReservedVGPRFirst;
|
||||
uint16_t ReservedVGPRCount;
|
||||
|
||||
// Bonus information for debugging.
|
||||
bool VCCUsed;
|
||||
uint64_t CodeLen;
|
||||
|
@ -98,7 +98,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
LDSBankCount(0),
|
||||
IsaVersion(ISAVersion0_0_0),
|
||||
EnableSIScheduler(false),
|
||||
DebuggerInsertNops(false),
|
||||
DebuggerInsertNops(false), DebuggerReserveTrapVGPRs(false),
|
||||
FrameLowering(nullptr),
|
||||
GISel(),
|
||||
InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
|
||||
|
@ -96,6 +96,7 @@ private:
|
||||
unsigned IsaVersion;
|
||||
bool EnableSIScheduler;
|
||||
bool DebuggerInsertNops;
|
||||
bool DebuggerReserveTrapVGPRs;
|
||||
|
||||
std::unique_ptr<AMDGPUFrameLowering> FrameLowering;
|
||||
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
|
||||
@ -309,6 +310,14 @@ public:
|
||||
return DebuggerInsertNops;
|
||||
}
|
||||
|
||||
bool debuggerReserveTrapVGPRs() const {
|
||||
return DebuggerReserveTrapVGPRs;
|
||||
}
|
||||
|
||||
unsigned debuggerReserveTrapVGPRCount() const {
|
||||
return debuggerReserveTrapVGPRs() ? 4 : 0;
|
||||
}
|
||||
|
||||
bool dumpCode() const {
|
||||
return DumpCode;
|
||||
}
|
||||
|
@ -193,6 +193,17 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
|
||||
}
|
||||
|
||||
// Reserve VGPRs for trap handler usage if "amdgpu-debugger-reserve-trap-regs"
|
||||
// attribute was specified.
|
||||
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.debuggerReserveTrapVGPRs()) {
|
||||
for (unsigned i = MaxWorkGroupVGPRCount - ST.debuggerReserveTrapVGPRCount();
|
||||
i < MaxWorkGroupVGPRCount; ++i) {
|
||||
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
|
||||
reserveRegisterTuples(Reserved, Reg);
|
||||
}
|
||||
}
|
||||
|
||||
return Reserved;
|
||||
}
|
||||
|
||||
|
37
test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll
Normal file
37
test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll
Normal file
@ -0,0 +1,37 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-trap-regs -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; CHECK: reserved_vgpr_count = 4
|
||||
; CHECK: ReservedVGPRCount: 4
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @debugger_reserve_trap_regs(i32 addrspace(1)* %A) #0 {
|
||||
entry:
|
||||
%A.addr = alloca i32 addrspace(1)*, align 4
|
||||
store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
|
||||
%0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0
|
||||
store i32 1, i32 addrspace(1)* %arrayidx, align 4
|
||||
%1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
|
||||
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1
|
||||
store i32 2, i32 addrspace(1)* %arrayidx1, align 4
|
||||
%2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2
|
||||
store i32 3, i32 addrspace(1)* %arrayidx2, align 4
|
||||
%3 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
|
||||
%arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 4
|
||||
store i32 4, i32 addrspace(1)* %arrayidx3, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!opencl.kernels = !{!0}
|
||||
!llvm.ident = !{!6}
|
||||
|
||||
!0 = !{void (i32 addrspace(1)*)* @debugger_reserve_trap_regs, !1, !2, !3, !4, !5}
|
||||
!1 = !{!"kernel_arg_addr_space", i32 1}
|
||||
!2 = !{!"kernel_arg_access_qual", !"none"}
|
||||
!3 = !{!"kernel_arg_type", !"int*"}
|
||||
!4 = !{!"kernel_arg_base_type", !"int*"}
|
||||
!5 = !{!"kernel_arg_type_qual", !""}
|
||||
!6 = !{!"clang version 3.9.0 (trunk 266639)"}
|
Loading…
Reference in New Issue
Block a user