1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00
llvm-mirror/lib/Target/AMDGPU/SISchedule.td
Tom Stellard 69c2b6ab61 AMDGPU/SI: Improve MachineSchedModel definition
This patch contains a few improvements to the model, including:

- Using a single resource with a defined buffers size for each memory unit.
- Setting the IssueWidth correctly.
- Fixing latency values for memory instructions.

shader-db stats:

16429 shaders in 3231 tests
Totals:
SGPRS: 318232 -> 312328 (-1.86 %)
VGPRS: 208996 -> 209346 (0.17 %)
Code Size: 7147044 -> 7166440 (0.27 %) bytes
LDS: 83 -> 83 (0.00 %) blocks
Scratch: 1862656 -> 1459200 (-21.66 %) bytes per wave
Max Waves: 49182 -> 49243 (0.12 %)
Wait states: 0 -> 0 (0.00 %)

Differential Revision: http://reviews.llvm.org/D18453

llvm-svn: 264877
2016-03-30 16:35:13 +00:00

118 lines
3.3 KiB
TableGen

//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//
// Scheduling write types. Each def below only declares a SchedWrite record;
// processor resources and latencies are attached to them further down in this
// file through HWWriteRes / HWVALUWriteRes entries.
def WriteBranch : SchedWrite;
def WriteExport : SchedWrite;
def WriteLDS : SchedWrite;
def WriteSALU : SchedWrite;
def WriteSMEM : SchedWrite;
def WriteVMEM : SchedWrite;
def WriteBarrier : SchedWrite;
// Vector ALU instructions
def Write32Bit : SchedWrite;
def WriteQuarterRate32 : SchedWrite;
// NOTE(review): no WriteRes entry for WriteFullOrQuarterRate32 is visible in
// this file -- confirm whether that is intentional.
def WriteFullOrQuarterRate32 : SchedWrite;
def WriteFloatFMA : SchedWrite;
// Slow quarter rate f64 instruction.
def WriteDouble : SchedWrite;
// Half rate f64 instruction (same as v_add_f64).
def WriteDoubleAdd : SchedWrite;
// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;
// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)?
// Common base for the SI machine models. Both settings below override
// fields inherited from SchedMachineModel.
class SISchedMachineModel : SchedMachineModel {
  let IssueWidth = 1;
  // NOTE(review): presumably marks the model as incomplete, i.e. not every
  // instruction is required to carry scheduling info -- confirm against
  // TargetSchedule.td.
  let CompleteModel = 0;
}

// The two concrete models. In this file they share SICommonWriteRes and
// differ only in the latencies given to WriteFloatFMA, WriteDouble and
// WriteDoubleAdd.
def SIFullSpeedModel    : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
// XXX: Are the resource counts correct?
//
// Hardware resources. Every resource is declared with a single unit
// (ProcResource<1>) and an explicit BufferSize.
let BufferSize = 1 in
def HWBranch : ProcResource<1>;

// Taken from S_WAITCNT
let BufferSize = 7 in
def HWExport : ProcResource<1>;

// Taken from S_WAITCNT
let BufferSize = 31 in
def HWLGKM : ProcResource<1>;

let BufferSize = 1 in
def HWSALU : ProcResource<1>;

// Taken from S_WAITCNT
let BufferSize = 15 in
def HWVMEM : ProcResource<1>;

let BufferSize = 1 in
def HWVALU : ProcResource<1>;
// Convenience wrapper around WriteRes: associates `write` with the given
// `resources` and sets the record's Latency field to `latency`.
class HWWriteRes<SchedWrite write,
                 list<ProcResourceKind> resources,
                 int latency>
    : WriteRes<write, resources> {
  let Latency = latency;
}
// HWWriteRes specialised for writes that use only the HWVALU resource.
class HWVALUWriteRes<SchedWrite write, int latency>
    : HWWriteRes<write, [HWVALU], latency>;
// The latency numbers are taken from AMD Accelerated Parallel Processing
// guide. They may not be accurate.
// The latency values are 1 / (operations / cycle) / 4.
//
// Write resources that are identical in every SI model. Instantiate with
// `defm : SICommonWriteRes;` inside a `let SchedModel = ... in` scope.
multiclass SICommonWriteRes {
  // Writes bound to an explicitly named resource.
  def : HWWriteRes<WriteBranch,  [HWBranch], 8>;
  def : HWWriteRes<WriteExport,  [HWExport], 4>;
  def : HWWriteRes<WriteLDS,     [HWLGKM],   5>; // Can be between 2 and 64
  def : HWWriteRes<WriteSALU,    [HWSALU],   1>;
  def : HWWriteRes<WriteSMEM,    [HWLGKM],   5>;
  def : HWWriteRes<WriteVMEM,    [HWVMEM],   80>;
  def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???

  // Writes bound to HWVALU whose latency does not depend on the model.
  def : HWVALUWriteRes<Write32Bit,         1>;
  def : HWVALUWriteRes<Write64Bit,         2>;
  def : HWVALUWriteRes<WriteQuarterRate32, 4>;
}
// Write resources for SIFullSpeedModel: the common set plus the
// model-specific FMA and f64 latencies.
let SchedModel = SIFullSpeedModel in {
  defm : SICommonWriteRes;

  def : HWVALUWriteRes<WriteFloatFMA,  1>;
  def : HWVALUWriteRes<WriteDouble,    4>;
  def : HWVALUWriteRes<WriteDoubleAdd, 2>;
} // End SchedModel = SIFullSpeedModel
// Write resources for SIQuarterSpeedModel: the common set plus the
// model-specific FMA and f64 latencies.
let SchedModel = SIQuarterSpeedModel in {
  defm : SICommonWriteRes;

  def : HWVALUWriteRes<WriteFloatFMA,  16>;
  def : HWVALUWriteRes<WriteDouble,    16>;
  def : HWVALUWriteRes<WriteDoubleAdd, 8>;
} // End SchedModel = SIQuarterSpeedModel