mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
66c70e9248
Summary: This patch adds the following 1. Adds a skeleton scheduler model for AMD Znver1. 2. Introduces the znver1 execution units and pipes. 3. Caters the instructions based on the generic scheduler classes. 4. Further additions to the scheduler model with instruction itineraries will be carried out incrementally based on a. Instructions types b. Registers used 5. Since itineraries are not added based on instructions, throughput information are bound to change when incremental changes are added. 6. Scheduler testcases are modified accordingly to suit the new model. Patch by Ganesh Gopalasubramanian. With minor formatting tweaks from me. Reviewers: craig.topper, RKSimon Subscribers: javed.absar, shivaram, ddibyend, vprasad Differential Revision: https://reviews.llvm.org/D35293 llvm-svn: 308411
224 lines
7.3 KiB
TableGen
224 lines
7.3 KiB
TableGen
//=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines the machine model for Znver1 to support instruction
|
|
// scheduling and other instruction cost heuristics.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Machine model record for Znver1. Each `let` below overrides one field of
// SchedMachineModel for this processor.
def Znver1Model : SchedMachineModel {
|
|
// Zen can decode 4 instructions per cycle.
|
|
let IssueWidth = 4;
|
|
// MicroOpBufferSize is defined from the size of the reorder buffer.
|
|
let MicroOpBufferSize = 192;
|
|
// Load latency in cycles; the same value is used for the ReadAfterLd
// advance declared later in this file.
let LoadLatency = 4;
|
|
let MispredictPenalty = 17;
|
|
let HighLatency = 25;
|
|
let PostRAScheduler = 1;
|
|
|
|
// FIXME: CompleteModel has to be overridden while the model is incomplete.
|
|
// Not all instructions are covered by this model yet,
|
|
// so the value is reset to 0 here in order to
|
|
// mark the model as incomplete.
|
|
let CompleteModel = 0;
|
|
}
|
|
|
|
let SchedModel = Znver1Model in {
|
|
|
|
// Zen can issue micro-ops to 10 different units in one cycle.
|
|
// These are
|
|
// * Four integer ALU units (ZnALU0, ZnALU1, ZnALU2, ZnALU3)
|
|
// * Two AGU units (ZnAGU0, ZnAGU1)
|
|
// * Four FPU units (ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3)
|
|
// The AGUs feed the load/store queues at two loads and one store per cycle.
|
|
|
|
// Four ALU units are defined below
|
|
def ZnALU0 : ProcResource<1>;
|
|
def ZnALU1 : ProcResource<1>;
|
|
def ZnALU2 : ProcResource<1>;
|
|
def ZnALU3 : ProcResource<1>;
|
|
|
|
// Two AGU units are defined below
|
|
def ZnAGU0 : ProcResource<1>;
|
|
def ZnAGU1 : ProcResource<1>;
|
|
|
|
// Four FPU units are defined below
|
|
def ZnFPU0 : ProcResource<1>;
|
|
def ZnFPU1 : ProcResource<1>;
|
|
def ZnFPU2 : ProcResource<1>;
|
|
def ZnFPU3 : ProcResource<1>;
|
|
|
|
// FPU grouping
|
|
def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>;
|
|
def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
|
|
def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>;
|
|
def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>;
|
|
def ZnFPU13 : ProcResGroup<[ZnFPU1, ZnFPU3]>;
|
|
def ZnFPU23 : ProcResGroup<[ZnFPU2, ZnFPU3]>;
|
|
def ZnFPU02 : ProcResGroup<[ZnFPU0, ZnFPU2]>;
|
|
def ZnFPU03 : ProcResGroup<[ZnFPU0, ZnFPU3]>;
|
|
|
|
// Below are the groupings of the units.
|
|
// Micro-ops to be issued to multiple units are tackled this way.
|
|
|
|
// ALU grouping
|
|
// ZnALU03 - 0,3 grouping
|
|
def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>;
|
|
|
|
// 56-entry (14x4 entries) integer scheduler, modelled as the BufferSize of
// the group containing all four ALUs.
|
|
def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> {
|
|
let BufferSize=56;
|
|
}
|
|
|
|
// 28-entry (14x2) AGU group. The AGUs cannot be used for all ALU operations,
|
|
// but they are relevant for some instructions.
|
|
def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> {
|
|
let BufferSize=28;
|
|
}
|
|
|
|
// Integer Multiplication issued on ALU1.
|
|
def ZnMultiplier : ProcResource<1>;
|
|
|
|
// Integer division issued on ALU2.
|
|
def ZnDivider : ProcResource<1>;
|
|
|
|
// The 4-cycle load-to-use latency is captured here.
|
|
def : ReadAdvance<ReadAfterLd, 4>;
|
|
|
|
// (a folded load is an instruction that loads and does some operation)
|
|
// Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops
|
|
// Instructions with folded loads are usually micro-fused, so they only appear
|
|
// as two micro-ops.
|
|
// a. load and
|
|
// b. addpd
|
|
// This multiclass is for folded loads for integer units.
|
|
// Defines the WriteRes records for both forms of a foldable write.
//   SchedRW - the foldable scheduling write; SchedRW.Folded is its
//             folded-load counterpart.
//   ExePort - the processor resource (or group) the operation executes on.
//   Lat     - latency of the register form, in cycles.
multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
|
|
ProcResourceKind ExePort,
|
|
int Lat> {
|
|
// Register variant: uses ExePort and has a latency of Lat cycles.
|
|
def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
|
|
|
|
// Memory variant: additionally uses ZnAGU, and the load
|
|
// adds 4 cycles to the latency.
|
|
def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
|
|
let Latency = !add(Lat, 4);
|
|
}
|
|
}
|
|
|
|
// This multiclass is for folded loads for floating point units.
|
|
// Defines the WriteRes records for both forms of a foldable write that
// executes on the floating point units.
//   SchedRW - the foldable scheduling write; SchedRW.Folded is its
//             folded-load counterpart.
//   ExePort - the processor resource (or group) the operation executes on.
//   Lat     - latency of the register form, in cycles.
multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
|
|
ProcResourceKind ExePort,
|
|
int Lat> {
|
|
// Register variant: uses ExePort and has a latency of Lat cycles.
|
|
def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
|
|
|
|
// Memory variant: additionally uses ZnAGU, and the load
|
|
// adds 7 cycles to the latency.
|
|
def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
|
|
let Latency = !add(Lat, 7);
|
|
}
|
|
}
|
|
|
|
// WriteRMW is set for instructions with Memory write
|
|
// operation in codegen
|
|
def : WriteRes<WriteRMW, [ZnAGU]>;
|
|
|
|
def : WriteRes<WriteStore, [ZnAGU]>;
|
|
def : WriteRes<WriteMove, [ZnALU]>;
|
|
def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
|
|
|
|
def : WriteRes<WriteZero, []>;
|
|
def : WriteRes<WriteLEA, [ZnALU]>;
|
|
defm : ZnWriteResPair<WriteALU, ZnALU, 1>;
|
|
defm : ZnWriteResPair<WriteShift, ZnALU, 1>;
|
|
defm : ZnWriteResPair<WriteJump, ZnALU, 1>;
|
|
|
|
// IDIV
// Each ResourceCycles entry pairs positionally with the resource at the same
// index of the WriteRes resource list: the divider is held for 41 cycles
// while ZnALU2 is held for 1.
|
|
def : WriteRes<WriteIDiv, [ZnALU2, ZnDivider]> {
|
|
let Latency = 41;
|
|
let ResourceCycles = [1, 41];
|
|
}
|
|
|
|
// Folded-load form of IDIV: also uses ZnAGU (held for 4 cycles) and has a
// latency 4 cycles higher than the register form.
def : WriteRes<WriteIDivLd, [ZnALU2, ZnAGU, ZnDivider]> {
|
|
let Latency = 45;
|
|
let ResourceCycles = [1, 4, 41];
|
|
}
|
|
|
|
// IMUL
|
|
def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
|
|
let Latency = 4;
|
|
}
|
|
def : WriteRes<WriteIMul, [ZnALU1, ZnMultiplier]> {
|
|
let Latency = 4;
|
|
}
|
|
|
|
// Folded-load form of IMUL. The load goes through an AGU, so ZnAGU is charged
// in addition to ZnALU1 and the multiplier, consistent with WriteIDivLd and
// with the Folded variants defined by ZnWriteResPair. The latency is the
// register-form latency (4) plus 4 cycles for the load.
def : WriteRes<WriteIMulLd, [ZnAGU, ZnALU1, ZnMultiplier]> {
|
|
let Latency = 8;
|
|
}
|
|
|
|
// Floating point operations
|
|
defm : ZnWriteResFpuPair<WriteFHAdd, ZnFPU0, 3>;
|
|
defm : ZnWriteResFpuPair<WriteFAdd, ZnFPU0, 3>;
|
|
defm : ZnWriteResFpuPair<WriteFBlend, ZnFPU01, 1>;
|
|
defm : ZnWriteResFpuPair<WriteFVarBlend, ZnFPU01, 1>;
|
|
defm : ZnWriteResFpuPair<WriteVarBlend, ZnFPU0, 1>;
|
|
defm : ZnWriteResFpuPair<WriteCvtI2F, ZnFPU3, 5>;
|
|
defm : ZnWriteResFpuPair<WriteCvtF2F, ZnFPU3, 5>;
|
|
defm : ZnWriteResFpuPair<WriteCvtF2I, ZnFPU3, 5>;
|
|
defm : ZnWriteResFpuPair<WriteFDiv, ZnFPU3, 15>;
|
|
defm : ZnWriteResFpuPair<WriteFShuffle, ZnFPU12, 1>;
|
|
defm : ZnWriteResFpuPair<WriteFMul, ZnFPU0, 5>;
|
|
defm : ZnWriteResFpuPair<WriteFRcp, ZnFPU01, 5>;
|
|
defm : ZnWriteResFpuPair<WriteFRsqrt, ZnFPU01, 5>;
|
|
defm : ZnWriteResFpuPair<WriteFSqrt, ZnFPU3, 20>;
|
|
|
|
// Vector integer operations which use the FPU units
|
|
defm : ZnWriteResFpuPair<WriteVecShift, ZnFPU, 1>;
|
|
defm : ZnWriteResFpuPair<WriteVecLogic, ZnFPU, 1>;
|
|
defm : ZnWriteResFpuPair<WritePHAdd, ZnFPU, 1>;
|
|
defm : ZnWriteResFpuPair<WriteVecALU, ZnFPU, 1>;
|
|
defm : ZnWriteResFpuPair<WriteVecIMul, ZnFPU0, 4>;
|
|
defm : ZnWriteResFpuPair<WriteShuffle, ZnFPU, 1>;
|
|
defm : ZnWriteResFpuPair<WriteBlend, ZnFPU01, 1>;
|
|
defm : ZnWriteResFpuPair<WriteShuffle256, ZnFPU, 2>;
|
|
|
|
// Vector Shift Operations
|
|
defm : ZnWriteResFpuPair<WriteVarVecShift, ZnFPU12, 1>;
|
|
|
|
// AES Instructions.
|
|
defm : ZnWriteResFpuPair<WriteAESDecEnc, ZnFPU01, 4>;
|
|
defm : ZnWriteResFpuPair<WriteAESIMC, ZnFPU01, 4>;
|
|
defm : ZnWriteResFpuPair<WriteAESKeyGen, ZnFPU01, 4>;
|
|
|
|
def : WriteRes<WriteFence, [ZnAGU]>;
|
|
def : WriteRes<WriteNop, []>;
|
|
|
|
// Following instructions with latency=100 are microcoded.
|
|
// We set long latency so as to block the entire pipeline.
|
|
defm : ZnWriteResFpuPair<WriteFShuffle256, ZnFPU, 100>;
|
|
|
|
// Microcoded instructions.
// Every write in this block receives Latency = 100 from the enclosing `let`
// and lists no processor resources.
|
|
let Latency = 100 in {
|
|
def : WriteRes<WriteMicrocoded, []>;
|
|
def : WriteRes<WriteSystem, []>;
|
|
def : WriteRes<WriteMPSAD, []>;
|
|
def : WriteRes<WriteMPSADLd, []>;
|
|
def : WriteRes<WriteCLMul, []>;
|
|
def : WriteRes<WriteCLMulLd, []>;
|
|
def : WriteRes<WritePCmpIStrM, []>;
|
|
def : WriteRes<WritePCmpIStrMLd, []>;
|
|
def : WriteRes<WritePCmpEStrI, []>;
|
|
def : WriteRes<WritePCmpEStrILd, []>;
|
|
def : WriteRes<WritePCmpEStrM, []>;
|
|
def : WriteRes<WritePCmpEStrMLd, []>;
|
|
def : WriteRes<WritePCmpIStrI, []>;
|
|
def : WriteRes<WritePCmpIStrILd, []>;
|
|
}
|
|
}
|