1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00
llvm-mirror/lib/Target/X86/X86ScheduleZnver1.td
Craig Topper 66c70e9248 AMD znver1 Initial Scheduler model
Summary:
This patch adds the following
1. Adds a skeleton scheduler model for AMD Znver1.
2. Introduces the znver1 execution units and pipes.
3. Caters the instructions based on the generic scheduler classes.
4. Further additions to the scheduler model with instruction itineraries will be carried out incrementally based on
        a. Instructions types
        b. Registers used
5. Since itineraries are not added based on instructions, throughput information are bound to change when incremental changes are added.
6. Scheduler testcases are modified accordingly to suit the new model.

Patch by Ganesh Gopalasubramanian. With minor formatting tweaks from me.

Reviewers: craig.topper, RKSimon

Subscribers: javed.absar, shivaram, ddibyend, vprasad

Differential Revision: https://reviews.llvm.org/D35293

llvm-svn: 308411
2017-07-19 02:45:14 +00:00

224 lines
7.3 KiB
TableGen

//=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Znver1 to support instruction
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
def Znver1Model : SchedMachineModel {
// Zen can decode 4 instructions per cycle.
let IssueWidth = 4;
// Based on the reorder buffer we define MicroOpBufferSize
let MicroOpBufferSize = 192;
let LoadLatency = 4;
let MispredictPenalty = 17;
let HighLatency = 25;
let PostRAScheduler = 1;
// FIXME: This variable is required for incomplete model.
// We haven't catered all instructions.
// So, we reset the value of this variable so as to
// say that the model is incomplete.
let CompleteModel = 0;
}
let SchedModel = Znver1Model in {
// Zen can issue micro-ops to 10 different units in one cycle.
// These are
// * Four integer ALU units (ZALU0, ZALU1, ZALU2, ZALU3)
// * Two AGU units (ZAGU0, ZAGU1)
// * Four FPU units (ZFPU0, ZFPU1, ZFPU2, ZFPU3)
// AGUs feed load store queues @two loads and 1 store per cycle.
// Four ALU units are defined below
def ZnALU0 : ProcResource<1>;
def ZnALU1 : ProcResource<1>;
def ZnALU2 : ProcResource<1>;
def ZnALU3 : ProcResource<1>;
// Two AGU units are defined below
def ZnAGU0 : ProcResource<1>;
def ZnAGU1 : ProcResource<1>;
// Four FPU units are defined below
def ZnFPU0 : ProcResource<1>;
def ZnFPU1 : ProcResource<1>;
def ZnFPU2 : ProcResource<1>;
def ZnFPU3 : ProcResource<1>;
// FPU grouping
def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>;
def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>;
def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>;
def ZnFPU13 : ProcResGroup<[ZnFPU1, ZnFPU3]>;
def ZnFPU23 : ProcResGroup<[ZnFPU2, ZnFPU3]>;
def ZnFPU02 : ProcResGroup<[ZnFPU0, ZnFPU2]>;
def ZnFPU03 : ProcResGroup<[ZnFPU0, ZnFPU3]>;
// Below are the grouping of the units.
// Micro-ops to be issued to multiple units are tackled this way.
// ALU grouping
// ZnALU03 - 0,3 grouping
def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>;
// 56 Entry (14x4 entries) Int Scheduler
def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> {
let BufferSize=56;
}
// 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations
// but are relevant for some instructions
def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> {
let BufferSize=28;
}
// Integer Multiplication issued on ALU1.
def ZnMultiplier : ProcResource<1>;
// Integer division issued on ALU2.
def ZnDivider : ProcResource<1>;
// 4 Cycles load-to use Latency is captured
def : ReadAdvance<ReadAfterLd, 4>;
// (a folded load is an instruction that loads and does some operation)
// Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops.
// a. load and
// b. addpd
// This multiclass is for folded loads for integer units.
multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
ProcResourceKind ExePort,
int Lat> {
// Register variant takes 1-cycle on Execution Port.
def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
// Memory variant also uses a cycle on ZnAGU
// adds 4 cycles to the latency.
def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
let Latency = !add(Lat, 4);
}
}
// This multiclass is for folded loads for floating point units.
multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
ProcResourceKind ExePort,
int Lat> {
// Register variant takes 1-cycle on Execution Port.
def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
// Memory variant also uses a cycle on ZnAGU
// adds 7 cycles to the latency.
def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
let Latency = !add(Lat, 7);
}
}
// WriteRMW is set for instructions with Memory write
// operation in codegen
def : WriteRes<WriteRMW, [ZnAGU]>;
def : WriteRes<WriteStore, [ZnAGU]>;
def : WriteRes<WriteMove, [ZnALU]>;
def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
def : WriteRes<WriteZero, []>;
def : WriteRes<WriteLEA, [ZnALU]>;
defm : ZnWriteResPair<WriteALU, ZnALU, 1>;
defm : ZnWriteResPair<WriteShift, ZnALU, 1>;
defm : ZnWriteResPair<WriteJump, ZnALU, 1>;
// IDIV
def : WriteRes<WriteIDiv, [ZnALU2, ZnDivider]> {
let Latency = 41;
let ResourceCycles = [1, 41];
}
def : WriteRes<WriteIDivLd, [ZnALU2, ZnAGU, ZnDivider]> {
let Latency = 45;
let ResourceCycles = [1, 4, 41];
}
// IMUL
def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
let Latency = 4;
}
def : WriteRes<WriteIMul, [ZnALU1, ZnMultiplier]> {
let Latency = 4;
}
def : WriteRes<WriteIMulLd,[ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
// Floating point operations
defm : ZnWriteResFpuPair<WriteFHAdd, ZnFPU0, 3>;
defm : ZnWriteResFpuPair<WriteFAdd, ZnFPU0, 3>;
defm : ZnWriteResFpuPair<WriteFBlend, ZnFPU01, 1>;
defm : ZnWriteResFpuPair<WriteFVarBlend, ZnFPU01, 1>;
defm : ZnWriteResFpuPair<WriteVarBlend, ZnFPU0, 1>;
defm : ZnWriteResFpuPair<WriteCvtI2F, ZnFPU3, 5>;
defm : ZnWriteResFpuPair<WriteCvtF2F, ZnFPU3, 5>;
defm : ZnWriteResFpuPair<WriteCvtF2I, ZnFPU3, 5>;
defm : ZnWriteResFpuPair<WriteFDiv, ZnFPU3, 15>;
defm : ZnWriteResFpuPair<WriteFShuffle, ZnFPU12, 1>;
defm : ZnWriteResFpuPair<WriteFMul, ZnFPU0, 5>;
defm : ZnWriteResFpuPair<WriteFRcp, ZnFPU01, 5>;
defm : ZnWriteResFpuPair<WriteFRsqrt, ZnFPU01, 5>;
defm : ZnWriteResFpuPair<WriteFSqrt, ZnFPU3, 20>;
// Vector integer operations which uses FPU units
defm : ZnWriteResFpuPair<WriteVecShift, ZnFPU, 1>;
defm : ZnWriteResFpuPair<WriteVecLogic, ZnFPU, 1>;
defm : ZnWriteResFpuPair<WritePHAdd, ZnFPU, 1>;
defm : ZnWriteResFpuPair<WriteVecALU, ZnFPU, 1>;
defm : ZnWriteResFpuPair<WriteVecIMul, ZnFPU0, 4>;
defm : ZnWriteResFpuPair<WriteShuffle, ZnFPU, 1>;
defm : ZnWriteResFpuPair<WriteBlend, ZnFPU01, 1>;
defm : ZnWriteResFpuPair<WriteShuffle256, ZnFPU, 2>;
// Vector Shift Operations
defm : ZnWriteResFpuPair<WriteVarVecShift, ZnFPU12, 1>;
// AES Instructions.
defm : ZnWriteResFpuPair<WriteAESDecEnc, ZnFPU01, 4>;
defm : ZnWriteResFpuPair<WriteAESIMC, ZnFPU01, 4>;
defm : ZnWriteResFpuPair<WriteAESKeyGen, ZnFPU01, 4>;
def : WriteRes<WriteFence, [ZnAGU]>;
def : WriteRes<WriteNop, []>;
// Following instructions with latency=100 are microcoded.
// We set long latency so as to block the entire pipeline.
defm : ZnWriteResFpuPair<WriteFShuffle256, ZnFPU, 100>;
//Microcoded Instructions
let Latency = 100 in {
def : WriteRes<WriteMicrocoded, []>;
def : WriteRes<WriteSystem, []>;
def : WriteRes<WriteMPSAD, []>;
def : WriteRes<WriteMPSADLd, []>;
def : WriteRes<WriteCLMul, []>;
def : WriteRes<WriteCLMulLd, []>;
def : WriteRes<WritePCmpIStrM, []>;
def : WriteRes<WritePCmpIStrMLd, []>;
def : WriteRes<WritePCmpEStrI, []>;
def : WriteRes<WritePCmpEStrILd, []>;
def : WriteRes<WritePCmpEStrM, []>;
def : WriteRes<WritePCmpEStrMLd, []>;
def : WriteRes<WritePCmpIStrI, []>;
def : WriteRes<WritePCmpIStrILd, []>;
}
}