1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00
llvm-mirror/lib/Target/ARM/ARMScheduleM4.td
David Green 2f19a7836e [ARM] Cortex-M4 schedule additions
This is an attempt to fill in some of the missing instructions from the
Cortex-M4 schedule, and make it easier to do the same for other ARM cpus.

- Some instructions are marked as hasNoSchedulingInfo as they are pseudos or
  otherwise do not require scheduling info
- A lot of features have been marked not supported
- Some WriteRes's have been added for cvt instructions.
- Some extra instruction latencies have been added, notably by relaxing the
  regex for dsp instruction to catch more cases, and some fp instructions.

This goes a long way to get the CompleteModel working for this CPU. It does not
go far enough as to get all scheduling info for all output operands correct.

Differential Revision: https://reviews.llvm.org/D67957

llvm-svn: 373163
2019-09-29 08:38:48 +00:00

138 lines
4.8 KiB
TableGen

//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
//
//===----------------------------------------------------------------------===//
def CortexM4Model : SchedMachineModel {
let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue
let MicroOpBufferSize = 0; // In-order
let LoadLatency = 2; // Latency when not pipelined, not pc-relative
let MispredictPenalty = 2; // Best case branch taken cost
let PostRAScheduler = 1;
let CompleteModel = 0;
let UnsupportedFeatures = [IsARM, HasNEON, HasDotProd, HasZCZ, HasMVEInt,
IsNotMClass, HasDPVFP, HasFPARMv8, HasFullFP16, Has8MSecExt, HasV8,
HasV8_3a, HasTrustZone, HasDFB, IsWindows];
}
// We model the entire cpu as a single pipeline with a BufferSize = 0 since
// Cortex-M4 is in-order.
def M4Unit : ProcResource<1> { let BufferSize = 0; }
let SchedModel = CortexM4Model in {
// Some definitions of latencies we apply to different instructions
class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 1; }
class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 2; }
class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 3; }
class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 14; }
def M4UnitL1_wr : SchedWriteRes<[M4Unit]> { let Latency = 1; }
def M4UnitL2_wr : SchedWriteRes<[M4Unit]> { let Latency = 2; }
class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;
// Loads, MAC's and DIV all get a higher latency of 2
def : M4UnitL2<WriteLd>;
def : M4UnitL2<WriteMAC32>;
def : M4UnitL2<WriteMAC64Hi>;
def : M4UnitL2<WriteMAC64Lo>;
def : M4UnitL2<WriteMAC16>;
def : M4UnitL2<WriteDIV>;
def : M4UnitL2I<(instregex "(t|t2)LDM")>;
def : M4UnitL2I<(instregex "(t|t2)LDR")>;
// Stores we use a latency of 1 as they have no outputs
def : M4UnitL1<WriteST>;
def : M4UnitL1I<(instregex "(t|t2)STM")>;
// Everything else has a Latency of 1
def : M4UnitL1<WriteALU>;
def : M4UnitL1<WriteALUsi>;
def : M4UnitL1<WriteALUsr>;
def : M4UnitL1<WriteALUSsr>;
def : M4UnitL1<WriteBr>;
def : M4UnitL1<WriteBrL>;
def : M4UnitL1<WriteBrTbl>;
def : M4UnitL1<WriteCMPsi>;
def : M4UnitL1<WriteCMPsr>;
def : M4UnitL1<WriteCMP>;
def : M4UnitL1<WriteMUL32>;
def : M4UnitL1<WriteMUL64Hi>;
def : M4UnitL1<WriteMUL64Lo>;
def : M4UnitL1<WriteMUL16>;
def : M4UnitL1<WriteNoop>;
def : M4UnitL1<WritePreLd>;
def : M4UnitL1I<(instregex "(t|t2)MOV")>;
def : M4UnitL1I<(instrs COPY)>;
def : M4UnitL1I<(instregex "t2IT", "t2MSR", "t2MRS")>;
def : M4UnitL1I<(instregex "t2CLREX")>;
def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", "t2SML[AS]",
"t2(S|Q|SH|U|UQ|UH|QD)(ADD|ASX|SAX|SUB)", "t2USADA8", "(t|t2)REV")>;
// These instructions are not of much interest to scheduling as they will not
// be generated or it is not very useful to schedule them. They are here to make
// the model more complete.
def : M4UnitL1I<(instregex "t2CDP", "t2LDC", "t2MCR", "t2MRC", "t2MRRC", "t2STC")>;
def : M4UnitL1I<(instregex "tCPS", "t2ISB", "t2DSB", "t2DMB", "t2?HINT$")>;
def : M4UnitL1I<(instregex "t2?UDF$", "tBKPT", "t2DBG")>;
def : M4UnitL1I<(instregex "t?2?Int_eh_sjlj_", "tADDframe", "t?ADJCALL")>;
def : M4UnitL1I<(instregex "CMP_SWAP", "JUMPTABLE", "MEMCPY")>;
def : M4UnitL1I<(instregex "VSETLNi32", "VGETLNi32")>;
def : ReadAdvance<ReadALU, 0>;
def : ReadAdvance<ReadALUsr, 0>;
def : ReadAdvance<ReadMUL, 0>;
def : ReadAdvance<ReadMAC, 0>;
// Most FP instructions are single-cycle latency, except MAC's, Div's and Sqrt's.
// Loads still take 2 cycles.
def : M4UnitL1<WriteFPCVT>;
def : M4UnitL1<WriteFPMOV>;
def : M4UnitL1<WriteFPALU32>;
def : M4UnitL1<WriteFPALU64>;
def : M4UnitL1<WriteFPMUL32>;
def : M4UnitL1<WriteFPMUL64>;
def : M4UnitL2I<(instregex "VLD")>;
def : M4UnitL1I<(instregex "VST")>;
def : M4UnitL3<WriteFPMAC32>;
def : M4UnitL3<WriteFPMAC64>;
def : M4UnitL14<WriteFPDIV32>;
def : M4UnitL14<WriteFPDIV64>;
def : M4UnitL14<WriteFPSQRT32>;
def : M4UnitL14<WriteFPSQRT64>;
def : M4UnitL1<WriteVLD1>;
def : M4UnitL1<WriteVLD2>;
def : M4UnitL1<WriteVLD3>;
def : M4UnitL1<WriteVLD4>;
def : M4UnitL1<WriteVST1>;
def : M4UnitL1<WriteVST2>;
def : M4UnitL1<WriteVST3>;
def : M4UnitL1<WriteVST4>;
def : M4UnitL1I<(instregex "VMOVS", "FCONSTS", "VCMP", "VNEG", "VABS")>;
def : M4UnitL2I<(instregex "VMOVD")>;
def : M4UnitL1I<(instregex "VMRS", "VMSR", "FMSTAT")>;
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 0>;
}