mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[AArch64] Refactor the Exynos scheduling predicates
Refactor the scheduling predicates for the Exynos processors so that they are based on `MCInstPredicate`. Differential revision: https://reviews.llvm.org/D55345 llvm-svn: 348774
This commit is contained in:
parent
ad77e61e7c
commit
c839f35d10
@ -39,7 +39,7 @@
|
||||
// processor scheduling model.
|
||||
//
|
||||
// The `MCInstPredicateExample` definition above is equivalent (and therefore
|
||||
// could replace) the following definition from the ExynosM3 model (see
|
||||
// could replace) the following definition from a previous ExynosM3 model (see
|
||||
// AArch64SchedExynosM3.td):
|
||||
//
|
||||
// def M3BranchLinkFastPred : SchedPredicate<[{
|
||||
|
@ -371,6 +371,7 @@ include "AArch64CallingConvention.td"
|
||||
include "AArch64Schedule.td"
|
||||
include "AArch64InstrInfo.td"
|
||||
include "AArch64SchedPredicates.td"
|
||||
include "AArch64SchedPredExynos.td"
|
||||
|
||||
def AArch64InstrInfo : InstrInfo;
|
||||
|
||||
|
@ -705,7 +705,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
|
||||
// Secondly, check cases specific to sub-targets.
|
||||
|
||||
if (Subtarget.hasExynosCheapAsMoveHandling()) {
|
||||
if (isExynosResetFast(MI) || isExynosShiftExtFast(MI))
|
||||
if (isExynosCheapAsMove(MI))
|
||||
return true;
|
||||
|
||||
return MI.isAsCheapAsAMove();
|
||||
@ -759,213 +759,6 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
|
||||
llvm_unreachable("Unknown opcode to check as cheap as a move!");
|
||||
}
|
||||
|
||||
bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) {
|
||||
unsigned Reg, Imm, Shift;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
|
||||
// MOV Rd, SP
|
||||
case AArch64::ADDWri:
|
||||
case AArch64::ADDXri:
|
||||
if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
|
||||
return false;
|
||||
|
||||
Reg = MI.getOperand(1).getReg();
|
||||
Imm = MI.getOperand(2).getImm();
|
||||
return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
|
||||
|
||||
// Literal
|
||||
case AArch64::ADR:
|
||||
case AArch64::ADRP:
|
||||
return true;
|
||||
|
||||
// MOVI Vd, #0
|
||||
case AArch64::MOVID:
|
||||
case AArch64::MOVIv8b_ns:
|
||||
case AArch64::MOVIv2d_ns:
|
||||
case AArch64::MOVIv16b_ns:
|
||||
Imm = MI.getOperand(1).getImm();
|
||||
return (Imm == 0);
|
||||
|
||||
// MOVI Vd, #0
|
||||
case AArch64::MOVIv2i32:
|
||||
case AArch64::MOVIv4i16:
|
||||
case AArch64::MOVIv4i32:
|
||||
case AArch64::MOVIv8i16:
|
||||
Imm = MI.getOperand(1).getImm();
|
||||
Shift = MI.getOperand(2).getImm();
|
||||
return (Imm == 0 && Shift == 0);
|
||||
|
||||
// MOV Rd, Imm
|
||||
case AArch64::MOVNWi:
|
||||
case AArch64::MOVNXi:
|
||||
|
||||
// MOV Rd, Imm
|
||||
case AArch64::MOVZWi:
|
||||
case AArch64::MOVZXi:
|
||||
return true;
|
||||
|
||||
// MOV Rd, Imm
|
||||
case AArch64::ORRWri:
|
||||
case AArch64::ORRXri:
|
||||
if (!MI.getOperand(1).isReg())
|
||||
return false;
|
||||
|
||||
Reg = MI.getOperand(1).getReg();
|
||||
Imm = MI.getOperand(2).getImm();
|
||||
return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
|
||||
|
||||
// MOV Rd, Rm
|
||||
case AArch64::ORRWrs:
|
||||
case AArch64::ORRXrs:
|
||||
if (!MI.getOperand(1).isReg())
|
||||
return false;
|
||||
|
||||
Reg = MI.getOperand(1).getReg();
|
||||
Imm = MI.getOperand(3).getImm();
|
||||
Shift = AArch64_AM::getShiftValue(Imm);
|
||||
return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
|
||||
}
|
||||
}
|
||||
|
||||
bool AArch64InstrInfo::isExynosLdStExtFast(const MachineInstr &MI) {
|
||||
unsigned Imm;
|
||||
AArch64_AM::ShiftExtendType Ext;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
|
||||
// WriteLD
|
||||
case AArch64::PRFMroW:
|
||||
case AArch64::PRFMroX:
|
||||
|
||||
// WriteLDIdx
|
||||
case AArch64::LDRBBroW:
|
||||
case AArch64::LDRBBroX:
|
||||
case AArch64::LDRHHroW:
|
||||
case AArch64::LDRHHroX:
|
||||
case AArch64::LDRSBWroW:
|
||||
case AArch64::LDRSBWroX:
|
||||
case AArch64::LDRSBXroW:
|
||||
case AArch64::LDRSBXroX:
|
||||
case AArch64::LDRSHWroW:
|
||||
case AArch64::LDRSHWroX:
|
||||
case AArch64::LDRSHXroW:
|
||||
case AArch64::LDRSHXroX:
|
||||
case AArch64::LDRSWroW:
|
||||
case AArch64::LDRSWroX:
|
||||
case AArch64::LDRWroW:
|
||||
case AArch64::LDRWroX:
|
||||
case AArch64::LDRXroW:
|
||||
case AArch64::LDRXroX:
|
||||
|
||||
case AArch64::LDRBroW:
|
||||
case AArch64::LDRBroX:
|
||||
case AArch64::LDRDroW:
|
||||
case AArch64::LDRDroX:
|
||||
case AArch64::LDRHroW:
|
||||
case AArch64::LDRHroX:
|
||||
case AArch64::LDRSroW:
|
||||
case AArch64::LDRSroX:
|
||||
|
||||
// WriteSTIdx
|
||||
case AArch64::STRBBroW:
|
||||
case AArch64::STRBBroX:
|
||||
case AArch64::STRHHroW:
|
||||
case AArch64::STRHHroX:
|
||||
case AArch64::STRWroW:
|
||||
case AArch64::STRWroX:
|
||||
case AArch64::STRXroW:
|
||||
case AArch64::STRXroX:
|
||||
|
||||
case AArch64::STRBroW:
|
||||
case AArch64::STRBroX:
|
||||
case AArch64::STRDroW:
|
||||
case AArch64::STRDroX:
|
||||
case AArch64::STRHroW:
|
||||
case AArch64::STRHroX:
|
||||
case AArch64::STRSroW:
|
||||
case AArch64::STRSroX:
|
||||
Imm = MI.getOperand(3).getImm();
|
||||
Ext = AArch64_AM::getMemExtendType(Imm);
|
||||
return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
|
||||
}
|
||||
}
|
||||
|
||||
bool AArch64InstrInfo::isExynosShiftExtFast(const MachineInstr &MI) {
|
||||
unsigned Imm, Shift;
|
||||
AArch64_AM::ShiftExtendType Ext = AArch64_AM::UXTX;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
|
||||
// WriteI
|
||||
case AArch64::ADDSWri:
|
||||
case AArch64::ADDSXri:
|
||||
case AArch64::ADDWri:
|
||||
case AArch64::ADDXri:
|
||||
case AArch64::SUBSWri:
|
||||
case AArch64::SUBSXri:
|
||||
case AArch64::SUBWri:
|
||||
case AArch64::SUBXri:
|
||||
return true;
|
||||
|
||||
// WriteISReg
|
||||
case AArch64::ADDSWrs:
|
||||
case AArch64::ADDSXrs:
|
||||
case AArch64::ADDWrs:
|
||||
case AArch64::ADDXrs:
|
||||
case AArch64::ANDSWrs:
|
||||
case AArch64::ANDSXrs:
|
||||
case AArch64::ANDWrs:
|
||||
case AArch64::ANDXrs:
|
||||
case AArch64::BICSWrs:
|
||||
case AArch64::BICSXrs:
|
||||
case AArch64::BICWrs:
|
||||
case AArch64::BICXrs:
|
||||
case AArch64::EONWrs:
|
||||
case AArch64::EONXrs:
|
||||
case AArch64::EORWrs:
|
||||
case AArch64::EORXrs:
|
||||
case AArch64::ORNWrs:
|
||||
case AArch64::ORNXrs:
|
||||
case AArch64::ORRWrs:
|
||||
case AArch64::ORRXrs:
|
||||
case AArch64::SUBSWrs:
|
||||
case AArch64::SUBSXrs:
|
||||
case AArch64::SUBWrs:
|
||||
case AArch64::SUBXrs:
|
||||
Imm = MI.getOperand(3).getImm();
|
||||
Shift = AArch64_AM::getShiftValue(Imm);
|
||||
Ext = AArch64_AM::getShiftType(Imm);
|
||||
return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
|
||||
|
||||
// WriteIEReg
|
||||
case AArch64::ADDSWrx:
|
||||
case AArch64::ADDSXrx:
|
||||
case AArch64::ADDWrx:
|
||||
case AArch64::ADDXrx:
|
||||
case AArch64::SUBSWrx:
|
||||
case AArch64::SUBSXrx:
|
||||
case AArch64::SUBWrx:
|
||||
case AArch64::SUBXrx:
|
||||
Ext = AArch64_AM::UXTW;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AArch64::ADDSXrx64:
|
||||
case AArch64::ADDXrx64:
|
||||
case AArch64::SUBSXrx64:
|
||||
case AArch64::SUBXrx64:
|
||||
Imm = MI.getOperand(3).getImm();
|
||||
Shift = AArch64_AM::getArithShiftValue(Imm);
|
||||
return (Shift == 0 ||
|
||||
(Shift <= 3 && Ext == AArch64_AM::getArithExtendType(Imm)));
|
||||
}
|
||||
}
|
||||
|
||||
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
|
@ -241,15 +241,6 @@ public:
|
||||
MachineBasicBlock::iterator &It, MachineFunction &MF,
|
||||
const outliner::Candidate &C) const override;
|
||||
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
|
||||
/// Returns true if the instruction sets a constant value that can be
|
||||
/// executed more efficiently.
|
||||
static bool isExynosResetFast(const MachineInstr &MI);
|
||||
/// Returns true if the load or store has an extension that can be executed
|
||||
/// more efficiently.
|
||||
static bool isExynosLdStExtFast(const MachineInstr &MI);
|
||||
/// Returns true if the instruction has a constant shift left or extension
|
||||
/// that can be executed more efficiently.
|
||||
static bool isExynosShiftExtFast(const MachineInstr &MI);
|
||||
/// Returns true if the instruction has a shift by immediate that can be
|
||||
/// executed in one cycle less.
|
||||
static bool isFalkorShiftExtFast(const MachineInstr &MI);
|
||||
|
@ -61,14 +61,6 @@ def M1UnitALU : ProcResGroup<[M1UnitA,
|
||||
def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
|
||||
M1UnitNAL1]>; // All simple vector
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Predicates.
|
||||
|
||||
def M1BranchLinkPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
|
||||
MI->getOperand(0).getReg() != AArch64::LR}]>;
|
||||
def M1LdStExtPred : SchedPredicate<[{TII->isExynosLdStExtFast(*MI)}]>;
|
||||
def M1ShiftExtPred : SchedPredicate<[{TII->isExynosShiftExtFast(*MI)}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Coarse scheduling model.
|
||||
|
||||
@ -86,14 +78,16 @@ def M1WriteAC : SchedWriteRes<[M1UnitALU,
|
||||
def M1WriteAD : SchedWriteRes<[M1UnitALU,
|
||||
M1UnitC]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftExtPred, [M1WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteAA]>]>;
|
||||
def M1WriteAX : SchedWriteVariant<[SchedVar<ExynosExtPred, [M1WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteAA]>]>;
|
||||
def M1WriteAY : SchedWriteVariant<[SchedVar<ExynosShiftPred, [M1WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteAA]>]>;
|
||||
def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
|
||||
def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
|
||||
|
||||
def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
|
||||
def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkPred, [M1WriteAB]>,
|
||||
SchedVar<NoSchedPred, [M1WriteAC]>]>;
|
||||
def M1WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M1WriteAC]>,
|
||||
SchedVar<NoSchedPred, [M1WriteAB]>]>;
|
||||
|
||||
def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
|
||||
def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
|
||||
@ -111,40 +105,27 @@ def M1WriteLD : SchedWriteRes<[M1UnitL,
|
||||
let ResourceCycles = [2, 1]; }
|
||||
def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
|
||||
let NumMicroOps = 0; }
|
||||
def M1WriteLX : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M1WriteLC]>]>;
|
||||
def M1WriteLY : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M1WriteLD]>]>;
|
||||
def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteLC]>,
|
||||
SchedVar<NoSchedPred, [M1WriteL5]>]>;
|
||||
|
||||
def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
|
||||
def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
|
||||
def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
|
||||
def M1WriteSA : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSB : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitA]> { let Latency = 3;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSC : SchedWriteRes<[M1UnitS,
|
||||
def M1WriteSB : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitA]> { let Latency = 3;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteSD : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitA]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSE : SchedWriteRes<[M1UnitS,
|
||||
def M1WriteSC : SchedWriteRes<[M1UnitS,
|
||||
M1UnitA]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSX : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteSE]>]>;
|
||||
def M1WriteSY : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteSB]>]>;
|
||||
def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteSC]>,
|
||||
SchedVar<NoSchedPred, [M1WriteS1]>]>;
|
||||
|
||||
def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>;
|
||||
@ -415,9 +396,9 @@ def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 14;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1]; }
|
||||
M1UnitFST]> { let Latency = 14;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1]; }
|
||||
def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
@ -428,9 +409,17 @@ def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 17;
|
||||
let NumMicroOps = 7;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
|
||||
M1UnitFST]> { let Latency = 17;
|
||||
let NumMicroOps = 7;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
|
||||
|
||||
// Special cases.
|
||||
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
|
||||
def M1WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M1WriteNALU1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteA1]>]>;
|
||||
|
||||
// Fast forwarding.
|
||||
def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
|
||||
|
||||
// Branch instructions
|
||||
def : InstRW<[M1WriteB1], (instrs Bcc)>;
|
||||
@ -440,21 +429,34 @@ def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
|
||||
def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
|
||||
|
||||
// Arithmetic and logical integer instructions.
|
||||
def : InstRW<[M1WriteA1], (instrs COPY)>;
|
||||
def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
|
||||
def : InstRW<[M1WriteAX], (instregex ".+rx(64)?$")>;
|
||||
def : InstRW<[M1WriteAY], (instregex ".+rs$")>;
|
||||
|
||||
// Move instructions.
|
||||
def : InstRW<[M1WriteCOPY], (instrs COPY)>;
|
||||
|
||||
// Divide and multiply instructions.
|
||||
|
||||
// Miscellaneous instructions.
|
||||
|
||||
// Load instructions.
|
||||
def : InstRW<[M1WriteLC,
|
||||
ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roW")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roX")>;
|
||||
def : InstRW<[M1WriteLB,
|
||||
WriteLDHi,
|
||||
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
|
||||
def : InstRW<[M1WriteLX,
|
||||
ReadAdrBase], (instregex "^PRFMro[WX]")>;
|
||||
def : InstRW<[M1WriteLC,
|
||||
ReadAdrBase], (instrs PRFMroW)>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
ReadAdrBase], (instrs PRFMroX)>;
|
||||
|
||||
// Store instructions.
|
||||
def : InstRW<[M1WriteSC,
|
||||
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
|
||||
def : InstRW<[WriteST,
|
||||
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
|
||||
|
||||
// FP data instructions.
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
|
||||
@ -488,8 +490,10 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
|
||||
def : InstRW<[M1WriteLY,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M1WriteLD,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
|
||||
def : InstRW<[M1WriteLD,
|
||||
ReadAdrBase], (instregex "^LDRQro[WX]")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
@ -508,14 +512,16 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
|
||||
def : InstRW<[M1WriteSY,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M1WriteSB,
|
||||
def : InstRW<[M1WriteSA,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]roW")>;
|
||||
def : InstRW<[WriteVST,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]roX")>;
|
||||
def : InstRW<[M1WriteSA,
|
||||
ReadAdrBase], (instregex "^STRQro[WX]")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STP[DS](post|pre)")>;
|
||||
def : InstRW<[M1WriteSC,
|
||||
def : InstRW<[M1WriteSB,
|
||||
WriteAdr], (instregex "^STPQ(post|pre)")>;
|
||||
|
||||
// ASIMD instructions.
|
||||
@ -609,21 +615,21 @@ def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
|
||||
def : InstRW<[M1WriteVLDE,
|
||||
WriteAdr], (instregex "LD1i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
def : InstRW<[WriteVLD], (instregex "LD1Rv(1d)$")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
@ -831,8 +837,6 @@ def : InstRW<[M1WriteVSTI,
|
||||
WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
|
||||
|
||||
// Cryptography instructions.
|
||||
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
|
||||
def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
|
||||
def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
|
||||
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
|
||||
|
||||
|
@ -103,20 +103,6 @@ def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0,
|
||||
M3UnitNSHF1,
|
||||
M3UnitNSHF2]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Predicates.
|
||||
|
||||
def M3BranchLinkPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
|
||||
MI->getOperand(0).isReg() &&
|
||||
MI->getOperand(0).getReg() != AArch64::LR}]>;
|
||||
def M3ResetPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>;
|
||||
def M3RotatePred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri ||
|
||||
MI->getOpcode() == AArch64::EXTRXrri) &&
|
||||
MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
|
||||
MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>;
|
||||
def M3LdStExtPred : SchedPredicate<[{TII->isExynosLdStExtFast(*MI)}]>;
|
||||
def M3ShiftExtPred : SchedPredicate<[{TII->isExynosShiftExtFast(*MI)}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Coarse scheduling model.
|
||||
|
||||
@ -138,15 +124,23 @@ def M3WriteAD : SchedWriteRes<[M3UnitALU,
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; }
|
||||
def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; }
|
||||
def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetPred, [M3WriteZ0]>,
|
||||
SchedVar<M3ShiftExtPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotatePred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<ExynosShiftPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosExtPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAZ : SchedWriteVariant<[SchedVar<ExynosShiftPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
|
||||
def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
|
||||
def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkPred, [M3WriteAB]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAC]>]>;
|
||||
def M3WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M3WriteAC]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAB]>]>;
|
||||
|
||||
def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; }
|
||||
def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; }
|
||||
@ -163,28 +157,24 @@ def M3WriteLC : SchedWriteRes<[M3UnitA,
|
||||
def M3WriteLD : SchedWriteRes<[M3UnitA,
|
||||
M3UnitL]> { let Latency = 4;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteLE : SchedWriteRes<[M3UnitA,
|
||||
M3UnitL]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteLH : SchedWriteRes<[]> { let Latency = 5;
|
||||
let NumMicroOps = 0; }
|
||||
|
||||
def M3WriteLX : SchedWriteVariant<[SchedVar<M3LdStExtPred, [M3WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M3WriteLB]>]>;
|
||||
def M3WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M3WriteLB]>,
|
||||
SchedVar<NoSchedPred, [M3WriteL5]>]>;
|
||||
|
||||
def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; }
|
||||
def M3WriteSA : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS,
|
||||
M3UnitFST]> { let Latency = 2;
|
||||
M3UnitFST]> { let Latency = 3;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteSB : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteSC : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
|
||||
def M3WriteSX : SchedWriteVariant<[SchedVar<M3LdStExtPred, [M3WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteSB]>]>;
|
||||
def M3WriteSY : SchedWriteVariant<[SchedVar<M3LdStExtPred, [M3WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteSC]>]>;
|
||||
def M3WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M3WriteSB]>,
|
||||
SchedVar<NoSchedPred, [M3WriteS1]>]>;
|
||||
|
||||
def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>;
|
||||
@ -214,9 +204,7 @@ def : WriteRes<WriteIM64, [M3UnitC]> { let Latency = 4;
|
||||
let ResourceCycles = [2]; }
|
||||
|
||||
// Miscellaneous instructions.
|
||||
def : WriteRes<WriteExtr, [M3UnitALU,
|
||||
M3UnitALU]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def : SchedAlias<WriteExtr, M3WriteAY>;
|
||||
|
||||
// Addressing modes.
|
||||
def : WriteRes<WriteAdr, []> { let Latency = 1;
|
||||
@ -479,11 +467,15 @@ def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
|
||||
|
||||
// Special cases.
|
||||
def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
|
||||
def M3WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M3WriteNALU1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteZ0]>]>;
|
||||
def M3WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
|
||||
|
||||
// Fast forwarding.
|
||||
def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>;
|
||||
def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4,
|
||||
M3WriteFMAC5]>;
|
||||
def M3WriteMOVI : SchedWriteVariant<[SchedVar<M3ResetPred, [M3WriteZ0]>,
|
||||
SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
|
||||
def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>;
|
||||
|
||||
// Branch instructions
|
||||
@ -494,29 +486,40 @@ def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>;
|
||||
def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>;
|
||||
|
||||
// Arithmetic and logical integer instructions.
|
||||
def : InstRW<[M3WriteA1], (instrs COPY)>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>;
|
||||
def : InstRW<[M3WriteAZ], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
|
||||
def : InstRW<[M3WriteAU], (instrs ORRWrs, ORRXrs)>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
|
||||
def : InstRW<[M3WriteAZ], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
|
||||
def : InstRW<[M3WriteAV], (instrs ADDWri, ADDXri)>;
|
||||
def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>;
|
||||
|
||||
// Move instructions.
|
||||
def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
|
||||
def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
|
||||
def : InstRW<[M3WriteCOPY], (instrs COPY)>;
|
||||
def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
|
||||
def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
|
||||
|
||||
// Divide and multiply instructions.
|
||||
|
||||
// Miscellaneous instructions.
|
||||
def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>;
|
||||
|
||||
// Load instructions.
|
||||
def : InstRW<[M3WriteLB,
|
||||
ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roW")>;
|
||||
def : InstRW<[M3WriteL5,
|
||||
ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roX")>;
|
||||
def : InstRW<[M3WriteLD,
|
||||
WriteLDHi,
|
||||
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
|
||||
def : InstRW<[M3WriteLX,
|
||||
ReadAdrBase], (instregex "^PRFMro[WX]")>;
|
||||
def : InstRW<[M3WriteLB,
|
||||
ReadAdrBase], (instrs PRFMroW)>;
|
||||
def : InstRW<[M3WriteL5,
|
||||
ReadAdrBase], (instrs PRFMroX)>;
|
||||
|
||||
// Store instructions.
|
||||
def : InstRW<[M3WriteSB,
|
||||
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
|
||||
def : InstRW<[WriteST,
|
||||
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
|
||||
|
||||
// FP data instructions.
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>;
|
||||
@ -553,9 +556,11 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
|
||||
def : InstRW<[M3WriteLX,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M3WriteLB,
|
||||
def : InstRW<[M3WriteLE,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
|
||||
def : InstRW<[M3WriteLE,
|
||||
ReadAdrBase], (instregex "^LDRQro[WX]")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
M3WriteLH], (instregex "^LDN?P[DS]i")>;
|
||||
@ -573,8 +578,10 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
|
||||
def : InstRW<[M3WriteSY,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M3WriteSA,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]roW")>;
|
||||
def : InstRW<[WriteVST,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]roX")>;
|
||||
def : InstRW<[M3WriteSA,
|
||||
ReadAdrBase], (instregex "^STRQro[WX]")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
|
||||
|
124
lib/Target/AArch64/AArch64SchedPredExynos.td
Normal file
124
lib/Target/AArch64/AArch64SchedPredExynos.td
Normal file
@ -0,0 +1,124 @@
|
||||
//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines scheduling predicate definitions that are used by the
|
||||
// AArch64 Exynos processors.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Identify a BLR whose operand 0, the indirect target register, is LR.
def ExynosBranchLinkLRPred : MCSchedPredicate<
                               CheckAll<[CheckOpcode<[BLR]>,
                                         CheckRegOperand<0, LR>]>>;
|
||||
|
||||
// Identify extended-register arithmetic instructions whose extension is
// either applied with no shift (CheckExtBy0) or limited to a shift by 1, 2
// or 3 combined with UXTW for the 32-bit opcodes (IsArithExt32Op) or with
// UXTX for the 64-bit opcodes (IsArithExt64Op).  Any other opcode is rejected.
def ExynosExtFn : TIIPredicate<
  "isExynosExtFast",
  MCOpcodeSwitchStatement<
    [MCOpcodeSwitchCase<
       IsArithExt32Op.ValidOpcodes,
       MCReturnStatement<
         CheckAny<[CheckExtBy0,
                   CheckAll<[CheckExtUXTW,
                             CheckAny<[CheckExtBy1,
                                       CheckExtBy2,
                                       CheckExtBy3]>]>]>>>,
     MCOpcodeSwitchCase<
       IsArithExt64Op.ValidOpcodes,
       MCReturnStatement<
         CheckAny<[CheckExtBy0,
                   CheckAll<[CheckExtUXTX,
                             CheckAny<[CheckExtBy1,
                                       CheckExtBy2,
                                       CheckExtBy3]>]>]>>>],
    MCReturnStatement<FalsePred>>>;

// Scheduling-model wrapper around ExynosExtFn.
def ExynosExtPred : MCSchedPredicate<ExynosExtFn>;
|
||||
|
||||
// Identify FP instructions: matches when either CheckDForm or CheckQForm holds.
// NOTE(review): both checks are defined outside this file; presumably they
// test for the D and Q register forms of an instruction -- confirm.
|
||||
def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
|
||||
|
||||
// Identify whether an instruction whose result is a long vector operates on
// the upper half of the input registers.  This is a pure opcode test: every
// opcode in IsLongVectorUpperOp matches, everything else does not.
def ExynosLongVectorUpperFn : TIIPredicate<
  "isExynosLongVectorUpper",
  MCOpcodeSwitchStatement<
    [MCOpcodeSwitchCase<
       IsLongVectorUpperOp.ValidOpcodes,
       MCReturnStatement<TruePred>>],
    MCReturnStatement<FalsePred>>>;

// Scheduling-model wrapper around ExynosLongVectorUpperFn.
def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
|
||||
|
||||
// Identify 128-bit NEON instructions, i.e. those for which CheckQForm holds.
|
||||
def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
|
||||
|
||||
// Identify instructions that reset a register efficiently.  ADR, ADRP and the
// MOVN/MOVZ immediate moves always qualify; any other instruction qualifies
// only if it is a copy idiom, a zero FP idiom or a zero idiom.
def ExynosResetFn : TIIPredicate<
  "isExynosResetFast",
  MCOpcodeSwitchStatement<
    [MCOpcodeSwitchCase<
       [ADR, ADRP,
        MOVNWi, MOVNXi,
        MOVZWi, MOVZXi],
       MCReturnStatement<TruePred>>],
    MCReturnStatement<
      CheckAny<[IsCopyIdiomFn,
                IsZeroFPIdiomFn,
                IsZeroIdiomFn]>>>>;

// Scheduling-model wrapper around ExynosResetFn.
def ExynosResetPred : MCSchedPredicate<ExynosResetFn>;
|
||||
|
||||
// Identify EXTR used as the alias for ROR (immediate), i.e. an EXTR whose
// operands 1 and 2 are the same register.
def ExynosRotateRightImmPred : MCSchedPredicate<
                                 CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
                                           CheckSameRegOperand<1, 2>]>>;
|
||||
|
||||
// Identify arithmetic and logic instructions without or with limited shift.
|
||||
def ExynosShiftFn : TIIPredicate<
|
||||
"isExynosShiftFast",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsArithLogicShiftOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckAny<[CheckShiftBy0,
|
||||
CheckAll<
|
||||
[CheckShiftLSL,
|
||||
CheckAny<
|
||||
[CheckShiftBy1,
|
||||
CheckShiftBy2,
|
||||
CheckShiftBy3]>]>]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
// Scheduling predicate wrapper around ExynosShiftFn.
def ExynosShiftPred : MCSchedPredicate<ExynosShiftFn>;
|
||||
|
||||
// Identify more arithmetic and logic instructions without or with limited
// shift: everything accepted by ExynosShiftFn, plus an LSL by 8.
//
// Both conditions are tested inside the opcode case. ExynosShiftFn only
// accepts the same IsArithLogicShiftOp opcodes, so consulting it solely on
// the fall-through path could never succeed and would leave this predicate
// matching nothing but an LSL by 8.
def ExynosShiftExFn : TIIPredicate<
                        "isExynosShiftExFast",
                        MCOpcodeSwitchStatement<
                          [MCOpcodeSwitchCase<
                            IsArithLogicShiftOp.ValidOpcodes,
                            MCReturnStatement<
                              CheckAny<
                                [ExynosShiftFn,
                                 CheckAll<
                                   [CheckShiftLSL,
                                    CheckShiftBy8]>]>>>],
                          MCReturnStatement<FalsePred>>>;
|
||||
// Scheduling predicate wrapper around ExynosShiftExFn.
def ExynosShiftExPred : MCSchedPredicate<ExynosShiftExFn>;
|
||||
|
||||
// Identify instructions that are as cheap as a move: arithmetic and logic
// immediate instructions always qualify; any other opcode qualifies when it
// matches ExynosExtFn, ExynosResetFn or ExynosShiftFn.
def ExynosCheapFn :
  TIIPredicate<"isExynosCheapAsMove",
               MCOpcodeSwitchStatement<
                 [MCOpcodeSwitchCase<IsArithLogicImmOp.ValidOpcodes,
                                     MCReturnStatement<TruePred>>],
                 MCReturnStatement<CheckAny<[ExynosExtFn,
                                             ExynosResetFn,
                                             ExynosShiftFn]>>>>;
|
@ -3,31 +3,31 @@
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
|
||||
|
||||
sub w0, w1, w2, sxtb #0
|
||||
add w3, w4, w5, sxth #1
|
||||
add x3, x4, w5, sxth #1
|
||||
subs x6, x7, w8, uxtw #2
|
||||
adds x9, x10, x11, uxtx #3
|
||||
sub w12, w13, w14, uxtb #3
|
||||
add w15, w16, w17, uxth #2
|
||||
add x15, x16, w17, uxth #2
|
||||
subs x18, x19, w20, sxtw #1
|
||||
adds x21, x22, x23, sxtx #0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 800
|
||||
|
||||
# EM1-NEXT: Total Cycles: 537
|
||||
# EM3-NEXT: Total Cycles: 403
|
||||
# EM1-NEXT: Total Cycles: 403
|
||||
# EM3-NEXT: Total Cycles: 303
|
||||
|
||||
# ALL-NEXT: Total uOps: 800
|
||||
|
||||
# EM1: Dispatch Width: 4
|
||||
# EM1-NEXT: uOps Per Cycle: 1.49
|
||||
# EM1-NEXT: IPC: 1.49
|
||||
# EM1-NEXT: Block RThroughput: 5.3
|
||||
# EM1-NEXT: uOps Per Cycle: 1.99
|
||||
# EM1-NEXT: IPC: 1.99
|
||||
# EM1-NEXT: Block RThroughput: 4.0
|
||||
|
||||
# EM3: Dispatch Width: 6
|
||||
# EM3-NEXT: uOps Per Cycle: 1.99
|
||||
# EM3-NEXT: IPC: 1.99
|
||||
# EM3-NEXT: Block RThroughput: 4.0
|
||||
# EM3-NEXT: uOps Per Cycle: 2.64
|
||||
# EM3-NEXT: IPC: 2.64
|
||||
# EM3-NEXT: Block RThroughput: 3.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
@ -39,20 +39,20 @@
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# EM1-NEXT: 1 2 0.67 sub w0, w1, w2, sxtb
|
||||
# EM1-NEXT: 1 2 0.67 add w3, w4, w5, sxth #1
|
||||
# EM1-NEXT: 1 2 0.67 subs x6, x7, w8, uxtw #2
|
||||
# EM1-NEXT: 1 2 0.67 adds x9, x10, x11, uxtx #3
|
||||
# EM1-NEXT: 1 1 0.33 sub w0, w1, w2, sxtb
|
||||
# EM1-NEXT: 1 2 0.67 add x3, x4, w5, sxth #1
|
||||
# EM1-NEXT: 1 1 0.33 subs x6, x7, w8, uxtw #2
|
||||
# EM1-NEXT: 1 1 0.33 adds x9, x10, x11, uxtx #3
|
||||
# EM1-NEXT: 1 2 0.67 sub w12, w13, w14, uxtb #3
|
||||
# EM1-NEXT: 1 2 0.67 add w15, w16, w17, uxth #2
|
||||
# EM1-NEXT: 1 2 0.67 add x15, x16, w17, uxth #2
|
||||
# EM1-NEXT: 1 2 0.67 subs x18, x19, w20, sxtw #1
|
||||
# EM1-NEXT: 1 2 0.67 adds x21, x22, x23, sxtx
|
||||
# EM1-NEXT: 1 1 0.33 adds x21, x22, x23, sxtx
|
||||
|
||||
# EM3-NEXT: 1 2 0.50 sub w0, w1, w2, sxtb
|
||||
# EM3-NEXT: 1 2 0.50 add w3, w4, w5, sxth #1
|
||||
# EM3-NEXT: 1 2 0.50 subs x6, x7, w8, uxtw #2
|
||||
# EM3-NEXT: 1 2 0.50 adds x9, x10, x11, uxtx #3
|
||||
# EM3-NEXT: 1 1 0.25 sub w0, w1, w2, sxtb
|
||||
# EM3-NEXT: 1 2 0.50 add x3, x4, w5, sxth #1
|
||||
# EM3-NEXT: 1 1 0.25 subs x6, x7, w8, uxtw #2
|
||||
# EM3-NEXT: 1 1 0.25 adds x9, x10, x11, uxtx #3
|
||||
# EM3-NEXT: 1 2 0.50 sub w12, w13, w14, uxtb #3
|
||||
# EM3-NEXT: 1 2 0.50 add w15, w16, w17, uxth #2
|
||||
# EM3-NEXT: 1 2 0.50 add x15, x16, w17, uxth #2
|
||||
# EM3-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #1
|
||||
# EM3-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx
|
||||
# EM3-NEXT: 1 1 0.25 adds x21, x22, x23, sxtx
|
||||
|
@ -9,20 +9,17 @@
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 400
|
||||
|
||||
# EM1-NEXT: Total Cycles: 408
|
||||
# EM3-NEXT: Total Cycles: 208
|
||||
|
||||
# ALL-NEXT: Total uOps: 800
|
||||
# ALL-NEXT: Total Cycles: 308
|
||||
# ALL-NEXT: Total uOps: 600
|
||||
|
||||
# EM1: Dispatch Width: 4
|
||||
# EM1-NEXT: uOps Per Cycle: 1.96
|
||||
# EM1-NEXT: IPC: 0.98
|
||||
# EM1-NEXT: uOps Per Cycle: 1.95
|
||||
# EM1-NEXT: IPC: 1.30
|
||||
# EM1-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# EM3: Dispatch Width: 6
|
||||
# EM3-NEXT: uOps Per Cycle: 3.85
|
||||
# EM3-NEXT: IPC: 1.92
|
||||
# EM3-NEXT: uOps Per Cycle: 1.95
|
||||
# EM3-NEXT: IPC: 1.30
|
||||
# EM3-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
@ -35,12 +32,12 @@
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# EM1-NEXT: 2 5 1.00 * ldr w0, [x1, x2]
|
||||
# EM1-NEXT: 2 2 1.00 * str x3, [x4, w5, sxtw]
|
||||
# EM1-NEXT: 2 5 1.00 * ldr x6, [x7, w8, uxtw #3]
|
||||
# EM1-NEXT: 2 2 1.00 * str x9, [x10, x11, lsl #3]
|
||||
# EM1-NEXT: 1 5 1.00 * ldr w0, [x1, x2]
|
||||
# EM3-NEXT: 1 5 0.50 * ldr w0, [x1, x2]
|
||||
|
||||
# EM3-NEXT: 2 5 0.50 * ldr w0, [x1, x2]
|
||||
# EM3-NEXT: 2 1 1.00 * str x3, [x4, w5, sxtw]
|
||||
# ALL-NEXT: 2 2 1.00 * str x3, [x4, w5, sxtw]
|
||||
|
||||
# EM1-NEXT: 2 5 1.00 * ldr x6, [x7, w8, uxtw #3]
|
||||
# EM3-NEXT: 2 5 0.50 * ldr x6, [x7, w8, uxtw #3]
|
||||
# EM3-NEXT: 2 1 1.00 * str x9, [x10, x11, lsl #3]
|
||||
|
||||
# ALL-NEXT: 1 1 1.00 * str x9, [x10, x11, lsl #3]
|
||||
|
@ -10,20 +10,20 @@
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 400
|
||||
|
||||
# EM1-NEXT: Total Cycles: 271
|
||||
# EM3-NEXT: Total Cycles: 203
|
||||
# EM1-NEXT: Total Cycles: 204
|
||||
# EM3-NEXT: Total Cycles: 154
|
||||
|
||||
# ALL-NEXT: Total uOps: 400
|
||||
|
||||
# EM1: Dispatch Width: 4
|
||||
# EM1-NEXT: uOps Per Cycle: 1.48
|
||||
# EM1-NEXT: IPC: 1.48
|
||||
# EM1-NEXT: Block RThroughput: 2.7
|
||||
# EM1-NEXT: uOps Per Cycle: 1.96
|
||||
# EM1-NEXT: IPC: 1.96
|
||||
# EM1-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# EM3: Dispatch Width: 6
|
||||
# EM3-NEXT: uOps Per Cycle: 1.97
|
||||
# EM3-NEXT: IPC: 1.97
|
||||
# EM3-NEXT: Block RThroughput: 2.0
|
||||
# EM3-NEXT: uOps Per Cycle: 2.60
|
||||
# EM3-NEXT: IPC: 2.60
|
||||
# EM3-NEXT: Block RThroughput: 1.5
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
@ -35,12 +35,12 @@
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# EM1-NEXT: 1 2 0.67 add w0, w1, w2
|
||||
# EM1-NEXT: 1 1 0.33 add w0, w1, w2
|
||||
# EM1-NEXT: 1 2 0.67 sub x3, x4, x5, lsr #1
|
||||
# EM1-NEXT: 1 2 0.67 adds x6, x7, x8, lsl #2
|
||||
# EM1-NEXT: 1 1 0.33 adds x6, x7, x8, lsl #2
|
||||
# EM1-NEXT: 1 2 0.67 subs w9, w10, w11, asr #3
|
||||
|
||||
# EM3-NEXT: 1 2 0.50 add w0, w1, w2
|
||||
# EM3-NEXT: 1 1 0.25 add w0, w1, w2
|
||||
# EM3-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1
|
||||
# EM3-NEXT: 1 2 0.50 adds x6, x7, x8, lsl #2
|
||||
# EM3-NEXT: 1 1 0.25 adds x6, x7, x8, lsl #2
|
||||
# EM3-NEXT: 1 2 0.50 subs w9, w10, w11, asr #3
|
||||
|
Loading…
Reference in New Issue
Block a user