mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
5d3783c0d0
This patch adds a new ReadAdvance definition named ReadInt2Fpu. ReadInt2Fpu allows x86 scheduling models to accurately describe delays caused by data transfers from the integer unit to the floating point unit. ReadInt2Fpu currently defaults to a delay of zero cycles (i.e. no delay) for all x86 models excluding BtVer2. That means this patch is a functional change for the Jaguar cpu model only. Tablegen definitions for instructions (V)PINSR* have been updated to account for the new ReadInt2Fpu. That read is mapped to the GPR input operand. On Jaguar, int-to-fpu transfers are modeled as a +6cy delay. Before this patch, that extra delay was added to the opcode latency. In practice, the insert opcode only executes for 1cy. Most of the actual latency is contributed by the so-called operand-latency. According to the AMD SOG for family 16h, (V)PINSR* latency is defined by expression f+1, where f is defined as a forwarding delay from the integer unit to the fpu. When printing instruction latency from MCA (see InstructionInfoView.cpp) and LLC (only when flag -print-schedule is specified), we now need to account for any extra forwarding delays. We do this by checking if scheduling classes declare any negative ReadAdvance entries. Quoting a code comment in TargetSchedule.td: "A negative advance effectively increases latency, which may be used for cross-domain stalls". When computing the instruction latency for the purpose of our scheduling tests, we now add any extra delay to the formula. This avoids regressing existing codegen and mca schedule tests. It comes with the cost of an extra (but very simple) hook in MCSchedModel. Differential Revision: https://reviews.llvm.org/D57056 llvm-svn: 351965
168 lines
6.1 KiB
C++
168 lines
6.1 KiB
C++
//===- MCSchedule.cpp - Scheduling ------------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines the default scheduling model.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/MC/MCSchedule.h"
|
|
#include "llvm/MC/MCInst.h"
|
|
#include "llvm/MC/MCInstrDesc.h"
|
|
#include "llvm/MC/MCInstrInfo.h"
|
|
#include "llvm/MC/MCSubtargetInfo.h"
|
|
#include <type_traits>
|
|
|
|
using namespace llvm;
|
|
|
|
static_assert(std::is_pod<MCSchedModel>::value,
|
|
"We shouldn't have a static constructor here");
|
|
const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth,
|
|
DefaultMicroOpBufferSize,
|
|
DefaultLoopMicroOpBufferSize,
|
|
DefaultLoadLatency,
|
|
DefaultHighLatency,
|
|
DefaultMispredictPenalty,
|
|
false,
|
|
true,
|
|
0,
|
|
nullptr,
|
|
nullptr,
|
|
0,
|
|
0,
|
|
nullptr,
|
|
nullptr};
|
|
|
|
int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
|
|
const MCSchedClassDesc &SCDesc) {
|
|
int Latency = 0;
|
|
for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries;
|
|
DefIdx != DefEnd; ++DefIdx) {
|
|
// Lookup the definition's write latency in SubtargetInfo.
|
|
const MCWriteLatencyEntry *WLEntry =
|
|
STI.getWriteLatencyEntry(&SCDesc, DefIdx);
|
|
// Early exit if we found an invalid latency.
|
|
if (WLEntry->Cycles < 0)
|
|
return WLEntry->Cycles;
|
|
Latency = std::max(Latency, static_cast<int>(WLEntry->Cycles));
|
|
}
|
|
return Latency;
|
|
}
|
|
|
|
int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
|
|
unsigned SchedClass) const {
|
|
const MCSchedClassDesc &SCDesc = *getSchedClassDesc(SchedClass);
|
|
if (!SCDesc.isValid())
|
|
return 0;
|
|
if (!SCDesc.isVariant())
|
|
return MCSchedModel::computeInstrLatency(STI, SCDesc);
|
|
|
|
llvm_unreachable("unsupported variant scheduling class");
|
|
}
|
|
|
|
int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
|
|
const MCInstrInfo &MCII,
|
|
const MCInst &Inst) const {
|
|
unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
|
|
const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass);
|
|
if (!SCDesc->isValid())
|
|
return 0;
|
|
|
|
unsigned CPUID = getProcessorID();
|
|
while (SCDesc->isVariant()) {
|
|
SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, CPUID);
|
|
SCDesc = getSchedClassDesc(SchedClass);
|
|
}
|
|
|
|
if (SchedClass)
|
|
return MCSchedModel::computeInstrLatency(STI, *SCDesc);
|
|
|
|
llvm_unreachable("unsupported variant scheduling class");
|
|
}
|
|
|
|
double
|
|
MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
|
|
const MCSchedClassDesc &SCDesc) {
|
|
Optional<double> Throughput;
|
|
const MCSchedModel &SM = STI.getSchedModel();
|
|
const MCWriteProcResEntry *I = STI.getWriteProcResBegin(&SCDesc);
|
|
const MCWriteProcResEntry *E = STI.getWriteProcResEnd(&SCDesc);
|
|
for (; I != E; ++I) {
|
|
if (!I->Cycles)
|
|
continue;
|
|
unsigned NumUnits = SM.getProcResource(I->ProcResourceIdx)->NumUnits;
|
|
double Temp = NumUnits * 1.0 / I->Cycles;
|
|
Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp;
|
|
}
|
|
if (Throughput.hasValue())
|
|
return 1.0 / Throughput.getValue();
|
|
|
|
// If no throughput value was calculated, assume that we can execute at the
|
|
// maximum issue width scaled by number of micro-ops for the schedule class.
|
|
return ((double)SCDesc.NumMicroOps) / SM.IssueWidth;
|
|
}
|
|
|
|
double
|
|
MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
|
|
const MCInstrInfo &MCII,
|
|
const MCInst &Inst) const {
|
|
unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
|
|
const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass);
|
|
|
|
// If there's no valid class, assume that the instruction executes/completes
|
|
// at the maximum issue width.
|
|
if (!SCDesc->isValid())
|
|
return 1.0 / IssueWidth;
|
|
|
|
unsigned CPUID = getProcessorID();
|
|
while (SCDesc->isVariant()) {
|
|
SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, CPUID);
|
|
SCDesc = getSchedClassDesc(SchedClass);
|
|
}
|
|
|
|
if (SchedClass)
|
|
return MCSchedModel::getReciprocalThroughput(STI, *SCDesc);
|
|
|
|
llvm_unreachable("unsupported variant scheduling class");
|
|
}
|
|
|
|
double
|
|
MCSchedModel::getReciprocalThroughput(unsigned SchedClass,
|
|
const InstrItineraryData &IID) {
|
|
Optional<double> Throughput;
|
|
const InstrStage *I = IID.beginStage(SchedClass);
|
|
const InstrStage *E = IID.endStage(SchedClass);
|
|
for (; I != E; ++I) {
|
|
if (!I->getCycles())
|
|
continue;
|
|
double Temp = countPopulation(I->getUnits()) * 1.0 / I->getCycles();
|
|
Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp;
|
|
}
|
|
if (Throughput.hasValue())
|
|
return 1.0 / Throughput.getValue();
|
|
|
|
// If there are no execution resources specified for this class, then assume
|
|
// that it can execute at the maximum default issue width.
|
|
return 1.0 / DefaultIssueWidth;
|
|
}
|
|
|
|
unsigned
|
|
MCSchedModel::getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
|
|
unsigned WriteResourceID) {
|
|
if (Entries.empty())
|
|
return 0;
|
|
|
|
int DelayCycles = 0;
|
|
for (const MCReadAdvanceEntry &E : Entries) {
|
|
if (E.WriteResourceID != WriteResourceID)
|
|
continue;
|
|
DelayCycles = std::min(DelayCycles, E.Cycles);
|
|
}
|
|
|
|
return std::abs(DelayCycles);
|
|
}
|