1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 10:32:48 +02:00

[MCA] Disable RCU for InOrderIssueStage

This is a follow-up for:
D98604 [MCA] Ensure that writes occur in-order

When instructions are aligned by the order of writes, they retire
in-order naturally. There is no need for an RCU, so it is disabled.

Differential Revision: https://reviews.llvm.org/D98628
This commit is contained in:
Andrew Savonichev 2021-03-15 01:25:51 +03:00
parent 4e38761daa
commit 182b0cd903
18 changed files with 183 additions and 245 deletions

View File

@ -975,7 +975,6 @@ met. Multiple instructions can be issued in one cycle according to the value of
the ``IssueWidth`` parameter in LLVM's scheduling model.
Once issued, an instruction is moved to ``IssuedInst`` set until it is ready to
retire. If ``RetireControlUnit`` is defined in the LLVM's scheduling model,
:program:`llvm-mca` ensures that instructions are retired in-order. However, an
instruction is allowed to retire out-of-order if ``RetireOOO`` property is true
for at least one of its writes.
retire. :program:`llvm-mca` ensures that writes are committed in-order. However,
an instruction is allowed to commit writes and retire out-of-order if
``RetireOOO`` property is true for at least one of its writes.

View File

@ -27,12 +27,10 @@ class MCSubtargetInfo;
namespace mca {
class RegisterFile;
class ResourceManager;
struct RetireControlUnit;
class InOrderIssueStage final : public Stage {
const MCSchedModel &SM;
const MCSubtargetInfo &STI;
RetireControlUnit &RCU;
RegisterFile &PRF;
std::unique_ptr<ResourceManager> RM;
@ -67,14 +65,16 @@ class InOrderIssueStage final : public Stage {
Error tryIssue(InstRef &IR, unsigned *StallCycles);
/// Update status of instructions from IssuedInst.
Error updateIssuedInst();
void updateIssuedInst();
/// Retire instruction once it is executed.
void retireInstruction(InstRef &IR);
public:
InOrderIssueStage(RetireControlUnit &RCU, RegisterFile &PRF,
const MCSchedModel &SM, const MCSubtargetInfo &STI)
: SM(SM), STI(STI), RCU(RCU), PRF(PRF),
RM(std::make_unique<ResourceManager>(SM)), NumIssued(0),
StallCyclesLeft(0), Bandwidth(0), LastWriteBackCycle(0) {}
InOrderIssueStage(RegisterFile &PRF, const MCSchedModel &SM,
const MCSubtargetInfo &STI)
: SM(SM), STI(STI), PRF(PRF), RM(std::make_unique<ResourceManager>(SM)),
NumIssued(0), StallCyclesLeft(0), Bandwidth(0), LastWriteBackCycle(0) {}
bool isAvailable(const InstRef &) const override;
bool hasWorkToComplete() const override;

View File

@ -30,7 +30,6 @@ class RetireStage final : public Stage {
RetireControlUnit &RCU;
RegisterFile &PRF;
LSUnitBase &LSU;
SmallVector<InstRef, 4> RetireInst;
RetireStage(const RetireStage &Other) = delete;
RetireStage &operator=(const RetireStage &Other) = delete;
@ -39,9 +38,7 @@ public:
RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS)
: Stage(), RCU(R), PRF(F), LSU(LS) {}
bool hasWorkToComplete() const override {
return !RCU.isEmpty() || !RetireInst.empty();
}
bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
Error cycleStart() override;
Error cycleEnd() override;
Error execute(InstRef &IR) override;

View File

@ -71,23 +71,16 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) {
std::unique_ptr<Pipeline>
Context::createInOrderPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) {
const MCSchedModel &SM = STI.getSchedModel();
auto RCU = std::make_unique<RetireControlUnit>(SM);
auto PRF = std::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
auto LSU = std::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
Opts.StoreQueueSize, Opts.AssumeNoAlias);
auto Entry = std::make_unique<EntryStage>(SrcMgr);
auto InOrderIssue = std::make_unique<InOrderIssueStage>(*RCU, *PRF, SM, STI);
auto Retire = std::make_unique<RetireStage>(*RCU, *PRF, *LSU);
auto InOrderIssue = std::make_unique<InOrderIssueStage>(*PRF, SM, STI);
auto StagePipeline = std::make_unique<Pipeline>();
StagePipeline->appendStage(std::move(Entry));
StagePipeline->appendStage(std::move(InOrderIssue));
StagePipeline->appendStage(std::move(Retire));
addHardwareUnit(std::move(RCU));
addHardwareUnit(std::move(PRF));
addHardwareUnit(std::move(LSU));
return StagePipeline;
}

View File

@ -23,6 +23,8 @@ RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
: NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
AvailableEntries(SM.isOutOfOrder() ? SM.MicroOpBufferSize : 0),
MaxRetirePerCycle(0) {
assert(SM.isOutOfOrder() &&
"RetireControlUnit is not available for in-order processors");
// Check if the scheduling model provides extra information about the machine
// processor. If so, then use that information to set the reorder buffer size
// and the maximum number of instructions retired per cycle.
@ -33,17 +35,12 @@ RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
MaxRetirePerCycle = EPI.MaxRetirePerCycle;
}
NumROBEntries = AvailableEntries;
if (!SM.isOutOfOrder() && !NumROBEntries)
return;
assert(NumROBEntries && "Invalid reorder buffer size!");
Queue.resize(2 * NumROBEntries);
}
// Reserves a number of slots, and returns a new token.
unsigned RetireControlUnit::dispatch(const InstRef &IR) {
if (!NumROBEntries)
return UnhandledTokenID;
const Instruction &Inst = *IR.getInstruction();
unsigned Entries = normalizeQuantity(Inst.getNumMicroOps());
assert((AvailableEntries >= Entries) && "Reorder Buffer unavailable!");

View File

@ -182,7 +182,7 @@ static void addRegisterReadWrite(RegisterFile &PRF, Instruction &IS,
PRF.addRegisterWrite(WriteRef(SourceIndex, &WS), UsedRegs);
}
static void notifyInstructionExecute(
static void notifyInstructionIssue(
const InstRef &IR,
const SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedRes,
const Stage &S) {
@ -205,28 +205,11 @@ static void notifyInstructionDispatch(const InstRef &IR, unsigned Ops,
}
llvm::Error InOrderIssueStage::execute(InstRef &IR) {
Instruction &IS = *IR.getInstruction();
const InstrDesc &Desc = IS.getDesc();
unsigned RCUTokenID = RetireControlUnit::UnhandledTokenID;
if (!Desc.RetireOOO)
RCUTokenID = RCU.dispatch(IR);
IS.dispatch(RCUTokenID);
if (Desc.EndGroup) {
Bandwidth = 0;
} else {
unsigned NumMicroOps = IR.getInstruction()->getNumMicroOps();
assert(Bandwidth >= NumMicroOps);
Bandwidth -= NumMicroOps;
}
if (llvm::Error E = tryIssue(IR, &StallCyclesLeft))
return E;
if (StallCyclesLeft) {
StalledInst = IR;
Bandwidth = 0;
}
return llvm::ErrorSuccess();
@ -235,20 +218,26 @@ llvm::Error InOrderIssueStage::execute(InstRef &IR) {
llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
Instruction &IS = *IR.getInstruction();
unsigned SourceIndex = IR.getSourceIndex();
const InstrDesc &Desc = IS.getDesc();
if (!canExecute(IR, StallCycles)) {
LLVM_DEBUG(dbgs() << "[E] Stalled #" << IR << " for " << *StallCycles
<< " cycles\n");
Bandwidth = 0;
return llvm::ErrorSuccess();
}
unsigned RCUTokenID = RetireControlUnit::UnhandledTokenID;
IS.dispatch(RCUTokenID);
SmallVector<unsigned, 4> UsedRegs(PRF.getNumRegisterFiles());
addRegisterReadWrite(PRF, IS, SourceIndex, STI, UsedRegs);
notifyInstructionDispatch(IR, IS.getDesc().NumMicroOps, UsedRegs, *this);
unsigned NumMicroOps = IS.getNumMicroOps();
notifyInstructionDispatch(IR, NumMicroOps, UsedRegs, *this);
SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> UsedResources;
RM->issueInstruction(IS.getDesc(), UsedResources);
RM->issueInstruction(Desc, UsedResources);
IS.execute(SourceIndex);
// Replace resource masks with valid resource processor IDs.
@ -256,10 +245,17 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
uint64_t Mask = Use.first.first;
Use.first.first = RM->resolveResourceMask(Mask);
}
notifyInstructionExecute(IR, UsedResources, *this);
notifyInstructionIssue(IR, UsedResources, *this);
if (Desc.EndGroup) {
Bandwidth = 0;
} else {
assert(Bandwidth >= NumMicroOps);
Bandwidth -= NumMicroOps;
}
IssuedInst.push_back(IR);
++NumIssued;
NumIssued += NumMicroOps;
if (!IR.getInstruction()->getDesc().RetireOOO)
LastWriteBackCycle = findLastWriteBackCycle(IR);
@ -267,7 +263,7 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
return llvm::ErrorSuccess();
}
llvm::Error InOrderIssueStage::updateIssuedInst() {
void InOrderIssueStage::updateIssuedInst() {
// Update other instructions. Executed instructions will be retired during the
// next cycle.
unsigned NumExecuted = 0;
@ -283,29 +279,37 @@ llvm::Error InOrderIssueStage::updateIssuedInst() {
++I;
continue;
}
PRF.onInstructionExecuted(&IS);
notifyEvent<HWInstructionEvent>(
HWInstructionEvent(HWInstructionEvent::Executed, IR));
LLVM_DEBUG(dbgs() << "[E] Instruction #" << IR << " is executed\n");
++NumExecuted;
retireInstruction(*I);
std::iter_swap(I, E - NumExecuted);
}
// Retire instructions in the next cycle
if (NumExecuted) {
for (auto I = IssuedInst.end() - NumExecuted, E = IssuedInst.end(); I != E;
++I) {
if (llvm::Error E = moveToTheNextStage(*I))
return E;
}
if (NumExecuted)
IssuedInst.resize(IssuedInst.size() - NumExecuted);
}
}
return llvm::ErrorSuccess();
// Retire instruction IR directly from the in-order issue stage: mark it as
// retired, drop its register writes from the register file, and notify
// listeners with a "retired" hardware event.
void InOrderIssueStage::retireInstruction(InstRef &IR) {
Instruction &IS = *IR.getInstruction();
IS.retire();
// One slot per register file; filled in by removeRegisterWrite() below.
// NOTE(review): presumably each slot counts the registers freed in that
// register file -- confirm against RegisterFile::removeRegisterWrite.
llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
// Remove every register definition of this instruction from the register
// file.
for (const WriteState &WS : IS.getDefs())
PRF.removeRegisterWrite(WS, FreedRegs);
// Publish the retire event together with the FreedRegs information.
notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
LLVM_DEBUG(dbgs() << "[E] Retired #" << IR << " \n");
}
llvm::Error InOrderIssueStage::cycleStart() {
NumIssued = 0;
Bandwidth = SM.IssueWidth;
PRF.cycleStart();
@ -313,8 +317,7 @@ llvm::Error InOrderIssueStage::cycleStart() {
SmallVector<ResourceRef, 4> Freed;
RM->cycleEvent(Freed);
if (llvm::Error E = updateIssuedInst())
return E;
updateIssuedInst();
// Issue instructions scheduled for this cycle
if (!StallCyclesLeft && StalledInst) {
@ -325,7 +328,6 @@ llvm::Error InOrderIssueStage::cycleStart() {
if (!StallCyclesLeft) {
StalledInst.invalidate();
assert(NumIssued <= SM.IssueWidth && "Overflow.");
Bandwidth = SM.IssueWidth - NumIssued;
} else {
// The instruction is still stalled, cannot issue any new instructions in
// this cycle.

View File

@ -38,13 +38,6 @@ llvm::Error RetireStage::cycleStart() {
NumRetired++;
}
// Retire instructions that are not controlled by the RCU
for (InstRef &IR : RetireInst) {
IR.getInstruction()->retire();
notifyInstructionRetired(IR);
}
RetireInst.resize(0);
return llvm::ErrorSuccess();
}
@ -58,12 +51,9 @@ llvm::Error RetireStage::execute(InstRef &IR) {
PRF.onInstructionExecuted(&IS);
unsigned TokenID = IS.getRCUTokenID();
if (TokenID != RetireControlUnit::UnhandledTokenID) {
RCU.onInstructionExecuted(TokenID);
return llvm::ErrorSuccess();
}
assert(TokenID != RetireControlUnit::UnhandledTokenID);
RCU.onInstructionExecuted(TokenID);
RetireInst.push_back(IR);
return llvm::ErrorSuccess();
}

View File

@ -339,5 +339,4 @@ def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
def A55RCU : RetireControlUnit<64, 0>;
}

View File

@ -8,12 +8,12 @@ add w1, w0, #4
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 8
# CHECK-NEXT: Total Cycles: 10
# CHECK-NEXT: Total Cycles: 9
# CHECK-NEXT: Total uOps: 8
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.80
# CHECK-NEXT: IPC: 0.80
# CHECK-NEXT: uOps Per Cycle: 0.89
# CHECK-NEXT: IPC: 0.89
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@ -56,16 +56,16 @@ add w1, w0, #4
# CHECK-NEXT: 1.00 - - - - - - - - - - - add w1, w0, #4
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeeER. . add w2, w3, #1
# CHECK-NEXT: [0,1] DeeER. . add w4, w3, #2, lsl #12
# CHECK-NEXT: [0,2] .DeeER . add w0, w4, #3
# CHECK-NEXT: [0,3] . DeeER . add w1, w0, #4
# CHECK-NEXT: [1,0] . DeeER . add w2, w3, #1
# CHECK-NEXT: [1,1] . DeeER . add w4, w3, #2, lsl #12
# CHECK-NEXT: [1,2] . DeeER. add w0, w4, #3
# CHECK-NEXT: [1,3] . DeeER add w1, w0, #4
# CHECK: [0,0] DeeE . . add w2, w3, #1
# CHECK-NEXT: [0,1] DeeE . . add w4, w3, #2, lsl #12
# CHECK-NEXT: [0,2] .DeeE. . add w0, w4, #3
# CHECK-NEXT: [0,3] . DeeE . add w1, w0, #4
# CHECK-NEXT: [1,0] . DeeE . add w2, w3, #1
# CHECK-NEXT: [1,1] . DeeE . add w4, w3, #2, lsl #12
# CHECK-NEXT: [1,2] . DeeE. add w0, w4, #3
# CHECK-NEXT: [1,3] . DeeE add w1, w0, #4
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions

View File

@ -10,12 +10,12 @@ str w0, [x21, x18, lsl #2]
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 12
# CHECK-NEXT: Total Cycles: 21
# CHECK-NEXT: Total Cycles: 20
# CHECK-NEXT: Total uOps: 14
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.67
# CHECK-NEXT: IPC: 0.57
# CHECK-NEXT: uOps Per Cycle: 0.70
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 3.5
# CHECK: Instruction Info:
@ -35,7 +35,7 @@ str w0, [x21, x18, lsl #2]
# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 8 (38.1%)
# CHECK-NEXT: RAT - Register unavailable: 8 (40.0%)
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
@ -44,33 +44,22 @@ str w0, [x21, x18, lsl #2]
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 11 (52.4%)
# CHECK-NEXT: 1, 6 (28.6%)
# CHECK-NEXT: 2, 4 (19.0%)
# CHECK-NEXT: 0, 10 (50.0%)
# CHECK-NEXT: 1, 6 (30.0%)
# CHECK-NEXT: 2, 4 (20.0%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 11 (52.4%)
# CHECK-NEXT: 1, 6 (28.6%)
# CHECK-NEXT: 2, 4 (19.0%)
# CHECK-NEXT: 0, 10 (50.0%)
# CHECK-NEXT: 1, 6 (30.0%)
# CHECK-NEXT: 2, 4 (20.0%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: No scheduler resources used.
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
# CHECK-NEXT: [# retired], [# cycles]
# CHECK-NEXT: 0, 14 (66.7%)
# CHECK-NEXT: 1, 4 (19.0%)
# CHECK-NEXT: 2, 1 (4.8%)
# CHECK-NEXT: 3, 2 (9.5%)
# CHECK: Total ROB Entries: 64
# CHECK-NEXT: Max Used ROB Entries: 6 ( 9.4% )
# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 14
# CHECK-NEXT: Max number of mappings used: 6
# CHECK-NEXT: Max number of mappings used: 4
# CHECK: Resources:
# CHECK-NEXT: [0.0] - CortexA55UnitALU

View File

@ -10,12 +10,12 @@ str w0, [x21, x18, lsl #2]
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 12
# CHECK-NEXT: Total Cycles: 21
# CHECK-NEXT: Total Cycles: 20
# CHECK-NEXT: Total uOps: 14
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.67
# CHECK-NEXT: IPC: 0.57
# CHECK-NEXT: uOps Per Cycle: 0.70
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 3.5
# CHECK: Instruction Info:
@ -35,7 +35,7 @@ str w0, [x21, x18, lsl #2]
# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 8 (38.1%)
# CHECK-NEXT: RAT - Register unavailable: 8 (40.0%)
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
@ -44,33 +44,22 @@ str w0, [x21, x18, lsl #2]
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 11 (52.4%)
# CHECK-NEXT: 1, 6 (28.6%)
# CHECK-NEXT: 2, 4 (19.0%)
# CHECK-NEXT: 0, 10 (50.0%)
# CHECK-NEXT: 1, 6 (30.0%)
# CHECK-NEXT: 2, 4 (20.0%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 11 (52.4%)
# CHECK-NEXT: 1, 6 (28.6%)
# CHECK-NEXT: 2, 4 (19.0%)
# CHECK-NEXT: 0, 10 (50.0%)
# CHECK-NEXT: 1, 6 (30.0%)
# CHECK-NEXT: 2, 4 (20.0%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: No scheduler resources used.
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
# CHECK-NEXT: [# retired], [# cycles]
# CHECK-NEXT: 0, 14 (66.7%)
# CHECK-NEXT: 1, 4 (19.0%)
# CHECK-NEXT: 2, 1 (4.8%)
# CHECK-NEXT: 3, 2 (9.5%)
# CHECK: Total ROB Entries: 64
# CHECK-NEXT: Max Used ROB Entries: 6 ( 9.4% )
# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 14
# CHECK-NEXT: Max number of mappings used: 6
# CHECK-NEXT: Max number of mappings used: 4
# CHECK: Resources:
# CHECK-NEXT: [0.0] - CortexA55UnitALU
@ -101,20 +90,20 @@ str w0, [x21, x18, lsl #2]
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 0
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . . . ldr w4, [x2], #4
# CHECK-NEXT: [0,1] .DeeER . . . ldr w5, [x3]
# CHECK-NEXT: [0,2] . DeeeER. . . madd w0, w5, w4, w0
# CHECK-NEXT: [0,3] . DeeER. . . add x3, x3, x13
# CHECK-NEXT: [0,4] . DeeER. . . subs x1, x1, #1
# CHECK-NEXT: [0,5] . . DeeeER . . str w0, [x21, x18, lsl #2]
# CHECK-NEXT: [1,0] . . DeeER . . ldr w4, [x2], #4
# CHECK-NEXT: [1,1] . . DeeER . . ldr w5, [x3]
# CHECK-NEXT: [1,2] . . . DeeeER . madd w0, w5, w4, w0
# CHECK-NEXT: [1,3] . . . DeeER . add x3, x3, x13
# CHECK-NEXT: [1,4] . . . DeeER . subs x1, x1, #1
# CHECK-NEXT: [1,5] . . . DeeeER str w0, [x21, x18, lsl #2]
# CHECK: [0,0] DeeE . . . . ldr w4, [x2], #4
# CHECK-NEXT: [0,1] .DeeE. . . . ldr w5, [x3]
# CHECK-NEXT: [0,2] . DeeeE . . . madd w0, w5, w4, w0
# CHECK-NEXT: [0,3] . DeeE . . . add x3, x3, x13
# CHECK-NEXT: [0,4] . DeeE . . . subs x1, x1, #1
# CHECK-NEXT: [0,5] . . DeeeE . . str w0, [x21, x18, lsl #2]
# CHECK-NEXT: [1,0] . . DeeE . . ldr w4, [x2], #4
# CHECK-NEXT: [1,1] . . DeeE . . ldr w5, [x3]
# CHECK-NEXT: [1,2] . . . DeeeE . madd w0, w5, w4, w0
# CHECK-NEXT: [1,3] . . . DeeE . add x3, x3, x13
# CHECK-NEXT: [1,4] . . . DeeE . subs x1, x1, #1
# CHECK-NEXT: [1,5] . . . DeeeE str w0, [x21, x18, lsl #2]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions

View File

@ -10,12 +10,12 @@ add w7, w9, w0
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 12
# CHECK-NEXT: Total Cycles: 20
# CHECK-NEXT: Total Cycles: 19
# CHECK-NEXT: Total uOps: 12
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.60
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: uOps Per Cycle: 0.63
# CHECK-NEXT: IPC: 0.63
# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Instruction Info:
@ -40,37 +40,26 @@ add w7, w9, w0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 1 (5.0%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 1 (5.3%)
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 12 (60.0%)
# CHECK-NEXT: 1, 4 (20.0%)
# CHECK-NEXT: 2, 4 (20.0%)
# CHECK-NEXT: 0, 11 (57.9%)
# CHECK-NEXT: 1, 4 (21.1%)
# CHECK-NEXT: 2, 4 (21.1%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 12 (60.0%)
# CHECK-NEXT: 1, 4 (20.0%)
# CHECK-NEXT: 2, 4 (20.0%)
# CHECK-NEXT: 0, 11 (57.9%)
# CHECK-NEXT: 1, 4 (21.1%)
# CHECK-NEXT: 2, 4 (21.1%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: No scheduler resources used.
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
# CHECK-NEXT: [# retired], [# cycles]
# CHECK-NEXT: 0, 14 (70.0%)
# CHECK-NEXT: 1, 2 (10.0%)
# CHECK-NEXT: 2, 2 (10.0%)
# CHECK-NEXT: 3, 2 (10.0%)
# CHECK: Total ROB Entries: 64
# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% )
# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 12
# CHECK-NEXT: Max number of mappings used: 7
# CHECK-NEXT: Max number of mappings used: 6
# CHECK: Resources:
# CHECK-NEXT: [0.0] - CortexA55UnitALU
@ -100,21 +89,21 @@ add w7, w9, w0
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - add w7, w9, w0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: 012345678
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeER. . . sdiv w12, w21, w0
# CHECK-NEXT: [0,1] . DeeER. . . add w8, w8, #1
# CHECK-NEXT: [0,2] . DeeER. . . add w1, w2, w0
# CHECK-NEXT: [0,3] . .DeeER . . add w3, w4, #1
# CHECK-NEXT: [0,4] . .DeeER . . add w5, w6, w0
# CHECK-NEXT: [0,5] . . DeeER . . add w7, w9, w0
# CHECK-NEXT: [1,0] . . DeeeeeeeER . sdiv w12, w21, w0
# CHECK-NEXT: [1,1] . . . DeeER . add w8, w8, #1
# CHECK-NEXT: [1,2] . . . DeeER . add w1, w2, w0
# CHECK-NEXT: [1,3] . . . DeeER. add w3, w4, #1
# CHECK-NEXT: [1,4] . . . DeeER. add w5, w6, w0
# CHECK-NEXT: [1,5] . . . DeeER add w7, w9, w0
# CHECK: [0,0] DeeeeeeeE . . . sdiv w12, w21, w0
# CHECK-NEXT: [0,1] . DeeE . . . add w8, w8, #1
# CHECK-NEXT: [0,2] . DeeE . . . add w1, w2, w0
# CHECK-NEXT: [0,3] . .DeeE. . . add w3, w4, #1
# CHECK-NEXT: [0,4] . .DeeE. . . add w5, w6, w0
# CHECK-NEXT: [0,5] . . DeeE . . add w7, w9, w0
# CHECK-NEXT: [1,0] . . DeeeeeeeE . sdiv w12, w21, w0
# CHECK-NEXT: [1,1] . . . DeeE . add w8, w8, #1
# CHECK-NEXT: [1,2] . . . DeeE . add w1, w2, w0
# CHECK-NEXT: [1,3] . . . DeeE. add w3, w4, #1
# CHECK-NEXT: [1,4] . . . DeeE. add w5, w6, w0
# CHECK-NEXT: [1,5] . . . DeeE add w7, w9, w0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions

View File

@ -10,12 +10,12 @@ add w7, w9, w0
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 12
# CHECK-NEXT: Total Cycles: 25
# CHECK-NEXT: Total Cycles: 24
# CHECK-NEXT: Total uOps: 12
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.48
# CHECK-NEXT: IPC: 0.48
# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Instruction Info:
@ -40,31 +40,21 @@ add w7, w9, w0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 7 (28.0%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 7 (29.2%)
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 19 (76.0%)
# CHECK-NEXT: 2, 6 (24.0%)
# CHECK-NEXT: 0, 18 (75.0%)
# CHECK-NEXT: 2, 6 (25.0%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 19 (76.0%)
# CHECK-NEXT: 2, 6 (24.0%)
# CHECK-NEXT: 0, 18 (75.0%)
# CHECK-NEXT: 2, 6 (25.0%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: No scheduler resources used.
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
# CHECK-NEXT: [# retired], [# cycles]
# CHECK-NEXT: 0, 18 (72.0%)
# CHECK-NEXT: 1, 2 (8.0%)
# CHECK-NEXT: 2, 5 (20.0%)
# CHECK: Total ROB Entries: 64
# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% )
# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 12
# CHECK-NEXT: Max number of mappings used: 7
@ -98,20 +88,20 @@ add w7, w9, w0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 01234
# CHECK-NEXT: Index 0123456789 0123
# CHECK: [0,0] DeeeeeeeeeeeeER. . . fdiv s1, s2, s3
# CHECK-NEXT: [0,1] DeeER. . . . . add w8, w8, #1
# CHECK-NEXT: [0,2] .DeeER . . . . add w1, w2, w0
# CHECK-NEXT: [0,3] .DeeER . . . . add w3, w4, #1
# CHECK-NEXT: [0,4] . DeeER . . . . add w5, w6, w0
# CHECK-NEXT: [0,5] . DeeER . . . . add w7, w9, w0
# CHECK-NEXT: [1,0] . . DeeeeeeeeeeeeER fdiv s1, s2, s3
# CHECK-NEXT: [1,1] . . DeeER. . . add w8, w8, #1
# CHECK-NEXT: [1,2] . . .DeeER . . add w1, w2, w0
# CHECK-NEXT: [1,3] . . .DeeER . . add w3, w4, #1
# CHECK-NEXT: [1,4] . . . DeeER . . add w5, w6, w0
# CHECK-NEXT: [1,5] . . . DeeER . . add w7, w9, w0
# CHECK: [0,0] DeeeeeeeeeeeeE . . . fdiv s1, s2, s3
# CHECK-NEXT: [0,1] DeeE . . . . . add w8, w8, #1
# CHECK-NEXT: [0,2] .DeeE. . . . . add w1, w2, w0
# CHECK-NEXT: [0,3] .DeeE. . . . . add w3, w4, #1
# CHECK-NEXT: [0,4] . DeeE . . . . add w5, w6, w0
# CHECK-NEXT: [0,5] . DeeE . . . . add w7, w9, w0
# CHECK-NEXT: [1,0] . . DeeeeeeeeeeeeE fdiv s1, s2, s3
# CHECK-NEXT: [1,1] . . DeeE . . . add w8, w8, #1
# CHECK-NEXT: [1,2] . . .DeeE. . . add w1, w2, w0
# CHECK-NEXT: [1,3] . . .DeeE. . . add w3, w4, #1
# CHECK-NEXT: [1,4] . . . DeeE . . add w5, w6, w0
# CHECK-NEXT: [1,5] . . . DeeE . . add w7, w9, w0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions

View File

@ -7,12 +7,12 @@ v_add_f32 v2, v1, v0
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 13
# CHECK-NEXT: Total Cycles: 12
# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 1
# CHECK-NEXT: uOps Per Cycle: 0.23
# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Instruction Info:
@ -48,12 +48,12 @@ v_add_f32 v2, v1, v0
# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v2, v1, v0
# CHECK: Timeline view:
# CHECK-NEXT: 012
# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . . v_add_f32_e32 v0, v0, v0
# CHECK-NEXT: [0,1] .DeeeeER . . v_add_f32_e32 v1, v1, v1
# CHECK-NEXT: [0,2] . .DeeeeER v_add_f32_e32 v2, v1, v0
# CHECK: [0,0] DeeeeE .. v_add_f32_e32 v0, v0, v0
# CHECK-NEXT: [0,1] .DeeeeE .. v_add_f32_e32 v1, v1, v1
# CHECK-NEXT: [0,2] . .DeeeeE v_add_f32_e32 v2, v1, v0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions

View File

@ -42,7 +42,7 @@ v_sqrt_f64 v[4:5], v[4:5]
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 27
# CHECK-NEXT: Total Cycles: 205
# CHECK-NEXT: Total Cycles: 204
# CHECK-NEXT: Total uOps: 27
# CHECK: Dispatch Width: 1
@ -134,19 +134,19 @@ v_sqrt_f64 v[4:5], v[4:5]
# CHECK-NEXT: 0123456789 0123456789 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . v_cvt_i32_f64_e32 v0, v[0:1]
# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeeeeeeER. . . . . . . . . . v_cvt_f64_i32_e32 v[2:3], v2
# CHECK-NEXT: [0,2] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_f32_f64_e32 v4, v[4:5]
# CHECK-NEXT: [0,3] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_f64_f32_e32 v[6:7], v6
# CHECK-NEXT: [0,4] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_u32_f64_e32 v8, v[8:9]
# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_f64_u32_e32 v[10:11], v10
# CHECK-NEXT: [0,6] . . . . . DeeeeeeeeeeeeeeeeeeeeeER . . . . . v_frexp_exp_i32_f64_e32 v0, v[0:1]
# CHECK-NEXT: [0,7] . . . . . DeeeeeeeeeeeeeeeeeeeeeER . . . . . v_frexp_mant_f64_e32 v[2:3], v[2:3]
# CHECK-NEXT: [0,8] . . . . . DeeeeeeeeeeeeeeeeeeeeeER . . . . . v_fract_f64_e32 v[4:5], v[4:5]
# CHECK-NEXT: [0,9] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeER . v_trunc_f64_e32 v[0:1], v[0:1]
# CHECK-NEXT: [0,10] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeER . v_ceil_f64_e32 v[2:3], v[2:3]
# CHECK-NEXT: [0,11] . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeER. v_rndne_f64_e32 v[4:5], v[4:5]
# CHECK-NEXT: [0,12] . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeER v_floor_f64_e32 v[6:7], v[6:7]
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . v_cvt_i32_f64_e32 v0, v[0:1]
# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . v_cvt_f64_i32_e32 v[2:3], v2
# CHECK-NEXT: [0,2] . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . v_cvt_f32_f64_e32 v4, v[4:5]
# CHECK-NEXT: [0,3] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cvt_f64_f32_e32 v[6:7], v6
# CHECK-NEXT: [0,4] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cvt_u32_f64_e32 v8, v[8:9]
# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cvt_f64_u32_e32 v[10:11], v10
# CHECK-NEXT: [0,6] . . . . . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . v_frexp_exp_i32_f64_e32 v0, v[0:1]
# CHECK-NEXT: [0,7] . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . v_frexp_mant_f64_e32 v[2:3], v[2:3]
# CHECK-NEXT: [0,8] . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . v_fract_f64_e32 v[4:5], v[4:5]
# CHECK-NEXT: [0,9] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . v_trunc_f64_e32 v[0:1], v[0:1]
# CHECK-NEXT: [0,10] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . v_ceil_f64_e32 v[2:3], v[2:3]
# CHECK-NEXT: [0,11] . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeE . v_rndne_f64_e32 v[4:5], v[4:5]
# CHECK-NEXT: [0,12] . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE. v_floor_f64_e32 v[6:7], v[6:7]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions

View File

@ -9,12 +9,12 @@ vldr d0, [r1]
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 7
# CHECK-NEXT: Total Cycles: 6
# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.43
# CHECK-NEXT: IPC: 0.43
# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Instruction Info:
@ -56,11 +56,11 @@ vldr d0, [r1]
# CHECK-NEXT: - - - - 1.00 - - - - - - - 2.00 vldr d0, [r1]
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456
# CHECK-NEXT: Index 012345
# CHECK: [0,0] DER .. add.w r1, r1, #1
# CHECK-NEXT: [0,1] .DER .. add.w r1, r1, #2
# CHECK-NEXT: [0,2] . DeER vldr d0, [r1]
# CHECK: [0,0] DE . add.w r1, r1, #1
# CHECK-NEXT: [0,1] .DE . add.w r1, r1, #2
# CHECK-NEXT: [0,2] . DeE vldr d0, [r1]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions

View File

@ -77,8 +77,10 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) {
"Instruction cannot be ready if it hasn't been dispatched yet!");
WTEntry.CyclesSpentInSQWhileReady +=
TVEntry.CycleIssued - TVEntry.CycleReady;
WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
(CurrentCycle - 1) - TVEntry.CycleExecuted;
if (CurrentCycle > TVEntry.CycleExecuted) {
WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
(CurrentCycle - 1) - TVEntry.CycleExecuted;
}
break;
}
case HWInstructionEvent::Ready:
@ -243,7 +245,8 @@ void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
OS << TimelineView::DisplayChar::RetireLag;
OS << TimelineView::DisplayChar::Retired;
if (Entry.CycleExecuted < Entry.CycleRetired)
OS << TimelineView::DisplayChar::Retired;
// Skip other columns.
for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)

View File

@ -278,7 +278,8 @@ static void processViewOptions(bool IsOutOfOrder) {
processOptionImpl(PrintRegisterFileStats, Default);
processOptionImpl(PrintDispatchStats, Default);
processOptionImpl(PrintSchedulerStats, Default);
processOptionImpl(PrintRetireStats, Default);
if (IsOutOfOrder)
processOptionImpl(PrintRetireStats, Default);
}
// Returns true on success.