mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 10:32:48 +02:00
[MCA] Disable RCU for InOrderIssueStage
This is a follow-up for D98604, "[MCA] Ensure that writes occur in-order". When instructions are aligned by the order of writes, they retire in-order naturally. There is no need for an RCU, so it is disabled. Differential Revision: https://reviews.llvm.org/D98628
This commit is contained in:
parent
4e38761daa
commit
182b0cd903
@ -975,7 +975,6 @@ met. Multiple instructions can be issued in one cycle according to the value of
|
||||
the ``IssueWidth`` parameter in LLVM's scheduling model.
|
||||
|
||||
Once issued, an instruction is moved to ``IssuedInst`` set until it is ready to
|
||||
retire. If ``RetireControlUnit`` is defined in the LLVM's scheduling model,
|
||||
:program:`llvm-mca` ensures that instructions are retired in-order. However, an
|
||||
instruction is allowed to retire out-of-order if ``RetireOOO`` property is true
|
||||
for at least one of its writes.
|
||||
retire. :program:`llvm-mca` ensures that writes are committed in-order. However,
|
||||
an instruction is allowed to commit writes and retire out-of-order if
|
||||
``RetireOOO`` property is true for at least one of its writes.
|
||||
|
@ -27,12 +27,10 @@ class MCSubtargetInfo;
|
||||
namespace mca {
|
||||
class RegisterFile;
|
||||
class ResourceManager;
|
||||
struct RetireControlUnit;
|
||||
|
||||
class InOrderIssueStage final : public Stage {
|
||||
const MCSchedModel &SM;
|
||||
const MCSubtargetInfo &STI;
|
||||
RetireControlUnit &RCU;
|
||||
RegisterFile &PRF;
|
||||
std::unique_ptr<ResourceManager> RM;
|
||||
|
||||
@ -67,14 +65,16 @@ class InOrderIssueStage final : public Stage {
|
||||
Error tryIssue(InstRef &IR, unsigned *StallCycles);
|
||||
|
||||
/// Update status of instructions from IssuedInst.
|
||||
Error updateIssuedInst();
|
||||
void updateIssuedInst();
|
||||
|
||||
/// Retire instruction once it is executed.
|
||||
void retireInstruction(InstRef &IR);
|
||||
|
||||
public:
|
||||
InOrderIssueStage(RetireControlUnit &RCU, RegisterFile &PRF,
|
||||
const MCSchedModel &SM, const MCSubtargetInfo &STI)
|
||||
: SM(SM), STI(STI), RCU(RCU), PRF(PRF),
|
||||
RM(std::make_unique<ResourceManager>(SM)), NumIssued(0),
|
||||
StallCyclesLeft(0), Bandwidth(0), LastWriteBackCycle(0) {}
|
||||
InOrderIssueStage(RegisterFile &PRF, const MCSchedModel &SM,
|
||||
const MCSubtargetInfo &STI)
|
||||
: SM(SM), STI(STI), PRF(PRF), RM(std::make_unique<ResourceManager>(SM)),
|
||||
NumIssued(0), StallCyclesLeft(0), Bandwidth(0), LastWriteBackCycle(0) {}
|
||||
|
||||
bool isAvailable(const InstRef &) const override;
|
||||
bool hasWorkToComplete() const override;
|
||||
|
@ -30,7 +30,6 @@ class RetireStage final : public Stage {
|
||||
RetireControlUnit &RCU;
|
||||
RegisterFile &PRF;
|
||||
LSUnitBase &LSU;
|
||||
SmallVector<InstRef, 4> RetireInst;
|
||||
|
||||
RetireStage(const RetireStage &Other) = delete;
|
||||
RetireStage &operator=(const RetireStage &Other) = delete;
|
||||
@ -39,9 +38,7 @@ public:
|
||||
RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS)
|
||||
: Stage(), RCU(R), PRF(F), LSU(LS) {}
|
||||
|
||||
bool hasWorkToComplete() const override {
|
||||
return !RCU.isEmpty() || !RetireInst.empty();
|
||||
}
|
||||
bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
|
||||
Error cycleStart() override;
|
||||
Error cycleEnd() override;
|
||||
Error execute(InstRef &IR) override;
|
||||
|
@ -71,23 +71,16 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) {
|
||||
std::unique_ptr<Pipeline>
|
||||
Context::createInOrderPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) {
|
||||
const MCSchedModel &SM = STI.getSchedModel();
|
||||
auto RCU = std::make_unique<RetireControlUnit>(SM);
|
||||
auto PRF = std::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
|
||||
auto LSU = std::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
|
||||
Opts.StoreQueueSize, Opts.AssumeNoAlias);
|
||||
|
||||
auto Entry = std::make_unique<EntryStage>(SrcMgr);
|
||||
auto InOrderIssue = std::make_unique<InOrderIssueStage>(*RCU, *PRF, SM, STI);
|
||||
auto Retire = std::make_unique<RetireStage>(*RCU, *PRF, *LSU);
|
||||
auto InOrderIssue = std::make_unique<InOrderIssueStage>(*PRF, SM, STI);
|
||||
|
||||
auto StagePipeline = std::make_unique<Pipeline>();
|
||||
StagePipeline->appendStage(std::move(Entry));
|
||||
StagePipeline->appendStage(std::move(InOrderIssue));
|
||||
StagePipeline->appendStage(std::move(Retire));
|
||||
|
||||
addHardwareUnit(std::move(RCU));
|
||||
addHardwareUnit(std::move(PRF));
|
||||
addHardwareUnit(std::move(LSU));
|
||||
|
||||
return StagePipeline;
|
||||
}
|
||||
|
@ -23,6 +23,8 @@ RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
|
||||
: NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
|
||||
AvailableEntries(SM.isOutOfOrder() ? SM.MicroOpBufferSize : 0),
|
||||
MaxRetirePerCycle(0) {
|
||||
assert(SM.isOutOfOrder() &&
|
||||
"RetireControlUnit is not available for in-order processors");
|
||||
// Check if the scheduling model provides extra information about the machine
|
||||
// processor. If so, then use that information to set the reorder buffer size
|
||||
// and the maximum number of instructions retired per cycle.
|
||||
@ -33,17 +35,12 @@ RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
|
||||
MaxRetirePerCycle = EPI.MaxRetirePerCycle;
|
||||
}
|
||||
NumROBEntries = AvailableEntries;
|
||||
if (!SM.isOutOfOrder() && !NumROBEntries)
|
||||
return;
|
||||
assert(NumROBEntries && "Invalid reorder buffer size!");
|
||||
Queue.resize(2 * NumROBEntries);
|
||||
}
|
||||
|
||||
// Reserves a number of slots, and returns a new token.
|
||||
unsigned RetireControlUnit::dispatch(const InstRef &IR) {
|
||||
if (!NumROBEntries)
|
||||
return UnhandledTokenID;
|
||||
|
||||
const Instruction &Inst = *IR.getInstruction();
|
||||
unsigned Entries = normalizeQuantity(Inst.getNumMicroOps());
|
||||
assert((AvailableEntries >= Entries) && "Reorder Buffer unavailable!");
|
||||
|
@ -182,7 +182,7 @@ static void addRegisterReadWrite(RegisterFile &PRF, Instruction &IS,
|
||||
PRF.addRegisterWrite(WriteRef(SourceIndex, &WS), UsedRegs);
|
||||
}
|
||||
|
||||
static void notifyInstructionExecute(
|
||||
static void notifyInstructionIssue(
|
||||
const InstRef &IR,
|
||||
const SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedRes,
|
||||
const Stage &S) {
|
||||
@ -205,28 +205,11 @@ static void notifyInstructionDispatch(const InstRef &IR, unsigned Ops,
|
||||
}
|
||||
|
||||
llvm::Error InOrderIssueStage::execute(InstRef &IR) {
|
||||
Instruction &IS = *IR.getInstruction();
|
||||
const InstrDesc &Desc = IS.getDesc();
|
||||
|
||||
unsigned RCUTokenID = RetireControlUnit::UnhandledTokenID;
|
||||
if (!Desc.RetireOOO)
|
||||
RCUTokenID = RCU.dispatch(IR);
|
||||
IS.dispatch(RCUTokenID);
|
||||
|
||||
if (Desc.EndGroup) {
|
||||
Bandwidth = 0;
|
||||
} else {
|
||||
unsigned NumMicroOps = IR.getInstruction()->getNumMicroOps();
|
||||
assert(Bandwidth >= NumMicroOps);
|
||||
Bandwidth -= NumMicroOps;
|
||||
}
|
||||
|
||||
if (llvm::Error E = tryIssue(IR, &StallCyclesLeft))
|
||||
return E;
|
||||
|
||||
if (StallCyclesLeft) {
|
||||
StalledInst = IR;
|
||||
Bandwidth = 0;
|
||||
}
|
||||
|
||||
return llvm::ErrorSuccess();
|
||||
@ -235,20 +218,26 @@ llvm::Error InOrderIssueStage::execute(InstRef &IR) {
|
||||
llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
|
||||
Instruction &IS = *IR.getInstruction();
|
||||
unsigned SourceIndex = IR.getSourceIndex();
|
||||
const InstrDesc &Desc = IS.getDesc();
|
||||
|
||||
if (!canExecute(IR, StallCycles)) {
|
||||
LLVM_DEBUG(dbgs() << "[E] Stalled #" << IR << " for " << *StallCycles
|
||||
<< " cycles\n");
|
||||
Bandwidth = 0;
|
||||
return llvm::ErrorSuccess();
|
||||
}
|
||||
|
||||
unsigned RCUTokenID = RetireControlUnit::UnhandledTokenID;
|
||||
IS.dispatch(RCUTokenID);
|
||||
|
||||
SmallVector<unsigned, 4> UsedRegs(PRF.getNumRegisterFiles());
|
||||
addRegisterReadWrite(PRF, IS, SourceIndex, STI, UsedRegs);
|
||||
|
||||
notifyInstructionDispatch(IR, IS.getDesc().NumMicroOps, UsedRegs, *this);
|
||||
unsigned NumMicroOps = IS.getNumMicroOps();
|
||||
notifyInstructionDispatch(IR, NumMicroOps, UsedRegs, *this);
|
||||
|
||||
SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> UsedResources;
|
||||
RM->issueInstruction(IS.getDesc(), UsedResources);
|
||||
RM->issueInstruction(Desc, UsedResources);
|
||||
IS.execute(SourceIndex);
|
||||
|
||||
// Replace resource masks with valid resource processor IDs.
|
||||
@ -256,10 +245,17 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
|
||||
uint64_t Mask = Use.first.first;
|
||||
Use.first.first = RM->resolveResourceMask(Mask);
|
||||
}
|
||||
notifyInstructionExecute(IR, UsedResources, *this);
|
||||
notifyInstructionIssue(IR, UsedResources, *this);
|
||||
|
||||
if (Desc.EndGroup) {
|
||||
Bandwidth = 0;
|
||||
} else {
|
||||
assert(Bandwidth >= NumMicroOps);
|
||||
Bandwidth -= NumMicroOps;
|
||||
}
|
||||
|
||||
IssuedInst.push_back(IR);
|
||||
++NumIssued;
|
||||
NumIssued += NumMicroOps;
|
||||
|
||||
if (!IR.getInstruction()->getDesc().RetireOOO)
|
||||
LastWriteBackCycle = findLastWriteBackCycle(IR);
|
||||
@ -267,7 +263,7 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
|
||||
return llvm::ErrorSuccess();
|
||||
}
|
||||
|
||||
llvm::Error InOrderIssueStage::updateIssuedInst() {
|
||||
void InOrderIssueStage::updateIssuedInst() {
|
||||
// Update other instructions. Executed instructions will be retired during the
|
||||
// next cycle.
|
||||
unsigned NumExecuted = 0;
|
||||
@ -283,29 +279,37 @@ llvm::Error InOrderIssueStage::updateIssuedInst() {
|
||||
++I;
|
||||
continue;
|
||||
}
|
||||
|
||||
PRF.onInstructionExecuted(&IS);
|
||||
notifyEvent<HWInstructionEvent>(
|
||||
HWInstructionEvent(HWInstructionEvent::Executed, IR));
|
||||
|
||||
LLVM_DEBUG(dbgs() << "[E] Instruction #" << IR << " is executed\n");
|
||||
++NumExecuted;
|
||||
|
||||
retireInstruction(*I);
|
||||
|
||||
std::iter_swap(I, E - NumExecuted);
|
||||
}
|
||||
|
||||
// Retire instructions in the next cycle
|
||||
if (NumExecuted) {
|
||||
for (auto I = IssuedInst.end() - NumExecuted, E = IssuedInst.end(); I != E;
|
||||
++I) {
|
||||
if (llvm::Error E = moveToTheNextStage(*I))
|
||||
return E;
|
||||
}
|
||||
if (NumExecuted)
|
||||
IssuedInst.resize(IssuedInst.size() - NumExecuted);
|
||||
}
|
||||
}
|
||||
|
||||
return llvm::ErrorSuccess();
|
||||
void InOrderIssueStage::retireInstruction(InstRef &IR) {
|
||||
Instruction &IS = *IR.getInstruction();
|
||||
IS.retire();
|
||||
|
||||
llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
|
||||
for (const WriteState &WS : IS.getDefs())
|
||||
PRF.removeRegisterWrite(WS, FreedRegs);
|
||||
|
||||
notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
|
||||
LLVM_DEBUG(dbgs() << "[E] Retired #" << IR << " \n");
|
||||
}
|
||||
|
||||
llvm::Error InOrderIssueStage::cycleStart() {
|
||||
NumIssued = 0;
|
||||
Bandwidth = SM.IssueWidth;
|
||||
|
||||
PRF.cycleStart();
|
||||
|
||||
@ -313,8 +317,7 @@ llvm::Error InOrderIssueStage::cycleStart() {
|
||||
SmallVector<ResourceRef, 4> Freed;
|
||||
RM->cycleEvent(Freed);
|
||||
|
||||
if (llvm::Error E = updateIssuedInst())
|
||||
return E;
|
||||
updateIssuedInst();
|
||||
|
||||
// Issue instructions scheduled for this cycle
|
||||
if (!StallCyclesLeft && StalledInst) {
|
||||
@ -325,7 +328,6 @@ llvm::Error InOrderIssueStage::cycleStart() {
|
||||
if (!StallCyclesLeft) {
|
||||
StalledInst.invalidate();
|
||||
assert(NumIssued <= SM.IssueWidth && "Overflow.");
|
||||
Bandwidth = SM.IssueWidth - NumIssued;
|
||||
} else {
|
||||
// The instruction is still stalled, cannot issue any new instructions in
|
||||
// this cycle.
|
||||
|
@ -38,13 +38,6 @@ llvm::Error RetireStage::cycleStart() {
|
||||
NumRetired++;
|
||||
}
|
||||
|
||||
// Retire instructions that are not controlled by the RCU
|
||||
for (InstRef &IR : RetireInst) {
|
||||
IR.getInstruction()->retire();
|
||||
notifyInstructionRetired(IR);
|
||||
}
|
||||
RetireInst.resize(0);
|
||||
|
||||
return llvm::ErrorSuccess();
|
||||
}
|
||||
|
||||
@ -58,12 +51,9 @@ llvm::Error RetireStage::execute(InstRef &IR) {
|
||||
|
||||
PRF.onInstructionExecuted(&IS);
|
||||
unsigned TokenID = IS.getRCUTokenID();
|
||||
if (TokenID != RetireControlUnit::UnhandledTokenID) {
|
||||
RCU.onInstructionExecuted(TokenID);
|
||||
return llvm::ErrorSuccess();
|
||||
}
|
||||
assert(TokenID != RetireControlUnit::UnhandledTokenID);
|
||||
RCU.onInstructionExecuted(TokenID);
|
||||
|
||||
RetireInst.push_back(IR);
|
||||
return llvm::ErrorSuccess();
|
||||
}
|
||||
|
||||
|
@ -339,5 +339,4 @@ def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
|
||||
def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
|
||||
def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
|
||||
|
||||
def A55RCU : RetireControlUnit<64, 0>;
|
||||
}
|
||||
|
@ -8,12 +8,12 @@ add w1, w0, #4
|
||||
|
||||
# CHECK: Iterations: 2
|
||||
# CHECK-NEXT: Instructions: 8
|
||||
# CHECK-NEXT: Total Cycles: 10
|
||||
# CHECK-NEXT: Total Cycles: 9
|
||||
# CHECK-NEXT: Total uOps: 8
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.80
|
||||
# CHECK-NEXT: IPC: 0.80
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.89
|
||||
# CHECK-NEXT: IPC: 0.89
|
||||
# CHECK-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
@ -56,16 +56,16 @@ add w1, w0, #4
|
||||
# CHECK-NEXT: 1.00 - - - - - - - - - - - add w1, w0, #4
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
# CHECK-NEXT: Index 012345678
|
||||
|
||||
# CHECK: [0,0] DeeER. . add w2, w3, #1
|
||||
# CHECK-NEXT: [0,1] DeeER. . add w4, w3, #2, lsl #12
|
||||
# CHECK-NEXT: [0,2] .DeeER . add w0, w4, #3
|
||||
# CHECK-NEXT: [0,3] . DeeER . add w1, w0, #4
|
||||
# CHECK-NEXT: [1,0] . DeeER . add w2, w3, #1
|
||||
# CHECK-NEXT: [1,1] . DeeER . add w4, w3, #2, lsl #12
|
||||
# CHECK-NEXT: [1,2] . DeeER. add w0, w4, #3
|
||||
# CHECK-NEXT: [1,3] . DeeER add w1, w0, #4
|
||||
# CHECK: [0,0] DeeE . . add w2, w3, #1
|
||||
# CHECK-NEXT: [0,1] DeeE . . add w4, w3, #2, lsl #12
|
||||
# CHECK-NEXT: [0,2] .DeeE. . add w0, w4, #3
|
||||
# CHECK-NEXT: [0,3] . DeeE . add w1, w0, #4
|
||||
# CHECK-NEXT: [1,0] . DeeE . add w2, w3, #1
|
||||
# CHECK-NEXT: [1,1] . DeeE . add w4, w3, #2, lsl #12
|
||||
# CHECK-NEXT: [1,2] . DeeE. add w0, w4, #3
|
||||
# CHECK-NEXT: [1,3] . DeeE add w1, w0, #4
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -10,12 +10,12 @@ str w0, [x21, x18, lsl #2]
|
||||
|
||||
# CHECK: Iterations: 2
|
||||
# CHECK-NEXT: Instructions: 12
|
||||
# CHECK-NEXT: Total Cycles: 21
|
||||
# CHECK-NEXT: Total Cycles: 20
|
||||
# CHECK-NEXT: Total uOps: 14
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.67
|
||||
# CHECK-NEXT: IPC: 0.57
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.70
|
||||
# CHECK-NEXT: IPC: 0.60
|
||||
# CHECK-NEXT: Block RThroughput: 3.5
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
@ -35,7 +35,7 @@ str w0, [x21, x18, lsl #2]
|
||||
# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
|
||||
|
||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||
# CHECK-NEXT: RAT - Register unavailable: 8 (38.1%)
|
||||
# CHECK-NEXT: RAT - Register unavailable: 8 (40.0%)
|
||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||
# CHECK-NEXT: LQ - Load queue full: 0
|
||||
@ -44,33 +44,22 @@ str w0, [x21, x18, lsl #2]
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 11 (52.4%)
|
||||
# CHECK-NEXT: 1, 6 (28.6%)
|
||||
# CHECK-NEXT: 2, 4 (19.0%)
|
||||
# CHECK-NEXT: 0, 10 (50.0%)
|
||||
# CHECK-NEXT: 1, 6 (30.0%)
|
||||
# CHECK-NEXT: 2, 4 (20.0%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 11 (52.4%)
|
||||
# CHECK-NEXT: 1, 6 (28.6%)
|
||||
# CHECK-NEXT: 2, 4 (19.0%)
|
||||
# CHECK-NEXT: 0, 10 (50.0%)
|
||||
# CHECK-NEXT: 1, 6 (30.0%)
|
||||
# CHECK-NEXT: 2, 4 (20.0%)
|
||||
|
||||
# CHECK: Scheduler's queue usage:
|
||||
# CHECK-NEXT: No scheduler resources used.
|
||||
|
||||
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
|
||||
# CHECK-NEXT: [# retired], [# cycles]
|
||||
# CHECK-NEXT: 0, 14 (66.7%)
|
||||
# CHECK-NEXT: 1, 4 (19.0%)
|
||||
# CHECK-NEXT: 2, 1 (4.8%)
|
||||
# CHECK-NEXT: 3, 2 (9.5%)
|
||||
|
||||
# CHECK: Total ROB Entries: 64
|
||||
# CHECK-NEXT: Max Used ROB Entries: 6 ( 9.4% )
|
||||
# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
|
||||
|
||||
# CHECK: Register File statistics:
|
||||
# CHECK-NEXT: Total number of mappings created: 14
|
||||
# CHECK-NEXT: Max number of mappings used: 6
|
||||
# CHECK-NEXT: Max number of mappings used: 4
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0.0] - CortexA55UnitALU
|
||||
|
@ -10,12 +10,12 @@ str w0, [x21, x18, lsl #2]
|
||||
|
||||
# CHECK: Iterations: 2
|
||||
# CHECK-NEXT: Instructions: 12
|
||||
# CHECK-NEXT: Total Cycles: 21
|
||||
# CHECK-NEXT: Total Cycles: 20
|
||||
# CHECK-NEXT: Total uOps: 14
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.67
|
||||
# CHECK-NEXT: IPC: 0.57
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.70
|
||||
# CHECK-NEXT: IPC: 0.60
|
||||
# CHECK-NEXT: Block RThroughput: 3.5
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
@ -35,7 +35,7 @@ str w0, [x21, x18, lsl #2]
|
||||
# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
|
||||
|
||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||
# CHECK-NEXT: RAT - Register unavailable: 8 (38.1%)
|
||||
# CHECK-NEXT: RAT - Register unavailable: 8 (40.0%)
|
||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||
# CHECK-NEXT: LQ - Load queue full: 0
|
||||
@ -44,33 +44,22 @@ str w0, [x21, x18, lsl #2]
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 11 (52.4%)
|
||||
# CHECK-NEXT: 1, 6 (28.6%)
|
||||
# CHECK-NEXT: 2, 4 (19.0%)
|
||||
# CHECK-NEXT: 0, 10 (50.0%)
|
||||
# CHECK-NEXT: 1, 6 (30.0%)
|
||||
# CHECK-NEXT: 2, 4 (20.0%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 11 (52.4%)
|
||||
# CHECK-NEXT: 1, 6 (28.6%)
|
||||
# CHECK-NEXT: 2, 4 (19.0%)
|
||||
# CHECK-NEXT: 0, 10 (50.0%)
|
||||
# CHECK-NEXT: 1, 6 (30.0%)
|
||||
# CHECK-NEXT: 2, 4 (20.0%)
|
||||
|
||||
# CHECK: Scheduler's queue usage:
|
||||
# CHECK-NEXT: No scheduler resources used.
|
||||
|
||||
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
|
||||
# CHECK-NEXT: [# retired], [# cycles]
|
||||
# CHECK-NEXT: 0, 14 (66.7%)
|
||||
# CHECK-NEXT: 1, 4 (19.0%)
|
||||
# CHECK-NEXT: 2, 1 (4.8%)
|
||||
# CHECK-NEXT: 3, 2 (9.5%)
|
||||
|
||||
# CHECK: Total ROB Entries: 64
|
||||
# CHECK-NEXT: Max Used ROB Entries: 6 ( 9.4% )
|
||||
# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
|
||||
|
||||
# CHECK: Register File statistics:
|
||||
# CHECK-NEXT: Total number of mappings created: 14
|
||||
# CHECK-NEXT: Max number of mappings used: 6
|
||||
# CHECK-NEXT: Max number of mappings used: 4
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0.0] - CortexA55UnitALU
|
||||
@ -101,20 +90,20 @@ str w0, [x21, x18, lsl #2]
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 0
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeER. . . . ldr w4, [x2], #4
|
||||
# CHECK-NEXT: [0,1] .DeeER . . . ldr w5, [x3]
|
||||
# CHECK-NEXT: [0,2] . DeeeER. . . madd w0, w5, w4, w0
|
||||
# CHECK-NEXT: [0,3] . DeeER. . . add x3, x3, x13
|
||||
# CHECK-NEXT: [0,4] . DeeER. . . subs x1, x1, #1
|
||||
# CHECK-NEXT: [0,5] . . DeeeER . . str w0, [x21, x18, lsl #2]
|
||||
# CHECK-NEXT: [1,0] . . DeeER . . ldr w4, [x2], #4
|
||||
# CHECK-NEXT: [1,1] . . DeeER . . ldr w5, [x3]
|
||||
# CHECK-NEXT: [1,2] . . . DeeeER . madd w0, w5, w4, w0
|
||||
# CHECK-NEXT: [1,3] . . . DeeER . add x3, x3, x13
|
||||
# CHECK-NEXT: [1,4] . . . DeeER . subs x1, x1, #1
|
||||
# CHECK-NEXT: [1,5] . . . DeeeER str w0, [x21, x18, lsl #2]
|
||||
# CHECK: [0,0] DeeE . . . . ldr w4, [x2], #4
|
||||
# CHECK-NEXT: [0,1] .DeeE. . . . ldr w5, [x3]
|
||||
# CHECK-NEXT: [0,2] . DeeeE . . . madd w0, w5, w4, w0
|
||||
# CHECK-NEXT: [0,3] . DeeE . . . add x3, x3, x13
|
||||
# CHECK-NEXT: [0,4] . DeeE . . . subs x1, x1, #1
|
||||
# CHECK-NEXT: [0,5] . . DeeeE . . str w0, [x21, x18, lsl #2]
|
||||
# CHECK-NEXT: [1,0] . . DeeE . . ldr w4, [x2], #4
|
||||
# CHECK-NEXT: [1,1] . . DeeE . . ldr w5, [x3]
|
||||
# CHECK-NEXT: [1,2] . . . DeeeE . madd w0, w5, w4, w0
|
||||
# CHECK-NEXT: [1,3] . . . DeeE . add x3, x3, x13
|
||||
# CHECK-NEXT: [1,4] . . . DeeE . subs x1, x1, #1
|
||||
# CHECK-NEXT: [1,5] . . . DeeeE str w0, [x21, x18, lsl #2]
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -10,12 +10,12 @@ add w7, w9, w0
|
||||
|
||||
# CHECK: Iterations: 2
|
||||
# CHECK-NEXT: Instructions: 12
|
||||
# CHECK-NEXT: Total Cycles: 20
|
||||
# CHECK-NEXT: Total Cycles: 19
|
||||
# CHECK-NEXT: Total uOps: 12
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.60
|
||||
# CHECK-NEXT: IPC: 0.60
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.63
|
||||
# CHECK-NEXT: IPC: 0.63
|
||||
# CHECK-NEXT: Block RThroughput: 8.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
@ -40,37 +40,26 @@ add w7, w9, w0
|
||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||
# CHECK-NEXT: LQ - Load queue full: 0
|
||||
# CHECK-NEXT: SQ - Store queue full: 0
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 1 (5.0%)
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 1 (5.3%)
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 12 (60.0%)
|
||||
# CHECK-NEXT: 1, 4 (20.0%)
|
||||
# CHECK-NEXT: 2, 4 (20.0%)
|
||||
# CHECK-NEXT: 0, 11 (57.9%)
|
||||
# CHECK-NEXT: 1, 4 (21.1%)
|
||||
# CHECK-NEXT: 2, 4 (21.1%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 12 (60.0%)
|
||||
# CHECK-NEXT: 1, 4 (20.0%)
|
||||
# CHECK-NEXT: 2, 4 (20.0%)
|
||||
# CHECK-NEXT: 0, 11 (57.9%)
|
||||
# CHECK-NEXT: 1, 4 (21.1%)
|
||||
# CHECK-NEXT: 2, 4 (21.1%)
|
||||
|
||||
# CHECK: Scheduler's queue usage:
|
||||
# CHECK-NEXT: No scheduler resources used.
|
||||
|
||||
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
|
||||
# CHECK-NEXT: [# retired], [# cycles]
|
||||
# CHECK-NEXT: 0, 14 (70.0%)
|
||||
# CHECK-NEXT: 1, 2 (10.0%)
|
||||
# CHECK-NEXT: 2, 2 (10.0%)
|
||||
# CHECK-NEXT: 3, 2 (10.0%)
|
||||
|
||||
# CHECK: Total ROB Entries: 64
|
||||
# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% )
|
||||
# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
|
||||
|
||||
# CHECK: Register File statistics:
|
||||
# CHECK-NEXT: Total number of mappings created: 12
|
||||
# CHECK-NEXT: Max number of mappings used: 7
|
||||
# CHECK-NEXT: Max number of mappings used: 6
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0.0] - CortexA55UnitALU
|
||||
@ -100,21 +89,21 @@ add w7, w9, w0
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - add w7, w9, w0
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: 012345678
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeER. . . sdiv w12, w21, w0
|
||||
# CHECK-NEXT: [0,1] . DeeER. . . add w8, w8, #1
|
||||
# CHECK-NEXT: [0,2] . DeeER. . . add w1, w2, w0
|
||||
# CHECK-NEXT: [0,3] . .DeeER . . add w3, w4, #1
|
||||
# CHECK-NEXT: [0,4] . .DeeER . . add w5, w6, w0
|
||||
# CHECK-NEXT: [0,5] . . DeeER . . add w7, w9, w0
|
||||
# CHECK-NEXT: [1,0] . . DeeeeeeeER . sdiv w12, w21, w0
|
||||
# CHECK-NEXT: [1,1] . . . DeeER . add w8, w8, #1
|
||||
# CHECK-NEXT: [1,2] . . . DeeER . add w1, w2, w0
|
||||
# CHECK-NEXT: [1,3] . . . DeeER. add w3, w4, #1
|
||||
# CHECK-NEXT: [1,4] . . . DeeER. add w5, w6, w0
|
||||
# CHECK-NEXT: [1,5] . . . DeeER add w7, w9, w0
|
||||
# CHECK: [0,0] DeeeeeeeE . . . sdiv w12, w21, w0
|
||||
# CHECK-NEXT: [0,1] . DeeE . . . add w8, w8, #1
|
||||
# CHECK-NEXT: [0,2] . DeeE . . . add w1, w2, w0
|
||||
# CHECK-NEXT: [0,3] . .DeeE. . . add w3, w4, #1
|
||||
# CHECK-NEXT: [0,4] . .DeeE. . . add w5, w6, w0
|
||||
# CHECK-NEXT: [0,5] . . DeeE . . add w7, w9, w0
|
||||
# CHECK-NEXT: [1,0] . . DeeeeeeeE . sdiv w12, w21, w0
|
||||
# CHECK-NEXT: [1,1] . . . DeeE . add w8, w8, #1
|
||||
# CHECK-NEXT: [1,2] . . . DeeE . add w1, w2, w0
|
||||
# CHECK-NEXT: [1,3] . . . DeeE. add w3, w4, #1
|
||||
# CHECK-NEXT: [1,4] . . . DeeE. add w5, w6, w0
|
||||
# CHECK-NEXT: [1,5] . . . DeeE add w7, w9, w0
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -10,12 +10,12 @@ add w7, w9, w0
|
||||
|
||||
# CHECK: Iterations: 2
|
||||
# CHECK-NEXT: Instructions: 12
|
||||
# CHECK-NEXT: Total Cycles: 25
|
||||
# CHECK-NEXT: Total Cycles: 24
|
||||
# CHECK-NEXT: Total uOps: 12
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.48
|
||||
# CHECK-NEXT: IPC: 0.48
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.50
|
||||
# CHECK-NEXT: IPC: 0.50
|
||||
# CHECK-NEXT: Block RThroughput: 10.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
@ -40,31 +40,21 @@ add w7, w9, w0
|
||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||
# CHECK-NEXT: LQ - Load queue full: 0
|
||||
# CHECK-NEXT: SQ - Store queue full: 0
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 7 (28.0%)
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 7 (29.2%)
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 19 (76.0%)
|
||||
# CHECK-NEXT: 2, 6 (24.0%)
|
||||
# CHECK-NEXT: 0, 18 (75.0%)
|
||||
# CHECK-NEXT: 2, 6 (25.0%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 19 (76.0%)
|
||||
# CHECK-NEXT: 2, 6 (24.0%)
|
||||
# CHECK-NEXT: 0, 18 (75.0%)
|
||||
# CHECK-NEXT: 2, 6 (25.0%)
|
||||
|
||||
# CHECK: Scheduler's queue usage:
|
||||
# CHECK-NEXT: No scheduler resources used.
|
||||
|
||||
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
|
||||
# CHECK-NEXT: [# retired], [# cycles]
|
||||
# CHECK-NEXT: 0, 18 (72.0%)
|
||||
# CHECK-NEXT: 1, 2 (8.0%)
|
||||
# CHECK-NEXT: 2, 5 (20.0%)
|
||||
|
||||
# CHECK: Total ROB Entries: 64
|
||||
# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% )
|
||||
# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
|
||||
|
||||
# CHECK: Register File statistics:
|
||||
# CHECK-NEXT: Total number of mappings created: 12
|
||||
# CHECK-NEXT: Max number of mappings used: 7
|
||||
@ -98,20 +88,20 @@ add w7, w9, w0
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 01234
|
||||
# CHECK-NEXT: Index 0123456789 0123
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeeeeER. . . fdiv s1, s2, s3
|
||||
# CHECK-NEXT: [0,1] DeeER. . . . . add w8, w8, #1
|
||||
# CHECK-NEXT: [0,2] .DeeER . . . . add w1, w2, w0
|
||||
# CHECK-NEXT: [0,3] .DeeER . . . . add w3, w4, #1
|
||||
# CHECK-NEXT: [0,4] . DeeER . . . . add w5, w6, w0
|
||||
# CHECK-NEXT: [0,5] . DeeER . . . . add w7, w9, w0
|
||||
# CHECK-NEXT: [1,0] . . DeeeeeeeeeeeeER fdiv s1, s2, s3
|
||||
# CHECK-NEXT: [1,1] . . DeeER. . . add w8, w8, #1
|
||||
# CHECK-NEXT: [1,2] . . .DeeER . . add w1, w2, w0
|
||||
# CHECK-NEXT: [1,3] . . .DeeER . . add w3, w4, #1
|
||||
# CHECK-NEXT: [1,4] . . . DeeER . . add w5, w6, w0
|
||||
# CHECK-NEXT: [1,5] . . . DeeER . . add w7, w9, w0
|
||||
# CHECK: [0,0] DeeeeeeeeeeeeE . . . fdiv s1, s2, s3
|
||||
# CHECK-NEXT: [0,1] DeeE . . . . . add w8, w8, #1
|
||||
# CHECK-NEXT: [0,2] .DeeE. . . . . add w1, w2, w0
|
||||
# CHECK-NEXT: [0,3] .DeeE. . . . . add w3, w4, #1
|
||||
# CHECK-NEXT: [0,4] . DeeE . . . . add w5, w6, w0
|
||||
# CHECK-NEXT: [0,5] . DeeE . . . . add w7, w9, w0
|
||||
# CHECK-NEXT: [1,0] . . DeeeeeeeeeeeeE fdiv s1, s2, s3
|
||||
# CHECK-NEXT: [1,1] . . DeeE . . . add w8, w8, #1
|
||||
# CHECK-NEXT: [1,2] . . .DeeE. . . add w1, w2, w0
|
||||
# CHECK-NEXT: [1,3] . . .DeeE. . . add w3, w4, #1
|
||||
# CHECK-NEXT: [1,4] . . . DeeE . . add w5, w6, w0
|
||||
# CHECK-NEXT: [1,5] . . . DeeE . . add w7, w9, w0
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -7,12 +7,12 @@ v_add_f32 v2, v1, v0
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 3
|
||||
# CHECK-NEXT: Total Cycles: 13
|
||||
# CHECK-NEXT: Total Cycles: 12
|
||||
# CHECK-NEXT: Total uOps: 3
|
||||
|
||||
# CHECK: Dispatch Width: 1
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.23
|
||||
# CHECK-NEXT: IPC: 0.23
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.25
|
||||
# CHECK-NEXT: IPC: 0.25
|
||||
# CHECK-NEXT: Block RThroughput: 3.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
@ -48,12 +48,12 @@ v_add_f32 v2, v1, v0
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v2, v1, v0
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 012
|
||||
# CHECK-NEXT: 01
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeER . . v_add_f32_e32 v0, v0, v0
|
||||
# CHECK-NEXT: [0,1] .DeeeeER . . v_add_f32_e32 v1, v1, v1
|
||||
# CHECK-NEXT: [0,2] . .DeeeeER v_add_f32_e32 v2, v1, v0
|
||||
# CHECK: [0,0] DeeeeE .. v_add_f32_e32 v0, v0, v0
|
||||
# CHECK-NEXT: [0,1] .DeeeeE .. v_add_f32_e32 v1, v1, v1
|
||||
# CHECK-NEXT: [0,2] . .DeeeeE v_add_f32_e32 v2, v1, v0
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -42,7 +42,7 @@ v_sqrt_f64 v[4:5], v[4:5]
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 27
|
||||
# CHECK-NEXT: Total Cycles: 205
|
||||
# CHECK-NEXT: Total Cycles: 204
|
||||
# CHECK-NEXT: Total uOps: 27
|
||||
|
||||
# CHECK: Dispatch Width: 1
|
||||
@ -134,19 +134,19 @@ v_sqrt_f64 v[4:5], v[4:5]
|
||||
# CHECK-NEXT: 0123456789 0123456789 0123456789 0
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . v_cvt_i32_f64_e32 v0, v[0:1]
|
||||
# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeeeeeeER. . . . . . . . . . v_cvt_f64_i32_e32 v[2:3], v2
|
||||
# CHECK-NEXT: [0,2] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_f32_f64_e32 v4, v[4:5]
|
||||
# CHECK-NEXT: [0,3] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_f64_f32_e32 v[6:7], v6
|
||||
# CHECK-NEXT: [0,4] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_u32_f64_e32 v8, v[8:9]
|
||||
# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_f64_u32_e32 v[10:11], v10
|
||||
# CHECK-NEXT: [0,6] . . . . . DeeeeeeeeeeeeeeeeeeeeeER . . . . . v_frexp_exp_i32_f64_e32 v0, v[0:1]
|
||||
# CHECK-NEXT: [0,7] . . . . . DeeeeeeeeeeeeeeeeeeeeeER . . . . . v_frexp_mant_f64_e32 v[2:3], v[2:3]
|
||||
# CHECK-NEXT: [0,8] . . . . . DeeeeeeeeeeeeeeeeeeeeeER . . . . . v_fract_f64_e32 v[4:5], v[4:5]
|
||||
# CHECK-NEXT: [0,9] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeER . v_trunc_f64_e32 v[0:1], v[0:1]
|
||||
# CHECK-NEXT: [0,10] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeER . v_ceil_f64_e32 v[2:3], v[2:3]
|
||||
# CHECK-NEXT: [0,11] . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeER. v_rndne_f64_e32 v[4:5], v[4:5]
|
||||
# CHECK-NEXT: [0,12] . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeER v_floor_f64_e32 v[6:7], v[6:7]
|
||||
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . v_cvt_i32_f64_e32 v0, v[0:1]
|
||||
# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . v_cvt_f64_i32_e32 v[2:3], v2
|
||||
# CHECK-NEXT: [0,2] . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . v_cvt_f32_f64_e32 v4, v[4:5]
|
||||
# CHECK-NEXT: [0,3] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cvt_f64_f32_e32 v[6:7], v6
|
||||
# CHECK-NEXT: [0,4] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cvt_u32_f64_e32 v8, v[8:9]
|
||||
# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cvt_f64_u32_e32 v[10:11], v10
|
||||
# CHECK-NEXT: [0,6] . . . . . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . v_frexp_exp_i32_f64_e32 v0, v[0:1]
|
||||
# CHECK-NEXT: [0,7] . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . v_frexp_mant_f64_e32 v[2:3], v[2:3]
|
||||
# CHECK-NEXT: [0,8] . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . v_fract_f64_e32 v[4:5], v[4:5]
|
||||
# CHECK-NEXT: [0,9] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . v_trunc_f64_e32 v[0:1], v[0:1]
|
||||
# CHECK-NEXT: [0,10] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . v_ceil_f64_e32 v[2:3], v[2:3]
|
||||
# CHECK-NEXT: [0,11] . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeE . v_rndne_f64_e32 v[4:5], v[4:5]
|
||||
# CHECK-NEXT: [0,12] . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE. v_floor_f64_e32 v[6:7], v[6:7]
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -9,12 +9,12 @@ vldr d0, [r1]
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 3
|
||||
# CHECK-NEXT: Total Cycles: 7
|
||||
# CHECK-NEXT: Total Cycles: 6
|
||||
# CHECK-NEXT: Total uOps: 3
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.43
|
||||
# CHECK-NEXT: IPC: 0.43
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.50
|
||||
# CHECK-NEXT: IPC: 0.50
|
||||
# CHECK-NEXT: Block RThroughput: 1.5
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
@ -56,11 +56,11 @@ vldr d0, [r1]
|
||||
# CHECK-NEXT: - - - - 1.00 - - - - - - - 2.00 vldr d0, [r1]
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: Index 0123456
|
||||
# CHECK-NEXT: Index 012345
|
||||
|
||||
# CHECK: [0,0] DER .. add.w r1, r1, #1
|
||||
# CHECK-NEXT: [0,1] .DER .. add.w r1, r1, #2
|
||||
# CHECK-NEXT: [0,2] . DeER vldr d0, [r1]
|
||||
# CHECK: [0,0] DE . add.w r1, r1, #1
|
||||
# CHECK-NEXT: [0,1] .DE . add.w r1, r1, #2
|
||||
# CHECK-NEXT: [0,2] . DeE vldr d0, [r1]
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -77,8 +77,10 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) {
|
||||
"Instruction cannot be ready if it hasn't been dispatched yet!");
|
||||
WTEntry.CyclesSpentInSQWhileReady +=
|
||||
TVEntry.CycleIssued - TVEntry.CycleReady;
|
||||
WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
|
||||
(CurrentCycle - 1) - TVEntry.CycleExecuted;
|
||||
if (CurrentCycle > TVEntry.CycleExecuted) {
|
||||
WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
|
||||
(CurrentCycle - 1) - TVEntry.CycleExecuted;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case HWInstructionEvent::Ready:
|
||||
@ -243,7 +245,8 @@ void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
|
||||
|
||||
for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
|
||||
OS << TimelineView::DisplayChar::RetireLag;
|
||||
OS << TimelineView::DisplayChar::Retired;
|
||||
if (Entry.CycleExecuted < Entry.CycleRetired)
|
||||
OS << TimelineView::DisplayChar::Retired;
|
||||
|
||||
// Skip other columns.
|
||||
for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
|
||||
|
@ -278,7 +278,8 @@ static void processViewOptions(bool IsOutOfOrder) {
|
||||
processOptionImpl(PrintRegisterFileStats, Default);
|
||||
processOptionImpl(PrintDispatchStats, Default);
|
||||
processOptionImpl(PrintSchedulerStats, Default);
|
||||
processOptionImpl(PrintRetireStats, Default);
|
||||
if (IsOutOfOrder)
|
||||
processOptionImpl(PrintRetireStats, Default);
|
||||
}
|
||||
|
||||
// Returns true on success.
|
||||
|
Loading…
Reference in New Issue
Block a user