mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[MCA][LSUnit] Track loads and stores until retirement.
Before this patch, loads and stores were only tracked by their corresponding queues in the LSUnit from dispatch until the execute stage. In practice we should be more conservative and assume that memory opcodes leave their queues at the retirement stage. Basically, loads should leave the load queue only when they have completed and delivered their data. We conservatively assume that a load is completed when it is retired. Stores should be tracked by the store queue from dispatch until retirement. In practice, stores can only leave the store queue if their data can be written to the data cache. This is mostly a mechanical change. With this patch, the retire stage notifies the LSUnit when a memory instruction is retired. That triggers the release of LDQ/STQ entries. The only visible change is in memory tests for the bdver2 model. That is because bdver2 is the only model that defines the load/store queue size. This patch partially addresses PR39830. Differential Revision: https://reviews.llvm.org/D68266 llvm-svn: 374034
This commit is contained in:
parent
1e58a1b47b
commit
13160fb6a6
@ -291,9 +291,14 @@ public:
|
|||||||
return NextGroupID++;
|
return NextGroupID++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Instruction executed event handlers.
|
|
||||||
virtual void onInstructionExecuted(const InstRef &IR);
|
virtual void onInstructionExecuted(const InstRef &IR);
|
||||||
|
|
||||||
|
// Loads are tracked by the LDQ (load queue) from dispatch until completion.
|
||||||
|
// Stores are tracked by the STQ (store queue) from dispatch until commitment.
|
||||||
|
// By default we conservatively assume that the LDQ receives a load at
|
||||||
|
// dispatch. Loads leave the LDQ at retirement stage.
|
||||||
|
virtual void onInstructionRetired(const InstRef &IR);
|
||||||
|
|
||||||
virtual void onInstructionIssued(const InstRef &IR) {
|
virtual void onInstructionIssued(const InstRef &IR) {
|
||||||
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
|
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
|
||||||
Groups[GroupID]->onInstructionIssued(IR);
|
Groups[GroupID]->onInstructionIssued(IR);
|
||||||
@ -438,9 +443,6 @@ public:
|
|||||||
/// 6. A store has to wait until an older store barrier is fully executed.
|
/// 6. A store has to wait until an older store barrier is fully executed.
|
||||||
unsigned dispatch(const InstRef &IR) override;
|
unsigned dispatch(const InstRef &IR) override;
|
||||||
|
|
||||||
// FIXME: For simplicity, we optimistically assume a similar behavior for
|
|
||||||
// store instructions. In practice, store operations don't tend to leave the
|
|
||||||
// store queue until they reach the 'Retired' stage (See PR39830).
|
|
||||||
void onInstructionExecuted(const InstRef &IR) override;
|
void onInstructionExecuted(const InstRef &IR) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#ifndef LLVM_MCA_RETIRE_STAGE_H
|
#ifndef LLVM_MCA_RETIRE_STAGE_H
|
||||||
#define LLVM_MCA_RETIRE_STAGE_H
|
#define LLVM_MCA_RETIRE_STAGE_H
|
||||||
|
|
||||||
|
#include "llvm/MCA/HardwareUnits/LSUnit.h"
|
||||||
#include "llvm/MCA/HardwareUnits/RegisterFile.h"
|
#include "llvm/MCA/HardwareUnits/RegisterFile.h"
|
||||||
#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
|
#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
|
||||||
#include "llvm/MCA/Stages/Stage.h"
|
#include "llvm/MCA/Stages/Stage.h"
|
||||||
@ -27,13 +28,14 @@ class RetireStage final : public Stage {
|
|||||||
// Owner will go away when we move listeners/eventing to the stages.
|
// Owner will go away when we move listeners/eventing to the stages.
|
||||||
RetireControlUnit &RCU;
|
RetireControlUnit &RCU;
|
||||||
RegisterFile &PRF;
|
RegisterFile &PRF;
|
||||||
|
LSUnitBase &LSU;
|
||||||
|
|
||||||
RetireStage(const RetireStage &Other) = delete;
|
RetireStage(const RetireStage &Other) = delete;
|
||||||
RetireStage &operator=(const RetireStage &Other) = delete;
|
RetireStage &operator=(const RetireStage &Other) = delete;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
RetireStage(RetireControlUnit &R, RegisterFile &F)
|
RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS)
|
||||||
: Stage(), RCU(R), PRF(F) {}
|
: Stage(), RCU(R), PRF(F), LSU(LS) {}
|
||||||
|
|
||||||
bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
|
bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
|
||||||
Error cycleStart() override;
|
Error cycleStart() override;
|
||||||
|
@ -44,7 +44,7 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) {
|
|||||||
*RCU, *PRF);
|
*RCU, *PRF);
|
||||||
auto Execute =
|
auto Execute =
|
||||||
std::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis);
|
std::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis);
|
||||||
auto Retire = std::make_unique<RetireStage>(*RCU, *PRF);
|
auto Retire = std::make_unique<RetireStage>(*RCU, *PRF, *LSU);
|
||||||
|
|
||||||
// Pass the ownership of all the hardware units to this Context.
|
// Pass the ownership of all the hardware units to this Context.
|
||||||
addHardwareUnit(std::move(RCU));
|
addHardwareUnit(std::move(RCU));
|
||||||
|
@ -160,18 +160,20 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void LSUnitBase::onInstructionExecuted(const InstRef &IR) {
|
void LSUnitBase::onInstructionExecuted(const InstRef &IR) {
|
||||||
|
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
|
||||||
|
auto It = Groups.find(GroupID);
|
||||||
|
assert(It != Groups.end() && "Instruction not dispatched to the LS unit");
|
||||||
|
It->second->onInstructionExecuted();
|
||||||
|
if (It->second->isExecuted())
|
||||||
|
Groups.erase(It);
|
||||||
|
}
|
||||||
|
|
||||||
|
void LSUnitBase::onInstructionRetired(const InstRef &IR) {
|
||||||
const InstrDesc &Desc = IR.getInstruction()->getDesc();
|
const InstrDesc &Desc = IR.getInstruction()->getDesc();
|
||||||
bool IsALoad = Desc.MayLoad;
|
bool IsALoad = Desc.MayLoad;
|
||||||
bool IsAStore = Desc.MayStore;
|
bool IsAStore = Desc.MayStore;
|
||||||
assert((IsALoad || IsAStore) && "Expected a memory operation!");
|
assert((IsALoad || IsAStore) && "Expected a memory operation!");
|
||||||
|
|
||||||
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
|
|
||||||
auto It = Groups.find(GroupID);
|
|
||||||
It->second->onInstructionExecuted();
|
|
||||||
if (It->second->isExecuted()) {
|
|
||||||
Groups.erase(It);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (IsALoad) {
|
if (IsALoad) {
|
||||||
releaseLQSlot();
|
releaseLQSlot();
|
||||||
LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
|
LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
|
||||||
|
@ -52,6 +52,10 @@ void RetireStage::notifyInstructionRetired(const InstRef &IR) const {
|
|||||||
llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
|
llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
|
||||||
const Instruction &Inst = *IR.getInstruction();
|
const Instruction &Inst = *IR.getInstruction();
|
||||||
|
|
||||||
|
// Release the load/store queue entries.
|
||||||
|
if (Inst.isMemOp())
|
||||||
|
LSU.onInstructionRetired(IR);
|
||||||
|
|
||||||
for (const WriteState &WS : Inst.getDefs())
|
for (const WriteState &WS : Inst.getDefs())
|
||||||
PRF.removeRegisterWrite(WS, FreedRegs);
|
PRF.removeRegisterWrite(WS, FreedRegs);
|
||||||
notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
|
notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
|
||||||
|
@ -507,12 +507,12 @@ movaps %xmm3, (%rbx)
|
|||||||
|
|
||||||
# CHECK: Iterations: 100
|
# CHECK: Iterations: 100
|
||||||
# CHECK-NEXT: Instructions: 400
|
# CHECK-NEXT: Instructions: 400
|
||||||
# CHECK-NEXT: Total Cycles: 593
|
# CHECK-NEXT: Total Cycles: 554
|
||||||
# CHECK-NEXT: Total uOps: 400
|
# CHECK-NEXT: Total uOps: 400
|
||||||
|
|
||||||
# CHECK: Dispatch Width: 4
|
# CHECK: Dispatch Width: 4
|
||||||
# CHECK-NEXT: uOps Per Cycle: 0.67
|
# CHECK-NEXT: uOps Per Cycle: 0.72
|
||||||
# CHECK-NEXT: IPC: 0.67
|
# CHECK-NEXT: IPC: 0.72
|
||||||
# CHECK-NEXT: Block RThroughput: 4.0
|
# CHECK-NEXT: Block RThroughput: 4.0
|
||||||
|
|
||||||
# CHECK: Instruction Info:
|
# CHECK: Instruction Info:
|
||||||
@ -532,24 +532,24 @@ movaps %xmm3, (%rbx)
|
|||||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 187 (31.5%)
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 55 (9.9%)
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
# CHECK-NEXT: SQ - Store queue full: 342 (57.7%)
|
# CHECK-NEXT: SQ - Store queue full: 437 (78.9%)
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||||
# CHECK-NEXT: 0, 403 (68.0%)
|
# CHECK-NEXT: 0, 365 (65.9%)
|
||||||
# CHECK-NEXT: 1, 90 (15.2%)
|
# CHECK-NEXT: 1, 88 (15.9%)
|
||||||
# CHECK-NEXT: 2, 2 (0.3%)
|
# CHECK-NEXT: 2, 3 (0.5%)
|
||||||
# CHECK-NEXT: 3, 86 (14.5%)
|
# CHECK-NEXT: 3, 86 (15.5%)
|
||||||
# CHECK-NEXT: 4, 12 (2.0%)
|
# CHECK-NEXT: 4, 12 (2.2%)
|
||||||
|
|
||||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||||
# CHECK-NEXT: [# issued], [# cycles]
|
# CHECK-NEXT: [# issued], [# cycles]
|
||||||
# CHECK-NEXT: 0, 292 (49.2%)
|
# CHECK-NEXT: 0, 253 (45.7%)
|
||||||
# CHECK-NEXT: 1, 202 (34.1%)
|
# CHECK-NEXT: 1, 202 (36.5%)
|
||||||
# CHECK-NEXT: 2, 99 (16.7%)
|
# CHECK-NEXT: 2, 99 (17.9%)
|
||||||
|
|
||||||
# CHECK: Scheduler's queue usage:
|
# CHECK: Scheduler's queue usage:
|
||||||
# CHECK-NEXT: [1] Resource name.
|
# CHECK-NEXT: [1] Resource name.
|
||||||
@ -595,8 +595,8 @@ movaps %xmm3, (%rbx)
|
|||||||
# CHECK: Resource pressure by instruction:
|
# CHECK: Resource pressure by instruction:
|
||||||
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
|
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
|
||||||
# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - - - 3.00 - - - - 1.00 movd %mm0, (%rax)
|
# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - - - 3.00 - - - - 1.00 movd %mm0, (%rax)
|
||||||
# CHECK-NEXT: 0.36 2.64 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1
|
# CHECK-NEXT: 1.53 1.47 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1
|
||||||
# CHECK-NEXT: 2.64 0.36 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2
|
# CHECK-NEXT: 1.47 1.53 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2
|
||||||
# CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx)
|
# CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx)
|
||||||
|
|
||||||
# CHECK: Timeline view:
|
# CHECK: Timeline view:
|
||||||
|
@ -80,7 +80,7 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 353 (86.9%)
|
# CHECK-NEXT: LQ - Load queue full: 354 (87.2%)
|
||||||
# CHECK-NEXT: SQ - Store queue full: 0
|
# CHECK-NEXT: SQ - Store queue full: 0
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
@ -102,9 +102,9 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 32 36 40
|
# CHECK-NEXT: PdEX 31 34 40
|
||||||
# CHECK-NEXT: PdFPU 0 0 64
|
# CHECK-NEXT: PdFPU 0 0 64
|
||||||
# CHECK-NEXT: PdLoad 37 40 40
|
# CHECK-NEXT: PdLoad 36 40 40
|
||||||
# CHECK-NEXT: PdStore 0 0 24
|
# CHECK-NEXT: PdStore 0 0 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
@ -193,7 +193,7 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 353 (86.9%)
|
# CHECK-NEXT: LQ - Load queue full: 354 (87.2%)
|
||||||
# CHECK-NEXT: SQ - Store queue full: 0
|
# CHECK-NEXT: SQ - Store queue full: 0
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
@ -215,9 +215,9 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 32 36 40
|
# CHECK-NEXT: PdEX 31 34 40
|
||||||
# CHECK-NEXT: PdFPU 0 0 64
|
# CHECK-NEXT: PdFPU 0 0 64
|
||||||
# CHECK-NEXT: PdLoad 37 40 40
|
# CHECK-NEXT: PdLoad 36 40 40
|
||||||
# CHECK-NEXT: PdStore 0 0 24
|
# CHECK-NEXT: PdStore 0 0 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
@ -306,7 +306,7 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 353 (86.9%)
|
# CHECK-NEXT: LQ - Load queue full: 354 (87.2%)
|
||||||
# CHECK-NEXT: SQ - Store queue full: 0
|
# CHECK-NEXT: SQ - Store queue full: 0
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
@ -328,9 +328,9 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 32 36 40
|
# CHECK-NEXT: PdEX 31 34 40
|
||||||
# CHECK-NEXT: PdFPU 0 0 64
|
# CHECK-NEXT: PdFPU 0 0 64
|
||||||
# CHECK-NEXT: PdLoad 37 40 40
|
# CHECK-NEXT: PdLoad 36 40 40
|
||||||
# CHECK-NEXT: PdStore 0 0 24
|
# CHECK-NEXT: PdStore 0 0 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
@ -419,7 +419,7 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 353 (86.9%)
|
# CHECK-NEXT: LQ - Load queue full: 354 (87.2%)
|
||||||
# CHECK-NEXT: SQ - Store queue full: 0
|
# CHECK-NEXT: SQ - Store queue full: 0
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
@ -441,9 +441,9 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 32 36 40
|
# CHECK-NEXT: PdEX 31 34 40
|
||||||
# CHECK-NEXT: PdFPU 0 0 64
|
# CHECK-NEXT: PdFPU 0 0 64
|
||||||
# CHECK-NEXT: PdLoad 37 40 40
|
# CHECK-NEXT: PdLoad 36 40 40
|
||||||
# CHECK-NEXT: PdStore 0 0 24
|
# CHECK-NEXT: PdStore 0 0 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
@ -532,7 +532,7 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 532 (87.9%)
|
# CHECK-NEXT: LQ - Load queue full: 533 (88.1%)
|
||||||
# CHECK-NEXT: SQ - Store queue full: 0
|
# CHECK-NEXT: SQ - Store queue full: 0
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
@ -554,8 +554,8 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 34 38 40
|
# CHECK-NEXT: PdEX 33 36 40
|
||||||
# CHECK-NEXT: PdFPU 34 38 64
|
# CHECK-NEXT: PdFPU 33 36 64
|
||||||
# CHECK-NEXT: PdLoad 37 40 40
|
# CHECK-NEXT: PdLoad 37 40 40
|
||||||
# CHECK-NEXT: PdStore 0 0 24
|
# CHECK-NEXT: PdStore 0 0 24
|
||||||
|
|
||||||
@ -646,7 +646,7 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 532 (87.9%)
|
# CHECK-NEXT: LQ - Load queue full: 533 (88.1%)
|
||||||
# CHECK-NEXT: SQ - Store queue full: 0
|
# CHECK-NEXT: SQ - Store queue full: 0
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
@ -668,8 +668,8 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 34 38 40
|
# CHECK-NEXT: PdEX 33 36 40
|
||||||
# CHECK-NEXT: PdFPU 34 38 64
|
# CHECK-NEXT: PdFPU 33 36 64
|
||||||
# CHECK-NEXT: PdLoad 37 40 40
|
# CHECK-NEXT: PdLoad 37 40 40
|
||||||
# CHECK-NEXT: PdStore 0 0 24
|
# CHECK-NEXT: PdStore 0 0 24
|
||||||
|
|
||||||
@ -760,7 +760,7 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 344 (56.9%)
|
# CHECK-NEXT: LQ - Load queue full: 345 (57.0%)
|
||||||
# CHECK-NEXT: SQ - Store queue full: 0
|
# CHECK-NEXT: SQ - Store queue full: 0
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
@ -781,9 +781,9 @@ vmovaps (%rbx), %ymm3
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 33 38 40
|
# CHECK-NEXT: PdEX 33 36 40
|
||||||
# CHECK-NEXT: PdFPU 33 38 64
|
# CHECK-NEXT: PdFPU 33 36 64
|
||||||
# CHECK-NEXT: PdLoad 37 40 40
|
# CHECK-NEXT: PdLoad 36 40 40
|
||||||
# CHECK-NEXT: PdStore 0 0 24
|
# CHECK-NEXT: PdStore 0 0 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
|
@ -81,14 +81,13 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
# CHECK-NEXT: SQ - Store queue full: 370 (91.8%)
|
# CHECK-NEXT: SQ - Store queue full: 371 (92.1%)
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||||
# CHECK-NEXT: 0, 25 (6.2%)
|
# CHECK-NEXT: 0, 24 (6.0%)
|
||||||
# CHECK-NEXT: 1, 370 (91.8%)
|
# CHECK-NEXT: 1, 372 (92.3%)
|
||||||
# CHECK-NEXT: 2, 1 (0.2%)
|
|
||||||
# CHECK-NEXT: 4, 7 (1.7%)
|
# CHECK-NEXT: 4, 7 (1.7%)
|
||||||
|
|
||||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||||
@ -103,10 +102,10 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 22 23 40
|
# CHECK-NEXT: PdEX 21 22 40
|
||||||
# CHECK-NEXT: PdFPU 0 0 64
|
# CHECK-NEXT: PdFPU 0 0 64
|
||||||
# CHECK-NEXT: PdLoad 0 0 40
|
# CHECK-NEXT: PdLoad 0 0 40
|
||||||
# CHECK-NEXT: PdStore 23 24 24
|
# CHECK-NEXT: PdStore 22 23 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
# CHECK-NEXT: [0.0] - PdAGLU01
|
# CHECK-NEXT: [0.0] - PdAGLU01
|
||||||
@ -195,14 +194,13 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
# CHECK-NEXT: SQ - Store queue full: 370 (91.8%)
|
# CHECK-NEXT: SQ - Store queue full: 371 (92.1%)
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||||
# CHECK-NEXT: 0, 25 (6.2%)
|
# CHECK-NEXT: 0, 24 (6.0%)
|
||||||
# CHECK-NEXT: 1, 370 (91.8%)
|
# CHECK-NEXT: 1, 372 (92.3%)
|
||||||
# CHECK-NEXT: 2, 1 (0.2%)
|
|
||||||
# CHECK-NEXT: 4, 7 (1.7%)
|
# CHECK-NEXT: 4, 7 (1.7%)
|
||||||
|
|
||||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||||
@ -217,10 +215,10 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 22 23 40
|
# CHECK-NEXT: PdEX 21 22 40
|
||||||
# CHECK-NEXT: PdFPU 0 0 64
|
# CHECK-NEXT: PdFPU 0 0 64
|
||||||
# CHECK-NEXT: PdLoad 0 0 40
|
# CHECK-NEXT: PdLoad 0 0 40
|
||||||
# CHECK-NEXT: PdStore 23 24 24
|
# CHECK-NEXT: PdStore 22 23 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
# CHECK-NEXT: [0.0] - PdAGLU01
|
# CHECK-NEXT: [0.0] - PdAGLU01
|
||||||
@ -309,14 +307,13 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
# CHECK-NEXT: SQ - Store queue full: 370 (91.8%)
|
# CHECK-NEXT: SQ - Store queue full: 371 (92.1%)
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||||
# CHECK-NEXT: 0, 25 (6.2%)
|
# CHECK-NEXT: 0, 24 (6.0%)
|
||||||
# CHECK-NEXT: 1, 370 (91.8%)
|
# CHECK-NEXT: 1, 372 (92.3%)
|
||||||
# CHECK-NEXT: 2, 1 (0.2%)
|
|
||||||
# CHECK-NEXT: 4, 7 (1.7%)
|
# CHECK-NEXT: 4, 7 (1.7%)
|
||||||
|
|
||||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||||
@ -331,10 +328,10 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 22 23 40
|
# CHECK-NEXT: PdEX 21 22 40
|
||||||
# CHECK-NEXT: PdFPU 0 0 64
|
# CHECK-NEXT: PdFPU 0 0 64
|
||||||
# CHECK-NEXT: PdLoad 0 0 40
|
# CHECK-NEXT: PdLoad 0 0 40
|
||||||
# CHECK-NEXT: PdStore 23 24 24
|
# CHECK-NEXT: PdStore 22 23 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
# CHECK-NEXT: [0.0] - PdAGLU01
|
# CHECK-NEXT: [0.0] - PdAGLU01
|
||||||
@ -423,14 +420,13 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
# CHECK-NEXT: SQ - Store queue full: 370 (91.8%)
|
# CHECK-NEXT: SQ - Store queue full: 371 (92.1%)
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||||
# CHECK-NEXT: 0, 25 (6.2%)
|
# CHECK-NEXT: 0, 24 (6.0%)
|
||||||
# CHECK-NEXT: 1, 370 (91.8%)
|
# CHECK-NEXT: 1, 372 (92.3%)
|
||||||
# CHECK-NEXT: 2, 1 (0.2%)
|
|
||||||
# CHECK-NEXT: 4, 7 (1.7%)
|
# CHECK-NEXT: 4, 7 (1.7%)
|
||||||
|
|
||||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||||
@ -445,10 +441,10 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 22 23 40
|
# CHECK-NEXT: PdEX 21 22 40
|
||||||
# CHECK-NEXT: PdFPU 0 0 64
|
# CHECK-NEXT: PdFPU 0 0 64
|
||||||
# CHECK-NEXT: PdLoad 0 0 40
|
# CHECK-NEXT: PdLoad 0 0 40
|
||||||
# CHECK-NEXT: PdStore 23 24 24
|
# CHECK-NEXT: PdStore 22 23 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
# CHECK-NEXT: [0.0] - PdAGLU01
|
# CHECK-NEXT: [0.0] - PdAGLU01
|
||||||
@ -537,7 +533,7 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
# CHECK-NEXT: SQ - Store queue full: 747 (93.0%)
|
# CHECK-NEXT: SQ - Store queue full: 748 (93.2%)
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||||
@ -559,10 +555,10 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 22 23 40
|
# CHECK-NEXT: PdEX 21 23 40
|
||||||
# CHECK-NEXT: PdFPU 22 23 64
|
# CHECK-NEXT: PdFPU 21 23 64
|
||||||
# CHECK-NEXT: PdLoad 0 0 40
|
# CHECK-NEXT: PdLoad 0 0 40
|
||||||
# CHECK-NEXT: PdStore 23 24 24
|
# CHECK-NEXT: PdStore 22 24 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
# CHECK-NEXT: [0.0] - PdAGLU01
|
# CHECK-NEXT: [0.0] - PdAGLU01
|
||||||
@ -650,16 +646,17 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 185 (30.7%)
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
# CHECK-NEXT: SQ - Store queue full: 372 (61.8%)
|
# CHECK-NEXT: SQ - Store queue full: 559 (92.9%)
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||||
# CHECK-NEXT: 0, 223 (37.0%)
|
# CHECK-NEXT: 0, 222 (36.9%)
|
||||||
# CHECK-NEXT: 1, 372 (61.8%)
|
# CHECK-NEXT: 1, 373 (62.0%)
|
||||||
# CHECK-NEXT: 4, 7 (1.2%)
|
# CHECK-NEXT: 3, 1 (0.2%)
|
||||||
|
# CHECK-NEXT: 4, 6 (1.0%)
|
||||||
|
|
||||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||||
# CHECK-NEXT: [# issued], [# cycles]
|
# CHECK-NEXT: [# issued], [# cycles]
|
||||||
@ -673,10 +670,10 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK-NEXT: [4] Total number of buffer entries.
|
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4]
|
# CHECK: [1] [2] [3] [4]
|
||||||
# CHECK-NEXT: PdEX 22 24 40
|
# CHECK-NEXT: PdEX 21 23 40
|
||||||
# CHECK-NEXT: PdFPU 22 24 64
|
# CHECK-NEXT: PdFPU 21 23 64
|
||||||
# CHECK-NEXT: PdLoad 0 0 40
|
# CHECK-NEXT: PdLoad 0 0 40
|
||||||
# CHECK-NEXT: PdStore 23 24 24
|
# CHECK-NEXT: PdStore 22 24 24
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
# CHECK-NEXT: [0.0] - PdAGLU01
|
# CHECK-NEXT: [0.0] - PdAGLU01
|
||||||
@ -763,9 +760,9 @@ vmovaps %ymm3, (%rbx)
|
|||||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 5963 (83.2%)
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 5777 (80.6%)
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
# CHECK-NEXT: SQ - Store queue full: 374 (5.2%)
|
# CHECK-NEXT: SQ - Store queue full: 561 (7.8%)
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||||
|
|
||||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||||
|
Loading…
Reference in New Issue
Block a user