mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[MCA] Ensure that writes occur in-order
Delay the issue of a new instruction if issuing it would lead to out-of-order commits of writes. This patch fixes the problem described in https://bugs.llvm.org/show_bug.cgi?id=41796#c3 . Differential Revision: https://reviews.llvm.org/D98604
This commit is contained in:
parent
8d2c2f42b8
commit
f475941ee5
@ -50,6 +50,11 @@ class InOrderIssueStage final : public Stage {
|
|||||||
/// Number of instructions that can be issued in the current cycle.
|
/// Number of instructions that can be issued in the current cycle.
|
||||||
unsigned Bandwidth;
|
unsigned Bandwidth;
|
||||||
|
|
||||||
|
/// Number of cycles (counted from the current cycle) until the last write is
|
||||||
|
/// committed. This is taken into account to ensure that writes commit in the
|
||||||
|
/// program order.
|
||||||
|
unsigned LastWriteBackCycle;
|
||||||
|
|
||||||
InOrderIssueStage(const InOrderIssueStage &Other) = delete;
|
InOrderIssueStage(const InOrderIssueStage &Other) = delete;
|
||||||
InOrderIssueStage &operator=(const InOrderIssueStage &Other) = delete;
|
InOrderIssueStage &operator=(const InOrderIssueStage &Other) = delete;
|
||||||
|
|
||||||
@ -69,7 +74,7 @@ public:
|
|||||||
const MCSchedModel &SM, const MCSubtargetInfo &STI)
|
const MCSchedModel &SM, const MCSubtargetInfo &STI)
|
||||||
: SM(SM), STI(STI), RCU(RCU), PRF(PRF),
|
: SM(SM), STI(STI), RCU(RCU), PRF(PRF),
|
||||||
RM(std::make_unique<ResourceManager>(SM)), NumIssued(0),
|
RM(std::make_unique<ResourceManager>(SM)), NumIssued(0),
|
||||||
StallCyclesLeft(0), Bandwidth(0) {}
|
StallCyclesLeft(0), Bandwidth(0), LastWriteBackCycle(0) {}
|
||||||
|
|
||||||
bool isAvailable(const InstRef &) const override;
|
bool isAvailable(const InstRef &) const override;
|
||||||
bool hasWorkToComplete() const override;
|
bool hasWorkToComplete() const override;
|
||||||
|
@ -57,6 +57,32 @@ static bool hasResourceHazard(const ResourceManager &RM, const InstRef &IR) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned findLastWriteBackCycle(const InstRef &IR) {
|
||||||
|
unsigned LastWBCycle = 0;
|
||||||
|
for (const WriteState &WS : IR.getInstruction()->getDefs()) {
|
||||||
|
int CyclesLeft = WS.getCyclesLeft();
|
||||||
|
if (CyclesLeft == UNKNOWN_CYCLES)
|
||||||
|
CyclesLeft = WS.getLatency();
|
||||||
|
if (CyclesLeft < 0)
|
||||||
|
CyclesLeft = 0;
|
||||||
|
LastWBCycle = std::max(LastWBCycle, (unsigned)CyclesLeft);
|
||||||
|
}
|
||||||
|
return LastWBCycle;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned findFirstWriteBackCycle(const InstRef &IR) {
|
||||||
|
unsigned FirstWBCycle = ~0U;
|
||||||
|
for (const WriteState &WS : IR.getInstruction()->getDefs()) {
|
||||||
|
int CyclesLeft = WS.getCyclesLeft();
|
||||||
|
if (CyclesLeft == UNKNOWN_CYCLES)
|
||||||
|
CyclesLeft = WS.getLatency();
|
||||||
|
if (CyclesLeft < 0)
|
||||||
|
CyclesLeft = 0;
|
||||||
|
FirstWBCycle = std::min(FirstWBCycle, (unsigned)CyclesLeft);
|
||||||
|
}
|
||||||
|
return FirstWBCycle;
|
||||||
|
}
|
||||||
|
|
||||||
/// Return a number of cycles left until register requirements of the
|
/// Return a number of cycles left until register requirements of the
|
||||||
/// instructions are met.
|
/// instructions are met.
|
||||||
static unsigned checkRegisterHazard(const RegisterFile &PRF,
|
static unsigned checkRegisterHazard(const RegisterFile &PRF,
|
||||||
@ -116,6 +142,14 @@ bool InOrderIssueStage::canExecute(const InstRef &IR,
|
|||||||
HWStallEvent(HWStallEvent::DispatchGroupStall, IR));
|
HWStallEvent(HWStallEvent::DispatchGroupStall, IR));
|
||||||
notifyEvent<HWPressureEvent>(
|
notifyEvent<HWPressureEvent>(
|
||||||
HWPressureEvent(HWPressureEvent::RESOURCES, IR));
|
HWPressureEvent(HWPressureEvent::RESOURCES, IR));
|
||||||
|
} else if (LastWriteBackCycle) {
|
||||||
|
if (!IR.getInstruction()->getDesc().RetireOOO) {
|
||||||
|
unsigned NextWriteBackCycle = findFirstWriteBackCycle(IR);
|
||||||
|
// Delay the instruction to ensure that writes occur in program order
|
||||||
|
if (NextWriteBackCycle < LastWriteBackCycle) {
|
||||||
|
*StallCycles = LastWriteBackCycle - NextWriteBackCycle;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return *StallCycles == 0;
|
return *StallCycles == 0;
|
||||||
@ -213,6 +247,9 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
|
|||||||
IssuedInst.push_back(IR);
|
IssuedInst.push_back(IR);
|
||||||
++NumIssued;
|
++NumIssued;
|
||||||
|
|
||||||
|
if (!IR.getInstruction()->getDesc().RetireOOO)
|
||||||
|
LastWriteBackCycle = findLastWriteBackCycle(IR);
|
||||||
|
|
||||||
return llvm::ErrorSuccess();
|
return llvm::ErrorSuccess();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -285,6 +322,10 @@ llvm::Error InOrderIssueStage::cycleStart() {
|
|||||||
llvm::Error InOrderIssueStage::cycleEnd() {
|
llvm::Error InOrderIssueStage::cycleEnd() {
|
||||||
if (StallCyclesLeft > 0)
|
if (StallCyclesLeft > 0)
|
||||||
--StallCyclesLeft;
|
--StallCyclesLeft;
|
||||||
|
|
||||||
|
if (LastWriteBackCycle > 0)
|
||||||
|
--LastWriteBackCycle;
|
||||||
|
|
||||||
return llvm::ErrorSuccess();
|
return llvm::ErrorSuccess();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ str w0, [x21, x18, lsl #2]
|
|||||||
# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
|
# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
|
||||||
|
|
||||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||||
# CHECK-NEXT: RAT - Register unavailable: 10 (47.6%)
|
# CHECK-NEXT: RAT - Register unavailable: 8 (38.1%)
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
|
@ -35,7 +35,7 @@ str w0, [x21, x18, lsl #2]
|
|||||||
# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
|
# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
|
||||||
|
|
||||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||||
# CHECK-NEXT: RAT - Register unavailable: 10 (47.6%)
|
# CHECK-NEXT: RAT - Register unavailable: 8 (38.1%)
|
||||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
@ -106,13 +106,13 @@ str w0, [x21, x18, lsl #2]
|
|||||||
# CHECK: [0,0] DeeER. . . . ldr w4, [x2], #4
|
# CHECK: [0,0] DeeER. . . . ldr w4, [x2], #4
|
||||||
# CHECK-NEXT: [0,1] .DeeER . . . ldr w5, [x3]
|
# CHECK-NEXT: [0,1] .DeeER . . . ldr w5, [x3]
|
||||||
# CHECK-NEXT: [0,2] . DeeeER. . . madd w0, w5, w4, w0
|
# CHECK-NEXT: [0,2] . DeeeER. . . madd w0, w5, w4, w0
|
||||||
# CHECK-NEXT: [0,3] . DeeE-R. . . add x3, x3, x13
|
# CHECK-NEXT: [0,3] . DeeER. . . add x3, x3, x13
|
||||||
# CHECK-NEXT: [0,4] . DeeER. . . subs x1, x1, #1
|
# CHECK-NEXT: [0,4] . DeeER. . . subs x1, x1, #1
|
||||||
# CHECK-NEXT: [0,5] . . DeeeER . . str w0, [x21, x18, lsl #2]
|
# CHECK-NEXT: [0,5] . . DeeeER . . str w0, [x21, x18, lsl #2]
|
||||||
# CHECK-NEXT: [1,0] . . DeeER . . ldr w4, [x2], #4
|
# CHECK-NEXT: [1,0] . . DeeER . . ldr w4, [x2], #4
|
||||||
# CHECK-NEXT: [1,1] . . DeeER . . ldr w5, [x3]
|
# CHECK-NEXT: [1,1] . . DeeER . . ldr w5, [x3]
|
||||||
# CHECK-NEXT: [1,2] . . . DeeeER . madd w0, w5, w4, w0
|
# CHECK-NEXT: [1,2] . . . DeeeER . madd w0, w5, w4, w0
|
||||||
# CHECK-NEXT: [1,3] . . . DeeE-R . add x3, x3, x13
|
# CHECK-NEXT: [1,3] . . . DeeER . add x3, x3, x13
|
||||||
# CHECK-NEXT: [1,4] . . . DeeER . subs x1, x1, #1
|
# CHECK-NEXT: [1,4] . . . DeeER . subs x1, x1, #1
|
||||||
# CHECK-NEXT: [1,5] . . . DeeeER str w0, [x21, x18, lsl #2]
|
# CHECK-NEXT: [1,5] . . . DeeeER str w0, [x21, x18, lsl #2]
|
||||||
|
|
||||||
@ -126,7 +126,7 @@ str w0, [x21, x18, lsl #2]
|
|||||||
# CHECK-NEXT: 0. 2 0.0 0.0 0.0 ldr w4, [x2], #4
|
# CHECK-NEXT: 0. 2 0.0 0.0 0.0 ldr w4, [x2], #4
|
||||||
# CHECK-NEXT: 1. 2 0.0 0.0 0.0 ldr w5, [x3]
|
# CHECK-NEXT: 1. 2 0.0 0.0 0.0 ldr w5, [x3]
|
||||||
# CHECK-NEXT: 2. 2 0.0 0.0 0.0 madd w0, w5, w4, w0
|
# CHECK-NEXT: 2. 2 0.0 0.0 0.0 madd w0, w5, w4, w0
|
||||||
# CHECK-NEXT: 3. 2 0.0 0.0 1.0 add x3, x3, x13
|
# CHECK-NEXT: 3. 2 0.0 0.0 0.0 add x3, x3, x13
|
||||||
# CHECK-NEXT: 4. 2 0.0 0.0 0.0 subs x1, x1, #1
|
# CHECK-NEXT: 4. 2 0.0 0.0 0.0 subs x1, x1, #1
|
||||||
# CHECK-NEXT: 5. 2 0.0 0.0 0.0 str w0, [x21, x18, lsl #2]
|
# CHECK-NEXT: 5. 2 0.0 0.0 0.0 str w0, [x21, x18, lsl #2]
|
||||||
# CHECK-NEXT: 2 0.0 0.0 0.2 <total>
|
# CHECK-NEXT: 2 0.0 0.0 0.0 <total>
|
||||||
|
@ -10,12 +10,12 @@ add w7, w9, w0
|
|||||||
|
|
||||||
# CHECK: Iterations: 2
|
# CHECK: Iterations: 2
|
||||||
# CHECK-NEXT: Instructions: 12
|
# CHECK-NEXT: Instructions: 12
|
||||||
# CHECK-NEXT: Total Cycles: 18
|
# CHECK-NEXT: Total Cycles: 20
|
||||||
# CHECK-NEXT: Total uOps: 12
|
# CHECK-NEXT: Total uOps: 12
|
||||||
|
|
||||||
# CHECK: Dispatch Width: 2
|
# CHECK: Dispatch Width: 2
|
||||||
# CHECK-NEXT: uOps Per Cycle: 0.67
|
# CHECK-NEXT: uOps Per Cycle: 0.60
|
||||||
# CHECK-NEXT: IPC: 0.67
|
# CHECK-NEXT: IPC: 0.60
|
||||||
# CHECK-NEXT: Block RThroughput: 8.0
|
# CHECK-NEXT: Block RThroughput: 8.0
|
||||||
|
|
||||||
# CHECK: Instruction Info:
|
# CHECK: Instruction Info:
|
||||||
@ -40,33 +40,37 @@ add w7, w9, w0
|
|||||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||||
# CHECK-NEXT: LQ - Load queue full: 0
|
# CHECK-NEXT: LQ - Load queue full: 0
|
||||||
# CHECK-NEXT: SQ - Store queue full: 0
|
# CHECK-NEXT: SQ - Store queue full: 0
|
||||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 5 (27.8%)
|
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 1 (5.0%)
|
||||||
|
|
||||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||||
# CHECK-NEXT: 0, 12 (66.7%)
|
# CHECK-NEXT: 0, 12 (60.0%)
|
||||||
# CHECK-NEXT: 2, 6 (33.3%)
|
# CHECK-NEXT: 1, 4 (20.0%)
|
||||||
|
# CHECK-NEXT: 2, 4 (20.0%)
|
||||||
|
|
||||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||||
# CHECK-NEXT: [# issued], [# cycles]
|
# CHECK-NEXT: [# issued], [# cycles]
|
||||||
# CHECK-NEXT: 0, 12 (66.7%)
|
# CHECK-NEXT: 0, 12 (60.0%)
|
||||||
# CHECK-NEXT: 2, 6 (33.3%)
|
# CHECK-NEXT: 1, 4 (20.0%)
|
||||||
|
# CHECK-NEXT: 2, 4 (20.0%)
|
||||||
|
|
||||||
# CHECK: Scheduler's queue usage:
|
# CHECK: Scheduler's queue usage:
|
||||||
# CHECK-NEXT: No scheduler resources used.
|
# CHECK-NEXT: No scheduler resources used.
|
||||||
|
|
||||||
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
|
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
|
||||||
# CHECK-NEXT: [# retired], [# cycles]
|
# CHECK-NEXT: [# retired], [# cycles]
|
||||||
# CHECK-NEXT: 0, 16 (88.9%)
|
# CHECK-NEXT: 0, 14 (70.0%)
|
||||||
# CHECK-NEXT: 6, 2 (11.1%)
|
# CHECK-NEXT: 1, 2 (10.0%)
|
||||||
|
# CHECK-NEXT: 2, 2 (10.0%)
|
||||||
|
# CHECK-NEXT: 3, 2 (10.0%)
|
||||||
|
|
||||||
# CHECK: Total ROB Entries: 64
|
# CHECK: Total ROB Entries: 64
|
||||||
# CHECK-NEXT: Max Used ROB Entries: 8 ( 12.5% )
|
# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% )
|
||||||
# CHECK-NEXT: Average Used ROB Entries per cy: 5 ( 7.8% )
|
# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
|
||||||
|
|
||||||
# CHECK: Register File statistics:
|
# CHECK: Register File statistics:
|
||||||
# CHECK-NEXT: Total number of mappings created: 12
|
# CHECK-NEXT: Total number of mappings created: 12
|
||||||
# CHECK-NEXT: Max number of mappings used: 8
|
# CHECK-NEXT: Max number of mappings used: 7
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
# CHECK-NEXT: [0.0] - CortexA55UnitALU
|
# CHECK-NEXT: [0.0] - CortexA55UnitALU
|
||||||
@ -96,21 +100,21 @@ add w7, w9, w0
|
|||||||
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - add w7, w9, w0
|
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - add w7, w9, w0
|
||||||
|
|
||||||
# CHECK: Timeline view:
|
# CHECK: Timeline view:
|
||||||
# CHECK-NEXT: 01234567
|
# CHECK-NEXT: 0123456789
|
||||||
# CHECK-NEXT: Index 0123456789
|
# CHECK-NEXT: Index 0123456789
|
||||||
|
|
||||||
# CHECK: [0,0] DeeeeeeeER. . . sdiv w12, w21, w0
|
# CHECK: [0,0] DeeeeeeeER. . . sdiv w12, w21, w0
|
||||||
# CHECK-NEXT: [0,1] DeeE-----R. . . add w8, w8, #1
|
# CHECK-NEXT: [0,1] . DeeER. . . add w8, w8, #1
|
||||||
# CHECK-NEXT: [0,2] .DeeE----R. . . add w1, w2, w0
|
# CHECK-NEXT: [0,2] . DeeER. . . add w1, w2, w0
|
||||||
# CHECK-NEXT: [0,3] .DeeE----R. . . add w3, w4, #1
|
# CHECK-NEXT: [0,3] . .DeeER . . add w3, w4, #1
|
||||||
# CHECK-NEXT: [0,4] . DeeE---R. . . add w5, w6, w0
|
# CHECK-NEXT: [0,4] . .DeeER . . add w5, w6, w0
|
||||||
# CHECK-NEXT: [0,5] . DeeE---R. . . add w7, w9, w0
|
# CHECK-NEXT: [0,5] . . DeeER . . add w7, w9, w0
|
||||||
# CHECK-NEXT: [1,0] . . DeeeeeeeER sdiv w12, w21, w0
|
# CHECK-NEXT: [1,0] . . DeeeeeeeER . sdiv w12, w21, w0
|
||||||
# CHECK-NEXT: [1,1] . . DeeE-----R add w8, w8, #1
|
# CHECK-NEXT: [1,1] . . . DeeER . add w8, w8, #1
|
||||||
# CHECK-NEXT: [1,2] . . DeeE----R add w1, w2, w0
|
# CHECK-NEXT: [1,2] . . . DeeER . add w1, w2, w0
|
||||||
# CHECK-NEXT: [1,3] . . DeeE----R add w3, w4, #1
|
# CHECK-NEXT: [1,3] . . . DeeER. add w3, w4, #1
|
||||||
# CHECK-NEXT: [1,4] . . DeeE---R add w5, w6, w0
|
# CHECK-NEXT: [1,4] . . . DeeER. add w5, w6, w0
|
||||||
# CHECK-NEXT: [1,5] . . DeeE---R add w7, w9, w0
|
# CHECK-NEXT: [1,5] . . . DeeER add w7, w9, w0
|
||||||
|
|
||||||
# CHECK: Average Wait times (based on the timeline view):
|
# CHECK: Average Wait times (based on the timeline view):
|
||||||
# CHECK-NEXT: [0]: Executions
|
# CHECK-NEXT: [0]: Executions
|
||||||
@ -120,9 +124,9 @@ add w7, w9, w0
|
|||||||
|
|
||||||
# CHECK: [0] [1] [2] [3]
|
# CHECK: [0] [1] [2] [3]
|
||||||
# CHECK-NEXT: 0. 2 0.0 0.0 0.0 sdiv w12, w21, w0
|
# CHECK-NEXT: 0. 2 0.0 0.0 0.0 sdiv w12, w21, w0
|
||||||
# CHECK-NEXT: 1. 2 0.0 0.0 5.0 add w8, w8, #1
|
# CHECK-NEXT: 1. 2 0.0 0.0 0.0 add w8, w8, #1
|
||||||
# CHECK-NEXT: 2. 2 0.0 0.0 4.0 add w1, w2, w0
|
# CHECK-NEXT: 2. 2 0.0 0.0 0.0 add w1, w2, w0
|
||||||
# CHECK-NEXT: 3. 2 0.0 0.0 4.0 add w3, w4, #1
|
# CHECK-NEXT: 3. 2 0.0 0.0 0.0 add w3, w4, #1
|
||||||
# CHECK-NEXT: 4. 2 0.0 0.0 3.0 add w5, w6, w0
|
# CHECK-NEXT: 4. 2 0.0 0.0 0.0 add w5, w6, w0
|
||||||
# CHECK-NEXT: 5. 2 0.0 0.0 3.0 add w7, w9, w0
|
# CHECK-NEXT: 5. 2 0.0 0.0 0.0 add w7, w9, w0
|
||||||
# CHECK-NEXT: 2 0.0 0.0 3.2 <total>
|
# CHECK-NEXT: 2 0.0 0.0 0.0 <total>
|
||||||
|
Loading…
Reference in New Issue
Block a user