mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[AMDGPU] Fix shortfalls in WQM marking
When tracking defined lanes through phi nodes in the live range graph, each branch of the phi must be handled independently. Also rewrite the marking algorithm to reduce unnecessary operations. Previously, a shared set of defined lanes was used, which caused marking to stop prematurely. This was observable in existing lit tests, but the test patterns did not cover this detail. Reviewed By: piotr Differential Revision: https://reviews.llvm.org/D98614
This commit is contained in:
parent
f127ae93b8
commit
aa27a4d8bb
@ -318,38 +318,63 @@ void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
|
||||
LLVM_DEBUG(dbgs() << "markDefs " << PrintState(Flag) << ": " << UseMI);
|
||||
|
||||
LiveQueryResult UseLRQ = LR.Query(LIS->getInstructionIndex(UseMI));
|
||||
if (!UseLRQ.valueIn())
|
||||
const VNInfo *Value = UseLRQ.valueIn();
|
||||
if (!Value)
|
||||
return;
|
||||
|
||||
// Note: this code assumes that lane masks on AMDGPU completely
|
||||
// cover registers.
|
||||
LaneBitmask DefinedLanes;
|
||||
LaneBitmask UseLanes;
|
||||
if (SubReg) {
|
||||
UseLanes = TRI->getSubRegIndexLaneMask(SubReg);
|
||||
} else if (Reg.isVirtual()) {
|
||||
UseLanes = MRI->getMaxLaneMaskForVReg(Reg);
|
||||
}
|
||||
const LaneBitmask UseLanes =
|
||||
SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
|
||||
: (Reg.isVirtual() ? MRI->getMaxLaneMaskForVReg(Reg)
|
||||
: LaneBitmask::getNone());
|
||||
|
||||
SmallPtrSet<const VNInfo *, 4> Visited;
|
||||
SmallVector<const VNInfo *, 4> ToProcess;
|
||||
ToProcess.push_back(UseLRQ.valueIn());
|
||||
// Perform a depth-first iteration of the LiveRange graph marking defs.
|
||||
// Stop processing of a given branch when all use lanes have been defined.
|
||||
// The first definition stops processing for a physical register.
|
||||
struct PhiEntry {
|
||||
const VNInfo *Phi;
|
||||
unsigned PredIdx;
|
||||
unsigned VisitIdx;
|
||||
LaneBitmask DefinedLanes;
|
||||
|
||||
PhiEntry(const VNInfo *Phi, unsigned PredIdx, unsigned VisitIdx,
|
||||
LaneBitmask DefinedLanes)
|
||||
: Phi(Phi), PredIdx(PredIdx), VisitIdx(VisitIdx),
|
||||
DefinedLanes(DefinedLanes) {}
|
||||
};
|
||||
SmallSetVector<const VNInfo *, 4> Visited;
|
||||
SmallVector<PhiEntry, 2> PhiStack;
|
||||
LaneBitmask DefinedLanes;
|
||||
unsigned NextPredIdx; // Only used for processing phi nodes
|
||||
do {
|
||||
const VNInfo *Value = ToProcess.pop_back_val();
|
||||
Visited.insert(Value);
|
||||
const VNInfo *NextValue = nullptr;
|
||||
|
||||
if (!Visited.count(Value)) {
|
||||
Visited.insert(Value);
|
||||
// On first visit to a phi then start processing first predecessor
|
||||
NextPredIdx = 0;
|
||||
}
|
||||
|
||||
if (Value->isPHIDef()) {
|
||||
// Need to mark all defs used in the PHI node
|
||||
// Each predecessor node in the phi must be processed as a subgraph
|
||||
const MachineBasicBlock *MBB = LIS->getMBBFromIndex(Value->def);
|
||||
assert(MBB && "Phi-def has no defining MBB");
|
||||
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
|
||||
PE = MBB->pred_end();
|
||||
PI != PE; ++PI) {
|
||||
|
||||
// Find next predecessor to process
|
||||
unsigned Idx = NextPredIdx;
|
||||
auto PI = MBB->pred_begin() + Idx;
|
||||
auto PE = MBB->pred_end();
|
||||
for (; PI != PE && !NextValue; ++PI, ++Idx) {
|
||||
if (const VNInfo *VN = LR.getVNInfoBefore(LIS->getMBBEndIdx(*PI))) {
|
||||
if (!Visited.count(VN))
|
||||
ToProcess.push_back(VN);
|
||||
NextValue = VN;
|
||||
}
|
||||
}
|
||||
|
||||
// If there are more predecessors to process; add phi to stack
|
||||
if (PI != PE)
|
||||
PhiStack.emplace_back(Value, Idx, Visited.size(), DefinedLanes);
|
||||
} else {
|
||||
MachineInstr *MI = LIS->getInstructionFromIndex(Value->def);
|
||||
assert(MI && "Def has no defining instruction");
|
||||
@ -370,17 +395,20 @@ void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
|
||||
// Record if this instruction defined any of use
|
||||
HasDef |= Overlap.any();
|
||||
|
||||
// Check if all lanes of use have been defined
|
||||
// Mark any lanes defined
|
||||
DefinedLanes |= OpLanes;
|
||||
if ((DefinedLanes & UseLanes) != UseLanes) {
|
||||
// Definition not complete; need to process input value
|
||||
LiveQueryResult LRQ = LR.Query(LIS->getInstructionIndex(*MI));
|
||||
if (const VNInfo *VN = LRQ.valueIn()) {
|
||||
if (!Visited.count(VN))
|
||||
ToProcess.push_back(VN);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if all lanes of use have been defined
|
||||
if ((DefinedLanes & UseLanes) != UseLanes) {
|
||||
// Definition not complete; need to process input value
|
||||
LiveQueryResult LRQ = LR.Query(LIS->getInstructionIndex(*MI));
|
||||
if (const VNInfo *VN = LRQ.valueIn()) {
|
||||
if (!Visited.count(VN))
|
||||
NextValue = VN;
|
||||
}
|
||||
}
|
||||
|
||||
// Only mark the instruction if it defines some part of the use
|
||||
if (HasDef)
|
||||
markInstruction(*MI, Flag, Worklist);
|
||||
@ -389,9 +417,21 @@ void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
|
||||
markInstruction(*MI, Flag, Worklist);
|
||||
}
|
||||
}
|
||||
} while (!ToProcess.empty());
|
||||
|
||||
assert(!Reg.isVirtual() || ((DefinedLanes & UseLanes) == UseLanes));
|
||||
if (!NextValue && !PhiStack.empty()) {
|
||||
// Reach end of chain; revert to processing last phi
|
||||
PhiEntry &Entry = PhiStack.back();
|
||||
NextValue = Entry.Phi;
|
||||
NextPredIdx = Entry.PredIdx;
|
||||
DefinedLanes = Entry.DefinedLanes;
|
||||
// Rewind visited set to correct state
|
||||
while (Visited.size() > Entry.VisitIdx)
|
||||
Visited.pop_back();
|
||||
PhiStack.pop_back();
|
||||
}
|
||||
|
||||
Value = NextValue;
|
||||
} while (Value);
|
||||
}
|
||||
|
||||
void SIWholeQuadMode::markOperand(const MachineInstr &MI,
|
||||
|
@ -859,6 +859,10 @@ main_body:
|
||||
; CHECK-NEXT: ; %entry
|
||||
; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
|
||||
; CHECK: s_wqm_b64 exec, exec
|
||||
; CHECK: v_mov
|
||||
; CHECK: v_mov
|
||||
; CHECK: v_mov
|
||||
; CHECK: v_mov
|
||||
; CHECK: s_and_b64 exec, exec, [[LIVE]]
|
||||
; CHECK: image_store
|
||||
; CHECK: s_wqm_b64 exec, exec
|
||||
|
@ -259,3 +259,42 @@ body: |
|
||||
$vgpr1 = STRICT_WWM %3.sub1:vreg_64, implicit $exec
|
||||
SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
|
||||
...
|
||||
|
||||
---
|
||||
# Check that WQM marking occurs correctly through phi nodes in live range graph.
|
||||
# If not then initial V_MOV will not be in WQM.
|
||||
#
|
||||
#CHECK-LABEL: name: test_wqm_lr_phi
|
||||
#CHECK: COPY $exec
|
||||
#CHECK-NEXT: S_WQM
|
||||
#CHECK-NEXT: V_MOV_B32_e32 -10
|
||||
#CHECK-NEXT: V_MOV_B32_e32 0
|
||||
name: test_wqm_lr_phi
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
undef %0.sub0:vreg_64 = V_MOV_B32_e32 -10, implicit $exec
|
||||
%0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:sreg_64 = S_GETPC_B64
|
||||
%2:sgpr_256 = S_LOAD_DWORDX8_IMM %1:sreg_64, 32, 0, 0
|
||||
|
||||
bb.1:
|
||||
$vcc = V_CMP_LT_U32_e64 4, 4, implicit $exec
|
||||
S_CBRANCH_VCCNZ %bb.3, implicit $vcc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
%0.sub0:vreg_64 = V_ADD_U32_e32 1, %0.sub1, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.3:
|
||||
%0.sub1:vreg_64 = V_ADD_U32_e32 1, %0.sub1, implicit $exec
|
||||
S_BRANCH %bb.4
|
||||
|
||||
bb.4:
|
||||
%3:sgpr_128 = IMPLICIT_DEF
|
||||
%4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
|
||||
$vgpr0 = COPY %4.sub0:vreg_128
|
||||
$vgpr1 = COPY %4.sub1:vreg_128
|
||||
SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
|
||||
...
|
||||
|
Loading…
x
Reference in New Issue
Block a user