1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[AMDGPU] Fix WWM Entry SCC preservation

SCC was not correctly preserved when entering WWM.
The current lit test was unable to detect this because the entry block is
handled differently.
Additionally fix an issue where SCC was unnecessarily preserved
when exiting from WWM to Exact mode.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D95500
This commit is contained in:
Carl Ritson 2021-01-27 12:48:19 +09:00
parent a395dfa5bd
commit bc3428f564
2 changed files with 53 additions and 2 deletions

View File

@ -840,9 +840,26 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
First = FirstWQM;
}
// Whether we need to save SCC depends on start and end states
bool SaveSCC = false;
switch (State) {
case StateExact:
case StateWWM:
// Exact/WWM -> WWM: save SCC
// Exact/WWM -> WQM: save SCC if WQM mask is generated from exec
// Exact/WWM -> Exact: no save
SaveSCC = (Needs & StateWWM) || ((Needs & StateWQM) && WQMFromExec);
break;
case StateWQM:
// WQM -> Exact/WWM: save SCC
SaveSCC = !(Needs & StateWQM);
break;
default:
llvm_unreachable("Unknown state");
break;
}
MachineBasicBlock::iterator Before =
prepareInsertion(MBB, First, II, Needs == StateWQM,
Needs == StateExact || WQMFromExec);
prepareInsertion(MBB, First, II, Needs == StateWQM, SaveSCC);
if (State == StateWWM) {
assert(SavedNonWWMReg);

View File

@ -49,6 +49,40 @@ body: |
...
---
# Second test for awareness that s_or_saveexec_b64 clobbers SCC,
# because the entry block is treated differently.
#
#CHECK: %bb.1
#CHECK: S_CMP_LT_I32
#CHECK: COPY $scc
#CHECK: ENTER_WWM
#CHECK: $scc = COPY
#CHECK: S_CSELECT_B32
name: test_wwm_scc2
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
%3:vgpr_32 = COPY $vgpr0
%2:sgpr_32 = COPY $sgpr2
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = IMPLICIT_DEF
bb.1:
S_CMP_LT_I32 0, %0:sgpr_32, implicit-def $scc
%10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%12:vgpr_32 = V_ADD_CO_U32_e32 %3:vgpr_32, %3:vgpr_32, implicit-def $vcc, implicit $exec
%5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc
%11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec
$vgpr0 = WWM %11:vgpr_32, implicit $exec
$vgpr1 = COPY %10:vgpr_32
SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
...
---
# V_SET_INACTIVE, when its second operand is undef, is replaced by a
# COPY by si-wqm. Ensure the instruction is removed.