mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[AMDGPU] Remove SI_MASK_BRANCH
SI_MASK_BRANCH is already deprecated, so remove the code that handles it. Also update the tests to use S_CBRANCH_EXECZ instead of SI_MASK_BRANCH.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D97545
This commit is contained in:
parent
4b251b39b2
commit
19ee89a560
@ -274,24 +274,9 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||
++I;
|
||||
}
|
||||
} else {
|
||||
// We don't want SI_MASK_BRANCH/SI_RETURN_TO_EPILOG encoded. They are
|
||||
// We don't want these pseudo instructions encoded. They are
|
||||
// placeholder terminator instructions and should only be printed as
|
||||
// comments.
|
||||
if (MI->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
|
||||
if (isVerbose()) {
|
||||
SmallVector<char, 16> BBStr;
|
||||
raw_svector_ostream Str(BBStr);
|
||||
|
||||
const MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
|
||||
const MCSymbolRefExpr *Expr
|
||||
= MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
|
||||
Expr->print(Str, MAI);
|
||||
OutStreamer->emitRawComment(Twine(" mask branch ") + BBStr);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
|
||||
if (isVerbose())
|
||||
OutStreamer->emitRawComment(" return to shader part epilog");
|
||||
|
@ -7,9 +7,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// This pass inserts branches on the 0 exec mask over divergent branches
|
||||
/// branches when it's expected that jumping over the untaken control flow will
|
||||
/// be cheaper than having every workitem no-op through it.
|
||||
/// This pass mainly lowers early terminate pseudo instructions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -24,32 +22,21 @@ using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "si-insert-skips"
|
||||
|
||||
static cl::opt<unsigned> SkipThresholdFlag(
|
||||
"amdgpu-skip-threshold-legacy",
|
||||
cl::desc("Number of instructions before jumping over divergent control flow"),
|
||||
cl::init(12), cl::Hidden);
|
||||
|
||||
namespace {
|
||||
|
||||
class SIInsertSkips : public MachineFunctionPass {
|
||||
private:
|
||||
const SIRegisterInfo *TRI = nullptr;
|
||||
const SIInstrInfo *TII = nullptr;
|
||||
unsigned SkipThreshold = 0;
|
||||
MachineDominatorTree *MDT = nullptr;
|
||||
|
||||
MachineBasicBlock *EarlyExitBlock = nullptr;
|
||||
bool EarlyExitClearsExec = false;
|
||||
|
||||
bool shouldSkip(const MachineBasicBlock &From,
|
||||
const MachineBasicBlock &To) const;
|
||||
|
||||
void ensureEarlyExitBlock(MachineBasicBlock &MBB, bool ClearExec);
|
||||
|
||||
void earlyTerm(MachineInstr &MI);
|
||||
|
||||
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
@ -87,53 +74,6 @@ static bool opcodeEmitsNoInsts(const MachineInstr &MI) {
|
||||
if (MI.isMetaInstruction())
|
||||
return true;
|
||||
|
||||
// Handle target specific opcodes.
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::SI_MASK_BRANCH:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
|
||||
const MachineBasicBlock &To) const {
|
||||
unsigned NumInstr = 0;
|
||||
const MachineFunction *MF = From.getParent();
|
||||
|
||||
for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
|
||||
MBBI != End && MBBI != ToI; ++MBBI) {
|
||||
const MachineBasicBlock &MBB = *MBBI;
|
||||
|
||||
for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
|
||||
NumInstr < SkipThreshold && I != E; ++I) {
|
||||
if (opcodeEmitsNoInsts(*I))
|
||||
continue;
|
||||
|
||||
// FIXME: Since this is required for correctness, this should be inserted
|
||||
// during SILowerControlFlow.
|
||||
|
||||
// When a uniform loop is inside non-uniform control flow, the branch
|
||||
// leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
|
||||
// when EXEC = 0. We should skip the loop lest it becomes infinite.
|
||||
if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
|
||||
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
|
||||
return true;
|
||||
|
||||
if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
|
||||
return true;
|
||||
|
||||
// These instructions are potentially expensive even if EXEC = 0.
|
||||
if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
|
||||
I->getOpcode() == AMDGPU::S_WAITCNT)
|
||||
return true;
|
||||
|
||||
++NumInstr;
|
||||
if (NumInstr >= SkipThreshold)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -209,29 +149,11 @@ void SIInsertSkips::earlyTerm(MachineInstr &MI) {
|
||||
MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
|
||||
}
|
||||
|
||||
// Returns true if a branch over the block was inserted.
|
||||
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
|
||||
MachineBasicBlock &SrcMBB) {
|
||||
MachineBasicBlock *DestBB = MI.getOperand(0).getMBB();
|
||||
|
||||
if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
|
||||
return false;
|
||||
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());
|
||||
|
||||
BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
|
||||
.addMBB(DestBB);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
TII = ST.getInstrInfo();
|
||||
TRI = &TII->getRegisterInfo();
|
||||
MDT = &getAnalysis<MachineDominatorTree>();
|
||||
SkipThreshold = SkipThresholdFlag;
|
||||
|
||||
MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
|
||||
ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
|
||||
@ -246,10 +168,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
||||
MachineInstr &MI = *I;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::SI_MASK_BRANCH:
|
||||
MadeChange |= skipMaskBranch(MI, MBB);
|
||||
break;
|
||||
|
||||
case AMDGPU::S_BRANCH:
|
||||
// Optimize out branches to the next block.
|
||||
// FIXME: Shouldn't this be handled by BranchFolding?
|
||||
|
@ -2369,10 +2369,8 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
||||
|
||||
// Skip over the instructions that are artificially terminators for special
|
||||
// exec management.
|
||||
while (I != E && !I->isBranch() && !I->isReturn() &&
|
||||
I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
|
||||
while (I != E && !I->isBranch() && !I->isReturn()) {
|
||||
switch (I->getOpcode()) {
|
||||
case AMDGPU::SI_MASK_BRANCH:
|
||||
case AMDGPU::S_MOV_B64_term:
|
||||
case AMDGPU::S_XOR_B64_term:
|
||||
case AMDGPU::S_OR_B64_term:
|
||||
@ -2400,34 +2398,7 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
||||
if (I == E)
|
||||
return false;
|
||||
|
||||
if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
|
||||
return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
|
||||
|
||||
++I;
|
||||
|
||||
// TODO: Should be able to treat as fallthrough?
|
||||
if (I == MBB.end())
|
||||
return true;
|
||||
|
||||
if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
|
||||
return true;
|
||||
|
||||
MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
|
||||
|
||||
// Specifically handle the case where the conditional branch is to the same
|
||||
// destination as the mask branch. e.g.
|
||||
//
|
||||
// si_mask_branch BB8
|
||||
// s_cbranch_execz BB8
|
||||
// s_cbranch BB9
|
||||
//
|
||||
// This is required to understand divergent loops which may need the branches
|
||||
// to be relaxed.
|
||||
if (TBB != MaskBrDest || Cond.empty())
|
||||
return true;
|
||||
|
||||
auto Pred = Cond[0].getImm();
|
||||
return (Pred != EXECZ && Pred != EXECNZ);
|
||||
return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
|
||||
}
|
||||
|
||||
unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
|
||||
@ -2438,11 +2409,6 @@ unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
|
||||
unsigned RemovedSize = 0;
|
||||
while (I != MBB.end()) {
|
||||
MachineBasicBlock::iterator Next = std::next(I);
|
||||
if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
|
||||
I = Next;
|
||||
continue;
|
||||
}
|
||||
|
||||
RemovedSize += getInstSizeInBytes(*I);
|
||||
I->eraseFromParent();
|
||||
++Count;
|
||||
|
@ -274,19 +274,6 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
|
||||
// SI pseudo instructions. These are used by the CFG structurizer pass
|
||||
// and should be lowered to ISA instructions prior to codegen.
|
||||
|
||||
// Dummy terminator instruction to use after control flow instructions
|
||||
// replaced with exec mask operations.
|
||||
def SI_MASK_BRANCH : VPseudoInstSI <
|
||||
(outs), (ins brtarget:$target)> {
|
||||
let isBranch = 0;
|
||||
let isTerminator = 1;
|
||||
let isBarrier = 0;
|
||||
let SchedRW = [];
|
||||
let hasNoSchedulingInfo = 1;
|
||||
let FixedSize = 1;
|
||||
let Size = 0;
|
||||
}
|
||||
|
||||
let isTerminator = 1 in {
|
||||
|
||||
let OtherPredicates = [EnableLateCFGStructurize] in {
|
||||
|
@ -39,7 +39,7 @@ body: |
|
||||
%20:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%21:sreg_64 = S_AND_B64 %20, %19, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term %21
|
||||
SI_MASK_BRANCH %bb.4, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.4, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.3:
|
||||
@ -73,7 +73,7 @@ body: |
|
||||
%29:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%30:sreg_64 = S_AND_B64 %29, %26, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term %30
|
||||
SI_MASK_BRANCH %bb.10, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.10, implicit $exec
|
||||
S_BRANCH %bb.8
|
||||
|
||||
bb.8:
|
||||
|
@ -90,7 +90,7 @@ body: |
|
||||
%44:sreg_64 = S_AND_B64 %43, %30, implicit-def dead $scc
|
||||
%45:sreg_64 = S_XOR_B64 %44, %43, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term killed %44
|
||||
SI_MASK_BRANCH %bb.9, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.9, implicit $exec
|
||||
S_BRANCH %bb.8
|
||||
|
||||
bb.5:
|
||||
@ -122,7 +122,7 @@ body: |
|
||||
%67:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%68:sreg_64 = S_AND_B64 %67, %61, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term killed %68
|
||||
SI_MASK_BRANCH %bb.13, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.13, implicit $exec
|
||||
S_BRANCH %bb.7
|
||||
|
||||
bb.7:
|
||||
@ -198,7 +198,7 @@ body: |
|
||||
%90:sreg_64 = S_AND_B64 %89, %87, implicit-def dead $scc
|
||||
%46:sreg_64 = S_XOR_B64 %90, %89, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term killed %90
|
||||
SI_MASK_BRANCH %bb.5, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.5, implicit $exec
|
||||
S_BRANCH %bb.15
|
||||
|
||||
bb.13:
|
||||
@ -211,7 +211,7 @@ body: |
|
||||
%95:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%96:sreg_64 = S_AND_B64 %95, %93, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term killed %96
|
||||
SI_MASK_BRANCH %bb.16, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.16, implicit $exec
|
||||
S_BRANCH %bb.14
|
||||
|
||||
bb.14:
|
||||
|
@ -83,7 +83,7 @@ body: |
|
||||
%23:sreg_64 = S_AND_B64 %22, %18, implicit-def dead $scc
|
||||
%24:sreg_64 = S_XOR_B64 %23, %22, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term killed %23
|
||||
SI_MASK_BRANCH %bb.7, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.7, implicit $exec
|
||||
S_BRANCH %bb.18
|
||||
|
||||
bb.7:
|
||||
|
@ -74,7 +74,7 @@ body: |
|
||||
%23:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%24:sreg_64 = S_AND_B64 %23, %22, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term killed %24
|
||||
SI_MASK_BRANCH %bb.7, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.7, implicit $exec
|
||||
S_BRANCH %bb.5
|
||||
|
||||
bb.5:
|
||||
@ -153,7 +153,7 @@ body: |
|
||||
%50:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term killed %51
|
||||
SI_MASK_BRANCH %bb.16, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.16, implicit $exec
|
||||
S_BRANCH %bb.15
|
||||
|
||||
bb.15:
|
||||
|
@ -30,7 +30,7 @@ body: |
|
||||
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]]
|
||||
; GCN: SI_MASK_BRANCH %bb.4, implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
|
||||
; GCN: S_BRANCH %bb.1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
|
||||
@ -47,7 +47,7 @@ body: |
|
||||
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
|
||||
; GCN: SI_MASK_BRANCH %bb.3, implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
; GCN: S_BRANCH %bb.2
|
||||
; GCN: bb.2:
|
||||
; GCN: successors: %bb.3(0x80000000)
|
||||
@ -77,7 +77,7 @@ body: |
|
||||
%3:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term %4
|
||||
SI_MASK_BRANCH %bb.4, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.4, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
@ -96,7 +96,7 @@ body: |
|
||||
%12:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term %13
|
||||
SI_MASK_BRANCH %bb.3, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
|
@ -1,5 +1,5 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
|
||||
@ -8,7 +8,6 @@ body: |
|
||||
; CHECK-LABEL: name: skip_execz_flat
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
@ -18,7 +17,7 @@ body: |
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1, %bb.2
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
@ -36,7 +35,6 @@ body: |
|
||||
; CHECK-LABEL: name: skip_execz_mubuf
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
@ -46,7 +44,7 @@ body: |
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1, %bb.2
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
@ -64,7 +62,6 @@ body: |
|
||||
; CHECK-LABEL: name: skip_execz_ds
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
@ -74,7 +71,7 @@ body: |
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1, %bb.2
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
|
@ -1,5 +1,5 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0
|
||||
|
||||
---
|
||||
@ -9,7 +9,6 @@ body: |
|
||||
; CHECK-LABEL: name: skip_gws_init
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
@ -19,7 +18,7 @@ body: |
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1, %bb.2
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
@ -37,7 +36,6 @@ body: |
|
||||
; CHECK-LABEL: name: skip_gws_barrier
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
@ -47,7 +45,7 @@ body: |
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1, %bb.2
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
|
@ -1,44 +1,18 @@
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=2 %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
|
||||
# CHECK-LABEL: name: no_count_mask_branch_pseudo
|
||||
# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec
|
||||
# CHECK-NEXT: SI_MASK_BRANCH
|
||||
# CHECK-NOT: S_CBRANCH_EXECZ
|
||||
name: no_count_mask_branch_pseudo
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
|
||||
$vgpr1 = V_MOV_B32_e32 7, implicit $exec
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
SI_MASK_BRANCH %bb.3, implicit $exec
|
||||
|
||||
bb.2:
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
|
||||
bb.3:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=3 %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
|
||||
# FIXME: RemoveShortExecBranches should not count dbg_value instructions.
|
||||
# CHECK-LABEL: name: no_count_dbg_value
|
||||
# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec
|
||||
# CHECK-NEXT: SI_MASK_BRANCH
|
||||
# CHECK-NOT: S_CBRANCH_EXECZ
|
||||
name: no_count_dbg_value
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
successors: %bb.1, %bb.2
|
||||
|
||||
$vgpr1 = V_MOV_B32_e32 7, implicit $exec
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
|
@ -9,40 +9,29 @@ name: loop_header_nopred
|
||||
body: |
|
||||
; GCN-LABEL: name: loop_header_nopred
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: S_INST_PREFETCH 1
|
||||
; GCN: S_BRANCH %bb.1
|
||||
; GCN: bb.6 (align 64):
|
||||
; GCN: successors: %bb.7(0x04000000), %bb.1(0x7c000000)
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000)
|
||||
; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
|
||||
; GCN: S_BRANCH %bb.4
|
||||
; GCN: bb.2 (align 64):
|
||||
; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000)
|
||||
; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
|
||||
; GCN: S_BRANCH %bb.4
|
||||
; GCN: bb.4:
|
||||
; GCN: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
; GCN: S_BRANCH %bb.2
|
||||
; GCN: bb.1 (align 64):
|
||||
; GCN: successors: %bb.7(0x04000000), %bb.2(0x7c000000)
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc_lo
|
||||
; GCN: bb.2:
|
||||
; GCN: successors: %bb.5(0x40000000), %bb.1(0x40000000)
|
||||
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; GCN: bb.5:
|
||||
; GCN: successors: %bb.6(0x80000000)
|
||||
; GCN: S_BRANCH %bb.6
|
||||
; GCN: successors: %bb.1(0x04000000), %bb.5(0x7c000000)
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_NOP 0
|
||||
; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec
|
||||
; GCN: S_BRANCH %bb.1
|
||||
; GCN: bb.7:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
@ -60,7 +49,7 @@ body: |
|
||||
bb.3:
|
||||
successors: %bb.4(0x40000000), %bb.6(0x40000000)
|
||||
|
||||
SI_MASK_BRANCH %bb.6, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.6, implicit $exec
|
||||
S_BRANCH %bb.4
|
||||
|
||||
bb.4:
|
||||
|
@ -86,7 +86,7 @@ body: |
|
||||
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
|
||||
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$sgpr2_sgpr3 = S_XOR_B64 $exec, killed $sgpr2_sgpr3, implicit-def dead $scc
|
||||
SI_MASK_BRANCH %bb.2.exit, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2.exit, implicit $exec
|
||||
|
||||
bb.1.atomic:
|
||||
successors: %bb.2.exit(0x80000000)
|
||||
|
@ -22,7 +22,7 @@ body: |
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY1]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]]
|
||||
; GCN: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; GCN: S_BRANCH %bb.1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
@ -32,7 +32,7 @@ body: |
|
||||
; GCN: $exec = S_AND_B64 $exec, [[COPY]], implicit-def dead $scc
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; GCN: $exec = S_XOR_B64_term $exec, [[S_AND_B64_1]], implicit-def $scc
|
||||
; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.6, implicit $exec
|
||||
; GCN: S_BRANCH %bb.3
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.4(0x40000000), %bb.5(0x40000000)
|
||||
@ -40,7 +40,7 @@ body: |
|
||||
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_2]]
|
||||
; GCN: SI_MASK_BRANCH %bb.5, implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec
|
||||
; GCN: S_BRANCH %bb.4
|
||||
; GCN: bb.4:
|
||||
; GCN: successors: %bb.5(0x80000000)
|
||||
@ -60,7 +60,7 @@ body: |
|
||||
%4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc
|
||||
%5:sreg_64 = S_XOR_B64 %4, %3, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term %4
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
@ -72,7 +72,7 @@ body: |
|
||||
$exec = S_AND_B64 $exec, %0, implicit-def dead $scc
|
||||
%7:sreg_64 = S_AND_B64 $exec, %6, implicit-def $scc
|
||||
$exec = S_XOR_B64_term $exec, %7, implicit-def $scc
|
||||
SI_MASK_BRANCH %bb.6, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.6, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.3:
|
||||
@ -82,7 +82,7 @@ body: |
|
||||
%9:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%10:sreg_64 = S_AND_B64 %9, %8, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term %10
|
||||
SI_MASK_BRANCH %bb.5, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.5, implicit $exec
|
||||
S_BRANCH %bb.4
|
||||
|
||||
bb.4:
|
||||
|
@ -128,7 +128,7 @@
|
||||
# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
|
||||
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
# CHECK-NEXT: SI_MASK_BRANCH
|
||||
# CHECK-NEXT: S_CBRANCH_EXECZ
|
||||
|
||||
name: optimize_if_and_saveexec_xor
|
||||
liveins:
|
||||
@ -143,7 +143,7 @@ body: |
|
||||
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
@ -166,7 +166,7 @@ body: |
|
||||
---
|
||||
# CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
|
||||
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# CHECK-NEXT: SI_MASK_BRANCH
|
||||
# CHECK-NEXT: S_CBRANCH_EXECZ
|
||||
|
||||
name: optimize_if_and_saveexec
|
||||
liveins:
|
||||
@ -180,7 +180,7 @@ body: |
|
||||
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
|
||||
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
@ -203,7 +203,7 @@ body: |
|
||||
---
|
||||
# CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
|
||||
# CHECK: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# CHECK-NEXT: SI_MASK_BRANCH
|
||||
# CHECK-NEXT: S_CBRANCH_EXECZ
|
||||
|
||||
name: optimize_if_or_saveexec
|
||||
liveins:
|
||||
@ -217,7 +217,7 @@ body: |
|
||||
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
|
||||
$sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
@ -243,7 +243,7 @@ body: |
|
||||
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
|
||||
# CHECK-NEXT: SI_MASK_BRANCH
|
||||
# CHECK-NEXT: S_CBRANCH_EXECZ
|
||||
name: optimize_if_and_saveexec_xor_valu_middle
|
||||
liveins:
|
||||
- { reg: '$vgpr0' }
|
||||
@ -258,7 +258,7 @@ body: |
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
@ -283,7 +283,7 @@ body: |
|
||||
# CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
# CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
|
||||
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
name: optimize_if_and_saveexec_xor_wrong_reg
|
||||
liveins:
|
||||
- { reg: '$vgpr0' }
|
||||
@ -299,7 +299,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
$sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$exec = S_MOV_B64_term $sgpr0_sgpr1
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
@ -322,7 +322,7 @@ body: |
|
||||
# CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
|
||||
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
|
||||
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
name: optimize_if_and_saveexec_xor_modify_copy_to_exec
|
||||
liveins:
|
||||
@ -338,7 +338,7 @@ body: |
|
||||
$sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
|
||||
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
@ -365,7 +365,7 @@ body: |
|
||||
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
# CHECK-NEXT: $exec = COPY $sgpr2_sgpr3
|
||||
# CHECK-NEXT: SI_MASK_BRANCH
|
||||
# CHECK-NEXT: S_CBRANCH_EXECZ
|
||||
name: optimize_if_and_saveexec_xor_live_out_setexec
|
||||
liveins:
|
||||
- { reg: '$vgpr0' }
|
||||
@ -379,7 +379,7 @@ body: |
|
||||
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$exec = S_MOV_B64_term $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
@ -404,7 +404,7 @@ body: |
|
||||
# CHECK: $sgpr0_sgpr1 = COPY $exec
|
||||
# CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
|
||||
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
|
||||
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
name: optimize_if_unknown_saveexec
|
||||
liveins:
|
||||
@ -418,7 +418,7 @@ body: |
|
||||
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
|
||||
$sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
@ -441,7 +441,7 @@ body: |
|
||||
---
|
||||
# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}}
|
||||
# CHECK: $sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# CHECK-NEXT: SI_MASK_BRANCH
|
||||
# CHECK-NEXT: S_CBRANCH_EXECZ
|
||||
|
||||
name: optimize_if_andn2_saveexec
|
||||
liveins:
|
||||
@ -455,7 +455,7 @@ body: |
|
||||
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
|
||||
$sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
@ -479,7 +479,7 @@ body: |
|
||||
# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}}
|
||||
# CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
|
||||
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
|
||||
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
name: optimize_if_andn2_saveexec_no_commute
|
||||
liveins:
|
||||
- { reg: '$vgpr0' }
|
||||
@ -492,7 +492,7 @@ body: |
|
||||
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
|
||||
$sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
@ -531,7 +531,7 @@ body: |
|
||||
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1.if:
|
||||
|
@ -15,7 +15,7 @@ body: |
|
||||
$vgpr4 = V_AND_B32_e32 1, $vgpr1, implicit $exec
|
||||
V_CMP_EQ_U32_e32 1, killed $vgpr4, implicit-def $vcc, implicit $exec
|
||||
$sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
|
@ -1,5 +1,5 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold-legacy=1000000 -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
name: skip_branch_taildup_endpgm
|
||||
@ -21,14 +21,12 @@ body: |
|
||||
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
|
||||
; CHECK: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
|
||||
; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.3
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
|
||||
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.4
|
||||
; CHECK: bb.2:
|
||||
@ -42,7 +40,6 @@ body: |
|
||||
; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
|
||||
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.4:
|
||||
; CHECK: renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
|
||||
@ -67,7 +64,7 @@ body: |
|
||||
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
|
||||
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.2:
|
||||
@ -76,7 +73,7 @@ body: |
|
||||
|
||||
renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
|
||||
SI_MASK_BRANCH %bb.4, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.4, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.4:
|
||||
@ -95,7 +92,7 @@ body: |
|
||||
$vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
|
||||
SI_MASK_BRANCH %bb.4, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.4, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.3:
|
||||
@ -120,14 +117,12 @@ body: |
|
||||
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
|
||||
; CHECK: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
|
||||
; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.3
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
|
||||
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.4
|
||||
; CHECK: bb.2:
|
||||
@ -139,7 +134,6 @@ body: |
|
||||
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
|
||||
; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
|
||||
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.4:
|
||||
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
|
||||
@ -154,7 +148,7 @@ body: |
|
||||
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
|
||||
$sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.2:
|
||||
@ -163,7 +157,7 @@ body: |
|
||||
|
||||
renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
|
||||
SI_MASK_BRANCH %bb.4, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.4, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.4:
|
||||
@ -180,7 +174,7 @@ body: |
|
||||
renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
|
||||
renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
|
||||
SI_MASK_BRANCH %bb.4, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.4, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.3:
|
||||
|
@ -49,7 +49,7 @@ body: |
|
||||
|
||||
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
||||
SI_MASK_BRANCH %bb.3, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
@ -57,7 +57,7 @@ body: |
|
||||
|
||||
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
||||
SI_MASK_BRANCH %bb.3, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
S_BRANCH %bb.4
|
||||
|
||||
bb.3:
|
||||
|
@ -203,7 +203,7 @@ body: |
|
||||
%47:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%48:sreg_64 = S_AND_B64 %47, %46, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term %48
|
||||
SI_MASK_BRANCH %bb.18, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.18, implicit $exec
|
||||
S_BRANCH %bb.16
|
||||
|
||||
bb.16:
|
||||
|
@ -52,7 +52,7 @@ body: |
|
||||
SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
|
||||
$sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
@ -117,7 +117,7 @@ body: |
|
||||
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
|
||||
SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
|
Loading…
x
Reference in New Issue
Block a user