1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[AMDGPU] Remove SI_MASK_BRANCH

SI_MASK_BRANCH is already deprecated, so remove the code that handles it.
Also update the tests to use S_CBRANCH_EXECZ instead of SI_MASK_BRANCH.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D97545
This commit is contained in:
Ruiling Song 2021-02-25 09:19:37 +08:00
parent 4b251b39b2
commit 19ee89a560
21 changed files with 92 additions and 284 deletions

View File

@ -274,24 +274,9 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
++I;
}
} else {
// We don't want SI_MASK_BRANCH/SI_RETURN_TO_EPILOG encoded. They are
// We don't want these pseudo instructions encoded. They are
// placeholder terminator instructions and should only be printed as
// comments.
if (MI->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
if (isVerbose()) {
SmallVector<char, 16> BBStr;
raw_svector_ostream Str(BBStr);
const MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
const MCSymbolRefExpr *Expr
= MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
Expr->print(Str, MAI);
OutStreamer->emitRawComment(Twine(" mask branch ") + BBStr);
}
return;
}
if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
if (isVerbose())
OutStreamer->emitRawComment(" return to shader part epilog");

View File

@ -7,9 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass inserts branches on the 0 exec mask over divergent branches
/// branches when it's expected that jumping over the untaken control flow will
/// be cheaper than having every workitem no-op through it.
/// This pass mainly lowers early terminate pseudo instructions.
//
//===----------------------------------------------------------------------===//
@ -24,32 +22,21 @@ using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"
static cl::opt<unsigned> SkipThresholdFlag(
"amdgpu-skip-threshold-legacy",
cl::desc("Number of instructions before jumping over divergent control flow"),
cl::init(12), cl::Hidden);
namespace {
class SIInsertSkips : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
unsigned SkipThreshold = 0;
MachineDominatorTree *MDT = nullptr;
MachineBasicBlock *EarlyExitBlock = nullptr;
bool EarlyExitClearsExec = false;
bool shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
void ensureEarlyExitBlock(MachineBasicBlock &MBB, bool ClearExec);
void earlyTerm(MachineInstr &MI);
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
public:
static char ID;
@ -87,53 +74,6 @@ static bool opcodeEmitsNoInsts(const MachineInstr &MI) {
if (MI.isMetaInstruction())
return true;
// Handle target specific opcodes.
switch (MI.getOpcode()) {
case AMDGPU::SI_MASK_BRANCH:
return true;
default:
return false;
}
}
bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const {
unsigned NumInstr = 0;
const MachineFunction *MF = From.getParent();
for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
MBBI != End && MBBI != ToI; ++MBBI) {
const MachineBasicBlock &MBB = *MBBI;
for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
NumInstr < SkipThreshold && I != E; ++I) {
if (opcodeEmitsNoInsts(*I))
continue;
// FIXME: Since this is required for correctness, this should be inserted
// during SILowerControlFlow.
// When a uniform loop is inside non-uniform control flow, the branch
// leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
// when EXEC = 0. We should skip the loop lest it becomes infinite.
if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;
if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
return true;
// These instructions are potentially expensive even if EXEC = 0.
if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
I->getOpcode() == AMDGPU::S_WAITCNT)
return true;
++NumInstr;
if (NumInstr >= SkipThreshold)
return true;
}
}
return false;
}
@ -209,29 +149,11 @@ void SIInsertSkips::earlyTerm(MachineInstr &MI) {
MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
}
// Returns true if a branch over the block was inserted.
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
MachineBasicBlock &SrcMBB) {
MachineBasicBlock *DestBB = MI.getOperand(0).getMBB();
if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
return false;
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());
BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
.addMBB(DestBB);
return true;
}
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
MDT = &getAnalysis<MachineDominatorTree>();
SkipThreshold = SkipThresholdFlag;
MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
@ -246,10 +168,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
case AMDGPU::SI_MASK_BRANCH:
MadeChange |= skipMaskBranch(MI, MBB);
break;
case AMDGPU::S_BRANCH:
// Optimize out branches to the next block.
// FIXME: Shouldn't this be handled by BranchFolding?

View File

@ -2369,10 +2369,8 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// Skip over the instructions that are artificially terminators for special
// exec management.
while (I != E && !I->isBranch() && !I->isReturn() &&
I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
while (I != E && !I->isBranch() && !I->isReturn()) {
switch (I->getOpcode()) {
case AMDGPU::SI_MASK_BRANCH:
case AMDGPU::S_MOV_B64_term:
case AMDGPU::S_XOR_B64_term:
case AMDGPU::S_OR_B64_term:
@ -2400,34 +2398,7 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
if (I == E)
return false;
if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
++I;
// TODO: Should be able to treat as fallthrough?
if (I == MBB.end())
return true;
if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
return true;
MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
// Specifically handle the case where the conditional branch is to the same
// destination as the mask branch. e.g.
//
// si_mask_branch BB8
// s_cbranch_execz BB8
// s_cbranch BB9
//
// This is required to understand divergent loops which may need the branches
// to be relaxed.
if (TBB != MaskBrDest || Cond.empty())
return true;
auto Pred = Cond[0].getImm();
return (Pred != EXECZ && Pred != EXECNZ);
return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
}
unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
@ -2438,11 +2409,6 @@ unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
unsigned RemovedSize = 0;
while (I != MBB.end()) {
MachineBasicBlock::iterator Next = std::next(I);
if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
I = Next;
continue;
}
RemovedSize += getInstSizeInBytes(*I);
I->eraseFromParent();
++Count;

View File

@ -274,19 +274,6 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
// Dummy terminator instruction to use after control flow instructions
// replaced with exec mask operations.
def SI_MASK_BRANCH : VPseudoInstSI <
(outs), (ins brtarget:$target)> {
let isBranch = 0;
let isTerminator = 1;
let isBarrier = 0;
let SchedRW = [];
let hasNoSchedulingInfo = 1;
let FixedSize = 1;
let Size = 0;
}
let isTerminator = 1 in {
let OtherPredicates = [EnableLateCFGStructurize] in {

View File

@ -39,7 +39,7 @@ body: |
%20:sreg_64 = COPY $exec, implicit-def $exec
%21:sreg_64 = S_AND_B64 %20, %19, implicit-def dead $scc
$exec = S_MOV_B64_term %21
SI_MASK_BRANCH %bb.4, implicit $exec
S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
@ -73,7 +73,7 @@ body: |
%29:sreg_64 = COPY $exec, implicit-def $exec
%30:sreg_64 = S_AND_B64 %29, %26, implicit-def dead $scc
$exec = S_MOV_B64_term %30
SI_MASK_BRANCH %bb.10, implicit $exec
S_CBRANCH_EXECZ %bb.10, implicit $exec
S_BRANCH %bb.8
bb.8:

View File

@ -90,7 +90,7 @@ body: |
%44:sreg_64 = S_AND_B64 %43, %30, implicit-def dead $scc
%45:sreg_64 = S_XOR_B64 %44, %43, implicit-def dead $scc
$exec = S_MOV_B64_term killed %44
SI_MASK_BRANCH %bb.9, implicit $exec
S_CBRANCH_EXECZ %bb.9, implicit $exec
S_BRANCH %bb.8
bb.5:
@ -122,7 +122,7 @@ body: |
%67:sreg_64 = COPY $exec, implicit-def $exec
%68:sreg_64 = S_AND_B64 %67, %61, implicit-def dead $scc
$exec = S_MOV_B64_term killed %68
SI_MASK_BRANCH %bb.13, implicit $exec
S_CBRANCH_EXECZ %bb.13, implicit $exec
S_BRANCH %bb.7
bb.7:
@ -198,7 +198,7 @@ body: |
%90:sreg_64 = S_AND_B64 %89, %87, implicit-def dead $scc
%46:sreg_64 = S_XOR_B64 %90, %89, implicit-def dead $scc
$exec = S_MOV_B64_term killed %90
SI_MASK_BRANCH %bb.5, implicit $exec
S_CBRANCH_EXECZ %bb.5, implicit $exec
S_BRANCH %bb.15
bb.13:
@ -211,7 +211,7 @@ body: |
%95:sreg_64 = COPY $exec, implicit-def $exec
%96:sreg_64 = S_AND_B64 %95, %93, implicit-def dead $scc
$exec = S_MOV_B64_term killed %96
SI_MASK_BRANCH %bb.16, implicit $exec
S_CBRANCH_EXECZ %bb.16, implicit $exec
S_BRANCH %bb.14
bb.14:

View File

@ -83,7 +83,7 @@ body: |
%23:sreg_64 = S_AND_B64 %22, %18, implicit-def dead $scc
%24:sreg_64 = S_XOR_B64 %23, %22, implicit-def dead $scc
$exec = S_MOV_B64_term killed %23
SI_MASK_BRANCH %bb.7, implicit $exec
S_CBRANCH_EXECZ %bb.7, implicit $exec
S_BRANCH %bb.18
bb.7:

View File

@ -74,7 +74,7 @@ body: |
%23:sreg_64 = COPY $exec, implicit-def $exec
%24:sreg_64 = S_AND_B64 %23, %22, implicit-def dead $scc
$exec = S_MOV_B64_term killed %24
SI_MASK_BRANCH %bb.7, implicit $exec
S_CBRANCH_EXECZ %bb.7, implicit $exec
S_BRANCH %bb.5
bb.5:
@ -153,7 +153,7 @@ body: |
%50:sreg_64 = COPY $exec, implicit-def $exec
%51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc
$exec = S_MOV_B64_term killed %51
SI_MASK_BRANCH %bb.16, implicit $exec
S_CBRANCH_EXECZ %bb.16, implicit $exec
S_BRANCH %bb.15
bb.15:

View File

@ -30,7 +30,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]]
; GCN: SI_MASK_BRANCH %bb.4, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@ -47,7 +47,7 @@ body: |
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
; GCN: SI_MASK_BRANCH %bb.3, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GCN: S_BRANCH %bb.2
; GCN: bb.2:
; GCN: successors: %bb.3(0x80000000)
@ -77,7 +77,7 @@ body: |
%3:sreg_64 = COPY $exec, implicit-def $exec
%4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc
$exec = S_MOV_B64_term %4
SI_MASK_BRANCH %bb.4, implicit $exec
S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.1
bb.1:
@ -96,7 +96,7 @@ body: |
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
$exec = S_MOV_B64_term %13
SI_MASK_BRANCH %bb.3, implicit $exec
S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.2
bb.2:

View File

@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
---
@ -8,7 +8,6 @@ body: |
; CHECK-LABEL: name: skip_execz_flat
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@ -18,7 +17,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
@ -36,7 +35,6 @@ body: |
; CHECK-LABEL: name: skip_execz_mubuf
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@ -46,7 +44,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
@ -64,7 +62,6 @@ body: |
; CHECK-LABEL: name: skip_execz_ds
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@ -74,7 +71,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2

View File

@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
# Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0
---
@ -9,7 +9,6 @@ body: |
; CHECK-LABEL: name: skip_gws_init
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@ -19,7 +18,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
@ -37,7 +36,6 @@ body: |
; CHECK-LABEL: name: skip_gws_barrier
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@ -47,7 +45,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2

View File

@ -1,44 +1,18 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=2 %s -o - | FileCheck %s
---
# CHECK-LABEL: name: no_count_mask_branch_pseudo
# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH
# CHECK-NOT: S_CBRANCH_EXECZ
name: no_count_mask_branch_pseudo
body: |
bb.0:
successors: %bb.1
$vgpr1 = V_MOV_B32_e32 7, implicit $exec
SI_MASK_BRANCH %bb.2, implicit $exec
bb.1:
successors: %bb.2
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
SI_MASK_BRANCH %bb.3, implicit $exec
bb.2:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
bb.3:
S_ENDPGM 0
...
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=3 %s -o - | FileCheck %s
---
# FIXME: RemoveShortExecBranches should not count dbg_value instructions.
# CHECK-LABEL: name: no_count_dbg_value
# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH
# CHECK-NOT: S_CBRANCH_EXECZ
name: no_count_dbg_value
body: |
bb.0:
successors: %bb.1
successors: %bb.1, %bb.2
$vgpr1 = V_MOV_B32_e32 7, implicit $exec
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2

View File

@ -9,40 +9,29 @@ name: loop_header_nopred
body: |
; GCN-LABEL: name: loop_header_nopred
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: S_INST_PREFETCH 1
; GCN: S_BRANCH %bb.1
; GCN: bb.6 (align 64):
; GCN: successors: %bb.7(0x04000000), %bb.1(0x7c000000)
; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GCN: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
; GCN: bb.3:
; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000)
; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
; GCN: S_BRANCH %bb.4
; GCN: bb.2 (align 64):
; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000)
; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
; GCN: S_BRANCH %bb.4
; GCN: bb.4:
; GCN: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
; GCN: successors: %bb.2(0x80000000)
; GCN: S_BRANCH %bb.2
; GCN: bb.1 (align 64):
; GCN: successors: %bb.7(0x04000000), %bb.2(0x7c000000)
; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc_lo
; GCN: bb.2:
; GCN: successors: %bb.5(0x40000000), %bb.1(0x40000000)
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.5:
; GCN: successors: %bb.6(0x80000000)
; GCN: S_BRANCH %bb.6
; GCN: successors: %bb.1(0x04000000), %bb.5(0x7c000000)
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.7:
; GCN: S_ENDPGM 0
bb.0:
@ -60,7 +49,7 @@ body: |
bb.3:
successors: %bb.4(0x40000000), %bb.6(0x40000000)
SI_MASK_BRANCH %bb.6, implicit $exec
S_CBRANCH_EXECZ %bb.6, implicit $exec
S_BRANCH %bb.4
bb.4:

View File

@ -86,7 +86,7 @@ body: |
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
$sgpr2_sgpr3 = S_XOR_B64 $exec, killed $sgpr2_sgpr3, implicit-def dead $scc
SI_MASK_BRANCH %bb.2.exit, implicit $exec
S_CBRANCH_EXECZ %bb.2.exit, implicit $exec
bb.1.atomic:
successors: %bb.2.exit(0x80000000)

View File

@ -22,7 +22,7 @@ body: |
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY1]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]]
; GCN: SI_MASK_BRANCH %bb.2, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x80000000)
@ -32,7 +32,7 @@ body: |
; GCN: $exec = S_AND_B64 $exec, [[COPY]], implicit-def dead $scc
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc
; GCN: $exec = S_XOR_B64_term $exec, [[S_AND_B64_1]], implicit-def $scc
; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.6, implicit $exec
; GCN: S_BRANCH %bb.3
; GCN: bb.3:
; GCN: successors: %bb.4(0x40000000), %bb.5(0x40000000)
@ -40,7 +40,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_2]]
; GCN: SI_MASK_BRANCH %bb.5, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec
; GCN: S_BRANCH %bb.4
; GCN: bb.4:
; GCN: successors: %bb.5(0x80000000)
@ -60,7 +60,7 @@ body: |
%4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc
%5:sreg_64 = S_XOR_B64 %4, %3, implicit-def dead $scc
$exec = S_MOV_B64_term %4
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
@ -72,7 +72,7 @@ body: |
$exec = S_AND_B64 $exec, %0, implicit-def dead $scc
%7:sreg_64 = S_AND_B64 $exec, %6, implicit-def $scc
$exec = S_XOR_B64_term $exec, %7, implicit-def $scc
SI_MASK_BRANCH %bb.6, implicit $exec
S_CBRANCH_EXECZ %bb.6, implicit $exec
S_BRANCH %bb.3
bb.3:
@ -82,7 +82,7 @@ body: |
%9:sreg_64 = COPY $exec, implicit-def $exec
%10:sreg_64 = S_AND_B64 %9, %8, implicit-def dead $scc
$exec = S_MOV_B64_term %10
SI_MASK_BRANCH %bb.5, implicit $exec
S_CBRANCH_EXECZ %bb.5, implicit $exec
S_BRANCH %bb.4
bb.4:

View File

@ -128,7 +128,7 @@
# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: SI_MASK_BRANCH
# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec_xor
liveins:
@ -143,7 +143,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@ -166,7 +166,7 @@ body: |
---
# CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH
# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec
liveins:
@ -180,7 +180,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@ -203,7 +203,7 @@ body: |
---
# CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH
# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_or_saveexec
liveins:
@ -217,7 +217,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@ -243,7 +243,7 @@ body: |
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec_xor_valu_middle
liveins:
- { reg: '$vgpr0' }
@ -258,7 +258,7 @@ body: |
BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@ -283,7 +283,7 @@ body: |
# CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_and_saveexec_xor_wrong_reg
liveins:
- { reg: '$vgpr0' }
@ -299,7 +299,7 @@ body: |
$sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term $sgpr0_sgpr1
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@ -322,7 +322,7 @@ body: |
# CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_and_saveexec_xor_modify_copy_to_exec
liveins:
@ -338,7 +338,7 @@ body: |
$sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@ -365,7 +365,7 @@ body: |
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec_xor_live_out_setexec
liveins:
- { reg: '$vgpr0' }
@ -379,7 +379,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@ -404,7 +404,7 @@ body: |
# CHECK: $sgpr0_sgpr1 = COPY $exec
# CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_unknown_saveexec
liveins:
@ -418,7 +418,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@ -441,7 +441,7 @@ body: |
---
# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH
# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_andn2_saveexec
liveins:
@ -455,7 +455,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@ -479,7 +479,7 @@ body: |
# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}}
# CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_andn2_saveexec_no_commute
liveins:
- { reg: '$vgpr0' }
@ -492,7 +492,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@ -531,7 +531,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:

View File

@ -15,7 +15,7 @@ body: |
$vgpr4 = V_AND_B32_e32 1, $vgpr1, implicit $exec
V_CMP_EQ_U32_e32 1, killed $vgpr4, implicit-def $vcc, implicit $exec
$sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:

View File

@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold-legacy=1000000 -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
---
name: skip_branch_taildup_endpgm
@ -21,14 +21,12 @@ body: |
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK: S_BRANCH %bb.3
; CHECK: bb.1:
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: S_BRANCH %bb.4
; CHECK: bb.2:
@ -42,7 +40,6 @@ body: |
; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.4:
; CHECK: renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
@ -67,7 +64,7 @@ body: |
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.2:
@ -76,7 +73,7 @@ body: |
renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
SI_MASK_BRANCH %bb.4, implicit $exec
S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.4:
@ -95,7 +92,7 @@ body: |
$vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
SI_MASK_BRANCH %bb.4, implicit $exec
S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
@ -120,14 +117,12 @@ body: |
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK: S_BRANCH %bb.3
; CHECK: bb.1:
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: S_BRANCH %bb.4
; CHECK: bb.2:
@ -139,7 +134,6 @@ body: |
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.4:
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
@ -154,7 +148,7 @@ body: |
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.2:
@ -163,7 +157,7 @@ body: |
renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
SI_MASK_BRANCH %bb.4, implicit $exec
S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.4:
@ -180,7 +174,7 @@ body: |
renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
SI_MASK_BRANCH %bb.4, implicit $exec
S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:

View File

@ -49,7 +49,7 @@ body: |
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
SI_MASK_BRANCH %bb.3, implicit $exec
S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.2
bb.2:
@ -57,7 +57,7 @@ body: |
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
SI_MASK_BRANCH %bb.3, implicit $exec
S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.4
bb.3:

View File

@ -203,7 +203,7 @@ body: |
%47:sreg_64 = COPY $exec, implicit-def $exec
%48:sreg_64 = S_AND_B64 %47, %46, implicit-def dead $scc
$exec = S_MOV_B64_term %48
SI_MASK_BRANCH %bb.18, implicit $exec
S_CBRANCH_EXECZ %bb.18, implicit $exec
S_BRANCH %bb.16
bb.16:

View File

@ -52,7 +52,7 @@ body: |
SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
@ -117,7 +117,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1: