mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AMDGPU: Implement AnalyzeBranch
Original patch by Tom Stellard llvm-svn: 270295
This commit is contained in:
parent
920c7d7490
commit
a197a65904
@ -1057,6 +1057,115 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
|
||||
return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
|
||||
}
|
||||
|
||||
/// Translate a branch predicate into the scalar conditional-branch opcode
/// that implements it.
///
/// \param Cond  Predicate to translate; only SCC_TRUE and SCC_FALSE are
///              accepted.
/// \returns S_CBRANCH_SCC1 for SCC_TRUE and S_CBRANCH_SCC0 for SCC_FALSE.
///
/// Any other predicate value (including INVALID_BR) is a caller error and
/// hits llvm_unreachable.
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  if (Cond == SIInstrInfo::SCC_TRUE)
    return AMDGPU::S_CBRANCH_SCC1;

  if (Cond == SIInstrInfo::SCC_FALSE)
    return AMDGPU::S_CBRANCH_SCC0;

  llvm_unreachable("invalid branch predicate");
}
|
||||
|
||||
/// Inverse of getBranchOpcode: classify a machine opcode as one of the
/// conditional-branch predicates.
///
/// \param Opcode  Machine opcode to classify.
/// \returns SCC_TRUE for S_CBRANCH_SCC1, SCC_FALSE for S_CBRANCH_SCC0, and
///          INVALID_BR for every other opcode.
SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  if (Opcode == AMDGPU::S_CBRANCH_SCC0)
    return SCC_FALSE;

  if (Opcode == AMDGPU::S_CBRANCH_SCC1)
    return SCC_TRUE;

  // Not a conditional branch this mapping knows about.
  return INVALID_BR;
}
|
||||
|
||||
/// Inspect the terminators of \p MBB and describe them through \p TBB,
/// \p FBB and \p Cond.
///
/// Recognized terminator sequences:
///   - no terminator at all: nothing is written, returns false;
///   - S_BRANCH: \p TBB is set to its target;
///   - a conditional branch known to getBranchPredicate, with nothing after
///     it: \p TBB is its target and \p Cond holds the predicate;
///   - such a conditional branch followed by S_BRANCH: as above, and \p FBB
///     is the S_BRANCH target.
///
/// \param MBB          Block whose terminators are inspected.
/// \param TBB          Receives the taken / unconditional destination.
/// \param FBB          Receives the destination of a trailing S_BRANCH.
/// \param Cond         Receives one immediate operand carrying the
///                     BranchPredicate of the conditional branch.
/// \param AllowModify  Not consulted by this implementation.
/// \returns false when the terminators match one of the forms above, true
///          when they do not. Note that \p Cond may already have had the
///          predicate appended when true is returned because of an
///          unrecognized second terminator.
bool SIInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  MachineBasicBlock::iterator Term = MBB.getFirstTerminator();

  // A block without terminators needs no further analysis.
  if (Term == MBB.end())
    return false;

  const unsigned FirstOpc = Term->getOpcode();

  // Unconditional Branch
  if (FirstOpc == AMDGPU::S_BRANCH) {
    TBB = Term->getOperand(0).getMBB();
    return false;
  }

  // Anything that is not a known conditional branch cannot be analyzed.
  const BranchPredicate Pred = getBranchPredicate(FirstOpc);
  if (Pred == INVALID_BR)
    return true;

  MachineBasicBlock *TakenBB = Term->getOperand(0).getMBB();
  Cond.push_back(MachineOperand::CreateImm(Pred));

  // Look at whatever follows the conditional branch.
  ++Term;
  const bool FallsThrough = (Term == MBB.end());

  // The only terminator accepted after the conditional branch is S_BRANCH.
  if (!FallsThrough && Term->getOpcode() != AMDGPU::S_BRANCH)
    return true;

  TBB = TakenBB;
  if (!FallsThrough)
    FBB = Term->getOperand(0).getMBB();

  return false;
}
|
||||
|
||||
/// Erase every instruction from the first terminator of \p MBB through the
/// end of the block.
///
/// \param MBB  Block whose terminators are deleted.
/// \returns the number of instructions erased (0 if the block had no
///          terminators).
unsigned SIInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  unsigned NumErased = 0;

  MachineBasicBlock::iterator Cur = MBB.getFirstTerminator();
  for (; Cur != MBB.end(); ++NumErased) {
    // Step past the instruction before erasing it so the cursor is never
    // left pointing at a deleted node.
    MachineBasicBlock::iterator Doomed = Cur;
    Cur = std::next(Cur);
    Doomed->eraseFromParent();
  }

  return NumErased;
}
|
||||
|
||||
/// Emit branch instructions into \p MBB via BuildMI.
///
/// Three shapes are produced:
///   - no \p FBB and empty \p Cond: a single S_BRANCH to \p TBB;
///   - no \p FBB and a predicate in \p Cond: a single conditional branch to
///     \p TBB;
///   - both \p TBB and \p FBB: a conditional branch to \p TBB followed by an
///     S_BRANCH to \p FBB.
///
/// \param MBB   Block receiving the new instructions.
/// \param TBB   Destination of the (conditional or unconditional) branch.
/// \param FBB   Destination of the trailing S_BRANCH, or null.
/// \param Cond  Empty for an unconditional branch; otherwise Cond[0] must be
///              an immediate holding a BranchPredicate value.
/// \param DL    Debug location attached to the new instructions.
/// \returns the number of instructions inserted (1 or 2).
unsigned SIInstrInfo::InsertBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *TBB,
                                   MachineBasicBlock *FBB,
                                   ArrayRef<MachineOperand> Cond,
                                   DebugLoc DL) const {
  const bool HasFalseDest = (FBB != nullptr);

  // Plain unconditional branch.
  if (!HasFalseDest && Cond.empty()) {
    BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)).addMBB(TBB);
    return 1;
  }

  assert(TBB && Cond[0].isImm());

  // Every remaining shape starts with the conditional branch to TBB.
  const BranchPredicate Pred =
      static_cast<BranchPredicate>(Cond[0].getImm());
  const unsigned CondOpc = getBranchOpcode(Pred);
  BuildMI(&MBB, DL, get(CondOpc)).addMBB(TBB);

  if (!HasFalseDest)
    return 1;

  assert(TBB && FBB);

  // Two-way branch: add the unconditional jump to the false destination.
  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)).addMBB(FBB);
  return 2;
}
|
||||
|
||||
static void removeModOperands(MachineInstr &MI) {
|
||||
unsigned Opc = MI.getOpcode();
|
||||
int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
|
||||
|
@ -26,6 +26,16 @@ class SIInstrInfo final : public AMDGPUInstrInfo {
|
||||
private:
|
||||
const SIRegisterInfo RI;
|
||||
|
||||
// Predicates understood by getBranchOpcode / getBranchPredicate.
// The inverse predicate should have the negative value.
enum BranchPredicate {
  SCC_FALSE = -1, // Inverse of SCC_TRUE.
  INVALID_BR = 0, // Not a recognized conditional branch.
  SCC_TRUE = 1
};
|
||||
|
||||
static unsigned getBranchOpcode(BranchPredicate Cond);
|
||||
static BranchPredicate getBranchPredicate(unsigned Opcode);
|
||||
|
||||
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
MachineOperand &SuperReg,
|
||||
@ -136,6 +146,17 @@ public:
|
||||
unsigned &SrcOpIdx1,
|
||||
unsigned &SrcOpIdx2) const override;
|
||||
|
||||
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
||||
MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
bool AllowModify) const override;
|
||||
|
||||
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
|
||||
|
||||
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
|
||||
DebugLoc DL) const override;
|
||||
|
||||
bool areMemAccessesTriviallyDisjoint(
|
||||
MachineInstr *MIa, MachineInstr *MIb,
|
||||
AliasAnalysis *AA = nullptr) const override;
|
||||
@ -493,7 +514,6 @@ public:
|
||||
|
||||
ScheduleHazardRecognizer *
|
||||
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
|
||||
|
||||
};
|
||||
|
||||
namespace AMDGPU {
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
||||
@ -113,12 +113,9 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: We currently disallow SALU instructions in all branches,
|
||||
; but there are some cases when they should be allowed.
|
||||
|
||||
; FUNC-LABEL: {{^}}ctpop_i64_in_br:
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
|
||||
; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34
|
||||
; SI-DAG: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
|
||||
; VI-DAG: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34
|
||||
; GCN-DAG: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
|
||||
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0
|
||||
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
|
||||
|
@ -3,19 +3,19 @@
|
||||
; Make sure that m0 is not reinitialized in the loop.
|
||||
|
||||
; GCN-LABEL: {{^}}copy_local_to_global_loop_m0_init:
|
||||
; GCN: s_cbranch_scc1 BB0_2
|
||||
; GCN: s_cbranch_scc1 BB0_3
|
||||
|
||||
; Initialize in preheader
|
||||
; GCN: s_mov_b32 m0, -1
|
||||
|
||||
; GCN: BB0_3:
|
||||
; GCN: BB0_2:
|
||||
; GCN: ds_read_b32
|
||||
; GCN: buffer_store_dword
|
||||
|
||||
; GCN: s_cbranch_vccnz BB0_2
|
||||
; GCN: s_branch BB0_3
|
||||
; GCN: s_cbranch_vccnz BB0_3
|
||||
; GCN: s_branch BB0_2
|
||||
|
||||
; GCN: BB0_2:
|
||||
; GCN: BB0_3:
|
||||
; GCN-NEXT: s_endpgm
|
||||
define void @copy_local_to_global_loop_m0_init(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(3)* noalias nocapture readonly %in, i32 %n) #0 {
|
||||
bb:
|
||||
|
@ -2,6 +2,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare void @llvm.amdgcn.s.dcache.inv() #0
|
||||
declare void @llvm.amdgcn.s.waitcnt(i32) #0
|
||||
|
||||
; GCN-LABEL: {{^}}test_s_dcache_inv:
|
||||
; GCN-NEXT: ; BB#0:
|
||||
@ -15,10 +16,11 @@ define void @test_s_dcache_inv() #0 {
|
||||
|
||||
; GCN-LABEL: {{^}}test_s_dcache_inv_insert_wait:
|
||||
; GCN-NEXT: ; BB#0:
|
||||
; GCN-NEXT: s_dcache_inv
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding
|
||||
; GCN: s_dcache_inv
|
||||
; GCN: s_waitcnt lgkmcnt(0) ; encoding
|
||||
define void @test_s_dcache_inv_insert_wait() #0 {
|
||||
call void @llvm.amdgcn.s.dcache.inv()
|
||||
call void @llvm.amdgcn.s.waitcnt(i32 0)
|
||||
br label %end
|
||||
|
||||
end:
|
||||
|
@ -2,6 +2,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare void @llvm.amdgcn.s.dcache.inv.vol() #0
|
||||
declare void @llvm.amdgcn.s.waitcnt(i32) #0
|
||||
|
||||
; GCN-LABEL: {{^}}test_s_dcache_inv_vol:
|
||||
; GCN-NEXT: ; BB#0:
|
||||
@ -16,9 +17,10 @@ define void @test_s_dcache_inv_vol() #0 {
|
||||
; GCN-LABEL: {{^}}test_s_dcache_inv_vol_insert_wait:
|
||||
; GCN-NEXT: ; BB#0:
|
||||
; GCN-NEXT: s_dcache_inv_vol
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding
|
||||
; GCN: s_waitcnt lgkmcnt(0) ; encoding
|
||||
define void @test_s_dcache_inv_vol_insert_wait() #0 {
|
||||
call void @llvm.amdgcn.s.dcache.inv.vol()
|
||||
call void @llvm.amdgcn.s.waitcnt(i32 0)
|
||||
br label %end
|
||||
|
||||
end:
|
||||
|
@ -1,6 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
|
||||
|
||||
declare void @llvm.amdgcn.s.dcache.wb() #0
|
||||
declare void @llvm.amdgcn.s.waitcnt(i32) #0
|
||||
|
||||
; VI-LABEL: {{^}}test_s_dcache_wb:
|
||||
; VI-NEXT: ; BB#0:
|
||||
@ -14,9 +15,10 @@ define void @test_s_dcache_wb() #0 {
|
||||
; VI-LABEL: {{^}}test_s_dcache_wb_insert_wait:
|
||||
; VI-NEXT: ; BB#0:
|
||||
; VI-NEXT: s_dcache_wb
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding
|
||||
; VI: s_waitcnt lgkmcnt(0) ; encoding
|
||||
define void @test_s_dcache_wb_insert_wait() #0 {
|
||||
call void @llvm.amdgcn.s.dcache.wb()
|
||||
call void @llvm.amdgcn.s.waitcnt(i32 0)
|
||||
br label %end
|
||||
|
||||
end:
|
||||
|
@ -1,6 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
|
||||
|
||||
declare void @llvm.amdgcn.s.dcache.wb.vol() #0
|
||||
declare void @llvm.amdgcn.s.waitcnt(i32) #0
|
||||
|
||||
; VI-LABEL: {{^}}test_s_dcache_wb_vol:
|
||||
; VI-NEXT: ; BB#0:
|
||||
@ -14,9 +15,10 @@ define void @test_s_dcache_wb_vol() #0 {
|
||||
; VI-LABEL: {{^}}test_s_dcache_wb_vol_insert_wait:
|
||||
; VI-NEXT: ; BB#0:
|
||||
; VI-NEXT: s_dcache_wb_vol
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding
|
||||
; VI: s_waitcnt lgkmcnt(0) ; encoding
|
||||
define void @test_s_dcache_wb_vol_insert_wait() #0 {
|
||||
call void @llvm.amdgcn.s.dcache.wb.vol()
|
||||
call void @llvm.amdgcn.s.waitcnt(i32 0)
|
||||
br label %end
|
||||
|
||||
end:
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
;
|
||||
;
|
||||
; Most SALU instructions ignore control flow, so we need to make sure
|
||||
@ -67,7 +67,7 @@ endif:
|
||||
; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]]
|
||||
; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
|
||||
|
||||
; SI: BB2_1:
|
||||
; SI: BB2_2:
|
||||
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
|
||||
; SI: v_cmp_eq_i32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
|
||||
; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
|
||||
|
@ -166,18 +166,21 @@ endif:
|
||||
}
|
||||
|
||||
|
||||
; SI-LABEL: {{^}}uniform_if_else:
|
||||
; SI-LABEL: {{^}}uniform_if_else_ret:
|
||||
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; SI: s_cbranch_scc1 [[ELSE_LABEL:[0-9_A-Za-z]+]]
|
||||
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; SI: buffer_store_dword [[ONE]]
|
||||
; SI: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
|
||||
; SI-NEXT: s_cbranch_scc1 [[ELSE_LABEL:[0-9_A-Za-z]+]]
|
||||
; SI-NEXT: s_branch [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; SI: [[ELSE_LABEL]]:
|
||||
; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; SI: buffer_store_dword [[TWO]]
|
||||
; SI: [[ENDIF_LABEL]]:
|
||||
; SI: s_endpgm
|
||||
define void @uniform_if_else(i32 addrspace(1)* nocapture %out, i32 %a) {
|
||||
|
||||
; SI: {{^}}[[IF_LABEL]]:
|
||||
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; SI: buffer_store_dword [[ONE]]
|
||||
; SI: s_endpgm
|
||||
define void @uniform_if_else_ret(i32 addrspace(1)* nocapture %out, i32 %a) {
|
||||
entry:
|
||||
%cmp = icmp eq i32 %a, 0
|
||||
br i1 %cmp, label %if.then, label %if.else
|
||||
@ -194,6 +197,42 @@ if.end: ; preds = %if.else, %if.then
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}uniform_if_else:
|
||||
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; SI-NEXT: s_cbranch_scc1 [[ELSE_LABEL:[0-9_A-Za-z]+]]
|
||||
; SI-NEXT: s_branch [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; SI: [[ELSE_LABEL]]:
|
||||
; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; SI: buffer_store_dword [[TWO]]
|
||||
; SI: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; SI: [[IF_LABEL]]:
|
||||
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; SI: buffer_store_dword [[ONE]]
|
||||
|
||||
; SI: [[ENDIF_LABEL]]:
|
||||
; SI: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
|
||||
; SI: buffer_store_dword [[THREE]]
|
||||
; SI: s_endpgm
|
||||
define void @uniform_if_else(i32 addrspace(1)* nocapture %out0, i32 addrspace(1)* nocapture %out1, i32 %a) {
|
||||
entry:
|
||||
%cmp = icmp eq i32 %a, 0
|
||||
br i1 %cmp, label %if.then, label %if.else
|
||||
|
||||
if.then: ; preds = %entry
|
||||
store i32 1, i32 addrspace(1)* %out0
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
store i32 2, i32 addrspace(1)* %out0
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
store i32 3, i32 addrspace(1)* %out1
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}icmp_2_users:
|
||||
; SI: s_cmp_lt_i32 s{{[0-9]+}}, 1
|
||||
; SI: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
|
||||
@ -368,15 +407,15 @@ exit:
|
||||
; SI-LABEL: {{^}}cse_uniform_condition_different_blocks:
|
||||
; SI: s_load_dword [[COND:s[0-9]+]]
|
||||
; SI: s_cmp_lt_i32 [[COND]], 1
|
||||
; SI: s_cbranch_scc1 BB13_3
|
||||
; SI: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3
|
||||
|
||||
; SI: BB#1:
|
||||
; SI-NOT: cmp
|
||||
; SI: buffer_load_dword
|
||||
; SI: buffer_store_dword
|
||||
; SI: s_cbranch_scc1 BB13_3
|
||||
; SI: s_cbranch_scc1 BB[[FNNUM]]_3
|
||||
|
||||
; SI: BB13_3:
|
||||
; SI: BB[[FNNUM]]_3:
|
||||
; SI: s_endpgm
|
||||
define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
|
||||
bb:
|
||||
|
@ -72,19 +72,18 @@ exit:
|
||||
; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
|
||||
; SI: s_cbranch_execz BB2_2
|
||||
; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI: ; BB#1:
|
||||
; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
|
||||
; SI: BB2_3:
|
||||
; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; SI: buffer_load_dword
|
||||
; SI-DAG: buffer_store_dword
|
||||
; SI-DAG: v_cmp_eq_i32_e32 vcc,
|
||||
; SI-DAG: s_and_b64 vcc, exec, vcc
|
||||
; SI: s_cbranch_vccnz BB2_2
|
||||
; SI: s_branch BB2_3
|
||||
; SI: BB2_2:
|
||||
; SI: s_cbranch_vccnz [[LABEL_EXIT]]
|
||||
; SI: s_branch [[LABEL_LOOP]]
|
||||
; SI: [[LABEL_EXIT]]:
|
||||
; SI: s_endpgm
|
||||
|
||||
define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
|
||||
@ -117,7 +116,7 @@ exit:
|
||||
; SI: v_cmp_lt_i32_e32 vcc
|
||||
; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
|
||||
; SI: s_cbranch_execz BB3_2
|
||||
; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; Initialize inner condition to false
|
||||
; SI: ; BB#1:
|
||||
@ -125,7 +124,7 @@ exit:
|
||||
; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]
|
||||
|
||||
; Clear exec bits for workitems that load -1s
|
||||
; SI: BB3_3:
|
||||
; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]]
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]]
|
||||
; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
|
||||
@ -133,23 +132,23 @@ exit:
|
||||
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
|
||||
; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
|
||||
; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
|
||||
; SI: s_cbranch_execz BB3_5
|
||||
; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI: BB#4:
|
||||
; SI: BB#3:
|
||||
; SI: buffer_store_dword
|
||||
; SI: v_cmp_ge_i64_e32 [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
|
||||
; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]
|
||||
|
||||
; SI: BB3_5:
|
||||
; SI: [[LABEL_FLOW]]:
|
||||
; SI: s_or_b64 exec, exec, [[ORNEG2]]
|
||||
; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]]
|
||||
; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
|
||||
; SI: s_cbranch_execnz BB3_3
|
||||
; SI: s_cbranch_execnz [[LABEL_LOOP]]
|
||||
|
||||
; SI: BB#6
|
||||
; SI: BB#5
|
||||
; SI: s_or_b64 exec, exec, [[COND_STATE]]
|
||||
|
||||
; SI: BB3_2:
|
||||
; SI: [[LABEL_EXIT]]:
|
||||
; SI-NOT: [[COND_STATE]]
|
||||
; SI: s_endpgm
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user