mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
AMDGPU: Use unsigned compare for eq/ne
For some reason both the signed and unsigned versions of these compares are available, except for scalar 64-bit compares, which only have u64. I'm not sure why both exist (I'm guessing it's for the one-bit inputs we don't use), but for consistency always use the unsigned one. llvm-svn: 282832
This commit is contained in:
parent
d31e44ab52
commit
3a9a1ac61b
@ -407,7 +407,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
} else {
|
||||
// FIXME: Hack until VReg_1 removed.
|
||||
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
|
||||
.addImm(0)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
}
|
||||
|
@ -882,12 +882,12 @@ def : Pat <
|
||||
|
||||
def : Pat <
|
||||
(i1 (trunc i32:$a)),
|
||||
(V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1)
|
||||
(V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), 1)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i1 (trunc i64:$a)),
|
||||
(V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1),
|
||||
(V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
|
||||
(EXTRACT_SUBREG $a, sub0)), 1)
|
||||
>;
|
||||
|
||||
|
@ -131,7 +131,7 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
|
||||
MI.eraseFromParent();
|
||||
} else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
|
||||
SrcRC == &AMDGPU::VReg_1RegClass) {
|
||||
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
|
||||
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_U32_e64))
|
||||
.addOperand(Dst)
|
||||
.addOperand(Src)
|
||||
.addImm(0);
|
||||
|
@ -237,13 +237,13 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
|
||||
return;
|
||||
|
||||
// eq/ne is special because the imm16 can be treated as signed or unsigned,
|
||||
// and initially selected to the signed versions.
|
||||
if (SOPKOpc == AMDGPU::S_CMPK_EQ_I32 || SOPKOpc == AMDGPU::S_CMPK_LG_I32) {
|
||||
// and initially selected to the unsigned versions.
|
||||
if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
|
||||
bool HasUImm;
|
||||
if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
|
||||
if (HasUImm) {
|
||||
SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_I32) ?
|
||||
AMDGPU::S_CMPK_EQ_U32 : AMDGPU::S_CMPK_LG_U32;
|
||||
if (!HasUImm) {
|
||||
SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
|
||||
AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
|
||||
}
|
||||
|
||||
MI.setDesc(TII->get(SOPKOpc));
|
||||
|
@ -644,8 +644,8 @@ class SOPC_32<bits<7> op, string opName, list<dag> pattern = []>
|
||||
class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []>
|
||||
: SOPC_Base<op, SSrc_b64, SSrc_b32, opName, pattern>;
|
||||
|
||||
def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32", COND_EQ>;
|
||||
def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32", COND_NE>;
|
||||
def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32">;
|
||||
def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32">;
|
||||
def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>;
|
||||
def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>;
|
||||
def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">;
|
||||
|
@ -320,10 +320,10 @@ defm V_CMPSX_TRU_F64 : VOPCX_F64 <"v_cmpsx_tru_f64">;
|
||||
|
||||
defm V_CMP_F_I32 : VOPC_I32 <"v_cmp_f_i32">;
|
||||
defm V_CMP_LT_I32 : VOPC_I32 <"v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
|
||||
defm V_CMP_EQ_I32 : VOPC_I32 <"v_cmp_eq_i32", COND_EQ>;
|
||||
defm V_CMP_EQ_I32 : VOPC_I32 <"v_cmp_eq_i32">;
|
||||
defm V_CMP_LE_I32 : VOPC_I32 <"v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
|
||||
defm V_CMP_GT_I32 : VOPC_I32 <"v_cmp_gt_i32", COND_SGT>;
|
||||
defm V_CMP_NE_I32 : VOPC_I32 <"v_cmp_ne_i32", COND_NE>;
|
||||
defm V_CMP_NE_I32 : VOPC_I32 <"v_cmp_ne_i32">;
|
||||
defm V_CMP_GE_I32 : VOPC_I32 <"v_cmp_ge_i32", COND_SGE>;
|
||||
defm V_CMP_T_I32 : VOPC_I32 <"v_cmp_t_i32">;
|
||||
|
||||
@ -338,10 +338,10 @@ defm V_CMPX_T_I32 : VOPCX_I32 <"v_cmpx_t_i32">;
|
||||
|
||||
defm V_CMP_F_I64 : VOPC_I64 <"v_cmp_f_i64">;
|
||||
defm V_CMP_LT_I64 : VOPC_I64 <"v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
|
||||
defm V_CMP_EQ_I64 : VOPC_I64 <"v_cmp_eq_i64", COND_EQ>;
|
||||
defm V_CMP_EQ_I64 : VOPC_I64 <"v_cmp_eq_i64">;
|
||||
defm V_CMP_LE_I64 : VOPC_I64 <"v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
|
||||
defm V_CMP_GT_I64 : VOPC_I64 <"v_cmp_gt_i64", COND_SGT>;
|
||||
defm V_CMP_NE_I64 : VOPC_I64 <"v_cmp_ne_i64", COND_NE>;
|
||||
defm V_CMP_NE_I64 : VOPC_I64 <"v_cmp_ne_i64">;
|
||||
defm V_CMP_GE_I64 : VOPC_I64 <"v_cmp_ge_i64", COND_SGE>;
|
||||
defm V_CMP_T_I64 : VOPC_I64 <"v_cmp_t_i64">;
|
||||
|
||||
@ -460,8 +460,8 @@ class ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : Pat <
|
||||
(inst $src0, $src1)
|
||||
>;
|
||||
|
||||
def : ICMP_Pattern <COND_EQ, V_CMP_EQ_I32_e64, i32>;
|
||||
def : ICMP_Pattern <COND_NE, V_CMP_NE_I32_e64, i32>;
|
||||
def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
|
||||
def : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
|
||||
def : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
|
||||
def : ICMP_Pattern <COND_UGE, V_CMP_GE_U32_e64, i32>;
|
||||
def : ICMP_Pattern <COND_ULT, V_CMP_LT_U32_e64, i32>;
|
||||
@ -471,8 +471,8 @@ def : ICMP_Pattern <COND_SGE, V_CMP_GE_I32_e64, i32>;
|
||||
def : ICMP_Pattern <COND_SLT, V_CMP_LT_I32_e64, i32>;
|
||||
def : ICMP_Pattern <COND_SLE, V_CMP_LE_I32_e64, i32>;
|
||||
|
||||
def : ICMP_Pattern <COND_EQ, V_CMP_EQ_I64_e64, i64>;
|
||||
def : ICMP_Pattern <COND_NE, V_CMP_NE_I64_e64, i64>;
|
||||
def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U64_e64, i64>;
|
||||
def : ICMP_Pattern <COND_NE, V_CMP_NE_U64_e64, i64>;
|
||||
def : ICMP_Pattern <COND_UGT, V_CMP_GT_U64_e64, i64>;
|
||||
def : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;
|
||||
def : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;
|
||||
|
@ -57,8 +57,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
|
||||
; SI: v_cmp_ne_i32
|
||||
; SI-NOT: v_cmp_ne_i32
|
||||
; SI: v_cmp_ne_u32
|
||||
; SI-NOT: v_cmp_ne_u32
|
||||
; SI: v_cndmask_b32
|
||||
define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind {
|
||||
%cmp = icmp ne i32 addrspace(3)* %lds, null
|
||||
|
@ -11,7 +11,7 @@
|
||||
; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
|
||||
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
|
||||
; HSA-DAG: v_cmp_ne_i32_e64 vcc, [[PTR]], -1
|
||||
; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
|
||||
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
|
||||
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
|
||||
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
|
||||
@ -34,7 +34,7 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
|
||||
; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
|
||||
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
|
||||
; HSA-DAG: v_cmp_ne_i32_e64 vcc, [[PTR]], -1
|
||||
; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
|
||||
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
|
||||
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
|
||||
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
|
||||
@ -79,7 +79,7 @@ define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
|
||||
; HSA: enable_sgpr_queue_ptr = 0
|
||||
|
||||
; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
|
||||
; HSA-DAG: v_cmp_ne_i64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
|
||||
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
|
||||
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
|
||||
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
|
||||
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
|
||||
@ -96,7 +96,7 @@ define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
|
||||
; HSA: enable_sgpr_queue_ptr = 0
|
||||
|
||||
; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
|
||||
; HSA-DAG: v_cmp_ne_i64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
|
||||
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
|
||||
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
|
||||
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
|
||||
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
|
||||
|
@ -31,7 +31,7 @@ end:
|
||||
; GCN-LABEL: {{^}}test_brcc_i1:
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: v_and_b32_e32 v{{[0-9]+}}, 1,
|
||||
; GCN: v_cmp_eq_i32_e32 vcc,
|
||||
; GCN: v_cmp_eq_u32_e32 vcc,
|
||||
; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: buffer_store_dword
|
||||
|
@ -8,7 +8,7 @@
|
||||
;
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
; CHECK: ; %LOOP49
|
||||
; CHECK: v_cmp_ne_i32_e32 vcc,
|
||||
; CHECK: v_cmp_ne_u32_e32 vcc,
|
||||
; CHECK: s_cbranch_vccnz
|
||||
; CHECK: ; %ENDIF53
|
||||
define amdgpu_vs float @main(i32 %in) {
|
||||
|
@ -95,7 +95,7 @@ for.body:
|
||||
|
||||
; GCN-LABEL: {{^}}loop_arg_0:
|
||||
; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
|
||||
; GCN: v_cmp_eq_i32_e32 vcc, 1,
|
||||
; GCN: v_cmp_eq_u32_e32 vcc, 1,
|
||||
|
||||
; GCN: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, exec, vcc
|
||||
; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]
|
||||
|
@ -5,7 +5,7 @@ declare i1 @llvm.amdgcn.class.f32(float, i32)
|
||||
; Produces error after adding an implicit def to v_cndmask_b32
|
||||
|
||||
; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
|
||||
; GCN: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
|
||||
; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
|
||||
; GCN: v_cndmask_b32_e64 v1, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
|
||||
|
@ -7,7 +7,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
; --------------------------------------------------------------------------------
|
||||
|
||||
; GCN-LABEL: {{^}}commute_eq_64_i32:
|
||||
; GCN: v_cmp_eq_i32_e32 vcc, 64, v{{[0-9]+}}
|
||||
; GCN: v_cmp_eq_u32_e32 vcc, 64, v{{[0-9]+}}
|
||||
define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
@ -20,7 +20,7 @@ define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}commute_ne_64_i32:
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, 64, v{{[0-9]+}}
|
||||
; GCN: v_cmp_ne_u32_e32 vcc, 64, v{{[0-9]+}}
|
||||
define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
@ -35,7 +35,7 @@ define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1
|
||||
; FIXME: Why isn't this being folded as a constant?
|
||||
; GCN-LABEL: {{^}}commute_ne_litk_i32:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, [[K]], v{{[0-9]+}}
|
||||
; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}}
|
||||
define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
@ -172,7 +172,7 @@ define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1
|
||||
; --------------------------------------------------------------------------------
|
||||
|
||||
; GCN-LABEL: {{^}}commute_eq_64_i64:
|
||||
; GCN: v_cmp_eq_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: v_cmp_eq_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
@ -185,7 +185,7 @@ define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}commute_ne_64_i64:
|
||||
; GCN: v_cmp_ne_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: v_cmp_ne_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
@ -697,7 +697,7 @@ define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
||||
; SIShrinkInstructions, this was using the VOP3 compare.
|
||||
|
||||
; GCN-LABEL: {{^}}commute_frameindex:
|
||||
; GCN: v_cmp_eq_i32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; GCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
define void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 {
|
||||
entry:
|
||||
%stack0 = alloca i32
|
||||
|
@ -15,7 +15,7 @@
|
||||
; GCN: s_mov_b32 m0, -1
|
||||
; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
|
||||
|
||||
; GCN: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
|
||||
; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
|
||||
; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
|
||||
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
|
||||
; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
|
||||
@ -93,7 +93,7 @@ endif:
|
||||
; GCN: s_mov_b32 m0, -1
|
||||
; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
|
||||
|
||||
; GCN: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
|
||||
; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
|
||||
|
||||
; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
|
||||
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
|
||||
@ -122,7 +122,7 @@ endif:
|
||||
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[8:11], s12 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
|
||||
; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
|
||||
; GCN: v_cmp_ne_i32_e32 vcc,
|
||||
; GCN: v_cmp_ne_u32_e32 vcc,
|
||||
; GCN: s_and_b64 vcc, exec, vcc
|
||||
; GCN: buffer_store_dword [[VAL_LOOP]], off, s[8:11], s12 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill
|
||||
; GCN: s_waitcnt vmcnt(0) expcnt(0)
|
||||
@ -173,7 +173,7 @@ end:
|
||||
; GCN: s_mov_b32 m0, -1
|
||||
; VMEM: ds_read_b32 [[LOAD0:v[0-9]+]]
|
||||
|
||||
; GCN: v_cmp_ne_i32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
|
||||
; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
|
||||
|
||||
; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
|
||||
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
|
||||
|
@ -3,13 +3,13 @@
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
; GCN-LABEL: {{^}}convergent_inlineasm:
|
||||
; GCN: BB#0:
|
||||
; GCN: v_cmp_ne_i32_e64
|
||||
; GCN: v_cmp_ne_u32_e64
|
||||
; GCN: ; mask branch
|
||||
; GCN: BB{{[0-9]+_[0-9]+}}:
|
||||
define void @convergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 1) #1
|
||||
%tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1) #1
|
||||
%tmp2 = icmp eq i32 %tmp, 8
|
||||
br i1 %tmp2, label %bb3, label %bb5
|
||||
|
||||
@ -26,13 +26,13 @@ bb5: ; preds = %bb3, %bb
|
||||
; GCN: ; mask branch
|
||||
|
||||
; GCN: BB{{[0-9]+_[0-9]+}}:
|
||||
; GCN: v_cmp_ne_i32_e64
|
||||
; GCN: v_cmp_ne_u32_e64
|
||||
|
||||
; GCN: BB{{[0-9]+_[0-9]+}}:
|
||||
define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 1)
|
||||
%tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1)
|
||||
%tmp2 = icmp eq i32 %tmp, 8
|
||||
br i1 %tmp2, label %bb3, label %bb5
|
||||
|
||||
|
@ -19,7 +19,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
; FUNC-LABEL: {{^}}s_ctlz_i32:
|
||||
; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
|
||||
; GCN-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]
|
||||
; GCN-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
|
||||
; GCN-DAG: v_cmp_eq_u32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]
|
||||
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
@ -36,7 +36,7 @@ define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
|
||||
; FUNC-LABEL: {{^}}v_ctlz_i32:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]]
|
||||
; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
|
||||
; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, [[CTLZ]]
|
||||
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[CTLZ]], 32, vcc
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
@ -99,7 +99,7 @@ define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrsp
|
||||
; FUNC-LABEL: {{^}}v_ctlz_i8:
|
||||
; GCN: buffer_load_ubyte [[VAL:v[0-9]+]],
|
||||
; GCN-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
|
||||
; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
|
||||
; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, [[CTLZ]]
|
||||
; GCN-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc
|
||||
; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]]
|
||||
; GCN: buffer_store_byte [[RESULT]],
|
||||
@ -112,7 +112,7 @@ define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %
|
||||
|
||||
; FUNC-LABEL: {{^}}s_ctlz_i64:
|
||||
; GCN: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
|
||||
; GCN-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
|
||||
; GCN-DAG: v_cmp_eq_u32_e64 vcc, s[[HI]], 0{{$}}
|
||||
; GCN-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
|
||||
; GCN-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
|
||||
; GCN-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
|
||||
@ -138,13 +138,13 @@ define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind
|
||||
; FUNC-LABEL: {{^}}v_ctlz_i64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
|
||||
; GCN-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
|
||||
; GCN-DAG: v_cmp_eq_u32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
|
||||
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
|
||||
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
|
||||
; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]
|
||||
; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]
|
||||
; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
|
||||
; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, [[OR]]
|
||||
; GCN-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
|
||||
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
|
||||
define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
|
||||
|
@ -92,7 +92,7 @@ define void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)
|
||||
|
||||
; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64:
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
|
||||
; SI-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
|
||||
; SI-DAG: v_cmp_eq_u32_e64 vcc, s[[HI]], 0{{$}}
|
||||
; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
|
||||
; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
|
||||
; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
|
||||
@ -117,7 +117,7 @@ define void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 %va
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64:
|
||||
; SI-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
|
||||
; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
|
||||
; SI-DAG: v_cmp_eq_u32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
|
||||
; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
|
||||
; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
|
||||
; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
|
||||
@ -188,7 +188,7 @@ define void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI-DAG: v_ffbh_u32_e32 [[RESULT0:v[0-9]+]], [[VAL]]
|
||||
; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[VAL]]
|
||||
; SI-DAG: v_cmp_eq_u32_e32 vcc, 0, [[VAL]]
|
||||
; SI-DAG: v_cndmask_b32_e64 [[RESULT1:v[0-9]+]], 0, 1, vcc
|
||||
; SI-DAG: buffer_store_dword [[RESULT0]]
|
||||
; SI-DAG: buffer_store_byte [[RESULT1]]
|
||||
|
@ -15,8 +15,8 @@
|
||||
|
||||
; COMMON-DAG: v_rcp_f64_e32 [[RCP_SCALE0:v\[[0-9]+:[0-9]+\]]], [[SCALE0]]
|
||||
|
||||
; SI-DAG: v_cmp_eq_i32_e32 vcc, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI-DAG: v_cmp_eq_u32_e32 vcc, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI-DAG: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI-DAG: s_xor_b64 vcc, [[CMP0]], vcc
|
||||
|
||||
; COMMON-DAG: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[RCP_SCALE0]], 1.0
|
||||
|
@ -6,7 +6,7 @@
|
||||
; SI: s_and_saveexec_b64
|
||||
; SI: s_xor_b64
|
||||
; SI: v_mov_b32_e32 [[REG]], -1{{$}}
|
||||
; SI: v_cmp_ne_i32_e32 vcc, 0, [[REG]]
|
||||
; SI: v_cmp_ne_u32_e32 vcc, 0, [[REG]]
|
||||
; SI: s_and_saveexec_b64
|
||||
; SI: s_xor_b64
|
||||
; SI: s_endpgm
|
||||
|
@ -2,7 +2,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SI-LABEL: {{^}}test_i64_eq:
|
||||
; SI: v_cmp_eq_i64
|
||||
; SI: v_cmp_eq_u64
|
||||
define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
|
||||
%cmp = icmp eq i64 %a, %b
|
||||
%result = sext i1 %cmp to i32
|
||||
@ -11,7 +11,7 @@ define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_i64_ne:
|
||||
; SI: v_cmp_ne_i64
|
||||
; SI: v_cmp_ne_u64
|
||||
define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
|
||||
%cmp = icmp ne i64 %a, %b
|
||||
%result = sext i1 %cmp to i32
|
||||
|
@ -357,7 +357,7 @@ bb2:
|
||||
|
||||
; CHECK-LABEL: {{^}}extract_adjacent_blocks:
|
||||
; CHECK: s_load_dword [[ARG:s[0-9]+]]
|
||||
; CHECK: s_cmp_lg_i32
|
||||
; CHECK: s_cmp_lg_u32
|
||||
; CHECK: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: buffer_load_dwordx4
|
||||
@ -396,7 +396,7 @@ bb7:
|
||||
|
||||
; CHECK-LABEL: {{^}}insert_adjacent_blocks:
|
||||
; CHECK: s_load_dword [[ARG:s[0-9]+]]
|
||||
; CHECK: s_cmp_lg_i32
|
||||
; CHECK: s_cmp_lg_u32
|
||||
; CHECK: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: buffer_load_dwordx4
|
||||
|
@ -40,12 +40,12 @@ endif:
|
||||
|
||||
; CHECK-LABEL: {{^}}v_cmp_asm:
|
||||
; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
|
||||
; CHECK: v_cmp_ne_i32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]]
|
||||
; CHECK: v_cmp_ne_u32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]]
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]]
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]]
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
|
||||
define void @v_cmp_asm(i64 addrspace(1)* %out, i32 %in) {
|
||||
%sgpr = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %in)
|
||||
%sgpr = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in)
|
||||
store i64 %sgpr, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -77,7 +77,7 @@ define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b,
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc:
|
||||
; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
|
||||
; SI: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
|
||||
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
|
||||
%cmp = icmp eq i32 %i, 0
|
||||
@ -109,8 +109,8 @@ define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, fl
|
||||
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
|
||||
; SI-DAG: v_cmp_eq_i32_e32 [[CMP0:vcc]], 0, v{{[0-9]+}}
|
||||
; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
|
||||
; SI-DAG: v_cmp_eq_u32_e32 [[CMP0:vcc]], 0, v{{[0-9]+}}
|
||||
; SI-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
|
||||
; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]]
|
||||
; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]]
|
||||
; SI: s_endpgm
|
||||
@ -135,18 +135,18 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc:
|
||||
; SI: v_cmp_eq_i32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI: s_xor_b64 [[SAVE]], exec, [[SAVE]]
|
||||
|
||||
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
|
||||
; SI: v_cmp_ne_i32_e32 vcc, 0, [[LOAD]]
|
||||
; SI: v_cmp_ne_u32_e32 vcc, 0, [[LOAD]]
|
||||
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
|
||||
|
||||
; SI: BB9_2:
|
||||
; SI: s_or_b64 exec, exec, [[SAVE]]
|
||||
; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: buffer_store_dword
|
||||
; SI: s_endpgm
|
||||
|
@ -5,7 +5,7 @@ declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
|
||||
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
|
||||
|
||||
; GCN-LABEL: {{^}}v_icmp_i32_eq:
|
||||
; GCN: v_cmp_eq_i32_e64
|
||||
; GCN: v_cmp_eq_u32_e64
|
||||
define void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
|
||||
%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
@ -13,14 +13,14 @@ define void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_icmp:
|
||||
; GCN-NOT: v_cmp_eq_i32_e64
|
||||
; GCN-NOT: v_cmp_eq_u32_e64
|
||||
define void @v_icmp(i64 addrspace(1)* %out, i32 %src) {
|
||||
%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
; GCN-LABEL: {{^}}v_icmp_i32_ne:
|
||||
; GCN: v_cmp_ne_i32_e64
|
||||
; GCN: v_cmp_ne_u32_e64
|
||||
define void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) {
|
||||
%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
@ -91,7 +91,7 @@ define void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_icmp_i64_eq:
|
||||
; GCN: v_cmp_eq_i64_e64
|
||||
; GCN: v_cmp_eq_u64_e64
|
||||
define void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) {
|
||||
%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
@ -99,7 +99,7 @@ define void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_icmp_i64_ne:
|
||||
; GCN: v_cmp_ne_i64_e64
|
||||
; GCN: v_cmp_ne_u64_e64
|
||||
define void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) {
|
||||
%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
|
@ -18,7 +18,7 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 {
|
||||
; SI-DAG: v_not_b32_e32
|
||||
; SI-DAG: v_not_b32_e32
|
||||
|
||||
; SI-DAG: v_cmp_eq_i32
|
||||
; SI-DAG: v_cmp_eq_u32
|
||||
|
||||
; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff
|
||||
; SI-DAG: v_cmp_gt_i32
|
||||
|
@ -17,7 +17,7 @@ define void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1 %b) nounwind
|
||||
; SI-DAG: buffer_load_ubyte [[COND:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
|
||||
; SI-DAG: buffer_load_ubyte [[A:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:45
|
||||
; SI-DAG: buffer_load_ubyte [[B:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
|
||||
; SI: v_cmp_eq_i32_e32 vcc, 1, [[COND]]
|
||||
; SI: v_cmp_eq_u32_e32 vcc, 1, [[COND]]
|
||||
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
|
||||
define void @s_minmax_i1(i1 addrspace(1)* %out, i1 zeroext %cond, i1 zeroext %a, i1 zeroext %b) nounwind {
|
||||
%cmp = icmp slt i1 %cond, false
|
||||
|
@ -96,7 +96,7 @@ define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32>
|
||||
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
|
||||
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
|
||||
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
|
||||
; SI-DAG: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
|
||||
; SI-DAG: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
|
||||
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
|
||||
@ -112,7 +112,7 @@ define void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x
|
||||
; FUNC-LABEL: {{^}}s_select_v4f32:
|
||||
; SI: s_load_dwordx4
|
||||
; SI: s_load_dwordx4
|
||||
; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
|
||||
; SI: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
|
||||
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
|
@ -68,7 +68,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}selectcc_bool:
|
||||
; SI: v_cmp_ne_i32
|
||||
; SI: v_cmp_ne_u32
|
||||
; SI-NEXT: v_cndmask_b32_e64
|
||||
; SI-NOT: cmp
|
||||
; SI-NOT: cndmask
|
||||
|
@ -8,7 +8,7 @@
|
||||
; EG: OR_INT
|
||||
; EG: CNDE_INT
|
||||
; EG: CNDE_INT
|
||||
; SI: v_cmp_eq_i64
|
||||
; SI: v_cmp_eq_u64
|
||||
; SI: v_cndmask
|
||||
; SI: v_cndmask
|
||||
define void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) {
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_ne_i32_e32 vcc,
|
||||
; GCN: v_cmp_ne_u32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT:buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
@ -21,7 +21,7 @@ define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
|
||||
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_ne_i32_e32 vcc,
|
||||
; GCN: v_cmp_ne_u32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
@ -38,7 +38,7 @@ define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
|
||||
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_eq_i32_e32 vcc,
|
||||
; GCN: v_cmp_eq_u32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
@ -52,7 +52,7 @@ define void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounw
|
||||
|
||||
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_eq_i32_e32 vcc,
|
||||
; GCN: v_cmp_eq_u32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
@ -66,7 +66,7 @@ define void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounw
|
||||
|
||||
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_ne_i32_e32 vcc,
|
||||
; GCN: v_cmp_ne_u32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
@ -80,7 +80,7 @@ define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
|
||||
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_ne_i32_e32 vcc,
|
||||
; GCN: v_cmp_ne_u32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
@ -94,7 +94,7 @@ define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
|
||||
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_eq_i32_e32 vcc,
|
||||
; GCN: v_cmp_eq_u32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
@ -108,7 +108,7 @@ define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
|
||||
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_eq_i32_e32 vcc,
|
||||
; GCN: v_cmp_eq_u32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
@ -151,7 +151,7 @@ define void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounw
|
||||
; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
|
||||
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK255]]
|
||||
; GCN: v_cmp_ne_u32_e32 vcc, [[B]], [[VK255]]
|
||||
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
@ -164,7 +164,7 @@ define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
|
||||
|
||||
; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
|
||||
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, -1, [[B]]{{$}}
|
||||
; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}}
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
@ -178,7 +178,7 @@ define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nou
|
||||
|
||||
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
|
||||
; GCN: s_load_dword [[B:s[0-9]+]]
|
||||
; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
|
||||
; GCN: v_cmp_ne_u32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
@ -199,7 +199,7 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n
|
||||
; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff
|
||||
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK]]{{$}}
|
||||
; GCN: v_cmp_ne_u32_e32 vcc, [[B]], [[VK]]{{$}}
|
||||
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
|
@ -225,7 +225,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}i32_eq:
|
||||
; R600: SETE_INT
|
||||
; SI: v_cmp_eq_i32
|
||||
; SI: v_cmp_eq_u32
|
||||
define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
entry:
|
||||
%0 = icmp eq i32 %a, %b
|
||||
@ -236,7 +236,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}i32_ne:
|
||||
; R600: SETNE_INT
|
||||
; SI: v_cmp_ne_i32
|
||||
; SI: v_cmp_ne_u32
|
||||
define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
entry:
|
||||
%0 = icmp ne i32 %a, %b
|
||||
@ -335,11 +335,11 @@ entry:
|
||||
|
||||
; FIXME: This does 4 compares
|
||||
; FUNC-LABEL: {{^}}v3i32_eq:
|
||||
; SI-DAG: v_cmp_eq_i32
|
||||
; SI-DAG: v_cmp_eq_u32
|
||||
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
|
||||
; SI-DAG: v_cmp_eq_i32
|
||||
; SI-DAG: v_cmp_eq_u32
|
||||
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
|
||||
; SI-DAG: v_cmp_eq_i32
|
||||
; SI-DAG: v_cmp_eq_u32
|
||||
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
|
||||
; SI: s_endpgm
|
||||
define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) {
|
||||
@ -356,11 +356,11 @@ define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptr
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v3i8_eq:
|
||||
; SI-DAG: v_cmp_eq_i32
|
||||
; SI-DAG: v_cmp_eq_u32
|
||||
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
|
||||
; SI-DAG: v_cmp_eq_i32
|
||||
; SI-DAG: v_cmp_eq_u32
|
||||
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
|
||||
; SI-DAG: v_cmp_eq_i32
|
||||
; SI-DAG: v_cmp_eq_u32
|
||||
; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
|
||||
; SI: s_endpgm
|
||||
define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) {
|
||||
@ -379,7 +379,7 @@ define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra,
|
||||
; Make sure we don't try to emit i1 setcc ops
|
||||
; FUNC-LABEL: setcc-i1
|
||||
; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 1
|
||||
; SI: s_cmp_eq_i32 [[AND]], 0
|
||||
; SI: s_cmp_eq_u32 [[AND]], 0
|
||||
define void @setcc-i1(i32 %in) {
|
||||
%and = and i32 %in, 1
|
||||
%cmp = icmp eq i32 %and, 0
|
||||
|
@ -159,7 +159,7 @@ entry:
|
||||
;;;==========================================================================;;;
|
||||
|
||||
; FUNC-LABEL: {{^}}i64_eq:
|
||||
; SI: v_cmp_eq_i64
|
||||
; SI: v_cmp_eq_u64
|
||||
define void @i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) {
|
||||
entry:
|
||||
%0 = icmp eq i64 %a, %b
|
||||
@ -169,7 +169,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}i64_ne:
|
||||
; SI: v_cmp_ne_i64
|
||||
; SI: v_cmp_ne_u64
|
||||
define void @i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) {
|
||||
entry:
|
||||
%0 = icmp ne i64 %a, %b
|
||||
|
@ -69,10 +69,10 @@ endif:
|
||||
|
||||
; SI: BB2_2:
|
||||
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
|
||||
; SI: v_cmp_eq_i32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
|
||||
; SI: v_cmp_eq_u32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
|
||||
; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
|
||||
|
||||
; SI: v_cmp_ne_i32_e32 [[CMP_CMP:vcc]], 0, [[V_CMP]]
|
||||
; SI: v_cmp_ne_u32_e32 [[CMP_CMP:vcc]], 0, [[V_CMP]]
|
||||
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
|
||||
|
@ -30,7 +30,7 @@ ENDIF:
|
||||
; FIXME: This could be folded into the s_or_b64 instruction
|
||||
; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0
|
||||
; SI: [[LOOP_LABEL:[A-Z0-9]+]]
|
||||
; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
|
||||
; SI_IF_BREAK instruction:
|
||||
; SI: s_or_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], vcc, [[ZERO]]
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator:
|
||||
; GCN: v_cmp_eq_i32
|
||||
; GCN: v_cmp_eq_u32
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: s_xor_b64
|
||||
; GCN: s_branch BB0_1
|
||||
@ -26,7 +26,7 @@ bb68:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator_swap_block_order:
|
||||
; GCN: v_cmp_eq_i32
|
||||
; GCN: v_cmp_eq_u32
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: s_xor_b64
|
||||
; GCN: s_endpgm
|
||||
|
@ -14,7 +14,7 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
|
||||
; uses an SGPR (implicit vcc).
|
||||
|
||||
; SI-LABEL: {{^}}sint_to_fp_i1_f64:
|
||||
; SI-DAG: v_cmp_eq_i32_e64 vcc,
|
||||
; SI-DAG: v_cmp_eq_u32_e64 vcc,
|
||||
; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
|
||||
; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
|
||||
|
@ -21,7 +21,7 @@ define void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
|
||||
; GCN: v_cndmask
|
||||
; GCN: v_cndmask
|
||||
|
||||
; GCN-DAG: v_cmp_eq_i64
|
||||
; GCN-DAG: v_cmp_eq_u64
|
||||
; GCN-DAG: v_cmp_lt_u64
|
||||
|
||||
; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
||||
|
@ -77,7 +77,7 @@ define void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrsp
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_sint_to_fp_i1_f32:
|
||||
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
|
||||
; SI: v_cmp_eq_u32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
@ -101,7 +101,7 @@ define void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) #0 {
|
||||
; FUNC-LABEL: {{^}}v_sint_to_fp_i1_f32_load:
|
||||
; SI: {{buffer|flat}}_load_ubyte
|
||||
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
|
||||
; SI: v_cmp_eq_i32
|
||||
; SI: v_cmp_eq_u32
|
||||
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
|
||||
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
|
||||
; SI: s_endpgm
|
||||
|
@ -87,7 +87,7 @@ define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
|
||||
; FIXME: why does the skip depend on the asm length in the same block?
|
||||
|
||||
; CHECK-LABEL: {{^}}test_kill_control_flow:
|
||||
; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK-NEXT: ; BB#1:
|
||||
@ -137,7 +137,7 @@ exit:
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
|
||||
; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK-NEXT: ; BB#1: ; %bb
|
||||
@ -199,7 +199,7 @@ exit:
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_kill_divergent_loop:
|
||||
; CHECK: v_cmp_eq_i32_e32 vcc, 0, v0
|
||||
; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
|
||||
; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
|
||||
@ -216,7 +216,7 @@ exit:
|
||||
|
||||
; CHECK-NEXT: ; BB#3:
|
||||
; CHECK: buffer_load_dword [[LOAD:v[0-9]+]]
|
||||
; CHECK: v_cmp_eq_i32_e32 vcc, 0, [[LOAD]]
|
||||
; CHECK: v_cmp_eq_u32_e32 vcc, 0, [[LOAD]]
|
||||
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; CHECK-NEXT: s_cbranch_vccnz [[LOOP_BB]]
|
||||
|
||||
|
@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.groupstaticsize() #1
|
||||
@lds = addrspace(3) global [512 x i32] undef, align 4
|
||||
|
||||
; GCN-LABEL: {{^}}br_scc_eq_i32_inline_imm:
|
||||
; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 4{{$}}
|
||||
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 4{{$}}
|
||||
define void @br_scc_eq_i32_inline_imm(i32 %cond, i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%cmp0 = icmp eq i32 %cond, 4
|
||||
@ -88,7 +88,7 @@ endif:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min_m1:
|
||||
; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0xffff7fff{{$}}
|
||||
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0xffff7fff{{$}}
|
||||
define void @br_scc_eq_i32_simm16_min_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%cmp0 = icmp eq i32 %cond, -32769
|
||||
@ -136,7 +136,7 @@ endif:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max_p1:
|
||||
; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0x10000{{$}}
|
||||
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0x10000{{$}}
|
||||
define void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%cmp0 = icmp eq i32 %cond, 65536
|
||||
@ -572,7 +572,7 @@ endif:
|
||||
; GCN-LABEL: {{^}}br_scc_eq_i64_inline_imm:
|
||||
; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 4
|
||||
|
||||
; SI: v_cmp_eq_i64_e64
|
||||
; SI: v_cmp_eq_u64_e64
|
||||
define void @br_scc_eq_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%cmp0 = icmp eq i64 %cond, 4
|
||||
@ -592,7 +592,7 @@ endif:
|
||||
; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0
|
||||
; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
|
||||
|
||||
; SI: v_cmp_eq_i64_e32
|
||||
; SI: v_cmp_eq_u64_e32
|
||||
define void @br_scc_eq_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%cmp0 = icmp eq i64 %cond, 1234
|
||||
@ -610,7 +610,7 @@ endif:
|
||||
; GCN-LABEL: {{^}}br_scc_ne_i64_inline_imm:
|
||||
; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 4
|
||||
|
||||
; SI: v_cmp_ne_i64_e64
|
||||
; SI: v_cmp_ne_u64_e64
|
||||
define void @br_scc_ne_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%cmp0 = icmp ne i64 %cond, 4
|
||||
@ -630,7 +630,7 @@ endif:
|
||||
; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0
|
||||
; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
|
||||
|
||||
; SI: v_cmp_ne_i64_e32
|
||||
; SI: v_cmp_ne_u64_e32
|
||||
define void @br_scc_ne_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%cmp0 = icmp ne i64 %cond, 1234
|
||||
|
@ -6,7 +6,7 @@
|
||||
; XXX - Why does it like to use vcc?
|
||||
|
||||
; GCN-LABEL: {{^}}spill_m0:
|
||||
; GCN: s_cmp_lg_i32
|
||||
; GCN: s_cmp_lg_u32
|
||||
|
||||
; TOVGPR: s_mov_b32 vcc_hi, m0
|
||||
; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], vcc_hi, 0
|
||||
|
@ -8,7 +8,7 @@ target triple="amdgcn--"
|
||||
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK: v_mbcnt_lo_u32_b32_e64
|
||||
; CHECK-NEXT: v_cmp_eq_i32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; BB0_1:
|
||||
|
@ -4,7 +4,7 @@
|
||||
; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
|
||||
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
|
||||
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
|
||||
; SI: v_cmp_eq_i32_e32 vcc, 0, [[TMP]]{{$}}
|
||||
; SI: v_cmp_eq_u32_e32 vcc, 0, [[TMP]]{{$}}
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: buffer_store_byte
|
||||
define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
|
||||
@ -19,7 +19,7 @@ define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspa
|
||||
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0:
|
||||
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
|
||||
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
|
||||
; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
|
||||
; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}}
|
||||
; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
@ -116,7 +116,7 @@ define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspa
|
||||
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1:
|
||||
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
|
||||
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
|
||||
; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
|
||||
; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}}
|
||||
; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
@ -132,7 +132,7 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspa
|
||||
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_neg1:
|
||||
; XSI: buffer_load_ubyte [[LOAD:v[0-9]+]]
|
||||
; XSI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
|
||||
; XSI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}}
|
||||
; XSI: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}}
|
||||
; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
|
||||
; XSI-NEXT: buffer_store_byte [[RESULT]]
|
||||
define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
|
||||
@ -156,7 +156,7 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addr
|
||||
|
||||
; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1:
|
||||
; SI: buffer_load_sbyte [[LOAD:v[0-9]+]]
|
||||
; SI: v_cmp_ne_i32_e32 vcc, -1, [[LOAD]]{{$}}
|
||||
; SI: v_cmp_ne_u32_e32 vcc, -1, [[LOAD]]{{$}}
|
||||
; SI-NEXT: v_cndmask_b32_e64
|
||||
; SI: buffer_store_byte
|
||||
define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
|
||||
|
@ -51,7 +51,7 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64
|
||||
|
||||
; SI-LABEL: {{^}}trunc_i32_to_i1:
|
||||
; SI: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
|
||||
; SI: v_cmp_eq_i32
|
||||
; SI: v_cmp_eq_u32
|
||||
define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
|
||||
%a = load i32, i32 addrspace(1)* %ptr, align 4
|
||||
%trunc = trunc i32 %a to i1
|
||||
@ -62,7 +62,7 @@ define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
|
||||
|
||||
; SI-LABEL: {{^}}sgpr_trunc_i32_to_i1:
|
||||
; SI: s_and_b32 s{{[0-9]+}}, 1, s{{[0-9]+}}
|
||||
; SI: v_cmp_eq_i32
|
||||
; SI: v_cmp_eq_u32
|
||||
define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
|
||||
%trunc = trunc i32 %a to i1
|
||||
%result = select i1 %trunc, i32 1, i32 0
|
||||
@ -73,7 +73,7 @@ define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
|
||||
; SI-LABEL: {{^}}s_trunc_i64_to_i1:
|
||||
; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_and_b32 [[MASKED:s[0-9]+]], 1, s[[SLO]]
|
||||
; SI: v_cmp_eq_i32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], [[MASKED]], 1{{$}}
|
||||
; SI: v_cmp_eq_u32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], [[MASKED]], 1{{$}}
|
||||
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s{{\[}}[[VLO]]:[[VHI]]]
|
||||
define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
|
||||
%trunc = trunc i64 %x to i1
|
||||
@ -85,7 +85,7 @@ define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
|
||||
; SI-LABEL: {{^}}v_trunc_i64_to_i1:
|
||||
; SI: buffer_load_dwordx2 v{{\[}}[[VLO:[0-9]+]]:{{[0-9]+\]}}
|
||||
; SI: v_and_b32_e32 [[MASKED:v[0-9]+]], 1, v[[VLO]]
|
||||
; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]]
|
||||
; SI: v_cmp_eq_u32_e32 vcc, 1, [[MASKED]]
|
||||
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
|
||||
define void @v_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
|
@ -74,7 +74,7 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i
|
||||
; uses an SGPR (implicit vcc).
|
||||
|
||||
; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
|
||||
; SI-DAG: v_cmp_eq_i32_e64 vcc
|
||||
; SI-DAG: v_cmp_eq_u32_e64 vcc
|
||||
; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
|
||||
; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
|
||||
|
@ -18,7 +18,7 @@ define void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
|
||||
; GCN: v_cndmask
|
||||
; GCN: v_cndmask
|
||||
|
||||
; GCN-DAG: v_cmp_eq_i64
|
||||
; GCN-DAG: v_cmp_eq_u64
|
||||
; GCN-DAG: v_cmp_lt_u64
|
||||
|
||||
; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
|
||||
|
@ -77,7 +77,7 @@ define void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrsp
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_uint_to_fp_i1_to_f32:
|
||||
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
|
||||
; SI: v_cmp_eq_u32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
@ -101,7 +101,7 @@ define void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) #0 {
|
||||
; FUNC-LABEL: {{^}}v_uint_to_fp_i1_f32_load:
|
||||
; SI: {{buffer|flat}}_load_ubyte
|
||||
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
|
||||
; SI: v_cmp_eq_i32
|
||||
; SI: v_cmp_eq_u32
|
||||
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
|
||||
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
|
||||
; SI: s_endpgm
|
||||
|
@ -2,7 +2,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}uniform_if_scc:
|
||||
; GCN-DAG: s_cmp_eq_i32 s{{[0-9]+}}, 0
|
||||
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
@ -59,7 +59,7 @@ done:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc:
|
||||
; GCN-DAG: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
@ -119,7 +119,7 @@ done:
|
||||
; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
|
||||
; Using a floating-point value in an integer compare will cause the compare to
|
||||
; be selected for the SALU and then later moved to the VALU.
|
||||
; GCN: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
|
||||
; GCN: v_cmp_ne_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
|
||||
; GCN: s_and_b64 vcc, exec, [[COND]]
|
||||
; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
|
||||
; GCN: buffer_store_dword
|
||||
@ -167,7 +167,7 @@ endif:
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}uniform_if_else_ret:
|
||||
; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
@ -196,7 +196,7 @@ if.end: ; preds = %if.else, %if.then
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}uniform_if_else:
|
||||
; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
@ -284,7 +284,7 @@ bb9: ; preds = %bb8, %bb4
|
||||
; FIXME: We need to teach GCNFixSGPRCopies about uniform branches so we
|
||||
; get s_add_i32 here.
|
||||
; GCN: v_add_i32_e32 [[I:v[0-9]+]], vcc, -1, v{{[0-9]+}}
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, 0, [[I]]
|
||||
; GCN: v_cmp_ne_u32_e32 vcc, 0, [[I]]
|
||||
; GCN: s_and_b64 vcc, exec, vcc
|
||||
; GCN: s_cbranch_vccnz [[LOOP_LABEL]]
|
||||
; GCN: s_endpgm
|
||||
@ -309,7 +309,7 @@ done:
|
||||
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
|
||||
; GCN: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
|
||||
; GCN: s_cmp_lg_i32 {{s[0-9]+}}, 0
|
||||
; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0
|
||||
; GCN: s_cbranch_scc1 [[ENDIF_LABEL]]
|
||||
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: buffer_store_dword [[ONE]]
|
||||
@ -333,7 +333,7 @@ endif:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}divergent_inside_uniform:
|
||||
; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
|
||||
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
@ -362,13 +362,13 @@ endif:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}divergent_if_uniform_if:
|
||||
; GCN: v_cmp_eq_i32_e32 vcc, 0, v0
|
||||
; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
|
||||
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: buffer_store_dword [[ONE]]
|
||||
; GCN: s_or_b64 exec, exec, [[MASK]]
|
||||
; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; GCN: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
|
||||
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN: buffer_store_dword [[TWO]]
|
||||
@ -438,7 +438,7 @@ bb9: ; preds = %bb8, %bb4
|
||||
; VI-DAG: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
|
||||
; SI: v_cmp_eq_i64_e64
|
||||
; SI: v_cmp_eq_u64_e64
|
||||
; SI: s_and_b64 vcc, exec,
|
||||
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
@ -470,7 +470,7 @@ done:
|
||||
; VI-DAG: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
|
||||
; SI: v_cmp_ne_i64_e64
|
||||
; SI: v_cmp_ne_u64_e64
|
||||
; SI: s_and_b64 vcc, exec,
|
||||
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
@ -526,8 +526,7 @@ done:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}move_to_valu_i64_eq:
|
||||
; SI: v_cmp_eq_i64_e32
|
||||
; VI: v_cmp_eq_u64_e32
|
||||
; GCN: v_cmp_eq_u64_e32
|
||||
define void @move_to_valu_i64_eq(i32 addrspace(1)* %out) {
|
||||
%cond = load volatile i64, i64 addrspace(3)* undef
|
||||
%cmp0 = icmp eq i64 %cond, 0
|
||||
@ -546,8 +545,7 @@ done:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}move_to_valu_i64_ne:
|
||||
; SI: v_cmp_ne_i64_e32
|
||||
; VI: v_cmp_ne_u64_e32
|
||||
; GCN: v_cmp_ne_u64_e32
|
||||
define void @move_to_valu_i64_ne(i32 addrspace(1)* %out) {
|
||||
%cond = load volatile i64, i64 addrspace(3)* undef
|
||||
%cmp0 = icmp ne i64 %cond, 0
|
||||
|
@ -3,7 +3,7 @@
|
||||
; Test a simple uniform loop that lives inside non-uniform control flow.
|
||||
|
||||
; CHECK-LABEL: {{^}}test1:
|
||||
; CHECK: v_cmp_ne_i32_e32 vcc, 0
|
||||
; CHECK: v_cmp_ne_u32_e32 vcc, 0
|
||||
; CHECK: s_and_saveexec_b64
|
||||
; CHECK-NEXT: s_xor_b64
|
||||
; CHECK-NEXT: ; mask branch
|
||||
|
@ -65,7 +65,7 @@ end:
|
||||
}
|
||||
|
||||
; SI-LABEL: @simple_test_v_if
|
||||
; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
|
||||
|
||||
@ -91,7 +91,7 @@ exit:
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}simple_test_v_loop:
|
||||
; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
|
||||
; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
|
||||
@ -101,7 +101,7 @@ exit:
|
||||
; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; SI: buffer_load_dword
|
||||
; SI-DAG: buffer_store_dword
|
||||
; SI-DAG: v_cmp_eq_i32_e32 vcc,
|
||||
; SI-DAG: v_cmp_eq_u32_e32 vcc,
|
||||
; SI-DAG: s_and_b64 vcc, exec, vcc
|
||||
; SI: s_cbranch_vccz [[LABEL_LOOP]]
|
||||
; SI: [[LABEL_EXIT]]:
|
||||
@ -148,8 +148,8 @@ exit:
|
||||
; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]]
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]]
|
||||
; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
|
||||
; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
|
||||
; SI-DAG: v_cmp_ne_u32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
|
||||
; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
|
||||
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
|
||||
; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
|
||||
; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
|
||||
|
@ -30,7 +30,7 @@ entry:
|
||||
|
||||
; SI-LABEL: {{^}}zext_i1_to_i64:
|
||||
; SI: s_mov_b32 s{{[0-9]+}}, 0
|
||||
; SI: v_cmp_eq_i32
|
||||
; SI: v_cmp_eq_u32
|
||||
; SI: v_cndmask_b32
|
||||
define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
%cmp = icmp eq i32 %a, %b
|
||||
|
Loading…
Reference in New Issue
Block a user