diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 0fed33e3214..da725dae47c 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -407,7 +407,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); - BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32)) + BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) .addImm(0) .addReg(SrcReg, getKillRegState(KillSrc)); } diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index e4114adf599..95c30a55445 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -882,12 +882,12 @@ def : Pat < def : Pat < (i1 (trunc i32:$a)), - (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1) + (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), 1) >; def : Pat < (i1 (trunc i64:$a)), - (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), + (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), (EXTRACT_SUBREG $a, sub0)), 1) >; diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index dc1d20ddb27..396764b1850 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -131,7 +131,7 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { MI.eraseFromParent(); } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) && SrcRC == &AMDGPU::VReg_1RegClass) { - BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64)) + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_U32_e64)) .addOperand(Dst) .addOperand(Src) .addImm(0); diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 1a0f7d41a1f..010c909774f 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -237,13 +237,13 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) { return; // eq/ne is special because the imm16 can be treated as signed or unsigned, - // and initially selectd to the signed versions. - if (SOPKOpc == AMDGPU::S_CMPK_EQ_I32 || SOPKOpc == AMDGPU::S_CMPK_LG_I32) { + // and initially selected to the unsigned versions. + if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) { bool HasUImm; if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) { - if (HasUImm) { - SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_I32) ? - AMDGPU::S_CMPK_EQ_U32 : AMDGPU::S_CMPK_LG_U32; + if (!HasUImm) { + SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
+ AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32; } MI.setDesc(TII->get(SOPKOpc)); diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td index 9744cd3cd07..5a42016074b 100644 --- a/lib/Target/AMDGPU/SOPInstructions.td +++ b/lib/Target/AMDGPU/SOPInstructions.td @@ -644,8 +644,8 @@ class SOPC_32 op, string opName, list pattern = []> class SOPC_64_32 op, string opName, list pattern = []> : SOPC_Base; -def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32", COND_EQ>; -def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32", COND_NE>; +def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32">; +def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32">; def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>; def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>; def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">; diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td index 30e76aa938f..8014a2d67c2 100644 --- a/lib/Target/AMDGPU/VOPCInstructions.td +++ b/lib/Target/AMDGPU/VOPCInstructions.td @@ -320,10 +320,10 @@ defm V_CMPSX_TRU_F64 : VOPCX_F64 <"v_cmpsx_tru_f64">; defm V_CMP_F_I32 : VOPC_I32 <"v_cmp_f_i32">; defm V_CMP_LT_I32 : VOPC_I32 <"v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">; -defm V_CMP_EQ_I32 : VOPC_I32 <"v_cmp_eq_i32", COND_EQ>; +defm V_CMP_EQ_I32 : VOPC_I32 <"v_cmp_eq_i32">; defm V_CMP_LE_I32 : VOPC_I32 <"v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">; defm V_CMP_GT_I32 : VOPC_I32 <"v_cmp_gt_i32", COND_SGT>; -defm V_CMP_NE_I32 : VOPC_I32 <"v_cmp_ne_i32", COND_NE>; +defm V_CMP_NE_I32 : VOPC_I32 <"v_cmp_ne_i32">; defm V_CMP_GE_I32 : VOPC_I32 <"v_cmp_ge_i32", COND_SGE>; defm V_CMP_T_I32 : VOPC_I32 <"v_cmp_t_i32">; @@ -338,10 +338,10 @@ defm V_CMPX_T_I32 : VOPCX_I32 <"v_cmpx_t_i32">; defm V_CMP_F_I64 : VOPC_I64 <"v_cmp_f_i64">; defm V_CMP_LT_I64 : VOPC_I64 <"v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">; -defm V_CMP_EQ_I64 : VOPC_I64 <"v_cmp_eq_i64", COND_EQ>; +defm V_CMP_EQ_I64 : VOPC_I64 <"v_cmp_eq_i64">; defm V_CMP_LE_I64 : VOPC_I64 <"v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">; defm V_CMP_GT_I64 : VOPC_I64 <"v_cmp_gt_i64", COND_SGT>; -defm V_CMP_NE_I64 : VOPC_I64 <"v_cmp_ne_i64", COND_NE>; +defm V_CMP_NE_I64 : VOPC_I64 <"v_cmp_ne_i64">; defm V_CMP_GE_I64 : VOPC_I64 <"v_cmp_ge_i64", COND_SGE>; defm V_CMP_T_I64 : VOPC_I64 <"v_cmp_t_i64">; @@ -460,8 +460,8 @@ class ICMP_Pattern : Pat < (inst $src0, $src1) >; -def : ICMP_Pattern ; -def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; def : ICMP_Pattern ; def : ICMP_Pattern ; def : ICMP_Pattern ; @@ -471,8 +471,8 @@ def : ICMP_Pattern ; def : ICMP_Pattern ; def : ICMP_Pattern ; -def : ICMP_Pattern ; -def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; def : ICMP_Pattern ; def : ICMP_Pattern ; def : ICMP_Pattern ; diff --git a/test/CodeGen/AMDGPU/32-bit-local-address-space.ll b/test/CodeGen/AMDGPU/32-bit-local-address-space.ll index ff8c9045787..6495a1480df 100644 --- a/test/CodeGen/AMDGPU/32-bit-local-address-space.ll +++ b/test/CodeGen/AMDGPU/32-bit-local-address-space.ll @@ -57,8 +57,8 @@ entry: } ; FUNC-LABEL: {{^}}null_32bit_lds_ptr: -; SI: v_cmp_ne_i32 -; SI-NOT: v_cmp_ne_i32 +; SI: v_cmp_ne_u32 +; SI-NOT: v_cmp_ne_u32 ; SI: v_cndmask_b32 define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind { %cmp = icmp ne i32 addrspace(3)* %lds, null diff --git a/test/CodeGen/AMDGPU/addrspacecast.ll b/test/CodeGen/AMDGPU/addrspacecast.ll index 7d9f49b3ff6..0a2130c96ad 100644 --- 
a/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/test/CodeGen/AMDGPU/addrspacecast.ll @@ -11,7 +11,7 @@ ; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] ; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; HSA-DAG: v_cmp_ne_i32_e64 vcc, [[PTR]], -1 +; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1 ; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]] ; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 @@ -34,7 +34,7 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { ; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] ; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; HSA-DAG: v_cmp_ne_i32_e64 vcc, [[PTR]], -1 +; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1 ; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]] ; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 @@ -79,7 +79,7 @@ define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 { ; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}} -; HSA-DAG: v_cmp_ne_i64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}} +; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] ; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} @@ -96,7 +96,7 @@ define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 { ; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}} -; HSA-DAG: v_cmp_ne_i64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}} +; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] ; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} diff --git a/test/CodeGen/AMDGPU/basic-branch.ll b/test/CodeGen/AMDGPU/basic-branch.ll index 00636240bc6..7bc4d735feb 100644 --- a/test/CodeGen/AMDGPU/basic-branch.ll +++ b/test/CodeGen/AMDGPU/basic-branch.ll @@ -31,7 +31,7 @@ end: ; GCN-LABEL: {{^}}test_brcc_i1: ; GCN: buffer_load_ubyte ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, -; GCN: v_cmp_eq_i32_e32 vcc, +; GCN: v_cmp_eq_u32_e32 vcc, ; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]] ; GCN: buffer_store_dword diff --git a/test/CodeGen/AMDGPU/branch-uniformity.ll b/test/CodeGen/AMDGPU/branch-uniformity.ll index b3779e8dd42..e6f68417803 100644 --- a/test/CodeGen/AMDGPU/branch-uniformity.ll +++ b/test/CodeGen/AMDGPU/branch-uniformity.ll @@ -8,7 +8,7 @@ ; ; CHECK-LABEL: {{^}}main: ; CHECK: ; %LOOP49 -; CHECK: v_cmp_ne_i32_e32 vcc, +; CHECK: v_cmp_ne_u32_e32 vcc, ; CHECK: s_cbranch_vccnz ; CHECK: ; %ENDIF53 define amdgpu_vs float @main(i32 %in) { diff --git a/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/test/CodeGen/AMDGPU/cf-loop-on-constant.ll index 759c48b3a9c..a68310c6083 100644 --- a/test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ b/test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -95,7 +95,7 @@ for.body: ; GCN-LABEL: {{^}}loop_arg_0: ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} -; GCN: v_cmp_eq_i32_e32 vcc, 1, +; GCN: v_cmp_eq_u32_e32 vcc, 1, ; GCN: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, exec, vcc ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]] diff --git a/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll b/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll index 8f880a5b86a..bea565e9fb4 100644 --- a/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll +++ 
b/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll @@ -5,7 +5,7 @@ declare i1 @llvm.amdgcn.class.f32(float, i32) ; Produces error after adding an implicit def to v_cndmask_b32 ; GCN-LABEL: {{^}}vcc_shrink_vcc_def: -; GCN: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}} +; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}} ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc ; GCN: v_cndmask_b32_e64 v1, 0, 1, s{{\[[0-9]+:[0-9]+\]}} define void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) { diff --git a/test/CodeGen/AMDGPU/commute-compares.ll b/test/CodeGen/AMDGPU/commute-compares.ll index e21b694e4c4..055fd8f1ccd 100644 --- a/test/CodeGen/AMDGPU/commute-compares.ll +++ b/test/CodeGen/AMDGPU/commute-compares.ll @@ -7,7 +7,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 ; -------------------------------------------------------------------------------- ; GCN-LABEL: {{^}}commute_eq_64_i32: -; GCN: v_cmp_eq_i32_e32 vcc, 64, v{{[0-9]+}} +; GCN: v_cmp_eq_u32_e32 vcc, 64, v{{[0-9]+}} define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -20,7 +20,7 @@ define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 } ; GCN-LABEL: {{^}}commute_ne_64_i32: -; GCN: v_cmp_ne_i32_e32 vcc, 64, v{{[0-9]+}} +; GCN: v_cmp_ne_u32_e32 vcc, 64, v{{[0-9]+}} define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -35,7 +35,7 @@ define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 ; FIXME: Why isn't this being folded as a constant? ; GCN-LABEL: {{^}}commute_ne_litk_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039 -; GCN: v_cmp_ne_i32_e32 vcc, [[K]], v{{[0-9]+}} +; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}} define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -172,7 +172,7 @@ define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 ; -------------------------------------------------------------------------------- ; GCN-LABEL: {{^}}commute_eq_64_i64: -; GCN: v_cmp_eq_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_eq_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -185,7 +185,7 @@ define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 } ; GCN-LABEL: {{^}}commute_ne_64_i64: -; GCN: v_cmp_ne_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_ne_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -697,7 +697,7 @@ define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; SIShrinkInstructions, this was using the VOP3 compare. 
; GCN-LABEL: {{^}}commute_frameindex: -; GCN: v_cmp_eq_i32_e32 vcc, 0, v{{[0-9]+}} +; GCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}} define void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 { entry: %stack0 = alloca i32 diff --git a/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 2df5bb2380b..1e6e9bf2acf 100644 --- a/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -15,7 +15,7 @@ ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] -; GCN: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, +; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} @@ -93,7 +93,7 @@ endif: ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] -; GCN: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, +; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] @@ -122,7 +122,7 @@ endif: ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[8:11], s12 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]] -; GCN: v_cmp_ne_i32_e32 vcc, +; GCN: v_cmp_ne_u32_e32 vcc, ; GCN: s_and_b64 vcc, exec, vcc ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[8:11], s12 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: s_waitcnt vmcnt(0) expcnt(0) @@ -173,7 +173,7 @@ end: ; GCN: s_mov_b32 m0, -1 ; VMEM: ds_read_b32 [[LOAD0:v[0-9]+]] -; GCN: v_cmp_ne_i32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, +; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] diff --git a/test/CodeGen/AMDGPU/convergent-inlineasm.ll b/test/CodeGen/AMDGPU/convergent-inlineasm.ll index d0f98e5744d..755f439c686 100644 --- a/test/CodeGen/AMDGPU/convergent-inlineasm.ll +++ b/test/CodeGen/AMDGPU/convergent-inlineasm.ll @@ -3,13 +3,13 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 ; GCN-LABEL: {{^}}convergent_inlineasm: ; GCN: BB#0: -; GCN: v_cmp_ne_i32_e64 +; GCN: v_cmp_ne_u32_e64 ; GCN: ; mask branch ; GCN: BB{{[0-9]+_[0-9]+}}: define void @convergent_inlineasm(i64 addrspace(1)* nocapture %arg) { bb: %tmp = call i32 @llvm.amdgcn.workitem.id.x() - %tmp1 = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 1) #1 + %tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1) #1 %tmp2 = icmp eq i32 %tmp, 8 br i1 %tmp2, label %bb3, label %bb5 @@ -26,13 +26,13 @@ bb5: ; preds = %bb3, %bb ; GCN: ; mask branch ; GCN: BB{{[0-9]+_[0-9]+}}: -; GCN: v_cmp_ne_i32_e64 +; GCN: v_cmp_ne_u32_e64 ; GCN: BB{{[0-9]+_[0-9]+}}: define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) { bb: %tmp = call i32 @llvm.amdgcn.workitem.id.x() - %tmp1 = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 1) 
+ %tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1) %tmp2 = icmp eq i32 %tmp, 8 br i1 %tmp2, label %bb3, label %bb5 diff --git a/test/CodeGen/AMDGPU/ctlz.ll b/test/CodeGen/AMDGPU/ctlz.ll index 6a8666f9ad4..c5c5549c206 100644 --- a/test/CodeGen/AMDGPU/ctlz.ll +++ b/test/CodeGen/AMDGPU/ctlz.ll @@ -19,7 +19,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; FUNC-LABEL: {{^}}s_ctlz_i32: ; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} ; GCN-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]] -; GCN-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}} +; GCN-DAG: v_cmp_eq_u32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]] ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]] ; GCN: buffer_store_dword [[RESULT]] @@ -36,7 +36,7 @@ define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { ; FUNC-LABEL: {{^}}v_ctlz_i32: ; GCN: buffer_load_dword [[VAL:v[0-9]+]], ; GCN-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]] -; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]] +; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, [[CTLZ]] ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[CTLZ]], 32, vcc ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -99,7 +99,7 @@ define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrsp ; FUNC-LABEL: {{^}}v_ctlz_i8: ; GCN: buffer_load_ubyte [[VAL:v[0-9]+]], ; GCN-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]] -; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]] +; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, [[CTLZ]] ; GCN-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc ; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]] ; GCN: buffer_store_byte [[RESULT]], @@ -112,7 +112,7 @@ define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias % ; FUNC-LABEL: {{^}}s_ctlz_i64: ; GCN: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} -; GCN-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}} +; GCN-DAG: v_cmp_eq_u32_e64 vcc, s[[HI]], 0{{$}} ; GCN-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]] ; GCN-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32 ; GCN-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]] @@ -138,13 +138,13 @@ define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind ; FUNC-LABEL: {{^}}v_ctlz_i64: ; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}} ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; GCN-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]] +; GCN-DAG: v_cmp_eq_u32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]] ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]] ; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]] ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]] ; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]] ; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]] -; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]] +; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, [[OR]] ; GCN-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}} define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { diff --git a/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/test/CodeGen/AMDGPU/ctlz_zero_undef.ll index e9ac1ecf5af..498c3331611 100644 --- a/test/CodeGen/AMDGPU/ctlz_zero_undef.ll +++ 
b/test/CodeGen/AMDGPU/ctlz_zero_undef.ll @@ -92,7 +92,7 @@ define void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1) ; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64: ; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} -; SI-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}} +; SI-DAG: v_cmp_eq_u32_e64 vcc, s[[HI]], 0{{$}} ; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]] ; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32 ; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]] @@ -117,7 +117,7 @@ define void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 %va ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64: ; SI-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]] +; SI-DAG: v_cmp_eq_u32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]] ; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]] ; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]] ; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]] @@ -188,7 +188,7 @@ define void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1_two_use: ; SI: buffer_load_dword [[VAL:v[0-9]+]], ; SI-DAG: v_ffbh_u32_e32 [[RESULT0:v[0-9]+]], [[VAL]] -; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[VAL]] +; SI-DAG: v_cmp_eq_u32_e32 vcc, 0, [[VAL]] ; SI-DAG: v_cndmask_b32_e64 [[RESULT1:v[0-9]+]], 0, 1, vcc ; SI-DAG: buffer_store_dword [[RESULT0]] ; SI-DAG: buffer_store_byte [[RESULT1]] diff --git a/test/CodeGen/AMDGPU/fdiv.f64.ll b/test/CodeGen/AMDGPU/fdiv.f64.ll index 3343b681b9f..2ace0762c67 100644 --- a/test/CodeGen/AMDGPU/fdiv.f64.ll +++ b/test/CodeGen/AMDGPU/fdiv.f64.ll @@ -15,8 +15,8 @@ ; COMMON-DAG: v_rcp_f64_e32 [[RCP_SCALE0:v\[[0-9]+:[0-9]+\]]], [[SCALE0]] -; SI-DAG: v_cmp_eq_i32_e32 vcc, {{v[0-9]+}}, {{v[0-9]+}} -; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}} +; SI-DAG: v_cmp_eq_u32_e32 vcc, {{v[0-9]+}}, {{v[0-9]+}} +; SI-DAG: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}} ; SI-DAG: s_xor_b64 vcc, [[CMP0]], vcc ; COMMON-DAG: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[RCP_SCALE0]], 1.0 diff --git a/test/CodeGen/AMDGPU/i1-copy-phi.ll b/test/CodeGen/AMDGPU/i1-copy-phi.ll index 4d50dc2f402..fd9f469b057 100644 --- a/test/CodeGen/AMDGPU/i1-copy-phi.ll +++ b/test/CodeGen/AMDGPU/i1-copy-phi.ll @@ -6,7 +6,7 @@ ; SI: s_and_saveexec_b64 ; SI: s_xor_b64 ; SI: v_mov_b32_e32 [[REG]], -1{{$}} -; SI: v_cmp_ne_i32_e32 vcc, 0, [[REG]] +; SI: v_cmp_ne_u32_e32 vcc, 0, [[REG]] ; SI: s_and_saveexec_b64 ; SI: s_xor_b64 ; SI: s_endpgm diff --git a/test/CodeGen/AMDGPU/icmp64.ll b/test/CodeGen/AMDGPU/icmp64.ll index 0eaa33ebafe..3d42eac4429 100644 --- a/test/CodeGen/AMDGPU/icmp64.ll +++ b/test/CodeGen/AMDGPU/icmp64.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}test_i64_eq: -; SI: v_cmp_eq_i64 +; SI: v_cmp_eq_u64 define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { %cmp = icmp eq i64 %a, %b %result = sext i1 %cmp to i32 @@ -11,7 +11,7 @@ define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { } ; SI-LABEL: {{^}}test_i64_ne: -; SI: v_cmp_ne_i64 +; SI: v_cmp_ne_u64 define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { %cmp = icmp ne i64 %a, %b %result = sext i1 %cmp to i32 diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll 
b/test/CodeGen/AMDGPU/indirect-addressing-si.ll index 6d729f4569e..5c217ff421f 100644 --- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -357,7 +357,7 @@ bb2: ; CHECK-LABEL: {{^}}extract_adjacent_blocks: ; CHECK: s_load_dword [[ARG:s[0-9]+]] -; CHECK: s_cmp_lg_i32 +; CHECK: s_cmp_lg_u32 ; CHECK: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]] ; CHECK: buffer_load_dwordx4 @@ -396,7 +396,7 @@ bb7: ; CHECK-LABEL: {{^}}insert_adjacent_blocks: ; CHECK: s_load_dword [[ARG:s[0-9]+]] -; CHECK: s_cmp_lg_i32 +; CHECK: s_cmp_lg_u32 ; CHECK: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]] ; CHECK: buffer_load_dwordx4 diff --git a/test/CodeGen/AMDGPU/inline-asm.ll b/test/CodeGen/AMDGPU/inline-asm.ll index 1f5b8be15e2..cef1b114cd3 100644 --- a/test/CodeGen/AMDGPU/inline-asm.ll +++ b/test/CodeGen/AMDGPU/inline-asm.ll @@ -40,12 +40,12 @@ endif: ; CHECK-LABEL: {{^}}v_cmp_asm: ; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}} -; CHECK: v_cmp_ne_i32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]] +; CHECK: v_cmp_ne_u32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]] ; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]] ; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]] ; CHECK: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} define void @v_cmp_asm(i64 addrspace(1)* %out, i32 %in) { - %sgpr = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %in) + %sgpr = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in) store i64 %sgpr, i64 addrspace(1)* %out ret void } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll index 9f50b4505c1..af934ede040 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll @@ -77,7 +77,7 @@ define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, } ; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc: -; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}} +; SI: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}} ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind { %cmp = icmp eq i32 %i, 0 @@ -109,8 +109,8 @@ define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, fl ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} -; SI-DAG: v_cmp_eq_i32_e32 [[CMP0:vcc]], 0, v{{[0-9]+}} -; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}} +; SI-DAG: v_cmp_eq_u32_e32 [[CMP0:vcc]], 0, v{{[0-9]+}} +; SI-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}} ; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]] ; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]] ; SI: s_endpgm @@ -135,18 +135,18 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo } ; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc: -; SI: v_cmp_eq_i32_e32 vcc, 0, v{{[0-9]+}} +; SI: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}} ; SI: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc ; SI: s_xor_b64 [[SAVE]], exec, [[SAVE]] ; SI: buffer_load_dword [[LOAD:v[0-9]+]] -; SI: v_cmp_ne_i32_e32 vcc, 0, [[LOAD]] +; SI: v_cmp_ne_u32_e32 vcc, 0, [[LOAD]] ; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc ; SI: BB9_2: ; SI: s_or_b64 exec, exec, [[SAVE]] -; SI: 
v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}} +; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} ; SI: buffer_store_dword ; SI: s_endpgm diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll index 0797e5ead00..6d0457bc648 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll @@ -5,7 +5,7 @@ declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0 ; GCN-LABEL: {{^}}v_icmp_i32_eq: -; GCN: v_cmp_eq_i32_e64 +; GCN: v_cmp_eq_u32_e64 define void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32) store i64 %result, i64 addrspace(1)* %out @@ -13,14 +13,14 @@ define void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { } ; GCN-LABEL: {{^}}v_icmp: -; GCN-NOT: v_cmp_eq_i32_e64 +; GCN-NOT: v_cmp_eq_u32_e64 define void @v_icmp(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30) store i64 %result, i64 addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}v_icmp_i32_ne: -; GCN: v_cmp_ne_i32_e64 +; GCN: v_cmp_ne_u32_e64 define void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33) store i64 %result, i64 addrspace(1)* %out @@ -91,7 +91,7 @@ define void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) { } ; GCN-LABEL: {{^}}v_icmp_i64_eq: -; GCN: v_cmp_eq_i64_e64 +; GCN: v_cmp_eq_u64_e64 define void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32) store i64 %result, i64 addrspace(1)* %out @@ -99,7 +99,7 @@ define void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) { } ; GCN-LABEL: {{^}}v_icmp_i64_ne: -; GCN: v_cmp_ne_i64_e64 +; GCN: v_cmp_ne_u64_e64 define void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33) store i64 %result, i64 addrspace(1)* %out diff --git a/test/CodeGen/AMDGPU/llvm.round.f64.ll b/test/CodeGen/AMDGPU/llvm.round.f64.ll index a8024b71326..d5187adc0e6 100644 --- a/test/CodeGen/AMDGPU/llvm.round.f64.ll +++ b/test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -18,7 +18,7 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 { ; SI-DAG: v_not_b32_e32 ; SI-DAG: v_not_b32_e32 -; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cmp_eq_u32 ; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff ; SI-DAG: v_cmp_gt_i32 diff --git a/test/CodeGen/AMDGPU/select-i1.ll b/test/CodeGen/AMDGPU/select-i1.ll index 2406831b94c..adc054b4895 100644 --- a/test/CodeGen/AMDGPU/select-i1.ll +++ b/test/CodeGen/AMDGPU/select-i1.ll @@ -17,7 +17,7 @@ define void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1 %b) nounwind ; SI-DAG: buffer_load_ubyte [[COND:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 ; SI-DAG: buffer_load_ubyte [[A:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:45 ; SI-DAG: buffer_load_ubyte [[B:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46 -; SI: v_cmp_eq_i32_e32 vcc, 1, [[COND]] +; SI: v_cmp_eq_u32_e32 vcc, 1, [[COND]] ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]] define void @s_minmax_i1(i1 addrspace(1)* %out, i1 zeroext %cond, i1 zeroext %a, i1 zeroext %b) nounwind { %cmp = icmp slt i1 %cond, false diff --git a/test/CodeGen/AMDGPU/select-vectors.ll b/test/CodeGen/AMDGPU/select-vectors.ll index 626ab020b3d..240235138c8 100644 --- a/test/CodeGen/AMDGPU/select-vectors.ll +++ 
b/test/CodeGen/AMDGPU/select-vectors.ll @@ -96,7 +96,7 @@ define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> ; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]] ; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]] ; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]] -; SI-DAG: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}} +; SI-DAG: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}} ; SI: v_cndmask_b32_e32 ; SI: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]] @@ -112,7 +112,7 @@ define void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x ; FUNC-LABEL: {{^}}s_select_v4f32: ; SI: s_load_dwordx4 ; SI: s_load_dwordx4 -; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}} +; SI: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}} ; SI: v_cndmask_b32_e32 ; SI: v_cndmask_b32_e32 diff --git a/test/CodeGen/AMDGPU/selectcc-opt.ll b/test/CodeGen/AMDGPU/selectcc-opt.ll index 65be4a626a1..25670afec24 100644 --- a/test/CodeGen/AMDGPU/selectcc-opt.ll +++ b/test/CodeGen/AMDGPU/selectcc-opt.ll @@ -68,7 +68,7 @@ entry: } ; FUNC-LABEL: {{^}}selectcc_bool: -; SI: v_cmp_ne_i32 +; SI: v_cmp_ne_u32 ; SI-NEXT: v_cndmask_b32_e64 ; SI-NOT: cmp ; SI-NOT: cndmask diff --git a/test/CodeGen/AMDGPU/selectcc.ll b/test/CodeGen/AMDGPU/selectcc.ll index f378e15dd76..446d4ab344b 100644 --- a/test/CodeGen/AMDGPU/selectcc.ll +++ b/test/CodeGen/AMDGPU/selectcc.ll @@ -8,7 +8,7 @@ ; EG: OR_INT ; EG: CNDE_INT ; EG: CNDE_INT -; SI: v_cmp_eq_i64 +; SI: v_cmp_eq_u64 ; SI: v_cndmask ; SI: v_cndmask define void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) { diff --git a/test/CodeGen/AMDGPU/setcc-opt.ll b/test/CodeGen/AMDGPU/setcc-opt.ll index 9218aaf3b5d..89c92583fd4 100644 --- a/test/CodeGen/AMDGPU/setcc-opt.ll +++ b/test/CodeGen/AMDGPU/setcc-opt.ll @@ -4,7 +4,7 @@ ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0: ; GCN-NOT: v_cmp -; GCN: v_cmp_ne_i32_e32 vcc, +; GCN: v_cmp_ne_u32_e32 vcc, ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN-NEXT:buffer_store_byte [[RESULT]] ; GCN-NEXT: s_endpgm @@ -21,7 +21,7 @@ define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0: ; GCN-NOT: v_cmp -; GCN: v_cmp_ne_i32_e32 vcc, +; GCN: v_cmp_ne_u32_e32 vcc, ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN-NEXT: buffer_store_byte [[RESULT]] ; GCN-NEXT: s_endpgm @@ -38,7 +38,7 @@ define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1: ; GCN-NOT: v_cmp -; GCN: v_cmp_eq_i32_e32 vcc, +; GCN: v_cmp_eq_u32_e32 vcc, ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN-NEXT: buffer_store_byte [[RESULT]] ; GCN-NEXT: s_endpgm @@ -52,7 +52,7 @@ define void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounw ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1: ; GCN-NOT: v_cmp -; GCN: v_cmp_eq_i32_e32 vcc, +; GCN: v_cmp_eq_u32_e32 vcc, ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN-NEXT: buffer_store_byte [[RESULT]] ; GCN-NEXT: s_endpgm @@ -66,7 +66,7 @@ define void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounw ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0: ; GCN-NOT: v_cmp -; GCN: v_cmp_ne_i32_e32 vcc, +; GCN: v_cmp_ne_u32_e32 vcc, ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN-NEXT: buffer_store_byte [[RESULT]] ; GCN-NEXT: s_endpgm @@ -80,7 +80,7 @@ define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0: ; GCN-NOT: v_cmp -; GCN: v_cmp_ne_i32_e32 
vcc, +; GCN: v_cmp_ne_u32_e32 vcc, ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN-NEXT: buffer_store_byte [[RESULT]] ; GCN-NEXT: s_endpgm @@ -94,7 +94,7 @@ define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1: ; GCN-NOT: v_cmp -; GCN: v_cmp_eq_i32_e32 vcc, +; GCN: v_cmp_eq_u32_e32 vcc, ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN-NEXT: buffer_store_byte [[RESULT]] ; GCN-NEXT: s_endpgm @@ -108,7 +108,7 @@ define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1: ; GCN-NOT: v_cmp -; GCN: v_cmp_eq_i32_e32 vcc, +; GCN: v_cmp_eq_u32_e32 vcc, ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN-NEXT: buffer_store_byte [[RESULT]] define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -151,7 +151,7 @@ define void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounw ; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff ; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]] ; GCN-DAG: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]] -; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK255]] +; GCN: v_cmp_ne_u32_e32 vcc, [[B]], [[VK255]] ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN: buffer_store_byte [[RESULT]] ; GCN: s_endpgm @@ -164,7 +164,7 @@ define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind { ; FUNC-LABEL: {{^}}cmp_sext_k_neg1: ; GCN: buffer_load_sbyte [[B:v[0-9]+]] -; GCN: v_cmp_ne_i32_e32 vcc, -1, [[B]]{{$}} +; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}} ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN: buffer_store_byte [[RESULT]] ; GCN: s_endpgm @@ -178,7 +178,7 @@ define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nou ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg: ; GCN: s_load_dword [[B:s[0-9]+]] -; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}} +; GCN: v_cmp_ne_u32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}} ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]] ; GCN-NEXT: buffer_store_byte [[RESULT]] ; GCN: s_endpgm @@ -199,7 +199,7 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n ; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff ; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]] ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]] -; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK]]{{$}} +; GCN: v_cmp_ne_u32_e32 vcc, [[B]], [[VK]]{{$}} ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc ; GCN: buffer_store_byte [[RESULT]] ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/setcc.ll b/test/CodeGen/AMDGPU/setcc.ll index 5f7d645bb65..184d58ca1a0 100644 --- a/test/CodeGen/AMDGPU/setcc.ll +++ b/test/CodeGen/AMDGPU/setcc.ll @@ -225,7 +225,7 @@ entry: ; FUNC-LABEL: {{^}}i32_eq: ; R600: SETE_INT -; SI: v_cmp_eq_i32 +; SI: v_cmp_eq_u32 define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) { entry: %0 = icmp eq i32 %a, %b @@ -236,7 +236,7 @@ entry: ; FUNC-LABEL: {{^}}i32_ne: ; R600: SETNE_INT -; SI: v_cmp_ne_i32 +; SI: v_cmp_ne_u32 define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) { entry: %0 = icmp ne i32 %a, %b @@ -335,11 +335,11 @@ entry: ; FIXME: This does 4 compares ; FUNC-LABEL: {{^}}v3i32_eq: -; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cmp_eq_u32 ; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, -; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cmp_eq_u32 ; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, -; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cmp_eq_u32 ; SI-DAG: v_cndmask_b32_e64 
{{v[0-9]+}}, 0, -1, ; SI: s_endpgm define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) { @@ -356,11 +356,11 @@ define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptr } ; FUNC-LABEL: {{^}}v3i8_eq: -; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cmp_eq_u32 ; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, -; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cmp_eq_u32 ; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, -; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cmp_eq_u32 ; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, ; SI: s_endpgm define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) { @@ -379,7 +379,7 @@ define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, ; Make sure we don't try to emit i1 setcc ops ; FUNC-LABEL: setcc-i1 ; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 1 -; SI: s_cmp_eq_i32 [[AND]], 0 +; SI: s_cmp_eq_u32 [[AND]], 0 define void @setcc-i1(i32 %in) { %and = and i32 %in, 1 %cmp = icmp eq i32 %and, 0 diff --git a/test/CodeGen/AMDGPU/setcc64.ll b/test/CodeGen/AMDGPU/setcc64.ll index 15db03cf906..18805feed7b 100644 --- a/test/CodeGen/AMDGPU/setcc64.ll +++ b/test/CodeGen/AMDGPU/setcc64.ll @@ -159,7 +159,7 @@ entry: ;;;==========================================================================;;; ; FUNC-LABEL: {{^}}i64_eq: -; SI: v_cmp_eq_i64 +; SI: v_cmp_eq_u64 define void @i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) { entry: %0 = icmp eq i64 %a, %b @@ -169,7 +169,7 @@ entry: } ; FUNC-LABEL: {{^}}i64_ne: -; SI: v_cmp_ne_i64 +; SI: v_cmp_ne_u64 define void @i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) { entry: %0 = icmp ne i64 %a, %b diff --git a/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/test/CodeGen/AMDGPU/sgpr-control-flow.ll index f1b8e8eec85..bb3f9491435 100644 --- a/test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ b/test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -69,10 +69,10 @@ endif: ; SI: BB2_2: ; SI: buffer_load_dword [[AVAL:v[0-9]+]] -; SI: v_cmp_eq_i32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]] +; SI: v_cmp_eq_u32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]] ; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]] -; SI: v_cmp_ne_i32_e32 [[CMP_CMP:vcc]], 0, [[V_CMP]] +; SI: v_cmp_ne_u32_e32 [[CMP_CMP:vcc]], 0, [[V_CMP]] ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]] ; SI: buffer_store_dword [[RESULT]] define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { diff --git a/test/CodeGen/AMDGPU/si-annotate-cf.ll b/test/CodeGen/AMDGPU/si-annotate-cf.ll index 1a4c2259559..175fa0e9f95 100644 --- a/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -30,7 +30,7 @@ ENDIF: ; FIXME: This could be folded into the s_or_b64 instruction ; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0 ; SI: [[LOOP_LABEL:[A-Z0-9]+]] -; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}} +; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} ; SI_IF_BREAK instruction: ; SI: s_or_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], vcc, [[ZERO]] diff --git a/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll index ea506e6b3b3..15de4dd5018 100644 --- a/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll +++ b/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator: -; GCN: v_cmp_eq_i32 +; 
GCN: v_cmp_eq_u32 ; GCN: s_and_saveexec_b64 ; GCN: s_xor_b64 ; GCN: s_branch BB0_1 @@ -26,7 +26,7 @@ bb68: } ; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator_swap_block_order: -; GCN: v_cmp_eq_i32 +; GCN: v_cmp_eq_u32 ; GCN: s_and_saveexec_b64 ; GCN: s_xor_b64 ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index 7c58f2d906d..68dc3c6ccd2 100644 --- a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -14,7 +14,7 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { ; uses an SGPR (implicit vcc). ; SI-LABEL: {{^}}sint_to_fp_i1_f64: -; SI-DAG: v_cmp_eq_i32_e64 vcc, +; SI-DAG: v_cmp_eq_u32_e64 vcc, ; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}} ; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}} diff --git a/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/test/CodeGen/AMDGPU/sint_to_fp.i64.ll index 6e6b1e7b6de..1c9196a8bd6 100644 --- a/test/CodeGen/AMDGPU/sint_to_fp.i64.ll +++ b/test/CodeGen/AMDGPU/sint_to_fp.i64.ll @@ -21,7 +21,7 @@ define void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { ; GCN: v_cndmask ; GCN: v_cndmask -; GCN-DAG: v_cmp_eq_i64 +; GCN-DAG: v_cmp_eq_u64 ; GCN-DAG: v_cmp_lt_u64 ; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/sint_to_fp.ll b/test/CodeGen/AMDGPU/sint_to_fp.ll index 75ffdd2cc85..bae9bead46a 100644 --- a/test/CodeGen/AMDGPU/sint_to_fp.ll +++ b/test/CodeGen/AMDGPU/sint_to_fp.ll @@ -77,7 +77,7 @@ define void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrsp } ; FUNC-LABEL: {{^}}s_sint_to_fp_i1_f32: -; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], +; SI: v_cmp_eq_u32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -101,7 +101,7 @@ define void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) #0 { ; FUNC-LABEL: {{^}}v_sint_to_fp_i1_f32_load: ; SI: {{buffer|flat}}_load_ubyte ; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} -; SI: v_cmp_eq_i32 +; SI: v_cmp_eq_u32 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0 ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]] ; SI: s_endpgm diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll index 82aefa643e3..9d3eb2ffdc6 100644 --- a/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -87,7 +87,7 @@ define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 { ; FIXME: why does the skip depend on the asm length in the same block? 
; CHECK-LABEL: {{^}}test_kill_control_flow: -; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0 +; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]] ; CHECK-NEXT: ; BB#1: @@ -137,7 +137,7 @@ exit: } ; CHECK-LABEL: {{^}}test_kill_control_flow_remainder: -; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0 +; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]] ; CHECK-NEXT: ; BB#1: ; %bb @@ -199,7 +199,7 @@ exit: } ; CHECK-LABEL: {{^}}test_kill_divergent_loop: -; CHECK: v_cmp_eq_i32_e32 vcc, 0, v0 +; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0 ; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc ; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]] ; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]] @@ -216,7 +216,7 @@ exit: ; CHECK-NEXT: ; BB#3: ; CHECK: buffer_load_dword [[LOAD:v[0-9]+]] -; CHECK: v_cmp_eq_i32_e32 vcc, 0, [[LOAD]] +; CHECK: v_cmp_eq_u32_e32 vcc, 0, [[LOAD]] ; CHECK-NEXT: s_and_b64 vcc, exec, vcc ; CHECK-NEXT: s_cbranch_vccnz [[LOOP_BB]] diff --git a/test/CodeGen/AMDGPU/sopk-compares.ll b/test/CodeGen/AMDGPU/sopk-compares.ll index 1dda60a3fb1..6b708d9af5d 100644 --- a/test/CodeGen/AMDGPU/sopk-compares.ll +++ b/test/CodeGen/AMDGPU/sopk-compares.ll @@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.groupstaticsize() #1 @lds = addrspace(3) global [512 x i32] undef, align 4 ; GCN-LABEL: {{^}}br_scc_eq_i32_inline_imm: -; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 4{{$}} +; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 4{{$}} define void @br_scc_eq_i32_inline_imm(i32 %cond, i32 addrspace(1)* %out) #0 { entry: %cmp0 = icmp eq i32 %cond, 4 @@ -88,7 +88,7 @@ endif: } ; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min_m1: -; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0xffff7fff{{$}} +; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0xffff7fff{{$}} define void @br_scc_eq_i32_simm16_min_m1(i32 %cond, i32 addrspace(1)* %out) #0 { entry: %cmp0 = icmp eq i32 %cond, -32769 @@ -136,7 +136,7 @@ endif: } ; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max_p1: -; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0x10000{{$}} +; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0x10000{{$}} define void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 { entry: %cmp0 = icmp eq i32 %cond, 65536 @@ -572,7 +572,7 @@ endif: ; GCN-LABEL: {{^}}br_scc_eq_i64_inline_imm: ; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 4 -; SI: v_cmp_eq_i64_e64 +; SI: v_cmp_eq_u64_e64 define void @br_scc_eq_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 { entry: %cmp0 = icmp eq i64 %cond, 4 @@ -592,7 +592,7 @@ endif: ; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0 ; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} -; SI: v_cmp_eq_i64_e32 +; SI: v_cmp_eq_u64_e32 define void @br_scc_eq_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 { entry: %cmp0 = icmp eq i64 %cond, 1234 @@ -610,7 +610,7 @@ endif: ; GCN-LABEL: {{^}}br_scc_ne_i64_inline_imm: ; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 4 -; SI: v_cmp_ne_i64_e64 +; SI: v_cmp_ne_u64_e64 define void @br_scc_ne_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 { entry: %cmp0 = icmp ne i64 %cond, 4 @@ -630,7 +630,7 @@ endif: ; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0 ; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} -; SI: v_cmp_ne_i64_e32 +; SI: v_cmp_ne_u64_e32 define void @br_scc_ne_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 { entry: %cmp0 = icmp ne i64 %cond, 1234 diff --git a/test/CodeGen/AMDGPU/spill-m0.ll b/test/CodeGen/AMDGPU/spill-m0.ll index a4a4c1109a6..2f99efd8109 100644 --- a/test/CodeGen/AMDGPU/spill-m0.ll +++ b/test/CodeGen/AMDGPU/spill-m0.ll @@ -6,7 +6,7 
@@ ; XXX - Why does it like to use vcc? ; GCN-LABEL: {{^}}spill_m0: -; GCN: s_cmp_lg_i32 +; GCN: s_cmp_lg_u32 ; TOVGPR: s_mov_b32 vcc_hi, m0 ; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], vcc_hi, 0 diff --git a/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll index 4b6f65a77b9..72a1f1e25b3 100644 --- a/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll +++ b/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll @@ -8,7 +8,7 @@ target triple="amdgcn--" ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK: v_mbcnt_lo_u32_b32_e64 -; CHECK-NEXT: v_cmp_eq_i32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc ; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3] ; BB0_1: diff --git a/test/CodeGen/AMDGPU/trunc-cmp-constant.ll b/test/CodeGen/AMDGPU/trunc-cmp-constant.ll index 6d820dbd269..0ef460ec582 100644 --- a/test/CodeGen/AMDGPU/trunc-cmp-constant.ll +++ b/test/CodeGen/AMDGPU/trunc-cmp-constant.ll @@ -4,7 +4,7 @@ ; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_eq_0: ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] -; SI: v_cmp_eq_i32_e32 vcc, 0, [[TMP]]{{$}} +; SI: v_cmp_eq_u32_e32 vcc, 0, [[TMP]]{{$}} ; SI: v_cndmask_b32_e64 ; SI: buffer_store_byte define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { @@ -19,7 +19,7 @@ define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspa ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0: ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] -; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}} +; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}} ; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]] ; SI: buffer_store_byte [[RESULT]] @@ -116,7 +116,7 @@ define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspa ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1: ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] -; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}} +; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}} ; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]] ; SI: buffer_store_byte [[RESULT]] @@ -132,7 +132,7 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspa ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_neg1: ; XSI: buffer_load_ubyte [[LOAD:v[0-9]+]] ; XSI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] -; XSI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}} +; XSI: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}} ; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]] ; XSI-NEXT: buffer_store_byte [[RESULT]] define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { @@ -156,7 +156,7 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addr ; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1: ; SI: buffer_load_sbyte [[LOAD:v[0-9]+]] -; SI: v_cmp_ne_i32_e32 vcc, -1, [[LOAD]]{{$}} +; SI: v_cmp_ne_u32_e32 vcc, -1, [[LOAD]]{{$}} ; SI-NEXT: v_cndmask_b32_e64 ; SI: buffer_store_byte define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { diff --git a/test/CodeGen/AMDGPU/trunc.ll b/test/CodeGen/AMDGPU/trunc.ll 
index 1b53647a84c..63bb447df2f 100644 --- a/test/CodeGen/AMDGPU/trunc.ll +++ b/test/CodeGen/AMDGPU/trunc.ll @@ -51,7 +51,7 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 ; SI-LABEL: {{^}}trunc_i32_to_i1: ; SI: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} -; SI: v_cmp_eq_i32 +; SI: v_cmp_eq_u32 define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) { %a = load i32, i32 addrspace(1)* %ptr, align 4 %trunc = trunc i32 %a to i1 @@ -62,7 +62,7 @@ define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) { ; SI-LABEL: {{^}}sgpr_trunc_i32_to_i1: ; SI: s_and_b32 s{{[0-9]+}}, 1, s{{[0-9]+}} -; SI: v_cmp_eq_i32 +; SI: v_cmp_eq_u32 define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) { %trunc = trunc i32 %a to i1 %result = select i1 %trunc, i32 1, i32 0 @@ -73,7 +73,7 @@ define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) { ; SI-LABEL: {{^}}s_trunc_i64_to_i1: ; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI: s_and_b32 [[MASKED:s[0-9]+]], 1, s[[SLO]] -; SI: v_cmp_eq_i32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], [[MASKED]], 1{{$}} +; SI: v_cmp_eq_u32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], [[MASKED]], 1{{$}} ; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s{{\[}}[[VLO]]:[[VHI]]] define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) { %trunc = trunc i64 %x to i1 @@ -85,7 +85,7 @@ define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) { ; SI-LABEL: {{^}}v_trunc_i64_to_i1: ; SI: buffer_load_dwordx2 v{{\[}}[[VLO:[0-9]+]]:{{[0-9]+\]}} ; SI: v_and_b32_e32 [[MASKED:v[0-9]+]], 1, v[[VLO]] -; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]] +; SI: v_cmp_eq_u32_e32 vcc, 1, [[MASKED]] ; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc define void @v_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) { %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone diff --git a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index b36ce6b8d6c..a4e18ebc912 100644 --- a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -74,7 +74,7 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i ; uses an SGPR (implicit vcc). 
; SI-LABEL: {{^}}uint_to_fp_i1_to_f64: -; SI-DAG: v_cmp_eq_i32_e64 vcc +; SI-DAG: v_cmp_eq_u32_e64 vcc ; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}} ; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}} diff --git a/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/test/CodeGen/AMDGPU/uint_to_fp.i64.ll index 27c41e41a0e..88edbb097be 100644 --- a/test/CodeGen/AMDGPU/uint_to_fp.i64.ll +++ b/test/CodeGen/AMDGPU/uint_to_fp.i64.ll @@ -18,7 +18,7 @@ define void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { ; GCN: v_cndmask ; GCN: v_cndmask -; GCN-DAG: v_cmp_eq_i64 +; GCN-DAG: v_cmp_eq_u64 ; GCN-DAG: v_cmp_lt_u64 ; GCN: v_add_i32_e32 [[VR:v[0-9]+]] diff --git a/test/CodeGen/AMDGPU/uint_to_fp.ll b/test/CodeGen/AMDGPU/uint_to_fp.ll index 0c3d54cf0d0..aae05508366 100644 --- a/test/CodeGen/AMDGPU/uint_to_fp.ll +++ b/test/CodeGen/AMDGPU/uint_to_fp.ll @@ -77,7 +77,7 @@ define void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrsp } ; FUNC-LABEL: {{^}}s_uint_to_fp_i1_to_f32: -; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], +; SI: v_cmp_eq_u32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -101,7 +101,7 @@ define void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) #0 { ; FUNC-LABEL: {{^}}v_uint_to_fp_i1_f32_load: ; SI: {{buffer|flat}}_load_ubyte ; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} -; SI: v_cmp_eq_i32 +; SI: v_cmp_eq_u32 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0 ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]] ; SI: s_endpgm diff --git a/test/CodeGen/AMDGPU/uniform-cfg.ll b/test/CodeGen/AMDGPU/uniform-cfg.ll index 521e1891e84..a3033b6be87 100644 --- a/test/CodeGen/AMDGPU/uniform-cfg.ll +++ b/test/CodeGen/AMDGPU/uniform-cfg.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}uniform_if_scc: -; GCN-DAG: s_cmp_eq_i32 s{{[0-9]+}}, 0 +; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0 ; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]] @@ -59,7 +59,7 @@ done: } ; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc: -; GCN-DAG: s_cmp_lg_i32 s{{[0-9]+}}, 0 +; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0 ; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]] @@ -119,7 +119,7 @@ done: ; GCN: v_add_f32_e32 [[CMP:v[0-9]+]] ; Using a floating-point value in an integer compare will cause the compare to ; be selected for the SALU and then later moved to the VALU. 
-; GCN: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]] +; GCN: v_cmp_ne_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]] ; GCN: s_and_b64 vcc, exec, [[COND]] ; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]] ; GCN: buffer_store_dword @@ -167,7 +167,7 @@ endif: ; GCN-LABEL: {{^}}uniform_if_else_ret: -; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0 +; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]] ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 @@ -196,7 +196,7 @@ if.end: ; preds = %if.else, %if.then } ; GCN-LABEL: {{^}}uniform_if_else: -; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0 +; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]] ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 @@ -284,7 +284,7 @@ bb9: ; preds = %bb8, %bb4 ; FIXME: We need to teach GCNFixSGPRCopies about uniform branches so we ; get s_add_i32 here. ; GCN: v_add_i32_e32 [[I:v[0-9]+]], vcc, -1, v{{[0-9]+}} -; GCN: v_cmp_ne_i32_e32 vcc, 0, [[I]] +; GCN: v_cmp_ne_u32_e32 vcc, 0, [[I]] ; GCN: s_and_b64 vcc, exec, vcc ; GCN: s_cbranch_vccnz [[LOOP_LABEL]] ; GCN: s_endpgm @@ -309,7 +309,7 @@ done: ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]] ; GCN: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]] -; GCN: s_cmp_lg_i32 {{s[0-9]+}}, 0 +; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0 ; GCN: s_cbranch_scc1 [[ENDIF_LABEL]] ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: buffer_store_dword [[ONE]] @@ -333,7 +333,7 @@ endif: } ; GCN-LABEL: {{^}}divergent_inside_uniform: -; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0 +; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]] ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}} ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc @@ -362,13 +362,13 @@ endif: } ; GCN-LABEL: {{^}}divergent_if_uniform_if: -; GCN: v_cmp_eq_i32_e32 vcc, 0, v0 +; GCN: v_cmp_eq_u32_e32 vcc, 0, v0 ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc ; GCN: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]] ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: buffer_store_dword [[ONE]] ; GCN: s_or_b64 exec, exec, [[MASK]] -; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0 +; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; GCN: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]] ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN: buffer_store_dword [[TWO]] @@ -438,7 +438,7 @@ bb9: ; preds = %bb8, %bb4 ; VI-DAG: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 0 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0 -; SI: v_cmp_eq_i64_e64 +; SI: v_cmp_eq_u64_e64 ; SI: s_and_b64 vcc, exec, ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]] @@ -470,7 +470,7 @@ done: ; VI-DAG: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 0 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0 -; SI: v_cmp_ne_i64_e64 +; SI: v_cmp_ne_u64_e64 ; SI: s_and_b64 vcc, exec, ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]] @@ -526,8 +526,7 @@ done: } ; GCN-LABEL: {{^}}move_to_valu_i64_eq: -; SI: v_cmp_eq_i64_e32 -; VI: v_cmp_eq_u64_e32 +; GCN: v_cmp_eq_u64_e32 define void @move_to_valu_i64_eq(i32 addrspace(1)* %out) { %cond = load volatile i64, i64 addrspace(3)* undef %cmp0 = icmp eq i64 %cond, 0 @@ -546,8 +545,7 @@ done: } ; GCN-LABEL: {{^}}move_to_valu_i64_ne: -; SI: v_cmp_ne_i64_e32 -; VI: v_cmp_ne_u64_e32 +; GCN: v_cmp_ne_u64_e32 define void @move_to_valu_i64_ne(i32 addrspace(1)* %out) { %cond = load volatile i64, i64 addrspace(3)* undef %cmp0 = icmp ne i64 %cond, 0 diff --git a/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll 
b/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll index 84a4c212a52..b6ca95f8bfc 100644 --- a/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll +++ b/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll @@ -3,7 +3,7 @@ ; Test a simple uniform loop that lives inside non-uniform control flow. ; CHECK-LABEL: {{^}}test1: -; CHECK: v_cmp_ne_i32_e32 vcc, 0 +; CHECK: v_cmp_ne_u32_e32 vcc, 0 ; CHECK: s_and_saveexec_b64 ; CHECK-NEXT: s_xor_b64 ; CHECK-NEXT: ; mask branch diff --git a/test/CodeGen/AMDGPU/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll index c1f8d5916ae..f2324b82422 100644 --- a/test/CodeGen/AMDGPU/valu-i1.ll +++ b/test/CodeGen/AMDGPU/valu-i1.ll @@ -65,7 +65,7 @@ end: } ; SI-LABEL: @simple_test_v_if -; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}} +; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} ; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc ; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]] @@ -91,7 +91,7 @@ exit: } ; SI-LABEL: {{^}}simple_test_v_loop: -; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}} +; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} ; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc ; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]] ; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]] @@ -101,7 +101,7 @@ exit: ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]: ; SI: buffer_load_dword ; SI-DAG: buffer_store_dword -; SI-DAG: v_cmp_eq_i32_e32 vcc, +; SI-DAG: v_cmp_eq_u32_e32 vcc, ; SI-DAG: s_and_b64 vcc, exec, vcc ; SI: s_cbranch_vccz [[LABEL_LOOP]] ; SI: [[LABEL_EXIT]]: @@ -148,8 +148,8 @@ exit: ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]: ; SI: buffer_load_dword [[B:v[0-9]+]] ; SI: buffer_load_dword [[A:v[0-9]+]] -; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]] -; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]] +; SI-DAG: v_cmp_ne_u32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]] +; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]] ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]] ; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]] ; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]] diff --git a/test/CodeGen/AMDGPU/zero_extend.ll b/test/CodeGen/AMDGPU/zero_extend.ll index c3b76da5f77..53539921479 100644 --- a/test/CodeGen/AMDGPU/zero_extend.ll +++ b/test/CodeGen/AMDGPU/zero_extend.ll @@ -30,7 +30,7 @@ entry: ; SI-LABEL: {{^}}zext_i1_to_i64: ; SI: s_mov_b32 s{{[0-9]+}}, 0 -; SI: v_cmp_eq_i32 +; SI: v_cmp_eq_u32 ; SI: v_cndmask_b32 define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %cmp = icmp eq i32 %a, %b
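The mechanical opcode renames above all rest on one fact: for bit-identical operands, eq/ne comparisons give the same answer whether the bits are read as signed or unsigned, so selecting v_cmp_eq_u32/v_cmp_ne_u32 (and the 64-bit and scalar variants) for icmp eq/ne loses nothing. A minimal test in the style of the updated setcc.ll checks shows the selection this change produces; the function name and RUN/CHECK lines here are an illustrative sketch rather than part of the patch:

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; Equality is sign-agnostic, so the unsigned VOPC form is now the
; canonical selection for a 32-bit integer eq.
; SI-LABEL: {{^}}eq_selects_unsigned_cmp:
; SI: v_cmp_eq_u32
define void @eq_selects_unsigned_cmp(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %cmp = icmp eq i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}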
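The shrinkScalarCompare change is the other half of the canonicalization: eq/ne scalar compares now reach SIShrinkInstructions as the unsigned forms, so an immediate that is only representable as a sign-extended 16-bit constant must swap the SOPK opcode back to the signed variant (the !HasUImm branch above), while one that only zero-extends keeps the unsigned opcode. A sketch in the style of sopk-compares.ll, under the assumption that each constant below fits exactly one of the two imm16 encodings and neither is a GCN inline constant; the check lines are inferred from the updated tests above, not taken from the patch:

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; -100 (0xffffff9c) sign-extends from 16 bits but does not zero-extend, so
; the shrink pass swaps the canonical unsigned SOPK opcode back to signed.
; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_only:
; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0xff9c{{$}}
define void @br_scc_eq_i32_simm16_only(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, -100
  br i1 %cmp0, label %endif, label %if

if:
  call void asm sideeffect "", ""()
  br label %endif

endif:
  store volatile i32 1, i32 addrspace(1)* %out
  ret void
}

; 32768 (0x8000) zero-extends from 16 bits but does not sign-extend, so the
; unsigned SOPK opcode is kept.
; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_only:
; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0x8000{{$}}
define void @br_scc_eq_i32_uimm16_only(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 32768
  br i1 %cmp0, label %endif, label %if

if:
  call void asm sideeffect "", ""()
  br label %endif

endif:
  store volatile i32 1, i32 addrspace(1)* %out
  ret void
}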