1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[GlobalISel] Constant fold G_SITOFP and G_UITOFP in CSEMIRBuilder

Differential Revision: https://reviews.llvm.org/D104528
This commit is contained in:
Jay Foad 2021-06-18 13:22:11 +01:00
parent cf6bdfc026
commit 3bc8cd6a0b
9 changed files with 282 additions and 285 deletions

View File

@ -257,6 +257,10 @@ Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm, const MachineRegisterInfo &MRI);
/// Try to constant fold an integer-to-floating-point conversion.
///
/// \p Opcode must be G_SITOFP or G_UITOFP (this is asserted). If \p Src is
/// found to be a constant by getConstantVRegVal, the result is that integer
/// converted to the floating-point semantics selected for \p DstTy, using
/// round-to-nearest, ties-to-even; G_SITOFP is passed to the conversion as
/// the signed case. Returns None when \p Src is not such a constant.
Optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
Register Src,
const MachineRegisterInfo &MRI);
/// Test if the given value is known to have exactly one bit set. This differs
/// from computeKnownBits in that it doesn't necessarily determine which bit is
/// set.

View File

@ -203,6 +203,16 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
return buildConstant(Dst, *MaybeCst);
break;
}
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP: {
// Try to constant fold these.
assert(SrcOps.size() == 1 && "Invalid sources");
assert(DstOps.size() == 1 && "Invalid dsts");
if (Optional<APFloat> Cst = ConstantFoldIntToFloat(
Opc, DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getReg(), *getMRI()))
return buildFConstant(DstOps[0], *Cst);
break;
}
}
bool CanCopy = checkCopyToDefsPossible(DstOps);
if (!canPerformCSEForOpc(Opc))

View File

@ -673,6 +673,19 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
return None;
}
/// Fold a G_SITOFP / G_UITOFP whose source register holds a constant integer.
///
/// \param Opcode  G_SITOFP or G_UITOFP; anything else trips the assertion.
/// \param DstTy   Destination type; selects the float semantics of the result.
/// \param Src     Register to look up as a constant via getConstantVRegVal.
/// \param MRI     Register info used for that lookup.
/// \returns the converted value, or None if \p Src is not a known constant.
Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
                                               Register Src,
                                               const MachineRegisterInfo &MRI) {
  assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP);

  // Nothing to fold unless the source is a known integer constant.
  auto SrcCst = getConstantVRegVal(Src, MRI);
  if (!SrcCst)
    return None;

  // G_SITOFP is the signed conversion; G_UITOFP is the unsigned one.
  const bool IsSigned = Opcode == TargetOpcode::G_SITOFP;

  APFloat Folded(getFltSemanticForLLT(DstTy));
  Folded.convertFromAPInt(*SrcCst, IsSigned, APFloat::rmNearestTiesToEven);
  return Folded;
}
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
GISelKnownBits *KB) {
Optional<DefinitionAndSourceRegister> DefSrcReg =

View File

@ -689,8 +689,7 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs
define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: simplify_demanded_bfe_sdiv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 2
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0
; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_mov_b32 s6, -1

View File

@ -68,9 +68,8 @@ define float @v_powi_neg1_f32(float %l) {
; GCN-LABEL: v_powi_neg1_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_i32_e32 v1, -1
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_legacy_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 -1)
@ -82,8 +81,7 @@ define float @v_powi_2_f32(float %l) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 2
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 2)
@ -94,9 +92,8 @@ define float @v_powi_neg2_f32(float %l) {
; GCN-LABEL: v_powi_neg2_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_i32_e32 v1, -2
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_legacy_f32_e32 v0, -2.0, v0
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 -2)
@ -108,8 +105,7 @@ define float @v_powi_4_f32(float %l) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 4
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_legacy_f32_e32 v0, 4.0, v0
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 4)
@ -121,8 +117,7 @@ define float @v_powi_8_f32(float %l) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 8
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_legacy_f32_e32 v0, 0x41000000, v0
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 8)
@ -134,8 +129,7 @@ define float @v_powi_16_f32(float %l) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 16
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_legacy_f32_e32 v0, 0x41800000, v0
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 16)
@ -147,8 +141,7 @@ define float @v_powi_128_f32(float %l) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 0x80
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_legacy_f32_e32 v0, 0x43000000, v0
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 128)
@ -159,9 +152,8 @@ define float @v_powi_neg128_f32(float %l) {
; GCN-LABEL: v_powi_neg128_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_i32_e32 v1, 0xffffff80
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_legacy_f32_e32 v0, 0xc3000000, v0
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 -128)

View File

@ -296,16 +296,15 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_movk_i32 s6, 0x1000
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
; CHECK-NEXT: v_mov_b32_e32 v3, 0xfffff000
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2
@ -379,56 +378,54 @@ define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
; CGP-LABEL: v_sdiv_v2i32_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_movk_i32 s4, 0x1000
; CGP-NEXT: s_movk_i32 s8, 0x1000
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_mov_b32_e32 v3, 0x1000
; CGP-NEXT: s_movk_i32 s5, 0xf000
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x45800000
; CGP-NEXT: s_movk_i32 s4, 0xf000
; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT: v_rcp_iflag_f32_e32 v7, 0x45800000
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT: v_mul_lo_u32 v8, s5, v6
; CGP-NEXT: v_mul_lo_u32 v8, s4, v3
; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v6
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v6
; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v3
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v4
; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s4, v0
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s8, v0
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v5
; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
; CGP-NEXT: v_sub_i32_e64 v8, s[6:7], v1, v3
; CGP-NEXT: v_sub_i32_e64 v8, s[6:7], v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v6
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v3
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[4:5]
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
ret <2 x i32> %result
@ -440,16 +437,15 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
; CHECK-NEXT: v_mov_b32_e32 v3, 0xffed2705
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v2, s6
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2
@ -523,56 +519,54 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
; CGP-LABEL: v_sdiv_v2i32_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
; CGP-NEXT: s_mov_b32 s8, 0x12d8fb
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_mov_b32_e32 v3, 0x12d8fb
; CGP-NEXT: s_mov_b32 s5, 0xffed2705
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x4996c7d8
; CGP-NEXT: s_mov_b32 s4, 0xffed2705
; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705
; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT: v_rcp_iflag_f32_e32 v7, 0x4996c7d8
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT: v_mul_lo_u32 v8, s5, v6
; CGP-NEXT: v_mul_lo_u32 v8, s4, v3
; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
; CGP-NEXT: v_mul_lo_u32 v7, v6, s4
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v6
; CGP-NEXT: v_mul_lo_u32 v9, v4, v3
; CGP-NEXT: v_mul_lo_u32 v7, v3, s8
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
; CGP-NEXT: v_mul_lo_u32 v9, v4, v5
; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s4, v0
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s8, v0
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v5
; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
; CGP-NEXT: v_sub_i32_e64 v8, s[6:7], v1, v3
; CGP-NEXT: v_sub_i32_e64 v8, s[6:7], v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v6
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v3
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[4:5]
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = sdiv <2 x i32> %num, <i32 1235195, i32 1235195>
ret <2 x i32> %result

View File

@ -272,16 +272,15 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_movk_i32 s4, 0x1000
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
; CHECK-NEXT: v_mov_b32_e32 v3, 0xfffff000
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s4
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
@ -351,50 +350,48 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_movk_i32 s4, 0x1000
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_mov_b32_e32 v3, 0x1000
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x45800000
; CGP-NEXT: s_movk_i32 s5, 0xf000
; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT: v_rcp_iflag_f32_e32 v7, 0x45800000
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT: v_mul_lo_u32 v8, s5, v6
; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
; CGP-NEXT: v_lshlrev_b32_e32 v6, 12, v6
; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT: v_mul_lo_u32 v8, s5, v3
; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
; CGP-NEXT: v_lshlrev_b32_e32 v3, 12, v3
; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v5
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v5
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = srem <2 x i32> %num, <i32 4096, i32 4096>
ret <2 x i32> %result
@ -406,16 +403,15 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
; CHECK-NEXT: v_mov_b32_e32 v3, 0xffed2705
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s4
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_mul_lo_u32 v2, v2, s4
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
@ -485,50 +481,48 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_mov_b32_e32 v3, 0x12d8fb
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x4996c7d8
; CGP-NEXT: s_mov_b32 s5, 0xffed2705
; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705
; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT: v_rcp_iflag_f32_e32 v7, 0x4996c7d8
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT: v_mul_lo_u32 v8, s5, v6
; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
; CGP-NEXT: v_mul_lo_u32 v6, v6, s4
; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT: v_mul_lo_u32 v8, s5, v3
; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
; CGP-NEXT: v_mul_lo_u32 v3, v3, s4
; CGP-NEXT: v_mul_lo_u32 v4, v4, s4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v5
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v5
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
ret <2 x i32> %result

View File

@ -223,14 +223,13 @@ define i32 @v_udiv_i32_pow2k_denom(i32 %num) {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_movk_i32 s6, 0x1000
; CHECK-NEXT: v_mov_b32_e32 v1, 0xfffff000
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, 0x45800000
; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000
; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v1
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
@ -295,45 +294,43 @@ define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) {
; CGP-LABEL: v_udiv_v2i32_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_movk_i32 s4, 0x1000
; CGP-NEXT: v_mov_b32_e32 v2, 0x1000
; CGP-NEXT: s_movk_i32 s5, 0xf000
; CGP-NEXT: s_movk_i32 s8, 0x1000
; CGP-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
; CGP-NEXT: s_movk_i32 s4, 0xf000
; CGP-NEXT: v_mov_b32_e32 v3, 0xfffff000
; CGP-NEXT: v_cvt_f32_u32_e32 v4, s4
; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT: v_mov_b32_e32 v4, 0x1000
; CGP-NEXT: v_rcp_iflag_f32_e32 v5, 0x45800000
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
; CGP-NEXT: v_mul_lo_u32 v6, s5, v4
; CGP-NEXT: v_mul_lo_u32 v6, s4, v2
; CGP-NEXT: v_mul_lo_u32 v3, v3, v5
; CGP-NEXT: v_mul_hi_u32 v6, v4, v6
; CGP-NEXT: v_mul_hi_u32 v6, v2, v6
; CGP-NEXT: v_mul_hi_u32 v3, v5, v3
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3
; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
; CGP-NEXT: v_lshlrev_b32_e32 v5, 12, v4
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4
; CGP-NEXT: v_lshlrev_b32_e32 v5, 12, v2
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v2
; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v3
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s4, v0
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s8, v0
; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1
; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v8, s[4:5]
; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v2
; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v4
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v4
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v5, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = udiv <2 x i32> %num, <i32 4096, i32 4096>
@ -345,14 +342,13 @@ define i32 @v_udiv_i32_oddk_denom(i32 %num) {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
; CHECK-NEXT: v_mov_b32_e32 v1, 0xffed2705
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, 0x4996c7d8
; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705
; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
@ -417,44 +413,42 @@ define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) {
; CGP-LABEL: v_udiv_v2i32_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
; CGP-NEXT: v_mov_b32_e32 v2, 0x12d8fb
; CGP-NEXT: s_mov_b32 s5, 0xffed2705
; CGP-NEXT: v_cvt_f32_u32_e32 v3, s4
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: s_mov_b32 s8, 0x12d8fb
; CGP-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
; CGP-NEXT: s_mov_b32 s4, 0xffed2705
; CGP-NEXT: v_mov_b32_e32 v3, 0x12d8fb
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, 0x4996c7d8
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_mul_lo_u32 v5, s5, v3
; CGP-NEXT: v_mul_lo_u32 v6, s5, v4
; CGP-NEXT: v_mul_hi_u32 v5, v3, v5
; CGP-NEXT: v_mul_lo_u32 v5, s4, v2
; CGP-NEXT: v_mul_lo_u32 v6, s4, v4
; CGP-NEXT: v_mul_hi_u32 v5, v2, v5
; CGP-NEXT: v_mul_hi_u32 v6, v4, v6
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
; CGP-NEXT: v_mul_lo_u32 v5, v3, s4
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v3
; CGP-NEXT: v_mul_lo_u32 v7, v4, v2
; CGP-NEXT: v_mul_lo_u32 v5, v2, s8
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v2
; CGP-NEXT: v_mul_lo_u32 v7, v4, s8
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s4, v0
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s8, v0
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v2
; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v5, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = udiv <2 x i32> %num, <i32 1235195, i32 1235195>

View File

@ -230,14 +230,13 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
; CHECK-NEXT: v_mov_b32_e32 v1, 0xffed2705
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, 0x4996c7d8
; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705
; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v1, v1, s4
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@ -290,33 +289,31 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
; CGP-NEXT: v_mov_b32_e32 v2, 0x12d8fb
; CGP-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
; CGP-NEXT: s_mov_b32 s5, 0xffed2705
; CGP-NEXT: v_cvt_f32_u32_e32 v3, s4
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_mul_lo_u32 v4, s5, v3
; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
; CGP-NEXT: v_mul_lo_u32 v4, v4, s4
; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT: v_mul_lo_u32 v3, s5, v2
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_mul_hi_u32 v3, v0, v2
; CGP-NEXT: v_mul_hi_u32 v2, v1, v2
; CGP-NEXT: v_mul_lo_u32 v3, v3, s4
; CGP-NEXT: v_mul_lo_u32 v2, v2, s4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = urem <2 x i32> %num, <i32 1235195, i32 1235195>
ret <2 x i32> %result