[DAG] computeKnownBits - add ISD::MULHS/MULHU/SMUL_LOHI/UMUL_LOHI handling

Reuse the existing KnownBits multiplication code to handle the 'extend + multiply + extract high bits' pattern for multiply-high ops.
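To make that pattern concrete, here is a small self-contained C++ sketch (illustrative only, not LLVM code): the high half of the widened product is exactly what MULHU returns, so an upper bound on each operand bounds the high half as well.

#include <cassert>
#include <cstdint>

// What ISD::MULHU computes: the high half of a 32x32->64 unsigned multiply.
static uint32_t mulhu32(uint32_t X, uint32_t Y) {
  return static_cast<uint32_t>((uint64_t(X) * uint64_t(Y)) >> 32);
}

int main() {
  // If X < 2^A and Y < 2^B then X*Y < 2^(A+B), so once A + B <= 32 every
  // bit of the high half is known zero; this is the kind of fact the new
  // computeKnownBits cases derive from the operands' known bits.
  uint32_t X = 0x3F; // < 2^6
  uint32_t Y = 5;    // < 2^3
  assert(mulhu32(X, Y) == 0);
  return 0;
}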

Noticed while looking at the codegen for D88785 / D98587 - the patch helps the division-by-constant expansion code in particular, which suggests there are further KnownBits div/rem cases we could handle - but this was far easier to implement.
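For context, that division-by-constant expansion replaces a udiv with a multiply-high by a magic constant plus a shift. A minimal sketch of the arithmetic (standard magic-number division, matching the 0xcccccccd constant visible in the AMDGPU urem tests below):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Unsigned divide-by-5 the way the DAG expands it: multiply-high by the
// magic constant 0xCCCCCCCD (= ceil(2^34 / 5)), then shift right by 2.
static uint32_t udiv5(uint32_t X) {
  uint32_t Hi = static_cast<uint32_t>((uint64_t(X) * 0xCCCCCCCDu) >> 32);
  return Hi >> 2;
}

int main() {
  // Exact for every 32-bit input. The urem tests multiply the quotient
  // back by 5; knowing the MULHU's high bits lets that multiply be
  // narrowed (e.g. to v_mul_u32_u24 on AMDGPU, as the diffs below show).
  for (uint32_t X : {0u, 4u, 5u, 123u, 0xFFFFFFFFu})
    assert(udiv5(X) == X / 5);
  return 0;
}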

Differential Revision: https://reviews.llvm.org/D98857
Simon Pilgrim 2021-03-19 16:02:31 +00:00
parent a59cc2f1c8
commit ea4ee76d88
17 changed files with 322 additions and 364 deletions

View File

@@ -2979,6 +2979,38 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known = KnownBits::computeForMul(Known, Known2);
     break;
   }
+  case ISD::MULHU: {
+    Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    Known = KnownBits::mulhu(Known, Known2);
+    break;
+  }
+  case ISD::MULHS: {
+    Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    Known = KnownBits::mulhs(Known, Known2);
+    break;
+  }
+  case ISD::UMUL_LOHI: {
+    assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
+    Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    if (Op.getResNo() == 0)
+      Known = KnownBits::computeForMul(Known, Known2);
+    else
+      Known = KnownBits::mulhu(Known, Known2);
+    break;
+  }
+  case ISD::SMUL_LOHI: {
+    assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
+    Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    if (Op.getResNo() == 0)
+      Known = KnownBits::computeForMul(Known, Known2);
+    else
+      Known = KnownBits::mulhs(Known, Known2);
+    break;
+  }
   case ISD::UDIV: {
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
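A note on the UMUL_LOHI/SMUL_LOHI cases above: result 0 is the low half of the widened product (bit-identical to a plain MUL, hence computeForMul) and result 1 is the high half (hence mulhu/mulhs). A quick plain-C++ sanity check of that correspondence (illustrative only, not LLVM code):

#include <cassert>
#include <cstdint>

// The two results of a 32-bit UMUL_LOHI, modelled with one 64-bit product.
static void umul_lohi32(uint32_t X, uint32_t Y, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Wide = uint64_t(X) * uint64_t(Y);
  Lo = static_cast<uint32_t>(Wide);       // result 0: the bits a plain MUL gives
  Hi = static_cast<uint32_t>(Wide >> 32); // result 1: the bits MULHU gives
}

int main() {
  uint32_t Lo, Hi;
  umul_lohi32(0x12345678u, 0x9ABCDEF0u, Lo, Hi);
  assert(Lo == 0x12345678u * 0x9ABCDEF0u); // low half wraps like 32-bit MUL
  assert(Hi == uint32_t((uint64_t(0x12345678u) * 0x9ABCDEF0u) >> 32));
  return 0;
}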

View File

@@ -609,14 +609,14 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GFX7LESS-NEXT: s_cbranch_execz BB3_2
 ; GFX7LESS-NEXT: ; %bb.1:
 ; GFX7LESS-NEXT: s_mov_b32 s11, 0xf000
+; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
+; GFX7LESS-NEXT: s_mul_i32 s6, s6, 5
 ; GFX7LESS-NEXT: s_mov_b32 s10, -1
 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7LESS-NEXT: s_mov_b32 s8, s2
 ; GFX7LESS-NEXT: s_mov_b32 s9, s3
-; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[6:7]
-; GFX7LESS-NEXT: s_mul_i32 s3, s2, 5
-; GFX7LESS-NEXT: v_mul_hi_u32_u24_e64 v2, s2, 5
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s3
+; GFX7LESS-NEXT: v_mov_b32_e32 v1, s6
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7LESS-NEXT: buffer_atomic_add_x2 v[1:2], off, s[8:11], 0 glc
 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
@@ -651,12 +651,12 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GFX89-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX89-NEXT: s_mov_b32 s8, s2
 ; GFX89-NEXT: s_bcnt1_i32_b64 s2, s[6:7]
-; GFX89-NEXT: v_mul_hi_u32_u24_e64 v2, s2, 5
 ; GFX89-NEXT: s_mul_i32 s2, s2, 5
 ; GFX89-NEXT: s_mov_b32 s11, 0xf000
 ; GFX89-NEXT: s_mov_b32 s10, -1
 ; GFX89-NEXT: s_mov_b32 s9, s3
 ; GFX89-NEXT: v_mov_b32_e32 v1, s2
+; GFX89-NEXT: v_mov_b32_e32 v2, 0
 ; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX89-NEXT: buffer_atomic_add_x2 v[1:2], off, s[8:11], 0 glc
 ; GFX89-NEXT: s_waitcnt vmcnt(0)
@@ -687,10 +687,10 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GCN64-NEXT: s_cbranch_execz BB3_2
 ; GCN64-NEXT: ; %bb.1:
 ; GCN64-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
+; GCN64-NEXT: v_mov_b32_e32 v2, 0
+; GCN64-NEXT: s_mul_i32 s6, s6, 5
 ; GCN64-NEXT: s_mov_b32 s11, 0x31016000
-; GCN64-NEXT: s_mul_i32 s7, s6, 5
-; GCN64-NEXT: v_mul_hi_u32_u24_e64 v2, s6, 5
-; GCN64-NEXT: v_mov_b32_e32 v1, s7
+; GCN64-NEXT: v_mov_b32_e32 v1, s6
 ; GCN64-NEXT: s_mov_b32 s10, -1
 ; GCN64-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN64-NEXT: s_mov_b32 s8, s2
@@ -724,10 +724,10 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GCN32-NEXT: s_cbranch_execz BB3_2
 ; GCN32-NEXT: ; %bb.1:
 ; GCN32-NEXT: s_bcnt1_i32_b32 s5, s5
+; GCN32-NEXT: v_mov_b32_e32 v2, 0
+; GCN32-NEXT: s_mul_i32 s5, s5, 5
 ; GCN32-NEXT: s_mov_b32 s11, 0x31016000
-; GCN32-NEXT: s_mul_i32 s6, s5, 5
-; GCN32-NEXT: v_mul_hi_u32_u24_e64 v2, s5, 5
-; GCN32-NEXT: v_mov_b32_e32 v1, s6
+; GCN32-NEXT: v_mov_b32_e32 v1, s5
 ; GCN32-NEXT: s_mov_b32 s10, -1
 ; GCN32-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN32-NEXT: s_mov_b32 s8, s2
@@ -1700,14 +1700,14 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GFX7LESS-NEXT: s_cbranch_execz BB9_2
 ; GFX7LESS-NEXT: ; %bb.1:
 ; GFX7LESS-NEXT: s_mov_b32 s11, 0xf000
+; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
+; GFX7LESS-NEXT: s_mul_i32 s6, s6, 5
 ; GFX7LESS-NEXT: s_mov_b32 s10, -1
 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7LESS-NEXT: s_mov_b32 s8, s2
 ; GFX7LESS-NEXT: s_mov_b32 s9, s3
-; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[6:7]
-; GFX7LESS-NEXT: s_mul_i32 s3, s2, 5
-; GFX7LESS-NEXT: v_mul_hi_u32_u24_e64 v2, s2, 5
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s3
+; GFX7LESS-NEXT: v_mov_b32_e32 v1, s6
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7LESS-NEXT: buffer_atomic_sub_x2 v[1:2], off, s[8:11], 0 glc
 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
@@ -1742,12 +1742,12 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT: s_mov_b32 s8, s2
 ; GFX8-NEXT: s_bcnt1_i32_b64 s2, s[6:7]
-; GFX8-NEXT: v_mul_hi_u32_u24_e64 v2, s2, 5
 ; GFX8-NEXT: s_mul_i32 s2, s2, 5
 ; GFX8-NEXT: s_mov_b32 s11, 0xf000
 ; GFX8-NEXT: s_mov_b32 s10, -1
 ; GFX8-NEXT: s_mov_b32 s9, s3
 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
+; GFX8-NEXT: v_mov_b32_e32 v2, 0
 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: buffer_atomic_sub_x2 v[1:2], off, s[8:11], 0 glc
 ; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -1781,12 +1781,12 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: s_mov_b32 s8, s2
 ; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[6:7]
-; GFX9-NEXT: v_mul_hi_u32_u24_e64 v2, s2, 5
 ; GFX9-NEXT: s_mul_i32 s2, s2, 5
 ; GFX9-NEXT: s_mov_b32 s11, 0xf000
 ; GFX9-NEXT: s_mov_b32 s10, -1
 ; GFX9-NEXT: s_mov_b32 s9, s3
 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT: buffer_atomic_sub_x2 v[1:2], off, s[8:11], 0 glc
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -1818,10 +1818,10 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GCN64-NEXT: s_cbranch_execz BB9_2
 ; GCN64-NEXT: ; %bb.1:
 ; GCN64-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
+; GCN64-NEXT: v_mov_b32_e32 v2, 0
+; GCN64-NEXT: s_mul_i32 s6, s6, 5
 ; GCN64-NEXT: s_mov_b32 s11, 0x31016000
-; GCN64-NEXT: s_mul_i32 s7, s6, 5
-; GCN64-NEXT: v_mul_hi_u32_u24_e64 v2, s6, 5
-; GCN64-NEXT: v_mov_b32_e32 v1, s7
+; GCN64-NEXT: v_mov_b32_e32 v1, s6
 ; GCN64-NEXT: s_mov_b32 s10, -1
 ; GCN64-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN64-NEXT: s_mov_b32 s8, s2
@@ -1858,10 +1858,10 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GCN32-NEXT: s_cbranch_execz BB9_2
 ; GCN32-NEXT: ; %bb.1:
 ; GCN32-NEXT: s_bcnt1_i32_b32 s5, s5
+; GCN32-NEXT: v_mov_b32_e32 v2, 0
+; GCN32-NEXT: s_mul_i32 s5, s5, 5
 ; GCN32-NEXT: s_mov_b32 s11, 0x31016000
-; GCN32-NEXT: s_mul_i32 s6, s5, 5
-; GCN32-NEXT: v_mul_hi_u32_u24_e64 v2, s5, 5
-; GCN32-NEXT: v_mov_b32_e32 v1, s6
+; GCN32-NEXT: v_mov_b32_e32 v1, s5
 ; GCN32-NEXT: s_mov_b32 s10, -1
 ; GCN32-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN32-NEXT: s_mov_b32 s8, s2

View File

@@ -746,10 +746,10 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
 ; GFX7LESS-NEXT: s_cbranch_execz BB4_2
 ; GFX7LESS-NEXT: ; %bb.1:
 ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
+; GFX7LESS-NEXT: s_mul_i32 s4, s4, 5
 ; GFX7LESS-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
-; GFX7LESS-NEXT: s_mul_i32 s5, s4, 5
-; GFX7LESS-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s5
+; GFX7LESS-NEXT: v_mov_b32_e32 v1, s4
 ; GFX7LESS-NEXT: s_mov_b32 m0, -1
 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7LESS-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
@@ -781,9 +781,9 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
 ; GFX8-NEXT: s_cbranch_execz BB4_2
 ; GFX8-NEXT: ; %bb.1:
 ; GFX8-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
 ; GFX8-NEXT: s_mul_i32 s4, s4, 5
 ; GFX8-NEXT: v_mov_b32_e32 v1, s4
+; GFX8-NEXT: v_mov_b32_e32 v2, 0
 ; GFX8-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
 ; GFX8-NEXT: s_mov_b32 m0, -1
 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -815,9 +815,9 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
 ; GFX9-NEXT: s_cbranch_execz BB4_2
 ; GFX9-NEXT: ; %bb.1:
 ; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX9-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
 ; GFX9-NEXT: s_mul_i32 s4, s4, 5
 ; GFX9-NEXT: v_mov_b32_e32 v1, s4
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
@@ -848,10 +848,10 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
 ; GFX1064-NEXT: s_cbranch_execz BB4_2
 ; GFX1064-NEXT: ; %bb.1:
 ; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064-NEXT: s_mul_i32 s4, s4, 5
 ; GFX1064-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
-; GFX1064-NEXT: s_mul_i32 s5, s4, 5
-; GFX1064-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
-; GFX1064-NEXT: v_mov_b32_e32 v1, s5
+; GFX1064-NEXT: v_mov_b32_e32 v1, s4
 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX1064-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
@@ -880,10 +880,10 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
 ; GFX1032-NEXT: s_cbranch_execz BB4_2
 ; GFX1032-NEXT: ; %bb.1:
 ; GFX1032-NEXT: s_bcnt1_i32_b32 s3, s3
+; GFX1032-NEXT: v_mov_b32_e32 v2, 0
+; GFX1032-NEXT: s_mul_i32 s3, s3, 5
 ; GFX1032-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
-; GFX1032-NEXT: s_mul_i32 s4, s3, 5
-; GFX1032-NEXT: v_mul_hi_u32_u24_e64 v2, s3, 5
-; GFX1032-NEXT: v_mov_b32_e32 v1, s4
+; GFX1032-NEXT: v_mov_b32_e32 v1, s3
 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX1032-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
@@ -1945,10 +1945,10 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
 ; GFX7LESS-NEXT: s_cbranch_execz BB11_2
 ; GFX7LESS-NEXT: ; %bb.1:
 ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
+; GFX7LESS-NEXT: s_mul_i32 s4, s4, 5
 ; GFX7LESS-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
-; GFX7LESS-NEXT: s_mul_i32 s5, s4, 5
-; GFX7LESS-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s5
+; GFX7LESS-NEXT: v_mov_b32_e32 v1, s4
 ; GFX7LESS-NEXT: s_mov_b32 m0, -1
 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7LESS-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
@@ -1980,9 +1980,9 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
 ; GFX8-NEXT: s_cbranch_execz BB11_2
 ; GFX8-NEXT: ; %bb.1:
 ; GFX8-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
 ; GFX8-NEXT: s_mul_i32 s4, s4, 5
 ; GFX8-NEXT: v_mov_b32_e32 v1, s4
+; GFX8-NEXT: v_mov_b32_e32 v2, 0
 ; GFX8-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
 ; GFX8-NEXT: s_mov_b32 m0, -1
 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -2015,9 +2015,9 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
 ; GFX9-NEXT: s_cbranch_execz BB11_2
 ; GFX9-NEXT: ; %bb.1:
 ; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX9-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
 ; GFX9-NEXT: s_mul_i32 s4, s4, 5
 ; GFX9-NEXT: v_mov_b32_e32 v1, s4
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
@@ -2049,10 +2049,10 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
 ; GFX1064-NEXT: s_cbranch_execz BB11_2
 ; GFX1064-NEXT: ; %bb.1:
 ; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064-NEXT: s_mul_i32 s4, s4, 5
 ; GFX1064-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
-; GFX1064-NEXT: s_mul_i32 s5, s4, 5
-; GFX1064-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
-; GFX1064-NEXT: v_mov_b32_e32 v1, s5
+; GFX1064-NEXT: v_mov_b32_e32 v1, s4
 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX1064-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
@@ -2084,10 +2084,10 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
 ; GFX1032-NEXT: s_cbranch_execz BB11_2
 ; GFX1032-NEXT: ; %bb.1:
 ; GFX1032-NEXT: s_bcnt1_i32_b32 s3, s3
+; GFX1032-NEXT: v_mov_b32_e32 v2, 0
+; GFX1032-NEXT: s_mul_i32 s3, s3, 5
 ; GFX1032-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
-; GFX1032-NEXT: s_mul_i32 s4, s3, 5
-; GFX1032-NEXT: v_mul_hi_u32_u24_e64 v2, s3, 5
-; GFX1032-NEXT: v_mov_b32_e32 v1, s4
+; GFX1032-NEXT: v_mov_b32_e32 v1, s3
 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX1032-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]

View File

@@ -1202,15 +1202,14 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-NEXT: v_addc_u32_e64 v3, vcc, v3, v6, s[0:1]
 ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GCN-NEXT: v_mul_hi_u32 v5, 24, v0
 ; GCN-NEXT: v_mul_lo_u32 v4, v3, 24
-; GCN-NEXT: v_mul_hi_u32 v6, 24, v3
-; GCN-NEXT: v_mul_hi_u32 v0, 0, v0
+; GCN-NEXT: v_mul_hi_u32 v0, 24, v0
+; GCN-NEXT: v_mul_hi_u32 v5, 24, v3
 ; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
-; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc
-; GCN-NEXT: v_add_i32_e32 v4, vcc, 0, v4
-; GCN-NEXT: v_addc_u32_e32 v0, vcc, v2, v0, vcc
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; GCN-NEXT: v_add_i32_e32 v0, vcc, 0, v0
+; GCN-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
 ; GCN-NEXT: v_mul_lo_u32 v2, s8, v1
 ; GCN-NEXT: v_mul_hi_u32 v3, s8, v0
@@ -1420,15 +1419,14 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
 ; GCN-NEXT: v_addc_u32_e64 v4, vcc, v4, v6, s[4:5]
 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
 ; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
-; GCN-NEXT: v_mul_hi_u32 v6, 24, v3
 ; GCN-NEXT: v_mul_lo_u32 v5, v4, 24
-; GCN-NEXT: v_mul_hi_u32 v7, 24, v4
-; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
+; GCN-NEXT: v_mul_hi_u32 v3, 24, v3
+; GCN-NEXT: v_mul_hi_u32 v6, 24, v4
 ; GCN-NEXT: v_mul_hi_u32 v4, 0, v4
-; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; GCN-NEXT: v_addc_u32_e32 v6, vcc, v13, v7, vcc
-; GCN-NEXT: v_add_i32_e32 v5, vcc, 0, v5
-; GCN-NEXT: v_addc_u32_e32 v3, vcc, v6, v3, vcc
+; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; GCN-NEXT: v_addc_u32_e32 v5, vcc, v13, v6, vcc
+; GCN-NEXT: v_add_i32_e32 v3, vcc, 0, v3
+; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc
 ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc
 ; GCN-NEXT: v_mul_lo_u32 v5, v0, v4
 ; GCN-NEXT: v_mul_hi_u32 v6, v0, v3
@@ -1633,15 +1631,14 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
 ; GCN-NEXT: s_mov_b32 s4, 0x8000
 ; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
-; GCN-NEXT: v_mul_hi_u32 v5, s4, v3
-; GCN-NEXT: v_mul_hi_u32 v6, s4, v4
-; GCN-NEXT: v_lshlrev_b32_e32 v7, 15, v4
-; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
-; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GCN-NEXT: v_mul_hi_u32 v3, s4, v3
+; GCN-NEXT: v_mul_hi_u32 v5, s4, v4
+; GCN-NEXT: v_lshlrev_b32_e32 v6, 15, v4
 ; GCN-NEXT: v_mul_hi_u32 v4, 0, v4
-; GCN-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc
-; GCN-NEXT: v_add_i32_e32 v5, vcc, 0, v5
-; GCN-NEXT: v_addc_u32_e32 v3, vcc, v6, v3, vcc
+; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GCN-NEXT: v_addc_u32_e32 v5, vcc, v13, v5, vcc
+; GCN-NEXT: v_add_i32_e32 v3, vcc, 0, v3
+; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc
 ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc
 ; GCN-NEXT: v_mul_lo_u32 v5, v0, v4
 ; GCN-NEXT: v_mul_hi_u32 v6, v0, v3

View File

@@ -1390,15 +1390,14 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-NEXT: v_addc_u32_e64 v3, vcc, v3, v6, s[0:1]
 ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GCN-NEXT: v_mul_hi_u32 v5, 24, v0
 ; GCN-NEXT: v_mul_lo_u32 v4, v3, 24
-; GCN-NEXT: v_mul_hi_u32 v6, 24, v3
-; GCN-NEXT: v_mul_hi_u32 v0, 0, v0
+; GCN-NEXT: v_mul_hi_u32 v0, 24, v0
+; GCN-NEXT: v_mul_hi_u32 v5, 24, v3
 ; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
-; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc
-; GCN-NEXT: v_add_i32_e32 v4, vcc, 0, v4
-; GCN-NEXT: v_addc_u32_e32 v0, vcc, v2, v0, vcc
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; GCN-NEXT: v_add_i32_e32 v0, vcc, 0, v0
+; GCN-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
 ; GCN-NEXT: v_mul_lo_u32 v1, s8, v1
 ; GCN-NEXT: v_mul_hi_u32 v2, s8, v0
@@ -1605,15 +1604,14 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
 ; GCN-NEXT: v_addc_u32_e64 v3, vcc, v3, v5, s[4:5]
 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GCN-NEXT: v_mul_hi_u32 v5, 24, v2
 ; GCN-NEXT: v_mul_lo_u32 v4, v3, 24
-; GCN-NEXT: v_mul_hi_u32 v6, 24, v3
-; GCN-NEXT: v_mul_hi_u32 v2, 0, v2
+; GCN-NEXT: v_mul_hi_u32 v2, 24, v2
+; GCN-NEXT: v_mul_hi_u32 v5, 24, v3
 ; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
-; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; GCN-NEXT: v_addc_u32_e32 v5, vcc, v12, v6, vcc
-; GCN-NEXT: v_add_i32_e32 v4, vcc, 0, v4
-; GCN-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc
+; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
+; GCN-NEXT: v_addc_u32_e32 v4, vcc, v12, v5, vcc
+; GCN-NEXT: v_add_i32_e32 v2, vcc, 0, v2
+; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc
 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc
 ; GCN-NEXT: v_mul_lo_u32 v3, v0, v3
 ; GCN-NEXT: v_mul_hi_u32 v4, v0, v2
@@ -1816,15 +1814,14 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
 ; GCN-NEXT: s_mov_b32 s4, 0x8000
 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GCN-NEXT: v_mul_hi_u32 v4, s4, v2
-; GCN-NEXT: v_mul_hi_u32 v5, s4, v3
-; GCN-NEXT: v_lshlrev_b32_e32 v6, 15, v3
-; GCN-NEXT: v_mul_hi_u32 v2, 0, v2
-; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; GCN-NEXT: v_mul_hi_u32 v2, s4, v2
+; GCN-NEXT: v_mul_hi_u32 v4, s4, v3
+; GCN-NEXT: v_lshlrev_b32_e32 v5, 15, v3
 ; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
-; GCN-NEXT: v_addc_u32_e32 v5, vcc, v12, v5, vcc
-; GCN-NEXT: v_add_i32_e32 v4, vcc, 0, v4
-; GCN-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc
+; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v5
+; GCN-NEXT: v_addc_u32_e32 v4, vcc, v12, v4, vcc
+; GCN-NEXT: v_add_i32_e32 v2, vcc, 0, v2
+; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc
 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc
 ; GCN-NEXT: v_mul_lo_u32 v3, v0, v3
 ; GCN-NEXT: v_mul_hi_u32 v4, v0, v2

View File

@@ -969,14 +969,14 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-NEXT: v_mul_hi_u32 v5, v0, v6
 ; GCN-NEXT: v_mul_lo_u32 v7, v0, v4
 ; GCN-NEXT: v_mul_hi_u32 v9, v0, v4
-; GCN-NEXT: v_mul_lo_u32 v8, v3, v6
-; GCN-NEXT: v_mul_hi_u32 v6, v3, v6
+; GCN-NEXT: v_mul_hi_u32 v8, v3, v6
+; GCN-NEXT: v_mul_lo_u32 v6, v3, v6
 ; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v7
 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v4
 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v2, v9, vcc
 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4
-; GCN-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc
+; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v8, vcc
 ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v1, vcc
 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
 ; GCN-NEXT: v_add_i32_e64 v0, s[0:1], v0, v4
@@ -999,27 +999,24 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-NEXT: v_mul_lo_u32 v4, v4, v6
 ; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7
 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc
-; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v1, vcc
+; GCN-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc
 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v7, v4
-; GCN-NEXT: v_addc_u32_e32 v6, vcc, v2, v6, vcc
+; GCN-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; GCN-NEXT: v_addc_u32_e64 v3, vcc, v3, v6, s[0:1]
+; GCN-NEXT: v_addc_u32_e64 v1, vcc, v3, v1, s[0:1]
 ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GCN-NEXT: v_mul_lo_u32 v4, v3, 24
-; GCN-NEXT: v_mul_hi_u32 v0, v0, 24
-; GCN-NEXT: v_mul_hi_u32 v3, v3, 24
-; GCN-NEXT: v_mov_b32_e32 v5, s7
-; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GCN-NEXT: v_addc_u32_e32 v0, vcc, v2, v3, vcc
 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GCN-NEXT: v_mul_lo_u32 v2, s6, v1
+; GCN-NEXT: v_mul_lo_u32 v3, v1, 24
+; GCN-NEXT: v_mul_hi_u32 v0, v0, 24
+; GCN-NEXT: v_mul_hi_u32 v1, v1, 24
+; GCN-NEXT: v_mov_b32_e32 v5, s7
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v3
+; GCN-NEXT: v_addc_u32_e32 v0, vcc, v2, v1, vcc
+; GCN-NEXT: v_mul_lo_u32 v1, s7, v0
 ; GCN-NEXT: v_mul_hi_u32 v3, s6, v0
-; GCN-NEXT: v_mul_lo_u32 v4, s7, v0
-; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; GCN-NEXT: v_add_i32_e32 v1, vcc, v3, v1
 ; GCN-NEXT: v_mul_lo_u32 v3, s6, v0
-; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
-; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v2
+; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
 ; GCN-NEXT: v_sub_i32_e32 v3, vcc, 24, v3
 ; GCN-NEXT: v_subb_u32_e64 v4, s[0:1], v4, v5, vcc
 ; GCN-NEXT: v_subrev_i32_e64 v5, s[0:1], s6, v3
@@ -1031,21 +1028,21 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v4
 ; GCN-NEXT: v_cndmask_b32_e64 v4, v6, v5, s[0:1]
 ; GCN-NEXT: v_add_i32_e64 v5, s[0:1], 2, v0
-; GCN-NEXT: v_addc_u32_e64 v6, s[0:1], 0, v1, s[0:1]
+; GCN-NEXT: v_addc_u32_e64 v6, s[0:1], 0, v2, s[0:1]
+; GCN-NEXT: v_subb_u32_e32 v1, vcc, 0, v1, vcc
 ; GCN-NEXT: v_add_i32_e64 v7, s[0:1], 1, v0
-; GCN-NEXT: v_subb_u32_e32 v2, vcc, 0, v2, vcc
-; GCN-NEXT: v_addc_u32_e64 v8, s[0:1], 0, v1, s[0:1]
+; GCN-NEXT: v_addc_u32_e64 v2, s[0:1], 0, v2, s[0:1]
+; GCN-NEXT: v_cmp_le_u32_e32 vcc, s7, v1
 ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4
-; GCN-NEXT: v_cmp_le_u32_e32 vcc, s7, v2
-; GCN-NEXT: v_cndmask_b32_e64 v4, v8, v6, s[0:1]
-; GCN-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
 ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s6, v3
 ; GCN-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s7, v2
-; GCN-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc
-; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s7, v1
+; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
 ; GCN-NEXT: v_cndmask_b32_e64 v2, v7, v5, s[0:1]
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
 ; GCN-NEXT: s_endpgm

View File

@@ -9,7 +9,7 @@ define i1 @test_urem_odd(i13 %X) nounwind {
 ; CHECK-NEXT: s_mov_b32 s4, 0xcccccccd
 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, s4
 ; CHECK-NEXT: v_lshrrev_b32_e32 v1, 2, v1
-; CHECK-NEXT: v_mul_lo_u32 v1, v1, 5
+; CHECK-NEXT: v_mul_u32_u24_e32 v1, 5, v1
 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
@@ -28,7 +28,7 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; CHECK-NEXT: s_mov_b32 s4, 0x92492493
 ; CHECK-NEXT: v_mul_hi_u32 v0, v0, s4
 ; CHECK-NEXT: v_lshrrev_b32_e32 v0, 2, v0
-; CHECK-NEXT: v_mul_lo_u32 v0, v0, 14
+; CHECK-NEXT: v_mul_u32_u24_e32 v0, 14, v0
 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v1, v0
 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
@@ -46,7 +46,7 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind {
 ; CHECK-NEXT: s_mov_b32 s4, 0xcccccccd
 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, s4
 ; CHECK-NEXT: v_lshrrev_b32_e32 v1, 2, v1
-; CHECK-NEXT: v_mul_lo_u32 v1, v1, 5
+; CHECK-NEXT: v_mul_u32_u24_e32 v1, 5, v1
 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
@@ -62,10 +62,9 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind {
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT: v_and_b32_e32 v0, 0x1ff, v0
 ; CHECK-NEXT: s_mov_b32 s4, 0x2050c9f9
-; CHECK-NEXT: s_movk_i32 s5, 0x1fb
 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, s4
 ; CHECK-NEXT: v_lshrrev_b32_e32 v1, 6, v1
-; CHECK-NEXT: v_mul_lo_u32 v1, v1, s5
+; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x1fb, v1
 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc

View File

@@ -779,14 +779,14 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-NEXT: v_mul_hi_u32 v5, v0, v6
 ; GCN-NEXT: v_mul_lo_u32 v7, v0, v4
 ; GCN-NEXT: v_mul_hi_u32 v9, v0, v4
-; GCN-NEXT: v_mul_lo_u32 v8, v3, v6
-; GCN-NEXT: v_mul_hi_u32 v6, v3, v6
+; GCN-NEXT: v_mul_hi_u32 v8, v3, v6
+; GCN-NEXT: v_mul_lo_u32 v6, v3, v6
 ; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v7
 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v4
 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v2, v9, vcc
 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4
-; GCN-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc
+; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v8, vcc
 ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v1, vcc
 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
 ; GCN-NEXT: v_add_i32_e64 v0, s[0:1], v0, v4
@@ -809,27 +809,24 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-NEXT: v_mul_lo_u32 v4, v4, v6
 ; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7
 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc
-; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v1, vcc
+; GCN-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc
 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v7, v4
-; GCN-NEXT: v_addc_u32_e32 v6, vcc, v2, v6, vcc
+; GCN-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; GCN-NEXT: v_addc_u32_e64 v3, vcc, v3, v6, s[0:1]
+; GCN-NEXT: v_addc_u32_e64 v1, vcc, v3, v1, s[0:1]
 ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GCN-NEXT: v_mul_lo_u32 v4, v3, 24
-; GCN-NEXT: v_mul_hi_u32 v0, v0, 24
-; GCN-NEXT: v_mul_hi_u32 v3, v3, 24
-; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GCN-NEXT: v_addc_u32_e32 v0, vcc, v2, v3, vcc
 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GCN-NEXT: v_mul_lo_u32 v1, s6, v1
+; GCN-NEXT: v_mul_lo_u32 v3, v1, 24
+; GCN-NEXT: v_mul_hi_u32 v0, v0, 24
+; GCN-NEXT: v_mul_hi_u32 v1, v1, 24
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v3
+; GCN-NEXT: v_addc_u32_e32 v0, vcc, v2, v1, vcc
+; GCN-NEXT: v_mul_lo_u32 v1, s7, v0
 ; GCN-NEXT: v_mul_hi_u32 v2, s6, v0
-; GCN-NEXT: v_mul_lo_u32 v3, s7, v0
 ; GCN-NEXT: v_mul_lo_u32 v0, s6, v0
-; GCN-NEXT: v_add_i32_e32 v1, vcc, v2, v1
-; GCN-NEXT: v_add_i32_e32 v1, vcc, v1, v3
-; GCN-NEXT: v_sub_i32_e32 v2, vcc, 0, v1
 ; GCN-NEXT: v_mov_b32_e32 v3, s7
+; GCN-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; GCN-NEXT: v_sub_i32_e32 v2, vcc, 0, v1
 ; GCN-NEXT: v_sub_i32_e32 v0, vcc, 24, v0
 ; GCN-NEXT: v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
 ; GCN-NEXT: v_subrev_i32_e64 v4, s[0:1], s6, v0

View File

@@ -684,29 +684,16 @@ define i1 @t11() {
 ; ARM: @ %bb.0: @ %entry
 ; ARM-NEXT: .pad #4
 ; ARM-NEXT: sub sp, sp, #4
-; ARM-NEXT: ldr r0, .LCPI10_0
-; ARM-NEXT: mov r1, #33
-; ARM-NEXT: umull r2, r3, r1, r0
-; ARM-NEXT: lsr r0, r3, #3
-; ARM-NEXT: add r0, r0, r0, lsl #2
-; ARM-NEXT: sub r0, r1, r0, lsl #1
-; ARM-NEXT: ldr r1, [sp]
-; ARM-NEXT: and r1, r1, #-33554432
-; ARM-NEXT: orr r0, r1, r0
-; ARM-NEXT: mov r1, #255
+; ARM-NEXT: ldr r0, [sp]
+; ARM-NEXT: mov r1, #40960
+; ARM-NEXT: orr r1, r1, #-33554432
 ; ARM-NEXT: orr r0, r0, #40960
-; ARM-NEXT: orr r1, r1, #3840
-; ARM-NEXT: str r0, [sp]
 ; ARM-NEXT: and r0, r0, r1
-; ARM-NEXT: sub r0, r0, #3
-; ARM-NEXT: rsbs r1, r0, #0
-; ARM-NEXT: adc r0, r0, r1
+; ARM-NEXT: orr r0, r0, #3
+; ARM-NEXT: str r0, [sp]
+; ARM-NEXT: mov r0, #1
 ; ARM-NEXT: add sp, sp, #4
 ; ARM-NEXT: mov pc, lr
-; ARM-NEXT: .p2align 2
-; ARM-NEXT: @ %bb.1:
-; ARM-NEXT: .LCPI10_0:
-; ARM-NEXT: .long 3435973837 @ 0xcccccccd
 ;
 ; ARMT2-LABEL: t11:
 ; ARMT2: @ %bb.0: @ %entry

View File

@@ -44,7 +44,6 @@ define zeroext i8 @test3(i8 zeroext %x, i8 zeroext %c) {
 ; CHECK-NEXT: lsr.l #8, %d0
 ; CHECK-NEXT: lsr.w #1, %d0
 ; CHECK-NEXT: and.l #65535, %d0
-; CHECK-NEXT: and.l #255, %d0
 ; CHECK-NEXT: rts
 entry:
   %div = udiv i8 %c, 3

View File

@@ -88,9 +88,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind {
 ; PPC64LE-NEXT: clrlwi 3, 3, 28
 ; PPC64LE-NEXT: ori 4, 4, 52429
 ; PPC64LE-NEXT: mulhwu 4, 3, 4
-; PPC64LE-NEXT: rlwinm 5, 4, 0, 0, 29
 ; PPC64LE-NEXT: srwi 4, 4, 2
-; PPC64LE-NEXT: add 4, 4, 5
+; PPC64LE-NEXT: rlwimi 4, 4, 2, 28, 29
 ; PPC64LE-NEXT: sub 3, 3, 4
 ; PPC64LE-NEXT: cntlzw 3, 3
 ; PPC64LE-NEXT: not 3, 3

View File

@@ -551,13 +551,13 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: .save {r4, lr}
 ; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: vmov.u8 r0, q1[1]
-; CHECK-NEXT: vmov.u8 r1, q1[0]
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov.u8 r0, q1[3]
+; CHECK-NEXT: vmov.u8 r1, q1[2]
 ; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
-; CHECK-NEXT: vmov.u8 r1, q0[1]
-; CHECK-NEXT: vmov.u8 r2, q0[0]
+; CHECK-NEXT: vmov.u8 r1, q0[3]
+; CHECK-NEXT: vmov.u8 r2, q0[2]
 ; CHECK-NEXT: vmov.i64 q2, #0xff
 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r1
 ; CHECK-NEXT: vand q3, q3, q2
@@ -566,55 +566,53 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-NEXT: vmov r1, s18
 ; CHECK-NEXT: vmov r2, s12
 ; CHECK-NEXT: vmov r3, s16
-; CHECK-NEXT: umull r12, r1, r1, r0
-; CHECK-NEXT: vmov.u8 r0, q1[2]
-; CHECK-NEXT: umull r2, r3, r3, r2
-; CHECK-NEXT: orr.w lr, r3, r1
-; CHECK-NEXT: vmov.u8 r3, q1[3]
-; CHECK-NEXT: vmov q3[2], q3[0], r0, r3
-; CHECK-NEXT: vmov.u8 r3, q0[3]
-; CHECK-NEXT: vmov.u8 r1, q0[2]
-; CHECK-NEXT: vand q3, q3, q2
-; CHECK-NEXT: vmov q4[2], q4[0], r1, r3
-; CHECK-NEXT: vmov r0, s14
-; CHECK-NEXT: vand q4, q4, q2
-; CHECK-NEXT: vmov r3, s12
-; CHECK-NEXT: vmov r1, s18
-; CHECK-NEXT: add r2, r12
-; CHECK-NEXT: vmov r4, s16
 ; CHECK-NEXT: umull r0, r1, r1, r0
-; CHECK-NEXT: umull r3, r4, r4, r3
-; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
-; CHECK-NEXT: vmov q3[3], q3[1], r4, r1
-; CHECK-NEXT: vmov.u8 r4, q0[4]
-; CHECK-NEXT: vmov r3, s12
-; CHECK-NEXT: vmov r0, s13
-; CHECK-NEXT: adds r2, r2, r3
+; CHECK-NEXT: umull r2, r3, r3, r2
+; CHECK-NEXT: vmov q3[2], q3[0], r2, r0
+; CHECK-NEXT: vmov.u8 r0, q1[0]
+; CHECK-NEXT: vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT: vmov.u8 r3, q1[1]
+; CHECK-NEXT: vmov q4[2], q4[0], r0, r3
+; CHECK-NEXT: vmov.u8 r3, q0[1]
+; CHECK-NEXT: vmov.u8 r2, q0[0]
+; CHECK-NEXT: vand q4, q4, q2
+; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: vand q5, q5, q2
+; CHECK-NEXT: vmov r3, s18
+; CHECK-NEXT: vmov r2, s20
+; CHECK-NEXT: vmov r4, s22
+; CHECK-NEXT: vmov lr, s12
+; CHECK-NEXT: vmov r12, s13
+; CHECK-NEXT: umull r0, r2, r2, r0
+; CHECK-NEXT: smlabb r0, r4, r3, r0
 ; CHECK-NEXT: vmov r3, s14
-; CHECK-NEXT: adc.w r0, r0, lr
-; CHECK-NEXT: adds r2, r2, r3
+; CHECK-NEXT: vmov.u8 r4, q0[4]
+; CHECK-NEXT: adds.w r0, r0, lr
+; CHECK-NEXT: adc.w r2, r2, r12
+; CHECK-NEXT: adds.w r12, r0, r3
 ; CHECK-NEXT: vmov.u8 r3, q1[4]
-; CHECK-NEXT: adc.w r12, r0, r1
-; CHECK-NEXT: vmov.u8 r1, q1[5]
-; CHECK-NEXT: vmov q3[2], q3[0], r3, r1
+; CHECK-NEXT: adcs r1, r2
+; CHECK-NEXT: vmov.u8 r2, q1[5]
+; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
 ; CHECK-NEXT: vmov.u8 r3, q0[5]
 ; CHECK-NEXT: vmov q4[2], q4[0], r4, r3
 ; CHECK-NEXT: vand q3, q3, q2
 ; CHECK-NEXT: vand q4, q4, q2
-; CHECK-NEXT: vmov r1, s14
+; CHECK-NEXT: vmov r2, s14
 ; CHECK-NEXT: vmov r3, s18
 ; CHECK-NEXT: vmov r4, s12
 ; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: umull r1, r3, r3, r1
+; CHECK-NEXT: umull r2, r3, r3, r2
 ; CHECK-NEXT: umull r0, r4, r0, r4
-; CHECK-NEXT: vmov q3[2], q3[0], r0, r1
+; CHECK-NEXT: vmov q3[2], q3[0], r0, r2
 ; CHECK-NEXT: vmov q3[3], q3[1], r4, r3
 ; CHECK-NEXT: vmov.u8 r4, q0[6]
-; CHECK-NEXT: vmov r1, s12
+; CHECK-NEXT: vmov r2, s12
 ; CHECK-NEXT: vmov r0, s13
-; CHECK-NEXT: adds r1, r1, r2
-; CHECK-NEXT: vmov r2, s14
-; CHECK-NEXT: adc.w r0, r0, r12
+; CHECK-NEXT: adds.w r2, r2, r12
+; CHECK-NEXT: adcs r0, r1
+; CHECK-NEXT: vmov r1, s14
 ; CHECK-NEXT: adds r1, r1, r2
 ; CHECK-NEXT: vmov.u8 r2, q1[7]
 ; CHECK-NEXT: adc.w r12, r0, r3
@@ -722,7 +720,7 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-NEXT: vmov r2, s6
 ; CHECK-NEXT: vmov r3, s2
 ; CHECK-NEXT: umlal r0, r1, r3, r2
-; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
 ; CHECK-NEXT: pop {r4, pc}
 entry:
   %xx = zext <16 x i8> %x to <16 x i64>
@@ -1466,58 +1464,56 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %y
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: .save {r4, r5, r6, lr}
 ; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: vmov.u8 r2, q1[1]
-; CHECK-NEXT: vmov.u8 r3, q1[0]
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov.u8 r2, q1[3]
+; CHECK-NEXT: vmov.u8 r3, q1[2]
 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
-; CHECK-NEXT: vmov.u8 r3, q0[1]
-; CHECK-NEXT: vmov.u8 r2, q0[0]
+; CHECK-NEXT: vmov.u8 r3, q0[3]
+; CHECK-NEXT: vmov.u8 r2, q0[2]
 ; CHECK-NEXT: vmov.i64 q2, #0xff
 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r3
 ; CHECK-NEXT: vand q3, q3, q2
 ; CHECK-NEXT: vand q4, q4, q2
 ; CHECK-NEXT: vmov r12, s14
 ; CHECK-NEXT: vmov r2, s18
-; CHECK-NEXT: vmov.u8 r4, q1[2]
-; CHECK-NEXT: vmov r3, s16
-; CHECK-NEXT: vmov.u8 r5, q0[2]
-; CHECK-NEXT: umull r12, lr, r2, r12
-; CHECK-NEXT: vmov r2, s12
-; CHECK-NEXT: umull r2, r3, r3, r2
-; CHECK-NEXT: orr.w lr, lr, r3
-; CHECK-NEXT: vmov.u8 r3, q1[3]
-; CHECK-NEXT: vmov q3[2], q3[0], r4, r3
-; CHECK-NEXT: vmov.u8 r4, q0[3]
-; CHECK-NEXT: vmov q4[2], q4[0], r5, r4
-; CHECK-NEXT: vand q3, q3, q2
+; CHECK-NEXT: vmov.u8 r4, q1[0]
+; CHECK-NEXT: vmov r3, s12
+; CHECK-NEXT: vmov.u8 r5, q0[0]
+; CHECK-NEXT: umull lr, r12, r2, r12
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: umull r2, r3, r2, r3
+; CHECK-NEXT: vmov q3[2], q3[0], r2, lr
+; CHECK-NEXT: vmov.u8 r2, q1[1]
+; CHECK-NEXT: vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT: vmov.u8 r4, q0[1]
+; CHECK-NEXT: vmov q5[2], q5[0], r5, r4
 ; CHECK-NEXT: vand q4, q4, q2
-; CHECK-NEXT: vmov r3, s14
-; CHECK-NEXT: vmov r4, s18
-; CHECK-NEXT: add r2, r12
-; CHECK-NEXT: vmov r5, s12
-; CHECK-NEXT: vmov r6, s16
-; CHECK-NEXT: umull r3, r4, r4, r3
-; CHECK-NEXT: umull r5, r6, r6, r5
-; CHECK-NEXT: vmov q3[2], q3[0], r5, r3
-; CHECK-NEXT: vmov.u8 r5, q1[4]
-; CHECK-NEXT: vmov q3[3], q3[1], r6, r4
-; CHECK-NEXT: vmov r6, s12
-; CHECK-NEXT: vmov r3, s13
-; CHECK-NEXT: adds r2, r2, r6
+; CHECK-NEXT: vand q5, q5, q2
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r4, s20
+; CHECK-NEXT: vmov q3[3], q3[1], r3, r12
+; CHECK-NEXT: vmov r5, s18
+; CHECK-NEXT: vmov r6, s22
+; CHECK-NEXT: vmov r3, s12
+; CHECK-NEXT: vmov lr, s13
+; CHECK-NEXT: umull r2, r4, r4, r2
+; CHECK-NEXT: smlabb r2, r6, r5, r2
 ; CHECK-NEXT: vmov r6, s14
-; CHECK-NEXT: adc.w r3, r3, lr
-; CHECK-NEXT: adds.w r12, r2, r6
+; CHECK-NEXT: vmov.u8 r5, q1[4]
+; CHECK-NEXT: adds r2, r2, r3
+; CHECK-NEXT: adc.w r3, r4, lr
+; CHECK-NEXT: vmov.u8 r4, q0[4]
+; CHECK-NEXT: adds.w lr, r2, r6
 ; CHECK-NEXT: vmov.u8 r6, q1[5]
 ; CHECK-NEXT: vmov q3[2], q3[0], r5, r6
+; CHECK-NEXT: adcs r3, r4
 ; CHECK-NEXT: vmov.u8 r5, q0[5]
-; CHECK-NEXT: vmov.u8 r4, q0[4]
 ; CHECK-NEXT: vmov q4[2], q4[0], r4, r5
 ; CHECK-NEXT: vand q3, q3, q2
 ; CHECK-NEXT: vand q4, q4, q2
 ; CHECK-NEXT: vmov r6, s14
 ; CHECK-NEXT: vmov r5, s18
-; CHECK-NEXT: adc.w r3, r3, r12
 ; CHECK-NEXT: vmov r4, s12
 ; CHECK-NEXT: vmov r2, s16
 ; CHECK-NEXT: umull r6, r5, r5, r6
@@ -1527,7 +1523,7 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT: vmov.u8 r4, q0[6]
 ; CHECK-NEXT: vmov r6, s12
 ; CHECK-NEXT: vmov r2, s13
-; CHECK-NEXT: adds.w r6, r6, r12
+; CHECK-NEXT: adds.w r6, r6, lr
 ; CHECK-NEXT: adcs r2, r3
 ; CHECK-NEXT: vmov r3, s14
 ; CHECK-NEXT: adds r3, r3, r6
@@ -1639,7 +1635,7 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT: umlal r3, r2, r5, r6
 ; CHECK-NEXT: adds r0, r0, r3
 ; CHECK-NEXT: adcs r1, r2
-; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
 ; CHECK-NEXT: pop {r4, r5, r6, pc}
 entry:
   %xx = zext <16 x i8> %x to <16 x i64>

View File

@ -34,20 +34,20 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl ; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl
; CHECK-NEXT: testb $1, %bl ; CHECK-NEXT: testb $1, %bl
; CHECK-NEXT: je LBB0_27 ; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.1: ## %bb116.i ; CHECK-NEXT: ## %bb.1: ## %bb116.i
; CHECK-NEXT: je LBB0_27 ; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.2: ## %bb52.i.i ; CHECK-NEXT: ## %bb.2: ## %bb52.i.i
; CHECK-NEXT: testb $1, %bl ; CHECK-NEXT: testb $1, %bl
; CHECK-NEXT: je LBB0_27 ; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.3: ## %bb142.i ; CHECK-NEXT: ## %bb.3: ## %bb142.i
; CHECK-NEXT: je LBB0_27 ; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.4: ; CHECK-NEXT: ## %bb.4:
; CHECK-NEXT: movl L_.str89$non_lazy_ptr, %edi ; CHECK-NEXT: movl L_.str89$non_lazy_ptr, %edi
; CHECK-NEXT: movb $1, %bh ; CHECK-NEXT: movb $1, %bh
; CHECK-NEXT: movl $274877907, %ebp ## imm = 0x10624DD3 ; CHECK-NEXT: movl L_.str$non_lazy_ptr, %ebp
; CHECK-NEXT: jmp LBB0_5 ; CHECK-NEXT: jmp LBB0_5
; CHECK-NEXT: LBB0_23: ## %bb7806 ; CHECK-NEXT: LBB0_21: ## %bb7806
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: Ltmp16: ; CHECK-NEXT: Ltmp16:
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
@ -58,7 +58,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia
; CHECK-NEXT: LBB0_5: ## %bb3261 ; CHECK-NEXT: LBB0_5: ## %bb3261
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpl $37, 0 ; CHECK-NEXT: cmpl $37, 0
; CHECK-NEXT: jne LBB0_27 ; CHECK-NEXT: jne LBB0_25
; CHECK-NEXT: ## %bb.6: ## %bb3306 ; CHECK-NEXT: ## %bb.6: ## %bb3306
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: Ltmp0: ; CHECK-NEXT: Ltmp0:
@ -70,7 +70,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: movl 0, %eax ; CHECK-NEXT: movl 0, %eax
; CHECK-NEXT: cmpl $121, %eax ; CHECK-NEXT: cmpl $121, %eax
; CHECK-NEXT: ja LBB0_27 ; CHECK-NEXT: ja LBB0_25
; CHECK-NEXT: ## %bb.8: ## %bb3314 ; CHECK-NEXT: ## %bb.8: ## %bb3314
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: jmpl *LJTI0_0(,%eax,4) ; CHECK-NEXT: jmpl *LJTI0_0(,%eax,4)
@ -78,11 +78,11 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB0_27 ; CHECK-NEXT: jne LBB0_25
; CHECK-NEXT: ## %bb.11: ## %bb5809 ; CHECK-NEXT: ## %bb.11: ## %bb5809
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: testb %bh, %bh ; CHECK-NEXT: testb %bh, %bh
; CHECK-NEXT: je LBB0_27 ; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.12: ## %bb91.i8504 ; CHECK-NEXT: ## %bb.12: ## %bb91.i8504
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: testb $1, %bl ; CHECK-NEXT: testb $1, %bl
@ -98,10 +98,10 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: testb $1, %bl ; CHECK-NEXT: testb $1, %bl
; CHECK-NEXT: je LBB0_15 ; CHECK-NEXT: je LBB0_15
; CHECK-NEXT: ## %bb.17: ## %bb278.i8617 ; CHECK-NEXT: ## %bb.16: ## %bb278.i8617
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: je LBB0_19 ; CHECK-NEXT: je LBB0_18
; CHECK-NEXT: ## %bb.18: ## %bb440.i8663 ; CHECK-NEXT: ## %bb.17: ## %bb440.i8663
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: Ltmp6: ; CHECK-NEXT: Ltmp6:
; CHECK-NEXT: movl L_.str4$non_lazy_ptr, %eax ; CHECK-NEXT: movl L_.str4$non_lazy_ptr, %eax
@ -110,39 +110,24 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl L__ZZNK10wxDateTime5GetTmERKNS_8TimeZoneEE12__FUNCTION__$non_lazy_ptr, %eax ; CHECK-NEXT: movl L__ZZNK10wxDateTime5GetTmERKNS_8TimeZoneEE12__FUNCTION__$non_lazy_ptr, %eax
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl L_.str$non_lazy_ptr, %eax ; CHECK-NEXT: movl %ebp, (%esp)
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: movl $1717, {{[0-9]+}}(%esp) ## imm = 0x6B5 ; CHECK-NEXT: movl $1717, {{[0-9]+}}(%esp) ## imm = 0x6B5
; CHECK-NEXT: calll __Z10wxOnAssertPKwiPKcS0_S0_ ; CHECK-NEXT: calll __Z10wxOnAssertPKwiPKcS0_S0_
; CHECK-NEXT: Ltmp7: ; CHECK-NEXT: Ltmp7:
; CHECK-NEXT: LBB0_19: ## %bb448.i8694 ; CHECK-NEXT: jmp LBB0_18
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: LBB0_15: ## %bb187.i8591 ; CHECK-NEXT: LBB0_15: ## %bb187.i8591
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: jne LBB0_27 ; CHECK-NEXT: jne LBB0_25
; CHECK-NEXT: ## %bb.16: ## %bb265.i8606 ; CHECK-NEXT: LBB0_18: ## %invcont5814
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: imull %ebp
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: shrl $31, %eax
; CHECK-NEXT: shrl $6, %edx
; CHECK-NEXT: addl %eax, %edx
; CHECK-NEXT: imull $1000, %edx, %eax ## imm = 0x3E8
; CHECK-NEXT: negl %eax
; CHECK-NEXT: LBB0_20: ## %invcont5814
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: Ltmp8: ; CHECK-NEXT: Ltmp8:
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: movl $0, (%esp)
; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz ; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz
; CHECK-NEXT: subl $4, %esp ; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: Ltmp9: ; CHECK-NEXT: Ltmp9:
; CHECK-NEXT: ## %bb.21: ## %invcont5831 ; CHECK-NEXT: ## %bb.19: ## %invcont5831
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: Ltmp10: ; CHECK-NEXT: Ltmp10:
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
@ -160,8 +145,8 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia
; CHECK-NEXT: movl %eax, (%esp) ; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE ; CHECK-NEXT: calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE
; CHECK-NEXT: Ltmp14: ; CHECK-NEXT: Ltmp14:
; CHECK-NEXT: jmp LBB0_27 ; CHECK-NEXT: jmp LBB0_25
; CHECK-NEXT: LBB0_22: ## %bb5968 ; CHECK-NEXT: LBB0_20: ## %bb5968
; CHECK-NEXT: Ltmp2: ; CHECK-NEXT: Ltmp2:
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
@ -169,7 +154,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia
; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz ; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz
; CHECK-NEXT: subl $4, %esp ; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: Ltmp3: ; CHECK-NEXT: Ltmp3:
; CHECK-NEXT: LBB0_27: ## %bb115.critedge.i ; CHECK-NEXT: LBB0_25: ## %bb115.critedge.i
; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: addl $28, %esp ; CHECK-NEXT: addl $28, %esp
; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %esi
@ -177,15 +162,15 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalia
; CHECK-NEXT: popl %ebx ; CHECK-NEXT: popl %ebx
; CHECK-NEXT: popl %ebp ; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl $4 ; CHECK-NEXT: retl $4
; CHECK-NEXT: LBB0_25: ## %lpad.loopexit.split-lp ; CHECK-NEXT: LBB0_23: ## %lpad.loopexit.split-lp
; CHECK-NEXT: Ltmp15: ; CHECK-NEXT: Ltmp15:
; CHECK-NEXT: jmp LBB0_27 ; CHECK-NEXT: jmp LBB0_25
; CHECK-NEXT: LBB0_26: ## %lpad8185 ; CHECK-NEXT: LBB0_24: ## %lpad8185
; CHECK-NEXT: Ltmp12: ; CHECK-NEXT: Ltmp12:
; CHECK-NEXT: jmp LBB0_27 ; CHECK-NEXT: jmp LBB0_25
; CHECK-NEXT: LBB0_24: ## %lpad.loopexit ; CHECK-NEXT: LBB0_22: ## %lpad.loopexit
; CHECK-NEXT: Ltmp18: ; CHECK-NEXT: Ltmp18:
; CHECK-NEXT: jmp LBB0_27 ; CHECK-NEXT: jmp LBB0_25
; CHECK-NEXT: Lfunc_end0: ; CHECK-NEXT: Lfunc_end0:
entry: entry:
br i1 %foo, label %bb116.i, label %bb115.critedge.i br i1 %foo, label %bb116.i, label %bb115.critedge.i
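The hunk above shows a signed divide-by-constant expansion (the imull, shrl, addl, negl sequence) folding away now that known bits flow through the multiply-high node that expansion creates. As a standalone illustration of the kind of fact involved (plain C++, not LLVM's KnownBits API; the i16 width and the (-256, 256) operand range are chosen only to keep the exhaustive check small), enough known sign bits in both operands pin down the entire high half of a signed multiply:

#include <cassert>
#include <cstdint>

// mulhs at i16: the high half of the sign-extended 32-bit product.
static int16_t mulhs16(int16_t a, int16_t b) {
  return static_cast<int16_t>((int32_t(a) * int32_t(b)) >> 16);
}

int main() {
  // If both operands are known to lie in (-256, 256), the 32-bit product
  // lies in (-2^16, 2^16), so its top sixteen bits are pure sign material:
  // the multiply-high result is 0 for a non-negative product and -1 for a
  // negative one. This is the style of fact a known-bits query can now
  // report for multiply-high nodes instead of giving up.
  for (int a = -255; a < 256; ++a)
    for (int b = -255; b < 256; ++b)
      assert(mulhs16(int16_t(a), int16_t(b)) == (a * b < 0 ? -1 : 0));
  return 0;
}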

@ -693,23 +693,20 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
define <8 x i16> @pr38477(<8 x i16> %a0) { define <8 x i16> @pr38477(<8 x i16> %a0) {
; SSE2-LABEL: pr38477: ; SSE2-LABEL: pr38477:
; SSE2: # %bb.0: ; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,4957,57457,4103,16385,35545,2048,2115] ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4957,57457,4103,16385,35545,2048,2115]
; SSE2-NEXT: pmulhuw %xmm0, %xmm2 ; SSE2-NEXT: pmulhuw %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psubw %xmm2, %xmm1
; SSE2-NEXT: pmulhuw {{.*}}(%rip), %xmm1
; SSE2-NEXT: paddw %xmm2, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,0,65535]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pandn %xmm1, %xmm3
; SSE2-NEXT: pmulhuw {{.*}}(%rip), %xmm1
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2 ; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm2, %xmm1 ; SSE2-NEXT: psubw %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: pmulhuw {{.*}}(%rip), %xmm0
; SSE2-NEXT: paddw %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,0,65535]
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pandn %xmm0, %xmm3
; SSE2-NEXT: pmulhuw {{.*}}(%rip), %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq ; SSE2-NEXT: retq
; ;
; SSE41-LABEL: pr38477: ; SSE41-LABEL: pr38477:
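pr38477 divides each i16 lane by a non-uniform constant, and pmulhuw is exactly MULHU on those lanes. A minimal sketch of the rule that makes the tighter sequence legal (plain C++; mulhu8 and leadingZeros8 are local helpers, not LLVM APIs, and the check runs at 8 bits only so it can be exhaustive): leading zeros of the operands accumulate in the wide product, so they reappear as known-zero top bits of the high half.

#include <cassert>
#include <cstdint>

// mulhu at i8: the high half of the zero-extended 16-bit product.
static uint8_t mulhu8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((unsigned(a) * unsigned(b)) >> 8);
}

static int leadingZeros8(uint8_t v) {
  int n = 0;
  for (int bit = 7; bit >= 0 && !((v >> bit) & 1); --bit)
    ++n;
  return n;
}

int main() {
  // a < 2^(8-lza) and b < 2^(8-lzb) imply a*b < 2^(16-lza-lzb), so the
  // high byte has at least lza + lzb known-zero top bits (capped at 8).
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b) {
      int bound = leadingZeros8(uint8_t(a)) + leadingZeros8(uint8_t(b));
      if (bound > 8)
        bound = 8;
      assert(leadingZeros8(mulhu8(uint8_t(a), uint8_t(b))) >= bound);
    }
  return 0;
}

KnownBits::mulhu derives facts of this shape symbolically rather than by enumeration.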

@ -91,13 +91,7 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: psrld $2, %xmm2 ; SSE2-NEXT: psrld $2, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6] ; SSE2-NEXT: pmaddwd {{.*}}(%rip), %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: psubd %xmm2, %xmm0 ; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
@ -113,7 +107,7 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE4-NEXT: psrld $2, %xmm2 ; SSE4-NEXT: psrld $2, %xmm2
; SSE4-NEXT: pmulld {{.*}}(%rip), %xmm2 ; SSE4-NEXT: pmaddwd {{.*}}(%rip), %xmm2
; SSE4-NEXT: psubd %xmm2, %xmm0 ; SSE4-NEXT: psubd %xmm2, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1 ; SSE4-NEXT: pxor %xmm1, %xmm1
; SSE4-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE4-NEXT: pcmpeqd %xmm1, %xmm0
@ -130,8 +124,7 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; AVX2-NEXT: vpsrld $2, %xmm1, %xmm1 ; AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6] ; AVX2-NEXT: vpmaddwd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
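The pmulld to pmaddwd change in this file is only sound because the quotient's upper bits are now provably zero: pmaddwd multiplies signed 16-bit halves, so a plain multiply by 6 must keep the operand within the signed low half. A sketch of one 32-bit lane under that assumption (plain C++; pmaddwdLane is a local model of the instruction, and the 15-bit bound is the illustrative safe range, not a claim about this exact test's mask):

#include <cassert>
#include <cstdint>

// One 32-bit lane of x86 pmaddwd: split each lane into two signed 16-bit
// halves, multiply them pairwise, and sum the two products.
static int32_t pmaddwdLane(uint32_t a, uint32_t b) {
  int32_t lo = int16_t(a & 0xFFFF) * int16_t(b & 0xFFFF);
  int32_t hi = int16_t(a >> 16) * int16_t(b >> 16);
  return lo + hi;
}

int main() {
  // With the multiplier lane set to 6 (halves 6 and 0), pmaddwd computes
  // sext16(a.lo) * 6, which matches a full 32-bit multiply by 6 whenever
  // a is known to fit in 15 bits -- the kind of bound known-bits analysis
  // can now prove for a quotient produced via multiply-high.
  for (uint32_t a = 0; a < (1u << 15); ++a)
    assert(pmaddwdLane(a, 6) == int32_t(a * 6u));
  return 0;
}

The p5 and p7 hunks below exercise the same combine.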
@ -156,19 +149,12 @@ define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: movdqa %xmm2, %xmm1 ; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: psrld $1, %xmm1 ; SSE2-NEXT: psrld $2, %xmm1
; SSE2-NEXT: psrld $2, %xmm2 ; SSE2-NEXT: psrld $1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm1[1,2]
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3] ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,3,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [3,5,6,9] ; SSE2-NEXT: pmaddwd {{.*}}(%rip), %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] ; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: pmuludq %xmm3, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: pmuludq %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: retq ; SSE2-NEXT: retq
@ -187,7 +173,7 @@ define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
; SSE4-NEXT: psrld $2, %xmm2 ; SSE4-NEXT: psrld $2, %xmm2
; SSE4-NEXT: psrld $1, %xmm1 ; SSE4-NEXT: psrld $1, %xmm1
; SSE4-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5],xmm1[6,7] ; SSE4-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5],xmm1[6,7]
; SSE4-NEXT: pmulld {{.*}}(%rip), %xmm1 ; SSE4-NEXT: pmaddwd {{.*}}(%rip), %xmm1
; SSE4-NEXT: psubd %xmm1, %xmm0 ; SSE4-NEXT: psubd %xmm1, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1 ; SSE4-NEXT: pxor %xmm1, %xmm1
; SSE4-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE4-NEXT: pcmpeqd %xmm1, %xmm0
@ -204,7 +190,7 @@ define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpmaddwd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@ -292,13 +278,7 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: psrld $2, %xmm2 ; SSE2-NEXT: psrld $2, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6] ; SSE2-NEXT: pmaddwd {{.*}}(%rip), %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: psubd %xmm2, %xmm0 ; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
@ -314,7 +294,7 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE4-NEXT: psrld $2, %xmm2 ; SSE4-NEXT: psrld $2, %xmm2
; SSE4-NEXT: pmulld {{.*}}(%rip), %xmm2 ; SSE4-NEXT: pmaddwd {{.*}}(%rip), %xmm2
; SSE4-NEXT: psubd %xmm2, %xmm0 ; SSE4-NEXT: psubd %xmm2, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1 ; SSE4-NEXT: pxor %xmm1, %xmm1
; SSE4-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE4-NEXT: pcmpeqd %xmm1, %xmm0
@ -331,8 +311,7 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; AVX2-NEXT: vpsrld $2, %xmm1, %xmm1 ; AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6] ; AVX2-NEXT: vpmaddwd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0

@ -15,13 +15,13 @@ define i64 @func() nounwind {
; X64-NEXT: movl $2, %ecx ; X64-NEXT: movl $2, %ecx
; X64-NEXT: movl $3, %eax ; X64-NEXT: movl $3, %eax
; X64-NEXT: imulq %rcx ; X64-NEXT: imulq %rcx
; X64-NEXT: shrdq $2, %rdx, %rax
; X64-NEXT: cmpq $1, %rdx ; X64-NEXT: cmpq $1, %rdx
; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF ; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: cmovgq %rcx, %rax ; X64-NEXT: movl $1, %ecx
; X64-NEXT: cmovgq %rax, %rcx
; X64-NEXT: cmpq $-2, %rdx ; X64-NEXT: cmpq $-2, %rdx
; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 ; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT: cmovlq %rcx, %rax ; X64-NEXT: cmovgeq %rcx, %rax
; X64-NEXT: retq ; X64-NEXT: retq
%tmp = call i64 @llvm.smul.fix.sat.i64(i64 3, i64 2, i32 2) %tmp = call i64 @llvm.smul.fix.sat.i64(i64 3, i64 2, i32 2)
ret i64 %tmp ret i64 %tmp
@ -51,12 +51,12 @@ define i64 @func3() nounwind {
; X64-NEXT: movl $2, %edx ; X64-NEXT: movl $2, %edx
; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, %rax
; X64-NEXT: imulq %rdx ; X64-NEXT: imulq %rdx
; X64-NEXT: shrdq $2, %rdx, %rax
; X64-NEXT: cmpq $1, %rdx ; X64-NEXT: cmpq $1, %rdx
; X64-NEXT: cmovgq %rcx, %rax ; X64-NEXT: movabsq $4611686018427387903, %rsi # imm = 0x3FFFFFFFFFFFFFFF
; X64-NEXT: cmovgq %rcx, %rsi
; X64-NEXT: cmpq $-2, %rdx ; X64-NEXT: cmpq $-2, %rdx
; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 ; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT: cmovlq %rcx, %rax ; X64-NEXT: cmovgeq %rsi, %rax
; X64-NEXT: retq ; X64-NEXT: retq
%tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 2) %tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 2)
ret i64 %tmp ret i64 %tmp
@ -69,12 +69,12 @@ define i64 @func4() nounwind {
; X64-NEXT: movl $2, %edx ; X64-NEXT: movl $2, %edx
; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, %rax
; X64-NEXT: imulq %rdx ; X64-NEXT: imulq %rdx
; X64-NEXT: shrdq $32, %rdx, %rax
; X64-NEXT: cmpq $2147483647, %rdx # imm = 0x7FFFFFFF ; X64-NEXT: cmpq $2147483647, %rdx # imm = 0x7FFFFFFF
; X64-NEXT: cmovgq %rcx, %rax ; X64-NEXT: movl $4294967295, %esi # imm = 0xFFFFFFFF
; X64-NEXT: cmovgq %rcx, %rsi
; X64-NEXT: cmpq $-2147483648, %rdx # imm = 0x80000000 ; X64-NEXT: cmpq $-2147483648, %rdx # imm = 0x80000000
; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 ; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT: cmovlq %rcx, %rax ; X64-NEXT: cmovgeq %rsi, %rax
; X64-NEXT: retq ; X64-NEXT: retq
%tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 32) %tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 32)
ret i64 %tmp ret i64 %tmp
@ -87,14 +87,14 @@ define i64 @func5() nounwind {
; X64-NEXT: movl $2, %edx ; X64-NEXT: movl $2, %edx
; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, %rax
; X64-NEXT: imulq %rdx ; X64-NEXT: imulq %rdx
; X64-NEXT: shrdq $63, %rdx, %rax ; X64-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
; X64-NEXT: movabsq $4611686018427387903, %rsi # imm = 0x3FFFFFFFFFFFFFFF ; X64-NEXT: cmpq %rax, %rdx
; X64-NEXT: cmpq %rsi, %rdx ; X64-NEXT: movl $1, %esi
; X64-NEXT: cmovgq %rcx, %rax ; X64-NEXT: cmovgq %rcx, %rsi
; X64-NEXT: movabsq $-4611686018427387904, %rcx # imm = 0xC000000000000000 ; X64-NEXT: movabsq $-4611686018427387904, %rax # imm = 0xC000000000000000
; X64-NEXT: cmpq %rcx, %rdx ; X64-NEXT: cmpq %rax, %rdx
; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 ; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT: cmovlq %rcx, %rax ; X64-NEXT: cmovgeq %rsi, %rax
; X64-NEXT: retq ; X64-NEXT: retq
%tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 63) %tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 63)
ret i64 %tmp ret i64 %tmp
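With both multiplicands constant, the scaled product is fully known, so the shrdq that extracted it disappears and the in-range value is materialized directly: the movl $1, movl $4294967295, and movabsq $4611686018427387903 immediates in the new code above. A sketch of the llvm.smul.fix.sat semantics checked against those constants (plain C++ using the compiler's __int128 extension; smulFixSat is a local model, not an LLVM API):

#include <cassert>
#include <cstdint>

// Local model of llvm.smul.fix.sat.i64: arithmetic-shift the full 128-bit
// product right by the scale, then clamp to the i64 range.
static int64_t smulFixSat(int64_t a, int64_t b, unsigned scale) {
  __int128 wide = ((__int128)a * b) >> scale;
  if (wide > INT64_MAX)
    return INT64_MAX;
  if (wide < INT64_MIN)
    return INT64_MIN;
  return (int64_t)wide;
}

int main() {
  assert(smulFixSat(3, 2, 2) == 1);                           // func
  assert(smulFixSat(INT64_MAX, 2, 2) == 0x3FFFFFFFFFFFFFFF);  // func3
  assert(smulFixSat(INT64_MAX, 2, 32) == 0xFFFFFFFF);         // func4
  assert(smulFixSat(INT64_MAX, 2, 63) == 1);                  // func5
  return 0;
}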

@ -77,11 +77,11 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
; X64-LABEL: func3: ; X64-LABEL: func3:
; X64: # %bb.0: ; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $15, %esi ; X64-NEXT: andb $15, %al
; X64-NEXT: andl $15, %eax ; X64-NEXT: andb $15, %sil
; X64-NEXT: imull %esi, %eax
; X64-NEXT: shrb $2, %al
; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %sil
; X64-NEXT: shrb $2, %al
; X64-NEXT: retq ; X64-NEXT: retq
; ;
; X86-LABEL: func3: ; X86-LABEL: func3:
@ -90,11 +90,8 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
; X86-NEXT: andb $15, %al ; X86-NEXT: andb $15, %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: andb $15, %cl ; X86-NEXT: andb $15, %cl
; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: mulb %cl
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: imull %ecx, %eax
; X86-NEXT: shrb $2, %al ; X86-NEXT: shrb $2, %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl ; X86-NEXT: retl
%tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 2) %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 2)
ret i4 %tmp ret i4 %tmp
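Masking both i4 operands to [0, 15] bounds the product by 225, which fits in eight bits, so the zero-extend plus imull sequence collapses to a single mulb before the scale shift. A quick exhaustive check of that narrowing (plain C++; the intrinsic's final truncation back to i4 happens after the shift either way and is unaffected):

#include <cassert>
#include <cstdint>

int main() {
  // i4 operands are masked to [0, 15], so x * y <= 225 fits in a uint8_t.
  // The 8-bit multiply therefore agrees with the wide one, and the
  // scale-2 fixed-point result is a single right shift of the low byte.
  for (unsigned x = 0; x < 16; ++x)
    for (unsigned y = 0; y < 16; ++y) {
      uint8_t narrow = uint8_t(uint8_t(x) * uint8_t(y)) >> 2;
      assert(narrow == ((x * y) >> 2));
    }
  return 0;
}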