1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 10:32:48 +02:00

[GlobalISel] Start using vectors in GISelKnownBits

For vectors, we consider a bit as known if it is the same for all demanded
vector elements (all elements by default). The KnownBits BitWidth for a vector
type is the size of a vector element. Add support for G_BUILD_VECTOR.
This allows combines of urem_pow2_to_mask in the pre-legalizer combiner.

Differential Revision: https://reviews.llvm.org/D96122
This commit is contained in:
Petar Avramovic 2021-03-04 14:27:39 +01:00
parent 3ec9f44e35
commit 51bcd9fe00
7 changed files with 2208 additions and 323 deletions

View File

@ -129,7 +129,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
return;
}
unsigned BitWidth = DstTy.getSizeInBits();
unsigned BitWidth = DstTy.getScalarSizeInBits();
auto CacheEntry = ComputeKnownBitsCache.find(R);
if (CacheEntry != ComputeKnownBitsCache.end()) {
Known = CacheEntry->second;
@ -140,9 +140,6 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
}
Known = KnownBits(BitWidth); // Don't know anything
if (DstTy.isVector())
return; // TODO: Handle vectors.
// Depth may get bigger than max depth if it gets passed to a different
// GISelKnownBits object.
// This may happen when say a generic part uses a GISelKnownBits object
@ -164,6 +161,25 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
Depth);
break;
case TargetOpcode::G_BUILD_VECTOR: {
// Collect the known bits that are shared by every demanded vector element.
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
if (!DemandedElts[i])
continue;
computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts,
Depth + 1);
// Known bits are the values that are shared by every demanded element.
Known = KnownBits::commonBits(Known, Known2);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
}
break;
}
case TargetOpcode::COPY:
case TargetOpcode::G_PHI:
case TargetOpcode::PHI: {
@ -244,6 +260,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_PTR_ADD: {
if (DstTy.isVector())
break;
// G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
LLT Ty = MRI.getType(MI.getOperand(1).getReg());
if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
@ -332,6 +350,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
}
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
if (DstTy.isVector())
break;
if (TL.getBooleanContents(DstTy.isVector(),
Opcode == TargetOpcode::G_FCMP) ==
TargetLowering::ZeroOrOneBooleanContent &&
@ -369,6 +389,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_ZEXTLOAD: {
if (DstTy.isVector())
break;
// Everything above the retrieved bits is zero
Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
break;
@ -402,6 +424,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
}
case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_PTRTOINT:
if (DstTy.isVector())
break;
// Fall through and handle them the same as zext/trunc.
LLVM_FALLTHROUGH;
case TargetOpcode::G_ASSERT_ZEXT:
@ -440,6 +464,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_UNMERGE_VALUES: {
if (DstTy.isVector())
break;
unsigned NumOps = MI.getNumOperands();
Register SrcReg = MI.getOperand(NumOps - 1).getReg();
if (MRI.getType(SrcReg).isVector())

View File

@ -156,3 +156,74 @@ body: |
%rem:_(<2 x s16>) = G_UREM %var, %four_vec
$vgpr0 = COPY %rem
...
---
# G_UREM of a <2 x s32> value by a G_BUILD_VECTOR splat of the constant 4096.
# The GCN check lines expect the G_UREM to be rewritten as
# G_AND %var, (%pow2_vec + splat(-1)).
name: v_urem_v2i32_pow2k_denom
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GCN-LABEL: name: v_urem_v2i32_pow2k_denom
; GCN: liveins: $vgpr0_vgpr1
; GCN: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GCN: %pow2:_(s32) = G_CONSTANT i32 4096
; GCN: %pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2(s32), %pow2(s32)
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
; GCN: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD %pow2_vec, [[BUILD_VECTOR]]
; GCN: %rem:_(<2 x s32>) = G_AND %var, [[ADD]]
; GCN: $vgpr0_vgpr1 = COPY %rem(<2 x s32>)
%var:_(<2 x s32>) = COPY $vgpr0_vgpr1
%pow2:_(s32) = G_CONSTANT i32 4096
%pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2(s32), %pow2(s32)
%rem:_(<2 x s32>) = G_UREM %var, %pow2_vec
$vgpr0_vgpr1 = COPY %rem
...
---
# G_UREM of a <2 x s32> value by a G_BUILD_VECTOR whose elements are two
# different constants (4096 and 2048). The GCN check lines expect the G_UREM to
# be left untouched.
# NOTE(review): presumably because only bits common to every element are
# treated as known, so the divisor is not provably a power of two - confirm.
name: v_urem_v2i32_pow2k_not_splat_denom
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GCN-LABEL: name: v_urem_v2i32_pow2k_not_splat_denom
; GCN: liveins: $vgpr0_vgpr1
; GCN: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GCN: %pow2_1:_(s32) = G_CONSTANT i32 4096
; GCN: %pow2_2:_(s32) = G_CONSTANT i32 2048
; GCN: %pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2_1(s32), %pow2_2(s32)
; GCN: %rem:_(<2 x s32>) = G_UREM %var, %pow2_vec
; GCN: $vgpr0_vgpr1 = COPY %rem(<2 x s32>)
%var:_(<2 x s32>) = COPY $vgpr0_vgpr1
%pow2_1:_(s32) = G_CONSTANT i32 4096
%pow2_2:_(s32) = G_CONSTANT i32 2048
%pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2_1(s32), %pow2_2(s32)
%rem:_(<2 x s32>) = G_UREM %var, %pow2_vec
$vgpr0_vgpr1 = COPY %rem
...
---
# G_UREM of a <2 x s64> value by a G_BUILD_VECTOR splat of the constant 4096.
# The GCN check lines expect the G_UREM to be rewritten as
# G_AND %var, (%pow2_vec + splat(-1)).
name: v_urem_v2i64_pow2k_denom
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GCN-LABEL: name: v_urem_v2i64_pow2k_denom
; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GCN: %var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GCN: %pow2:_(s64) = G_CONSTANT i64 4096
; GCN: %pow2_vec:_(<2 x s64>) = G_BUILD_VECTOR %pow2(s64), %pow2(s64)
; GCN: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
; GCN: [[ADD:%[0-9]+]]:_(<2 x s64>) = G_ADD %pow2_vec, [[BUILD_VECTOR]]
; GCN: %rem:_(<2 x s64>) = G_AND %var, [[ADD]]
; GCN: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %rem(<2 x s64>)
%var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%pow2:_(s64) = G_CONSTANT i64 4096
%pow2_vec:_(<2 x s64>) = G_BUILD_VECTOR %pow2(s64), %pow2(s64)
%rem:_(<2 x s64>) = G_UREM %var, %pow2_vec
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %rem
...

View File

@ -215,45 +215,13 @@ define i32 @v_urem_i32_pow2k_denom(i32 %num) {
}
define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) {
; GISEL-LABEL: v_urem_v2i32_pow2k_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_movk_i32 s4, 0x1000
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s4
; GISEL-NEXT: s_sub_i32 s5, 0, s4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT: v_mul_lo_u32 v3, s5, v2
; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_mul_hi_u32 v3, v0, v2
; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2
; GISEL-NEXT: v_lshlrev_b32_e32 v3, 12, v3
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 12, v2
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i32_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_add_i32 s4, 0x1000, -1
; CGP-NEXT: v_and_b32_e32 v0, s4, v0
; CGP-NEXT: v_and_b32_e32 v1, s4, v1
; CGP-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: v_urem_v2i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_add_i32 s4, 0x1000, -1
; CHECK-NEXT: v_and_b32_e32 v0, s4, v0
; CHECK-NEXT: v_and_b32_e32 v1, s4, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = urem <2 x i32> %num, <i32 4096, i32 4096>
ret <2 x i32> %result
}

View File

@ -962,286 +962,25 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) {
}
define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-LABEL: v_urem_v2i64_pow2k_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_movk_i32 s10, 0x1000
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s10
; GISEL-NEXT: s_sub_u32 s8, 0, s10
; GISEL-NEXT: s_cselect_b32 s4, 1, 0
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
; GISEL-NEXT: v_mov_b32_e32 v6, v4
; GISEL-NEXT: s_and_b32 s4, s4, 1
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6
; GISEL-NEXT: s_cmp_lg_u32 s4, 0
; GISEL-NEXT: s_subb_u32 s9, 0, 0
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4
; GISEL-NEXT: s_sub_u32 s11, 0, s10
; GISEL-NEXT: s_cselect_b32 s4, 1, 0
; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
; GISEL-NEXT: v_trunc_f32_e32 v6, v6
; GISEL-NEXT: s_and_b32 s4, s4, 1
; GISEL-NEXT: v_trunc_f32_e32 v7, v7
; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: s_cmp_lg_u32 s4, 0
; GISEL-NEXT: s_subb_u32 s6, 0, 0
; GISEL-NEXT: v_mul_lo_u32 v8, s11, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7
; GISEL-NEXT: v_mul_lo_u32 v10, s11, v4
; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4
; GISEL-NEXT: v_mul_hi_u32 v12, s11, v4
; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5
; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5
; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10
; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10
; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9
; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13
; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13
; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8
; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8
; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8
; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8
; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9
; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9
; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16
; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8
; GISEL-NEXT: v_mul_lo_u32 v8, s11, v4
; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4
; GISEL-NEXT: v_mul_hi_u32 v12, s11, v4
; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13
; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9
; GISEL-NEXT: v_mul_lo_u32 v9, s8, v5
; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5
; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5
; GISEL-NEXT: v_mul_lo_u32 v16, s11, v10
; GISEL-NEXT: v_mul_lo_u32 v17, v10, v8
; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8
; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8
; GISEL-NEXT: v_mul_lo_u32 v19, s8, v13
; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16
; GISEL-NEXT: v_mul_lo_u32 v16, v13, v9
; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19
; GISEL-NEXT: v_mul_hi_u32 v19, v5, v9
; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9
; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12
; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15
; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11
; GISEL-NEXT: v_mul_lo_u32 v15, v5, v12
; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19
; GISEL-NEXT: v_mul_lo_u32 v15, v10, v11
; GISEL-NEXT: v_mul_hi_u32 v19, v4, v11
; GISEL-NEXT: v_mul_hi_u32 v10, v10, v11
; GISEL-NEXT: v_mul_lo_u32 v11, v13, v12
; GISEL-NEXT: v_mul_hi_u32 v13, v13, v12
; GISEL-NEXT: v_mul_hi_u32 v12, v5, v12
; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14
; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18
; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19
; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12
; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12
; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14
; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12
; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11
; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc
; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4
; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
; GISEL-NEXT: v_mul_lo_u32 v9, v1, v5
; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5
; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v12, v2, v6
; GISEL-NEXT: v_mul_lo_u32 v13, v3, v6
; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6
; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
; GISEL-NEXT: v_mul_lo_u32 v15, v0, v7
; GISEL-NEXT: v_mul_lo_u32 v16, v1, v7
; GISEL-NEXT: v_mul_hi_u32 v17, v0, v7
; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8
; GISEL-NEXT: v_mul_lo_u32 v10, s10, v4
; GISEL-NEXT: v_mul_lo_u32 v12, 0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, s10, v4
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
; GISEL-NEXT: v_mul_lo_u32 v11, s10, v5
; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5
; GISEL-NEXT: v_mul_hi_u32 v5, s10, v5
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9
; GISEL-NEXT: v_mul_lo_u32 v6, s10, v6
; GISEL-NEXT: v_mul_lo_u32 v7, s10, v7
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11
; GISEL-NEXT: v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5
; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7]
; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v6
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7]
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v7
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, s10, v2
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v8
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
; GISEL-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
; GISEL-NEXT: v_subrev_i32_e32 v9, vcc, s10, v0
; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v9
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v3
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc
; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, s10, v8
; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc
; GISEL-NEXT: v_subrev_i32_e32 v16, vcc, s10, v9
; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v11, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11
; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5]
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5
; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i64_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_movk_i32 s4, 0x1000
; CGP-NEXT: s_add_u32 s5, s4, -1
; CGP-NEXT: s_cselect_b32 s6, 1, 0
; CGP-NEXT: s_and_b32 s6, s6, 1
; CGP-NEXT: s_cmp_lg_u32 s6, 0
; CGP-NEXT: s_addc_u32 s6, 0, -1
; CGP-NEXT: s_add_u32 s4, s4, -1
; CGP-NEXT: s_cselect_b32 s7, 1, 0
; CGP-NEXT: v_and_b32_e32 v0, s5, v0
; CGP-NEXT: s_and_b32 s5, s7, 1
; CGP-NEXT: v_and_b32_e32 v1, s6, v1
; CGP-NEXT: s_cmp_lg_u32 s5, 0
; CGP-NEXT: s_addc_u32 s5, 0, -1
; CGP-NEXT: v_and_b32_e32 v2, s4, v2
; CGP-NEXT: v_and_b32_e32 v3, s5, v3
; CGP-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: v_urem_v2i64_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_movk_i32 s4, 0x1000
; CHECK-NEXT: s_add_u32 s5, s4, -1
; CHECK-NEXT: s_cselect_b32 s6, 1, 0
; CHECK-NEXT: s_and_b32 s6, s6, 1
; CHECK-NEXT: s_cmp_lg_u32 s6, 0
; CHECK-NEXT: s_addc_u32 s6, 0, -1
; CHECK-NEXT: s_add_u32 s4, s4, -1
; CHECK-NEXT: s_cselect_b32 s7, 1, 0
; CHECK-NEXT: v_and_b32_e32 v0, s5, v0
; CHECK-NEXT: s_and_b32 s5, s7, 1
; CHECK-NEXT: v_and_b32_e32 v1, s6, v1
; CHECK-NEXT: s_cmp_lg_u32 s5, 0
; CHECK-NEXT: s_addc_u32 s5, 0, -1
; CHECK-NEXT: v_and_b32_e32 v2, s4, v2
; CHECK-NEXT: v_and_b32_e32 v3, s5, v3
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = urem <2 x i64> %num, <i64 4096, i64 4096>
ret <2 x i64> %result
}

View File

@ -20,5 +20,6 @@ add_llvm_unittest(GlobalISelTests
GISelMITest.cpp
PatternMatchTest.cpp
KnownBitsTest.cpp
KnownBitsVectorTest.cpp
GISelUtilsTest.cpp
)

View File

@ -259,7 +259,106 @@ TEST_F(AArch64GISelMITest, TestKnownBitsPtrToIntViceVersa) {
EXPECT_EQ(256u, Res.One.getZExtValue());
EXPECT_EQ(0xfffffeffu, Res.Zero.getZExtValue());
}
// Known bits of a G_AND whose two operands are each only partially known.
TEST_F(AArch64GISelMITest, TestKnownBitsAND) {
  StringRef Input = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 52
%mask1:_(s8) = G_CONSTANT i8 10
%tmp0:_(s8) = G_AND %unknown, %mask0
%val0:_(s8) = G_OR %tmp0, %mask1
%mask2:_(s8) = G_CONSTANT i8 32
%mask3:_(s8) = G_CONSTANT i8 24
%tmp1:_(s8) = G_AND %unknown, %mask2
%val1:_(s8) = G_OR %tmp1, %mask3
%and:_(s8) = G_AND %val0, %val1
%copy_and:_(s8) = COPY %and
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // The register under test is the source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register AndReg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res = Analysis.getKnownBits(AndReg);

  //   val0 = 00??1?10
  //   val1 = 00?11000
  //   and  = 00??1000
  EXPECT_EQ(0x08u, Res.One.getZExtValue());
  EXPECT_EQ(0xC7u, Res.Zero.getZExtValue());
}
// Known bits of a G_OR whose two operands are each only partially known.
TEST_F(AArch64GISelMITest, TestKnownBitsOR) {
  StringRef Input = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 52
%mask1:_(s8) = G_CONSTANT i8 10
%tmp0:_(s8) = G_AND %unknown, %mask0
%val0:_(s8) = G_OR %tmp0, %mask1
%mask2:_(s8) = G_CONSTANT i8 32
%mask3:_(s8) = G_CONSTANT i8 24
%tmp1:_(s8) = G_AND %unknown, %mask2
%val1:_(s8) = G_OR %tmp1, %mask3
%or:_(s8) = G_OR %val0, %val1
%copy_or:_(s8) = COPY %or
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // The register under test is the source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register OrReg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res = Analysis.getKnownBits(OrReg);

  //   val0 = 00??1?10
  //   val1 = 00?11000
  //   or   = 00?11?10
  EXPECT_EQ(0x1Au, Res.One.getZExtValue());
  EXPECT_EQ(0xC1u, Res.Zero.getZExtValue());
}
// Known bits of a G_XOR whose two operands are each only partially known.
TEST_F(AArch64GISelMITest, TestKnownBitsXOR) {
  StringRef Input = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 52
%mask1:_(s8) = G_CONSTANT i8 10
%tmp0:_(s8) = G_AND %unknown, %mask0
%val0:_(s8) = G_OR %tmp0, %mask1
%mask2:_(s8) = G_CONSTANT i8 32
%mask3:_(s8) = G_CONSTANT i8 24
%tmp1:_(s8) = G_AND %unknown, %mask2
%val1:_(s8) = G_OR %tmp1, %mask3
%xor:_(s8) = G_XOR %val0, %val1
%copy_xor:_(s8) = COPY %xor
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // The register under test is the source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register XorReg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res = Analysis.getKnownBits(XorReg);

  // Xor KnownBits does not track whether an unknown bit is being xor-ed with
  // itself or with its own negation, so such bits stay unknown.
  //   val0 = 00??1?10
  //   val1 = 00?11000
  //   xor  = 00??0?10
  EXPECT_EQ(0x02u, Res.One.getZExtValue());
  EXPECT_EQ(0xC9u, Res.Zero.getZExtValue());
}
TEST_F(AArch64GISelMITest, TestKnownBitsXORConstant) {
StringRef MIRString = " %3:_(s8) = G_CONSTANT i8 4\n"
" %4:_(s8) = G_CONSTANT i8 7\n"
" %5:_(s8) = G_XOR %3, %4\n"
@ -276,6 +375,299 @@ TEST_F(AArch64GISelMITest, TestKnownBitsXOR) {
EXPECT_EQ(252u, Res.Zero.getZExtValue());
}
// Known bits of G_ASHR by a constant, once with a known sign bit and once with
// an unknown sign bit.
TEST_F(AArch64GISelMITest, TestKnownBitsASHR) {
  StringRef Input = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 38
%mask1:_(s8) = G_CONSTANT i8 202
%tmp0:_(s8) = G_AND %unknown, %mask0
%val0:_(s8) = G_OR %tmp0, %mask1
%cst0:_(s8) = G_CONSTANT i8 2
%ashr0:_(s8) = G_ASHR %val0, %cst0
%copy_ashr0:_(s8) = COPY %ashr0
%mask2:_(s8) = G_CONSTANT i8 204
%mask3:_(s8) = G_CONSTANT i8 18
%tmp1:_(s8) = G_AND %unknown, %mask2
%val1:_(s8) = G_OR %tmp1, %mask3
%ashr1:_(s8) = G_ASHR %val1, %cst0
%copy_ashr1:_(s8) = COPY %ashr1
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // First shift: source operand of the second-to-last COPY.
  Register PenultimateCopy = Copies[Copies.size() - 2];
  Register Ashr0Reg = MRI->getVRegDef(PenultimateCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res0 = Analysis.getKnownBits(Ashr0Reg);
  // Sign bit is known one, so ones are shifted in.
  //   11?01?10 >> 2
  // = 1111?01?
  EXPECT_EQ(0xF2u, Res0.One.getZExtValue());
  EXPECT_EQ(0x04u, Res0.Zero.getZExtValue());

  // Second shift: source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register Ashr1Reg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  KnownBits Res1 = Analysis.getKnownBits(Ashr1Reg);
  // Sign bit is unknown, so the shifted-in bits are unknown too.
  //   ??01??10 >> 2
  // = ????01??
  EXPECT_EQ(0x04u, Res1.One.getZExtValue());
  EXPECT_EQ(0x08u, Res1.Zero.getZExtValue());
}
// Known bits of G_LSHR by a constant for two partially known inputs.
TEST_F(AArch64GISelMITest, TestKnownBitsLSHR) {
  StringRef Input = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 38
%mask1:_(s8) = G_CONSTANT i8 202
%tmp0:_(s8) = G_AND %unknown, %mask0
%val0:_(s8) = G_OR %tmp0, %mask1
%cst0:_(s8) = G_CONSTANT i8 2
%lshr0:_(s8) = G_LSHR %val0, %cst0
%copy_lshr0:_(s8) = COPY %lshr0
%mask2:_(s8) = G_CONSTANT i8 204
%mask3:_(s8) = G_CONSTANT i8 18
%tmp1:_(s8) = G_AND %unknown, %mask2
%val1:_(s8) = G_OR %tmp1, %mask3
%lshr1:_(s8) = G_LSHR %val1, %cst0
%copy_lshr1:_(s8) = COPY %lshr1
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // First shift: source operand of the second-to-last COPY.
  Register PenultimateCopy = Copies[Copies.size() - 2];
  Register Lshr0Reg = MRI->getVRegDef(PenultimateCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res0 = Analysis.getKnownBits(Lshr0Reg);
  // Zeros are shifted in from the top.
  //   11?01?10 >> 2
  // = 0011?01?
  EXPECT_EQ(0x32u, Res0.One.getZExtValue());
  EXPECT_EQ(0xC4u, Res0.Zero.getZExtValue());

  // Second shift: source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register Lshr1Reg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  KnownBits Res1 = Analysis.getKnownBits(Lshr1Reg);
  //   ??01??10 >> 2
  // = 00??01??
  EXPECT_EQ(0x04u, Res1.One.getZExtValue());
  EXPECT_EQ(0xC8u, Res1.Zero.getZExtValue());
}
// Known bits of G_SHL by a constant for a partially known input.
TEST_F(AArch64GISelMITest, TestKnownBitsSHL) {
  StringRef Input = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 51
%mask1:_(s8) = G_CONSTANT i8 72
%tmp:_(s8) = G_AND %unknown, %mask0
%val:_(s8) = G_OR %tmp, %mask1
%cst:_(s8) = G_CONSTANT i8 3
%shl:_(s8) = G_SHL %val, %cst
%copy_shl:_(s8) = COPY %shl
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // The register under test is the source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register ShlReg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res = Analysis.getKnownBits(ShlReg);

  // The low three bits become known zero.
  //   01??10?? << 3
  // = ?10??000
  EXPECT_EQ(0x40u, Res.One.getZExtValue());
  EXPECT_EQ(0x27u, Res.Zero.getZExtValue());
}
// Known bits of a 16-bit G_ADD whose two operands are each partially known.
TEST_F(AArch64GISelMITest, TestKnownBitsADD) {
  StringRef Input = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s16) = G_LOAD %ptr(p0) :: (load 2)
%mask0:_(s16) = G_CONSTANT i16 4642
%mask1:_(s16) = G_CONSTANT i16 9536
%tmp0:_(s16) = G_AND %unknown, %mask0
%val0:_(s16) = G_OR %tmp0, %mask1
%mask2:_(s16) = G_CONSTANT i16 4096
%mask3:_(s16) = G_CONSTANT i16 371
%tmp1:_(s16) = G_AND %unknown, %mask2
%val1:_(s16) = G_OR %tmp1, %mask3
%add:_(s16) = G_ADD %val0, %val1
%copy_add:_(s16) = COPY %add
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // The register under test is the source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register AddReg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res = Analysis.getKnownBits(AddReg);

  // Add KnownBits first works out which carry bits are known and then
  // computes the result from them.
  //   val0 = 001?01?101?000?0
  //   val1 = 000?000101110011
  //   add  = 0??????01??10??1
  EXPECT_EQ(0x0091u, Res.One.getZExtValue());
  EXPECT_EQ(0x8108u, Res.Zero.getZExtValue());
}
// Known bits of a 16-bit G_SUB whose two operands are each partially known.
TEST_F(AArch64GISelMITest, TestKnownBitsSUB) {
  StringRef Input = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s16) = G_LOAD %ptr(p0) :: (load 2)
%mask0:_(s16) = G_CONSTANT i16 4642
%mask1:_(s16) = G_CONSTANT i16 9536
%tmp0:_(s16) = G_AND %unknown, %mask0
%val0:_(s16) = G_OR %tmp0, %mask1
%mask2:_(s16) = G_CONSTANT i16 4096
%mask3:_(s16) = G_CONSTANT i16 371
%tmp1:_(s16) = G_AND %unknown, %mask2
%val1:_(s16) = G_OR %tmp1, %mask3
%sub:_(s16) = G_SUB %val0, %val1
%copy_sub:_(s16) = COPY %sub
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // The register under test is the source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register SubReg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res = Analysis.getKnownBits(SubReg);

  // Sub KnownBits for LHS - RHS reuses Add KnownBits on LHS + ~RHS + 1.
  EXPECT_EQ(0x01CDu, Res.One.getZExtValue());
  EXPECT_EQ(0xC810u, Res.Zero.getZExtValue());
}
// Known bits of a 16-bit G_MUL of a partially known value by a constant.
TEST_F(AArch64GISelMITest, TestKnownBitsMUL) {
  StringRef Input = R"(
%ptr0:_(p0) = G_IMPLICIT_DEF
%load0:_(s16) = G_LOAD %ptr0(p0) :: (load 2)
%mask0:_(s16) = G_CONSTANT i16 4
%mask1:_(s16) = G_CONSTANT i16 18
%tmp:_(s16) = G_AND %load0, %mask0
%val0:_(s16) = G_OR %tmp, %mask1
%cst:_(s16) = G_CONSTANT i16 12
%mul:_(s16) = G_MUL %val0, %cst
%copy_mul:_(s16) = COPY %mul
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // The register under test is the source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register MulReg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res = Analysis.getKnownBits(MulReg);

  // Mul KnownBits is conservatively correct but not guaranteed to be precise;
  // it is precise for the trailing bits up to the first unknown bit.
  //   00010?10 * 00001100 =
  //          00010?1000
  //   +     00010?10000
  //   = 0000000010??1000   (exact product)
  //  KB 0000000?????1000   (what the analysis reports)
  EXPECT_EQ(0x0008u, Res.One.getZExtValue());
  EXPECT_EQ(0xFE07u, Res.Zero.getZExtValue());
}
// Known bits of a G_ICMP whose result lives in a 32-bit register.
TEST_F(AArch64GISelMITest, TestKnownBitsICMP) {
  StringRef Input = R"(
%cst0:_(s32) = G_CONSTANT i32 0
%cst1:_(s32) = G_CONSTANT i32 1
%icmp:_(s32) = G_ICMP intpred(ne), %cst0, %cst1
%copy_icmp:_(s32) = COPY %icmp
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // The register under test is the source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register CmpReg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res = Analysis.getKnownBits(CmpReg);

  // On targets where an icmp yields 0 or 1 in a wider register, every bit
  // above bit 0 is reported as zero. Neither the operands nor the predicate
  // are analyzed, so bit 0 stays unknown even for constant inputs.
  EXPECT_EQ(0x00000000u, Res.One.getZExtValue());
  EXPECT_EQ(0xFFFFFFFEu, Res.Zero.getZExtValue());
}
// Known bits of a G_FCMP whose result lives in a 32-bit register.
TEST_F(AArch64GISelMITest, TestKnownBitsFCMP) {
  StringRef Input = R"(
%cst0:_(s32) = G_FCONSTANT float 0.0
%cst1:_(s32) = G_FCONSTANT float 1.0
%fcmp:_(s32) = G_FCMP floatpred(one), %cst0, %cst1
%copy_fcmp:_(s32) = COPY %fcmp
)";

  setUp(Input);
  // Nothing to check when no TM is available after setUp.
  if (!TM)
    return;

  // The register under test is the source operand of the last COPY.
  Register LastCopy = Copies[Copies.size() - 1];
  Register CmpReg = MRI->getVRegDef(LastCopy)->getOperand(1).getReg();

  GISelKnownBits Analysis(*MF);
  KnownBits Res = Analysis.getKnownBits(CmpReg);

  // On targets where an fcmp yields 0 or 1 in a wider register, every bit
  // above bit 0 is reported as zero. Neither the operands nor the predicate
  // are analyzed, so bit 0 stays unknown even for constant inputs.
  EXPECT_EQ(0x00000000u, Res.One.getZExtValue());
  EXPECT_EQ(0xFFFFFFFEu, Res.Zero.getZExtValue());
}
// Checks the known bits reported for a G_SELECT between two partially known
// s8 values.
TEST_F(AArch64GISelMITest, TestKnownBitsSelect) {
  StringRef Program = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 24
%mask1:_(s8) = G_CONSTANT i8 224
%tmp0:_(s8) = G_AND %unknown, %mask0
%val0:_(s8) = G_OR %tmp0, %mask1
%mask2:_(s8) = G_CONSTANT i8 146
%mask3:_(s8) = G_CONSTANT i8 36
%tmp1:_(s8) = G_AND %unknown, %mask2
%val1:_(s8) = G_OR %tmp1, %mask3
%cond:_(s1) = G_CONSTANT i1 false
%select:_(s8) = G_SELECT %cond, %val0, %val1
%copy_select:_(s8) = COPY %select
)";
  setUp(Program);
  // Without a TM after setUp there is nothing to test.
  if (!TM)
    return;

  // The value under test is the source operand of the last recorded COPY.
  Register SelectReg = MRI->getVRegDef(Copies.back())->getOperand(1).getReg();
  GISelKnownBits KB(*MF);
  const KnownBits Known = KB.getKnownBits(SelectReg);

  // The condition operand is ignored; the result keeps only the bits on which
  // both selectable values agree.
  //   val0 (and 24, or 224):   111??000
  //   val1 (and 146, or 36):   ?01?01?0
  //   select:                  ??1????0
  EXPECT_EQ(0x20u, Known.One.getZExtValue());
  EXPECT_EQ(0x01u, Known.Zero.getZExtValue());
}
TEST_F(AArch64GISelMITest, TestKnownBits) {
StringRef MIR = " %3:_(s32) = G_TRUNC %0\n"
@ -995,6 +1387,63 @@ TEST_F(AArch64GISelMITest, TestKnownBitsBSwapBitReverse) {
EXPECT_EQ(~TestVal, BitReverseKnown.Zero.getZExtValue());
}
// Checks the known bits reported for two G_UMAX instructions over partially
// known s8 operands.
TEST_F(AArch64GISelMITest, TestKnownBitsUMAX) {
  StringRef Program = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 10
%mask1:_(s8) = G_CONSTANT i8 1
%tmp0:_(s8) = G_AND %unknown, %mask0
%val0:_(s8) = G_OR %tmp0, %mask1
%mask2:_(s8) = G_CONSTANT i8 3
%mask3:_(s8) = G_CONSTANT i8 12
%tmp1:_(s8) = G_AND %unknown, %mask2
%val1:_(s8) = G_OR %tmp1, %mask3
%umax0:_(s8) = G_UMAX %val0, %val1
%copy_umax0:_(s8) = COPY %umax0
%mask4:_(s8) = G_CONSTANT i8 14
%mask5:_(s8) = G_CONSTANT i8 2
%tmp3:_(s8) = G_AND %unknown, %mask4
%val3:_(s8) = G_OR %tmp3, %mask5
%mask6:_(s8) = G_CONSTANT i8 4
%mask7:_(s8) = G_CONSTANT i8 11
%tmp4:_(s8) = G_AND %unknown, %mask6
%val4:_(s8) = G_OR %tmp4, %mask7
%umax1:_(s8) = G_UMAX %val3, %val4
%copy_umax1:_(s8) = COPY %umax1
)";
  setUp(Program);
  // Without a TM after setUp there is nothing to test.
  if (!TM)
    return;

  // Returns the source register of the COPY that sits `FromEnd` entries from
  // the end of Copies (1 is the last copy, 2 the one before it).
  auto CopySource = [&](unsigned FromEnd) -> Register {
    MachineInstr *CopyMI = MRI->getVRegDef(Copies[Copies.size() - FromEnd]);
    return CopyMI->getOperand(1).getReg();
  };

  Register FirstUMax = CopySource(2);
  GISelKnownBits KB(*MF);

  // The analysis compares the minimum and maximum of both operands, where the
  // minimum takes unknown bits as 0 and the maximum takes them as 1. When
  // min(LHS) >= max(RHS) the result is the KnownBits of LHS, and likewise with
  // the roles swapped. Otherwise it falls back to individual bits: the bits
  // common to both operands plus, in some cases, a few leading bits.
  //   val0:   0000?0?1
  //   val1:   000011??
  //   umax:   000011??
  const KnownBits KnownFirst = KB.getKnownBits(FirstUMax);
  EXPECT_EQ(0x0Cu, KnownFirst.One.getZExtValue());
  EXPECT_EQ(0xF0u, KnownFirst.Zero.getZExtValue());

  Register SecondUMax = CopySource(1);
  const KnownBits KnownSecond = KB.getKnownBits(SecondUMax);
  //   val3:   0000??10
  //   val4:   00001?11
  //   umax:   00001?1?
  EXPECT_EQ(0x0Au, KnownSecond.One.getZExtValue());
  EXPECT_EQ(0xF0u, KnownSecond.Zero.getZExtValue());
}
TEST_F(AArch64GISelMITest, TestKnownBitsUMax) {
StringRef MIRString = R"(
%val:_(s32) = COPY $w0
@ -1019,6 +1468,110 @@ TEST_F(AArch64GISelMITest, TestKnownBitsUMax) {
EXPECT_EQ(0xffffffffffffff00, KnownUmax.One.getZExtValue());
}
// Checks the known bits reported for a G_UMIN over two partially known s8
// operands.
TEST_F(AArch64GISelMITest, TestKnownBitsUMIN) {
  StringRef Program = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 10
%mask1:_(s8) = G_CONSTANT i8 1
%tmp0:_(s8) = G_AND %unknown, %mask0
%val0:_(s8) = G_OR %tmp0, %mask1
%mask2:_(s8) = G_CONSTANT i8 3
%mask3:_(s8) = G_CONSTANT i8 12
%tmp1:_(s8) = G_AND %unknown, %mask2
%val1:_(s8) = G_OR %tmp1, %mask3
%umin:_(s8) = G_UMIN %val0, %val1
%copy_umin:_(s8) = COPY %umin
)";
  setUp(Program);
  // Without a TM after setUp there is nothing to test.
  if (!TM)
    return;

  // The value under test is the source operand of the last recorded COPY.
  Register UMinReg = MRI->getVRegDef(Copies.back())->getOperand(1).getReg();
  GISelKnownBits KB(*MF);
  const KnownBits Known = KB.getKnownBits(UMinReg);

  // umin is evaluated through umax: the operand range is flipped
  // ([0, 0xFFFFFFFF] <-> [0xFFFFFFFF, 0]), umax is applied, and the result is
  // flipped back.
  //   val0:   0000?0?1
  //   val1:   000011??
  //   umin:   0000?0?1
  EXPECT_EQ(0x01u, Known.One.getZExtValue());
  EXPECT_EQ(0xF4u, Known.Zero.getZExtValue());
}
// Checks the known bits reported for a G_SMAX over two partially known s8
// operands.
TEST_F(AArch64GISelMITest, TestKnownBitsSMAX) {
  StringRef Program = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 128
%mask1:_(s8) = G_CONSTANT i8 64
%tmp0:_(s8) = G_AND %unknown, %mask0
%val0:_(s8) = G_OR %tmp0, %mask1
%mask2:_(s8) = G_CONSTANT i8 1
%mask3:_(s8) = G_CONSTANT i8 128
%tmp1:_(s8) = G_AND %unknown, %mask2
%val1:_(s8) = G_OR %tmp1, %mask3
%smax:_(s8) = G_SMAX %val0, %val1
%copy_smax:_(s8) = COPY %smax
)";
  setUp(Program);
  // Without a TM after setUp there is nothing to test.
  if (!TM)
    return;

  // The value under test is the source operand of the last recorded COPY.
  Register SMaxReg = MRI->getVRegDef(Copies.back())->getOperand(1).getReg();
  GISelKnownBits KB(*MF);
  const KnownBits Known = KB.getKnownBits(SMaxReg);

  // smax is evaluated through umax: the operand range is flipped
  // ([-0x80000000, 0x7FFFFFFF] <-> [0, 0xFFFFFFFF]), umax is applied, and the
  // result is flipped back.
  // Here RHS is negative, while LHS is either positive or a negative value
  // with a smaller absolute value.
  //   val0:   ?1000000
  //   val1:   1000000?
  //   smax:   ?1000000
  EXPECT_EQ(0x40u, Known.One.getZExtValue());
  EXPECT_EQ(0x3Fu, Known.Zero.getZExtValue());
}
// Checks the known bits reported for a G_SMIN over two partially known s8
// operands.
TEST_F(AArch64GISelMITest, TestKnownBitsSMIN) {
  StringRef Program = R"(
%ptr:_(p0) = G_IMPLICIT_DEF
%unknown:_(s8) = G_LOAD %ptr(p0) :: (load 1)
%mask0:_(s8) = G_CONSTANT i8 128
%mask1:_(s8) = G_CONSTANT i8 64
%tmp0:_(s8) = G_AND %unknown, %mask0
%val0:_(s8) = G_OR %tmp0, %mask1
%mask2:_(s8) = G_CONSTANT i8 1
%mask3:_(s8) = G_CONSTANT i8 128
%tmp1:_(s8) = G_AND %unknown, %mask2
%val1:_(s8) = G_OR %tmp1, %mask3
%smin:_(s8) = G_SMIN %val0, %val1
%copy_smin:_(s8) = COPY %smin
)";
  setUp(Program);
  // Without a TM after setUp there is nothing to test.
  if (!TM)
    return;

  // The value under test is the source operand of the last recorded COPY.
  Register SMinReg = MRI->getVRegDef(Copies.back())->getOperand(1).getReg();
  GISelKnownBits KB(*MF);
  const KnownBits Known = KB.getKnownBits(SMinReg);

  // smin is evaluated through umax: the operand range is flipped
  // ([-0x80000000, 0x7FFFFFFF] <-> [0xFFFFFFFF, 0]), umax is applied, and the
  // result is flipped back.
  // Here RHS is negative, while LHS is either positive or a negative value
  // with a smaller absolute value.
  //   val0:   ?1000000
  //   val1:   1000000?
  //   smin:   1000000?
  EXPECT_EQ(0x80u, Known.One.getZExtValue());
  EXPECT_EQ(0x7Eu, Known.Zero.getZExtValue());
}
TEST_F(AArch64GISelMITest, TestInvalidQueries) {
StringRef MIRString = R"(
%src:_(s32) = COPY $w0

File diff suppressed because it is too large Load Diff