
[GlobalISel][AArch64] Combine and (lshr x, cst), mask -> ubfx x, cst, width

Also add a target hook which reports whether a constant G_UBFX is legal, so the
combine can get around custom legalization on AArch64.

Differential Revision: https://reviews.llvm.org/D99283
Author: Jessica Paquette
Date: 2021-03-24 10:52:21 -07:00
parent b2dfe60e88
commit 852a8449e7
8 changed files with 344 additions and 2 deletions
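As an illustration of the arithmetic the new combine relies on (a standalone sketch, not part of the patch, with no LLVM dependencies): an AND immediate is a mask of the low bits exactly when imm & (imm + 1) == 0, and in that case the number of trailing ones gives the width of the extracted bitfield.

// Standalone sketch of the low-bit-mask check used by the combine below.
// Illustrative only; hypothetical helper, not part of the patch.
#include <cassert>
#include <cstdint>

static bool isLowBitMask(uint64_t Imm, unsigned &Width) {
  if (Imm & (Imm + 1)) // Adding 1 to a run of low ones clears every one bit.
    return false;
  Width = 0;
  while (Imm & 1) { // Count the trailing ones, i.e. the bitfield width.
    ++Width;
    Imm >>= 1;
  }
  return true;
}

int main() {
  unsigned Width;
  assert(isLowBitMask(0xFF, Width) && Width == 8); // and (lshr x, 5), 0xFF -> ubfx x, 5, 8
  assert(!isLowBitMask(0x2, Width));               // 0b10 is not a run of low ones; no combine
  return 0;
}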

View File

@@ -517,6 +517,10 @@ public:
/// or false constant based off of KnownBits information.
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo);
/// Match: and (lshr x, cst), mask -> ubfx x, cst, width
bool matchBitfieldExtractFromAnd(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);

View File

@@ -1793,6 +1793,12 @@ public:
/// Should be used only when getIRStackGuard returns nullptr.
virtual Function *getSSPStackGuardCheck(const Module &M) const;
/// \returns true if a constant G_UBFX is legal on the target.
virtual bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
LLT Ty2) const {
return false;
}
protected:
Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
bool UseTLS) const;

View File

@@ -620,6 +620,12 @@ def icmp_to_true_false_known_bits : GICombineRule<
[{ return Helper.matchICmpToTrueFalseKnownBits(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
def bitfield_extract_from_and : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_AND):$root,
[{ return Helper.matchBitfieldExtractFromAnd(*${root}, ${info}); }]),
(apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>;
def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>;
// FIXME: These should use the custom predicate feature once it lands.
@@ -664,7 +670,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
unmerge_zext_to_zext, trunc_ext_fold, trunc_shl,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
div_rem_to_divrem, funnel_shift_combines]>;
div_rem_to_divrem, funnel_shift_combines, bitfield_extract_from_and]>;
// A combine group used for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and

View File

@@ -3986,6 +3986,41 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
return true;
}
bool CombinerHelper::matchBitfieldExtractFromAnd(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_AND);
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
TargetOpcode::G_UBFX, Ty, Ty))
return false;
int64_t AndImm, LSBImm;
Register ShiftSrc;
const unsigned Size = Ty.getScalarSizeInBits();
if (!mi_match(MI.getOperand(0).getReg(), MRI,
m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
m_ICst(AndImm))))
return false;
// The mask is a mask of the low bits iff imm & (imm+1) == 0.
auto MaybeMask = static_cast<uint64_t>(AndImm);
if (MaybeMask & (MaybeMask + 1))
return false;
// LSB must fit within the register.
if (static_cast<uint64_t>(LSBImm) >= Size)
return false;
uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
MatchInfo = [=](MachineIRBuilder &B) {
auto WidthCst = B.buildConstant(Ty, Width);
auto LSBCst = B.buildConstant(Ty, LSBImm);
B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
};
return true;
}
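For a concrete sanity check of the rewrite itself, here is a standalone sketch (illustrative only, assuming the documented G_UBFX semantics of extracting Width bits starting at bit LSB) showing that the shifted-and-masked value and the bitfield extract agree on the operand patterns used in the tests below.

// Standalone sketch: (x >> lsb) & low_bit_mask(width) equals an unsigned
// bitfield extract of `width` bits starting at `lsb`, which is the rewrite
// matchBitfieldExtractFromAnd performs. Illustrative only.
#include <cassert>
#include <cstdint>

static uint64_t ubfx(uint64_t X, unsigned Lsb, unsigned Width) {
  uint64_t Mask = (Width >= 64) ? ~0ull : ((1ull << Width) - 1);
  return (X >> Lsb) & Mask;
}

int main() {
  const uint64_t X = 0xDEADBEEFCAFEF00Dull;
  assert(((X >> 5) & 0xFF) == ubfx(X, 5, 8)); // ubfx_s32 pattern: lsb 5, mask 0xFF
  assert(((X >> 5) & 0x1) == ubfx(X, 5, 1));  // ubfx_s64 pattern: lsb 5, mask 0x1
  return 0;
}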
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;

View File

@@ -174,7 +174,8 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
[{ return matchBitfieldExtractFromSExtInReg(*${root}, MRI, ${info}); }]),
(apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>;
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg]>;
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
bitfield_extract_from_and]>;
def lower_vector_fcmp : GICombineRule<
(defs root:$root),

View File

@@ -18179,3 +18179,8 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
return TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
unsigned Opc, LLT Ty1, LLT Ty2) const {
return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
}
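To make the gating concrete, a hypothetical standalone sketch (a toy type, not the LLVM LLT API) of what this predicate admits: both types must be the same 32- or 64-bit scalar, which is why the s32 and s64 tests below are combined while the <2 x s32> case is left alone.

// Toy mirror of the predicate above; illustrative only, not LLVM code.
#include <cassert>

struct ToyTy {
  bool Scalar;   // true for sN, false for vector types
  unsigned Bits; // total size in bits
  bool operator==(const ToyTy &O) const {
    return Scalar == O.Scalar && Bits == O.Bits;
  }
};

static bool constantUBFXLegal(ToyTy Ty1, ToyTy Ty2) {
  return Ty1 == Ty2 && Ty1.Scalar && (Ty1.Bits == 32 || Ty1.Bits == 64);
}

int main() {
  assert(constantUBFXLegal({true, 32}, {true, 32}));    // s32: combine allowed
  assert(constantUBFXLegal({true, 64}, {true, 64}));    // s64: combine allowed
  assert(!constantUBFXLegal({false, 64}, {false, 64})); // <2 x s32>: rejected
  return 0;
}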

View File

@@ -1100,6 +1100,9 @@ private:
// to transition between unpacked and packed types of the same element type,
// with BITCAST used otherwise.
SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
LLT Ty2) const override;
};
namespace AArch64 {

View File

@@ -0,0 +1,282 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner --aarch64postlegalizercombinerhelper-only-enable-rule="bitfield_extract_from_and" -verify-machineinstrs %s -o - | FileCheck %s
# REQUIRES: asserts
# Check that we can combine
#
# and (lshr x, cst), mask -> ubfx x, cst, width
...
---
name: ubfx_s32
tracksRegLiveness: true
legalized: true
body: |
bb.0:
liveins: $w0
; LSB = 5
; Width = trailing_ones(255) = 8
; CHECK-LABEL: name: ubfx_s32
; CHECK: liveins: $w0
; CHECK: %x:_(s32) = COPY $w0
; CHECK: %lsb:_(s32) = G_CONSTANT i32 5
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK: %and:_(s32) = G_UBFX %x, %lsb(s32), [[C]]
; CHECK: $w0 = COPY %and(s32)
; CHECK: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%lsb:_(s32) = G_CONSTANT i32 5
%mask:_(s32) = G_CONSTANT i32 255
%shift:_(s32) = G_LSHR %x, %lsb
%and:_(s32) = G_AND %shift, %mask
$w0 = COPY %and
RET_ReallyLR implicit $w0
...
---
name: ubfx_s64
tracksRegLiveness: true
legalized: true
body: |
bb.0:
liveins: $x0
; LSB = 5
; Width = trailing_ones(1) = 1
; CHECK-LABEL: name: ubfx_s64
; CHECK: liveins: $x0
; CHECK: %x:_(s64) = COPY $x0
; CHECK: %lsb:_(s64) = G_CONSTANT i64 5
; CHECK: %mask:_(s64) = G_CONSTANT i64 1
; CHECK: %and:_(s64) = G_UBFX %x, %lsb(s64), %mask
; CHECK: $x0 = COPY %and(s64)
; CHECK: RET_ReallyLR implicit $x0
%x:_(s64) = COPY $x0
%lsb:_(s64) = G_CONSTANT i64 5
%mask:_(s64) = G_CONSTANT i64 1
%shift:_(s64) = G_LSHR %x, %lsb
%and:_(s64) = G_AND %shift, %mask
$x0 = COPY %and
RET_ReallyLR implicit $x0
...
---
name: dont_combine_no_and_cst
tracksRegLiveness: true
legalized: true
body: |
bb.0:
liveins: $w0, $w1
; UBFX needs to be selected to UBFMWri/UBFMXri, so we need constants.
; CHECK-LABEL: name: dont_combine_no_and_cst
; CHECK: liveins: $w0, $w1
; CHECK: %x:_(s32) = COPY $w0
; CHECK: %y:_(s32) = COPY $w1
; CHECK: %lsb:_(s32) = G_CONSTANT i32 5
; CHECK: %shift:_(s32) = G_LSHR %x, %lsb(s32)
; CHECK: %and:_(s32) = G_AND %shift, %y
; CHECK: $w0 = COPY %and(s32)
; CHECK: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%y:_(s32) = COPY $w1
%lsb:_(s32) = G_CONSTANT i32 5
%shift:_(s32) = G_LSHR %x, %lsb
%and:_(s32) = G_AND %shift, %y
$w0 = COPY %and
RET_ReallyLR implicit $w0
...
---
name: dont_combine_and_cst_not_mask
tracksRegLiveness: true
legalized: true
body: |
bb.0:
liveins: $w0
; CHECK-LABEL: name: dont_combine_and_cst_not_mask
; CHECK: liveins: $w0
; CHECK: %x:_(s32) = COPY $w0
; CHECK: %lsb:_(s32) = G_CONSTANT i32 5
; CHECK: %not_a_mask:_(s32) = G_CONSTANT i32 2
; CHECK: %shift:_(s32) = G_LSHR %x, %lsb(s32)
; CHECK: %and:_(s32) = G_AND %shift, %not_a_mask
; CHECK: $w0 = COPY %and(s32)
; CHECK: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%lsb:_(s32) = G_CONSTANT i32 5
%not_a_mask:_(s32) = G_CONSTANT i32 2
%shift:_(s32) = G_LSHR %x, %lsb
%and:_(s32) = G_AND %shift, %not_a_mask
$w0 = COPY %and
RET_ReallyLR implicit $w0
...
---
name: dont_combine_shift_more_than_one_use
tracksRegLiveness: true
legalized: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: dont_combine_shift_more_than_one_use
; CHECK: liveins: $x0
; CHECK: %x:_(s64) = COPY $x0
; CHECK: %lsb:_(s64) = G_CONSTANT i64 5
; CHECK: %mask:_(s64) = G_CONSTANT i64 1
; CHECK: %shift:_(s64) = G_LSHR %x, %lsb(s64)
; CHECK: %and:_(s64) = G_AND %shift, %mask
; CHECK: %sub:_(s64) = G_SUB %and, %shift
; CHECK: $x0 = COPY %sub(s64)
; CHECK: RET_ReallyLR implicit $x0
%x:_(s64) = COPY $x0
%lsb:_(s64) = G_CONSTANT i64 5
%mask:_(s64) = G_CONSTANT i64 1
%shift:_(s64) = G_LSHR %x, %lsb
%and:_(s64) = G_AND %shift, %mask
%sub:_(s64) = G_SUB %and, %shift
$x0 = COPY %sub
RET_ReallyLR implicit $x0
...
---
name: dont_combine_negative_lsb
tracksRegLiveness: true
legalized: true
body: |
bb.0:
liveins: $w0
; LSB must be in [0, reg_size)
; CHECK-LABEL: name: dont_combine_negative_lsb
; CHECK: liveins: $w0
; CHECK: %x:_(s32) = COPY $w0
; CHECK: %negative:_(s32) = G_CONSTANT i32 -1
; CHECK: %mask:_(s32) = G_CONSTANT i32 255
; CHECK: %shift:_(s32) = G_LSHR %x, %negative(s32)
; CHECK: %and:_(s32) = G_AND %shift, %mask
; CHECK: $w0 = COPY %and(s32)
; CHECK: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%negative:_(s32) = G_CONSTANT i32 -1
%mask:_(s32) = G_CONSTANT i32 255
%shift:_(s32) = G_LSHR %x, %negative
%and:_(s32) = G_AND %shift, %mask
$w0 = COPY %and
RET_ReallyLR implicit $w0
...
---
name: dont_combine_lsb_too_large
tracksRegLiveness: true
legalized: true
body: |
bb.0:
liveins: $w0
; LSB must be in [0, reg_size)
; CHECK-LABEL: name: dont_combine_lsb_too_large
; CHECK: liveins: $w0
; CHECK: %x:_(s32) = COPY $w0
; CHECK: %too_large:_(s32) = G_CONSTANT i32 32
; CHECK: %mask:_(s32) = G_CONSTANT i32 255
; CHECK: %shift:_(s32) = G_LSHR %x, %too_large(s32)
; CHECK: %and:_(s32) = G_AND %shift, %mask
; CHECK: $w0 = COPY %and(s32)
; CHECK: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%too_large:_(s32) = G_CONSTANT i32 32
%mask:_(s32) = G_CONSTANT i32 255
%shift:_(s32) = G_LSHR %x, %too_large
%and:_(s32) = G_AND %shift, %mask
$w0 = COPY %and
RET_ReallyLR implicit $w0
...
---
name: dont_combine_vector
tracksRegLiveness: true
legalized: true
body: |
bb.0:
liveins: $d0
; CHECK-LABEL: name: dont_combine_vector
; CHECK: liveins: $d0
; CHECK: %x:_(<2 x s32>) = COPY $d0
; CHECK: %lsb_cst:_(s32) = G_CONSTANT i32 5
; CHECK: %lsb:_(<2 x s32>) = G_BUILD_VECTOR %lsb_cst(s32), %lsb_cst(s32)
; CHECK: %mask_cst:_(s32) = G_CONSTANT i32 255
; CHECK: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_cst(s32), %mask_cst(s32)
; CHECK: %shift:_(<2 x s32>) = G_LSHR %x, %lsb(<2 x s32>)
; CHECK: %and:_(<2 x s32>) = G_AND %shift, %mask
; CHECK: $d0 = COPY %and(<2 x s32>)
; CHECK: RET_ReallyLR implicit $d0
%x:_(<2 x s32>) = COPY $d0
%lsb_cst:_(s32) = G_CONSTANT i32 5
%lsb:_(<2 x s32>) = G_BUILD_VECTOR %lsb_cst, %lsb_cst
%mask_cst:_(s32) = G_CONSTANT i32 255
%mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_cst, %mask_cst
%shift:_(<2 x s32>) = G_LSHR %x, %lsb
%and:_(<2 x s32>) = G_AND %shift, %mask
$d0 = COPY %and
RET_ReallyLR implicit $d0
...
---
name: max_signed_int_mask
tracksRegLiveness: true
legalized: true
body: |
bb.0:
liveins: $x0
; mask = 0111 1111 1111 ... 1111
; mask + 1 = 1000 0000 0000 ... 0000
; CHECK-LABEL: name: max_signed_int_mask
; CHECK: liveins: $x0
; CHECK: %x:_(s64) = COPY $x0
; CHECK: %lsb:_(s64) = G_CONSTANT i64 0
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
; CHECK: %and:_(s64) = G_UBFX %x, %lsb(s64), [[C]]
; CHECK: $x0 = COPY %and(s64)
; CHECK: RET_ReallyLR implicit $x0
%x:_(s64) = COPY $x0
%lsb:_(s64) = G_CONSTANT i64 0
%mask:_(s64) = G_CONSTANT i64 9223372036854775807
%shift:_(s64) = G_LSHR %x, %lsb
%and:_(s64) = G_AND %shift, %mask
$x0 = COPY %and
RET_ReallyLR implicit $x0
...
---
name: max_unsigned_int_mask
tracksRegLiveness: true
legalized: true
body: |
bb.0:
liveins: $x0
; mask = 1111 1111 1111 ... 1111
; mask + 1 = 0000 0000 0000 ... 0000
; CHECK-LABEL: name: max_unsigned_int_mask
; CHECK: liveins: $x0
; CHECK: %x:_(s64) = COPY $x0
; CHECK: %lsb:_(s64) = G_CONSTANT i64 5
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; CHECK: %and:_(s64) = G_UBFX %x, %lsb(s64), [[C]]
; CHECK: $x0 = COPY %and(s64)
; CHECK: RET_ReallyLR implicit $x0
%x:_(s64) = COPY $x0
%lsb:_(s64) = G_CONSTANT i64 5
%mask:_(s64) = G_CONSTANT i64 18446744073709551615
%shift:_(s64) = G_LSHR %x, %lsb
%and:_(s64) = G_AND %shift, %mask
$x0 = COPY %and
RET_ReallyLR implicit $x0