
[GlobalISel] Add re-association combine for G_PTR_ADD to allow better addressing mode usage.

We're trying to match a few pointer computation patterns here for
re-association opportunities.
1) Isolating a constant operand to be on the RHS, e.g.:
   G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)

2) Folding two constants in each sub-tree as long as such folding
   doesn't break a legal addressing mode.
   G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
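
Both rewrites only re-associate byte offsets, so they are value-preserving.
A minimal standalone C++ sketch of the same identities on host pointers
(not part of the patch; Buf, Base, X, C, C1, C2 are illustrative names):

// Illustration only: the two patterns above are plain pointer-arithmetic
// re-association, shown here on a byte buffer (all offsets stay in bounds).
#include <cassert>
#include <cstdint>

int main() {
  char Buf[64] = {};
  char *Base = Buf;
  int64_t X = 5, C = 3, C1 = 2, C2 = 4;
  // Pattern 1: BASE + (X + C) == (BASE + X) + C
  assert(Base + (X + C) == (Base + X) + C);
  // Pattern 2: (BASE + C1) + C2 == BASE + (C1 + C2)
  assert((Base + C1) + C2 == Base + (C1 + C2));
  return 0;
}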

AArch64 code size improvements on CTMark with -Os:
Program              before  after   diff
 pairlocalalign      251048  251044 -0.0%
 consumer-typeset    421820  421812 -0.0%
 kc                  431348  431320 -0.0%
 SPASS               413404  413300 -0.0%
 clamscan            384396  384220 -0.0%
 tramp3d-v4          370640  370412 -0.1%
 lencod              432096  431772 -0.1%
 bullet              479400  478796 -0.1%
 sqlite3             288504  288072 -0.1%
 7zip-benchmark      573796  570768 -0.5%
 Geomean difference                 -0.1%

Differential Revision: https://reviews.llvm.org/D105069
Commit: b1924533d9 (parent: 56d41a008d)
Author: Amara Emerson
Date:   2021-06-26 23:36:46 -07:00

4 changed files with 315 additions and 1 deletion

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

@@ -511,8 +511,14 @@ public:
      SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);

  /// Use a function which takes in a MachineIRBuilder to perform a combine.
  /// By default, it erases the instruction \p MI from the function.
  bool applyBuildFn(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo);

  /// Use a function which takes in a MachineIRBuilder to perform a combine.
  /// This variant does not erase \p MI after calling the build function.
  bool applyBuildFnNoErase(MachineInstr &MI,
                           std::function<void(MachineIRBuilder &)> &MatchInfo);

  bool matchFunnelShiftToRotate(MachineInstr &MI);
  void applyFunnelShiftToRotate(MachineInstr &MI);
  bool matchRotateOutOfRange(MachineInstr &MI);
@@ -528,6 +534,11 @@ public:
  bool matchBitfieldExtractFromAnd(
      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);

  /// Reassociate pointer calculations with G_ADD involved, to allow better
  /// addressing mode usage.
  bool matchReassocPtrAdd(MachineInstr &MI,
                          std::function<void(MachineIRBuilder &)> &MatchInfo);

  /// Try to transform \p MI by using all of the above
  /// combine functions. Returns true if changed.
  bool tryCombine(MachineInstr &MI);
@@ -589,6 +600,11 @@ private:
      SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
      const SmallVector<Register, 8> &RegsToVisit,
      const unsigned MemSizeInBits);

  /// Examines the G_PTR_ADD instruction \p PtrAdd and determines if performing
  /// a re-association of its operands would break an existing legal addressing
  /// mode that the address computation currently represents.
  bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd);
};

} // namespace llvm
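
The two apply helpers declared above share one idiom: the matcher packages
the rewrite into a std::function, and the apply step merely runs it; the
NoErase variant exists for rewrites that mutate the matched instruction in
place instead of replacing it. A minimal standalone sketch of that contract,
with toy stand-in types (ToyInstr, ToyBuilder, matchToy are illustrative,
not LLVM's):

// Toy model of the build-function combine idiom (assumption: simplified
// stand-ins for MachineInstr and MachineIRBuilder).
#include <cassert>
#include <functional>

struct ToyBuilder {};                     // stand-in for MachineIRBuilder
struct ToyInstr { int Opcode; int Imm; }; // stand-in for MachineInstr

using BuildFn = std::function<void(ToyBuilder &)>;

// The matcher decides *whether* to combine and captures *how* in MatchInfo.
bool matchToy(ToyInstr &MI, BuildFn &MatchInfo) {
  if (MI.Opcode != 1) // only our toy opcode qualifies
    return false;
  MatchInfo = [&MI](ToyBuilder &) { MI.Imm += 42; }; // in-place rewrite
  return true;
}

// The apply step just runs the closure; MI is deliberately not erased.
bool applyBuildFnNoEraseToy(ToyInstr & /*MI*/, BuildFn &MatchInfo) {
  ToyBuilder B;
  MatchInfo(B);
  return true;
}

int main() {
  ToyInstr MI{1, 0};
  BuildFn Fn;
  if (matchToy(MI, Fn))
    applyBuildFnNoEraseToy(MI, Fn);
  assert(MI.Imm == 42); // the deferred rewrite ran
  return 0;
}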

llvm/include/llvm/Target/GlobalISel/Combine.td

@@ -645,6 +645,13 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
                                            bitfield_extract_from_and]>;

def reassoc_ptradd : GICombineRule<
  (defs root:$root, build_fn_matchinfo:$matchinfo),
  (match (wip_match_opcode G_PTR_ADD):$root,
         [{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]),
  (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;

def reassocs : GICombineGroup<[reassoc_ptradd]>;

// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -678,9 +685,10 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
                                       mul_by_neg_one]>;
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
-   extract_vec_elt_combines, ptr_add_immed_chain, combines_for_extload,
+   extract_vec_elt_combines, combines_for_extload,
    combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
    simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
+   reassocs, ptr_add_immed_chain,
    shl_ashr_to_sext_inreg, sext_inreg_of_load,
    width_reduction_combines, select_combines,
    known_bits_simplifications, ext_ext_fold,
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

@@ -1720,6 +1720,13 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
  if (!MaybeImmVal)
    return false;

  // Don't do this combine if there are multiple uses of the first PTR_ADD,
  // since we may be able to compute the second PTR_ADD as an immediate
  // offset anyway. Folding the first offset into the second may cause us
  // to go beyond the bounds of our legal addressing modes.
  if (!MRI.hasOneNonDBGUse(Add2))
    return false;

  MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;
@@ -3955,6 +3962,13 @@ bool CombinerHelper::applyBuildFn(
  return true;
}

bool CombinerHelper::applyBuildFnNoErase(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  MatchInfo(Builder);
  return true;
}

/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
@@ -4129,6 +4143,129 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
  return true;
}

bool CombinerHelper::reassociationCanBreakAddressingModePattern(
    MachineInstr &PtrAdd) {
  assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);

  Register Src1Reg = PtrAdd.getOperand(1).getReg();
  MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
  if (!Src1Def)
    return false;

  Register Src2Reg = PtrAdd.getOperand(2).getReg();

  if (MRI.hasOneNonDBGUse(Src1Reg))
    return false;

  auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
  if (!C1)
    return false;
  auto C2 = getConstantVRegVal(Src2Reg, MRI);
  if (!C2)
    return false;

  const APInt &C1APIntVal = *C1;
  const APInt &C2APIntVal = *C2;
  const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();

  for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
    // This combine may end up running before ptrtoint/inttoptr combines
    // manage to eliminate redundant conversions, so try to look through them.
    MachineInstr *ConvUseMI = &UseMI;
    unsigned ConvUseOpc = ConvUseMI->getOpcode();
    while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
           ConvUseOpc == TargetOpcode::G_PTRTOINT) {
      Register DefReg = ConvUseMI->getOperand(0).getReg();
      if (!MRI.hasOneNonDBGUse(DefReg))
        break;
      ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
      ConvUseOpc = ConvUseMI->getOpcode(); // advance, or the loop never ends
    }
    auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
                     ConvUseOpc == TargetOpcode::G_STORE;
    if (!LoadStore)
      continue;

    // Is x[offset2] already not a legal addressing mode? If so then
    // reassociating the constants breaks nothing (we test offset2 because
    // that's the one we hope to fold into the load or store).
    TargetLoweringBase::AddrMode AM;
    AM.HasBaseReg = true;
    AM.BaseOffs = C2APIntVal.getSExtValue();
    unsigned AS =
        MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
    Type *AccessTy =
        getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
                      PtrAdd.getMF()->getFunction().getContext());
    const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                   AccessTy, AS))
      continue;

    // Would x[offset1+offset2] still be a legal addressing mode?
    AM.BaseOffs = CombinedValue;
    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                   AccessTy, AS))
      return true;
  }

  return false;
}
bool CombinerHelper::matchReassocPtrAdd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
  // We're trying to match a few pointer computation patterns here for
  // re-association opportunities.
  // 1) Isolating a constant operand to be on the RHS, e.g.:
  //    G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
  //
  // 2) Folding two constants in each sub-tree as long as such folding
  //    doesn't break a legal addressing mode.
  //    G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
  MachineInstr *RHS = MRI.getVRegDef(Src2Reg);

  if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
    // Try to match example 1).
    if (RHS->getOpcode() != TargetOpcode::G_ADD)
      return false;
    auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
    if (!C2)
      return false;

    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
      auto NewBase =
          B.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(NewBase.getReg(0));
      MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
      Observer.changedInstr(MI);
    };
  } else {
    // Try to match example 2).
    Register LHSSrc1 = LHS->getOperand(1).getReg();
    Register LHSSrc2 = LHS->getOperand(2).getReg();
    auto C1 = getConstantVRegVal(LHSSrc2, MRI);
    if (!C1)
      return false;
    auto C2 = getConstantVRegVal(Src2Reg, MRI);
    if (!C2)
      return false;

    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(LHSSrc1);
      MI.getOperand(2).setReg(NewCst.getReg(0));
      Observer.changedInstr(MI);
    };
  }
  return !reassociationCanBreakAddressingModePattern(MI);
}

bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
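
Whether the fold fires thus hinges on TLI.isLegalAddressingMode. For the
AArch64 reg+imm loads and stores exercised by the tests below, that check
boils down to roughly two immediate forms; a rough standalone approximation
(isLegalLoadStoreImm is a hypothetical helper, and this deliberately ignores
every other addressing mode):

// Assumption: simplified AArch64 immediate-offset rules, not the real TLI
// hook. Encodable offsets are either LDUR/STUR's signed 9-bit unscaled
// immediate, or LDR/STR's unsigned 12-bit immediate scaled by access size.
#include <cassert>
#include <cstdint>

bool isLegalLoadStoreImm(int64_t Offs, unsigned AccessBytes) {
  if (Offs >= -256 && Offs <= 255)
    return true; // unscaled signed 9-bit form
  return Offs >= 0 && Offs % AccessBytes == 0 && Offs / AccessBytes <= 4095;
}

int main() {
  // test1 below: offset 6 is legal, but the folded 4777 + 6 = 4783 would be
  // illegal for a 4-byte access (out of unscaled range, not a multiple of 4),
  // so the constants are not folded.
  assert(isLegalLoadStoreImm(6, 4) && !isLegalLoadStoreImm(4783, 4));
  // test4 below: offset 4079 is already illegal, so folding to 17 + 4079 =
  // 4096 (a legal scaled offset) breaks nothing and is allowed.
  assert(!isLegalLoadStoreImm(4079, 4) && isLegalLoadStoreImm(4096, 4));
  return 0;
}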

llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir (new file)

@@ -0,0 +1,153 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
---
name: test1_noreassoc_legal_already_new_is_illegal
alignment: 4
tracksRegLiveness: true
liveins:
  - { reg: '$x0' }
body: |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: test1_noreassoc_legal_already_new_is_illegal
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4777
    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
    ; CHECK: G_STORE [[C2]](s32), [[PTR_ADD]](p0) :: (store (s32))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %2:_(s64) = G_CONSTANT i64 4777
    %4:_(s64) = G_CONSTANT i64 6
    %9:_(s32) = G_CONSTANT i32 0
    %10:_(p0) = G_PTR_ADD %0, %2(s64)
    %11:_(p0) = G_PTR_ADD %10, %4(s64)
    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
    $w0 = COPY %7(s32)
    RET_ReallyLR implicit $w0
...
---
name: test2_reassoc_already_legal_new_also_legal
alignment: 4
liveins:
  - { reg: '$x0' }
body: |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: test2_reassoc_already_legal_new_also_legal
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
    ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %2:_(s64) = G_CONSTANT i64 10
    %4:_(s64) = G_CONSTANT i64 6
    %9:_(s32) = G_CONSTANT i32 0
    %10:_(p0) = G_PTR_ADD %0, %2(s64)
    %11:_(p0) = G_PTR_ADD %10, %4(s64)
    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
    $w0 = COPY %7(s32)
    RET_ReallyLR implicit $w0
...
---
name: test3_noreassoc_only_oneuse
alignment: 4
liveins:
  - { reg: '$x0' }
body: |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: test3_noreassoc_only_oneuse
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4783
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %10:_(s64) = G_CONSTANT i64 4783
    %9:_(p0) = G_PTR_ADD %0, %10(s64)
    %7:_(s32) = G_LOAD %9(p0) :: (load 4)
    $w0 = COPY %7(s32)
    RET_ReallyLR implicit $w0
...
---
name: test4_reassoc_existing_is_already_illegal
alignment: 4
liveins:
  - { reg: '$x0' }
body: |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: test4_reassoc_existing_is_already_illegal
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4096
    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
    ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %2:_(s64) = G_CONSTANT i64 17
    %4:_(s64) = G_CONSTANT i64 4079
    %9:_(s32) = G_CONSTANT i32 0
    %10:_(p0) = G_PTR_ADD %0, %2(s64)
    %11:_(p0) = G_PTR_ADD %10, %4(s64)
    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
    $w0 = COPY %7(s32)
    RET_ReallyLR implicit $w0
...
---
name: test5_add_on_rhs
alignment: 4
liveins:
  - { reg: '$x0' }
  - { reg: '$x1' }
body: |
  bb.1:
    liveins: $x0, $x1

    ; CHECK-LABEL: name: test5_add_on_rhs
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %1:_(s64) = COPY $x1
    %2:_(s64) = G_CONSTANT i64 1
    %3:_(s64) = G_ADD %1, %2
    %4:_(p0) = G_PTR_ADD %0, %3(s64)
    %7:_(s32) = G_LOAD %4(p0) :: (load 1)
    $w0 = COPY %7(s32)
    RET_ReallyLR implicit $w0
...