mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[GlobalISel] Add re-association combine for G_PTR_ADD to allow better addressing mode usage.
We're trying to match a few pointer computation patterns here for
re-association opportunities.

1) Isolating a constant operand to be on the RHS, e.g.:
   G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)

2) Folding two constants in each sub-tree as long as such folding
   doesn't break a legal addressing mode.
   G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)

AArch64 code size improvements on CTMark with -Os:

Program           before  after   diff
pairlocalalign    251048  251044  -0.0%
consumer-typeset  421820  421812  -0.0%
kc                431348  431320  -0.0%
SPASS             413404  413300  -0.0%
clamscan          384396  384220  -0.0%
tramp3d-v4        370640  370412  -0.1%
lencod            432096  431772  -0.1%
bullet            479400  478796  -0.1%
sqlite3           288504  288072  -0.1%
7zip-benchmark    573796  570768  -0.5%
Geomean difference                -0.1%

Differential Revision: https://reviews.llvm.org/D105069
This commit is contained in:
parent 56d41a008d
commit b1924533d9
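
To make the patterns concrete, here is a minimal illustrative sketch in generic MIR of what pattern 2) does to a chained pointer computation (the register names are invented for illustration; the constants mirror test2 in the new test file below):

    ; before: two G_PTR_ADDs with constant offsets 10 and 6
    %c10:_(s64) = G_CONSTANT i64 10
    %c6:_(s64) = G_CONSTANT i64 6
    %p1:_(p0) = G_PTR_ADD %base, %c10(s64)
    %p2:_(p0) = G_PTR_ADD %p1, %c6(s64)

    ; after: the two offsets are folded into one constant, provided the
    ; target still accepts [base + 16] as a legal addressing mode
    %c16:_(s64) = G_CONSTANT i64 16
    %p2:_(p0) = G_PTR_ADD %base, %c16(s64)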
include/llvm/CodeGen/GlobalISel/CombinerHelper.h

@@ -511,8 +511,14 @@ public:
      SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);

  /// Use a function which takes in a MachineIRBuilder to perform a combine.
  /// By default, it erases the instruction \p MI from the function.
  bool applyBuildFn(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo);

  /// Use a function which takes in a MachineIRBuilder to perform a combine.
  /// This variant does not erase \p MI after calling the build function.
  bool applyBuildFnNoErase(MachineInstr &MI,
                           std::function<void(MachineIRBuilder &)> &MatchInfo);

  bool matchFunnelShiftToRotate(MachineInstr &MI);
  void applyFunnelShiftToRotate(MachineInstr &MI);
  bool matchRotateOutOfRange(MachineInstr &MI);
@@ -528,6 +534,11 @@ public:
  bool matchBitfieldExtractFromAnd(
      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);

  /// Reassociate pointer calculations with G_ADD involved, to allow better
  /// addressing mode usage.
  bool matchReassocPtrAdd(MachineInstr &MI,
                          std::function<void(MachineIRBuilder &)> &MatchInfo);

  /// Try to transform \p MI by using all of the above
  /// combine functions. Returns true if changed.
  bool tryCombine(MachineInstr &MI);
@@ -589,6 +600,11 @@ private:
      SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
      const SmallVector<Register, 8> &RegsToVisit,
      const unsigned MemSizeInBits);

  /// Examines the G_PTR_ADD instruction \p PtrAdd and determines if performing
  /// a re-association of its operands would break an existing legal addressing
  /// mode that the address computation currently represents.
  bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd);
};

} // namespace llvm
include/llvm/Target/GlobalISel/Combine.td

@@ -645,6 +645,13 @@ def bitfield_extract_from_sext_inreg : GICombineRule<

def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
                                            bitfield_extract_from_and]>;
def reassoc_ptradd : GICombineRule<
  (defs root:$root, build_fn_matchinfo:$matchinfo),
  (match (wip_match_opcode G_PTR_ADD):$root,
    [{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]),
  (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;

def reassocs : GICombineGroup<[reassoc_ptradd]>;

// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -678,9 +685,10 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
                                       mul_by_neg_one]>;

 def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
-    extract_vec_elt_combines, ptr_add_immed_chain, combines_for_extload,
+    extract_vec_elt_combines, combines_for_extload,
     combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
+    reassocs, ptr_add_immed_chain,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
lib/CodeGen/GlobalISel/CombinerHelper.cpp

@@ -1720,6 +1720,13 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
  if (!MaybeImmVal)
    return false;

  // Don't do this combine if there are multiple uses of the first PTR_ADD,
  // since we may be able to compute the second PTR_ADD as an immediate
  // offset anyway. Folding the first offset into the second may cause us
  // to go beyond the bounds of our legal addressing modes.
  if (!MRI.hasOneNonDBGUse(Add2))
    return false;

  MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;
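
As a minimal sketch of the case this new one-use guard skips (illustrative register names, mirroring test1 in the new test file): when the first G_PTR_ADD has a second user, it stays alive regardless, so folding its offset into the second G_PTR_ADD cannot remove it and may only push the combined offset out of range:

    %p1:_(p0) = G_PTR_ADD %base, %offs1(s64)
    %p2:_(p0) = G_PTR_ADD %p1, %offs2(s64)
    %v:_(s32) = G_LOAD %p2(p0) :: (load 4)
    G_STORE %zero(s32), %p1(p0) :: (store 4) ; second use keeps %p1 alive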
@@ -3955,6 +3962,13 @@ bool CombinerHelper::applyBuildFn(
  return true;
}

bool CombinerHelper::applyBuildFnNoErase(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  MatchInfo(Builder);
  return true;
}

/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
@@ -4129,6 +4143,129 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
  return true;
}

bool CombinerHelper::reassociationCanBreakAddressingModePattern(
    MachineInstr &PtrAdd) {
  assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);

  Register Src1Reg = PtrAdd.getOperand(1).getReg();
  MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
  if (!Src1Def)
    return false;

  Register Src2Reg = PtrAdd.getOperand(2).getReg();

  if (MRI.hasOneNonDBGUse(Src1Reg))
    return false;

  auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
  if (!C1)
    return false;
  auto C2 = getConstantVRegVal(Src2Reg, MRI);
  if (!C2)
    return false;

  const APInt &C1APIntVal = *C1;
  const APInt &C2APIntVal = *C2;
  const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();

  for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
    // This combine may end up running before ptrtoint/inttoptr combines
    // manage to eliminate redundant conversions, so try to look through them.
    MachineInstr *ConvUseMI = &UseMI;
    unsigned ConvUseOpc = ConvUseMI->getOpcode();
    while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
           ConvUseOpc == TargetOpcode::G_PTRTOINT) {
      Register DefReg = ConvUseMI->getOperand(0).getReg();
      if (!MRI.hasOneNonDBGUse(DefReg))
        break;
      ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
      // Advance the opcode as we walk the conversion chain; without this the
      // loop condition would test a stale opcode and never make progress.
      ConvUseOpc = ConvUseMI->getOpcode();
    }
    auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
                     ConvUseOpc == TargetOpcode::G_STORE;
    if (!LoadStore)
      continue;
    // Is x[offset2] already not a legal addressing mode? If so then
    // reassociating the constants breaks nothing (we test offset2 because
    // that's the one we hope to fold into the load or store).
    TargetLoweringBase::AddrMode AM;
    AM.HasBaseReg = true;
    AM.BaseOffs = C2APIntVal.getSExtValue();
    unsigned AS =
        MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
    Type *AccessTy =
        getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
                      PtrAdd.getMF()->getFunction().getContext());
    const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                   AccessTy, AS))
      continue;

    // Would x[offset1+offset2] still be a legal addressing mode?
    AM.BaseOffs = CombinedValue;
    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                   AccessTy, AS))
      return true;
  }

  return false;
}
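
As a rough worked example of the two legality queries (AArch64 numbers taken from test1 below, with illustrative register names): for a 32-bit load, offset 6 fits the unscaled 9-bit immediate form, so x[6] is legal; the combined offset 4777 + 6 = 4783 is neither a multiple of 4 (as the scaled form requires) nor within unscaled range, so the function returns true and blocks the re-association:

    %p1:_(p0) = G_PTR_ADD %base, %c4777(s64) ; has further users
    %p2:_(p0) = G_PTR_ADD %p1, %c6(s64)
    %v:_(s32) = G_LOAD %p2(p0) :: (load 4)   ; [reg + 6] legal, [reg + 4783] not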

bool CombinerHelper::matchReassocPtrAdd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
  // We're trying to match a few pointer computation patterns here for
  // re-association opportunities.
  // 1) Isolating a constant operand to be on the RHS, e.g.:
  //    G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
  //
  // 2) Folding two constants in each sub-tree as long as such folding
  //    doesn't break a legal addressing mode.
  //    G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
  MachineInstr *RHS = MRI.getVRegDef(Src2Reg);

  if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
    // Try to match example 1).
    if (RHS->getOpcode() != TargetOpcode::G_ADD)
      return false;
    auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
    if (!C2)
      return false;

    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());

      auto NewBase =
          Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(NewBase.getReg(0));
      MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
      Observer.changedInstr(MI);
    };
  } else {
    // Try to match example 2).
    Register LHSSrc1 = LHS->getOperand(1).getReg();
    Register LHSSrc2 = LHS->getOperand(2).getReg();
    auto C1 = getConstantVRegVal(LHSSrc2, MRI);
    if (!C1)
      return false;
    auto C2 = getConstantVRegVal(Src2Reg, MRI);
    if (!C2)
      return false;

    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(LHSSrc1);
      MI.getOperand(2).setReg(NewCst.getReg(0));
      Observer.changedInstr(MI);
    };
  }
  return !reassociationCanBreakAddressingModePattern(MI);
}
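
For pattern 1), the net effect of the build function above, sketched in MIR (illustrative register names, mirroring test5 below): the variable part of the index is moved into a new inner G_PTR_ADD so the constant ends up outermost, where later combines or instruction selection can fold it into the memory operation:

    ; before
    %sum:_(s64) = G_ADD %idx, %c1
    %ptr:_(p0) = G_PTR_ADD %base, %sum(s64)

    ; after
    %newbase:_(p0) = G_PTR_ADD %base, %idx(s64)
    %ptr:_(p0) = G_PTR_ADD %newbase, %c1(s64)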

bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir (new file, 153 lines)
@@ -0,0 +1,153 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
---
name: test1_noreassoc_legal_already_new_is_illegal
alignment: 4
tracksRegLiveness: true
liveins:
  - { reg: '$x0' }
body: |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: test1_noreassoc_legal_already_new_is_illegal
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4777
    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
    ; CHECK: G_STORE [[C2]](s32), [[PTR_ADD]](p0) :: (store (s32))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %2:_(s64) = G_CONSTANT i64 4777
    %4:_(s64) = G_CONSTANT i64 6
    %9:_(s32) = G_CONSTANT i32 0
    %10:_(p0) = G_PTR_ADD %0, %2(s64)
    %11:_(p0) = G_PTR_ADD %10, %4(s64)
    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
    $w0 = COPY %7(s32)
    RET_ReallyLR implicit $w0

...
---
name: test2_reassoc_already_legal_new_also_legal
alignment: 4
liveins:
  - { reg: '$x0' }
body: |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: test2_reassoc_already_legal_new_also_legal
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
    ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %2:_(s64) = G_CONSTANT i64 10
    %4:_(s64) = G_CONSTANT i64 6
    %9:_(s32) = G_CONSTANT i32 0
    %10:_(p0) = G_PTR_ADD %0, %2(s64)
    %11:_(p0) = G_PTR_ADD %10, %4(s64)
    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
    $w0 = COPY %7(s32)
    RET_ReallyLR implicit $w0

...
---
name: test3_noreassoc_only_oneuse
alignment: 4
liveins:
  - { reg: '$x0' }
body: |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: test3_noreassoc_only_oneuse
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4783
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %10:_(s64) = G_CONSTANT i64 4783
    %9:_(p0) = G_PTR_ADD %0, %10(s64)
    %7:_(s32) = G_LOAD %9(p0) :: (load 4)
    $w0 = COPY %7(s32)
    RET_ReallyLR implicit $w0

...
---
name: test4_reassoc_existing_is_already_illegal
alignment: 4
liveins:
  - { reg: '$x0' }
body: |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: test4_reassoc_existing_is_already_illegal
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4096
    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
    ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %2:_(s64) = G_CONSTANT i64 17
    %4:_(s64) = G_CONSTANT i64 4079
    %9:_(s32) = G_CONSTANT i32 0
    %10:_(p0) = G_PTR_ADD %0, %2(s64)
    %11:_(p0) = G_PTR_ADD %10, %4(s64)
    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
    $w0 = COPY %7(s32)
    RET_ReallyLR implicit $w0

...
---
name: test5_add_on_rhs
alignment: 4
liveins:
  - { reg: '$x0' }
  - { reg: '$x1' }
body: |
  bb.1:
    liveins: $x0, $x1

    ; CHECK-LABEL: name: test5_add_on_rhs
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %1:_(s64) = COPY $x1
    %2:_(s64) = G_CONSTANT i64 1
    %3:_(s64) = G_ADD %1, %2
    %4:_(p0) = G_PTR_ADD %0, %3(s64)
    %7:_(s32) = G_LOAD %4(p0) :: (load 1)
    $w0 = COPY %7(s32)
    RET_ReallyLR implicit $w0

...