
GlobalISel: add combiner to form indexed loads.

Loosely based on the DAGCombiner version, but this part is slightly simpler in
GlobalISel because all address calculation is performed by G_GEP. That makes
the inc/dec distinction moot, so there's just pre/post to think about.

No targets can handle it yet, so testing is via a special flag that overrides
the target hooks.

llvm-svn: 371384
Tim Northover 2019-09-09 10:04:23 +00:00
parent 71a8db23ee
commit ba4be9e30b
12 changed files with 561 additions and 20 deletions

View File

@@ -28,6 +28,7 @@ class MachineRegisterInfo;
class MachineInstr;
class MachineOperand;
class GISelKnownBits;
class MachineDominatorTree;
struct PreferredTuple {
LLT Ty; // The result type of the extend.
@@ -41,10 +42,12 @@ protected:
MachineRegisterInfo &MRI;
GISelChangeObserver &Observer;
GISelKnownBits *KB;
MachineDominatorTree *MDT;
public:
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
GISelKnownBits *KB = nullptr,
MachineDominatorTree *MDT = nullptr);
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
@@ -60,17 +63,61 @@ public:
bool matchCombineCopy(MachineInstr &MI);
void applyCombineCopy(MachineInstr &MI);
/// \brief \returns true if \p DefMI precedes \p UseMI or they are the same
/// instruction. Both must be in the same basic block.
bool isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI);
/// \brief \returns true if \p DefMI dominates \p UseMI. By definition an
/// instruction dominates itself.
///
/// If we haven't been provided with a MachineDominatorTree during
/// construction, this function returns a conservative result that tracks just
/// a single basic block.
bool dominates(MachineInstr &DefMI, MachineInstr &UseMI);
/// If \p MI is extend that consumes the result of a load, try to combine it.
/// Returns true if MI changed.
bool tryCombineExtendingLoads(MachineInstr &MI);
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
/// Combine \p MI into a pre-indexed or post-indexed load/store operation if
/// legal and the surrounding code makes it useful.
bool tryCombineIndexedLoadStore(MachineInstr &MI);
bool matchCombineBr(MachineInstr &MI);
bool tryCombineBr(MachineInstr &MI);
/// Optimize memcpy intrinsics et al, e.g. constant len calls.
/// /p MaxLen if non-zero specifies the max length of a mem libcall to inline.
///
/// For example (pre-indexed):
///
/// $addr = G_GEP $base, $offset
/// [...]
/// $val = G_LOAD $addr
/// [...]
/// $whatever = COPY $addr
///
/// -->
///
/// $val, $addr = G_INDEXED_LOAD $base, $offset, 1 (IsPre)
/// [...]
/// $whatever = COPY $addr
///
/// or (post-indexed):
///
/// G_STORE $val, $base
/// [...]
/// $addr = G_GEP $base, $offset
/// [...]
/// $whatever = COPY $addr
///
/// -->
///
/// $addr = G_INDEXED_STORE $val, $base, $offset
/// [...]
/// $whatever = COPY $addr
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
/// Try to transform \p MI by using all of the above
@@ -87,6 +134,20 @@ private:
bool IsVolatile);
bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
unsigned KnownLen, unsigned DstAlign, bool IsVolatile);
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
///
/// \returns true if a candidate is found.
bool findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
Register &Offset);
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a pre-indexing operation.
///
/// \returns true if a candidate is found.
bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
Register &Offset);
};
} // namespace llvm

View File

@@ -2959,6 +2959,14 @@ public:
return false;
}
/// Returns true if the specified base+offset is a legal indexed addressing
/// mode for this target. \p MI is the load or store instruction that is being
/// considered for transformation.
virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
bool IsPre, MachineRegisterInfo &MRI) const {
return false;
}
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
virtual unsigned getJumpTableEncoding() const;

View File

@@ -294,9 +294,21 @@ HANDLE_TARGET_OPCODE(G_SEXTLOAD)
/// Generic zeroext load
HANDLE_TARGET_OPCODE(G_ZEXTLOAD)
/// Generic indexed load (including anyext load)
HANDLE_TARGET_OPCODE(G_INDEXED_LOAD)
/// Generic indexed signext load
HANDLE_TARGET_OPCODE(G_INDEXED_SEXTLOAD)
/// Generic indexed zeroext load
HANDLE_TARGET_OPCODE(G_INDEXED_ZEXTLOAD)
/// Generic store.
HANDLE_TARGET_OPCODE(G_STORE)
/// Generic indexed store.
HANDLE_TARGET_OPCODE(G_INDEXED_STORE)
/// Generic atomic cmpxchg with internal success check.
HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS)

View File

@@ -788,6 +788,32 @@ def G_ZEXTLOAD : GenericInstruction {
let mayLoad = 1;
}
// Generic indexed load. Combines a GEP with a load. $newaddr is set to $base + $offset.
// If $am is 0 (post-indexed), then the value is loaded from $base; if $am is 1 (pre-indexed)
// then the value is loaded from $newaddr.
def G_INDEXED_LOAD : GenericInstruction {
let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
let hasSideEffects = 0;
let mayLoad = 1;
}
// Same as G_INDEXED_LOAD except that the load performed is sign-extending, as with G_SEXTLOAD.
def G_INDEXED_SEXTLOAD : GenericInstruction {
let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
let hasSideEffects = 0;
let mayLoad = 1;
}
// Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_ZEXTLOAD.
def G_INDEXED_ZEXTLOAD : GenericInstruction {
let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
let hasSideEffects = 0;
let mayLoad = 1;
}
// Generic store. Expects a MachineMemOperand in addition to explicit operands.
def G_STORE : GenericInstruction {
let OutOperandList = (outs);
@@ -796,6 +822,15 @@ def G_STORE : GenericInstruction {
let mayStore = 1;
}
// Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing behaviour.
def G_INDEXED_STORE : GenericInstruction {
let OutOperandList = (outs ptype0:$newaddr);
let InOperandList = (ins type1:$src, ptype0:$base, ptype2:$offset,
unknown:$am);
let hasSideEffects = 0;
let mayStore = 1;
}
// Generic atomic cmpxchg with internal success check. Expects a
// MachineMemOperand in addition to explicit operands.
def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction {

View File

@@ -11,6 +11,7 @@
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -22,10 +23,19 @@
using namespace llvm;
// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
cl::desc("Force all indexed operations to be "
"legal for the GlobalISel combiner"));
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B, GISelKnownBits *KB,
MachineDominatorTree *MDT)
: Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
KB(KB), MDT(MDT) {
(void)this->KB;
}
@@ -349,6 +359,204 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
Observer.changedInstr(MI);
}
bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
return false;
// Loop through the basic block until we find one of the instructions.
MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
for (; I != DefMI.getParent()->end(); ++I) {
if (&*I == &DefMI || &*I == &UseMI)
return &*I == &DefMI;
}
llvm_unreachable("Block must contain instructions");
}
bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
if (MDT)
return MDT->dominates(&DefMI, &UseMI);
else if (DefMI.getParent() != UseMI.getParent())
return false;
return isPredecessor(DefMI, UseMI);
}
bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
Register &Base, Register &Offset) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
unsigned Opcode = MI.getOpcode();
assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
Base = MI.getOperand(1).getReg();
MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
return false;
LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
for (auto &Use : MRI.use_instructions(Base)) {
if (Use.getOpcode() != TargetOpcode::G_GEP)
continue;
Offset = Use.getOperand(2).getReg();
if (!ForceLegalIndexing &&
!TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
<< Use);
continue;
}
// Make sure the offset calculation is before the potentially indexed op.
// FIXME: we really care about dependency here. The offset calculation might
// be movable.
MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
if (!OffsetDef || !dominates(*OffsetDef, MI)) {
LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
<< Use);
continue;
}
// FIXME: check whether all uses of Base are load/store with foldable
// addressing modes. If so, using the normal addr-modes is better than
// forming an indexed one.
bool MemOpDominatesAddrUses = true;
for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
if (!dominates(MI, GEPUse)) {
MemOpDominatesAddrUses = false;
break;
}
}
if (!MemOpDominatesAddrUses) {
LLVM_DEBUG(
dbgs() << " Ignoring candidate as memop does not dominate uses: "
<< Use);
continue;
}
LLVM_DEBUG(dbgs() << " Found match: " << Use);
Addr = Use.getOperand(0).getReg();
return true;
}
return false;
}
bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
Register &Base, Register &Offset) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
unsigned Opcode = MI.getOpcode();
assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
Addr = MI.getOperand(1).getReg();
MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
if (!AddrDef || MRI.hasOneUse(Addr))
return false;
Base = AddrDef->getOperand(1).getReg();
Offset = AddrDef->getOperand(2).getReg();
LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
if (!ForceLegalIndexing &&
!TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
return false;
}
MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
return false;
}
if (MI.getOpcode() == TargetOpcode::G_STORE) {
// Would require a copy.
if (Base == MI.getOperand(0).getReg()) {
LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
return false;
}
// We're expecting one use of Addr in MI, but it could also be the
// value stored, which isn't actually dominated by the instruction.
if (MI.getOperand(0).getReg() == Addr) {
LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
return false;
}
}
// FIXME: check whether all uses of the base pointer are constant GEPs. That
// might allow us to end base's liveness here by adjusting the constant.
for (auto &UseMI : MRI.use_instructions(Addr)) {
if (!dominates(MI, UseMI)) {
LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
return false;
}
}
return true;
}
bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
return false;
bool IsStore = Opcode == TargetOpcode::G_STORE;
Register Addr, Base, Offset;
bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
return false;
unsigned NewOpcode;
switch (Opcode) {
case TargetOpcode::G_LOAD:
NewOpcode = TargetOpcode::G_INDEXED_LOAD;
break;
case TargetOpcode::G_SEXTLOAD:
NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
break;
case TargetOpcode::G_ZEXTLOAD:
NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
break;
case TargetOpcode::G_STORE:
NewOpcode = TargetOpcode::G_INDEXED_STORE;
break;
default:
llvm_unreachable("Unknown load/store opcode");
}
MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
MachineIRBuilder MIRBuilder(MI);
auto MIB = MIRBuilder.buildInstr(NewOpcode);
if (IsStore) {
MIB.addDef(Addr);
MIB.addUse(MI.getOperand(0).getReg());
} else {
MIB.addDef(MI.getOperand(0).getReg());
MIB.addDef(Addr);
}
MIB.addUse(Base);
MIB.addUse(Offset);
MIB.addImm(IsPre);
MI.eraseFromParent();
AddrDef.eraseFromParent();
LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
return true;
}
bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
// Try to match the following:
@@ -909,5 +1117,9 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
if (tryCombineExtendingLoads(MI))
return true;
if (tryCombineIndexedLoadStore(MI))
return true;
return false;
}

View File

@@ -36,7 +36,7 @@ using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
: TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;

View File

@@ -55,7 +55,7 @@ FunctionPass *createAArch64CollectLOHPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone);
FunctionPass *createAArch64StackTaggingPass(bool MergeInit);
FunctionPass *createAArch64StackTaggingPreRAPass();

View File

@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
@@ -29,13 +30,14 @@ using namespace MIPatternMatch;
namespace {
class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
public:
AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
GISelKnownBits *KB, MachineDominatorTree *MDT)
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
KB(KB), MDT(MDT) {}
virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
MachineIRBuilder &B) const override;
};
@@ -43,7 +45,7 @@ public:
bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
CombinerHelper Helper(Observer, B, KB, MDT);
switch (MI.getOpcode()) {
default:
@@ -54,8 +56,14 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return Helper.tryCombineBr(MI);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD: {
bool Changed = false;
Changed |= Helper.tryCombineExtendingLoads(MI);
Changed |= Helper.tryCombineIndexedLoadStore(MI);
return Changed;
}
case TargetOpcode::G_STORE:
return Helper.tryCombineIndexedLoadStore(MI);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
switch (MI.getIntrinsicID()) {
case Intrinsic::memcpy:
@@ -83,13 +91,15 @@ class AArch64PreLegalizerCombiner : public MachineFunctionPass {
public:
static char ID;
AArch64PreLegalizerCombiner(bool IsOptNone = false);
StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
bool IsOptNone;
};
}
@@ -99,10 +109,15 @@ void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
getSelectionDAGFallbackAnalysisUsage(AU);
AU.addRequired<GISelKnownBitsAnalysis>();
AU.addPreserved<GISelKnownBitsAnalysis>();
if (!IsOptNone) {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
}
MachineFunctionPass::getAnalysisUsage(AU);
}
AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner(bool IsOptNone)
: MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}
@@ -115,8 +130,10 @@ bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
bool EnableOpt =
MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MachineDominatorTree *MDT =
IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
F.hasMinSize(), KB, MDT);
Combiner C(PCInfo, TPC);
return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}
@@ -133,7 +150,7 @@ INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
namespace llvm {
FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone) {
return new AArch64PreLegalizerCombiner(IsOptNone);
}
} // end namespace llvm

View File

@@ -504,7 +504,8 @@ bool AArch64PassConfig::addIRTranslator() {
}
void AArch64PassConfig::addPreLegalizeMachineIR() {
bool IsOptNone = getOptLevel() == CodeGenOpt::None;
addPass(createAArch64PreLegalizeCombiner(IsOptNone));
}
bool AArch64PassConfig::addLegalizeMachineIR() {

View File

@@ -0,0 +1,182 @@
; RUN: llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-prelegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s
define i8* @test_simple_load_pre(i8* %ptr) {
; CHECK-LABEL: name: test_simple_load_pre
; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
; CHECK-NOT: G_GEP
; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1
; CHECK: $x0 = COPY [[NEXT]](p0)
%next = getelementptr i8, i8* %ptr, i32 42
load volatile i8, i8* %next
ret i8* %next
}
define void @test_load_multiple_dominated(i8* %ptr, i1 %tst, i1 %tst2) {
; CHECK-LABEL: name: test_load_multiple_dominated
; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
; CHECK-NOT: G_GEP
; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1
; CHECK: $x0 = COPY [[NEXT]](p0)
%next = getelementptr i8, i8* %ptr, i32 42
br i1 %tst, label %do_load, label %end
do_load:
load volatile i8, i8* %next
br i1 %tst2, label %bb1, label %bb2
bb1:
store volatile i8* %next, i8** undef
ret void
bb2:
call void @bar(i8* %next)
ret void
end:
ret void
}
define i8* @test_simple_store_pre(i8* %ptr) {
; CHECK-LABEL: name: test_simple_store_pre
; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
; CHECK: [[VAL:%.*]]:_(s8) = G_CONSTANT i8 0
; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
; CHECK-NOT: G_GEP
; CHECK: [[NEXT:%.*]]:_(p0) = G_INDEXED_STORE [[VAL]](s8), [[BASE]], [[OFFSET]](s64), 1
; CHECK: $x0 = COPY [[NEXT]](p0)
%next = getelementptr i8, i8* %ptr, i32 42
store volatile i8 0, i8* %next
ret i8* %next
}
; The potentially pre-indexed address is used as the value stored. Converting
; would produce the value too late but only by one instruction.
define i64** @test_store_pre_val_loop(i64** %ptr) {
; CHECK-LABEL: name: test_store_pre_val_loop
; CHECK: G_GEP
; CHECK: G_STORE %
%next = getelementptr i64*, i64** %ptr, i32 42
%next.p0 = bitcast i64** %next to i64*
store volatile i64* %next.p0, i64** %next
ret i64** %next
}
; Potentially pre-indexed address is used between GEP computing it and load.
define i8* @test_load_pre_before(i8* %ptr) {
; CHECK-LABEL: name: test_load_pre_before
; CHECK: G_GEP
; CHECK: BL @bar
; CHECK: G_LOAD %
%next = getelementptr i8, i8* %ptr, i32 42
call void @bar(i8* %next)
load volatile i8, i8* %next
ret i8* %next
}
; Materializing the base into a writable register (from sp/fp) would be just as
; bad as the original GEP.
define i8* @test_alloca_load_pre() {
; CHECK-LABEL: name: test_alloca_load_pre
; CHECK: G_GEP
; CHECK: G_LOAD %
%ptr = alloca i8, i32 128
%next = getelementptr i8, i8* %ptr, i32 42
load volatile i8, i8* %next
ret i8* %next
}
; Load does not dominate use of its address. No indexing.
define i8* @test_pre_nodom(i8* %in, i1 %tst) {
; CHECK-LABEL: name: test_pre_nodom
; CHECK: G_GEP
; CHECK: G_LOAD %
%next = getelementptr i8, i8* %in, i32 16
br i1 %tst, label %do_indexed, label %use_addr
do_indexed:
%val = load i8, i8* %next
store i8 %val, i8* @var
store i8* %next, i8** @varp8
br label %use_addr
use_addr:
ret i8* %next
}
define i8* @test_simple_load_post(i8* %ptr) {
; CHECK-LABEL: name: test_simple_load_post
; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
; CHECK-NOT: G_GEP
; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0
; CHECK: $x0 = COPY [[NEXT]](p0)
%next = getelementptr i8, i8* %ptr, i32 42
load volatile i8, i8* %ptr
ret i8* %next
}
define i8* @test_simple_load_post_gep_after(i8* %ptr) {
; CHECK-LABEL: name: test_simple_load_post_gep_after
; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
; CHECK: BL @get_offset
; CHECK: [[OFFSET:%.*]]:_(s64) = COPY $x0
; CHECK: {{%.*}}:_(s8), [[ADDR:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0
; CHECK: $x0 = COPY [[ADDR]](p0)
%offset = call i64 @get_offset()
load volatile i8, i8* %ptr
%next = getelementptr i8, i8* %ptr, i64 %offset
ret i8* %next
}
define i8* @test_load_post_keep_looking(i8* %ptr) {
; CHECK: name: test_load_post_keep_looking
; CHECK: G_INDEXED_LOAD
%offset = call i64 @get_offset()
load volatile i8, i8* %ptr
%intval = ptrtoint i8* %ptr to i8
store i8 %intval, i8* @var
%next = getelementptr i8, i8* %ptr, i64 %offset
ret i8* %next
}
; Base is frame index. Using indexing would need copy anyway.
define i8* @test_load_post_alloca() {
; CHECK-LABEL: name: test_load_post_alloca
; CHECK: G_GEP
; CHECK: G_LOAD %
%ptr = alloca i8, i32 128
%next = getelementptr i8, i8* %ptr, i32 42
load volatile i8, i8* %ptr
ret i8* %next
}
; Offset computation does not dominate the load we might be indexing.
define i8* @test_load_post_gep_offset_after(i8* %ptr) {
; CHECK-LABEL: name: test_load_post_gep_offset_after
; CHECK: G_LOAD %
; CHECK: BL @get_offset
; CHECK: G_GEP
load volatile i8, i8* %ptr
%offset = call i64 @get_offset()
%next = getelementptr i8, i8* %ptr, i64 %offset
ret i8* %next
}
declare void @bar(i8*)
declare i64 @get_offset()
@var = global i8 0
@varp8 = global i8* null

View File

@@ -16,15 +16,15 @@
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
; RUN:   -verify-machineinstrs=0 -global-isel \
; RUN:   | FileCheck %s --check-prefix ENABLED --check-prefix NOFALLBACK --check-prefix ENABLED-O1
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
; RUN:   -verify-machineinstrs=0 -global-isel -global-isel-abort=2 \
; RUN:   | FileCheck %s --check-prefix ENABLED --check-prefix FALLBACK --check-prefix ENABLED-O1
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
; RUN:   -verify-machineinstrs=0 -O1 -aarch64-enable-global-isel-at-O=3 \
; RUN:   | FileCheck %s --check-prefix ENABLED --check-prefix ENABLED-O1
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
; RUN:   -verify-machineinstrs=0 -O1 -aarch64-enable-global-isel-at-O=0 \
@@ -44,6 +44,7 @@
; ENABLED: IRTranslator
; VERIFY-NEXT: Verify generated machine code
; ENABLED-NEXT: Analysis for ComputingKnownBits
; ENABLED-O1-NEXT: MachineDominator Tree Construction
; ENABLED-NEXT: PreLegalizerCombiner
; VERIFY-NEXT: Verify generated machine code
; ENABLED-NEXT: Analysis containing CSE Info

View File

@@ -134,9 +134,21 @@
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INDEXED_LOAD (opcode 64): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_INDEXED_SEXTLOAD (opcode 65): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_INDEXED_ZEXTLOAD (opcode 66): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_STORE (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INDEXED_STORE (opcode 68): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_ATOMIC_CMPXCHG_WITH_SUCCESS (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected