mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
[AArch64][GlobalISel] Create a new minimal combiner pass just for -O0.
We never bothered to have a separate set of combines for -O0 in the prelegalizer before. This results in some minor performance hits for a mode where performance isn't a concern (although not regressing code size significantly is still preferable). This also removes the CSE option since we don't need it for -O0. Through experiments, I've arrived at a set of combines that gets the most code size improvement at -O0, while reducing the amount of time spent in the combiner by around 35% give or take. Differential Revision: https://reviews.llvm.org/D102038
This commit is contained in:
parent
818c390c9c
commit
9146866d14
@ -658,3 +658,10 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
|
|||||||
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
|
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
|
||||||
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
|
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
|
||||||
div_rem_to_divrem, funnel_shift_combines]>;
|
div_rem_to_divrem, funnel_shift_combines]>;
|
||||||
|
|
||||||
|
// A combine group used for prelegalizer combiners at -O0. The combines in
|
||||||
|
// this group have been selected based on experiments to balance code size and
|
||||||
|
// compile time performance.
|
||||||
|
def optnone_combines : GICombineGroup<[trivial_combines,
|
||||||
|
ptr_add_immed_chain, combines_for_extload,
|
||||||
|
not_cmp_fold, opt_brcond_by_inverting_cond]>;
|
||||||
|
@ -59,7 +59,8 @@ ModulePass *createSVEIntrinsicOptsPass();
|
|||||||
InstructionSelector *
|
InstructionSelector *
|
||||||
createAArch64InstructionSelector(const AArch64TargetMachine &,
|
createAArch64InstructionSelector(const AArch64TargetMachine &,
|
||||||
AArch64Subtarget &, AArch64RegisterBankInfo &);
|
AArch64Subtarget &, AArch64RegisterBankInfo &);
|
||||||
FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone);
|
FunctionPass *createAArch64O0PreLegalizerCombiner();
|
||||||
|
FunctionPass *createAArch64PreLegalizerCombiner();
|
||||||
FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone);
|
FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone);
|
||||||
FunctionPass *createAArch64PostLegalizerLowering();
|
FunctionPass *createAArch64PostLegalizerLowering();
|
||||||
FunctionPass *createAArch64PostSelectOptimize();
|
FunctionPass *createAArch64PostSelectOptimize();
|
||||||
@ -82,6 +83,7 @@ void initializeAArch64SpeculationHardeningPass(PassRegistry&);
|
|||||||
void initializeAArch64LoadStoreOptPass(PassRegistry&);
|
void initializeAArch64LoadStoreOptPass(PassRegistry&);
|
||||||
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
|
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
|
||||||
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
|
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
|
||||||
|
void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
|
||||||
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
|
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
|
||||||
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
|
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
|
||||||
void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);
|
void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);
|
||||||
|
@ -43,6 +43,13 @@ def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
|
|||||||
let AdditionalArguments = [];
|
let AdditionalArguments = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def AArch64O0PreLegalizerCombinerHelper: GICombinerHelper<
|
||||||
|
"AArch64GenO0PreLegalizerCombinerHelper", [optnone_combines]> {
|
||||||
|
let DisableRuleOption = "aarch64O0prelegalizercombiner-disable-rule";
|
||||||
|
let StateClass = "AArch64O0PreLegalizerCombinerHelperState";
|
||||||
|
let AdditionalArguments = [];
|
||||||
|
}
|
||||||
|
|
||||||
// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
|
// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
|
||||||
// target-specific opcode.
|
// target-specific opcode.
|
||||||
def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">;
|
def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">;
|
||||||
|
@ -184,6 +184,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
|
|||||||
initializeAArch64ExpandPseudoPass(*PR);
|
initializeAArch64ExpandPseudoPass(*PR);
|
||||||
initializeAArch64LoadStoreOptPass(*PR);
|
initializeAArch64LoadStoreOptPass(*PR);
|
||||||
initializeAArch64SIMDInstrOptPass(*PR);
|
initializeAArch64SIMDInstrOptPass(*PR);
|
||||||
|
initializeAArch64O0PreLegalizerCombinerPass(*PR);
|
||||||
initializeAArch64PreLegalizerCombinerPass(*PR);
|
initializeAArch64PreLegalizerCombinerPass(*PR);
|
||||||
initializeAArch64PostLegalizerCombinerPass(*PR);
|
initializeAArch64PostLegalizerCombinerPass(*PR);
|
||||||
initializeAArch64PostLegalizerLoweringPass(*PR);
|
initializeAArch64PostLegalizerLoweringPass(*PR);
|
||||||
@ -562,8 +563,10 @@ bool AArch64PassConfig::addIRTranslator() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AArch64PassConfig::addPreLegalizeMachineIR() {
|
void AArch64PassConfig::addPreLegalizeMachineIR() {
|
||||||
bool IsOptNone = getOptLevel() == CodeGenOpt::None;
|
if (getOptLevel() == CodeGenOpt::None)
|
||||||
addPass(createAArch64PreLegalizerCombiner(IsOptNone));
|
addPass(createAArch64O0PreLegalizerCombiner());
|
||||||
|
else
|
||||||
|
addPass(createAArch64PreLegalizerCombiner());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AArch64PassConfig::addLegalizeMachineIR() {
|
bool AArch64PassConfig::addLegalizeMachineIR() {
|
||||||
|
@ -10,6 +10,8 @@ tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
|
|||||||
tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
|
tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
|
||||||
tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
|
tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
|
||||||
tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
|
tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
|
||||||
|
tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
|
||||||
|
-combiners="AArch64O0PreLegalizerCombinerHelper")
|
||||||
tablegen(LLVM AArch64GenPreLegalizeGICombiner.inc -gen-global-isel-combiner
|
tablegen(LLVM AArch64GenPreLegalizeGICombiner.inc -gen-global-isel-combiner
|
||||||
-combiners="AArch64PreLegalizerCombinerHelper")
|
-combiners="AArch64PreLegalizerCombinerHelper")
|
||||||
tablegen(LLVM AArch64GenPostLegalizeGICombiner.inc -gen-global-isel-combiner
|
tablegen(LLVM AArch64GenPostLegalizeGICombiner.inc -gen-global-isel-combiner
|
||||||
@ -32,6 +34,7 @@ add_llvm_target(AArch64CodeGen
|
|||||||
GISel/AArch64GlobalISelUtils.cpp
|
GISel/AArch64GlobalISelUtils.cpp
|
||||||
GISel/AArch64InstructionSelector.cpp
|
GISel/AArch64InstructionSelector.cpp
|
||||||
GISel/AArch64LegalizerInfo.cpp
|
GISel/AArch64LegalizerInfo.cpp
|
||||||
|
GISel/AArch64O0PreLegalizerCombiner.cpp
|
||||||
GISel/AArch64PreLegalizerCombiner.cpp
|
GISel/AArch64PreLegalizerCombiner.cpp
|
||||||
GISel/AArch64PostLegalizerCombiner.cpp
|
GISel/AArch64PostLegalizerCombiner.cpp
|
||||||
GISel/AArch64PostLegalizerLowering.cpp
|
GISel/AArch64PostLegalizerLowering.cpp
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include "AArch64GlobalISelUtils.h"
|
#include "AArch64GlobalISelUtils.h"
|
||||||
#include "AArch64InstrInfo.h"
|
#include "AArch64InstrInfo.h"
|
||||||
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
||||||
|
#include "llvm/CodeGen/TargetLowering.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
@ -57,3 +58,38 @@ bool AArch64GISelUtils::isCMN(const MachineInstr *MaybeSub,
|
|||||||
getConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI);
|
getConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI);
|
||||||
return MaybeZero && MaybeZero->Value.getZExtValue() == 0;
|
return MaybeZero && MaybeZero->Value.getZExtValue() == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool AArch64GISelUtils::tryEmitBZero(MachineInstr &MI,
|
||||||
|
MachineIRBuilder &MIRBuilder,
|
||||||
|
bool MinSize) {
|
||||||
|
assert(MI.getOpcode() == TargetOpcode::G_MEMSET);
|
||||||
|
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
|
||||||
|
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
|
||||||
|
if (!TLI.getLibcallName(RTLIB::BZERO))
|
||||||
|
return false;
|
||||||
|
auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
|
||||||
|
if (!Zero || Zero->Value.getSExtValue() != 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// It's not faster to use bzero rather than memset for sizes <= 256.
|
||||||
|
// However, it *does* save us a mov from wzr, so if we're going for
|
||||||
|
// minsize, use bzero even if it's slower.
|
||||||
|
if (!MinSize) {
|
||||||
|
// If the size is known, check it. If it is not known, assume using bzero is
|
||||||
|
// better.
|
||||||
|
if (auto Size =
|
||||||
|
getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
|
||||||
|
if (Size->Value.getSExtValue() <= 256)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MIRBuilder.setInstrAndDebugLoc(MI);
|
||||||
|
MIRBuilder
|
||||||
|
.buildInstr(TargetOpcode::G_BZERO, {},
|
||||||
|
{MI.getOperand(0), MI.getOperand(2)})
|
||||||
|
.addImm(MI.getOperand(3).getImm())
|
||||||
|
.addMemOperand(*MI.memoperands_begin());
|
||||||
|
MI.eraseFromParent();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
|
#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
|
||||||
|
|
||||||
#include "llvm/ADT/Optional.h"
|
#include "llvm/ADT/Optional.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||||
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
||||||
#include "llvm/CodeGen/Register.h"
|
#include "llvm/CodeGen/Register.h"
|
||||||
#include "MCTargetDesc/AArch64AddressingModes.h"
|
#include "MCTargetDesc/AArch64AddressingModes.h"
|
||||||
@ -44,6 +45,14 @@ Optional<int64_t> getAArch64VectorSplatScalar(const MachineInstr &MI,
|
|||||||
bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred,
|
bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred,
|
||||||
const MachineRegisterInfo &MRI);
|
const MachineRegisterInfo &MRI);
|
||||||
|
|
||||||
|
/// Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is
|
||||||
|
/// supported and beneficial to do so.
|
||||||
|
///
|
||||||
|
/// \note This only applies on Darwin.
|
||||||
|
///
|
||||||
|
/// \returns true if \p MI was replaced with a G_BZERO.
|
||||||
|
bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder, bool MinSize);
|
||||||
|
|
||||||
} // namespace AArch64GISelUtils
|
} // namespace AArch64GISelUtils
|
||||||
} // namespace llvm
|
} // namespace llvm
|
||||||
|
|
||||||
|
171
lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
Normal file
171
lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
//=== lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp ----------===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This pass does combining of machine instructions at the generic MI level,
|
||||||
|
// before the legalizer, using the reduced set of combines intended for -O0.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AArch64GlobalISelUtils.h"
|
||||||
|
#include "AArch64TargetMachine.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/Combiner.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||||
|
#include "llvm/CodeGen/MachineDominators.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||||
|
#include "llvm/IR/Instructions.h"
|
||||||
|
#include "llvm/Support/Debug.h"
|
||||||
|
|
||||||
|
#define DEBUG_TYPE "aarch64-O0-prelegalizer-combiner"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
using namespace MIPatternMatch;
|
||||||
|
|
||||||
|
class AArch64O0PreLegalizerCombinerHelperState {
|
||||||
|
protected:
|
||||||
|
CombinerHelper &Helper;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AArch64O0PreLegalizerCombinerHelperState(CombinerHelper &Helper)
|
||||||
|
: Helper(Helper) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
||||||
|
#include "AArch64GenO0PreLegalizeGICombiner.inc"
|
||||||
|
#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
|
||||||
|
#include "AArch64GenO0PreLegalizeGICombiner.inc"
|
||||||
|
#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
|
||||||
|
|
||||||
|
class AArch64O0PreLegalizerCombinerInfo : public CombinerInfo {
|
||||||
|
GISelKnownBits *KB;
|
||||||
|
MachineDominatorTree *MDT;
|
||||||
|
AArch64GenO0PreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AArch64O0PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
|
||||||
|
GISelKnownBits *KB,
|
||||||
|
MachineDominatorTree *MDT)
|
||||||
|
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
|
||||||
|
/*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
|
||||||
|
KB(KB), MDT(MDT) {
|
||||||
|
if (!GeneratedRuleCfg.parseCommandLineOption())
|
||||||
|
report_fatal_error("Invalid rule identifier");
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
|
||||||
|
MachineIRBuilder &B) const override;
|
||||||
|
};
|
||||||
|
|
||||||
|
bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
|
||||||
|
MachineInstr &MI,
|
||||||
|
MachineIRBuilder &B) const {
|
||||||
|
CombinerHelper Helper(Observer, B, KB, MDT);
|
||||||
|
AArch64GenO0PreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);
|
||||||
|
|
||||||
|
if (Generated.tryCombineAll(Observer, MI, B))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
unsigned Opc = MI.getOpcode();
|
||||||
|
switch (Opc) {
|
||||||
|
case TargetOpcode::G_CONCAT_VECTORS:
|
||||||
|
return Helper.tryCombineConcatVectors(MI);
|
||||||
|
case TargetOpcode::G_SHUFFLE_VECTOR:
|
||||||
|
return Helper.tryCombineShuffleVector(MI);
|
||||||
|
case TargetOpcode::G_MEMCPY:
|
||||||
|
case TargetOpcode::G_MEMMOVE:
|
||||||
|
case TargetOpcode::G_MEMSET: {
|
||||||
|
// At -O0, cap the maximum length we are willing to inline at 32.
|
||||||
|
unsigned MaxLen = 32;
|
||||||
|
// Try to inline memcpy-family calls, subject to MaxLen. This pass only runs
// at -O0, so no optimization-level check is needed here.
|
||||||
|
if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
|
||||||
|
return true;
|
||||||
|
if (Opc == TargetOpcode::G_MEMSET)
|
||||||
|
return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
|
||||||
|
#include "AArch64GenO0PreLegalizeGICombiner.inc"
|
||||||
|
#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
|
||||||
|
|
||||||
|
// Pass boilerplate
|
||||||
|
// ================
|
||||||
|
|
||||||
|
class AArch64O0PreLegalizerCombiner : public MachineFunctionPass {
|
||||||
|
public:
|
||||||
|
static char ID;
|
||||||
|
|
||||||
|
AArch64O0PreLegalizerCombiner();
|
||||||
|
|
||||||
|
StringRef getPassName() const override {
|
||||||
|
return "AArch64O0PreLegalizerCombiner";
|
||||||
|
}
|
||||||
|
|
||||||
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||||
|
|
||||||
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||||
|
};
|
||||||
|
} // end anonymous namespace
|
||||||
|
|
||||||
|
void AArch64O0PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||||
|
AU.addRequired<TargetPassConfig>();
|
||||||
|
AU.setPreservesCFG();
|
||||||
|
getSelectionDAGFallbackAnalysisUsage(AU);
|
||||||
|
AU.addRequired<GISelKnownBitsAnalysis>();
|
||||||
|
AU.addPreserved<GISelKnownBitsAnalysis>();
|
||||||
|
MachineFunctionPass::getAnalysisUsage(AU);
|
||||||
|
}
|
||||||
|
|
||||||
|
AArch64O0PreLegalizerCombiner::AArch64O0PreLegalizerCombiner()
|
||||||
|
: MachineFunctionPass(ID) {
|
||||||
|
initializeAArch64O0PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
|
||||||
|
if (MF.getProperties().hasProperty(
|
||||||
|
MachineFunctionProperties::Property::FailedISel))
|
||||||
|
return false;
|
||||||
|
auto &TPC = getAnalysis<TargetPassConfig>();
|
||||||
|
|
||||||
|
const Function &F = MF.getFunction();
|
||||||
|
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
|
||||||
|
AArch64O0PreLegalizerCombinerInfo PCInfo(
|
||||||
|
false, F.hasOptSize(), F.hasMinSize(), KB, nullptr /* MDT */);
|
||||||
|
Combiner C(PCInfo, &TPC);
|
||||||
|
return C.combineMachineInstrs(MF, nullptr /* CSEInfo */);
|
||||||
|
}
|
||||||
|
|
||||||
|
char AArch64O0PreLegalizerCombiner::ID = 0;
|
||||||
|
INITIALIZE_PASS_BEGIN(AArch64O0PreLegalizerCombiner, DEBUG_TYPE,
|
||||||
|
"Combine AArch64 machine instrs before legalization",
|
||||||
|
false, false)
|
||||||
|
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
|
||||||
|
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
|
||||||
|
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
|
||||||
|
INITIALIZE_PASS_END(AArch64O0PreLegalizerCombiner, DEBUG_TYPE,
|
||||||
|
"Combine AArch64 machine instrs before legalization", false,
|
||||||
|
false)
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
FunctionPass *createAArch64O0PreLegalizerCombiner() {
|
||||||
|
return new AArch64O0PreLegalizerCombiner();
|
||||||
|
}
|
||||||
|
} // end namespace llvm
|
@ -11,6 +11,7 @@
|
|||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AArch64GlobalISelUtils.h"
|
||||||
#include "AArch64TargetMachine.h"
|
#include "AArch64TargetMachine.h"
|
||||||
#include "llvm/CodeGen/GlobalISel/Combiner.h"
|
#include "llvm/CodeGen/GlobalISel/Combiner.h"
|
||||||
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
|
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
|
||||||
@ -219,46 +220,6 @@ static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is
|
|
||||||
/// supported and beneficial to do so.
|
|
||||||
///
|
|
||||||
/// \note This only applies on Darwin.
|
|
||||||
///
|
|
||||||
/// \returns true if \p MI was replaced with a G_BZERO.
|
|
||||||
static bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
|
|
||||||
bool MinSize) {
|
|
||||||
assert(MI.getOpcode() == TargetOpcode::G_MEMSET);
|
|
||||||
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
|
|
||||||
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
|
|
||||||
if (!TLI.getLibcallName(RTLIB::BZERO))
|
|
||||||
return false;
|
|
||||||
auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
|
|
||||||
if (!Zero || Zero->Value.getSExtValue() != 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// It's not faster to use bzero rather than memset for sizes <= 256.
|
|
||||||
// However, it *does* save us a mov from wzr, so if we're going for
|
|
||||||
// minsize, use bzero even if it's slower.
|
|
||||||
if (!MinSize) {
|
|
||||||
// If the size is known, check it. If it is not known, assume using bzero is
|
|
||||||
// better.
|
|
||||||
if (auto Size =
|
|
||||||
getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
|
|
||||||
if (Size->Value.getSExtValue() <= 256)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MIRBuilder.setInstrAndDebugLoc(MI);
|
|
||||||
MIRBuilder
|
|
||||||
.buildInstr(TargetOpcode::G_BZERO, {},
|
|
||||||
{MI.getOperand(0), MI.getOperand(2)})
|
|
||||||
.addImm(MI.getOperand(3).getImm())
|
|
||||||
.addMemOperand(*MI.memoperands_begin());
|
|
||||||
MI.eraseFromParent();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
class AArch64PreLegalizerCombinerHelperState {
|
class AArch64PreLegalizerCombinerHelperState {
|
||||||
protected:
|
protected:
|
||||||
CombinerHelper &Helper;
|
CombinerHelper &Helper;
|
||||||
@ -321,7 +282,7 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
|
|||||||
if (!EnableMinSize && Helper.tryCombineMemCpyFamily(MI, MaxLen))
|
if (!EnableMinSize && Helper.tryCombineMemCpyFamily(MI, MaxLen))
|
||||||
return true;
|
return true;
|
||||||
if (Opc == TargetOpcode::G_MEMSET)
|
if (Opc == TargetOpcode::G_MEMSET)
|
||||||
return tryEmitBZero(MI, B, EnableMinSize);
|
return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -340,15 +301,13 @@ class AArch64PreLegalizerCombiner : public MachineFunctionPass {
|
|||||||
public:
|
public:
|
||||||
static char ID;
|
static char ID;
|
||||||
|
|
||||||
AArch64PreLegalizerCombiner(bool IsOptNone = false);
|
AArch64PreLegalizerCombiner();
|
||||||
|
|
||||||
StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }
|
StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }
|
||||||
|
|
||||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||||
|
|
||||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||||
private:
|
|
||||||
bool IsOptNone;
|
|
||||||
};
|
};
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
|
||||||
@ -358,17 +317,15 @@ void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
|
|||||||
getSelectionDAGFallbackAnalysisUsage(AU);
|
getSelectionDAGFallbackAnalysisUsage(AU);
|
||||||
AU.addRequired<GISelKnownBitsAnalysis>();
|
AU.addRequired<GISelKnownBitsAnalysis>();
|
||||||
AU.addPreserved<GISelKnownBitsAnalysis>();
|
AU.addPreserved<GISelKnownBitsAnalysis>();
|
||||||
if (!IsOptNone) {
|
AU.addRequired<MachineDominatorTree>();
|
||||||
AU.addRequired<MachineDominatorTree>();
|
AU.addPreserved<MachineDominatorTree>();
|
||||||
AU.addPreserved<MachineDominatorTree>();
|
|
||||||
}
|
|
||||||
AU.addRequired<GISelCSEAnalysisWrapperPass>();
|
AU.addRequired<GISelCSEAnalysisWrapperPass>();
|
||||||
AU.addPreserved<GISelCSEAnalysisWrapperPass>();
|
AU.addPreserved<GISelCSEAnalysisWrapperPass>();
|
||||||
MachineFunctionPass::getAnalysisUsage(AU);
|
MachineFunctionPass::getAnalysisUsage(AU);
|
||||||
}
|
}
|
||||||
|
|
||||||
AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner(bool IsOptNone)
|
AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
|
||||||
: MachineFunctionPass(ID), IsOptNone(IsOptNone) {
|
: MachineFunctionPass(ID) {
|
||||||
initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
|
initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -387,8 +344,7 @@ bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
bool EnableOpt =
|
bool EnableOpt =
|
||||||
MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
|
MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
|
||||||
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
|
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
|
||||||
MachineDominatorTree *MDT =
|
MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
|
||||||
IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
|
|
||||||
AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
|
AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
|
||||||
F.hasMinSize(), KB, MDT);
|
F.hasMinSize(), KB, MDT);
|
||||||
Combiner C(PCInfo, &TPC);
|
Combiner C(PCInfo, &TPC);
|
||||||
@ -408,7 +364,7 @@ INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
|
|||||||
|
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) {
|
FunctionPass *createAArch64PreLegalizerCombiner() {
|
||||||
return new AArch64PreLegalizerCombiner(IsOptNone);
|
return new AArch64PreLegalizerCombiner();
|
||||||
}
|
}
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
@ -56,9 +56,11 @@
|
|||||||
; VERIFY-NEXT: Verify generated machine code
|
; VERIFY-NEXT: Verify generated machine code
|
||||||
; ENABLED-NEXT: Analysis for ComputingKnownBits
|
; ENABLED-NEXT: Analysis for ComputingKnownBits
|
||||||
; ENABLED-O1-NEXT: MachineDominator Tree Construction
|
; ENABLED-O1-NEXT: MachineDominator Tree Construction
|
||||||
; ENABLED-NEXT: Analysis containing CSE Info
|
; ENABLED-O1-NEXT: Analysis containing CSE Info
|
||||||
; ENABLED-NEXT: PreLegalizerCombiner
|
; ENABLED-O1-NEXT: PreLegalizerCombiner
|
||||||
|
; VERIFY-O0-NEXT: AArch64O0PreLegalizerCombiner
|
||||||
; VERIFY-NEXT: Verify generated machine code
|
; VERIFY-NEXT: Verify generated machine code
|
||||||
|
; VERIFY-O0-NEXT: Analysis containing CSE Info
|
||||||
; ENABLED-NEXT: Legalizer
|
; ENABLED-NEXT: Legalizer
|
||||||
; VERIFY-NEXT: Verify generated machine code
|
; VERIFY-NEXT: Verify generated machine code
|
||||||
; ENABLED: RegBankSelect
|
; ENABLED: RegBankSelect
|
||||||
|
@ -34,8 +34,8 @@
|
|||||||
; CHECK-NEXT: Analysis containing CSE Info
|
; CHECK-NEXT: Analysis containing CSE Info
|
||||||
; CHECK-NEXT: IRTranslator
|
; CHECK-NEXT: IRTranslator
|
||||||
; CHECK-NEXT: Analysis for ComputingKnownBits
|
; CHECK-NEXT: Analysis for ComputingKnownBits
|
||||||
|
; CHECK-NEXT: AArch64O0PreLegalizerCombiner
|
||||||
; CHECK-NEXT: Analysis containing CSE Info
|
; CHECK-NEXT: Analysis containing CSE Info
|
||||||
; CHECK-NEXT: AArch64PreLegalizerCombiner
|
|
||||||
; CHECK-NEXT: Legalizer
|
; CHECK-NEXT: Legalizer
|
||||||
; CHECK-NEXT: AArch64PostLegalizerLowering
|
; CHECK-NEXT: AArch64PostLegalizerLowering
|
||||||
; CHECK-NEXT: RegBankSelect
|
; CHECK-NEXT: RegBankSelect
|
||||||
|
@ -1,21 +0,0 @@
|
|||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
||||||
; RUN: llc < %s -O0 -mtriple=aarch64-unknown-unknown | FileCheck %s
|
|
||||||
|
|
||||||
define <2 x i64> @z(i64* nocapture nonnull readonly %p) {
|
|
||||||
; CHECK-LABEL: z:
|
|
||||||
; CHECK: // %bb.0:
|
|
||||||
; CHECK-NEXT: ldr d1, [x0]
|
|
||||||
; CHECK-NEXT: ldr d2, [x0, #8]
|
|
||||||
; CHECK-NEXT: // implicit-def: $q0
|
|
||||||
; CHECK-NEXT: mov v0.16b, v1.16b
|
|
||||||
; CHECK-NEXT: // implicit-def: $q1
|
|
||||||
; CHECK-NEXT: mov v1.16b, v2.16b
|
|
||||||
; CHECK-NEXT: mov v0.d[1], v1.d[0]
|
|
||||||
; CHECK-NEXT: ret
|
|
||||||
%b = load i64, i64* %p
|
|
||||||
%p2 = getelementptr i64, i64* %p, i64 1
|
|
||||||
%bb = load i64, i64* %p2
|
|
||||||
%r1 = insertelement <2 x i64> zeroinitializer, i64 %b, i32 0
|
|
||||||
%r2 = insertelement <2 x i64> %r1, i64 %bb, i32 1
|
|
||||||
ret <2 x i64> %r2
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user