mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
Hook in GlobalMerge pass
llvm-svn: 109359
This commit is contained in:
parent
8cbc57da86
commit
7ae895e007
@ -792,6 +792,12 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// getMaximalGlobalOffset - Returns the maximal possible offset which can be
|
||||
/// used for loads / stores from the global.
|
||||
virtual unsigned getMaximalGlobalOffset() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// TargetLowering Optimization Methods
|
||||
//
|
||||
|
@ -98,6 +98,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
|
||||
|
||||
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
|
||||
FunctionPass *createARMExpandPseudoPass();
|
||||
FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
|
||||
FunctionPass *createARMConstantIslandPass();
|
||||
FunctionPass *createNEONPreAllocPass();
|
||||
FunctionPass *createNEONMoveFixPass();
|
||||
|
203
lib/Target/ARM/ARMGlobalMerge.cpp
Normal file
203
lib/Target/ARM/ARMGlobalMerge.cpp
Normal file
@ -0,0 +1,203 @@
|
||||
//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This pass merges globals with internal linkage into one. This way all the
|
||||
// globals which were merged into a bigger one can be addressed using offsets
|
||||
// from the same base pointer (no need for separate base pointer for each of the
|
||||
// globals). Such a transformation can significantly reduce the register pressure
|
||||
// when many globals are involved.
|
||||
//
|
||||
// For example, consider the code which touches several global variables at once:
|
||||
//
|
||||
// static int foo[N], bar[N], baz[N];
|
||||
//
|
||||
// for (i = 0; i < N; ++i) {
|
||||
// foo[i] = bar[i] * baz[i];
|
||||
// }
|
||||
//
|
||||
// On ARM the addresses of 3 arrays should be kept in the registers, thus
|
||||
// this code has quite large register pressure (loop body):
|
||||
//
|
||||
// ldr r1, [r5], #4
|
||||
// ldr r2, [r6], #4
|
||||
// mul r1, r2, r1
|
||||
// str r1, [r0], #4
|
||||
//
|
||||
// Pass converts the code to something like:
|
||||
//
|
||||
// static struct {
|
||||
// int foo[N];
|
||||
// int bar[N];
|
||||
// int baz[N];
|
||||
// } merged;
|
||||
//
|
||||
// for (i = 0; i < N; ++i) {
|
||||
// merged.foo[i] = merged.bar[i] * merged.baz[i];
|
||||
// }
|
||||
//
|
||||
// and in ARM code this becomes:
|
||||
//
|
||||
// ldr r0, [r5, #40]
|
||||
// ldr r1, [r5, #80]
|
||||
// mul r0, r1, r0
|
||||
// str r0, [r5], #4
|
||||
//
|
||||
// Note that we saved 2 registers here almost "for free".
|
||||
// ===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm-global-merge"
|
||||
#include "ARM.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/Attributes.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/GlobalVariable.h"
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
/// ARMGlobalMerge - Pass which merges globals with internal linkage into
/// larger aggregates. The whole transformation happens once per module, in
/// doInitialization(); runOnFunction() itself does not modify anything.
class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass {
  /// TLI - Keep a pointer of a TargetLowering to consult for determining
  /// target type sizes and the maximal usable global offset.
  const TargetLowering *TLI;

  /// doMerge - Merge the given globals into aggregate(s) created in module M.
  /// isConst is the constness flag given to the merged variable(s).
  bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
               Module &M, bool isConst) const;

public:
  static char ID;             // Pass identification, replacement for typeid.
  explicit ARMGlobalMerge(const TargetLowering *tli)
    : FunctionPass(&ID), TLI(tli) {}

  virtual bool doInitialization(Module &M);
  virtual bool runOnFunction(Function& F);

  virtual const char *getPassName() const {
    return "Merge internal globals";
  }

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesCFG();
    FunctionPass::getAnalysisUsage(AU);
  }

  /// GlobalCmp - Ordering predicate which compares two globals by the
  /// allocation size of their value types, smallest first.
  struct GlobalCmp {
    const TargetData *TD;

    explicit GlobalCmp(const TargetData *td) : TD(td) {}

    // const-qualified so that the predicate can also be invoked through a
    // const comparator object.
    bool operator()(const GlobalVariable *GV1,
                    const GlobalVariable *GV2) const {
      const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
      const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();

      return TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2);
    }
  };
};
|
||||
} // end anonymous namespace
|
||||
|
||||
char ARMGlobalMerge::ID = 0;
|
||||
|
||||
/// doMerge - Merge the given globals into one or more internal struct-typed
/// globals named "merged" and redirect every use of an original global to a
/// constant GEP into the corresponding aggregate. The merged globals are
/// erased from the module.
///
/// \param Globals  candidates for merging; reordered (sorted by size) in place.
/// \param M        module which receives the merged variable(s).
/// \param isConst  constness flag for the merged variable(s).
/// \returns true if at least one aggregate was created.
bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
                             Module &M, bool isConst) const {
  const TargetData *TD = TLI->getTargetData();

  // FIXME: Infer the maximum possible offset depending on the actual users
  // (these max offsets are different for the users inside Thumb or ARM
  // functions)
  unsigned MaxOffset = TLI->getMaximalGlobalOffset();

  // FIXME: Find better heuristics
  std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));

  const Type *Int32Ty = Type::getInt32Ty(M.getContext());
  bool Changed = false;

  for (size_t i = 0, e = Globals.size(); i != e; ) {
    uint64_t MergedSize = 0;
    std::vector<const Type*> Tys;
    std::vector<Constant*> Inits;

    // Collect globals while the aggregate stays within the offset budget.
    // The budget is checked *before* a global is admitted so that the
    // aggregate never grows past MaxOffset. The first global of a group is
    // always taken, which guarantees that the outer loop makes progress.
    // NOTE: padding inserted by the struct layout is not accounted for.
    size_t j = i;
    for (; j != e; ++j) {
      const Type *Ty = Globals[j]->getType()->getElementType();
      uint64_t Size = TD->getTypeAllocSize(Ty);
      if (j != i && MergedSize + Size > MaxOffset)
        break;

      Tys.push_back(Ty);
      Inits.push_back(Globals[j]->getInitializer());
      MergedSize += Size;
    }

    // Wrapping a single global into a one-element struct gains nothing.
    if (j - i < 2) {
      i = j;
      continue;
    }

    StructType *MergedTy = StructType::get(M.getContext(), Tys);
    Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
    GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
                                                  GlobalValue::InternalLinkage,
                                                  MergedInit, "merged");

    for (size_t k = i; k < j; ++k) {
      // Address of the (k-i)-th field of the merged struct.
      SmallVector<Constant*, 2> Idx;
      Idx.push_back(ConstantInt::get(Int32Ty, 0));
      Idx.push_back(ConstantInt::get(Int32Ty, k-i));

      Constant *GEP =
        ConstantExpr::getInBoundsGetElementPtr(MergedGV,
                                               &Idx[0], Idx.size());

      Globals[k]->replaceAllUsesWith(GEP);
      Globals[k]->eraseFromParent();
    }

    Changed = true;
    i = j;
  }

  return Changed;
}
|
||||
|
||||
|
||||
/// doInitialization - Collect the mergeable globals of module M and merge
/// them. Constant and non-constant globals are gathered into separate lists
/// and merged separately.
///
/// \returns true if the module was modified.
bool ARMGlobalMerge::doInitialization(Module& M) {
  SmallVector<GlobalVariable*, 16> Globals, ConstGlobals;
  const TargetData *TD = TLI->getTargetData();
  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
  bool Changed = false;

  // Grab all merge candidates, split by constness.
  for (Module::global_iterator I = M.global_begin(),
         E = M.global_end(); I != E; ++I) {
    // Merge is safe for "normal" internal globals only
    if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
      continue;

    // Ignore fancy-aligned globals for now.
    if (I->getAlignment() != 0)
      continue;

    // Measure the global's value type. I->getType() is the *pointer* to the
    // global, whose size says nothing about how much storage the global needs.
    const Type *Ty = I->getType()->getElementType();
    if (TD->getTypeAllocSize(Ty) >= MaxOffset)
      continue;

    if (I->isConstant())
      ConstGlobals.push_back(I);
    else
      Globals.push_back(I);
  }

  // A merge needs at least two candidates.
  if (Globals.size() > 1)
    Changed |= doMerge(Globals, M, false);
  if (ConstGlobals.size() > 1)
    Changed |= doMerge(ConstGlobals, M, true);

  return Changed;
}
|
||||
|
||||
/// runOnFunction - This pass performs no per-function work; it always reports
/// that the function was left unmodified.
bool ARMGlobalMerge::runOnFunction(Function& F) {
  const bool Modified = false;
  return Modified;
}
|
||||
|
||||
/// createARMGlobalMergePass - Allocate a new ARMGlobalMerge pass which
/// consults the given TargetLowering, and hand it back to the caller.
FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
  ARMGlobalMerge *MergePass = new ARMGlobalMerge(tli);
  return MergePass;
}
|
@ -703,6 +703,12 @@ unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
|
||||
return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
|
||||
}
|
||||
|
||||
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
|
||||
/// be used for loads / stores from the global.
|
||||
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
|
||||
return (Subtarget->isThumb1Only() ? 127 : 4095);
|
||||
}
|
||||
|
||||
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
|
||||
unsigned NumVals = N->getNumValues();
|
||||
if (!NumVals)
|
||||
|
@ -263,6 +263,10 @@ namespace llvm {
|
||||
/// getFunctionAlignment - Return the Log2 alignment of this function.
|
||||
virtual unsigned getFunctionAlignment(const Function *F) const;
|
||||
|
||||
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
|
||||
/// be used for loads / stores from the global.
|
||||
virtual unsigned getMaximalGlobalOffset() const;
|
||||
|
||||
/// createFastISel - This method returns a target specific FastISel object,
|
||||
/// or null if the target does not support "fast" ISel.
|
||||
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
|
||||
|
@ -85,9 +85,15 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
|
||||
TSInfo(*this) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Pass Pipeline Configuration
|
||||
/// addPreISel - Schedule the ARM global merge pass ahead of instruction
/// selection, unless optimization is disabled. Always returns false.
bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
                                      CodeGenOpt::Level OptLevel) {
  // Global merging is an optimization: leave unoptimized builds untouched.
  if (OptLevel == CodeGenOpt::None)
    return false;

  FunctionPass *GlobalMerge = createARMGlobalMergePass(getTargetLowering());
  PM.add(GlobalMerge);
  return false;
}
|
||||
|
||||
bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
|
||||
CodeGenOpt::Level OptLevel) {
|
||||
PM.add(createARMISelDag(*this, OptLevel));
|
||||
|
@ -50,6 +50,7 @@ public:
|
||||
}
|
||||
|
||||
// Pass Pipeline Configuration
|
||||
virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
|
||||
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
|
||||
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
|
||||
virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
|
||||
|
Loading…
Reference in New Issue
Block a user