1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

Hook in GlobalMerge pass

llvm-svn: 109359
This commit is contained in:
Anton Korobeynikov 2010-07-24 21:52:08 +00:00
parent 8cbc57da86
commit 7ae895e007
7 changed files with 229 additions and 2 deletions

View File

@ -792,6 +792,12 @@ public:
return false;
}
/// getMaximalGlobalOffset - Returns the maximal possible offset which can be
/// used for loads / stores from the global.
virtual unsigned getMaximalGlobalOffset() const {
return 0;
}
//===--------------------------------------------------------------------===//
// TargetLowering Optimization Methods
//

View File

@ -98,6 +98,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();

View File

@ -0,0 +1,203 @@
//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This pass merges globals with internal linkage into one. This way all the
// globals which were merged into a biggest one can be addressed using offsets
// from the same base pointer (no need for separate base pointer for each of the
// global). Such a transformation can significantly reduce the register pressure
// when many globals are involved.
//
// For example, consider the code which touches several global variables at once:
//
// static int foo[N], bar[N], baz[N];
//
// for (i = 0; i < N; ++i) {
// foo[i] = bar[i] * baz[i];
// }
//
// On ARM the addresses of 3 arrays should be kept in the registers, thus
// this code has quite large register pressure (loop body):
//
// ldr r1, [r5], #4
// ldr r2, [r6], #4
// mul r1, r2, r1
// str r1, [r0], #4
//
// Pass converts the code to something like:
//
// static struct {
// int foo[N];
// int bar[N];
// int baz[N];
// } merged;
//
// for (i = 0; i < N; ++i) {
// merged.foo[i] = merged.bar[i] * merged.baz[i];
// }
//
// and in ARM code this becomes:
//
// ldr r0, [r5, #40]
// ldr r1, [r5, #80]
// mul r0, r1, r0
// str r0, [r5], #4
//
// note that we saved 2 registers here almostly "for free".
// ===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-global-merge"
#include "ARM.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Attributes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
namespace {
class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// target type sizes.
const TargetLowering *TLI;
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool) const;
public:
static char ID; // Pass identification, replacement for typeid.
explicit ARMGlobalMerge(const TargetLowering *tli)
: FunctionPass(&ID), TLI(tli) {}
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function& F);
const char *getPassName() const {
return "Merge internal globals";
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
}
struct GlobalCmp {
const TargetData *TD;
GlobalCmp(const TargetData *td):
TD(td) { };
bool operator() (const GlobalVariable* GV1,
const GlobalVariable* GV2) {
const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType();
const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType();
return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
}
};
};
} // end anonymous namespace
char ARMGlobalMerge::ID = 0;
bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst) const {
const TargetData *TD = TLI->getTargetData();
// FIXME: Infer the maximum possible offset depending on the actual users
// (these max offsets are different for the users inside Thumb or ARM
// functions)
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
// FIXME: Find better heuristics
std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
const Type *Int32Ty = Type::getInt32Ty(M.getContext());
for (size_t i = 0, e = Globals.size(); i != e; ) {
size_t j = 0;
uint64_t MergedSize = 0;
std::vector<const Type*> Tys;
std::vector<Constant*> Inits;
for (j = i; MergedSize < MaxOffset && j != e; ++j) {
const Type* Ty = Globals[j]->getType()->getElementType();
Tys.push_back(Ty);
Inits.push_back(Globals[j]->getInitializer());
MergedSize += TD->getTypeAllocSize(Ty);
}
StructType* MergedTy = StructType::get(M.getContext(), Tys);
Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
GlobalValue::InternalLinkage,
MergedInit, "merged");
for (size_t k = i; k < j; ++k) {
SmallVector<Constant*, 2> Idx;
Idx.push_back(ConstantInt::get(Int32Ty, 0));
Idx.push_back(ConstantInt::get(Int32Ty, k-i));
Constant* GEP =
ConstantExpr::getInBoundsGetElementPtr(MergedGV,
&Idx[0], Idx.size());
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
}
i = j;
}
return true;
}
bool ARMGlobalMerge::doInitialization(Module& M) {
SmallVector<GlobalVariable*, 16> Globals, ConstGlobals;
const TargetData *TD = TLI->getTargetData();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
E = M.global_end(); I != E; ++I) {
// Merge is safe for "normal" internal globals only
if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
continue;
// Ignore fancy-aligned globals for now.
if (I->getAlignment() != 0)
continue;
if (TD->getTypeAllocSize(I->getType()) < MaxOffset) {
if (I->isConstant())
ConstGlobals.push_back(I);
else
Globals.push_back(I);
}
}
if (Globals.size() > 1)
Changed |= doMerge(Globals, M, false);
if (ConstGlobals.size() > 1)
Changed |= doMerge(ConstGlobals, M, true);
return Changed;
}
bool ARMGlobalMerge::runOnFunction(Function& F) {
return false;
}
FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
return new ARMGlobalMerge(tli);
}

View File

@ -703,6 +703,12 @@ unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
return (Subtarget->isThumb1Only() ? 127 : 4095);
}
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)

View File

@ -263,6 +263,10 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
virtual unsigned getMaximalGlobalOffset() const;
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;

View File

@ -85,9 +85,15 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
TSInfo(*this) {
}
// Pass Pipeline Configuration
bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
if (OptLevel != CodeGenOpt::None)
PM.add(createARMGlobalMergePass(getTargetLowering()));
return false;
}
bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
PM.add(createARMISelDag(*this, OptLevel));

View File

@ -50,6 +50,7 @@ public:
}
// Pass Pipeline Configuration
virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);