mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
Re-apply the patch to improve the optimizations of memcpy's, with several
bugs fixed. This now passes PPC bootstrap. llvm-svn: 47026
This commit is contained in:
parent
1122ee4ded
commit
274aa2846e
@ -100,6 +100,11 @@ class MemoryDependenceAnalysis : public FunctionPass {
|
|||||||
/// updating the dependence of instructions that previously depended on it.
|
/// updating the dependence of instructions that previously depended on it.
|
||||||
void removeInstruction(Instruction* rem);
|
void removeInstruction(Instruction* rem);
|
||||||
|
|
||||||
|
/// dropInstruction - Remove an instruction from the analysis, making
|
||||||
|
/// absolutely conservative assumptions when updating the cache. This is
|
||||||
|
/// useful, for example when an instruction is changed rather than removed.
|
||||||
|
void dropInstruction(Instruction* drop);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Instruction* getCallSiteDependency(CallSite C, Instruction* start,
|
Instruction* getCallSiteDependency(CallSite C, Instruction* start,
|
||||||
BasicBlock* block);
|
BasicBlock* block);
|
||||||
|
@ -457,6 +457,46 @@ Instruction* MemoryDependenceAnalysis::getDependency(Instruction* query,
|
|||||||
return NonLocal;
|
return NonLocal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// dropInstruction - Remove an instruction from the analysis, making
|
||||||
|
/// absolutely conservative assumptions when updating the cache. This is
|
||||||
|
/// useful, for example when an instruction is changed rather than removed.
|
||||||
|
void MemoryDependenceAnalysis::dropInstruction(Instruction* drop) {
|
||||||
|
depMapType::iterator depGraphEntry = depGraphLocal.find(drop);
|
||||||
|
if (depGraphEntry != depGraphLocal.end())
|
||||||
|
reverseDep[depGraphEntry->second.first].erase(drop);
|
||||||
|
|
||||||
|
// Drop dependency information for things that depended on this instr
|
||||||
|
SmallPtrSet<Instruction*, 4>& set = reverseDep[drop];
|
||||||
|
for (SmallPtrSet<Instruction*, 4>::iterator I = set.begin(), E = set.end();
|
||||||
|
I != E; ++I)
|
||||||
|
depGraphLocal.erase(*I);
|
||||||
|
|
||||||
|
depGraphLocal.erase(drop);
|
||||||
|
reverseDep.erase(drop);
|
||||||
|
|
||||||
|
for (DenseMap<BasicBlock*, Value*>::iterator DI =
|
||||||
|
depGraphNonLocal[drop].begin(), DE = depGraphNonLocal[drop].end();
|
||||||
|
DI != DE; ++DI)
|
||||||
|
if (DI->second != None)
|
||||||
|
reverseDepNonLocal[DI->second].erase(drop);
|
||||||
|
|
||||||
|
if (reverseDepNonLocal.count(drop)) {
|
||||||
|
SmallPtrSet<Instruction*, 4>& set = reverseDepNonLocal[drop];
|
||||||
|
for (SmallPtrSet<Instruction*, 4>::iterator I = set.begin(), E = set.end();
|
||||||
|
I != E; ++I)
|
||||||
|
for (DenseMap<BasicBlock*, Value*>::iterator DI =
|
||||||
|
depGraphNonLocal[*I].begin(), DE = depGraphNonLocal[*I].end();
|
||||||
|
DI != DE; ++DI)
|
||||||
|
if (DI->second == drop)
|
||||||
|
DI->second = Dirty;
|
||||||
|
}
|
||||||
|
|
||||||
|
reverseDepNonLocal.erase(drop);
|
||||||
|
nonLocalDepMapType::iterator I = depGraphNonLocal.find(drop);
|
||||||
|
if (I != depGraphNonLocal.end())
|
||||||
|
depGraphNonLocal.erase(I);
|
||||||
|
}
|
||||||
|
|
||||||
/// removeInstruction - Remove an instruction from the dependence analysis,
|
/// removeInstruction - Remove an instruction from the dependence analysis,
|
||||||
/// updating the dependence of instructions that previously depended on it.
|
/// updating the dependence of instructions that previously depended on it.
|
||||||
/// This method attempts to keep the cache coherent using the reverse map.
|
/// This method attempts to keep the cache coherent using the reverse map.
|
||||||
@ -473,7 +513,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction* rem) {
|
|||||||
depMapType::iterator depGraphEntry = depGraphLocal.find(rem);
|
depMapType::iterator depGraphEntry = depGraphLocal.find(rem);
|
||||||
|
|
||||||
if (depGraphEntry != depGraphLocal.end()) {
|
if (depGraphEntry != depGraphLocal.end()) {
|
||||||
reverseDep[depGraphLocal[rem].first].erase(rem);
|
reverseDep[depGraphEntry->second.first].erase(rem);
|
||||||
|
|
||||||
if (depGraphEntry->second.first != NonLocal &&
|
if (depGraphEntry->second.first != NonLocal &&
|
||||||
depGraphEntry->second.first != None &&
|
depGraphEntry->second.first != None &&
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
#include "llvm/Constants.h"
|
#include "llvm/Constants.h"
|
||||||
#include "llvm/DerivedTypes.h"
|
#include "llvm/DerivedTypes.h"
|
||||||
#include "llvm/Function.h"
|
#include "llvm/Function.h"
|
||||||
|
#include "llvm/IntrinsicInst.h"
|
||||||
#include "llvm/Instructions.h"
|
#include "llvm/Instructions.h"
|
||||||
#include "llvm/Value.h"
|
#include "llvm/Value.h"
|
||||||
#include "llvm/ADT/BitVector.h"
|
#include "llvm/ADT/BitVector.h"
|
||||||
@ -736,6 +737,7 @@ namespace {
|
|||||||
SmallVector<Instruction*, 4>& toErase);
|
SmallVector<Instruction*, 4>& toErase);
|
||||||
bool processNonLocalLoad(LoadInst* L,
|
bool processNonLocalLoad(LoadInst* L,
|
||||||
SmallVector<Instruction*, 4>& toErase);
|
SmallVector<Instruction*, 4>& toErase);
|
||||||
|
bool processMemCpy(MemCpyInst* M, SmallVector<Instruction*, 4>& toErase);
|
||||||
Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
|
Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
|
||||||
DenseMap<BasicBlock*, Value*> &Phis,
|
DenseMap<BasicBlock*, Value*> &Phis,
|
||||||
bool top_level = false);
|
bool top_level = false);
|
||||||
@ -1044,6 +1046,79 @@ bool GVN::processLoad(LoadInst* L,
|
|||||||
return deletedLoad;
|
return deletedLoad;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which
|
||||||
|
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
|
||||||
|
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
|
||||||
|
/// This allows later passes to remove the first memcpy altogether.
|
||||||
|
bool GVN::processMemCpy(MemCpyInst* M,
|
||||||
|
SmallVector<Instruction*, 4>& toErase) {
|
||||||
|
MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
|
||||||
|
|
||||||
|
// First, we have to check that the dependency is another memcpy
|
||||||
|
Instruction* dep = MD.getDependency(M);
|
||||||
|
if (dep == MemoryDependenceAnalysis::None ||
|
||||||
|
dep == MemoryDependenceAnalysis::NonLocal ||
|
||||||
|
!isa<MemCpyInst>(dep))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// We can only transforms memcpy's where the dest of one is the source of the
|
||||||
|
// other
|
||||||
|
MemCpyInst* MDep = cast<MemCpyInst>(dep);
|
||||||
|
if (M->getSource() != MDep->getDest())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Second, the length of the memcpy's must be the same, or the preceeding one
|
||||||
|
// must be larger than the following one.
|
||||||
|
ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
|
||||||
|
ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
|
||||||
|
if (!C1 || !C2)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
uint64_t CpySize = C1->getValue().getZExtValue();
|
||||||
|
uint64_t DepSize = C2->getValue().getZExtValue();
|
||||||
|
|
||||||
|
if (DepSize < CpySize)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Finally, we have to make sure that the dest of the second does not
|
||||||
|
// alias the source of the first
|
||||||
|
AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
|
||||||
|
if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
|
||||||
|
AliasAnalysis::NoAlias)
|
||||||
|
return false;
|
||||||
|
else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
|
||||||
|
AliasAnalysis::NoAlias)
|
||||||
|
return false;
|
||||||
|
else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
|
||||||
|
!= AliasAnalysis::NoAlias)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// If all checks passed, then we can transform these memcpy's
|
||||||
|
bool is32bit = M->getIntrinsicID() == Intrinsic::memcpy_i32;
|
||||||
|
Function* MemMoveFun = Intrinsic::getDeclaration(
|
||||||
|
M->getParent()->getParent()->getParent(),
|
||||||
|
is32bit ? Intrinsic::memcpy_i32 :
|
||||||
|
Intrinsic::memcpy_i64);
|
||||||
|
|
||||||
|
std::vector<Value*> args;
|
||||||
|
args.push_back(M->getRawDest());
|
||||||
|
args.push_back(MDep->getRawSource());
|
||||||
|
args.push_back(M->getLength());
|
||||||
|
args.push_back(M->getAlignment());
|
||||||
|
|
||||||
|
CallInst* C = new CallInst(MemMoveFun, args.begin(), args.end(), "", M);
|
||||||
|
|
||||||
|
if (MD.getDependency(C) == MDep) {
|
||||||
|
MD.dropInstruction(M);
|
||||||
|
toErase.push_back(M);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
MD.removeInstruction(C);
|
||||||
|
toErase.push_back(C);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// processInstruction - When calculating availability, handle an instruction
|
/// processInstruction - When calculating availability, handle an instruction
|
||||||
/// by inserting it into the appropriate sets
|
/// by inserting it into the appropriate sets
|
||||||
bool GVN::processInstruction(Instruction* I,
|
bool GVN::processInstruction(Instruction* I,
|
||||||
@ -1052,6 +1127,8 @@ bool GVN::processInstruction(Instruction* I,
|
|||||||
SmallVector<Instruction*, 4>& toErase) {
|
SmallVector<Instruction*, 4>& toErase) {
|
||||||
if (LoadInst* L = dyn_cast<LoadInst>(I)) {
|
if (LoadInst* L = dyn_cast<LoadInst>(I)) {
|
||||||
return processLoad(L, lastSeenLoad, toErase);
|
return processLoad(L, lastSeenLoad, toErase);
|
||||||
|
} else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
|
||||||
|
return processMemCpy(M, toErase);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned num = VN.lookup_or_add(I);
|
unsigned num = VN.lookup_or_add(I);
|
||||||
@ -1161,8 +1238,9 @@ bool GVN::iterateOnFunction(Function &F) {
|
|||||||
++BI;
|
++BI;
|
||||||
|
|
||||||
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
|
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
|
||||||
E = toErase.end(); I != E; ++I)
|
E = toErase.end(); I != E; ++I) {
|
||||||
(*I)->eraseFromParent();
|
(*I)->eraseFromParent();
|
||||||
|
}
|
||||||
|
|
||||||
toErase.clear();
|
toErase.clear();
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep memcpy | count 2
|
; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep memcpy | count 2
|
||||||
; XFAIL: *
|
|
||||||
|
|
||||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||||
target triple = "i686-apple-darwin9"
|
target triple = "i686-apple-darwin9"
|
||||||
|
Loading…
Reference in New Issue
Block a user