mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
enhance memcpy opt to turn memmoves into memcpy when the src/dest
don't alias. Remove an old and poorly reduced testcase that fails with this transform for reasons unrelated to the original test. llvm-svn: 80693
This commit is contained in:
parent
fdf1a0718e
commit
2c2c37b2b1
@ -317,6 +317,7 @@ namespace {
|
||||
// Helper functions
|
||||
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
|
||||
bool processMemCpy(MemCpyInst *M);
|
||||
bool processMemMove(MemMoveInst *M);
|
||||
bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
|
||||
bool iterateOnFunction(Function &F);
|
||||
};
|
||||
@ -431,9 +432,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
||||
BasicBlock::iterator InsertPt = BI;
|
||||
|
||||
if (MemSetF == 0) {
|
||||
const Type *Tys[] = {Type::getInt64Ty(SI->getContext())};
|
||||
MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset,
|
||||
Tys, 1);
|
||||
const Type *Ty = Type::getInt64Ty(SI->getContext());
|
||||
MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1);
|
||||
}
|
||||
|
||||
// Get the starting pointer of the block.
|
||||
@ -679,11 +679,10 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
|
||||
return false;
|
||||
|
||||
// If all checks passed, then we can transform these memcpy's
|
||||
const Type *Tys[1];
|
||||
Tys[0] = M->getLength()->getType();
|
||||
const Type *Ty = M->getLength()->getType();
|
||||
Function *MemCpyFun = Intrinsic::getDeclaration(
|
||||
M->getParent()->getParent()->getParent(),
|
||||
M->getIntrinsicID(), Tys, 1);
|
||||
M->getIntrinsicID(), &Ty, 1);
|
||||
|
||||
Value *Args[4] = {
|
||||
M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
|
||||
@ -708,6 +707,36 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
|
||||
/// are guaranteed not to alias.
|
||||
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
|
||||
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
|
||||
|
||||
// If the memmove is a constant size, use it for the alias query, this allows
|
||||
// us to optimize things like: memmove(P, P+64, 64);
|
||||
uint64_t MemMoveSize = ~0ULL;
|
||||
if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
|
||||
MemMoveSize = Len->getZExtValue();
|
||||
|
||||
// See if the pointers alias.
|
||||
if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
|
||||
AliasAnalysis::NoAlias)
|
||||
return false;
|
||||
|
||||
DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
|
||||
|
||||
// The pointers do not alias, so we know we can transform this.
|
||||
Module *Mod = M->getParent()->getParent()->getParent();
|
||||
const Type *Ty = M->getLength()->getType();
|
||||
M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1));
|
||||
|
||||
// MemDep may have overly conservative information about this instruction, just
|
||||
// conservatively flush it from the cache.
|
||||
getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// MemCpyOpt::iterateOnFunction - Executes one iteration of MemCpyOpt.
|
||||
bool MemCpyOpt::iterateOnFunction(Function &F) {
|
||||
bool MadeChange = false;
|
||||
@ -723,6 +752,12 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
|
||||
MadeChange |= processStore(SI, BI);
|
||||
else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
|
||||
MadeChange |= processMemCpy(M);
|
||||
else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
|
||||
if (processMemMove(M)) {
|
||||
--BI; // Reprocess the new memcpy.
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,107 +0,0 @@
|
||||
; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | grep {call.*memmove.*arg1.*}
|
||||
; PR2401
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
|
||||
target triple = "i686-pc-linux-gnu"
|
||||
%struct.Info = type <{ i32, i32, i8*, i8*, i8*, [32 x i8*], i32, [32 x i32], i32, i32, i32, [32 x i32] }>
|
||||
%struct.S98 = type <{ [31 x double] }>
|
||||
%struct._IO_FILE = type <{ i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }>
|
||||
%struct._IO_marker = type <{ %struct._IO_marker*, %struct._IO_FILE*, i32 }>
|
||||
%struct.anon = type <{ }>
|
||||
%union.anon = type { }
|
||||
@info = common global %struct.Info zeroinitializer, align 4 ; <%struct.Info*> [#uses=13]
|
||||
@fails = common global i32 0, align 4 ; <i32*> [#uses=37]
|
||||
@s98 = common global %struct.S98 zeroinitializer, align 4 ; <%struct.S98*> [#uses=2]
|
||||
@a98 = common global [5 x %struct.S98] zeroinitializer, align 4 ; <[5 x %struct.S98]*> [#uses=5]
|
||||
@stdout = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=1]
|
||||
|
||||
declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
|
||||
|
||||
define void @test98() nounwind {
|
||||
entry:
|
||||
%arg = alloca %struct.S98, align 8 ; <%struct.S98*> [#uses=2]
|
||||
%tmp13 = alloca %struct.S98 ; <%struct.S98*> [#uses=2]
|
||||
%tmp14 = alloca %struct.S98 ; <%struct.S98*> [#uses=2]
|
||||
%tmp15 = alloca %struct.S98 ; <%struct.S98*> [#uses=2]
|
||||
%tmp17 = alloca %struct.S98 ; <%struct.S98*> [#uses=2]
|
||||
%tmp21 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
|
||||
%tmp23 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
|
||||
%tmp25 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
|
||||
%tmp27 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
|
||||
%tmp29 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
|
||||
%tmp31 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
|
||||
%tmp33 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
|
||||
call void @llvm.memset.i32( i8* bitcast (%struct.S98* @s98 to i8*), i8 0, i32 248, i32 4 )
|
||||
call void @llvm.memset.i32( i8* bitcast ([5 x %struct.S98]* @a98 to i8*), i8 0, i32 1240, i32 4 )
|
||||
call void @llvm.memset.i32( i8* bitcast (%struct.Info* @info to i8*), i8 0, i32 420, i32 4 )
|
||||
store i8* bitcast (%struct.S98* @s98 to i8*), i8** getelementptr (%struct.Info* @info, i32 0, i32 2)
|
||||
store i8* bitcast ([5 x %struct.S98]* @a98 to i8*), i8** getelementptr (%struct.Info* @info, i32 0, i32 3)
|
||||
store i8* bitcast (%struct.S98* getelementptr ([5 x %struct.S98]* @a98, i32 0, i32 3) to i8*), i8** getelementptr (%struct.Info* @info, i32 0, i32 4)
|
||||
store i32 248, i32* getelementptr (%struct.Info* @info, i32 0, i32 6)
|
||||
store i32 4, i32* getelementptr (%struct.Info* @info, i32 0, i32 8)
|
||||
store i32 4, i32* getelementptr (%struct.Info* @info, i32 0, i32 9)
|
||||
store i32 4, i32* getelementptr (%struct.Info* @info, i32 0, i32 10)
|
||||
%tmp = load i32* getelementptr (%struct.Info* @info, i32 0, i32 8) ; <i32> [#uses=1]
|
||||
%sub = add i32 %tmp, -1 ; <i32> [#uses=1]
|
||||
%and = and i32 %sub, ptrtoint (%struct.S98* getelementptr ([5 x %struct.S98]* @a98, i32 0, i32 3) to i32) ; <i32> [#uses=1]
|
||||
%tobool = icmp eq i32 %and, 0 ; <i1> [#uses=1]
|
||||
br i1 %tobool, label %ifend, label %ifthen
|
||||
|
||||
ifthen: ; preds = %entry
|
||||
%tmp3 = load i32* @fails ; <i32> [#uses=1]
|
||||
%inc = add i32 %tmp3, 1 ; <i32> [#uses=1]
|
||||
store i32 %inc, i32* @fails
|
||||
br label %ifend
|
||||
|
||||
ifend: ; preds = %ifthen, %entry
|
||||
store i8* bitcast (double* getelementptr (%struct.S98* @s98, i32 0, i32 0, i32 18) to i8*), i8** getelementptr (%struct.Info* @info, i32 0, i32 5, i32 0)
|
||||
store i32 8, i32* getelementptr (%struct.Info* @info, i32 0, i32 7, i32 0)
|
||||
store i32 4, i32* getelementptr (%struct.Info* @info, i32 0, i32 11, i32 0)
|
||||
store double 0xC1075E4620000000, double* getelementptr (%struct.S98* @s98, i32 0, i32 0, i32 18)
|
||||
store double 0x410CD219E0000000, double* getelementptr ([5 x %struct.S98]* @a98, i32 0, i32 2, i32 0, i32 18)
|
||||
store i32 1, i32* getelementptr (%struct.Info* @info, i32 0, i32 0)
|
||||
store i32 0, i32* getelementptr (%struct.Info* @info, i32 0, i32 1)
|
||||
%tmp16 = bitcast %struct.S98* %tmp15 to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.memmove.i32( i8* %tmp16, i8* bitcast (%struct.S98* @s98 to i8*), i32 248, i32 4 )
|
||||
%tmp18 = bitcast %struct.S98* %tmp17 to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.memmove.i32( i8* %tmp18, i8* bitcast (%struct.S98* getelementptr ([5 x %struct.S98]* @a98, i32 0, i32 2) to i8*), i32 248, i32 4 )
|
||||
call void @check98( %struct.S98* sret %tmp14, %struct.S98* byval %tmp15, %struct.S98* getelementptr ([5 x %struct.S98]* @a98, i32 0, i32 1), %struct.S98* byval %tmp17 )
|
||||
%tmp19 = bitcast %struct.S98* %tmp13 to i8* ; <i8*> [#uses=1]
|
||||
%tmp20 = bitcast %struct.S98* %tmp14 to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.memmove.i32( i8* %tmp19, i8* %tmp20, i32 248, i32 8 )
|
||||
%tmp1 = bitcast %struct.S98* %arg to i8* ; <i8*> [#uses=1]
|
||||
%tmp2 = bitcast %struct.S98* %tmp13 to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.memcpy.i64( i8* %tmp1, i8* %tmp2, i64 248, i32 8 )
|
||||
%arrayidx.i = getelementptr %struct.S98* %arg, i32 0, i32 0, i32 18 ; <double*> [#uses=1]
|
||||
%tmp1.i = load double* %arrayidx.i, align 8 ; <double> [#uses=1]
|
||||
%tmp2.i = load double* getelementptr (%struct.S98* @s98, i32 0, i32 0, i32 18) ; <double> [#uses=1]
|
||||
%cmp.i = fcmp une double %tmp1.i, %tmp2.i ; <i1> [#uses=1]
|
||||
br i1 %cmp.i, label %ifthen.i, label %checkx98.exit
|
||||
|
||||
ifthen.i: ; preds = %ifend
|
||||
%tmp3.i = load i32* @fails ; <i32> [#uses=1]
|
||||
%inc.i = add i32 %tmp3.i, 1 ; <i32> [#uses=1]
|
||||
store i32 %inc.i, i32* @fails
|
||||
br label %checkx98.exit
|
||||
|
||||
checkx98.exit: ; preds = %ifthen.i, %ifend
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @check98(%struct.S98* sret %agg.result, %struct.S98* byval %arg0, %struct.S98* %arg1, %struct.S98* byval %arg2) nounwind
|
||||
|
||||
declare void @llvm.va_start(i8*) nounwind
|
||||
|
||||
declare void @llvm.va_end(i8*) nounwind
|
||||
|
||||
declare i32 @main() noreturn
|
||||
|
||||
declare i32 @fflush(%struct._IO_FILE*)
|
||||
|
||||
declare void @abort() noreturn nounwind
|
||||
|
||||
declare void @exit(i32) noreturn nounwind
|
||||
|
||||
declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
|
||||
|
||||
declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind
|
37
test/Transforms/MemCpyOpt/memmove.ll
Normal file
37
test/Transforms/MemCpyOpt/memmove.ll
Normal file
@ -0,0 +1,37 @@
|
||||
; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | FileCheck %s
|
||||
; These memmoves should get optimized to memcpys.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-apple-darwin9.0"
|
||||
|
||||
declare void @llvm.memmove.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
|
||||
|
||||
define i8* @test1(i8* nocapture %src) nounwind {
|
||||
entry:
|
||||
; CHECK: @test1
|
||||
; CHECK: call void @llvm.memcpy
|
||||
|
||||
%call3 = malloc [13 x i8] ; <[13 x i8]*> [#uses=1]
|
||||
%call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0 ; <i8*> [#uses=2]
|
||||
tail call void @llvm.memmove.i64(i8* %call3.sub, i8* %src, i64 13, i32 1)
|
||||
ret i8* %call3.sub
|
||||
}
|
||||
|
||||
define void @test2(i8* %P) nounwind {
|
||||
entry:
|
||||
; CHECK: @test2
|
||||
; CHECK: call void @llvm.memcpy
|
||||
%add.ptr = getelementptr i8* %P, i64 16 ; <i8*> [#uses=1]
|
||||
tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 16, i32 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; This cannot be optimized because the src/dst really do overlap.
|
||||
define void @test3(i8* %P) nounwind {
|
||||
entry:
|
||||
; CHECK: @test3
|
||||
; CHECK: call void @llvm.memmove
|
||||
%add.ptr = getelementptr i8* %P, i64 16 ; <i8*> [#uses=1]
|
||||
tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 17, i32 1)
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user