mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
Enhance SROA to "promote to scalar" allocas which are memcpy/memmove'd into or out of.
This fixes a serious perf issue that Nate ran into.
llvm-svn: 66366
This commit is contained in:
parent
67ccd79e39
commit
54d2292fe5
@ -1356,6 +1356,16 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a memcpy or memmove into or out of the whole allocation, we
|
||||
// can handle it like a load or store of the scalar type.
|
||||
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
|
||||
if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
|
||||
if (Len->getZExtValue() == AllocaSize && Offset == 0) {
|
||||
IsNotTrivial = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Ignore dbg intrinsic.
|
||||
if (isa<DbgInfoIntrinsic>(User))
|
||||
@ -1440,6 +1450,44 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
|
||||
MSI->eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
|
||||
// If this is a memcpy or memmove into or out of the whole allocation, we
|
||||
// can handle it like a load or store of the scalar type.
|
||||
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
|
||||
assert(Offset == 0 && "must be store to start of alloca");
|
||||
|
||||
// If the source and destination are both to the same alloca, then this is
|
||||
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
|
||||
// as appropriate.
|
||||
AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject());
|
||||
|
||||
if (MTI->getSource()->getUnderlyingObject() != OrigAI) {
|
||||
// Dest must be OrigAI, change this to be a load from the original
|
||||
// pointer (bitcasted), then a store to our new alloca.
|
||||
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
|
||||
Value *SrcPtr = MTI->getSource();
|
||||
SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
|
||||
|
||||
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
|
||||
SrcVal->setAlignment(MTI->getAlignment());
|
||||
Builder.CreateStore(SrcVal, NewAI);
|
||||
} else if (MTI->getDest()->getUnderlyingObject() != OrigAI) {
|
||||
// Src must be OrigAI, change this to be a load from NewAI then a store
|
||||
// through the original dest pointer (bitcasted).
|
||||
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
|
||||
LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
|
||||
|
||||
Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
|
||||
StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
|
||||
NewStore->setAlignment(MTI->getAlignment());
|
||||
} else {
|
||||
// Noop transfer. Src == Dst
|
||||
}
|
||||
|
||||
|
||||
MTI->eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
|
||||
// If user is a dbg info intrinsic then it is safe to remove it.
|
||||
if (isa<DbgInfoIntrinsic>(User)) {
|
||||
|
@ -6,12 +6,11 @@ target triple = "i386-apple-darwin8"
|
||||
|
||||
; memtest1: copies from %src into the local buffer %temp with llvm.memcpy, then
; copies from %temp out to %dst with a second llvm.memcpy.
; NOTE(review): this listing contains both a [100 x i8] and a [200 x i8] form of
; the body -- presumably the before/after sides of the diff hunk; confirm against
; the actual test file.
define void @memtest1(i8* %dst, i8* %src) nounwind {
|
||||
entry:
|
||||
%temp = alloca [100 x i8] ; <[100 x i8]*> [#uses=2]
|
||||
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
|
||||
%temp1 = bitcast [100 x i8]* %temp to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 100, i32 1 )
|
||||
%temp3 = bitcast [100 x i8]* %temp to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 100, i32 1 )
|
||||
%temp = alloca [200 x i8] ; <[200 x i8]*> [#uses=2]
|
||||
%temp1 = bitcast [200 x i8]* %temp to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 200, i32 1 )
|
||||
%temp3 = bitcast [200 x i8]* %temp to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 200, i32 1 )
|
||||
ret void
|
||||
}
|
||||
|
||||
|
15
test/Transforms/ScalarRepl/vector_memcpy.ll
Normal file
15
test/Transforms/ScalarRepl/vector_memcpy.ll
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {ret <16 x float> %A}
|
||||
; foo: stores %A into %tmp, memcpy's 64 bytes from %tmp to %tmp2 through i8*
; casts, then loads the value back out of %tmp2 and returns it. The RUN line
; greps the scalarrepl output for "ret <16 x float> %A", i.e. it expects the
; allocas and the memcpy to be eliminated so that %A is returned directly.
define <16 x float> @foo(<16 x float> %A) nounwind {
|
||||
%tmp = alloca <16 x float>, align 16
|
||||
%tmp2 = alloca <16 x float>, align 16
|
||||
store <16 x float> %A, <16 x float>* %tmp
|
||||
%s = bitcast <16 x float>* %tmp to i8*
|
||||
%s2 = bitcast <16 x float>* %tmp2 to i8*
|
||||
; Length 64 = 16 floats x 4 bytes; NOTE(review): presumably the trailing i32 16
; is the alignment operand of this memcpy intrinsic form -- confirm.
call void @llvm.memcpy.i64(i8* %s2, i8* %s, i64 64, i32 16)
|
||||
|
||||
%R = load <16 x float>* %tmp2
|
||||
ret <16 x float> %R
|
||||
}
|
||||
|
||||
declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
|
||||
|
Loading…
Reference in New Issue
Block a user