mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[MemCpyOpt] memset->memcpy forwarding with undef tail
Currently memcpyopt optimizes cases like

    memset(a, byte, N);
    memcpy(b, a, M);

to

    memset(a, byte, N);
    memset(b, byte, M);

if M <= N. Often this allows further simplifications down the line, which drop the first memset entirely.

This patch extends this optimization to the case where M > N, but we know that the bytes a[N..M] are undef due to alloca/lifetime.start. This situation arises relatively often for Rust code, because Rust does not initialize trailing structure padding and loves to insert redundant memcpys. This also fixes https://bugs.llvm.org/show_bug.cgi?id=39844.

For the implementation, I'm reusing a bit of code from a similar existing optimization (direct memcpy of undef). I've also added memset support to the MemDepAnalysis GetLocation helper. getPointerDependencyFrom could be used instead, but it seems to make more sense to add this to GetLocation and thus make the computation cacheable.

Differential Revision: https://reviews.llvm.org/D55120

llvm-svn: 348645
This commit is contained in:
parent
6fc1e6bf13
commit
31fa5fa3a2
@ -154,6 +154,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
|
|||||||
return ModRefInfo::Mod;
|
return ModRefInfo::Mod;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (const MemSetInst *MI = dyn_cast<MemSetInst>(Inst)) {
|
||||||
|
Loc = MemoryLocation::getForDest(MI);
|
||||||
|
// Conservatively assume ModRef for volatile memset.
|
||||||
|
return MI->isVolatile() ? ModRefInfo::ModRef : ModRefInfo::Mod;
|
||||||
|
}
|
||||||
|
|
||||||
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
|
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
|
||||||
switch (II->getIntrinsicID()) {
|
switch (II->getIntrinsicID()) {
|
||||||
case Intrinsic::lifetime_start:
|
case Intrinsic::lifetime_start:
|
||||||
|
@ -1144,6 +1144,21 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Determine whether the instruction has undefined content for the given Size,
|
||||||
|
/// either because it was freshly alloca'd or started its lifetime.
|
||||||
|
static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
|
||||||
|
if (isa<AllocaInst>(I))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
|
||||||
|
if (II->getIntrinsicID() == Intrinsic::lifetime_start)
|
||||||
|
if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
|
||||||
|
if (LTSize->getZExtValue() >= Size->getZExtValue())
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/// Transform memcpy to memset when its source was just memset.
|
/// Transform memcpy to memset when its source was just memset.
|
||||||
/// In other words, turn:
|
/// In other words, turn:
|
||||||
/// \code
|
/// \code
|
||||||
@ -1167,12 +1182,23 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
|
|||||||
if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
|
if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
|
// A known memset size is required.
|
||||||
ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
|
ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
|
||||||
|
if (!MemSetSize)
|
||||||
|
return false;
|
||||||
|
|
||||||
// Make sure the memcpy doesn't read any more than what the memset wrote.
|
// Make sure the memcpy doesn't read any more than what the memset wrote.
|
||||||
// Don't worry about sizes larger than i64.
|
// Don't worry about sizes larger than i64.
|
||||||
if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
|
ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
|
||||||
return false;
|
if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
|
||||||
|
// If the memcpy is larger than the memset, but the memory was undef prior
|
||||||
|
// to the memset, we can just ignore the tail.
|
||||||
|
MemDepResult DepInfo = MD->getDependency(MemSet);
|
||||||
|
if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
|
||||||
|
CopySize = MemSetSize;
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
IRBuilder<> Builder(MemCpy);
|
IRBuilder<> Builder(MemCpy);
|
||||||
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
|
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
|
||||||
@ -1252,19 +1278,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
|
|||||||
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
|
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
|
||||||
return processMemCpyMemCpyDependence(M, MDep);
|
return processMemCpyMemCpyDependence(M, MDep);
|
||||||
} else if (SrcDepInfo.isDef()) {
|
} else if (SrcDepInfo.isDef()) {
|
||||||
Instruction *I = SrcDepInfo.getInst();
|
if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
|
||||||
bool hasUndefContents = false;
|
|
||||||
|
|
||||||
if (isa<AllocaInst>(I)) {
|
|
||||||
hasUndefContents = true;
|
|
||||||
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
|
|
||||||
if (II->getIntrinsicID() == Intrinsic::lifetime_start)
|
|
||||||
if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
|
|
||||||
if (LTSize->getZExtValue() >= CopySize->getZExtValue())
|
|
||||||
hasUndefContents = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hasUndefContents) {
|
|
||||||
MD->removeInstruction(M);
|
MD->removeInstruction(M);
|
||||||
M->eraseFromParent();
|
M->eraseFromParent();
|
||||||
++NumMemCpyInstr;
|
++NumMemCpyInstr;
|
||||||
|
@ -12,7 +12,7 @@ define void @test_alloca(i8* %result) {
|
|||||||
; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
|
; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
|
||||||
; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
|
; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
|
||||||
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
|
||||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
%a = alloca %T, align 8
|
%a = alloca %T, align 8
|
||||||
@ -28,7 +28,7 @@ define void @test_alloca_with_lifetimes(i8* %result) {
|
|||||||
; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
|
; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
|
||||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
|
||||||
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
|
||||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
|
||||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
@ -46,7 +46,7 @@ define void @test_malloc_with_lifetimes(i8* %result) {
|
|||||||
; CHECK-NEXT: [[A:%.*]] = call i8* @malloc(i64 16)
|
; CHECK-NEXT: [[A:%.*]] = call i8* @malloc(i64 16)
|
||||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
|
||||||
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
|
||||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
|
||||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
|
||||||
; CHECK-NEXT: call void @free(i8* [[A]])
|
; CHECK-NEXT: call void @free(i8* [[A]])
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
@ -98,7 +98,7 @@ define void @test_volatile_memset(i8* %result) {
|
|||||||
; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
|
; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
|
||||||
; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
|
; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
|
||||||
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 true)
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 true)
|
||||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
%a = alloca %T, align 8
|
%a = alloca %T, align 8
|
||||||
|
Loading…
Reference in New Issue
Block a user