1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[MemCpyOpt] Use dereferenceable pointer helper

The call slot optimization has some home-grown code for checking
whether the destination is dereferenceable. Replace this with the
generic isDereferenceableAndAlignedPointer() helper.

I'm not checking alignment here, because that is currently handled
separately and may be an enforced alignment for allocas. The clean
way of integrating that part would probably be to accept a callback
in isDereferenceableAndAlignedPointer() for the actual isAligned check,
which would then have a chance to use an enforced alignment instead.

This allows the destination to be a GEP (among other things), though
the two open TODOs may prevent it from working in practice.

Differential Revision: https://reviews.llvm.org/D88805
This commit is contained in:
Nikita Popov 2020-10-04 21:03:02 +02:00
parent c845089f3a
commit a7e1d4a9e3
2 changed files with 8 additions and 35 deletions

View File

@ -21,6 +21,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
@ -788,40 +789,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
// TODO: Use isDereferenceablePointer() API instead.
if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
// The destination is an alloca. Check it is larger than srcSize.
ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
if (!destArraySize)
return false;
uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
destArraySize->getZExtValue();
if (destSize < srcSize)
return false;
} else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
if (A->getDereferenceableBytes() < srcSize) {
// If the destination is an sret parameter then only accesses that are
// outside of the returned struct type can trap.
if (!A->hasStructRetAttr())
return false;
Type *StructTy = A->getParamStructRetType();
if (!StructTy->isSized()) {
// The call may never return and hence the copy-instruction may never
// be executed, and therefore it's not safe to say "the destination
// has at least <cpyLen> bytes, as implied by the copy-instruction",
return false;
}
uint64_t destSize = DL.getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
}
} else {
if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
DL, C, DT))
return false;
}
// Make sure that nothing can observe cpyDest being written early. There are
// a number of cases to consider:
@ -837,6 +807,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// guaranteed to be executed if C is. As it is a non-atomic access, it
// renders accesses from other threads undefined.
// TODO: This is currently not checked.
// TODO: Check underlying object, so we can look through GEPs.
if (!isa<AllocaInst>(cpyDest)) {
assert(C->getParent() == cpyStore->getParent() &&
"call and copy must be in the same block");
@ -893,6 +864,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
// TODO: Support moving instructions like GEPs upwards.
if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
if (!DT->dominates(cpyDestInst, C))
return false;

View File

@ -110,8 +110,9 @@ define void @dest_is_gep_nounwind_call() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 1
; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [8 x i8]* [[SRC]] to i8*
; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8
; CHECK-NEXT: call void @accept_ptr(i8* [[SRC_I8]]) [[ATTR3:#.*]]
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 8, i1 false)
; CHECK-NEXT: [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8]] to [8 x i8]*
; CHECK-NEXT: [[DEST_I812:%.*]] = bitcast [8 x i8]* [[DEST_I81]] to i8*
; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I812]]) [[ATTR3:#.*]]
; CHECK-NEXT: ret void
;
%dest = alloca [16 x i8]