[MemCpyOpt] Allow variable lengths in memcpy optimizer

This makes the memcpy-memcpy and memcpy-memset optimizations work for
variable sizes as long as they are equal, relaxing the old restriction
that they be constant integers. If they are not equal, the old
requirement that they be constant integers with certain size
restrictions still applies. The implementation works by pushing the
length tests further down in the code, which reveals some places where
it is enough that the lengths are equal (not necessarily constant).

Differential Revision: https://reviews.llvm.org/D100870
commit 16a7ccaf6b (parent 8a2dd1008a)
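
To illustrate the memcpy-memcpy case, here is a sketch distilled from the
tests added in this commit: both copies use the same non-constant %size, so
MemCpyOpt can now forward the source of the first copy into the second.

; Before: %dst is filled from %tmp, which was itself filled from %src.
  %tmp = alloca i8, i64 %size
  %dst = alloca i8, i64 %size
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 %size, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 %size, i1 false)

; After: the second copy reads straight from %src, even though %size is
; not a ConstantInt.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 %size, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %size, i1 false)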
@@ -1050,10 +1050,12 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
 
   // Second, the length of the memcpy's must be the same, or the preceding one
   // must be larger than the following one.
-  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
-  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
-  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
-    return false;
+  if (MDep->getLength() != M->getLength()) {
+    ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+    ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+    if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
+      return false;
+  }
 
   // Verify that the copied-from memory doesn't change in between the two
   // transfers. For example, in:
@@ -1229,21 +1231,23 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
 
 /// Determine whether the instruction has undefined content for the given Size,
 /// either because it was freshly alloca'd or started its lifetime.
-static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
+static bool hasUndefContents(Instruction *I, Value *Size) {
   if (isa<AllocaInst>(I))
     return true;
 
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
-    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
-      if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
-        if (LTSize->getZExtValue() >= Size->getZExtValue())
-          return true;
+  if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+        if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+          if (LTSize->getZExtValue() >= CSize->getZExtValue())
+            return true;
+  }
 
   return false;
 }
 
 static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
-                                 MemoryDef *Def, ConstantInt *Size) {
+                                 MemoryDef *Def, Value *Size) {
   if (MSSA->isLiveOnEntryDef(Def))
     return isa<AllocaInst>(getUnderlyingObject(V));
 
@@ -1251,14 +1255,17 @@ static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
           dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
     if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
       ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
-      if (AA->isMustAlias(V, II->getArgOperand(1)) &&
-          LTSize->getZExtValue() >= Size->getZExtValue())
-        return true;
+
+      if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
+        if (AA->isMustAlias(V, II->getArgOperand(1)) &&
+            LTSize->getZExtValue() >= CSize->getZExtValue())
+          return true;
+      }
 
-      // If the lifetime.start covers a whole alloca (as it almost always does)
-      // and we're querying a pointer based on that alloca, then we know the
-      // memory is definitely undef, regardless of how exactly we alias. The
-      // size also doesn't matter, as an out-of-bounds access would be UB.
+      // If the lifetime.start covers a whole alloca (as it almost always
+      // does) and we're querying a pointer based on that alloca, then we know
+      // the memory is definitely undef, regardless of how exactly we alias.
+      // The size also doesn't matter, as an out-of-bounds access would be UB.
       AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V));
       if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
         const DataLayout &DL = Alloca->getModule()->getDataLayout();
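
With Size relaxed from ConstantInt to Value in the two helpers above, an
undefined source can still be proven without a constant length through the
fresh-alloca and whole-alloca cases. A sketch matching the uninit test added
below: a copy out of a never-written alloca is deleted outright, variable
length and all.

  %src = alloca i8, i64 %size
  %dst = alloca i8, i64 %size
  ; %src is never stored to, so its contents are undef and MemCpyOpt
  ; removes this memcpy entirely, despite the non-constant %size.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %size, i1 false)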
@@ -1284,8 +1291,6 @@ static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
 /// memset(dst2, c, dst2_size);
 /// \endcode
 /// When dst2_size <= dst1_size.
-///
-/// The \p MemCpy must have a Constant length.
 bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                                MemSetInst *MemSet) {
   // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
@@ -1293,38 +1298,47 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
   if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
     return false;
 
-  // A known memset size is required.
-  ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
-  if (!MemSetSize)
-    return false;
+  Value *MemSetSize = MemSet->getLength();
+  Value *CopySize = MemCpy->getLength();
 
-  // Make sure the memcpy doesn't read any more than what the memset wrote.
-  // Don't worry about sizes larger than i64.
-  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
-  if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
-    // If the memcpy is larger than the memset, but the memory was undef prior
-    // to the memset, we can just ignore the tail. Technically we're only
-    // interested in the bytes from MemSetSize..CopySize here, but as we can't
-    // easily represent this location, we use the full 0..CopySize range.
-    MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
-    bool CanReduceSize = false;
-    if (EnableMemorySSA) {
-      MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
-      MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
-          MemSetAccess->getDefiningAccess(), MemCpyLoc);
-      if (auto *MD = dyn_cast<MemoryDef>(Clobber))
-        if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
-          CanReduceSize = true;
-    } else {
-      MemDepResult DepInfo = MD->getPointerDependencyFrom(
-          MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
-      if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
-        CanReduceSize = true;
-    }
+  if (MemSetSize != CopySize) {
+    // Make sure the memcpy doesn't read any more than what the memset wrote.
+    // Don't worry about sizes larger than i64.
+
+    // A known memset size is required.
+    ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
+    if (!CMemSetSize)
+      return false;
+
+    // A known memcpy size is also required.
+    ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize);
+    if (!CCopySize)
+      return false;
+    if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
+      // If the memcpy is larger than the memset, but the memory was undef prior
+      // to the memset, we can just ignore the tail. Technically we're only
+      // interested in the bytes from MemSetSize..CopySize here, but as we can't
+      // easily represent this location, we use the full 0..CopySize range.
+      MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
+      bool CanReduceSize = false;
+      if (EnableMemorySSA) {
+        MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+        MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+            MemSetAccess->getDefiningAccess(), MemCpyLoc);
+        if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+          if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
+            CanReduceSize = true;
+      } else {
+        MemDepResult DepInfo = MD->getPointerDependencyFrom(
+            MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
+        if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+          CanReduceSize = true;
+      }
 
-    if (!CanReduceSize)
-      return false;
-    CopySize = MemSetSize;
+      if (!CanReduceSize)
+        return false;
+      CopySize = MemSetSize;
+    }
   }
 
   IRBuilder<> Builder(MemCpy);
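
With the MemSetSize != CopySize fast path above, performMemCpyToMemSetOptzn
no longer needs constant lengths when the memset and the memcpy agree on the
size; a sketch matching the memset test added below:

; Before: copy bytes that were just memset.
  call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 %size, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 %size, i1 false)

; After: the copy becomes a second memset of the same variable length.
  call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 %size, i1 false)
  call void @llvm.memset.p0i8.i64(i8* align 8 %dst2, i8 %c, i64 %size, i1 false)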
@@ -1396,10 +1410,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
       if (processMemSetMemCpyDependence(M, MDep))
         return true;
 
-  // The optimizations after this point require the memcpy size.
-  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
-  if (!CopySize) return false;
-
   MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
       AnyClobber, MemoryLocation::getForSource(M));
 
@@ -1412,26 +1422,29 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
   // d) memcpy from a just-memset'd source can be turned into memset.
   if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
     if (Instruction *MI = MD->getMemoryInst()) {
-      if (auto *C = dyn_cast<CallInst>(MI)) {
-        // The memcpy must post-dom the call. Limit to the same block for now.
-        // Additionally, we need to ensure that there are no accesses to dest
-        // between the call and the memcpy. Accesses to src will be checked
-        // by performCallSlotOptzn().
-        // TODO: Support non-local call-slot optimization?
-        if (C->getParent() == M->getParent() &&
-            !accessedBetween(*AA, DestLoc, MD, MA)) {
-          // FIXME: Can we pass in either of dest/src alignment here instead
-          // of conservatively taking the minimum?
-          Align Alignment = std::min(M->getDestAlign().valueOrOne(),
-                                     M->getSourceAlign().valueOrOne());
-          if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
-                                   CopySize->getZExtValue(), Alignment, C)) {
-            LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
-                              << "    call: " << *C << "\n"
-                              << "    memcpy: " << *M << "\n");
-            eraseInstruction(M);
-            ++NumMemCpyInstr;
-            return true;
-          }
-        }
-      }
+      if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+        if (auto *C = dyn_cast<CallInst>(MI)) {
+          // The memcpy must post-dom the call. Limit to the same block for
+          // now. Additionally, we need to ensure that there are no accesses
+          // to dest between the call and the memcpy. Accesses to src will be
+          // checked by performCallSlotOptzn().
+          // TODO: Support non-local call-slot optimization?
+          if (C->getParent() == M->getParent() &&
+              !accessedBetween(*AA, DestLoc, MD, MA)) {
+            // FIXME: Can we pass in either of dest/src alignment here instead
+            // of conservatively taking the minimum?
+            Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+                                       M->getSourceAlign().valueOrOne());
+            if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+                                     CopySize->getZExtValue(), Alignment,
+                                     C)) {
+              LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+                                << "    call: " << *C << "\n"
+                                << "    memcpy: " << *M << "\n");
+              eraseInstruction(M);
+              ++NumMemCpyInstr;
+              return true;
+            }
+          }
+        }
+      }
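
For contrast, the call-slot transform above still requires a ConstantInt
length, hence the new guard. A schematic (not from this commit's tests;
@fill is a hypothetical function that writes exactly 16 bytes through its
argument, and %src is a local, non-escaping buffer):

  %src = alloca [16 x i8]
  %p = bitcast [16 x i8]* %src to i8*
  call void @fill(i8* %p)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %p, i64 16, i1 false)
; When the checks in performCallSlotOptzn succeed, the call writes into
; %dest directly and the memcpy disappears:
  call void @fill(i8* %dest)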
@@ -1447,7 +1460,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
       }
     }
 
-    if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) {
+    if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, M->getLength())) {
       LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
       eraseInstruction(M);
       ++NumMemCpyInstr;
@@ -1464,10 +1477,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
       if (processMemSetMemCpyDependence(M, MDep))
         return true;
 
-  // The optimizations after this point require the memcpy size.
-  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
-  if (!CopySize) return false;
-
   // There are four possible optimizations we can do for memcpy:
   // a) memcpy-memcpy xform which exposes redundance for DSE.
   // b) call-memcpy xform for return slot optimization.
@@ -1475,17 +1484,19 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
   // its lifetime copies undefined data, and we can therefore eliminate
   // the memcpy in favor of the data that was already at the destination.
   // d) memcpy from a just-memset'd source can be turned into memset.
-  if (DepInfo.isClobber()) {
-    if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
-      // FIXME: Can we pass in either of dest/src alignment here instead
-      // of conservatively taking the minimum?
-      Align Alignment = std::min(M->getDestAlign().valueOrOne(),
-                                 M->getSourceAlign().valueOrOne());
-      if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
-                               CopySize->getZExtValue(), Alignment, C)) {
-        eraseInstruction(M);
-        ++NumMemCpyInstr;
-        return true;
-      }
-    }
+  if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+    if (DepInfo.isClobber()) {
+      if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+        // FIXME: Can we pass in either of dest/src alignment here instead
+        // of conservatively taking the minimum?
+        Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+                                   M->getSourceAlign().valueOrOne());
+        if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+                                 CopySize->getZExtValue(), Alignment, C)) {
+          eraseInstruction(M);
+          ++NumMemCpyInstr;
+          return true;
+        }
+      }
+    }
   }
@@ -1498,7 +1509,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
       return processMemCpyMemCpyDependence(M, MDep);
   } else if (SrcDepInfo.isDef()) {
-    if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
+    if (hasUndefContents(SrcDepInfo.getInst(), M->getLength())) {
       eraseInstruction(M);
       ++NumMemCpyInstr;
       return true;
new file: test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll (39 lines)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @test(i8* %src, i64 %size) {
; CHECK-LABEL: @test(
; CHECK-NEXT:    [[TMP:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
; CHECK-NEXT:    [[DST:%.*]] = alloca i8, i64 [[SIZE]], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP]], i8* align 8 [[SRC:%.*]], i64 [[SIZE]], i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[SRC]], i64 [[SIZE]], i1 false)
; CHECK-NEXT:    ret void
;
  %tmp = alloca i8, i64 %size
  %dst = alloca i8, i64 %size
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 %size, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 %size, i1 false)

  ret void
}

; Differing sizes, so left as it is.
define void @negative_test(i8* %src, i64 %size1, i64 %size2) {
; CHECK-LABEL: @negative_test(
; CHECK-NEXT:    [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
; CHECK-NEXT:    [[DST:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP]], i8* align 8 [[SRC:%.*]], i64 [[SIZE1]], i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[TMP]], i64 [[SIZE2]], i1 false)
; CHECK-NEXT:    ret void
;
  %tmp = alloca i8, i64 %size1
  %dst = alloca i8, i64 %size2
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 %size1, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 %size2, i1 false)

  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
new file: test/Transforms/MemCpyOpt/variable-sized-memcpy-uninit.ll (32 lines)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @test(i64 %size) {
; CHECK-LABEL: @test(
; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
; CHECK-NEXT:    [[DST:%.*]] = alloca i8, i64 [[SIZE]], align 1
; CHECK-NEXT:    ret void
;
  %src = alloca i8, i64 %size
  %dst = alloca i8, i64 %size
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %size, i1 false)

  ret void
}

define void @test2(i64 %size1, i64 %size2, i64 %cpy_size) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
; CHECK-NEXT:    [[DST:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
; CHECK-NEXT:    ret void
;
  %src = alloca i8, i64 %size1
  %dst = alloca i8, i64 %size2
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %cpy_size, i1 false)

  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
new file: test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll (40 lines)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @test(i8* %src, i8 %c, i64 %size) {
; CHECK-LABEL: @test(
; CHECK-NEXT:    [[DST1:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
; CHECK-NEXT:    [[DST2:%.*]] = alloca i8, i64 [[SIZE]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE]], i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[DST2]], i8 [[C]], i64 [[SIZE]], i1 false)
; CHECK-NEXT:    ret void
;
  %dst1 = alloca i8, i64 %size
  %dst2 = alloca i8, i64 %size
  call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 %size, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 %size, i1 false)

  ret void
}

; Differing sizes, so left as it is.
define void @negative_test(i8* %src, i8 %c, i64 %size1, i64 %size2) {
; CHECK-LABEL: @negative_test(
; CHECK-NEXT:    [[DST1:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
; CHECK-NEXT:    [[DST2:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE1]], i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST2]], i8* align 8 [[DST1]], i64 [[SIZE2]], i1 false)
; CHECK-NEXT:    ret void
;
  %dst1 = alloca i8, i64 %size1
  %dst2 = alloca i8, i64 %size2
  call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 %size1, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 %size2, i1 false)

  ret void
}

declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)