[LoopIdiom] Transform memmove-like loop into memmove (PR46179)

The purpose of this patch is to teach the Loop Idiom Recognition pass to recognize simple memmove patterns,
in a similar way to GCC: https://godbolt.org/z/fh95e83od
LoopIdiomRecognize already has machinery for memset and memcpy recognition; this patch extends the existing capabilities with minimal effort.
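
For illustration, a minimal C++ sketch (our own example, mirroring the godbolt link above) of the kind of loop this patch targets:

// A loop of this shape copies between overlapping regions of the same
// buffer, so the equivalent library call is memmove, not memcpy.
void shift_left(char *a, long n) {
  for (long i = 0; i < n; ++i)
    a[i] = a[i + 1]; // loads a[i+1], stores a[i]; the regions overlap
}
// With this patch, LoopIdiomRecognize rewrites such loops into the
// equivalent of memmove(a, a + 1, n); see the tests below.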

Differential Revision: https://reviews.llvm.org/D104464
Dawid Jurczak 2021-07-22 11:42:59 +02:00
parent fa5973a54d
commit 60d27bc367
3 changed files with 382 additions and 22 deletions


@@ -21,7 +21,7 @@
// TODO List:
//
// Future loop memory idioms to recognize:
// memcmp, memmove, strlen, etc.
// memcmp, strlen, etc.
// Future floating point idioms to recognize in -ffast-math mode:
// fpowi
// Future integer operation idioms to recognize:
@@ -109,6 +109,7 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
STATISTIC(
NumShiftUntilBitTest,
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
@@ -1236,23 +1237,30 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// the return value will read this comment, and leave them alone.
Changed = true;
SmallPtrSet<Instruction *, 1> Stores;
SmallPtrSet<Instruction *, 2> Stores;
Stores.insert(TheStore);
bool IsMemCpy = isa<MemCpyInst>(TheStore);
const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSize, *AA, Stores)) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
TheStore)
<< ore::NV("Inst", InstRemark) << " in "
<< ore::NV("Function", TheStore->getFunction())
<< " function will not be hoisted: "
<< ore::NV("Reason", "The loop may access store location");
});
return Changed;
bool UseMemMove =
mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSize, *AA, Stores);
if (UseMemMove) {
Stores.insert(TheLoad);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
BECount, StoreSize, *AA, Stores)) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
TheStore)
<< ore::NV("Inst", InstRemark) << " in "
<< ore::NV("Function", TheStore->getFunction())
<< " function will not be hoisted: "
<< ore::NV("Reason", "The loop may access store location");
});
return Changed;
}
Stores.erase(TheLoad);
}
const SCEV *LdStart = LoadEv->getStart();
@@ -1282,6 +1290,22 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
});
return Changed;
}
if (UseMemMove) {
// Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for
// negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
int64_t LoadOff = 0, StoreOff = 0;
const Value *BP1 = llvm::GetPointerBaseWithConstantOffset(
LoadBasePtr->stripPointerCasts(), LoadOff, *DL);
const Value *BP2 = llvm::GetPointerBaseWithConstantOffset(
StoreBasePtr->stripPointerCasts(), StoreOff, *DL);
int64_t LoadSize =
DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8;
if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
return Changed;
if ((!NegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
(NegStride && LoadOff + LoadSize > StoreOff))
return Changed;
}
if (avoidLIRForMultiBlockLoop())
return Changed;
@@ -1298,10 +1322,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must necessarily be unordered
// by previous checks.
if (!TheStore->isAtomic() && !TheLoad->isAtomic())
NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr,
LoadAlign, NumBytes);
else {
if (!TheStore->isAtomic() && !TheLoad->isAtomic()) {
if (UseMemMove)
NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr,
LoadAlign, NumBytes);
else
NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr,
LoadAlign, NumBytes);
} else {
// For now don't support unordered atomic memmove.
if (UseMemMove)
return Changed;
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
assert((StoreAlign.hasValue() && LoadAlign.hasValue()) &&
@@ -1331,7 +1362,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
}
LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
LLVM_DEBUG(dbgs() << " Formed new call: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *TheLoad
<< "\n"
<< " from store ptr=" << *StoreEv << " at: " << *TheStore
@@ -1354,7 +1385,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
deleteDeadInstruction(TheStore);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
++NumMemCpy;
if (UseMemMove)
++NumMemMove;
else
++NumMemCpy;
ExpCleaner.markResultUsed();
return true;
}
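
To restate the new overlap check in isolation: memmove is formed only when the first iteration's load is disjoint from its store, with the required ordering depending on stride direction. A minimal sketch, using a hypothetical free function (the real check above additionally requires both pointers to share the same base and bails out otherwise):

#include <cstdint>

// Offsets are byte offsets from the common base pointer; returns true
// when this check allows forming a memmove.
bool loadStoreDisjoint(int64_t LoadOff, int64_t StoreOff,
                       int64_t LoadSize, int64_t StoreSize,
                       bool NegStride) {
  if (LoadSize != StoreSize)
    return false;                           // mismatched access sizes: reject
  if (!NegStride)
    return LoadOff >= StoreOff + StoreSize; // load must start at/after store end
  return LoadOff + LoadSize <= StoreOff;    // load must end at/before store start
}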


@@ -454,3 +454,55 @@ for.body: ; preds = %entry, %for.body
for.end: ; preds = %for.body
ret void
}
; Make sure that atomic memcpy or memmove don't get recognized by mistake
; when looping with positive stride
define void @test_no_memcpy_memmove1(i8* %Src, i64 %Size) {
; CHECK-LABEL: @test_no_memcpy_memmove1(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK-NOT: call void @llvm.memmove.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, 1
%SrcI = getelementptr i8, i8* %Src, i64 %Step
%DestI = getelementptr i8, i8* %Src, i64 %indvar
%V = load i8, i8* %SrcI, align 1
store atomic i8 %V, i8* %DestI unordered, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Make sure that atomic memcpy or memmove don't get recognized by mistake
; when looping with negative stride
define void @test_no_memcpy_memmove2(i8* %Src, i64 %Size) {
; CHECK-LABEL: @test_no_memcpy_memmove2(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK-NOT: call void @llvm.memmove.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
%cmp1 = icmp sgt i64 %Size, 0
br i1 %cmp1, label %for.body, label %for.end
for.body: ; preds = %bb.nph, %.for.body
%indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
%Step = add nsw i64 %indvar, -1
%SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
%V = load i8, i8* %SrcI, align 1
%DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
store atomic i8 %V, i8* %DestI unordered, align 1
%exitcond = icmp sgt i64 %indvar, 1
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
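
For orientation, a rough C++ equivalent (our sketch) of the negative-stride loop above, with the atomic qualifier dropped; with a plain store, the pass now rewrites it as shown in the PR46179_negative_stride test below:

// Shifts the buffer right by one element, walking from high to low index.
void shift_right(char *a, long n) {
  for (long i = n; i >= 1; --i)
    a[i] = a[i - 1]; // with a plain store this becomes memmove(a + 1, a, n)
}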


@@ -689,15 +689,23 @@ define void @PR14241(i32* %s, i64 %size) {
;
; CHECK-LABEL: @PR14241(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S1:%.*]] = bitcast i32* [[S:%.*]] to i8*
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 [[END_IDX]]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[S]], i64 1
; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[SIZE]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], -8
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 4
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 [[SCEVGEP2]], i64 [[TMP5]], i1 false)
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PHI_PTR:%.*]] = phi i32* [ [[S]], [[ENTRY:%.*]] ], [ [[NEXT_PTR:%.*]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds i32, i32* [[PHI_PTR]], i64 1
; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[SRC_PTR]], align 4
; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds i32, i32* [[PHI_PTR]], i64 0
; CHECK-NEXT: store i32 [[VAL]], i32* [[DST_PTR]], align 4
; CHECK-NEXT: [[NEXT_PTR]] = getelementptr inbounds i32, i32* [[PHI_PTR]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[NEXT_PTR]], [[END_PTR]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY]]
@@ -709,8 +717,6 @@ entry:
%end.idx = add i64 %size, -1
%end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
br label %while.body
; FIXME: When we regain the ability to form a memmove here, this test should be
; reversed and turned into a positive assertion.
while.body:
%phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
@@ -1063,6 +1069,274 @@ exit:
ret void
}
;; Memmove formation.
define void @PR46179_positive_stride(i8* %Src, i64 %Size) {
; CHECK-LABEL: @PR46179_positive_stride(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SRC]], i8* align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, 1
%SrcI = getelementptr i8, i8* %Src, i64 %Step
%DestI = getelementptr i8, i8* %Src, i64 %indvar
%V = load i8, i8* %SrcI, align 1
store i8 %V, i8* %DestI, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
;; Memmove formation.
define void @PR46179_negative_stride(i8* %Src, i64 %Size) {
; CHECK-LABEL: @PR46179_negative_stride(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SCEVGEP]], i8* align 1 [[SRC]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
%cmp1 = icmp sgt i64 %Size, 0
br i1 %cmp1, label %for.body, label %for.end
for.body: ; preds = %bb.nph, %.for.body
%indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
%Step = add nsw i64 %indvar, -1
%SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
%V = load i8, i8* %SrcI, align 1
%DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
store i8 %V, i8* %DestI, align 1
%exitcond = icmp sgt i64 %indvar, 1
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %.for.body, %bb.nph
ret void
}
;; Do not form memmove from previous store when stride is positive.
define void @do_not_form_memmove1(i8* %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove1(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 1, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT: store i8 [[V]], i8* [[DESTI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 1, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, -1
%SrcI = getelementptr i8, i8* %Src, i64 %Step
%DestI = getelementptr i8, i8* %Src, i64 %indvar
%V = load i8, i8* %SrcI, align 1
store i8 %V, i8* %DestI, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
;; Do not form memmove from next store when stride is negative.
define void @do_not_form_memmove2(i8* %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove2(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: store i8 [[V]], i8* [[DESTI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
%cmp1 = icmp sgt i64 %Size, 0
br i1 %cmp1, label %for.body, label %for.end
for.body: ; preds = %bb.nph, %.for.body
%indvar = phi i64 [ %indvar.next, %for.body ], [ %Size, %bb.nph ]
%Step = add nuw nsw i64 %indvar, 1
%SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
%V = load i8, i8* %SrcI, align 1
%DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
store i8 %V, i8* %DestI, align 1
%indvar.next = add nsw i64 %indvar, -1
%exitcond = icmp sgt i64 %indvar, 1
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %.for.body, %bb.nph
ret void
}
;; Do not form memmove when underaligned load is overlapped with store.
define void @do_not_form_memmove3(i32* %s, i64 %size) {
; CHECK-LABEL: @do_not_form_memmove3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 [[END_IDX]]
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PHI_PTR:%.*]] = phi i32* [ [[S]], [[ENTRY:%.*]] ], [ [[NEXT_PTR:%.*]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[NEXT:%.*]] = bitcast i32* [[PHI_PTR]] to i16*
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr i16, i16* [[NEXT]], i64 1
; CHECK-NEXT: [[SRC_PTR2:%.*]] = bitcast i16* [[SRC_PTR]] to i32*
; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[SRC_PTR2]], align 2
; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr i32, i32* [[PHI_PTR]], i64 0
; CHECK-NEXT: store i32 [[VAL]], i32* [[DST_PTR]], align 4
; CHECK-NEXT: [[NEXT_PTR]] = getelementptr i32, i32* [[PHI_PTR]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[NEXT_PTR]], [[END_PTR]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%end.idx = add i64 %size, -1
%end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
br label %while.body
while.body:
%phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
%next = bitcast i32* %phi.ptr to i16*
%src.ptr = getelementptr i16, i16* %next, i64 1
%src.ptr2 = bitcast i16* %src.ptr to i32*
; below underaligned load is overlapped with store.
%val = load i32, i32* %src.ptr2, align 2
%dst.ptr = getelementptr i32, i32* %phi.ptr, i64 0
store i32 %val, i32* %dst.ptr, align 4
%next.ptr = getelementptr i32, i32* %phi.ptr, i64 1
%cmp = icmp eq i32* %next.ptr, %end.ptr
br i1 %cmp, label %exit, label %while.body
exit:
ret void
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
;; FIXME: Do not form memmove from loop body containing memcpy.
define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove4(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, 1
%SrcI = getelementptr i8, i8* %Src, i64 %Step
%DestI = getelementptr i8, i8* %Src, i64 %indvar
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
;; Memcpy formation is still preferred over memmove.
define void @prefer_memcpy_over_memmove(i8* noalias %Src, i8* noalias %Dest, i64 %Size) {
; CHECK-LABEL: @prefer_memcpy_over_memmove(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 42
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST:%.*]], i8* align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 42
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, 42
%SrcI = getelementptr i8, i8* %Src, i64 %Step
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
%V = load i8, i8* %SrcI, align 1
store i8 %V, i8* %DestI, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Validate that "memset_pattern" has the proper attributes.
; CHECK: declare void @memset_pattern16(i8* nocapture writeonly, i8* nocapture readonly, i64) [[ATTRS:#[0-9]+]]
; CHECK: [[ATTRS]] = { argmemonly nofree }