1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 10:32:48 +02:00

[LoopIdiom] Don't transform loop into memmove when load from body has more than one use

This change fixes an issue found by Markus: https://reviews.llvm.org/rG11338e998df1
Before this patch, the following code was transformed into a memmove:

for (int i = 15; i >= 1; i--) {
  p[i] = p[i-1];
  sum += p[i-1];
}

However, the load from p[i-1] is used not only by the store to p[i] but also by the sum computation.
Therefore, we cannot emit a memmove in the loop header.

Differential Revision: https://reviews.llvm.org/D107964

(cherry picked from commit bdcf04246c401aec9bdddf32fabc99fa4834a477)
This commit is contained in:
Dawid Jurczak 2021-08-25 13:13:18 +02:00
parent b61f80921f
commit 79ff6e4b2d
2 changed files with 7 additions and 5 deletions

View File

@ -1247,6 +1247,11 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSize, *AA, Stores); StoreSize, *AA, Stores);
if (UseMemMove) { if (UseMemMove) {
// For memmove case it's not enough to guarantee that loop doesn't access
// TheStore and TheLoad. Additionally we need to make sure that TheStore is
// the only user of TheLoad.
if (!TheLoad->hasOneUse())
return Changed;
Stores.insert(TheLoad); Stores.insert(TheLoad);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
BECount, StoreSize, *AA, Stores)) { BECount, StoreSize, *AA, Stores)) {

View File

@ -1300,14 +1300,10 @@ for.end: ; preds = %for.body, %entry
ret void ret void
} }
;; FIXME: Do not form memmove when load has more than one use. ;; Do not form memmove when load has more than one use.
define i32 @do_not_form_memmove5(i32* %p) { define i32 @do_not_form_memmove5(i32* %p) {
; CHECK-LABEL: @do_not_form_memmove5( ; CHECK-LABEL: @do_not_form_memmove5(
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 1
; CHECK-NEXT: [[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 [[SCEVGEP1]], i8* align 4 [[P2]], i64 60, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup: ; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ]
@ -1321,6 +1317,7 @@ define i32 @do_not_form_memmove5(i32* %p) {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INDEX]] to i64
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[IDXPROM]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP1]], [[SUM:%.*]] ; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP1]], [[SUM:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[INDEX]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[INDEX]], 1
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]