2019-05-30 15:02:06 +02:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
2020-06-27 05:41:37 +02:00
|
|
|
; RUN: opt -basic-aa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s
|
2019-05-30 15:02:06 +02:00
|
|
|
|
|
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
|
|
|
|
; Check that everything still works when debuginfo is present, and that it is reasonably propagated.
|
|
|
|
|
[loop-idiom] Hoist loop memcpys to loop preheader
For a simple loop like:
```
struct S {
int x;
int y;
char b;
};
unsigned foo(S* __restrict__ a, S* b, int n) {
for (int i = 0; i < n; i++)
a[i] = b[i];
return sizeof(a[0]);
}
```
We could eliminate the loop and convert it to a single large memcpy of 12*n bytes. Currently this is not handled; the output of `opt -loop-idiom -S < memcpy_before.ll` is:
```
%struct.S = type { i32, i32, i8 }
define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr {
entry:
%cmp7 = icmp sgt i32 %n, 0
br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret i32 12
for.body: ; preds = %for.body, %for.body.preheader
%i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%idxprom = zext i32 %i.08 to i64
%arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom
%arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom
%0 = bitcast %struct.S* %arrayidx2 to i8*
%1 = bitcast %struct.S* %arrayidx to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %0, i8* nonnull align 4 dereferenceable(12) %1, i64 12, i1 false)
%inc = add nuw nsw i32 %i.08, 1
%cmp = icmp slt i32 %inc, %n
br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
}
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
attributes #0 = { argmemonly nofree nosync nounwind willreturn }
```
The loop idiom pass currently only handles load and store instructions. Since struct S is too big to fit in a register, the loop body contains a memcpy intrinsic.
With this change, re-running `opt -loop-idiom -S < memcpy_before.ll` hoists the loop memcpy into the loop preheader as a single memcpy covering all iterations. For this trivial case, the remaining loop is dead and will be removed by a later pass.
```
%struct.S = type { i32, i32, i8 }
define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr {
entry:
%a1 = bitcast %struct.S* %a to i8*
%b2 = bitcast %struct.S* %b to i8*
%cmp7 = icmp sgt i32 %n, 0
br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
%0 = zext i32 %n to i64
%1 = mul nuw nsw i64 %0, 12
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a1, i8* align 4 %b2, i64 %1, i1 false)
br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret i32 12
for.body: ; preds = %for.body, %for.body.preheader
%i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%idxprom = zext i32 %i.08 to i64
%arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom
%arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom
%2 = bitcast %struct.S* %arrayidx2 to i8*
%3 = bitcast %struct.S* %arrayidx to i8*
%inc = add nuw nsw i32 %i.08, 1
%cmp = icmp slt i32 %inc, %n
br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
}
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
attributes #0 = { argmemonly nofree nosync nounwind willreturn }
```
Reviewed By: zino
Differential Revision: https://reviews.llvm.org/D97667
2021-02-09 02:24:25 +01:00
|
|
|
; CHECK: remark: <stdin>:6:1: Formed a call to llvm.memcpy.p0i8.p0i8.i64() intrinsic from load and store instruction in test6_dest_align function
|
2019-05-30 15:02:06 +02:00
|
|
|
|
|
|
|
; test6_dest_align: element-wise i32 copy loop from %Base to %Dest.
; The load is only 1-byte aligned while the store is 4-byte aligned. The
; autogenerated assertions below expect one memcpy in bb.nph that keeps those
; per-operand alignments (dest align 4, source align 1), copies Size << 2
; bytes, and carries a !dbg attachment.
; NOTE(review): the !dbg / dbg.value numbering presumably comes from the
; -debugify pass in the opt invocation at the top of the file - confirm before
; editing the numbers by hand.
define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
|
|
|
|
; CHECK-LABEL: @test6_dest_align(
|
|
|
|
; CHECK-NEXT: bb.nph:
|
|
|
|
; CHECK-NEXT: [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8*
|
|
|
|
; CHECK-NEXT: [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
|
2019-06-17 12:05:18 +02:00
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2, !dbg !18
|
2019-05-30 15:02:06 +02:00
|
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST1]], i8* align 1 [[BASE2]], i64 [[TMP0]], i1 false), !dbg !19
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !18
|
|
|
|
; CHECK: for.body:
|
|
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], !dbg !20
|
|
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR]], metadata !9, metadata !DIExpression()), !dbg !20
|
|
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[INDVAR]], !dbg !21
|
|
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[I_0_014]], metadata !11, metadata !DIExpression()), !dbg !21
|
|
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, i32* [[DEST]], i64 [[INDVAR]], !dbg !22
|
|
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[DESTI]], metadata !12, metadata !DIExpression()), !dbg !22
|
|
|
|
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[I_0_014]], align 1, !dbg !23
|
|
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[V]], metadata !13, metadata !DIExpression()), !dbg !23
|
|
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1, !dbg !24
|
|
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR_NEXT]], metadata !15, metadata !DIExpression()), !dbg !24
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]], !dbg !25
|
|
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[EXITCOND]], metadata !16, metadata !DIExpression()), !dbg !25
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !dbg !26
|
|
|
|
; CHECK: for.end:
|
|
|
|
; CHECK-NEXT: ret void, !dbg !27
|
|
|
|
;
|
|
|
|
; Entry block: branches unconditionally into the loop, and the exit test sits
; at the bottom of for.body, so the body executes at least once.
bb.nph:
|
|
|
|
br label %for.body
|
|
|
|
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
|
|
; Induction variable: 0 when entered from bb.nph, %indvar.next on the back edge.
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
|
|
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
|
|
|
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
|
|
|
; The source element is read with only byte alignment (align 1).
%V = load i32, i32* %I.0.014, align 1
|
|
|
|
; The destination is written with 4-byte alignment. The expected output above
; lists no store in for.body (the memcpy in bb.nph covers it); the load stays.
store i32 %V, i32* %DestI, align 4
|
|
|
|
%indvar.next = add i64 %indvar, 1
|
|
|
|
; Leave the loop once the incremented counter equals %Size.
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
|
|
|
|
for.end: ; preds = %for.body
|
|
|
|
ret void
|
|
|
|
}
|