mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
33552ebc41
For a simple loop like: ``` struct S { int x; int y; char b; }; unsigned foo(S* __restrict__ a, S* b, int n) { for (int i = 0; i < n; i++) a[i] = b[i]; return sizeof(a[0]); } ``` We could eliminate the loop and convert it to a large memcpy of 12*n bytes. Currently this is not handled. Output of `opt -loop-idiom -S < memcpy_before.ll` ``` %struct.S = type { i32, i32, i8 } define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr { entry: %cmp7 = icmp sgt i32 %n, 0 br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry br label %for.body for.cond.cleanup.loopexit: ; preds = %for.body br label %for.cond.cleanup for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry ret i32 12 for.body: ; preds = %for.body, %for.body.preheader %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %idxprom = zext i32 %i.08 to i64 %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom %0 = bitcast %struct.S* %arrayidx2 to i8* %1 = bitcast %struct.S* %arrayidx to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %0, i8* nonnull align 4 dereferenceable(12) %1, i64 12, i1 false) %inc = add nuw nsw i32 %i.08, 1 %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit } ; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 attributes #0 = { argmemonly nofree nosync nounwind willreturn } ``` The loop idiom pass currently only handles load and store instructions. Since struct S is too big to fit in a register, the loop body contains a memcpy intrinsic. With this change, re-run `opt -loop-idiom -S < memcpy_before.ll`. The loop memcpy is promoted to loop preheader. For this trivial case, the loop is dead and will be removed by another pass. ``` %struct.S = type { i32, i32, i8 } define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr { entry: %a1 = bitcast %struct.S* %a to i8* %b2 = bitcast %struct.S* %b to i8* %cmp7 = icmp sgt i32 %n, 0 br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry %0 = zext i32 %n to i64 %1 = mul nuw nsw i64 %0, 12 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a1, i8* align 4 %b2, i64 %1, i1 false) br label %for.body for.cond.cleanup.loopexit: ; preds = %for.body br label %for.cond.cleanup for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry ret i32 12 for.body: ; preds = %for.body, %for.body.preheader %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %idxprom = zext i32 %i.08 to i64 %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom %2 = bitcast %struct.S* %arrayidx2 to i8* %3 = bitcast %struct.S* %arrayidx to i8* %inc = add nuw nsw i32 %i.08, 1 %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit } ; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 attributes #0 = { argmemonly nofree nosync nounwind willreturn } ``` Reviewed By: zino Differential Revision: https://reviews.llvm.org/D97667
52 lines
3.1 KiB
LLVM
52 lines
3.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -basic-aa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; Check that everything still works when debuginfo is present, and that it is reasonably propagated.
|
|
|
|
; CHECK: remark: <stdin>:6:1: Formed a call to llvm.memcpy.p0i8.p0i8.i64() intrinsic from load and store instruction in test6_dest_align function
|
|
|
|
define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test6_dest_align(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8*
|
|
; CHECK-NEXT: [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2, !dbg !18
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST1]], i8* align 1 [[BASE2]], i64 [[TMP0]], i1 false), !dbg !19
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !18
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], !dbg !20
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR]], metadata !9, metadata !DIExpression()), !dbg !20
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[INDVAR]], !dbg !21
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[I_0_014]], metadata !11, metadata !DIExpression()), !dbg !21
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, i32* [[DEST]], i64 [[INDVAR]], !dbg !22
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[DESTI]], metadata !12, metadata !DIExpression()), !dbg !22
|
|
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[I_0_014]], align 1, !dbg !23
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[V]], metadata !13, metadata !DIExpression()), !dbg !23
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1, !dbg !24
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR_NEXT]], metadata !15, metadata !DIExpression()), !dbg !24
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]], !dbg !25
|
|
; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[EXITCOND]], metadata !16, metadata !DIExpression()), !dbg !25
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !dbg !26
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void, !dbg !27
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
|
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
|
%V = load i32, i32* %I.0.014, align 1
|
|
store i32 %V, i32* %DestI, align 4
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
ret void
|
|
}
|