1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[LoopFusion] Restrict loop fusion to rotated loops.

Summary:
This patch restricts loop fusion to only consider rotated loops as valid candidates.
This simplifies the analysis and transformation and aligns with other loop optimizations.

Reviewers: jdoerfert, Meinersbur, dmgreen, etiotto, Whitney, fhahn, hfinkel

Reviewed By: Meinersbur

Subscribers: ormris, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71025
This commit is contained in:
Kit Barton 2019-12-16 11:57:52 -05:00
parent aabf6bdfe5
commit 9809dd2f91
6 changed files with 580 additions and 630 deletions

View File

@ -91,6 +91,7 @@ STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
STATISTIC(NonIdenticalGuards, "Candidates have different guards");
STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block");
STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block");
STATISTIC(NotRotated, "Candidate is not rotated");
enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
@ -319,6 +320,11 @@ struct FusionCandidate {
return reportInvalidCandidate(NotSimplifiedForm);
}
if (!isRotated()) {
LLVM_DEBUG(dbgs() << "Loop " << L->getName() << " is not rotated!\n");
return reportInvalidCandidate(NotRotated);
}
return true;
}

View File

@ -15,60 +15,64 @@
; CHECK: bb20.preheader
; CHECK: ****************************
; CHECK: Loop Fusion complete
define void @non_cfe(i32* noalias %arg) {
define void @non_cfe(i32* noalias %arg, i32 %N) {
bb:
br label %bb5
br label %bb7
bb5: ; preds = %bb14, %bb
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ]
%.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
%exitcond4 = icmp ne i64 %indvars.iv2, 100
br i1 %exitcond4, label %bb7, label %bb16
bb7: ; preds = %bb5
%tmp = add nsw i32 %.01, -3
%tmp8 = add nuw nsw i64 %indvars.iv2, 3
bb7: ; preds = %bb, %bb14
%.014 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
%indvars.iv23 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb14 ]
%tmp = add nsw i32 %.014, -3
%tmp8 = add nuw nsw i64 %indvars.iv23, 3
%tmp9 = trunc i64 %tmp8 to i32
%tmp10 = mul nsw i32 %tmp, %tmp9
%tmp11 = trunc i64 %indvars.iv2 to i32
%tmp11 = trunc i64 %indvars.iv23 to i32
%tmp12 = srem i32 %tmp10, %tmp11
%tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2
%tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23
store i32 %tmp12, i32* %tmp13, align 4
br label %bb14
bb14: ; preds = %bb7
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
%tmp15 = add nuw nsw i32 %.01, 1
br label %bb5
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1
%tmp15 = add nuw nsw i32 %.014, 1
%exitcond4 = icmp ne i64 %indvars.iv.next3, 100
br i1 %exitcond4, label %bb7, label %bb34
bb16: ; preds = %bb5
bb34:
%cmp = icmp slt i32 %N, 50
br i1 %cmp, label %bb16, label %bb33
bb16: ; preds = %bb34
%tmp17 = load i32, i32* %arg, align 4
%tmp18 = icmp slt i32 %tmp17, 0
br i1 %tmp18, label %bb20, label %bb33
br i1 %tmp18, label %bb20.preheader, label %bb33
bb20: ; preds = %bb30, %bb16
%indvars.iv = phi i64 [ %indvars.iv.next, %bb30 ], [ 0, %bb16 ]
%.0 = phi i32 [ 0, %bb16 ], [ %tmp31, %bb30 ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %bb22, label %bb33
bb20.preheader: ; preds = %bb16
br label %bb22
bb22: ; preds = %bb20
%tmp23 = add nsw i32 %.0, -3
%tmp24 = add nuw nsw i64 %indvars.iv, 3
bb22: ; preds = %bb20.preheader, %bb30
%.02 = phi i32 [ 0, %bb20.preheader ], [ %tmp31, %bb30 ]
%indvars.iv1 = phi i64 [ 0, %bb20.preheader ], [ %indvars.iv.next, %bb30 ]
%tmp23 = add nsw i32 %.02, -3
%tmp24 = add nuw nsw i64 %indvars.iv1, 3
%tmp25 = trunc i64 %tmp24 to i32
%tmp26 = mul nsw i32 %tmp23, %tmp25
%tmp27 = trunc i64 %indvars.iv to i32
%tmp27 = trunc i64 %indvars.iv1 to i32
%tmp28 = srem i32 %tmp26, %tmp27
%tmp29 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
%tmp29 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1
store i32 %tmp28, i32* %tmp29, align 4
br label %bb30
bb30: ; preds = %bb22
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%tmp31 = add nuw nsw i32 %.0, 1
br label %bb20
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
%tmp31 = add nuw nsw i32 %.02, 1
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %bb22, label %bb33.loopexit
bb33: ; preds = %bb20, %bb16
bb33.loopexit: ; preds = %bb30
br label %bb33
bb33: ; preds = %bb33.loopexit, %bb16, %bb34
ret void
}
@ -88,54 +92,48 @@ bb33: ; preds = %bb20, %bb16
; CHECK: Loop Fusion complete
define void @non_adjacent(i32* noalias %arg) {
bb:
br label %bb3
br label %bb5
bb3: ; preds = %bb11, %bb
%.01 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
%exitcond2 = icmp ne i64 %.01, 100
br i1 %exitcond2, label %bb5, label %bb4
bb4: ; preds = %bb3
bb4: ; preds = %bb11
br label %bb13
bb5: ; preds = %bb3
%tmp = add nsw i64 %.01, -3
%tmp6 = add nuw nsw i64 %.01, 3
bb5: ; preds = %bb, %bb11
%.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
%tmp = add nsw i64 %.013, -3
%tmp6 = add nuw nsw i64 %.013, 3
%tmp7 = mul nsw i64 %tmp, %tmp6
%tmp8 = srem i64 %tmp7, %.01
%tmp8 = srem i64 %tmp7, %.013
%tmp9 = trunc i64 %tmp8 to i32
%tmp10 = getelementptr inbounds i32, i32* %arg, i64 %.01
%tmp10 = getelementptr inbounds i32, i32* %arg, i64 %.013
store i32 %tmp9, i32* %tmp10, align 4
br label %bb11
bb11: ; preds = %bb5
%tmp12 = add nuw nsw i64 %.01, 1
br label %bb3
%tmp12 = add nuw nsw i64 %.013, 1
%exitcond2 = icmp ne i64 %tmp12, 100
br i1 %exitcond2, label %bb5, label %bb4
bb13: ; preds = %bb4
br label %bb14
br label %bb16
bb14: ; preds = %bb23, %bb13
%.0 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
%exitcond = icmp ne i64 %.0, 100
br i1 %exitcond, label %bb16, label %bb15
bb15: ; preds = %bb14
bb15: ; preds = %bb23
br label %bb25
bb16: ; preds = %bb14
%tmp17 = add nsw i64 %.0, -3
%tmp18 = add nuw nsw i64 %.0, 3
bb16: ; preds = %bb13, %bb23
%.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
%tmp17 = add nsw i64 %.02, -3
%tmp18 = add nuw nsw i64 %.02, 3
%tmp19 = mul nsw i64 %tmp17, %tmp18
%tmp20 = srem i64 %tmp19, %.0
%tmp20 = srem i64 %tmp19, %.02
%tmp21 = trunc i64 %tmp20 to i32
%tmp22 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.0
%tmp22 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.02
store i32 %tmp21, i32* %tmp22, align 4
br label %bb23
bb23: ; preds = %bb16
%tmp24 = add nuw nsw i64 %.0, 1
br label %bb14
%tmp24 = add nuw nsw i64 %.02, 1
%exitcond = icmp ne i64 %tmp24, 100
br i1 %exitcond, label %bb16, label %bb15
bb25: ; preds = %bb15
ret void
@ -156,54 +154,48 @@ bb25: ; preds = %bb15
; CHECK: Loop Fusion complete
define void @different_bounds(i32* noalias %arg) {
bb:
br label %bb3
br label %bb5
bb3: ; preds = %bb11, %bb
%.01 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
%exitcond2 = icmp ne i64 %.01, 100
br i1 %exitcond2, label %bb5, label %bb4
bb4: ; preds = %bb3
bb4: ; preds = %bb11
br label %bb13
bb5: ; preds = %bb3
%tmp = add nsw i64 %.01, -3
%tmp6 = add nuw nsw i64 %.01, 3
bb5: ; preds = %bb, %bb11
%.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
%tmp = add nsw i64 %.013, -3
%tmp6 = add nuw nsw i64 %.013, 3
%tmp7 = mul nsw i64 %tmp, %tmp6
%tmp8 = srem i64 %tmp7, %.01
%tmp8 = srem i64 %tmp7, %.013
%tmp9 = trunc i64 %tmp8 to i32
%tmp10 = getelementptr inbounds i32, i32* %arg, i64 %.01
%tmp10 = getelementptr inbounds i32, i32* %arg, i64 %.013
store i32 %tmp9, i32* %tmp10, align 4
br label %bb11
bb11: ; preds = %bb5
%tmp12 = add nuw nsw i64 %.01, 1
br label %bb3
%tmp12 = add nuw nsw i64 %.013, 1
%exitcond2 = icmp ne i64 %tmp12, 100
br i1 %exitcond2, label %bb5, label %bb4
bb13: ; preds = %bb4
br label %bb14
br label %bb16
bb14: ; preds = %bb23, %bb13
%.0 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
%exitcond = icmp ne i64 %.0, 200
br i1 %exitcond, label %bb16, label %bb15
bb15: ; preds = %bb14
bb15: ; preds = %bb23
br label %bb25
bb16: ; preds = %bb14
%tmp17 = add nsw i64 %.0, -3
%tmp18 = add nuw nsw i64 %.0, 3
bb16: ; preds = %bb13, %bb23
%.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
%tmp17 = add nsw i64 %.02, -3
%tmp18 = add nuw nsw i64 %.02, 3
%tmp19 = mul nsw i64 %tmp17, %tmp18
%tmp20 = srem i64 %tmp19, %.0
%tmp20 = srem i64 %tmp19, %.02
%tmp21 = trunc i64 %tmp20 to i32
%tmp22 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.0
%tmp22 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.02
store i32 %tmp21, i32* %tmp22, align 4
br label %bb23
bb23: ; preds = %bb16
%tmp24 = add nuw nsw i64 %.0, 1
br label %bb14
%tmp24 = add nuw nsw i64 %.02, 1
%exitcond = icmp ne i64 %tmp24, 200
br i1 %exitcond, label %bb16, label %bb15
bb25: ; preds = %bb15
ret void
@ -225,41 +217,38 @@ bb25: ; preds = %bb15
; CHECK: Loop Fusion complete
define void @negative_dependence(i32* noalias %arg) {
bb:
br label %bb5
br label %bb7
bb5: ; preds = %bb9, %bb
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb9 ], [ 0, %bb ]
%exitcond4 = icmp ne i64 %indvars.iv2, 100
br i1 %exitcond4, label %bb7, label %bb11
bb11.preheader: ; preds = %bb9
br label %bb13
bb7: ; preds = %bb5
%tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2
%tmp8 = trunc i64 %indvars.iv2 to i32
bb7: ; preds = %bb, %bb9
%indvars.iv22 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb9 ]
%tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv22
%tmp8 = trunc i64 %indvars.iv22 to i32
store i32 %tmp8, i32* %tmp, align 4
br label %bb9
bb9: ; preds = %bb7
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
br label %bb5
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv22, 1
%exitcond4 = icmp ne i64 %indvars.iv.next3, 100
br i1 %exitcond4, label %bb7, label %bb11.preheader
bb11: ; preds = %bb18, %bb5
%indvars.iv = phi i64 [ %indvars.iv.next, %bb18 ], [ 0, %bb5 ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %bb13, label %bb19
bb13: ; preds = %bb11
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
bb13: ; preds = %bb11.preheader, %bb18
%indvars.iv1 = phi i64 [ 0, %bb11.preheader ], [ %indvars.iv.next, %bb18 ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
%tmp14 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv.next
%tmp15 = load i32, i32* %tmp14, align 4
%tmp16 = shl nsw i32 %tmp15, 1
%tmp17 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
%tmp17 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1
store i32 %tmp16, i32* %tmp17, align 4
br label %bb18
bb18: ; preds = %bb13
br label %bb11
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %bb13, label %bb19
bb19: ; preds = %bb11
bb19: ; preds = %bb18
ret void
}
@ -282,41 +271,38 @@ bb19: ; preds = %bb11
; CHECK: Loop Fusion complete
define i32 @sumTest(i32* noalias %arg) {
bb:
br label %bb6
br label %bb9
bb6: ; preds = %bb9, %bb
%indvars.iv3 = phi i64 [ %indvars.iv.next4, %bb9 ], [ 0, %bb ]
%.01 = phi i32 [ 0, %bb ], [ %tmp11, %bb9 ]
%exitcond5 = icmp ne i64 %indvars.iv3, 100
br i1 %exitcond5, label %bb9, label %bb13
bb13.preheader: ; preds = %bb9
br label %bb15
bb9: ; preds = %bb6
%tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv3
bb9: ; preds = %bb, %bb9
%.01.lcssa = phi i32 [ 0, %bb ], [ %tmp11, %bb9 ]
%.013 = phi i32 [ 0, %bb ], [ %tmp11, %bb9 ]
%indvars.iv32 = phi i64 [ 0, %bb ], [ %indvars.iv.next4, %bb9 ]
%tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv32
%tmp10 = load i32, i32* %tmp, align 4
%tmp11 = add nsw i32 %.01, %tmp10
%indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
br label %bb6
%tmp11 = add nsw i32 %.013, %tmp10
%indvars.iv.next4 = add nuw nsw i64 %indvars.iv32, 1
%exitcond5 = icmp ne i64 %indvars.iv.next4, 100
br i1 %exitcond5, label %bb9, label %bb13.preheader
bb13: ; preds = %bb20, %bb6
%.01.lcssa = phi i32 [ %.01, %bb6 ], [ %.01.lcssa, %bb20 ]
%indvars.iv = phi i64 [ %indvars.iv.next, %bb20 ], [ 0, %bb6 ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %bb15, label %bb14
bb14: ; preds = %bb13
bb14: ; preds = %bb20
br label %bb21
bb15: ; preds = %bb13
%tmp16 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv
bb15: ; preds = %bb13.preheader, %bb20
%indvars.iv1 = phi i64 [ 0, %bb13.preheader ], [ %indvars.iv.next, %bb20 ]
%tmp16 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv1
%tmp17 = load i32, i32* %tmp16, align 4
%tmp18 = sdiv i32 %tmp17, %.01.lcssa
%tmp19 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
%tmp19 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1
store i32 %tmp18, i32* %tmp19, align 4
br label %bb20
bb20: ; preds = %bb15
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb13
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %bb15, label %bb14
bb21: ; preds = %bb14
ret i32 %.01.lcssa
@ -369,3 +355,62 @@ for.cond.cleanup7: ; preds = %for.body8, %entry
%sum1.0.lcssa36 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body8 ]
ret float %sum1.0.lcssa36
}
; Check that non-rotated loops are not considered for fusion.
; CHECK: Performing Loop Fusion on function notRotated
; CHECK: Loop bb{{.*}} is not rotated!
; CHECK: Loop bb{{.*}} is not rotated!
; Test input with two consecutive counted loops that are both left in
; top-tested form: each loop evaluates its exit comparison in the header block
; (bb5, bb17) and its latch (bb14, bb27) branches back unconditionally.
; The FileCheck lines above expect a "is not rotated!" debug message for each.
define void @notRotated(i32* noalias %arg) {
bb:
br label %bb5
; First loop header: induction values start at 0 on entry from %bb and take
; the incremented values from the latch %bb14. The exit test is done here,
; before the body executes.
bb5: ; preds = %bb14, %bb
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ]
%.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
%exitcond4 = icmp ne i64 %indvars.iv2, 100
br i1 %exitcond4, label %bb7, label %bb17
; First loop body: computes ((%.01 - 3) * trunc(%indvars.iv2 + 3)) srem
; trunc(%indvars.iv2) and stores it to %arg[%indvars.iv2].
bb7: ; preds = %bb5
%tmp = add nsw i32 %.01, -3
%tmp8 = add nuw nsw i64 %indvars.iv2, 3
%tmp9 = trunc i64 %tmp8 to i32
%tmp10 = mul nsw i32 %tmp, %tmp9
%tmp11 = trunc i64 %indvars.iv2 to i32
%tmp12 = srem i32 %tmp10, %tmp11
%tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2
store i32 %tmp12, i32* %tmp13, align 4
br label %bb14
; First loop latch: bumps both counters and jumps back to the header without
; testing the exit condition.
bb14: ; preds = %bb7
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
%tmp15 = add nuw nsw i32 %.01, 1
br label %bb5
; Second loop header: entered directly from the first loop's header (%bb5) on
; exit; again the exit comparison lives in the header.
bb17: ; preds = %bb27, %bb5
%indvars.iv = phi i64 [ %indvars.iv.next, %bb27 ], [ 0, %bb5 ]
%.0 = phi i32 [ 0, %bb5 ], [ %tmp28, %bb27 ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %bb19, label %bb18
; Exit block of the second loop; only forwards to the return block.
bb18: ; preds = %bb17
br label %bb29
; Second loop body: same arithmetic as the first loop, but the result is
; stored to element %indvars.iv of the global array @B.
bb19: ; preds = %bb17
%tmp20 = add nsw i32 %.0, -3
%tmp21 = add nuw nsw i64 %indvars.iv, 3
%tmp22 = trunc i64 %tmp21 to i32
%tmp23 = mul nsw i32 %tmp20, %tmp22
%tmp24 = trunc i64 %indvars.iv to i32
%tmp25 = srem i32 %tmp23, %tmp24
%tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
store i32 %tmp25, i32* %tmp26, align 4
br label %bb27
; Second loop latch: increments the counters and branches back unconditionally.
bb27: ; preds = %bb19
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%tmp28 = add nuw nsw i32 %.0, 1
br label %bb17
bb29: ; preds = %bb18
ret void
}

View File

@ -1,301 +1,269 @@
; RUN: opt -S -loop-fusion -pass-remarks-missed=loop-fusion -disable-output < %s 2>&1 | FileCheck %s
;
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@B = common global [1024 x i32] zeroinitializer, align 16, !dbg !0
; CHECK: remark: diagnostics_missed.c:18:3: [non_adjacent]: entry and for.end: Loops are not adjacent
define void @non_adjacent(i32* noalias %A) !dbg !67 {
define void @non_adjacent(i32* noalias %A) !dbg !14 {
entry:
br label %for.cond
br label %for.body
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond1 = icmp ne i64 %i.0, 100
br i1 %exitcond1, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
for.cond.cleanup: ; preds = %for.inc
br label %for.end
for.body: ; preds = %for.cond
%sub = add nsw i64 %i.0, -3
%add = add nuw nsw i64 %i.0, 3
for.body: ; preds = %entry, %for.inc
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
%sub = add nsw i64 %i.02, -3
%add = add nuw nsw i64 %i.02, 3
%mul = mul nsw i64 %sub, %add
%rem = srem i64 %mul, %i.0
%rem = srem i64 %mul, %i.02
%conv = trunc i64 %rem to i32
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.02
store i32 %conv, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nuw nsw i64 %i.0, 1, !dbg !86
br label %for.cond, !dbg !87, !llvm.loop !88
%inc = add nuw nsw i64 %i.02, 1, !dbg !26
%exitcond1 = icmp ne i64 %inc, 100
br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !llvm.loop !28
for.end: ; preds = %for.cond.cleanup
br label %for.cond2
br label %for.body6
for.cond2: ; preds = %for.inc13, %for.end
%i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
%exitcond = icmp ne i64 %i1.0, 100
br i1 %exitcond, label %for.body6, label %for.cond.cleanup5
for.cond.cleanup5: ; preds = %for.cond2
for.cond.cleanup5: ; preds = %for.inc13
br label %for.end15
for.body6: ; preds = %for.cond2
%sub7 = add nsw i64 %i1.0, -3
%add8 = add nuw nsw i64 %i1.0, 3
for.body6: ; preds = %for.end, %for.inc13
%i1.01 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
%sub7 = add nsw i64 %i1.01, -3
%add8 = add nuw nsw i64 %i1.01, 3
%mul9 = mul nsw i64 %sub7, %add8
%rem10 = srem i64 %mul9, %i1.0
%rem10 = srem i64 %mul9, %i1.01
%conv11 = trunc i64 %rem10 to i32
%arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0
%arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.01
store i32 %conv11, i32* %arrayidx12, align 4
br label %for.inc13
for.inc13: ; preds = %for.body6
%inc14 = add nuw nsw i64 %i1.0, 1, !dbg !100
br label %for.cond2, !dbg !101, !llvm.loop !102
%inc14 = add nuw nsw i64 %i1.01, 1, !dbg !31
%exitcond = icmp ne i64 %inc14, 100
br i1 %exitcond, label %for.body6, label %for.cond.cleanup5, !llvm.loop !33
for.end15: ; preds = %for.cond.cleanup5
ret void
}
; CHECK: remark: diagnostics_missed.c:28:3: [different_bounds]: entry and for.end: Loop trip counts are not the same
define void @different_bounds(i32* noalias %A) !dbg !105 {
define void @different_bounds(i32* noalias %A) !dbg !36 {
entry:
br label %for.cond
br label %for.body
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond1 = icmp ne i64 %i.0, 100
br i1 %exitcond1, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
for.cond.cleanup: ; preds = %for.inc
br label %for.end
for.body: ; preds = %for.cond
%sub = add nsw i64 %i.0, -3
%add = add nuw nsw i64 %i.0, 3
for.body: ; preds = %entry, %for.inc
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
%sub = add nsw i64 %i.02, -3
%add = add nuw nsw i64 %i.02, 3
%mul = mul nsw i64 %sub, %add
%rem = srem i64 %mul, %i.0
%rem = srem i64 %mul, %i.02
%conv = trunc i64 %rem to i32
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.02
store i32 %conv, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nuw nsw i64 %i.0, 1, !dbg !123
br label %for.cond, !dbg !124, !llvm.loop !125
%inc = add nuw nsw i64 %i.02, 1, !dbg !43
%exitcond1 = icmp ne i64 %inc, 100
br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !llvm.loop !45
for.end: ; preds = %for.cond.cleanup
br label %for.cond2
br label %for.body6
for.cond2: ; preds = %for.inc13, %for.end
%i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
%exitcond = icmp ne i64 %i1.0, 200
br i1 %exitcond, label %for.body6, label %for.cond.cleanup5
for.cond.cleanup5: ; preds = %for.cond2
for.cond.cleanup5: ; preds = %for.inc13
br label %for.end15
for.body6: ; preds = %for.cond2
%sub7 = add nsw i64 %i1.0, -3
%add8 = add nuw nsw i64 %i1.0, 3
for.body6: ; preds = %for.end, %for.inc13
%i1.01 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
%sub7 = add nsw i64 %i1.01, -3
%add8 = add nuw nsw i64 %i1.01, 3
%mul9 = mul nsw i64 %sub7, %add8
%rem10 = srem i64 %mul9, %i1.0
%rem10 = srem i64 %mul9, %i1.01
%conv11 = trunc i64 %rem10 to i32
%arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0
%arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.01
store i32 %conv11, i32* %arrayidx12, align 4
br label %for.inc13
for.inc13: ; preds = %for.body6
%inc14 = add nuw nsw i64 %i1.0, 1
br label %for.cond2, !dbg !138, !llvm.loop !139
%inc14 = add nuw nsw i64 %i1.01, 1
%exitcond = icmp ne i64 %inc14, 200
br i1 %exitcond, label %for.body6, label %for.cond.cleanup5, !llvm.loop !48
for.end15: ; preds = %for.cond.cleanup5
ret void
}
; CHECK: remark: diagnostics_missed.c:38:3: [negative_dependence]: entry and for.end: Loop has a non-empty preheader
define void @negative_dependence(i32* noalias %A) !dbg !142 {
define void @negative_dependence(i32* noalias %A) !dbg !51 {
entry:
br label %for.cond
br label %for.body
for.cond: ; preds = %for.inc, %entry
%indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ]
%exitcond3 = icmp ne i64 %indvars.iv1, 100
br i1 %exitcond3, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
%tmp = trunc i64 %indvars.iv1 to i32
for.body: ; preds = %entry, %for.inc
%indvars.iv13 = phi i64 [ 0, %entry ], [ %indvars.iv.next2, %for.inc ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv13
%tmp = trunc i64 %indvars.iv13 to i32
store i32 %tmp, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
br label %for.cond, !dbg !160, !llvm.loop !161
%indvars.iv.next2 = add nuw nsw i64 %indvars.iv13, 1
%exitcond3 = icmp ne i64 %indvars.iv.next2, 100
br i1 %exitcond3, label %for.body, label %for.end, !llvm.loop !58
for.end: ; preds = %for.cond
call void @llvm.dbg.value(metadata i32 0, metadata !147, metadata !DIExpression()), !dbg !163
br label %for.cond2, !dbg !164
for.end: ; preds = %for.inc
call void @llvm.dbg.value(metadata i32 0, metadata !56, metadata !DIExpression()), !dbg !61
br label %for.body5
for.cond2: ; preds = %for.inc10, %for.end
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.end ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %for.body5, label %for.end12
for.body5: ; preds = %for.cond2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
for.body5: ; preds = %for.end, %for.inc10
%indvars.iv2 = phi i64 [ 0, %for.end ], [ %indvars.iv.next, %for.inc10 ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
%arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
%tmp4 = load i32, i32* %arrayidx7, align 4
%mul = shl nsw i32 %tmp4, 1
%arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
%arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv2
store i32 %mul, i32* %arrayidx9, align 4
br label %for.inc10
for.inc10: ; preds = %for.body5
br label %for.cond2
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %for.body5, label %for.end12
for.end12: ; preds = %for.cond.
ret void, !dbg !178
for.end12: ; preds = %for.inc10
ret void, !dbg !62
}
; CHECK: remark: diagnostics_missed.c:51:3: [sumTest]: entry and for.cond2.preheader: Dependencies prevent fusion
define i32 @sumTest(i32* noalias %A) !dbg !179 {
define i32 @sumTest(i32* noalias %A) !dbg !63 {
entry:
br label %for.cond
br label %for.body
for.cond: ; preds = %for.inc, %entry
%indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ]
%sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
%exitcond3 = icmp ne i64 %indvars.iv1, 100
br i1 %exitcond3, label %for.body, label %for.cond2
for.cond2.preheader: ; preds = %for.inc
br label %for.body5
for.body: ; preds = %for.cond
for.body: ; preds = %entry, %for.inc
%sum.04 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
%indvars.iv13 = phi i64 [ 0, %entry ], [ %indvars.iv.next2, %for.inc ]
br label %for.inc
for.inc: ; preds = %for.body
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv13
%tmp = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %sum.0, %tmp
%indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
br label %for.cond, !dbg !199, !llvm.loop !200
%add = add nsw i32 %sum.04, %tmp
%indvars.iv.next2 = add nuw nsw i64 %indvars.iv13, 1
%exitcond3 = icmp ne i64 %indvars.iv.next2, 100
br i1 %exitcond3, label %for.body, label %for.cond2.preheader, !llvm.loop !73
for.cond2: ; preds = %for.inc10, %for.cond
%sum.0.lcssa = phi i32 [ %sum.0, %for.cond ], [ %sum.0.lcssa, %for.inc10 ]
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.cond ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %for.body5, label %for.end12
for.body5: ; preds = %for.cond2
%arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
for.body5: ; preds = %for.cond2.preheader, %for.inc10
%indvars.iv2 = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next, %for.inc10 ]
%arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv2
%tmp4 = load i32, i32* %arrayidx7, align 4
%div = sdiv i32 %tmp4, %sum.0.lcssa
%arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
%div = sdiv i32 %tmp4, %add
%arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv2
store i32 %div, i32* %arrayidx9, align 4
br label %for.inc10
for.inc10: ; preds = %for.body5
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond2
%indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %for.body5, label %for.end12
for.end12: ; preds = %for.cond2
ret i32 %sum.0.lcssa, !dbg !215
for.end12: ; preds = %for.inc10
ret i32 %add, !dbg !76
}
declare void @llvm.dbg.value(metadata, metadata, metadata)
; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata) #0
attributes #0 = { nounwind readnone speculatable willreturn }
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!11, !12, !13, !14}
!llvm.module.flags = !{!10, !11, !12, !13}
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !6, line: 46, type: !7, isLocal: false, isDefinition: true)
!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 46, type: !6, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git 23c4baaa9f5b33d2d52eda981d376c6b0a7a3180)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU)
!3 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp")
!4 = !{}
!5 = !{!0}
!6 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp")
!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 32768, elements: !9)
!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!9 = !{!10}
!10 = !DISubrange(count: 1024)
!11 = !{i32 2, !"Dwarf Version", i32 4}
!12 = !{i32 2, !"Debug Info Version", i32 3}
!13 = !{i32 1, !"wchar_size", i32 4}
!14 = !{i32 7, !"PIC Level", i32 2}
!17 = !DISubroutineType(types: !18)
!18 = !{null, !19}
!19 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !20)
!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
!67 = distinct !DISubprogram(name: "non_adjacent", scope: !6, file: !6, line: 17, type: !17, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !68)
!68 = !{!69, !70, !73}
!69 = !DILocalVariable(name: "A", arg: 1, scope: !67, file: !6, line: 17, type: !19)
!70 = !DILocalVariable(name: "i", scope: !71, file: !6, line: 18, type: !72)
!71 = distinct !DILexicalBlock(scope: !67, file: !6, line: 18, column: 3)
!72 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed)
!73 = !DILocalVariable(name: "i", scope: !74, file: !6, line: 22, type: !72)
!74 = distinct !DILexicalBlock(scope: !67, file: !6, line: 22, column: 3)
!79 = distinct !DILexicalBlock(scope: !71, file: !6, line: 18, column: 3)
!80 = !DILocation(line: 18, column: 3, scope: !71)
!86 = !DILocation(line: 18, column: 30, scope: !79)
!87 = !DILocation(line: 18, column: 3, scope: !79)
!88 = distinct !{!88, !80, !89}
!89 = !DILocation(line: 20, column: 3, scope: !71)
!93 = distinct !DILexicalBlock(scope: !74, file: !6, line: 22, column: 3)
!94 = !DILocation(line: 22, column: 3, scope: !74)
!100 = !DILocation(line: 22, column: 30, scope: !93)
!101 = !DILocation(line: 22, column: 3, scope: !93)
!102 = distinct !{!102, !94, !103}
!103 = !DILocation(line: 24, column: 3, scope: !74)
!105 = distinct !DISubprogram(name: "different_bounds", scope: !6, file: !6, line: 27, type: !17, scopeLine: 27, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !106)
!106 = !{!107, !108, !110}
!107 = !DILocalVariable(name: "A", arg: 1, scope: !105, file: !6, line: 27, type: !19)
!108 = !DILocalVariable(name: "i", scope: !109, file: !6, line: 28, type: !72)
!109 = distinct !DILexicalBlock(scope: !105, file: !6, line: 28, column: 3)
!110 = !DILocalVariable(name: "i", scope: !111, file: !6, line: 32, type: !72)
!111 = distinct !DILexicalBlock(scope: !105, file: !6, line: 32, column: 3)
!116 = distinct !DILexicalBlock(scope: !109, file: !6, line: 28, column: 3)
!117 = !DILocation(line: 28, column: 3, scope: !109)
!123 = !DILocation(line: 28, column: 30, scope: !116)
!124 = !DILocation(line: 28, column: 3, scope: !116)
!125 = distinct !{!125, !117, !126}
!126 = !DILocation(line: 30, column: 3, scope: !109)
!130 = distinct !DILexicalBlock(scope: !111, file: !6, line: 32, column: 3)
!131 = !DILocation(line: 32, column: 3, scope: !111)
!138 = !DILocation(line: 32, column: 3, scope: !130)
!139 = distinct !{!139, !131, !140}
!140 = !DILocation(line: 34, column: 3, scope: !111)
!142 = distinct !DISubprogram(name: "negative_dependence", scope: !6, file: !6, line: 37, type: !17, scopeLine: 37, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !143)
!143 = !{!144, !145, !147}
!144 = !DILocalVariable(name: "A", arg: 1, scope: !142, file: !6, line: 37, type: !19)
!145 = !DILocalVariable(name: "i", scope: !146, file: !6, line: 38, type: !8)
!146 = distinct !DILexicalBlock(scope: !142, file: !6, line: 38, column: 3)
!147 = !DILocalVariable(name: "i", scope: !148, file: !6, line: 42, type: !8)
!148 = distinct !DILexicalBlock(scope: !142, file: !6, line: 42, column: 3)
!153 = distinct !DILexicalBlock(scope: !146, file: !6, line: 38, column: 3)
!154 = !DILocation(line: 38, column: 3, scope: !146)
!160 = !DILocation(line: 38, column: 3, scope: !153)
!161 = distinct !{!161, !154, !162}
!162 = !DILocation(line: 40, column: 3, scope: !146)
!163 = !DILocation(line: 0, scope: !148)
!164 = !DILocation(line: 42, column: 8, scope: !148)
!178 = !DILocation(line: 45, column: 1, scope: !142)
!179 = distinct !DISubprogram(name: "sumTest", scope: !6, file: !6, line: 48, type: !180, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !182)
!180 = !DISubroutineType(types: !181)
!181 = !{!8, !19}
!182 = !{!183, !184, !185, !187}
!183 = !DILocalVariable(name: "A", arg: 1, scope: !179, file: !6, line: 48, type: !19)
!184 = !DILocalVariable(name: "sum", scope: !179, file: !6, line: 49, type: !8)
!185 = !DILocalVariable(name: "i", scope: !186, file: !6, line: 51, type: !8)
!186 = distinct !DILexicalBlock(scope: !179, file: !6, line: 51, column: 3)
!187 = !DILocalVariable(name: "i", scope: !188, file: !6, line: 54, type: !8)
!188 = distinct !DILexicalBlock(scope: !179, file: !6, line: 54, column: 3)
!193 = distinct !DILexicalBlock(scope: !186, file: !6, line: 51, column: 3)
!194 = !DILocation(line: 51, column: 3, scope: !186)
!199 = !DILocation(line: 51, column: 3, scope: !193)
!200 = distinct !{!200, !194, !201}
!201 = !DILocation(line: 52, column: 15, scope: !186)
!215 = !DILocation(line: 57, column: 3, scope: !179)
!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 32768, elements: !8)
!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!8 = !{!9}
!9 = !DISubrange(count: 1024)
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
!12 = !{i32 1, !"wchar_size", i32 4}
!13 = !{i32 7, !"PIC Level", i32 2}
!14 = distinct !DISubprogram(name: "non_adjacent", scope: !3, file: !3, line: 17, type: !15, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !19)
!15 = !DISubroutineType(types: !16)
!16 = !{null, !17}
!17 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !18)
!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64)
!19 = !{!20, !21, !24}
!20 = !DILocalVariable(name: "A", arg: 1, scope: !14, file: !3, line: 17, type: !17)
!21 = !DILocalVariable(name: "i", scope: !22, file: !3, line: 18, type: !23)
!22 = distinct !DILexicalBlock(scope: !14, file: !3, line: 18, column: 3)
!23 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed)
!24 = !DILocalVariable(name: "i", scope: !25, file: !3, line: 22, type: !23)
!25 = distinct !DILexicalBlock(scope: !14, file: !3, line: 22, column: 3)
!26 = !DILocation(line: 18, column: 30, scope: !27)
!27 = distinct !DILexicalBlock(scope: !22, file: !3, line: 18, column: 3)
!28 = distinct !{!28, !29, !30}
!29 = !DILocation(line: 18, column: 3, scope: !22)
!30 = !DILocation(line: 20, column: 3, scope: !22)
!31 = !DILocation(line: 22, column: 30, scope: !32)
!32 = distinct !DILexicalBlock(scope: !25, file: !3, line: 22, column: 3)
!33 = distinct !{!33, !34, !35}
!34 = !DILocation(line: 22, column: 3, scope: !25)
!35 = !DILocation(line: 24, column: 3, scope: !25)
!36 = distinct !DISubprogram(name: "different_bounds", scope: !3, file: !3, line: 27, type: !15, scopeLine: 27, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !37)
!37 = !{!38, !39, !41}
!38 = !DILocalVariable(name: "A", arg: 1, scope: !36, file: !3, line: 27, type: !17)
!39 = !DILocalVariable(name: "i", scope: !40, file: !3, line: 28, type: !23)
!40 = distinct !DILexicalBlock(scope: !36, file: !3, line: 28, column: 3)
!41 = !DILocalVariable(name: "i", scope: !42, file: !3, line: 32, type: !23)
!42 = distinct !DILexicalBlock(scope: !36, file: !3, line: 32, column: 3)
!43 = !DILocation(line: 28, column: 30, scope: !44)
!44 = distinct !DILexicalBlock(scope: !40, file: !3, line: 28, column: 3)
!45 = distinct !{!45, !46, !47}
!46 = !DILocation(line: 28, column: 3, scope: !40)
!47 = !DILocation(line: 30, column: 3, scope: !40)
!48 = distinct !{!48, !49, !50}
!49 = !DILocation(line: 32, column: 3, scope: !42)
!50 = !DILocation(line: 34, column: 3, scope: !42)
!51 = distinct !DISubprogram(name: "negative_dependence", scope: !3, file: !3, line: 37, type: !15, scopeLine: 37, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !52)
!52 = !{!53, !54, !56}
!53 = !DILocalVariable(name: "A", arg: 1, scope: !51, file: !3, line: 37, type: !17)
!54 = !DILocalVariable(name: "i", scope: !55, file: !3, line: 38, type: !7)
!55 = distinct !DILexicalBlock(scope: !51, file: !3, line: 38, column: 3)
!56 = !DILocalVariable(name: "i", scope: !57, file: !3, line: 42, type: !7)
!57 = distinct !DILexicalBlock(scope: !51, file: !3, line: 42, column: 3)
!58 = distinct !{!58, !59, !60}
!59 = !DILocation(line: 38, column: 3, scope: !55)
!60 = !DILocation(line: 40, column: 3, scope: !55)
!61 = !DILocation(line: 0, scope: !57)
!62 = !DILocation(line: 45, column: 1, scope: !51)
!63 = distinct !DISubprogram(name: "sumTest", scope: !3, file: !3, line: 48, type: !64, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !66)
!64 = !DISubroutineType(types: !65)
!65 = !{!7, !17}
!66 = !{!67, !68, !69, !71}
!67 = !DILocalVariable(name: "A", arg: 1, scope: !63, file: !3, line: 48, type: !17)
!68 = !DILocalVariable(name: "sum", scope: !63, file: !3, line: 49, type: !7)
!69 = !DILocalVariable(name: "i", scope: !70, file: !3, line: 51, type: !7)
!70 = distinct !DILexicalBlock(scope: !63, file: !3, line: 51, column: 3)
!71 = !DILocalVariable(name: "i", scope: !72, file: !3, line: 54, type: !7)
!72 = distinct !DILexicalBlock(scope: !63, file: !3, line: 54, column: 3)
!73 = distinct !{!73, !74, !75}
!74 = !DILocation(line: 51, column: 3, scope: !70)
!75 = !DILocation(line: 52, column: 15, scope: !70)
!76 = !DILocation(line: 57, column: 3, scope: !63)

View File

@ -9,127 +9,113 @@
; CHECK-NEXT: bb:
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br i1 %exitcond12, label %[[LOOP1BODY:bb[0-9]+]], label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP1BODY]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br label %[[LOOP2PREHEADER]]
; CHECK: [[LOOP2PREHEADER]]
; CHECK: br i1 %exitcond9, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP3PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]+]], label %[[LOOP2BODY]]
; CHECK: [[LOOP2BODY]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
; CHECK: br label %[[LOOP3PREHEADER]]
; CHECK: [[LOOP3PREHEADER]]
; CHECK: br i1 %exitcond6, label %[[LOOP3HEADER:bb[0-9]+]], label %[[LOOP4PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP3HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP3BODY:bb[0-9]+]], label %[[LOOP3BODY]]
; CHECK: [[LOOP3BODY]]
; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]]
; CHECK: [[LOOP3LATCH]]
; CHECK: br label %[[LOOP4PREHEADER]]
; CHECK: [[LOOP4PREHEADER]]
; CHECK: br i1 %exitcond, label %[[LOOP4HEADER:bb[0-9]+]], label %[[LOOP4EXIT:bb[0-9]+]]
; CHECK: [[LOOP4EXIT]]
; CHECK: br label %[[FUNCEXIT:bb[0-9]+]]
; CHECK: [[LOOP4HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP4BODY:bb[0-9]+]], label %[[LOOP4BODY]]
; CHECK: [[LOOP4BODY]]
; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]]
; CHECK: [[LOOP4LATCH]]
; CHECK: br label %[[LOOP1HEADER]]
; CHECK: [[FUNCEXIT]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOPEXIT:bb[0-9]+]]
; CHECK: ret void
define void @dep_free() {
bb:
br label %bb13
br label %bb15
bb13: ; preds = %bb22, %bb
%indvars.iv10 = phi i64 [ %indvars.iv.next11, %bb22 ], [ 0, %bb ]
%.0 = phi i32 [ 0, %bb ], [ %tmp23, %bb22 ]
%exitcond12 = icmp ne i64 %indvars.iv10, 100
br i1 %exitcond12, label %bb15, label %bb25
bb25.preheader: ; preds = %bb22
br label %bb27
bb15: ; preds = %bb13
%tmp = add nsw i32 %.0, -3
%tmp16 = add nuw nsw i64 %indvars.iv10, 3
bb15: ; preds = %bb, %bb22
%.08 = phi i32 [ 0, %bb ], [ %tmp23, %bb22 ]
%indvars.iv107 = phi i64 [ 0, %bb ], [ %indvars.iv.next11, %bb22 ]
%tmp = add nsw i32 %.08, -3
%tmp16 = add nuw nsw i64 %indvars.iv107, 3
%tmp17 = trunc i64 %tmp16 to i32
%tmp18 = mul nsw i32 %tmp, %tmp17
%tmp19 = trunc i64 %indvars.iv10 to i32
%tmp19 = trunc i64 %indvars.iv107 to i32
%tmp20 = srem i32 %tmp18, %tmp19
%tmp21 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv10
%tmp21 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv107
store i32 %tmp20, i32* %tmp21, align 4
br label %bb22
bb22: ; preds = %bb15
%indvars.iv.next11 = add nuw nsw i64 %indvars.iv10, 1
%tmp23 = add nuw nsw i32 %.0, 1
br label %bb13
%indvars.iv.next11 = add nuw nsw i64 %indvars.iv107, 1
%tmp23 = add nuw nsw i32 %.08, 1
%exitcond12 = icmp ne i64 %indvars.iv.next11, 100
br i1 %exitcond12, label %bb15, label %bb25.preheader
bb25: ; preds = %bb35, %bb13
%indvars.iv7 = phi i64 [ %indvars.iv.next8, %bb35 ], [ 0, %bb13 ]
%.01 = phi i32 [ 0, %bb13 ], [ %tmp36, %bb35 ]
%exitcond9 = icmp ne i64 %indvars.iv7, 100
br i1 %exitcond9, label %bb27, label %bb38
bb38.preheader: ; preds = %bb35
br label %bb40
bb27: ; preds = %bb25
%tmp28 = add nsw i32 %.01, -3
%tmp29 = add nuw nsw i64 %indvars.iv7, 3
bb27: ; preds = %bb25.preheader, %bb35
%.016 = phi i32 [ 0, %bb25.preheader ], [ %tmp36, %bb35 ]
%indvars.iv75 = phi i64 [ 0, %bb25.preheader ], [ %indvars.iv.next8, %bb35 ]
%tmp28 = add nsw i32 %.016, -3
%tmp29 = add nuw nsw i64 %indvars.iv75, 3
%tmp30 = trunc i64 %tmp29 to i32
%tmp31 = mul nsw i32 %tmp28, %tmp30
%tmp32 = trunc i64 %indvars.iv7 to i32
%tmp32 = trunc i64 %indvars.iv75 to i32
%tmp33 = srem i32 %tmp31, %tmp32
%tmp34 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv7
%tmp34 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv75
store i32 %tmp33, i32* %tmp34, align 4
br label %bb35
bb35: ; preds = %bb27
%indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
%tmp36 = add nuw nsw i32 %.01, 1
br label %bb25
%indvars.iv.next8 = add nuw nsw i64 %indvars.iv75, 1
%tmp36 = add nuw nsw i32 %.016, 1
%exitcond9 = icmp ne i64 %indvars.iv.next8, 100
br i1 %exitcond9, label %bb27, label %bb38.preheader
bb38: ; preds = %bb48, %bb25
%indvars.iv4 = phi i64 [ %indvars.iv.next5, %bb48 ], [ 0, %bb25 ]
%.02 = phi i32 [ 0, %bb25 ], [ %tmp49, %bb48 ]
%exitcond6 = icmp ne i64 %indvars.iv4, 100
br i1 %exitcond6, label %bb40, label %bb51
bb51.preheader: ; preds = %bb48
br label %bb53
bb40: ; preds = %bb38
%tmp41 = add nsw i32 %.02, -3
%tmp42 = add nuw nsw i64 %indvars.iv4, 3
bb40: ; preds = %bb38.preheader, %bb48
%.024 = phi i32 [ 0, %bb38.preheader ], [ %tmp49, %bb48 ]
%indvars.iv43 = phi i64 [ 0, %bb38.preheader ], [ %indvars.iv.next5, %bb48 ]
%tmp41 = add nsw i32 %.024, -3
%tmp42 = add nuw nsw i64 %indvars.iv43, 3
%tmp43 = trunc i64 %tmp42 to i32
%tmp44 = mul nsw i32 %tmp41, %tmp43
%tmp45 = trunc i64 %indvars.iv4 to i32
%tmp45 = trunc i64 %indvars.iv43 to i32
%tmp46 = srem i32 %tmp44, %tmp45
%tmp47 = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %indvars.iv4
%tmp47 = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %indvars.iv43
store i32 %tmp46, i32* %tmp47, align 4
br label %bb48
bb48: ; preds = %bb40
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
%tmp49 = add nuw nsw i32 %.02, 1
br label %bb38
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv43, 1
%tmp49 = add nuw nsw i32 %.024, 1
%exitcond6 = icmp ne i64 %indvars.iv.next5, 100
br i1 %exitcond6, label %bb40, label %bb51.preheader
bb51: ; preds = %bb61, %bb38
%indvars.iv = phi i64 [ %indvars.iv.next, %bb61 ], [ 0, %bb38 ]
%.03 = phi i32 [ 0, %bb38 ], [ %tmp62, %bb61 ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %bb53, label %bb52
bb52: ; preds = %bb51
bb52: ; preds = %bb61
br label %bb63
bb53: ; preds = %bb51
%tmp54 = add nsw i32 %.03, -3
%tmp55 = add nuw nsw i64 %indvars.iv, 3
bb53: ; preds = %bb51.preheader, %bb61
%.032 = phi i32 [ 0, %bb51.preheader ], [ %tmp62, %bb61 ]
%indvars.iv1 = phi i64 [ 0, %bb51.preheader ], [ %indvars.iv.next, %bb61 ]
%tmp54 = add nsw i32 %.032, -3
%tmp55 = add nuw nsw i64 %indvars.iv1, 3
%tmp56 = trunc i64 %tmp55 to i32
%tmp57 = mul nsw i32 %tmp54, %tmp56
%tmp58 = trunc i64 %indvars.iv to i32
%tmp58 = trunc i64 %indvars.iv1 to i32
%tmp59 = srem i32 %tmp57, %tmp58
%tmp60 = getelementptr inbounds [1024 x i32], [1024 x i32]* @D, i64 0, i64 %indvars.iv
%tmp60 = getelementptr inbounds [1024 x i32], [1024 x i32]* @D, i64 0, i64 %indvars.iv1
store i32 %tmp59, i32* %tmp60, align 4
br label %bb61
bb61: ; preds = %bb53
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%tmp62 = add nuw nsw i32 %.03, 1
br label %bb51
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
%tmp62 = add nuw nsw i32 %.032, 1
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %bb53, label %bb52
bb63: ; preds = %bb52
ret void

View File

@ -23,24 +23,21 @@
; CHECK-NEXT: bb:
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br i1 %exitcond12, label %[[LOOP3PREHEADER:bb[0-9]+.preheader]], label %[[LOOP2HEADER:bb[0-9]+]]
; CHECK: [[LOOP3PREHEADER]]
; CHECK: br label %[[LOOP3HEADER:bb[0-9]+]]
; CHECK: [[LOOP3HEADER]]
; CHECK: br i1 %exitcond9, label %[[LOOP3BODY:bb[0-9]+]], label %[[LOOP1LATCH:bb[0-9]+]]
; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]]
; CHECK: [[LOOP3LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br label %[[LOOP2HEADER:bb[0-9]+]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br i1 %exitcond6, label %[[LOOP4PREHEADER:bb[0-9]+.preheader]], label %[[LOOP2EXITBLOCK:bb[0-9]+]]
; CHECK: [[LOOP4PREHEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2PREHEADER:bb[0-9]+]], label %[[LOOP2PREHEADER]]
; CHECK: [[LOOP2PREHEADER]]
; CHECK: br label %[[LOOP4HEADER:bb[0-9]+]]
; CHECK: [[LOOP2EXITBLOCK]]
; CHECK-NEXT: br label %[[FUNCEXIT:bb[0-9]+]]
; CHECK: [[LOOP4HEADER]]
; CHECK: br i1 %exitcond, label %[[LOOP4BODY:bb[0-9]+]], label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]]
; CHECK: [[LOOP4LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP4HEADER]], label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
; CHECK: br label %[[LOOP1HEADER:bb[0-9]+]]
; CHECK: [[FUNCEXIT]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]]
; CHECK: ret void
; TODO: The current version of loop fusion does not allow the inner loops to be
@ -48,72 +45,69 @@
; limitations that can be addressed in future improvements to fusion.
define void @dep_free() {
bb:
br label %bb13
br label %bb16
bb13: ; preds = %bb27, %bb
%indvars.iv10 = phi i64 [ %indvars.iv.next11, %bb27 ], [ 0, %bb ]
%.0 = phi i32 [ 0, %bb ], [ %tmp28, %bb27 ]
%exitcond12 = icmp ne i64 %indvars.iv10, 100
br i1 %exitcond12, label %bb16, label %bb30
bb16: ; preds = %bb, %bb27
%.06 = phi i32 [ 0, %bb ], [ %tmp28, %bb27 ]
%indvars.iv105 = phi i64 [ 0, %bb ], [ %indvars.iv.next11, %bb27 ]
br label %bb18
bb16: ; preds = %bb25, %bb13
%indvars.iv7 = phi i64 [ %indvars.iv.next8, %bb25 ], [ 0, %bb13 ]
%exitcond9 = icmp ne i64 %indvars.iv7, 100
br i1 %exitcond9, label %bb18, label %bb27
bb30: ; preds = %bb27
br label %bb33
bb18: ; preds = %bb16
%tmp = add nsw i32 %.0, -3
%tmp19 = add nuw nsw i64 %indvars.iv10, 3
bb18: ; preds = %bb16, %bb25
%indvars.iv74 = phi i64 [ 0, %bb16 ], [ %indvars.iv.next8, %bb25 ]
%tmp = add nsw i32 %.06, -3
%tmp19 = add nuw nsw i64 %indvars.iv105, 3
%tmp20 = trunc i64 %tmp19 to i32
%tmp21 = mul nsw i32 %tmp, %tmp20
%tmp22 = trunc i64 %indvars.iv10 to i32
%tmp22 = trunc i64 %indvars.iv105 to i32
%tmp23 = srem i32 %tmp21, %tmp22
%tmp24 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @A, i64 0, i64 %indvars.iv10, i64 %indvars.iv7
%tmp24 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @A, i64 0, i64 %indvars.iv105, i64 %indvars.iv74
store i32 %tmp23, i32* %tmp24, align 4
br label %bb25
bb25: ; preds = %bb18
%indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
br label %bb16
%indvars.iv.next8 = add nuw nsw i64 %indvars.iv74, 1
%exitcond9 = icmp ne i64 %indvars.iv.next8, 100
br i1 %exitcond9, label %bb18, label %bb27
bb27: ; preds = %bb16
%indvars.iv.next11 = add nuw nsw i64 %indvars.iv10, 1
%tmp28 = add nuw nsw i32 %.0, 1
br label %bb13
bb27: ; preds = %bb25
%indvars.iv.next11 = add nuw nsw i64 %indvars.iv105, 1
%tmp28 = add nuw nsw i32 %.06, 1
%exitcond12 = icmp ne i64 %indvars.iv.next11, 100
br i1 %exitcond12, label %bb16, label %bb30
bb30: ; preds = %bb45, %bb13
%indvars.iv4 = phi i64 [ %indvars.iv.next5, %bb45 ], [ 0, %bb13 ]
%.02 = phi i32 [ 0, %bb13 ], [ %tmp46, %bb45 ]
%exitcond6 = icmp ne i64 %indvars.iv4, 100
br i1 %exitcond6, label %bb33, label %bb31
bb33: ; preds = %bb30, %bb45
%.023 = phi i32 [ 0, %bb30 ], [ %tmp46, %bb45 ]
%indvars.iv42 = phi i64 [ 0, %bb30 ], [ %indvars.iv.next5, %bb45 ]
br label %bb35
bb31: ; preds = %bb30
bb31: ; preds = %bb45
br label %bb47
bb33: ; preds = %bb43, %bb30
%indvars.iv = phi i64 [ %indvars.iv.next, %bb43 ], [ 0, %bb30 ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %bb35, label %bb45
bb35: ; preds = %bb33
%tmp36 = add nsw i32 %.02, -3
%tmp37 = add nuw nsw i64 %indvars.iv4, 3
bb35: ; preds = %bb33, %bb43
%indvars.iv1 = phi i64 [ 0, %bb33 ], [ %indvars.iv.next, %bb43 ]
%tmp36 = add nsw i32 %.023, -3
%tmp37 = add nuw nsw i64 %indvars.iv42, 3
%tmp38 = trunc i64 %tmp37 to i32
%tmp39 = mul nsw i32 %tmp36, %tmp38
%tmp40 = trunc i64 %indvars.iv4 to i32
%tmp40 = trunc i64 %indvars.iv42 to i32
%tmp41 = srem i32 %tmp39, %tmp40
%tmp42 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @B, i64 0, i64 %indvars.iv4, i64 %indvars.iv
%tmp42 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @B, i64 0, i64 %indvars.iv42, i64 %indvars.iv1
store i32 %tmp41, i32* %tmp42, align 4
br label %bb43
bb43: ; preds = %bb35
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb33
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %bb35, label %bb45
bb45: ; preds = %bb33
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
%tmp46 = add nuw nsw i32 %.02, 1
br label %bb30
bb45: ; preds = %bb43
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv42, 1
%tmp46 = add nuw nsw i32 %.023, 1
%exitcond6 = icmp ne i64 %indvars.iv.next5, 100
br i1 %exitcond6, label %bb33, label %bb31
bb47: ; preds = %bb31
ret void

View File

@ -6,68 +6,61 @@
; CHECK-NEXT: bb:
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP1BODY]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP2PREHEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]]
; CHECK: [[LOOP2BODY]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
; CHECK: br label %[[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %{{.*}}
; CHECK: ret void
define void @dep_free(i32* noalias %arg) {
bb:
br label %bb5
br label %bb7
bb5: ; preds = %bb14, %bb
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ]
%.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
%exitcond4 = icmp ne i64 %indvars.iv2, 100
br i1 %exitcond4, label %bb7, label %bb17
bb7: ; preds = %bb5
%tmp = add nsw i32 %.01, -3
%tmp8 = add nuw nsw i64 %indvars.iv2, 3
bb7: ; preds = %bb, %bb14
%.014 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
%indvars.iv23 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb14 ]
%tmp = add nsw i32 %.014, -3
%tmp8 = add nuw nsw i64 %indvars.iv23, 3
%tmp9 = trunc i64 %tmp8 to i32
%tmp10 = mul nsw i32 %tmp, %tmp9
%tmp11 = trunc i64 %indvars.iv2 to i32
%tmp11 = trunc i64 %indvars.iv23 to i32
%tmp12 = srem i32 %tmp10, %tmp11
%tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2
%tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23
store i32 %tmp12, i32* %tmp13, align 4
br label %bb14
bb14: ; preds = %bb7
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
%tmp15 = add nuw nsw i32 %.01, 1
br label %bb5
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1
%tmp15 = add nuw nsw i32 %.014, 1
%exitcond4 = icmp ne i64 %indvars.iv.next3, 100
br i1 %exitcond4, label %bb7, label %bb17.preheader
bb17: ; preds = %bb27, %bb5
%indvars.iv = phi i64 [ %indvars.iv.next, %bb27 ], [ 0, %bb5 ]
%.0 = phi i32 [ 0, %bb5 ], [ %tmp28, %bb27 ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %bb19, label %bb18
bb17.preheader: ; preds = %bb14
br label %bb19
bb18: ; preds = %bb17
br label %bb29
bb19: ; preds = %bb17
%tmp20 = add nsw i32 %.0, -3
%tmp21 = add nuw nsw i64 %indvars.iv, 3
bb19: ; preds = %bb17.preheader, %bb27
%.02 = phi i32 [ 0, %bb17.preheader ], [ %tmp28, %bb27 ]
%indvars.iv1 = phi i64 [ 0, %bb17.preheader ], [ %indvars.iv.next, %bb27 ]
%tmp20 = add nsw i32 %.02, -3
%tmp21 = add nuw nsw i64 %indvars.iv1, 3
%tmp22 = trunc i64 %tmp21 to i32
%tmp23 = mul nsw i32 %tmp20, %tmp22
%tmp24 = trunc i64 %indvars.iv to i32
%tmp24 = trunc i64 %indvars.iv1 to i32
%tmp25 = srem i32 %tmp23, %tmp24
%tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
%tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1
store i32 %tmp25, i32* %tmp26, align 4
br label %bb27
bb27: ; preds = %bb19
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%tmp28 = add nuw nsw i32 %.0, 1
br label %bb17
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
%tmp28 = add nuw nsw i32 %.02, 1
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %bb19, label %bb18
bb18: ; preds = %bb27
br label %bb29
bb29: ; preds = %bb18
ret void
@ -75,64 +68,58 @@ bb29: ; preds = %bb18
; CHECK: void @dep_free_parametric
; CHECK-NEXT: bb:
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1PREHEADER:bb[0-9.a-z]*]], label %[[EXITBLOCK:bb[0-9]*]]
; CHECK: [[LOOP1PREHEADER]]
; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP1BODY]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP2PREHEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]]
; CHECK: [[LOOP2BODY]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
; CHECK: br label %[[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[EXITBLOCK]]
; CHECK: ret void
define void @dep_free_parametric(i32* noalias %arg, i64 %arg2) {
bb:
br label %bb3
%tmp3 = icmp slt i64 0, %arg2
br i1 %tmp3, label %bb5, label %bb15.preheader
bb3: ; preds = %bb12, %bb
%.01 = phi i64 [ 0, %bb ], [ %tmp13, %bb12 ]
%tmp = icmp slt i64 %.01, %arg2
br i1 %tmp, label %bb5, label %bb15
bb5: ; preds = %bb3
%tmp6 = add nsw i64 %.01, -3
%tmp7 = add nuw nsw i64 %.01, 3
bb5: ; preds = %bb, %bb12
%.014 = phi i64 [ 0, %bb ], [ %tmp13, %bb12 ]
%tmp6 = add nsw i64 %.014, -3
%tmp7 = add nuw nsw i64 %.014, 3
%tmp8 = mul nsw i64 %tmp6, %tmp7
%tmp9 = srem i64 %tmp8, %.01
%tmp9 = srem i64 %tmp8, %.014
%tmp10 = trunc i64 %tmp9 to i32
%tmp11 = getelementptr inbounds i32, i32* %arg, i64 %.01
%tmp11 = getelementptr inbounds i32, i32* %arg, i64 %.014
store i32 %tmp10, i32* %tmp11, align 4
br label %bb12
bb12: ; preds = %bb5
%tmp13 = add nuw nsw i64 %.01, 1
br label %bb3
%tmp13 = add nuw nsw i64 %.014, 1
%tmp = icmp slt i64 %tmp13, %arg2
br i1 %tmp, label %bb5, label %bb15.preheader
bb15: ; preds = %bb25, %bb3
%.0 = phi i64 [ 0, %bb3 ], [ %tmp26, %bb25 ]
%tmp16 = icmp slt i64 %.0, %arg2
br i1 %tmp16, label %bb18, label %bb17
bb15.preheader: ; preds = %bb12, %bb
%tmp161 = icmp slt i64 0, %arg2
br i1 %tmp161, label %bb18, label %bb27
bb17: ; preds = %bb15
br label %bb27
bb18: ; preds = %bb15
%tmp19 = add nsw i64 %.0, -3
%tmp20 = add nuw nsw i64 %.0, 3
bb18: ; preds = %bb15.preheader, %bb25
%.02 = phi i64 [ 0, %bb15.preheader ], [ %tmp26, %bb25 ]
%tmp19 = add nsw i64 %.02, -3
%tmp20 = add nuw nsw i64 %.02, 3
%tmp21 = mul nsw i64 %tmp19, %tmp20
%tmp22 = srem i64 %tmp21, %.0
%tmp22 = srem i64 %tmp21, %.02
%tmp23 = trunc i64 %tmp22 to i32
%tmp24 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.0
%tmp24 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.02
store i32 %tmp23, i32* %tmp24, align 4
br label %bb25
bb25: ; preds = %bb18
%tmp26 = add nuw nsw i64 %.0, 1
br label %bb15
%tmp26 = add nuw nsw i64 %.02, 1
%tmp16 = icmp slt i64 %tmp26, %arg2
br i1 %tmp16, label %bb18, label %bb27
bb27: ; preds = %bb17
ret void
@ -142,113 +129,87 @@ bb27: ; preds = %bb17
; CHECK-NEXT: bb:
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP1BODY]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP2PREHEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]]
; CHECK: [[LOOP2BODY]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
; CHECK: br label %[[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %{{.*}}
; CHECK: ret void
define void @raw_only(i32* noalias %arg) {
bb:
br label %bb5
br label %bb7
bb5: ; preds = %bb9, %bb
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb9 ], [ 0, %bb ]
%exitcond4 = icmp ne i64 %indvars.iv2, 100
br i1 %exitcond4, label %bb7, label %bb11
bb11.preheader: ; preds = %bb9
br label %bb13
bb7: ; preds = %bb5
%tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2
%tmp8 = trunc i64 %indvars.iv2 to i32
bb7: ; preds = %bb, %bb9
%indvars.iv22 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb9 ]
%tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv22
%tmp8 = trunc i64 %indvars.iv22 to i32
store i32 %tmp8, i32* %tmp, align 4
br label %bb9
bb9: ; preds = %bb7
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
br label %bb5
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv22, 1
%exitcond4 = icmp ne i64 %indvars.iv.next3, 100
br i1 %exitcond4, label %bb7, label %bb11.preheader
bb11: ; preds = %bb18, %bb5
%indvars.iv = phi i64 [ %indvars.iv.next, %bb18 ], [ 0, %bb5 ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %bb13, label %bb19
bb13: ; preds = %bb11
%tmp14 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv
bb13: ; preds = %bb11.preheader, %bb18
%indvars.iv1 = phi i64 [ 0, %bb11.preheader ], [ %indvars.iv.next, %bb18 ]
%tmp14 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv1
%tmp15 = load i32, i32* %tmp14, align 4
%tmp16 = shl nsw i32 %tmp15, 1
%tmp17 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
%tmp17 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1
store i32 %tmp16, i32* %tmp17, align 4
br label %bb18
bb18: ; preds = %bb13
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb11
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %bb13, label %bb19
bb19: ; preds = %bb11
bb19: ; preds = %bb18
ret void
}
; CHECK: void @raw_only_parametric
; CHECK-NEXT: bb:
; CHECK: br i1 %{{.*}}, label %[[LOOP1PREHEADER:bb[0-9.a-z]*]], label %[[EXITBLOCK:bb[0-9]*]]
; CHECK: [[LOOP1PREHEADER]]
; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP1BODY]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP2PREHEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]]
; CHECK: [[LOOP2BODY]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
; CHECK: br label %[[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[EXITBLOCK]]
; CHECK: ret void
define void @raw_only_parametric(i32* noalias %arg, i32 %arg4) {
bb:
br label %bb5
bb5: ; preds = %bb11, %bb
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb11 ], [ 0, %bb ]
%tmp = sext i32 %arg4 to i64
%tmp6 = icmp slt i64 %indvars.iv2, %tmp
br i1 %tmp6, label %bb8, label %bb14
%tmp64 = icmp sgt i32 %arg4, 0
br i1 %tmp64, label %bb8, label %bb23
bb8: ; preds = %bb5
%tmp9 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2
%tmp10 = trunc i64 %indvars.iv2 to i32
bb8: ; preds = %bb, %bb8
%indvars.iv25 = phi i64 [ %indvars.iv.next3, %bb8 ], [ 0, %bb ]
%tmp9 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv25
%tmp10 = trunc i64 %indvars.iv25 to i32
store i32 %tmp10, i32* %tmp9, align 4
br label %bb11
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv25, 1
%tmp6 = icmp slt i64 %indvars.iv.next3, %tmp
br i1 %tmp6, label %bb8, label %bb17
bb11: ; preds = %bb8
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
br label %bb5
bb14: ; preds = %bb22, %bb5
%indvars.iv = phi i64 [ %indvars.iv.next, %bb22 ], [ 0, %bb5 ]
%tmp13 = sext i32 %arg4 to i64
%tmp15 = icmp slt i64 %indvars.iv, %tmp13
br i1 %tmp15, label %bb17, label %bb23
bb17: ; preds = %bb14
%tmp18 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv
bb17: ; preds = %bb8, %bb17
%indvars.iv3 = phi i64 [ %indvars.iv.next, %bb17 ], [ 0, %bb8 ]
%tmp18 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv3
%tmp19 = load i32, i32* %tmp18, align 4
%tmp20 = shl nsw i32 %tmp19, 1
%tmp21 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
%tmp21 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv3
store i32 %tmp20, i32* %tmp21, align 4
br label %bb22
%indvars.iv.next = add nuw nsw i64 %indvars.iv3, 1
%tmp15 = icmp slt i64 %indvars.iv.next, %tmp
br i1 %tmp15, label %bb17, label %bb23
bb22: ; preds = %bb17
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb14
bb23: ; preds = %bb14
bb23: ; preds = %bb17, %bb
ret void
}
@ -256,62 +217,52 @@ bb23: ; preds = %bb14
; CHECK-NEXT: bb:
; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP1BODY]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]]
; CHECK: [[LOOP2PREHEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]]
; CHECK: [[LOOP2BODY]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
; CHECK: br label %[[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %{{.*}}
; CHECK: ret void
; @forward_dep: test input containing two consecutive loops that each compare
; their induction variable against the constant 100. The first loop stores
; ((c - 3) * (i32)(i + 3)) srem (i32)i into %arg[i], where c is an i32 counter
; that starts at 0 and is incremented alongside i. The second loop loads
; %arg[i - 3], multiplies it by 3 and stores the product into %arg[i].
;
; NOTE(review): as shown here the body is not valid IR. The entry block has two
; terminators, labels bb7, bb19 and bb26 appear twice, and values such as %tmp,
; %tmp8, %tmp11, %tmp13, %tmp15, %tmp20, %tmp24, %exitcond, %exitcond4,
; %indvars.iv.next and %indvars.iv.next3 are each defined twice. This looks
; like a diff rendering whose +/- markers were stripped, so two versions of the
; function are interleaved. Presumably the variant with separate top-tested
; headers (bb5, bb17) is the pre-patch text and the variant that tests the exit
; condition in the latches (bb14, bb25) is the post-patch text -- confirm
; against the real test file.
define void @forward_dep(i32* noalias %arg) {
bb:
br label %bb5
br label %bb7
; Top-tested variant of the first loop: the exit check sits in header bb5.
bb5: ; preds = %bb14, %bb
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ]
%.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
%exitcond4 = icmp ne i64 %indvars.iv2, 100
br i1 %exitcond4, label %bb7, label %bb17
bb7: ; preds = %bb5
%tmp = add nsw i32 %.01, -3
%tmp8 = add nuw nsw i64 %indvars.iv2, 3
; Bottom-tested variant of the first loop: bb7 is entered directly from bb and
; the exit check is performed in latch bb14.
bb7: ; preds = %bb, %bb14
%.013 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
%indvars.iv22 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb14 ]
%tmp = add nsw i32 %.013, -3
%tmp8 = add nuw nsw i64 %indvars.iv22, 3
%tmp9 = trunc i64 %tmp8 to i32
%tmp10 = mul nsw i32 %tmp, %tmp9
%tmp11 = trunc i64 %indvars.iv2 to i32
%tmp11 = trunc i64 %indvars.iv22 to i32
%tmp12 = srem i32 %tmp10, %tmp11
%tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2
%tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv22
; First loop's write: %arg[i] = %tmp12
store i32 %tmp12, i32* %tmp13, align 4
br label %bb14
bb14: ; preds = %bb7
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
%tmp15 = add nuw nsw i32 %.01, 1
br label %bb5
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv22, 1
%tmp15 = add nuw nsw i32 %.013, 1
%exitcond4 = icmp ne i64 %indvars.iv.next3, 100
br i1 %exitcond4, label %bb7, label %bb19
; Top-tested variant of the second loop: the exit check sits in header bb17.
bb17: ; preds = %bb25, %bb5
%indvars.iv = phi i64 [ %indvars.iv.next, %bb25 ], [ 0, %bb5 ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %bb19, label %bb26
bb19: ; preds = %bb17
%tmp20 = add nsw i64 %indvars.iv, -3
; Bottom-tested variant of the second loop: bb19 is entered directly from bb14
; and the exit check is performed in latch bb25.
bb19: ; preds = %bb14, %bb25
%indvars.iv1 = phi i64 [ 0, %bb14 ], [ %indvars.iv.next, %bb25 ]
%tmp20 = add nsw i64 %indvars.iv1, -3
; Second loop's read is at index i - 3 of the same %arg array the first loop
; stores into at index i.
%tmp21 = getelementptr inbounds i32, i32* %arg, i64 %tmp20
%tmp22 = load i32, i32* %tmp21, align 4
%tmp23 = mul nsw i32 %tmp22, 3
%tmp24 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv
%tmp24 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv1
; Second loop's write: %arg[i] = %arg[i - 3] * 3
store i32 %tmp23, i32* %tmp24, align 4
br label %bb25
bb25: ; preds = %bb19
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb17
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %bb19, label %bb26
bb26: ; preds = %bb17
bb26: ; preds = %bb25
ret void
}