1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[LoopFusion] Extend use of OptimizationRemarkEmitter

Summary:
This patch extends the use of the OptimizationRemarkEmitter to provide
information about loops that are not fused, and loops that are not eligible for
fusion. In particular, it uses the OptimizationRemarkAnalysis to identify loops
that are not eligible for fusion and the OptimizationRemarkMissed to identify
loops that cannot be fused.

It also reuses the statistics to provide the messages used in the
OptimizationRemarks. This provides common message strings between the
optimization remarks and the statistics.

I would like feedback on this approach, in general. If people are OK with this,
I will flesh out additional remarks in subsequent commits.

Subscribers: hiraditya, jsji, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63844

llvm-svn: 367327
This commit is contained in:
Kit Barton 2019-07-30 15:58:43 +00:00
parent 61ac6a1386
commit 89cddbb4c1
3 changed files with 538 additions and 73 deletions

View File

@ -66,7 +66,7 @@ using namespace llvm;
#define DEBUG_TYPE "loop-fusion"
STATISTIC(FuseCounter, "Count number of loop fusions performed");
STATISTIC(FuseCounter, "Loops fused");
STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
STATISTIC(InvalidPreheader, "Loop has invalid preheader");
STATISTIC(InvalidHeader, "Loop has invalid header");
@ -79,12 +79,12 @@ STATISTIC(MayThrowException, "Loop may throw an exception");
STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
STATISTIC(InvalidTripCount,
"Loop does not have invariant backedge taken count");
STATISTIC(UnknownTripCount, "Loop has unknown trip count");
STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same");
STATISTIC(NonAdjacent, "Candidates are not adjacent");
STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader");
STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
STATISTIC(NonAdjacent, "Loops are not adjacent");
STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader");
STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
@ -151,11 +151,14 @@ struct FusionCandidate {
const DominatorTree *DT;
const PostDominatorTree *PDT;
OptimizationRemarkEmitter &ORE;
FusionCandidate(Loop *L, const DominatorTree *DT,
const PostDominatorTree *PDT)
const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) {
Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT),
ORE(ORE) {
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
@ -163,28 +166,28 @@ struct FusionCandidate {
// found, invalidate this object and return.
for (BasicBlock *BB : L->blocks()) {
if (BB->hasAddressTaken()) {
AddressTakenBB++;
invalidate();
reportInvalidCandidate(AddressTakenBB);
return;
}
for (Instruction &I : *BB) {
if (I.mayThrow()) {
MayThrowException++;
invalidate();
reportInvalidCandidate(MayThrowException);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
if (SI->isVolatile()) {
ContainsVolatileAccess++;
invalidate();
reportInvalidCandidate(ContainsVolatileAccess);
return;
}
}
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (LI->isVolatile()) {
ContainsVolatileAccess++;
invalidate();
reportInvalidCandidate(ContainsVolatileAccess);
return;
}
}
@ -227,6 +230,44 @@ struct FusionCandidate {
}
#endif
/// Check whether the loop wrapped by this candidate may take part in fusion.
///
/// This is a per-loop test only: it says nothing about whether it is *legal*
/// to fuse this loop with any particular other loop.
///
/// A candidate is rejected when it is not valid, when ScalarEvolution has no
/// loop-invariant backedge-taken count for the loop, or when the loop is not
/// in simplified form. The last two rejections go through
/// reportInvalidCandidate; an invalid candidate only bumps the statistic of
/// each CFG piece that is missing.
///
/// \returns true if the loop is eligible for fusion, false otherwise.
bool isEligibleForFusion(ScalarEvolution &SE) const {
  if (isValid()) {
    // ScalarEvolution must be able to determine a trip count.
    if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
      LLVM_DEBUG(dbgs() << "Loop " << L->getName()
                        << " trip count not computable!\n");
      return reportInvalidCandidate(UnknownTripCount);
    }

    // Only loops in simplified form are considered.
    if (!L->isLoopSimplifyForm()) {
      LLVM_DEBUG(dbgs() << "Loop " << L->getName()
                        << " is not in simplified form!\n");
      return reportInvalidCandidate(NotSimplifiedForm);
    }

    return true;
  }

  // The candidate is not valid: count every structural piece that is missing.
  // Several counters may be bumped for the same loop.
  LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n");
  if (!Preheader)
    ++InvalidPreheader;
  if (!Header)
    ++InvalidHeader;
  if (!ExitingBlock)
    ++InvalidExitingBlock;
  if (!ExitBlock)
    ++InvalidExitBlock;
  if (!Latch)
    ++InvalidLatch;
  if (L->isInvalid())
    ++InvalidLoop;
  return false;
}
private:
// This is only used internally for now, to clear the MemWrites and MemReads
// list and setting Valid to false. I can't envision other uses of this right
@ -239,6 +280,17 @@ private:
MemReads.clear();
Valid = false;
}
/// Record that this loop cannot be a fusion candidate for the reason
/// described by \p Stat.
///
/// Increments \p Stat and emits an OptimizationRemarkAnalysis whose remark
/// name and message text are taken from the statistic's name and description,
/// so the statistics and the remarks share one set of strings.
///
/// The constructor calls this before any validity check has run, so
/// Preheader may still be null at that point. In that case the loop header is
/// used as the code region (and to find the enclosing function) instead of
/// asserting on, or dereferencing, a null preheader.
///
/// \returns false always, so callers can write
/// `return reportInvalidCandidate(...)`.
bool reportInvalidCandidate(llvm::Statistic &Stat) const {
  using namespace ore;
  assert(L && "Fusion candidate not initialized properly!");
  ++Stat;
  // Prefer the preheader as the remark's code region; fall back to the header
  // for loops that do not have a preheader.
  const auto *Region = Preheader ? Preheader : L->getHeader();
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(),
                                      L->getStartLoc(), Region)
           << "[" << Region->getParent()->getName() << "]: "
           << "Loop is not a candidate for fusion: " << Stat.getDesc());
  return false;
}
};
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
@ -391,16 +443,6 @@ static void printLoopVector(const LoopVector &LV) {
}
#endif
/// Emit an OptimizationRemark saying that the loops represented by \p FC0 and
/// \p FC1 were fused. The two candidates are identified in the message by the
/// names of their preheader blocks ("Cand1" / "Cand2"), and the remark is
/// attached to the function that contains FC0's preheader.
///
/// Both candidates' Preheader pointers are dereferenced without a null check.
static void reportLoopFusion(const FusionCandidate &FC0,
const FusionCandidate &FC1,
OptimizationRemarkEmitter &ORE) {
using namespace ore;
ORE.emit(
OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent())
<< "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName()))
<< " with " << NV("Cand2", StringRef(FC1.Preheader->getName())));
}
struct LoopFuser {
private:
// Sets of control flow equivalent fusion candidates for a given nest level.
@ -506,53 +548,13 @@ private:
return false;
}
/// Determine if a fusion candidate (representing a loop) is eligible for
/// fusion. Note that this only checks whether a single loop can be fused - it
/// does not check whether it is *legal* to fuse two loops together.
///
/// Returns true if \p FC is eligible. Otherwise the statistic(s) matching the
/// reason for rejection are incremented and false is returned.
bool eligibleForFusion(const FusionCandidate &FC) const {
if (!FC.isValid()) {
LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n");
// Count every missing CFG piece; more than one counter may be bumped for
// the same candidate.
if (!FC.Preheader)
InvalidPreheader++;
if (!FC.Header)
InvalidHeader++;
if (!FC.ExitingBlock)
InvalidExitingBlock++;
if (!FC.ExitBlock)
InvalidExitBlock++;
if (!FC.Latch)
InvalidLatch++;
if (FC.L->isInvalid())
InvalidLoop++;
return false;
}
// Require ScalarEvolution to be able to determine a trip count.
if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) {
LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
<< " trip count not computable!\n");
InvalidTripCount++;
return false;
}
// Only loops in simplified form are accepted.
if (!FC.L->isLoopSimplifyForm()) {
LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
<< " is not in simplified form!\n");
NotSimplifiedForm++;
return false;
}
return true;
}
/// Iterate over all loops in the given loop set and identify the loops that
/// are eligible for fusion. Place all eligible fusion candidates into Control
/// Flow Equivalent sets, sorted by dominance.
void collectFusionCandidates(const LoopVector &LV) {
for (Loop *L : LV) {
FusionCandidate CurrCand(L, &DT, &PDT);
if (!eligibleForFusion(CurrCand))
FusionCandidate CurrCand(L, &DT, &PDT, ORE);
if (!CurrCand.isEligibleForFusion(SE))
continue;
// Go through each list in FusionCandidates and determine if L is control
@ -664,14 +666,15 @@ private:
if (!identicalTripCounts(*FC0, *FC1)) {
LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
"counts. Not fusing.\n");
NonEqualTripCount++;
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
NonEqualTripCount);
continue;
}
if (!isAdjacent(*FC0, *FC1)) {
LLVM_DEBUG(dbgs()
<< "Fusion candidates are not adjacent. Not fusing.\n");
NonAdjacent++;
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
continue;
}
@ -683,12 +686,15 @@ private:
if (!isEmptyPreheader(*FC1)) {
LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
"preheader. Not fusing.\n");
NonEmptyPreheader++;
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
NonEmptyPreheader);
continue;
}
if (!dependencesAllowFusion(*FC0, *FC1)) {
LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
InvalidDependencies);
continue;
}
@ -696,9 +702,11 @@ private:
LLVM_DEBUG(dbgs()
<< "\tFusion appears to be "
<< (BeneficialToFuse ? "" : "un") << "profitable!\n");
if (!BeneficialToFuse)
if (!BeneficialToFuse) {
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
FusionNotBeneficial);
continue;
}
// All analysis has completed and has determined that fusion is legal
// and profitable. At this point, start transforming the code and
// perform fusion.
@ -710,15 +718,14 @@ private:
// Note this needs to be done *before* performFusion because
// performFusion will change the original loops, making it not
// possible to identify them after fusion is complete.
reportLoopFusion(*FC0, *FC1, ORE);
reportLoopFusion<OptimizationRemark>(*FC0, *FC1, FuseCounter);
FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT);
FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE);
FusedCand.verify();
assert(eligibleForFusion(FusedCand) &&
assert(FusedCand.isEligibleForFusion(SE) &&
"Fused candidate should be eligible for fusion!");
// Notify the loop-depth-tree that these loops are not valid objects
// anymore.
LDT.removeLoop(FC1->L);
CandidateSet.erase(FC0);
@ -1137,6 +1144,33 @@ private:
return FC0.L;
}
/// Report details on loop fusion opportunities.
///
/// This template function can be used to report both successful and missed
/// loop fusion opportunities, based on the RemarkKind. The RemarkKind should
/// be one of:
/// - OptimizationRemarkMissed to report when loop fusion is unsuccessful
/// given two valid fusion candidates.
/// - OptimizationRemark to report successful fusion of two fusion
/// candidates.
/// The remarks will be printed using the form:
/// <path/filename>:<line number>:<column number>: [<function name>]:
/// <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description>
template <typename RemarkKind>
void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
llvm::Statistic &Stat) {
assert(FC0.Preheader && FC1.Preheader &&
"Expecting valid fusion candidates");
using namespace ore;
++Stat;
ORE.emit(RemarkKind(DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(),
FC0.Preheader)
<< "[" << FC0.Preheader->getParent()->getName()
<< "]: " << NV("Cand1", StringRef(FC0.Preheader->getName()))
<< " and " << NV("Cand2", StringRef(FC1.Preheader->getName()))
<< ": " << Stat.getDesc());
}
};
struct LoopFuseLegacy : public FunctionPass {

View File

@ -0,0 +1,130 @@
; RUN: opt -S -loop-fusion -pass-remarks-analysis=loop-fusion -disable-output < %s 2>&1 | FileCheck %s
@B = common global [1024 x i32] zeroinitializer, align 16
; CHECK: remark: diagnostics_analysis.c:6:3: [test]: Loop is not a candidate for fusion: Loop contains a volatile access
; CHECK: remark: diagnostics_analysis.c:10:3: [test]: Loop is not a candidate for fusion: Loop has unknown trip count
; Two consecutive loops that both count %i / %i1 from 0 up to %n, with the
; induction variables and %n kept in stack slots (allocas) and reloaded on
; every use. The first loop (for.cond .. for.inc) stores through %A, the
; second (for.cond2 .. for.inc12) stores into @B.
; One analysis remark is expected per loop: see the two remark patterns
; directly above this function.
define void @test(i32* %A, i32 %n) !dbg !15 {
entry:
%A.addr = alloca i32*, align 8
%n.addr = alloca i32, align 4
%i = alloca i32, align 4
%i1 = alloca i32, align 4
store i32* %A, i32** %A.addr, align 8
store i32 %n, i32* %n.addr, align 4
%0 = bitcast i32* %i to i8*
store i32 0, i32* %i, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%1 = load i32, i32* %i, align 4
%2 = load i32, i32* %n.addr, align 4
%cmp = icmp slt i32 %1, %2
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
%3 = bitcast i32* %i to i8*, !dbg !42
br label %for.end
for.body: ; preds = %for.cond
%4 = load i32, i32* %i, align 4
%sub = sub nsw i32 %4, 3
%5 = load i32, i32* %i, align 4
%add = add nsw i32 %5, 3
%mul = mul nsw i32 %sub, %add
%6 = load i32, i32* %i, align 4
%rem = srem i32 %mul, %6
%7 = load i32*, i32** %A.addr, align 8
%8 = load i32, i32* %i, align 4
%idxprom = sext i32 %8 to i64
%arrayidx = getelementptr inbounds i32, i32* %7, i64 %idxprom
; The only volatile access in this function; the first expected remark
; ("Loop contains a volatile access") refers to this loop.
store volatile i32 %rem, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%9 = load i32, i32* %i, align 4, !dbg !49
%inc = add nsw i32 %9, 1, !dbg !49
store i32 %inc, i32* %i, align 4, !dbg !49
br label %for.cond, !dbg !42, !llvm.loop !50
for.end: ; preds = %for.cond.cleanup
%10 = bitcast i32* %i1 to i8*
store i32 0, i32* %i1, align 4
br label %for.cond2
; Second loop. Its bound is reloaded from %n.addr on every iteration;
; presumably that is why the second expected remark reports an unknown trip
; count - confirm against ScalarEvolution's view of this loop.
for.cond2: ; preds = %for.inc12, %for.end
%11 = load i32, i32* %i1, align 4
%12 = load i32, i32* %n.addr, align 4
%cmp3 = icmp slt i32 %11, %12
br i1 %cmp3, label %for.body5, label %for.cond.cleanup4
for.cond.cleanup4: ; preds = %for.cond2
%13 = bitcast i32* %i1 to i8*
br label %for.end14
for.body5: ; preds = %for.cond2
%14 = load i32, i32* %i1, align 4
%sub6 = sub nsw i32 %14, 3
%15 = load i32, i32* %i1, align 4
%add7 = add nsw i32 %15, 3
%mul8 = mul nsw i32 %sub6, %add7
%16 = load i32, i32* %i1, align 4
%rem9 = srem i32 %mul8, %16
%17 = load i32, i32* %i1, align 4
%idxprom10 = sext i32 %17 to i64
%arrayidx11 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %idxprom10
store i32 %rem9, i32* %arrayidx11, align 4
br label %for.inc12
for.inc12: ; preds = %for.body5
%18 = load i32, i32* %i1, align 4
%inc13 = add nsw i32 %18, 1
store i32 %inc13, i32* %i1, align 4
br label %for.cond2, !dbg !59, !llvm.loop !67
for.end14: ; preds = %for.cond.cleanup4
ret void
}
!llvm.module.flags = !{!10, !11, !13}
!llvm.ident = !{!14}
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git c019c32c5a2b0ed4487a738337d35fd3f630ac0a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU)
!3 = !DIFile(filename: "diagnostics_analysis.c", directory: "/tmp")
!4 = !{}
!5 = !{!0}
!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 32768, elements: !8)
!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!8 = !{!9}
!9 = !DISubrange(count: 1024)
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
!13 = !{i32 7, !"PIC Level", i32 2}
!14 = !{!"clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git c019c32c5a2b0ed4487a738337d35fd3f630ac0a)"}
!15 = distinct !DISubprogram(name: "test", scope: !3, file: !3, line: 5, type: !16, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !20)
!16 = !DISubroutineType(types: !17)
!17 = !{null, !18, !7}
!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64)
!19 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7)
!20 = !{!21, !22, !23, !25}
!21 = !DILocalVariable(name: "A", arg: 1, scope: !15, file: !3, line: 5, type: !18)
!22 = !DILocalVariable(name: "n", arg: 2, scope: !15, file: !3, line: 5, type: !7)
!23 = !DILocalVariable(name: "i", scope: !24, file: !3, line: 6, type: !7)
!24 = distinct !DILexicalBlock(scope: !15, file: !3, line: 6, column: 3)
!25 = !DILocalVariable(name: "i", scope: !26, file: !3, line: 10, type: !7)
!26 = distinct !DILexicalBlock(scope: !15, file: !3, line: 10, column: 3)
!38 = distinct !DILexicalBlock(scope: !24, file: !3, line: 6, column: 3)
!41 = !DILocation(line: 6, column: 3, scope: !24)
!42 = !DILocation(line: 6, column: 3, scope: !38)
!44 = distinct !DILexicalBlock(scope: !38, file: !3, line: 6, column: 31)
!49 = !DILocation(line: 6, column: 27, scope: !38)
!50 = distinct !{!50, !41, !51}
!51 = !DILocation(line: 8, column: 3, scope: !24)
!55 = distinct !DILexicalBlock(scope: !26, file: !3, line: 10, column: 3)
!58 = !DILocation(line: 10, column: 3, scope: !26)
!59 = !DILocation(line: 10, column: 3, scope: !55)
!67 = distinct !{!67, !58, !68}
!68 = !DILocation(line: 12, column: 3, scope: !26)
!69 = !DILocation(line: 13, column: 1, scope: !15)

View File

@ -0,0 +1,301 @@
; RUN: opt -S -loop-fusion -pass-remarks-missed=loop-fusion -disable-output < %s 2>&1 | FileCheck %s
;
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@B = common global [1024 x i32] zeroinitializer, align 16, !dbg !0
; CHECK: remark: diagnostics_missed.c:18:3: [non_adjacent]: entry and for.end: Loops are not adjacent
; Two loops that both run %i from 0 until it equals 100; the first stores
; into %A, the second into @B. The first loop exits to for.cond.cleanup,
; which branches to for.end, which then branches into the second loop's
; header, so there is an extra block between the two loops.
; Expected missed remark: "Loops are not adjacent".
define void @non_adjacent(i32* noalias %A) !dbg !67 {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond1 = icmp ne i64 %i.0, 100
br i1 %exitcond1, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
br label %for.end
for.body: ; preds = %for.cond
%sub = add nsw i64 %i.0, -3
%add = add nuw nsw i64 %i.0, 3
%mul = mul nsw i64 %sub, %add
%rem = srem i64 %mul, %i.0
%conv = trunc i64 %rem to i32
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0
store i32 %conv, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nuw nsw i64 %i.0, 1, !dbg !86
br label %for.cond, !dbg !87, !llvm.loop !88
for.end: ; preds = %for.cond.cleanup
br label %for.cond2
for.cond2: ; preds = %for.inc13, %for.end
%i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
%exitcond = icmp ne i64 %i1.0, 100
br i1 %exitcond, label %for.body6, label %for.cond.cleanup5
for.cond.cleanup5: ; preds = %for.cond2
br label %for.end15
for.body6: ; preds = %for.cond2
%sub7 = add nsw i64 %i1.0, -3
%add8 = add nuw nsw i64 %i1.0, 3
%mul9 = mul nsw i64 %sub7, %add8
%rem10 = srem i64 %mul9, %i1.0
%conv11 = trunc i64 %rem10 to i32
%arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0
store i32 %conv11, i32* %arrayidx12, align 4
br label %for.inc13
for.inc13: ; preds = %for.body6
%inc14 = add nuw nsw i64 %i1.0, 1, !dbg !100
br label %for.cond2, !dbg !101, !llvm.loop !102
for.end15: ; preds = %for.cond.cleanup5
ret void
}
; CHECK: remark: diagnostics_missed.c:28:3: [different_bounds]: entry and for.end: Loop trip counts are not the same
; Same shape as the two-loop test above, except that the loops have different
; bounds: the first exits when %i.0 reaches 100, the second when %i1.0
; reaches 200.
; Expected missed remark: "Loop trip counts are not the same".
define void @different_bounds(i32* noalias %A) !dbg !105 {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
; First loop bound: 100.
%exitcond1 = icmp ne i64 %i.0, 100
br i1 %exitcond1, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
br label %for.end
for.body: ; preds = %for.cond
%sub = add nsw i64 %i.0, -3
%add = add nuw nsw i64 %i.0, 3
%mul = mul nsw i64 %sub, %add
%rem = srem i64 %mul, %i.0
%conv = trunc i64 %rem to i32
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0
store i32 %conv, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nuw nsw i64 %i.0, 1, !dbg !123
br label %for.cond, !dbg !124, !llvm.loop !125
for.end: ; preds = %for.cond.cleanup
br label %for.cond2
for.cond2: ; preds = %for.inc13, %for.end
%i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
; Second loop bound: 200.
%exitcond = icmp ne i64 %i1.0, 200
br i1 %exitcond, label %for.body6, label %for.cond.cleanup5
for.cond.cleanup5: ; preds = %for.cond2
br label %for.end15
for.body6: ; preds = %for.cond2
%sub7 = add nsw i64 %i1.0, -3
%add8 = add nuw nsw i64 %i1.0, 3
%mul9 = mul nsw i64 %sub7, %add8
%rem10 = srem i64 %mul9, %i1.0
%conv11 = trunc i64 %rem10 to i32
%arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0
store i32 %conv11, i32* %arrayidx12, align 4
br label %for.inc13
for.inc13: ; preds = %for.body6
%inc14 = add nuw nsw i64 %i1.0, 1
br label %for.cond2, !dbg !138, !llvm.loop !139
for.end15: ; preds = %for.cond.cleanup5
ret void
}
; CHECK: remark: diagnostics_missed.c:38:3: [negative_dependence]: entry and for.end: Loop has a non-empty preheader
; The first loop stores the (truncated) induction variable into A[i] for i in
; [0, 100). The second loop loads A[i + 1], shifts it left by one and stores
; the result into B[i]. The block between the loops (for.end) contains a
; call to llvm.dbg.value before it branches into the second loop.
; Expected missed remark: "Loop has a non-empty preheader".
define void @negative_dependence(i32* noalias %A) !dbg !142 {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ]
%exitcond3 = icmp ne i64 %indvars.iv1, 100
br i1 %exitcond3, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
%tmp = trunc i64 %indvars.iv1 to i32
store i32 %tmp, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
br label %for.cond, !dbg !160, !llvm.loop !161
for.end: ; preds = %for.cond
; Presumably this debug intrinsic is what makes the second loop's preheader
; count as non-empty - confirm against isEmptyPreheader.
call void @llvm.dbg.value(metadata i32 0, metadata !147, metadata !DIExpression()), !dbg !163
br label %for.cond2, !dbg !164
for.cond2: ; preds = %for.inc10, %for.end
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.end ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %for.body5, label %for.end12
for.body5: ; preds = %for.cond2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
%tmp4 = load i32, i32* %arrayidx7, align 4
%mul = shl nsw i32 %tmp4, 1
%arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
store i32 %mul, i32* %arrayidx9, align 4
br label %for.inc10
for.inc10: ; preds = %for.body5
br label %for.cond2
for.end12: ; preds = %for.cond2
ret void, !dbg !178
}
; CHECK: remark: diagnostics_missed.c:51:3: [sumTest]: entry and for.cond2.preheader: Dependencies prevent fusion
; The first loop accumulates A[i] for i in [0, 100) into %sum.0. The second
; loop divides each A[i] by the final sum (%sum.0.lcssa) and stores the
; quotient into B[i], so it consumes a value produced by the whole first
; loop. The function returns the sum.
; Expected missed remark: "Dependencies prevent fusion".
; NOTE(review): the expected remark names a block "for.cond2.preheader" that
; does not appear in this IR; presumably it is created by loop simplification
; before fusion runs - confirm.
define i32 @sumTest(i32* noalias %A) !dbg !179 {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ]
%sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
%exitcond3 = icmp ne i64 %indvars.iv1, 100
br i1 %exitcond3, label %for.body, label %for.cond2
for.body: ; preds = %for.cond
br label %for.inc
for.inc: ; preds = %for.body
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
%tmp = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %sum.0, %tmp
%indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
br label %for.cond, !dbg !199, !llvm.loop !200
for.cond2: ; preds = %for.inc10, %for.cond
; Carries the first loop's final sum around the second loop unchanged.
%sum.0.lcssa = phi i32 [ %sum.0, %for.cond ], [ %sum.0.lcssa, %for.inc10 ]
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.cond ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %for.body5, label %for.end12
for.body5: ; preds = %for.cond2
%arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp4 = load i32, i32* %arrayidx7, align 4
%div = sdiv i32 %tmp4, %sum.0.lcssa
%arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
store i32 %div, i32* %arrayidx9, align 4
br label %for.inc10
for.inc10: ; preds = %for.body5
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond2
for.end12: ; preds = %for.cond2
ret i32 %sum.0.lcssa, !dbg !215
}
declare void @llvm.dbg.value(metadata, metadata, metadata)
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!11, !12, !13, !14}
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !6, line: 46, type: !7, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git 23c4baaa9f5b33d2d52eda981d376c6b0a7a3180)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU)
!3 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp")
!4 = !{}
!5 = !{!0}
!6 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp")
!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 32768, elements: !9)
!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!9 = !{!10}
!10 = !DISubrange(count: 1024)
!11 = !{i32 2, !"Dwarf Version", i32 4}
!12 = !{i32 2, !"Debug Info Version", i32 3}
!13 = !{i32 1, !"wchar_size", i32 4}
!14 = !{i32 7, !"PIC Level", i32 2}
!17 = !DISubroutineType(types: !18)
!18 = !{null, !19}
!19 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !20)
!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
!67 = distinct !DISubprogram(name: "non_adjacent", scope: !6, file: !6, line: 17, type: !17, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !68)
!68 = !{!69, !70, !73}
!69 = !DILocalVariable(name: "A", arg: 1, scope: !67, file: !6, line: 17, type: !19)
!70 = !DILocalVariable(name: "i", scope: !71, file: !6, line: 18, type: !72)
!71 = distinct !DILexicalBlock(scope: !67, file: !6, line: 18, column: 3)
!72 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed)
!73 = !DILocalVariable(name: "i", scope: !74, file: !6, line: 22, type: !72)
!74 = distinct !DILexicalBlock(scope: !67, file: !6, line: 22, column: 3)
!79 = distinct !DILexicalBlock(scope: !71, file: !6, line: 18, column: 3)
!80 = !DILocation(line: 18, column: 3, scope: !71)
!86 = !DILocation(line: 18, column: 30, scope: !79)
!87 = !DILocation(line: 18, column: 3, scope: !79)
!88 = distinct !{!88, !80, !89}
!89 = !DILocation(line: 20, column: 3, scope: !71)
!93 = distinct !DILexicalBlock(scope: !74, file: !6, line: 22, column: 3)
!94 = !DILocation(line: 22, column: 3, scope: !74)
!100 = !DILocation(line: 22, column: 30, scope: !93)
!101 = !DILocation(line: 22, column: 3, scope: !93)
!102 = distinct !{!102, !94, !103}
!103 = !DILocation(line: 24, column: 3, scope: !74)
!105 = distinct !DISubprogram(name: "different_bounds", scope: !6, file: !6, line: 27, type: !17, scopeLine: 27, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !106)
!106 = !{!107, !108, !110}
!107 = !DILocalVariable(name: "A", arg: 1, scope: !105, file: !6, line: 27, type: !19)
!108 = !DILocalVariable(name: "i", scope: !109, file: !6, line: 28, type: !72)
!109 = distinct !DILexicalBlock(scope: !105, file: !6, line: 28, column: 3)
!110 = !DILocalVariable(name: "i", scope: !111, file: !6, line: 32, type: !72)
!111 = distinct !DILexicalBlock(scope: !105, file: !6, line: 32, column: 3)
!116 = distinct !DILexicalBlock(scope: !109, file: !6, line: 28, column: 3)
!117 = !DILocation(line: 28, column: 3, scope: !109)
!123 = !DILocation(line: 28, column: 30, scope: !116)
!124 = !DILocation(line: 28, column: 3, scope: !116)
!125 = distinct !{!125, !117, !126}
!126 = !DILocation(line: 30, column: 3, scope: !109)
!130 = distinct !DILexicalBlock(scope: !111, file: !6, line: 32, column: 3)
!131 = !DILocation(line: 32, column: 3, scope: !111)
!138 = !DILocation(line: 32, column: 3, scope: !130)
!139 = distinct !{!139, !131, !140}
!140 = !DILocation(line: 34, column: 3, scope: !111)
!142 = distinct !DISubprogram(name: "negative_dependence", scope: !6, file: !6, line: 37, type: !17, scopeLine: 37, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !143)
!143 = !{!144, !145, !147}
!144 = !DILocalVariable(name: "A", arg: 1, scope: !142, file: !6, line: 37, type: !19)
!145 = !DILocalVariable(name: "i", scope: !146, file: !6, line: 38, type: !8)
!146 = distinct !DILexicalBlock(scope: !142, file: !6, line: 38, column: 3)
!147 = !DILocalVariable(name: "i", scope: !148, file: !6, line: 42, type: !8)
!148 = distinct !DILexicalBlock(scope: !142, file: !6, line: 42, column: 3)
!153 = distinct !DILexicalBlock(scope: !146, file: !6, line: 38, column: 3)
!154 = !DILocation(line: 38, column: 3, scope: !146)
!160 = !DILocation(line: 38, column: 3, scope: !153)
!161 = distinct !{!161, !154, !162}
!162 = !DILocation(line: 40, column: 3, scope: !146)
!163 = !DILocation(line: 0, scope: !148)
!164 = !DILocation(line: 42, column: 8, scope: !148)
!178 = !DILocation(line: 45, column: 1, scope: !142)
!179 = distinct !DISubprogram(name: "sumTest", scope: !6, file: !6, line: 48, type: !180, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !182)
!180 = !DISubroutineType(types: !181)
!181 = !{!8, !19}
!182 = !{!183, !184, !185, !187}
!183 = !DILocalVariable(name: "A", arg: 1, scope: !179, file: !6, line: 48, type: !19)
!184 = !DILocalVariable(name: "sum", scope: !179, file: !6, line: 49, type: !8)
!185 = !DILocalVariable(name: "i", scope: !186, file: !6, line: 51, type: !8)
!186 = distinct !DILexicalBlock(scope: !179, file: !6, line: 51, column: 3)
!187 = !DILocalVariable(name: "i", scope: !188, file: !6, line: 54, type: !8)
!188 = distinct !DILexicalBlock(scope: !179, file: !6, line: 54, column: 3)
!193 = distinct !DILexicalBlock(scope: !186, file: !6, line: 51, column: 3)
!194 = !DILocation(line: 51, column: 3, scope: !186)
!199 = !DILocation(line: 51, column: 3, scope: !193)
!200 = distinct !{!200, !194, !201}
!201 = !DILocation(line: 52, column: 15, scope: !186)
!215 = !DILocation(line: 57, column: 3, scope: !179)