mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
f5c6703ef6
Summary: This is part 1 of fixes to address the problems described in https://llvm.org/bugs/show_bug.cgi?id=22719. The restriction to limit loop scales to 4,096 does not really prevent overflows anymore, as the underlying algorithm has changed and does not seem to suffer from this problem. Additionally, artificially restricting loop scales to such a low number skews frequency information, making loops of equal hotness appear to have very different hotness properties. The only loops that are artificially restricted to a scale of 4096 are infinite loops (those loops with an exit mass of 0). This prevents infinite loops from skewing the frequencies of other regions in the CFG. At the end of propagation, frequencies are scaled to values that take no more than 64 bits to represent. When the range of frequencies to be represented fits within 61 bits, it pushes up the scaling factor to a minimum of 8 to better distinguish small frequency values. Otherwise, small frequency values are all saturated down at 1. Tested on x86_64. Reviewers: dexonsmith Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D8718 llvm-svn: 233826
205 lines
6.3 KiB
LLVM
205 lines
6.3 KiB
LLVM
; RUN: opt < %s -analyze -block-freq | FileCheck %s
|
|
|
|
; This code contains three loops. One is triple-nested, the
|
|
; second is double-nested and the third is a single loop. At
|
|
; runtime, all three loops execute 1,000,000 times each. We used to
|
|
; give different frequencies to each of the loops because loop
|
|
; scales were limited to no more than 4,096.
|
|
;
|
|
; This was penalizing the hotness of the second and third loops
|
|
; because BFI was reducing the loop scale for for.cond16 and
|
|
; for.cond26 to a max of 4,096.
|
|
;
|
|
; Without this restriction, all loops are now correctly given the same
|
|
; frequency values.
|
|
;
|
|
; Original C code:
|
|
;
|
|
;
|
|
; int g;
|
|
; __attribute__((noinline)) void bar() {
|
|
; g++;
|
|
; }
|
|
;
|
|
; extern int printf(const char*, ...);
|
|
;
|
|
; int main()
|
|
; {
|
|
; int i, j, k;
|
|
;
|
|
; g = 0;
|
|
; for (i = 0; i < 100; i++)
|
|
; for (j = 0; j < 100; j++)
|
|
; for (k = 0; k < 100; k++)
|
|
; bar();
|
|
;
|
|
; printf ("g = %d\n", g);
|
|
; g = 0;
|
|
;
|
|
; for (i = 0; i < 100; i++)
|
|
; for (j = 0; j < 10000; j++)
|
|
; bar();
|
|
;
|
|
; printf ("g = %d\n", g);
|
|
; g = 0;
|
|
;
|
|
;
|
|
; for (i = 0; i < 1000000; i++)
|
|
; bar();
|
|
;
|
|
; printf ("g = %d\n", g);
|
|
; g = 0;
|
|
; }
|
|
|
|
; @g is the i32 counter that @main zeroes before each loop nest and loads
; for each @printf call.
@g = common global i32 0, align 4
|
|
; @.str is the 8-byte, NUL-terminated format string "g = %d\n" that @main
; passes as the first argument of every @printf call.
@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1
|
|
|
|
; @bar is only declared in this file; @main calls it once per innermost
; loop iteration. (The C listing above shows it incrementing g, but that
; body is not part of this test.)
declare void @bar()
|
|
; Variadic printf, called from @main after each of the three loop nests.
declare i32 @printf(i8*, ...)
|
|
|
|
; CHECK: Printing analysis {{.*}} for function 'main':
|
|
; CHECK-NEXT: block-frequency-info: main
|
|
; @main holds the three loop nests from the C listing above. Each innermost
; body is a single call to @bar, and after each nest the function prints @g
; through @printf and resets it to 0. The FileCheck lines placed in front of
; for.body6, for.body18 and for.body28 all expect the same block frequency
; (float = 500000.5, int = 4000003).
define i32 @main() {
|
|
entry:
|
|
; Stack slots for the return value and for the C locals i, j and k.
%retval = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
%j = alloca i32, align 4
|
|
%k = alloca i32, align 4
|
|
; %retval is set to 0 here and never written again; it is reloaded and
; returned in for.end31.
store i32 0, i32* %retval
|
|
store i32 0, i32* @g, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
; First nest, outer loop header: continue while i < 100 (profile data !1).
for.cond: ; preds = %for.inc10, %entry
|
|
%0 = load i32, i32* %i, align 4
|
|
%cmp = icmp slt i32 %0, 100
|
|
br i1 %cmp, label %for.body, label %for.end12, !prof !1
|
|
|
|
for.body: ; preds = %for.cond
|
|
store i32 0, i32* %j, align 4
|
|
br label %for.cond1
|
|
|
|
; First nest, middle loop header: continue while j < 100 (profile data !2).
for.cond1: ; preds = %for.inc7, %for.body
|
|
%1 = load i32, i32* %j, align 4
|
|
%cmp2 = icmp slt i32 %1, 100
|
|
br i1 %cmp2, label %for.body3, label %for.end9, !prof !2
|
|
|
|
for.body3: ; preds = %for.cond1
|
|
store i32 0, i32* %k, align 4
|
|
br label %for.cond4
|
|
|
|
; First nest, inner loop header: continue while k < 100 (profile data !3).
for.cond4: ; preds = %for.inc, %for.body3
|
|
%2 = load i32, i32* %k, align 4
|
|
%cmp5 = icmp slt i32 %2, 100
|
|
br i1 %cmp5, label %for.body6, label %for.end, !prof !3
|
|
|
|
; CHECK: - for.body6: float = 500000.5, int = 4000003
|
|
; Innermost body of the triple-nested loop.
for.body6: ; preds = %for.cond4
|
|
call void @bar()
|
|
br label %for.inc
|
|
|
|
; k++ and back to the inner header.
for.inc: ; preds = %for.body6
|
|
%3 = load i32, i32* %k, align 4
|
|
%inc = add nsw i32 %3, 1
|
|
store i32 %inc, i32* %k, align 4
|
|
br label %for.cond4
|
|
|
|
for.end: ; preds = %for.cond4
|
|
br label %for.inc7
|
|
|
|
; j++ and back to the middle header.
for.inc7: ; preds = %for.end
|
|
%4 = load i32, i32* %j, align 4
|
|
%inc8 = add nsw i32 %4, 1
|
|
store i32 %inc8, i32* %j, align 4
|
|
br label %for.cond1
|
|
|
|
for.end9: ; preds = %for.cond1
|
|
br label %for.inc10
|
|
|
|
; i++ and back to the outer header.
for.inc10: ; preds = %for.end9
|
|
%5 = load i32, i32* %i, align 4
|
|
%inc11 = add nsw i32 %5, 1
|
|
store i32 %inc11, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
; Exit of the first nest: print g, then reset g and i before the second nest.
for.end12: ; preds = %for.cond
|
|
%6 = load i32, i32* @g, align 4
|
|
%call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6)
|
|
store i32 0, i32* @g, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond13
|
|
|
|
; Second nest, outer loop header: continue while i < 100 (profile data !1,
; the same node used by for.cond).
for.cond13: ; preds = %for.inc22, %for.end12
|
|
%7 = load i32, i32* %i, align 4
|
|
%cmp14 = icmp slt i32 %7, 100
|
|
br i1 %cmp14, label %for.body15, label %for.end24, !prof !1
|
|
|
|
for.body15: ; preds = %for.cond13
|
|
store i32 0, i32* %j, align 4
|
|
br label %for.cond16
|
|
|
|
; Second nest, inner loop header: continue while j < 10000 (profile data !4).
for.cond16: ; preds = %for.inc19, %for.body15
|
|
%8 = load i32, i32* %j, align 4
|
|
%cmp17 = icmp slt i32 %8, 10000
|
|
br i1 %cmp17, label %for.body18, label %for.end21, !prof !4
|
|
|
|
; CHECK: - for.body18: float = 500000.5, int = 4000003
|
|
; Innermost body of the double-nested loop.
for.body18: ; preds = %for.cond16
|
|
call void @bar()
|
|
br label %for.inc19
|
|
|
|
; j++ and back to the inner header.
for.inc19: ; preds = %for.body18
|
|
%9 = load i32, i32* %j, align 4
|
|
%inc20 = add nsw i32 %9, 1
|
|
store i32 %inc20, i32* %j, align 4
|
|
br label %for.cond16
|
|
|
|
for.end21: ; preds = %for.cond16
|
|
br label %for.inc22
|
|
|
|
; i++ and back to the outer header.
for.inc22: ; preds = %for.end21
|
|
%10 = load i32, i32* %i, align 4
|
|
%inc23 = add nsw i32 %10, 1
|
|
store i32 %inc23, i32* %i, align 4
|
|
br label %for.cond13
|
|
|
|
; Exit of the second nest: print g, then reset g and i before the single loop.
for.end24: ; preds = %for.cond13
|
|
%11 = load i32, i32* @g, align 4
|
|
%call25 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11)
|
|
store i32 0, i32* @g, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond26
|
|
|
|
; Third (single) loop header: continue while i < 1000000 (profile data !5).
for.cond26: ; preds = %for.inc29, %for.end24
|
|
%12 = load i32, i32* %i, align 4
|
|
%cmp27 = icmp slt i32 %12, 1000000
|
|
br i1 %cmp27, label %for.body28, label %for.end31, !prof !5
|
|
|
|
; CHECK: - for.body28: float = 500000.5, int = 4000003
|
|
; Body of the single loop.
for.body28: ; preds = %for.cond26
|
|
call void @bar()
|
|
br label %for.inc29
|
|
|
|
; i++ and back to the loop header.
for.inc29: ; preds = %for.body28
|
|
%13 = load i32, i32* %i, align 4
|
|
%inc30 = add nsw i32 %13, 1
|
|
store i32 %inc30, i32* %i, align 4
|
|
br label %for.cond26
|
|
|
|
; Exit of the third loop: print g, reset g, and return the value held in
; %retval (stored as 0 in the entry block).
for.end31: ; preds = %for.cond26
|
|
%14 = load i32, i32* @g, align 4
|
|
%call32 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14)
|
|
store i32 0, i32* @g, align 4
|
|
%15 = load i32, i32* %retval
|
|
ret i32 %15
|
|
}
|
|
|
|
; Producer identification: !llvm.ident points at the clang version string in !0.
!llvm.ident = !{!0}
|
|
|
|
!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
|
|
; Branch-weight profile nodes attached to the loop-header branches in @main.
; NOTE(review): per LLVM's branch_weights convention the two weights should
; follow the branch's successor order (loop body first, loop exit second) --
; confirm against the LLVM branch-weight documentation.
;
; !1: outer headers for.cond and for.cond13 (i < 100).
!1 = !{!"branch_weights", i32 101, i32 2}
|
|
; !2: middle header for.cond1 of the first nest (j < 100).
!2 = !{!"branch_weights", i32 10001, i32 101}
|
|
; !3: inner header for.cond4 of the first nest (k < 100).
!3 = !{!"branch_weights", i32 1000001, i32 10001}
|
|
; !4: inner header for.cond16 of the second nest (j < 10000).
!4 = !{!"branch_weights", i32 1000001, i32 101}
|
|
; !5: header for.cond26 of the single loop (i < 1000000).
!5 = !{!"branch_weights", i32 1000001, i32 2}
|