mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
3919ffc012
BranchProbability is currently represented by its numerator and denominator, both of type uint32_t. This patch changes this representation into a fixed-point value that is represented by a numerator of type uint32_t and a constant denominator of 1<<31. This is quite similar to the representation of BlockMass in BlockFrequencyInfoImpl.h. There are several pros and cons of this change: Pros: 1. It uses only half the space of the current one. 2. Some operations are much faster, such as addition, subtraction, comparison, and scaling by an integer. Cons: 1. Constructing a probability using an arbitrary numerator and denominator needs additional calculations. 2. It is a little less precise than before as we use a fixed denominator. For example, 1 - 2/3 may not be exactly identical to 1/3 (this will lead to many BranchProbability unit test failures). This should not matter when we only use it for branch probability. If we use it like a rational value for some precise calculations we may need another construct like ValueRatio. One important reason for this change is that we propose to store branch probabilities instead of edge weights in MachineBasicBlock. We also want clients to use probability instead of weight when adding successors to an MBB. The current BranchProbability takes more space, which may be a concern. Differential revision: http://reviews.llvm.org/D12603 llvm-svn: 248633
205 lines
6.3 KiB
LLVM
205 lines
6.3 KiB
LLVM
; RUN: opt < %s -analyze -block-freq | FileCheck %s
|
|
|
|
; This code contains three loops. One is triple-nested, the
|
|
; second is double nested and the third is a single loop. At
|
|
; runtime, all three loops execute 1,000,000 times each. We used to
|
|
; give different frequencies to each of the loops because loop
|
|
; scales were limited to no more than 4,096.
|
|
;
|
|
; This was penalizing the hotness of the second and third loops
|
|
; because BFI was reducing the loop scale for for.cond16 and
|
|
; for.cond26 to a max of 4,096.
|
|
;
|
|
; Without this restriction, all loops are now correctly given the same
|
|
; frequency values.
|
|
;
|
|
; Original C code:
|
|
;
|
|
;
|
|
; int g;
|
|
; __attribute__((noinline)) void bar() {
|
|
; g++;
|
|
; }
|
|
;
|
|
; extern int printf(const char*, ...);
|
|
;
|
|
; int main()
|
|
; {
|
|
; int i, j, k;
|
|
;
|
|
; g = 0;
|
|
; for (i = 0; i < 100; i++)
|
|
; for (j = 0; j < 100; j++)
|
|
; for (k = 0; k < 100; k++)
|
|
; bar();
|
|
;
|
|
; printf ("g = %d\n", g);
|
|
; g = 0;
|
|
;
|
|
; for (i = 0; i < 100; i++)
|
|
; for (j = 0; j < 10000; j++)
|
|
; bar();
|
|
;
|
|
; printf ("g = %d\n", g);
|
|
; g = 0;
|
|
;
|
|
;
|
|
; for (i = 0; i < 1000000; i++)
|
|
; bar();
|
|
;
|
|
; printf ("g = %d\n", g);
|
|
; g = 0;
|
|
; }
|
|
|
|
; Global counter. @main stores 0 to it before each loop nest and passes its
; value to printf after each nest; per the C listing above, bar() increments it.
@g = common global i32 0, align 4
|
|
; NUL-terminated format string "g = %d\n" handed to printf by @main.
@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1
|
|
|
|
; External function called from the innermost body of every loop nest in @main.
declare void @bar()
|
|
; External variadic printf, called by @main after each loop nest.
declare i32 @printf(i8*, ...)
|
|
|
|
; CHECK: Printing analysis {{.*}} for function 'main':
|
|
; CHECK-NEXT: block-frequency-info: main
|
|
; @main runs three loop nests in sequence: a triple nest (100 x 100 x 100),
; a double nest (100 x 10000) and a single loop (1000000). Each innermost body
; calls @bar, and after each nest @g is printed and reset to 0. Every
; loop-header branch carries a !prof branch-weight node; these are presumably
; what the -block-freq analysis in the RUN line consumes.
define i32 @main() {
|
|
; Stack slots for the return value and the loop counters i, j, k; zero the
; return slot, @g and i before entering the first nest.
entry:
|
|
%retval = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
%j = alloca i32, align 4
|
|
%k = alloca i32, align 4
|
|
store i32 0, i32* %retval
|
|
store i32 0, i32* @g, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
; Nest 1, outer header: stay in the loop while i < 100 (weights from !1).
for.cond: ; preds = %for.inc10, %entry
|
|
%0 = load i32, i32* %i, align 4
|
|
%cmp = icmp slt i32 %0, 100
|
|
br i1 %cmp, label %for.body, label %for.end12, !prof !1
|
|
|
|
; Nest 1, outer body: reset j, then enter the middle loop.
for.body: ; preds = %for.cond
|
|
store i32 0, i32* %j, align 4
|
|
br label %for.cond1
|
|
|
|
; Nest 1, middle header: stay in the loop while j < 100 (weights from !2).
for.cond1: ; preds = %for.inc7, %for.body
|
|
%1 = load i32, i32* %j, align 4
|
|
%cmp2 = icmp slt i32 %1, 100
|
|
br i1 %cmp2, label %for.body3, label %for.end9, !prof !2
|
|
|
|
; Nest 1, middle body: reset k, then enter the innermost loop.
for.body3: ; preds = %for.cond1
|
|
store i32 0, i32* %k, align 4
|
|
br label %for.cond4
|
|
|
|
; Nest 1, innermost header: stay in the loop while k < 100 (weights from !3).
for.cond4: ; preds = %for.inc, %for.body3
|
|
%2 = load i32, i32* %k, align 4
|
|
%cmp5 = icmp slt i32 %2, 100
|
|
br i1 %cmp5, label %for.body6, label %for.end, !prof !3
|
|
|
|
; Nest 1, innermost body: the call to @bar. The FileCheck directive on the
; next line pins the frequency expected for this block.
; CHECK: - for.body6: float = 500000.5, int = 4000004
|
|
for.body6: ; preds = %for.cond4
|
|
call void @bar()
|
|
br label %for.inc
|
|
|
|
; Nest 1, innermost latch: k = k + 1, back to the innermost header.
for.inc: ; preds = %for.body6
|
|
%3 = load i32, i32* %k, align 4
|
|
%inc = add nsw i32 %3, 1
|
|
store i32 %inc, i32* %k, align 4
|
|
br label %for.cond4
|
|
|
|
; Nest 1, innermost exit: forwards to the middle-loop latch.
for.end: ; preds = %for.cond4
|
|
br label %for.inc7
|
|
|
|
; Nest 1, middle latch: j = j + 1, back to the middle header.
for.inc7: ; preds = %for.end
|
|
%4 = load i32, i32* %j, align 4
|
|
%inc8 = add nsw i32 %4, 1
|
|
store i32 %inc8, i32* %j, align 4
|
|
br label %for.cond1
|
|
|
|
; Nest 1, middle exit: forwards to the outer-loop latch.
for.end9: ; preds = %for.cond1
|
|
br label %for.inc10
|
|
|
|
; Nest 1, outer latch: i = i + 1, back to the outer header.
for.inc10: ; preds = %for.end9
|
|
%5 = load i32, i32* %i, align 4
|
|
%inc11 = add nsw i32 %5, 1
|
|
store i32 %inc11, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
; End of nest 1: print @g, then reset @g and i and enter nest 2.
for.end12: ; preds = %for.cond
|
|
%6 = load i32, i32* @g, align 4
|
|
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6)
|
|
store i32 0, i32* @g, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond13
|
|
|
|
; Nest 2, outer header: stay in the loop while i < 100 (weights from !1).
for.cond13: ; preds = %for.inc22, %for.end12
|
|
%7 = load i32, i32* %i, align 4
|
|
%cmp14 = icmp slt i32 %7, 100
|
|
br i1 %cmp14, label %for.body15, label %for.end24, !prof !1
|
|
|
|
; Nest 2, outer body: reset j, then enter the inner loop.
for.body15: ; preds = %for.cond13
|
|
store i32 0, i32* %j, align 4
|
|
br label %for.cond16
|
|
|
|
; Nest 2, inner header: stay in the loop while j < 10000 (weights from !4).
for.cond16: ; preds = %for.inc19, %for.body15
|
|
%8 = load i32, i32* %j, align 4
|
|
%cmp17 = icmp slt i32 %8, 10000
|
|
br i1 %cmp17, label %for.body18, label %for.end21, !prof !4
|
|
|
|
; Nest 2, inner body: the call to @bar. The FileCheck directive on the next
; line pins the frequency expected for this block.
; CHECK: - for.body18: float = 499999.9, int = 3999998
|
|
for.body18: ; preds = %for.cond16
|
|
call void @bar()
|
|
br label %for.inc19
|
|
|
|
; Nest 2, inner latch: j = j + 1, back to the inner header.
for.inc19: ; preds = %for.body18
|
|
%9 = load i32, i32* %j, align 4
|
|
%inc20 = add nsw i32 %9, 1
|
|
store i32 %inc20, i32* %j, align 4
|
|
br label %for.cond16
|
|
|
|
; Nest 2, inner exit: forwards to the outer-loop latch.
for.end21: ; preds = %for.cond16
|
|
br label %for.inc22
|
|
|
|
; Nest 2, outer latch: i = i + 1, back to the outer header.
for.inc22: ; preds = %for.end21
|
|
%10 = load i32, i32* %i, align 4
|
|
%inc23 = add nsw i32 %10, 1
|
|
store i32 %inc23, i32* %i, align 4
|
|
br label %for.cond13
|
|
|
|
; End of nest 2: print @g, then reset @g and i and enter the single loop.
for.end24: ; preds = %for.cond13
|
|
%11 = load i32, i32* @g, align 4
|
|
%call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11)
|
|
store i32 0, i32* @g, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond26
|
|
|
|
; Loop 3 header: stay in the loop while i < 1000000 (weights from !5).
for.cond26: ; preds = %for.inc29, %for.end24
|
|
%12 = load i32, i32* %i, align 4
|
|
%cmp27 = icmp slt i32 %12, 1000000
|
|
br i1 %cmp27, label %for.body28, label %for.end31, !prof !5
|
|
|
|
; Loop 3 body: the call to @bar. The FileCheck directive on the next line
; pins the frequency expected for this block.
; CHECK: - for.body28: float = 499995.2, int = 3999961
|
|
for.body28: ; preds = %for.cond26
|
|
call void @bar()
|
|
br label %for.inc29
|
|
|
|
; Loop 3 latch: i = i + 1, back to the header.
for.inc29: ; preds = %for.body28
|
|
%13 = load i32, i32* %i, align 4
|
|
%inc30 = add nsw i32 %13, 1
|
|
store i32 %inc30, i32* %i, align 4
|
|
br label %for.cond26
|
|
|
|
; End of loop 3: print @g, reset it, and return the value held in %retval
; (stored as 0 in the entry block and not written again).
for.end31: ; preds = %for.cond26
|
|
%14 = load i32, i32* @g, align 4
|
|
%call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14)
|
|
store i32 0, i32* @g, align 4
|
|
%15 = load i32, i32* %retval
|
|
ret i32 %15
|
|
}
|
|
|
|
; Module-level ident metadata; refers to the producer string in !0.
!llvm.ident = !{!0}
|
|
|
|
!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
|
|
; Branch-weight nodes attached through !prof to the loop-header branches in
; @main. In each node the first weight belongs to the "stay in loop" successor
; and the second to the loop exit, matching the successor order of the branch.
; !1: outer headers with bound 100 (for.cond and for.cond13).
!1 = !{!"branch_weights", i32 101, i32 2}
|
|
; !2: middle header of the triple nest (for.cond1, bound 100).
!2 = !{!"branch_weights", i32 10001, i32 101}
|
|
; !3: innermost header of the triple nest (for.cond4, bound 100).
!3 = !{!"branch_weights", i32 1000001, i32 10001}
|
|
; !4: inner header of the double nest (for.cond16, bound 10000).
!4 = !{!"branch_weights", i32 1000001, i32 101}
|
|
; !5: header of the single loop (for.cond26, bound 1000000).
!5 = !{!"branch_weights", i32 1000001, i32 2}
|