mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
865492560f
The current approach doesn't work well in cases where multiple paths are predicted to be "cold". By "cold" paths I mean those containing an "unreachable" instruction, a call marked with the 'cold' attribute, or the 'unwind' handler of an 'invoke' instruction. The issue is that the heuristics are applied one by one until the first match, which essentially ignores the relative hotness/coldness of other paths. The new approach unifies the processing of "cold" paths by assigning a predefined absolute weight to each block estimated to be "cold". Then we propagate these weights up/down the IR similarly to the existing approach, and finally set up edge probabilities based on the estimated block weights. One important difference is how we propagate weight up. The existing approach propagates the same weight to all blocks that are post-dominated by a block with some "known" weight. This is useless, at least because it always gives a 50/50 distribution, which is assumed by default anyway. Worse, it causes the algorithm to skip further heuristics and can miss setting a more accurate probability. The new algorithm propagates the weight up only to the blocks that dominate and are post-dominated by a block with some "known" weight. In other words, those blocks that are always either executed together or not executed together. In addition, the new approach processes loops in a uniform way as well. Essentially, loop exit edges are estimated as "cold" paths relative to back edges and should be considered uniformly with other coldness/hotness markers. Reviewed By: yrouban Differential Revision: https://reviews.llvm.org/D79485
207 lines
4.7 KiB
LLVM
207 lines
4.7 KiB
LLVM
; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s --check-prefix=V7
; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi | FileCheck %s --check-prefix=V8
;
; Three llc invocations drive this file. The default prefix and the V7 prefix
; are both checked against arm-apple-darwin output; the V8 prefix is checked
; against armv8-none-linux-gnueabi output.
|
|
|
|
|
|
; Returns a - b when a > b (signed), otherwise 0.
; The compare and the subtraction use the same operands in the same order, so
; the output is expected to contain a flag-setting subs and no cmp after it.
define i32 @f(i32 %a, i32 %b) nounwind ssp {
entry:
; CHECK-LABEL: f:
; CHECK: subs
; CHECK-NOT: cmp
  %cmp = icmp sgt i32 %a, %b
  %sub = sub nsw i32 %a, %b
  %sub. = select i1 %cmp, i32 %sub, i32 0
  ret i32 %sub.
}
|
|
|
|
; Returns b - a when a < b (signed), otherwise 0.
; Here the subtraction operands are swapped relative to the compare; the
; output is still expected to contain a subs and no cmp after it.
define i32 @g(i32 %a, i32 %b) nounwind ssp {
entry:
; CHECK-LABEL: g:
; CHECK: subs
; CHECK-NOT: cmp
  %cmp = icmp slt i32 %a, %b
  %sub = sub nsw i32 %b, %a
  %sub. = select i1 %cmp, i32 %sub, i32 0
  ret i32 %sub.
}
|
|
|
|
; Returns a - 3 when a > 3 (signed), otherwise b.
; Same pattern as the register/register case, but the compare and the
; subtraction share the immediate 3. A subs and no following cmp is expected.
define i32 @h(i32 %a, i32 %b) nounwind ssp {
entry:
; CHECK-LABEL: h:
; CHECK: subs
; CHECK-NOT: cmp
  %cmp = icmp sgt i32 %a, 3
  %sub = sub nsw i32 %a, 3
  %sub. = select i1 %cmp, i32 %sub, i32 %b
  ret i32 %sub.
}
|
|
|
|
; rdar://11725965
; Returns b - a when a < b (unsigned), otherwise 0.
; Unsigned variant with swapped subtraction operands and no nsw flag on the
; sub; a subs and no following cmp is expected.
define i32 @i(i32 %a, i32 %b) nounwind readnone ssp {
entry:
; CHECK-LABEL: i:
; CHECK: subs
; CHECK-NOT: cmp
  %cmp = icmp ult i32 %a, %b
  %sub = sub i32 %b, %a
  %sub. = select i1 %cmp, i32 %sub, i32 0
  ret i32 %sub.
}
|
|
; If CPSR is live-out, we can't remove cmp if there exists
; a swapped sub.
;
; entry computes a - b and branches on b == a. if.then then compares b > a
; (signed) and selects between the difference and a; if.else returns the
; difference directly. Both a sub and a cmp are expected in the output.
define i32 @j(i32 %a, i32 %b) nounwind {
entry:
; CHECK-LABEL: j:
; CHECK: sub
; CHECK: cmp
  %cmp = icmp eq i32 %b, %a
  %sub = sub nsw i32 %a, %b
  br i1 %cmp, label %if.then, label %if.else

if.then:
  %cmp2 = icmp sgt i32 %b, %a
  %sel = select i1 %cmp2, i32 %sub, i32 %a
  ret i32 %sel

if.else:
  ret i32 %sub
}
|
|
|
|
; If the sub/rsb instruction is predicated, we can't use the flags.
; <rdar://problem/12263428>
; Test case from MultiSource/Benchmarks/Ptrdist/bc/number.s
;
; %retval.0.i is %val.2.i when %cond is true and 0 - %val.2.i otherwise. The
; function returns 17 when that value is zero and 23 when it is not. The
; output is expected to contain a conditional rsbeq followed by a cmp.
;
; The function name is matched as a label (with the trailing colon), like the
; other functions in this file, so that the following checks are scoped to
; this function's output.
; CHECK-LABEL: bc_raise:
; CHECK: rsbeq
; CHECK: cmp
define i32 @bc_raise(i1 %cond) nounwind ssp {
entry:
  %val.2.i = select i1 %cond, i32 0, i32 1
  %sub.i = sub nsw i32 0, %val.2.i
  %retval.0.i = select i1 %cond, i32 %val.2.i, i32 %sub.i
  %cmp1 = icmp eq i32 %retval.0.i, 0
  br i1 %cmp1, label %land.lhs.true, label %if.end11

land.lhs.true:                                    ; preds = %entry
  ret i32 17

if.end11:                                         ; preds = %entry
  ret i32 23
}
|
|
|
|
; When considering the producer of cmp's src as the subsuming instruction,
; only consider that when the comparison is to 0.
;
; The difference a - b is compared for equality against 17 (not 0), and the
; result selects between x and y. Both a sub and a separate cmp are expected.
define i32 @cmp_src_nonzero(i32 %a, i32 %b, i32 %x, i32 %y) {
entry:
; CHECK-LABEL: cmp_src_nonzero:
; CHECK: sub
; CHECK: cmp
  %sub = sub i32 %a, %b
  %cmp = icmp eq i32 %sub, 17
  %ret = select i1 %cmp, i32 %x, i32 %y
  ret i32 %ret
}
|
|
|
|
; Returns x when a - b == 0, otherwise y (single precision).
; The difference is only used by the compare against zero. No cmp is expected
; under either the default or the V8 prefix, and the V8 output is expected to
; use vseleq.f32 for the select.
define float @float_sel(i32 %a, i32 %b, float %x, float %y) {
entry:
; CHECK-LABEL: float_sel:
; CHECK-NOT: cmp
; V8-LABEL: float_sel:
; V8-NOT: cmp
; V8: vseleq.f32
  %sub = sub i32 %a, %b
  %cmp = icmp eq i32 %sub, 0
  %ret = select i1 %cmp, float %x, float %y
  ret float %ret
}
|
|
|
|
; Returns x when a - b == 0, otherwise y (double precision).
; Double-precision counterpart of the float select test. No cmp is expected
; under either the default or the V8 prefix, and the V8 output is expected to
; use vseleq.f64 for the select.
define double @double_sel(i32 %a, i32 %b, double %x, double %y) {
entry:
; CHECK-LABEL: double_sel:
; CHECK-NOT: cmp
; V8-LABEL: double_sel:
; V8-NOT: cmp
; V8: vseleq.f64
  %sub = sub i32 %a, %b
  %cmp = icmp eq i32 %sub, 0
  %ret = select i1 %cmp, double %x, double %y
  ret double %ret
}
|
|
|
|
; Zero-initialised i32 global that functions in this file store to and load
; from, giving their subtraction results an observable use or source.
@t = common global i32 0
|
|
; Stores a - b to @t and returns x when a > b (signed), otherwise y.
; The compare and the subtraction use the same operands in the same order.
; The default-prefix output is expected to contain a subs with no cmp after
; it; the V8 output is expected to contain a vsel.
define double @double_sub(i32 %a, i32 %b, double %x, double %y) {
entry:
; CHECK-LABEL: double_sub:
; CHECK: subs
; CHECK-NOT: cmp
; V8-LABEL: double_sub:
; V8: vsel
  %cmp = icmp sgt i32 %a, %b
  %sub = sub i32 %a, %b
  store i32 %sub, i32* @t
  %ret = select i1 %cmp, double %x, double %y
  ret double %ret
}
|
|
|
|
; Stores b - a to @t and returns x when a > b (signed), otherwise y.
; The subtraction operands are swapped relative to the compare. The V7 output
; is expected to reach a subs without any cmp before it. The V8 output is
; expected to have no subs before an explicit cmp, followed by a vsel.
define double @double_sub_swap(i32 %a, i32 %b, double %x, double %y) {
entry:
; V7-LABEL: double_sub_swap:
; V7-NOT: cmp
; V7: subs
; V8-LABEL: double_sub_swap:
; V8-NOT: subs
; V8: cmp
; V8: vsel
  %cmp = icmp sgt i32 %a, %b
  %sub = sub i32 %b, %a
  %ret = select i1 %cmp, double %x, double %y
  store i32 %sub, i32* @t
  ret double %ret
}
|
|
|
|
; External functions called on the two branch targets of the compare-with-zero
; tests in this file; every call to them is followed by unreachable.
declare void @abort()
declare void @exit(i32)
|
|
|
|
; If the comparison uses the V bit (signed overflow/underflow), we can't
; omit the comparison.
;
; Loads @t, subtracts 17 and branches on the result being negative (signed).
; Both successors end in a call followed by unreachable, so the function never
; returns a value and its parameters are unused. The output is expected to
; contain a sub, then a cmn, then a ble.
define i32 @cmp_slt0(i32 %a, i32 %b, i32 %x, i32 %y) {
entry:
; CHECK-LABEL: cmp_slt0:
; CHECK: sub
; CHECK: cmn
; CHECK: ble
  %load = load i32, i32* @t, align 4
  %sub = sub i32 %load, 17
  %cmp = icmp slt i32 %sub, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:
  call void @abort()
  unreachable

if.else:
  call void @exit(i32 0)
  unreachable
}
|
|
|
|
; Same for the C bit. (Note the ult X, 0 is trivially
; false, so the DAG combiner may or may not optimize it).
;
; Loads @t, subtracts 17 and branches on an unsigned less-than-zero compare of
; the result. Both successors end in a call followed by unreachable, so the
; function never returns a value and its parameters are unused. The output is
; expected to contain a sub, then a cmp, then a bhs.
define i32 @cmp_ult0(i32 %a, i32 %b, i32 %x, i32 %y) {
entry:
; CHECK-LABEL: cmp_ult0:
; CHECK: sub
; CHECK: cmp
; CHECK: bhs
  %load = load i32, i32* @t, align 4
  %sub = sub i32 %load, 17
  %cmp = icmp ult i32 %sub, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:
  call void @abort()
  unreachable

if.else:
  call void @exit(i32 0)
  unreachable
}
|