mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-24 13:33:37 +02:00
6e6c58680c
Summary: The current loop complete-unroll algorithm checks whether complete unrolling will reduce the runtime by a certain percentage. If so, it applies a fixed boosting factor to the threshold (by discounting cost). The problem with this approach is that the threshold changes abruptly. This patch makes the boosting factor a function of the runtime reduction percentage, capped by a fixed threshold. In this way, the threshold changes continuously. The patch also simplifies the code by removing one parameter from UP. The patch only affects the code-gen of two speccpu2006 benchmarks: 445.gobmk binary size decreases 0.08%, no performance change. 464.h264ref binary size increases 0.24%, no performance change. Reviewers: mzolotukhin, chandlerc Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D26989 llvm-svn: 290737
24 lines
667 B
LLVM
24 lines
667 B
LLVM
; Test invocation: feeds this file to opt with the -loop-unroll pass and
; textual output (-S), using -unroll-threshold=10,
; -unroll-max-iteration-count-to-analyze=100 and
; -unroll-max-percent-threshold-boost=200, then pipes the result to FileCheck,
; which matches it against the directives embedded in this file.
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
|
|
; Data layout string applied to the whole module.
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
; Unrolling test for a counted loop that carries a second PHI besides the
; counter. Starting from 0, every pass ORs in bit 0 and then bit 1, so the
; carried value is 3 from the first pass onward. The directives below require
; that no conditional branch is left in the function and that it returns the
; constant 3.
define i64 @propagate_loop_phis() {
; CHECK-LABEL: @propagate_loop_phis(
; CHECK-NOT: br i1
; CHECK: ret i64 3
entry:
  br label %body

body:
  ; Pass counter: 0 when coming from entry, otherwise the bumped value.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %body ]
  ; Carried bit mask: 0 when coming from entry, otherwise last pass's mask.
  %mask = phi i64 [ 0, %entry ], [ %mask.next, %body ]
  %mask.lo = or i64 %mask, 1
  %mask.next = or i64 %mask.lo, 2
  %idx.next = add nuw nsw i64 %idx, 1
  ; Leave the loop once the bumped counter is at least 10 (signed compare).
  %done = icmp sge i64 %idx.next, 10
  br i1 %done, label %exit, label %body

exit:
  ; Single-entry PHI forwarding the final mask out of the loop.
  %mask.out = phi i64 [ %mask.next, %body ]
  ret i64 %mask.out
}
|