mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
e90406ee2a
Summary: The old pass manager separated speed optimization and size optimization levels into two unsigned values. Coalescing both in an enum in the new pass manager may lead to unintentional casts and comparisons. In particular, taking a look at how the loop unroll passes were constructed previously, Os/Oz are now (i.e., in the new pass manager) treated just like O3, likely unintentionally. This change disallows raw comparisons between optimization levels, to avoid such unintended effects. As an effect, the O{s|z} behavior changes for loop unrolling and loop unroll and jam, matching O2 rather than O3. The change also parameterizes the threshold values used for loop unrolling, primarily to aid testing. Reviewers: tejohnson, davidxl Reviewed By: tejohnson Subscribers: zzheng, ychen, mehdi_amini, hiraditya, steven_wu, dexonsmith, dang, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D72547
62 lines
2.3 KiB
LLVM
62 lines
2.3 KiB
LLVM
; RUN: opt < %s -S -passes="default<O2>" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O2
; RUN: opt < %s -S -passes="default<O3>" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O3
; RUN: opt < %s -S -passes="default<Os>" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Os
; RUN: opt < %s -S -passes="default<Oz>" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Oz

; Check that Os and Oz are optimized like O2, not like O3. To easily highlight
; the behavior, we artificially disable unrolling for anything but O3 by setting
; the default threshold to 0.
;
; All four invocations above differ only in the pipeline level and the check
; prefix; each one passes -unroll-threshold-default=0 and
; -unroll-threshold-aggressive=300 together with -enable-npm-unroll-and-jam.
;
; The directives below key on the label "for.inner.1": it has to show up in
; the O3 output and must be absent from the O2, Os and Oz outputs.
; (Presumably that label is the extra copy of the for.inner block produced by
; unroll-and-jam - confirm against the pass output.)

; O3: for.inner.1
; O2-NOT: for.inner.1
; Os-NOT: for.inner.1
; Oz-NOT: for.inner.1

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; @test1 - two-deep loop nest that serves as the unroll-and-jam candidate.
;
; Rough C equivalent, as read from the IR below:
;
;   if (J != 0 && I != 0)
;     for (i = 0; i != I; ++i) {
;       sum = 0;
;       for (j = 0; j != J; ++j)
;         sum += B[j];
;       A[i] = sum;
;     }
;
; Parameters:
;   %I - outer trip count (compared against %add8 in for.latch)
;   %J - inner trip count (compared against %inc in for.inner)
;   %A - destination array, noalias nocapture; one i32 stored per outer iteration
;   %B - source array, noalias nocapture readonly; one i32 loaded per inner iteration
;
; Block labels are significant: the FileCheck directives in this file match on
; a label derived from "for.inner", so do not rename the blocks.
define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
entry:
  ; Guard: enter the nest only when both trip counts are non-zero.
  ; Naming caveat - %cmp tests %J, %cmpJ tests %I, and %or.cond is an 'and'.
  %cmp = icmp ne i32 %J, 0
  %cmpJ = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmpJ
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  ; %i - outer induction variable: 0 from the preheader, %add8 from the latch.
  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  ; %j   - inner induction variable, restarted at 0 for every outer iteration.
  ; %sum - running total of B[0..j), also restarted at 0.
  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
  %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
  %0 = load i32, i32* %arrayidx, align 4, !tbaa !5
  %add = add i32 %0, %sum
  %inc = add nuw i32 %j, 1
  ; Bottom-tested loop: leave once the incremented index equals %J.
  %exitcond = icmp eq i32 %inc, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  ; Single-entry phi carrying the inner loop's final sum out of the loop.
  %add.lcssa = phi i32 [ %add, %for.inner ]
  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
  store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5
  %add8 = add nuw i32 %i, 1
  ; Bottom-tested loop: leave once the incremented index equals %I.
  %exitcond25 = icmp eq i32 %add8, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; TBAA metadata used by the `!tbaa !5` annotations on the load and the store
; in @test1.
;   !5 - access tag: base type !6, access type !6, offset 0
;   !6 - type node "int", parent !7, offset 0
;   !7 - type node "omnipotent char", parent !8, offset 0
;   !8 - root node "Simple C/C++ TBAA"
!5 = !{!6, !6, i64 0}
!6 = !{!"int", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C/C++ TBAA"}