1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-31 16:02:52 +01:00
llvm-mirror/test/CodeGen/X86/optimize-max-3.ll
Andrew Trick d09b64fc25 Instruction scheduling itinerary for Intel Atom.
Adds an instruction itinerary to all x86 instructions, giving each a default latency of 1, using the InstrItinClass IIC_DEFAULT.

Sets specific latencies for Atom for the instructions in files X86InstrCMovSetCC.td, X86InstrArithmetic.td, X86InstrControl.td, and X86InstrShiftRotate.td. The Atom latencies for the remainder of the x86 instructions will be set in subsequent patches.

Adds a test to verify that the scheduler is working.

Also changes the scheduling preference to "Hybrid" for i386 Atom, while leaving x86_64 as ILP.

Patch by Preston Gurd!

llvm-svn: 149558
2012-02-01 23:20:51 +00:00

77 lines
3.2 KiB
LLVM

; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
; LSR's OptimizeMax should eliminate the select (max).
; CHECK: foo:
; CHECK-NOT: cmov
; CHECK: jle
; @foo(n, p): scales p[0 .. trip-1] in place by 2.2, where the trip count is
; produced by a compare+select in the loop preheader. This is the input the
; test header above says LSR's OptimizeMax should simplify, so the IR shape
; here is deliberate and must not be "cleaned up" by hand.
;   n - signed 64-bit bound; the function returns immediately when n < 0.
;   p - pointer to the doubles that are read, multiplied and stored back.
define void @foo(i64 %n, double* nocapture %p) nounwind {
entry:
; Guard: skip the loop entirely for negative n (signed compare).
%cmp6 = icmp slt i64 %n, 0 ; <i1> [#uses=1]
br i1 %cmp6, label %for.end, label %for.body.preheader
for.body.preheader: ; preds = %entry
; Trip count = (n > 0) ? n + 1 : 1. The entry guard means n >= 0 on this path,
; and for n == 0 the value n + 1 is also 1, so both select arms agree with
; n + 1 whenever this block is reached.
%tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
%n.op = add i64 %n, 1 ; <i64> [#uses=1]
%tmp1 = select i1 %tmp, i64 %n.op, i64 1 ; <i64> [#uses=1]
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
; Induction variable %i starts at 0 and advances by 1 per iteration.
%i = phi i64 [ %i.next, %for.body ], [ 0, %for.body.preheader ] ; <i64> [#uses=2]
; p[i] = p[i] * 2.2
%arrayidx = getelementptr double* %p, i64 %i ; <double*> [#uses=2]
%t4 = load double* %arrayidx ; <double> [#uses=1]
%mul = fmul double %t4, 2.200000e+00 ; <double> [#uses=1]
store double %mul, double* %arrayidx
%i.next = add nsw i64 %i, 1 ; <i64> [#uses=2]
; Exit by equality against the selected trip count; this compare is the use of
; the select result inside the loop.
%exitcond = icmp eq i64 %i.next, %tmp1 ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; In this case, one of the max operands is another max, which folds,
; leaving a two-operand max which doesn't fit the usual pattern.
; OptimizeMax should handle this case.
; PR7454
; CHECK: _Z18GenerateStatusPagei:
; CHECK: jle
; CHECK-NOT: cmov
; CHECK: xorl {{%edi, %edi|%ecx, %ecx|%eax, %eax}}
; CHECK-NEXT: align
; CHECK-NEXT: BB1_2:
; CHECK: callq
; CHECK-NEXT: incl [[BX:%[a-z0-9]+]]
; CHECK-NEXT: cmpl [[R14:%[a-z0-9]+]], [[BX]]
; CHECK: jl
; @_Z18GenerateStatusPagei(jobs_to_display): reduced loop for the PR named in
; the test comment above. The loop bound is built from two stacked
; compare+select pairs: %tmp = (jobs_to_display > 0 ? jobs_to_display : 0) in
; the entry block, then %smax = (%tmp > 1 ? %tmp : 1) in the preheader.
; The IR shape is what the test exercises, so it is kept exactly as is.
define void @_Z18GenerateStatusPagei(i32 %jobs_to_display) nounwind {
entry:
; Clamp the argument below at 0 (signed), then enter the loop only if the
; clamped value is strictly positive.
%cmp.i = icmp sgt i32 %jobs_to_display, 0 ; <i1> [#uses=1]
%tmp = select i1 %cmp.i, i32 %jobs_to_display, i32 0 ; <i32> [#uses=3]
%cmp8 = icmp sgt i32 %tmp, 0 ; <i1> [#uses=1]
br i1 %cmp8, label %bb.nph, label %for.end
bb.nph: ; preds = %entry
; Second select: trip count = (%tmp > 1) ? %tmp : 1. This block is reached only
; when %tmp > 0, so for %tmp == 1 both arms yield 1.
%tmp11 = icmp sgt i32 %tmp, 1 ; <i1> [#uses=1]
%smax = select i1 %tmp11, i32 %tmp, i32 1 ; <i32> [#uses=1]
br label %for.body
for.body: ; preds = %for.body, %bb.nph
; %i.010 counts iterations from 0; %it.0.09 starts as null and is replaced each
; iteration by the pointer returned from the call below.
%i.010 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] ; <i32> [#uses=1]
%it.0.09 = phi float* [ null, %bb.nph ], [ %call.i, %for.body ] ; <float*> [#uses=1]
; The call keeps the loop body non-trivial; its result only feeds the phi above.
%call.i = call float* @_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base(float* %it.0.09) ; <float*> [#uses=1]
%inc = add nsw i32 %i.010, 1 ; <i32> [#uses=2]
; Equality exit test against the selected bound %smax.
%exitcond = icmp eq i32 %inc, %smax ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; External function called once per iteration from the loop in
; @_Z18GenerateStatusPagei; only its declaration is needed by this test.
declare float* @_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base(float*)