mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-01 16:33:37 +01:00
acaf9e9ce1
RAGreedy::tryAssign will now evict interference from the preferred register even when another register is free. To support this, add the EvictionCost struct that counts how many hints are broken by an eviction. We don't want to break one hint just to satisfy another. Rename canEvict to shouldEvict, and add the first bit of eviction policy that doesn't depend on spill weights: Always make room in the preferred register as long as the evictees can be split and aren't already assigned to their preferred register. Also make the CSR avoidance more accurate. When looking for a cheaper register it is OK to use a new volatile register. Only CSR aliases that have never been used before should be avoided. llvm-svn: 134735
63 lines
2.6 KiB
LLVM
63 lines
2.6 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s

; Full strength reduction wouldn't reduce register pressure, so LSR should
; stick with indexing here.

; The register alternations in the patterns below let one set of patterns
; serve both triples: the three pointer arguments (y, x, n) arrive in
; rdi/rsi/rdx under the SysV ABI and in rcx/rdx/r8 under the Win64 ABI.
; The loop is expected to keep a single index register (%rax) scaled by 4
; for both the load from x and the store to y.
; CHECK: movaps (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]]
; CHECK: cvtdq2ps
; CHECK: orps {{%xmm[0-9]+}}, [[X4:%xmm[0-9]+]]
; CHECK: movaps [[X4]], (%{{rdi|rcx}},%rax,4)
; CHECK: addq $4, %rax
; CHECK: cmpl %eax, (%{{rdx|r8}})
; CHECK-NEXT: jg
|
|
|
|
; vvfloorf(y, x, n): processes four floats per iteration, reading a
; 16-byte-aligned <4 x float> from x and storing the result to the same
; element index in y, for as long as *n is greater than the next index.
; NOTE(review): judging by the name and the round-then-correct sequence
; below, the value stored is presumably floor(x) per lane -- confirm.
define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind {
entry:
  ; Skip the loop entirely unless *n > 0.
  %0 = load i32* %n, align 4
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb, label %return

bb:
  ; %indvar counts iterations; %tmp = %indvar * 4 is the float element index
  ; used to address both y and x.
  %indvar = phi i64 [ %indvar.next, %bb ], [ 0, %entry ]
  %tmp = shl i64 %indvar, 2
  %scevgep = getelementptr float* %y, i64 %tmp
  %scevgep9 = bitcast float* %scevgep to <4 x float>*
  %scevgep10 = getelementptr float* %x, i64 %tmp
  %scevgep1011 = bitcast float* %scevgep10 to <4 x float>*
  %2 = load <4 x float>* %scevgep1011, align 16
  %3 = bitcast <4 x float> %2 to <4 x i32>
  ; %5 = input with the sign bit cleared (mask 0x7FFFFFFF);
  ; %6 = just the sign bit of the input (mask 0x80000000).
  %4 = and <4 x i32> %3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %5 = bitcast <4 x i32> %4 to <4 x float>
  %6 = and <4 x i32> %3, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  ; Compare |x| against 8388608.0 (2^23) with predicate 5 (not-less-than in
  ; the SSE CMPPS immediate encoding), then invert the resulting lane mask.
  %7 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %5, <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06>, i8 5) nounwind
  %tmp.i4 = bitcast <4 x float> %7 to <4 x i32>
  %8 = xor <4 x i32> %tmp.i4, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; 1258291200 is 0x4B000000, the bit pattern of 8388608.0f. In lanes selected
  ; by the inverted mask %11 is 2^23 carrying the input's sign; in the other
  ; lanes it is a zero carrying the input's sign.
  %9 = and <4 x i32> %8, <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200>
  %10 = or <4 x i32> %9, %6
  %11 = bitcast <4 x i32> %10 to <4 x float>
  ; Add and then subtract the same constant.
  ; NOTE(review): presumably the usual trick for rounding to an integer
  ; value in the current rounding mode -- confirm.
  %12 = fadd <4 x float> %2, %11
  %13 = fsub <4 x float> %12, %11
  ; Predicate 1 (less-than): lanes where the input is below %13 become
  ; all-ones, i.e. integer -1, which cvtdq2ps turns into -1.0 (0.0 elsewhere).
  ; Adding that to %13 lowers exactly those lanes by one.
  %14 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %2, <4 x float> %13, i8 1) nounwind
  %15 = bitcast <4 x float> %14 to <4 x i32>
  %16 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %15) nounwind readnone
  %17 = fadd <4 x float> %13, %16
  ; OR the input's original sign bit back into the result before storing.
  %tmp.i = bitcast <4 x float> %17 to <4 x i32>
  %18 = or <4 x i32> %tmp.i, %6
  %19 = bitcast <4 x i32> %18 to <4 x float>
  store <4 x float> %19, <4 x float>* %scevgep9, align 16
  ; Loop control: *n is reloaded on every iteration and compared against the
  ; next element index (%tmp + 4) truncated to i32; continue while *n is
  ; greater.
  %tmp12 = add i64 %tmp, 4
  %tmp13 = trunc i64 %tmp12 to i32
  %20 = load i32* %n, align 4
  %21 = icmp sgt i32 %20, %tmp13
  %indvar.next = add i64 %indvar, 1
  br i1 %21, label %bb, label %return

return:
  ret void
}
|
|
|
|
; SSE packed single-precision compare; the trailing i8 operand selects the
; comparison predicate.
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

; SSE2 conversion of four i32 lanes to four single-precision floats.
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
|