mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
8024703a16
SSE transition penalty. The pass is enabled through the "x86-use-vzeroupper" llc command line option. This is only the first step (a very naive and conservative one) to sketch out the idea; a proper DFA is coming next to allow smarter decisions. Comments and ideas, now and in further commits, will be much appreciated. llvm-svn: 138317
27 lines
882 B
LLVM
27 lines
882 B
LLVM
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
;
; Codegen test for vzeroupper insertion. The command above feeds this file to
; llc with the -x86-use-vzeroupper option for an AVX-enabled x86_64 Darwin
; target, and FileCheck matches the emitted assembly against the directives
; embedded in the comments below.

; Helper with a body in this file: returns the element-wise sum %a + %a of its
; <4 x float> argument. test00 calls it, and the directives in test00 require
; that no vzeroupper appears between the first call to do_sse and the call to
; this function.
define <4 x float> @do_sse_local(<4 x float> %a) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <4 x float> %a, %a
  ret <4 x float> %add.i
}

; test00 makes three calls in sequence: do_sse (declared only), do_sse_local
; (defined in this file), and do_sse again as the final call whose result is
; returned. The FileCheck directives pin where vzeroupper may and may not
; appear relative to those calls in the generated assembly.
; CHECK: _test00
define <4 x float> @test00(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
entry:
  %add.i = fadd <4 x float> %a, %b

  ; First call to do_sse: a vzeroupper is expected on the line directly
  ; before the call instruction.
  ; CHECK: vzeroupper
  ; CHECK-NEXT: callq _do_sse
  %call3 = tail call <4 x float> @do_sse(<4 x float> %add.i) nounwind
  %sub.i = fsub <4 x float> %call3, %add.i

  ; Call to do_sse_local: no vzeroupper may appear between the previous match
  ; and this call instruction.
  ; CHECK-NOT: vzeroupper
  ; CHECK: callq _do_sse_local
  %call8 = tail call <4 x float> @do_sse_local(<4 x float> %sub.i)

  ; Final call to do_sse: expected to be emitted as a jump, again with a
  ; vzeroupper on the line directly before it.
  ; CHECK: vzeroupper
  ; CHECK-NEXT: jmp _do_sse
  %call10 = tail call <4 x float> @do_sse(<4 x float> %call8) nounwind
  ret <4 x float> %call10
}

; Callee used by test00; only declared here, with no body visible in this file.
declare <4 x float> @do_sse(<4 x float>)