mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-01 00:12:50 +01:00
cf00a6d764
When local live range splitting creates a live range with the same number of instructions as the old range, mark it as RS_Local. When such a range is seen again, require that it be split in a way that reduces the number of instructions. That guarantees we are making progress while still being able to perform 3 -> 2+3 splits as required by PR10070. This also means that the PrevSlot map is no longer needed. This was also used to estimate new spill weights, but that is no longer necessary after SlotIndexes::insertMachineInstrInMaps() got the extra Late insertion argument. llvm-svn: 132697
135 lines
5.1 KiB
LLVM
135 lines
5.1 KiB
LLVM
; Test driver.  llc is run with the basic register allocator and with
; -print-failed-fuse-candidates enabled; stderr is merged into stdout
; ("2>&1 |", the portable spelling of the old Tcl-style "|&") so that the
; diagnostic output reaches FileCheck together with the assembly.
;
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic 2>&1 | FileCheck %s

; The word "fail" must appear in the output, and must not appear again after
; that first match.
; CHECK: fail
; CHECK-NOT: fail
|
|
|
|
; External helper functions called by the test bodies, one per value type.
declare float @test_f(float %f)
declare double @test_d(double %f)
declare <4 x float> @test_vf(<4 x float> %f)
declare <2 x double> @test_vd(<2 x double> %f)

; Generic scalar square-root intrinsics.
declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)

; x86 SSE / SSE3 intrinsics operating on <4 x float>.
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>)
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>)
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>)
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)

; x86 SSE2 / SSE3 intrinsics operating on <2 x double>.
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>)
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8)
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)
|
|
|
|
; Scalar float case: calls @test_f (result unused), then returns
; llvm.sqrt.f32 of the original argument %f.
; NOTE(review): presumably the call is there to keep %f live across a call so
; it has to be reloaded before the sqrt -- confirm against the allocator.
define float @foo(float %f) {
  %ignored = call float @test_f(float %f)
  %root = call float @llvm.sqrt.f32(float %f)
  ret float %root
}
|
|
; Scalar double case: calls @test_d (result unused), then returns
; llvm.sqrt.f64 of the original argument %f.
; NOTE(review): presumably the call is there to keep %f live across a call so
; it has to be reloaded before the sqrt -- confirm against the allocator.
define double @doo(double %f) {
  %ignored = call double @test_d(double %f)
  %root = call double @llvm.sqrt.f64(double %f)
  ret double %root
}
|
|
; Unary <4 x float> case: calls @test_vf (result unused), then returns
; llvm.x86.sse.rsqrt.ps of the original argument %f.
define <4 x float> @a0(<4 x float> %f) {
  %ignored = call <4 x float> @test_vf(<4 x float> %f)
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f)
  ret <4 x float> %res
}
|
|
; Unary <4 x float> case: calls @test_vf (result unused), then returns
; llvm.x86.sse.sqrt.ps of the original argument %f.
define <4 x float> @a1(<4 x float> %f) {
  %ignored = call <4 x float> @test_vf(<4 x float> %f)
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f)
  ret <4 x float> %res
}
|
|
; Unary <4 x float> case: calls @test_vf (result unused), then returns
; llvm.x86.sse.rcp.ps of the original argument %f.
define <4 x float> @a2(<4 x float> %f) {
  %ignored = call <4 x float> @test_vf(<4 x float> %f)
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f)
  ret <4 x float> %res
}
|
|
; Binary <4 x float> case: llvm.x86.sse.min.ps with the result of @test_vf as
; the first operand and the original argument %f as the second.
define <4 x float> @b3(<4 x float> %f) {
  %called = call <4 x float> @test_vf(<4 x float> %f)
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %called, <4 x float> %f)
  ret <4 x float> %res
}
|
|
; Binary <4 x float> case: llvm.x86.sse.max.ps with the result of @test_vf as
; the first operand and the original argument %f as the second.
define <4 x float> @b4(<4 x float> %f) {
  %called = call <4 x float> @test_vf(<4 x float> %f)
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %called, <4 x float> %f)
  ret <4 x float> %res
}
|
|
; Binary <4 x float> case: llvm.x86.sse.cmp.ps (immediate i8 7) with the result
; of @test_vf as the first operand and the original argument %f as the second.
define <4 x float> @b5(<4 x float> %f) {
  %called = call <4 x float> @test_vf(<4 x float> %f)
  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %called, <4 x float> %f, i8 7)
  ret <4 x float> %res
}
|
|
; Binary <4 x float> case: llvm.x86.sse3.addsub.ps with the result of @test_vf
; as the first operand and the original argument %f as the second.
define <4 x float> @b6(<4 x float> %f) {
  %called = call <4 x float> @test_vf(<4 x float> %f)
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %called, <4 x float> %f)
  ret <4 x float> %res
}
|
|
; Binary <4 x float> case: llvm.x86.sse3.hadd.ps with the result of @test_vf
; as the first operand and the original argument %f as the second.
define <4 x float> @b7(<4 x float> %f) {
  %called = call <4 x float> @test_vf(<4 x float> %f)
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %called, <4 x float> %f)
  ret <4 x float> %res
}
|
|
; Binary <4 x float> case: llvm.x86.sse3.hsub.ps with the result of @test_vf
; as the first operand and the original argument %f as the second.
define <4 x float> @b8(<4 x float> %f) {
  %called = call <4 x float> @test_vf(<4 x float> %f)
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %called, <4 x float> %f)
  ret <4 x float> %res
}
|
|
; Unary <2 x double> case: calls @test_vd (result unused), then returns
; llvm.x86.sse2.sqrt.pd of the original argument %f.
define <2 x double> @c1(<2 x double> %f) {
  %ignored = call <2 x double> @test_vd(<2 x double> %f)
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f)
  ret <2 x double> %res
}
|
|
; Binary <2 x double> case: llvm.x86.sse2.min.pd with the result of @test_vd
; as the first operand and the original argument %f as the second.
define <2 x double> @d3(<2 x double> %f) {
  %called = call <2 x double> @test_vd(<2 x double> %f)
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %called, <2 x double> %f)
  ret <2 x double> %res
}
|
|
; Binary <2 x double> case: llvm.x86.sse2.max.pd with the result of @test_vd
; as the first operand and the original argument %f as the second.
define <2 x double> @d4(<2 x double> %f) {
  %called = call <2 x double> @test_vd(<2 x double> %f)
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %called, <2 x double> %f)
  ret <2 x double> %res
}
|
|
; Binary <2 x double> case: llvm.x86.sse2.cmp.pd (immediate i8 7) with the
; result of @test_vd as the first operand and the original argument %f as the
; second.
define <2 x double> @d5(<2 x double> %f) {
  %called = call <2 x double> @test_vd(<2 x double> %f)
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %called, <2 x double> %f, i8 7)
  ret <2 x double> %res
}
|
|
; Binary <2 x double> case: llvm.x86.sse3.addsub.pd with the result of @test_vd
; as the first operand and the original argument %f as the second.
define <2 x double> @d6(<2 x double> %f) {
  %called = call <2 x double> @test_vd(<2 x double> %f)
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %called, <2 x double> %f)
  ret <2 x double> %res
}
|
|
; Binary <2 x double> case: llvm.x86.sse3.hadd.pd with the result of @test_vd
; as the first operand and the original argument %f as the second.
define <2 x double> @d7(<2 x double> %f) {
  %called = call <2 x double> @test_vd(<2 x double> %f)
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %called, <2 x double> %f)
  ret <2 x double> %res
}
|
|
; Binary <2 x double> case: llvm.x86.sse3.hsub.pd with the result of @test_vd
; as the first operand and the original argument %f as the second.
define <2 x double> @d8(<2 x double> %f) {
  %called = call <2 x double> @test_vd(<2 x double> %f)
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %called, <2 x double> %f)
  ret <2 x double> %res
}
|
|
|
|
; Expected-failure case: this function should fail to fuse.  Here the original
; argument %f is the FIRST operand of llvm.x86.sse3.hsub.pd and the result of
; @test_vd is the second.
;
; -regalloc=greedy does not even attempt the fold; it produces instead:
;
;       callq   test_vd
;       movapd  (%rsp), %xmm1           # 16-byte Reload
;       hsubpd  %xmm0, %xmm1
;       movapd  %xmm1, %xmm0
;       addq    $24, %rsp
;       ret
;
; RABasic still tries to fold this one.

define <2 x double> @z0(<2 x double> %f) {
  %called = call <2 x double> @test_vd(<2 x double> %f)
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %called)
  ret <2 x double> %res
}
|