llvm-mirror/test/CodeGen/X86/sse_reload_fold.ll

; Compile for x86-64 Linux with SSE3 using the basic register allocator and
; feed both stdout and stderr to FileCheck. The portable "2>&1 |" form is used
; because the "|&" shorthand is a bash extension that plain POSIX shells and
; lit's internal shell do not accept.
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic 2>&1 | FileCheck %s
; The text "fail" must be matched once and must not occur again after that
; match.
; CHECK: fail
; CHECK-NOT: fail

; External helper functions, one per tested type (float, double, <4 x float>,
; <2 x double>). Each test function below calls the matching helper between
; the definition of %f and a later use of %f.
declare float @test_f(float %f)
declare double @test_d(double %f)
declare <4 x float> @test_vf(<4 x float> %f)
declare <2 x double> @test_vd(<2 x double> %f)
; Generic scalar square-root intrinsics used by @foo and @doo.
declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)

; x86 vector intrinsics exercised by the test functions below.
; Packed single-precision (<4 x float>): unary rsqrt/sqrt/rcp, binary min/max,
; cmp (takes an extra i8 immediate), and the SSE3 addsub/hadd/hsub operations.
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>)
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>)
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>)
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)
; Packed double-precision (<2 x double>) counterparts: sqrt, min/max, cmp and
; the SSE3 addsub/hadd/hsub operations.
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>)
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8)
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)

; Scalar float case. %f is passed to @test_f (whose result %a is unused) and is
; then used again as the operand of llvm.sqrt.f32, so %f is live across the
; call. NOTE(review): going by the test's file name, the intent appears to be
; that the reload of %f gets folded into the sqrt instruction -- confirm
; against llc output.
define float @foo(float %f) {
  %a = call float @test_f(float %f)
  %t = call float @llvm.sqrt.f32(float %f)
  ret float %t
}
; Scalar double case, mirroring @foo: %f is passed to @test_d (result %a is
; unused) and is then the operand of llvm.sqrt.f64, so %f is live across the
; call.
define double @doo(double %f) {
  %a = call double @test_d(double %f)
  %t = call double @llvm.sqrt.f64(double %f)
  ret double %t
}
; Unary <4 x float> case for the SSE rsqrt.ps intrinsic. %f is passed to
; @test_vf (result %a is unused) and then used as the intrinsic's only operand,
; so %f is live across the call.
define <4 x float> @a0(<4 x float> %f) {
  %a = call <4 x float> @test_vf(<4 x float> %f)
  %t = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f)
  ret <4 x float> %t
}
; Unary <4 x float> case for the SSE sqrt.ps intrinsic. Same shape as @a0:
; %f is used by the call to @test_vf (result unused) and again by the intrinsic.
define <4 x float> @a1(<4 x float> %f) {
  %a = call <4 x float> @test_vf(<4 x float> %f)
  %t = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f)
  ret <4 x float> %t
}
; Unary <4 x float> case for the SSE rcp.ps intrinsic. Same shape as @a0:
; %f is used by the call to @test_vf (result unused) and again by the intrinsic.
define <4 x float> @a2(<4 x float> %f) {
  %a = call <4 x float> @test_vf(<4 x float> %f)
  %t = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f)
  ret <4 x float> %t
}
; Binary <4 x float> case for SSE min.ps. The call result %y is the first
; operand; %f, which is live across the call to @test_vf, is the second.
define <4 x float> @b3(<4 x float> %f) {
  %y = call <4 x float> @test_vf(<4 x float> %f)
  %t = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %y, <4 x float> %f)
  ret <4 x float> %t
}
; Binary <4 x float> case for SSE max.ps. The call result %y is the first
; operand; %f, which is live across the call to @test_vf, is the second.
define <4 x float> @b4(<4 x float> %f) {
  %y = call <4 x float> @test_vf(<4 x float> %f)
  %t = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %y, <4 x float> %f)
  ret <4 x float> %t
}
; Binary <4 x float> case for SSE cmp.ps with immediate predicate 7. The call
; result %y is the first operand; %f, live across the call to @test_vf, is the
; second.
define <4 x float> @b5(<4 x float> %f) {
  %y = call <4 x float> @test_vf(<4 x float> %f)
  %t = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %y, <4 x float> %f, i8 7)
  ret <4 x float> %t
}
; Binary <4 x float> case for SSE3 addsub.ps. The call result %y is the first
; operand; %f, which is live across the call to @test_vf, is the second.
define <4 x float> @b6(<4 x float> %f) {
  %y = call <4 x float> @test_vf(<4 x float> %f)
  %t = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %y, <4 x float> %f)
  ret <4 x float> %t
}
; Binary <4 x float> case for SSE3 hadd.ps. The call result %y is the first
; operand; %f, which is live across the call to @test_vf, is the second.
define <4 x float> @b7(<4 x float> %f) {
  %y = call <4 x float> @test_vf(<4 x float> %f)
  %t = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %y, <4 x float> %f)
  ret <4 x float> %t
}
; Binary <4 x float> case for SSE3 hsub.ps. The call result %y is the first
; operand; %f, which is live across the call to @test_vf, is the second.
define <4 x float> @b8(<4 x float> %f) {
  %y = call <4 x float> @test_vf(<4 x float> %f)
  %t = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %y, <4 x float> %f)
  ret <4 x float> %t
}
; Unary <2 x double> case for the SSE2 sqrt.pd intrinsic. %f is passed to
; @test_vd (result %a is unused) and then used as the intrinsic's only operand,
; so %f is live across the call.
define <2 x double> @c1(<2 x double> %f) {
  %a = call <2 x double> @test_vd(<2 x double> %f)
  %t = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f)
  ret <2 x double> %t
}
; Binary <2 x double> case for SSE2 min.pd. The call result %y is the first
; operand; %f, which is live across the call to @test_vd, is the second.
define <2 x double> @d3(<2 x double> %f) {
  %y = call <2 x double> @test_vd(<2 x double> %f)
  %t = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %y, <2 x double> %f)
  ret <2 x double> %t
}
; Binary <2 x double> case for SSE2 max.pd. The call result %y is the first
; operand; %f, which is live across the call to @test_vd, is the second.
define <2 x double> @d4(<2 x double> %f) {
  %y = call <2 x double> @test_vd(<2 x double> %f)
  %t = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %y, <2 x double> %f)
  ret <2 x double> %t
}
; Binary <2 x double> case for SSE2 cmp.pd with immediate predicate 7. The call
; result %y is the first operand; %f, live across the call to @test_vd, is the
; second.
define <2 x double> @d5(<2 x double> %f) {
  %y = call <2 x double> @test_vd(<2 x double> %f)
  %t = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %y, <2 x double> %f, i8 7)
  ret <2 x double> %t
}
; Binary <2 x double> case for SSE3 addsub.pd. The call result %y is the first
; operand; %f, which is live across the call to @test_vd, is the second.
define <2 x double> @d6(<2 x double> %f) {
  %y = call <2 x double> @test_vd(<2 x double> %f)
  %t = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %y, <2 x double> %f)
  ret <2 x double> %t
}
; Binary <2 x double> case for SSE3 hadd.pd. The call result %y is the first
; operand; %f, which is live across the call to @test_vd, is the second.
define <2 x double> @d7(<2 x double> %f) {
  %y = call <2 x double> @test_vd(<2 x double> %f)
  %t = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %y, <2 x double> %f)
  ret <2 x double> %t
}
; Binary <2 x double> case for SSE3 hsub.pd. The call result %y is the first
; operand; %f, which is live across the call to @test_vd, is the second.
define <2 x double> @d8(<2 x double> %f) {
  %y = call <2 x double> @test_vd(<2 x double> %f)
  %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %y, <2 x double> %f)
  ret <2 x double> %t
}

; This one should fail to fuse, but -regalloc=greedy isn't even trying. Instead
; it produces:
;   callq	test_vd
;   movapd	(%rsp), %xmm1           # 16-byte Reload
;   hsubpd	%xmm0, %xmm1
;   movapd	%xmm1, %xmm0
;   addq	$24, %rsp
;   ret
; RABasic still tries to fold this one.

; Same intrinsic as @d8 (SSE3 hsub.pd) but with the operands swapped: %f, which
; is live across the call to @test_vd, is now the FIRST operand and the call
; result %y is the second. NOTE(review): presumably this is the single case the
; test expects to be reported as not fusable -- confirm against llc output.
define <2 x double> @z0(<2 x double> %f) {
  %y = call <2 x double> @test_vd(<2 x double> %f)
  %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)
  ret <2 x double> %t
}
Simplify local live range splitting's safeguard to fix PR10070. When local live range splitting creates a live range with the same number of instructions as the old range, mark it as RS_Local. When such a range is seen again, require that it be split in a way that reduces the number of instructions. That guarantees we are making progress while still being able to perform 3 -> 2+3 splits as required by PR10070. This also means that the PrevSlot map is no longer needed. This was also used to estimate new spill weights, but that is no longer necessary after slotIndexes::insertMachineInstrInMaps() got the extra Late insertion argument. llvm-svn: 132697 2011-06-07 01:55:20 +02:00			`; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic \|& FileCheck %s`
test/CodeGen/X86: FileCheck-ize and add explicit -mtriple=x86_64-linux. They are useless to Win64 target. llvm-svn: 127732 2011-03-16 14:52:38 +01:00			`; CHECK: fail`
			`; CHECK-NOT: fail`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`declare float @test_f(float %f)`
			`declare double @test_d(double %f)`
			`declare <4 x float> @test_vf(<4 x float> %f)`
			`declare <2 x double> @test_vd(<2 x double> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`declare float @llvm.sqrt.f32(float)`
			`declare double @llvm.sqrt.f64(double)`

			`declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>)`
			`declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>)`
			`declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>)`
			`declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)`
			`declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)`
			`declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)`
			`declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>)`
			`declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)`
			`declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)`
			`declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>)`
			`declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>)`
			`declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>)`
			`declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8)`
			`declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>)`
			`declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)`
			`declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)`

			`define float @foo(float %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%a = call float @test_f(float %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call float @llvm.sqrt.f32(float %f)`
			`ret float %t`
			`}`
			`define double @doo(double %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%a = call double @test_d(double %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call double @llvm.sqrt.f64(double %f)`
			`ret double %t`
			`}`
			`define <4 x float> @a0(<4 x float> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%a = call <4 x float> @test_vf(<4 x float> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f)`
			`ret <4 x float> %t`
			`}`
			`define <4 x float> @a1(<4 x float> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%a = call <4 x float> @test_vf(<4 x float> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f)`
			`ret <4 x float> %t`
			`}`
			`define <4 x float> @a2(<4 x float> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%a = call <4 x float> @test_vf(<4 x float> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f)`
			`ret <4 x float> %t`
			`}`
			`define <4 x float> @b3(<4 x float> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <4 x float> @test_vf(<4 x float> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %y, <4 x float> %f)`
			`ret <4 x float> %t`
			`}`
			`define <4 x float> @b4(<4 x float> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <4 x float> @test_vf(<4 x float> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %y, <4 x float> %f)`
			`ret <4 x float> %t`
			`}`
			`define <4 x float> @b5(<4 x float> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <4 x float> @test_vf(<4 x float> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %y, <4 x float> %f, i8 7)`
			`ret <4 x float> %t`
			`}`
			`define <4 x float> @b6(<4 x float> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <4 x float> @test_vf(<4 x float> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %y, <4 x float> %f)`
			`ret <4 x float> %t`
			`}`
			`define <4 x float> @b7(<4 x float> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <4 x float> @test_vf(<4 x float> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %y, <4 x float> %f)`
			`ret <4 x float> %t`
			`}`
			`define <4 x float> @b8(<4 x float> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <4 x float> @test_vf(<4 x float> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %y, <4 x float> %f)`
			`ret <4 x float> %t`
			`}`
			`define <2 x double> @c1(<2 x double> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%a = call <2 x double> @test_vd(<2 x double> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f)`
			`ret <2 x double> %t`
			`}`
			`define <2 x double> @d3(<2 x double> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <2 x double> @test_vd(<2 x double> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %y, <2 x double> %f)`
			`ret <2 x double> %t`
			`}`
			`define <2 x double> @d4(<2 x double> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <2 x double> @test_vd(<2 x double> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %y, <2 x double> %f)`
			`ret <2 x double> %t`
			`}`
			`define <2 x double> @d5(<2 x double> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <2 x double> @test_vd(<2 x double> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %y, <2 x double> %f, i8 7)`
			`ret <2 x double> %t`
			`}`
			`define <2 x double> @d6(<2 x double> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <2 x double> @test_vd(<2 x double> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %y, <2 x double> %f)`
			`ret <2 x double> %t`
			`}`
			`define <2 x double> @d7(<2 x double> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <2 x double> @test_vd(<2 x double> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %y, <2 x double> %f)`
			`ret <2 x double> %t`
			`}`
			`define <2 x double> @d8(<2 x double> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <2 x double> @test_vd(<2 x double> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %y, <2 x double> %f)`
			`ret <2 x double> %t`
			`}`

Simplify local live range splitting's safeguard to fix PR10070. When local live range splitting creates a live range with the same number of instructions as the old range, mark it as RS_Local. When such a range is seen again, require that it be split in a way that reduces the number of instructions. That guarantees we are making progress while still being able to perform 3 -> 2+3 splits as required by PR10070. This also means that the PrevSlot map is no longer needed. This was also used to estimate new spill weights, but that is no longer necessary after slotIndexes::insertMachineInstrInMaps() got the extra Late insertion argument. llvm-svn: 132697 2011-06-07 01:55:20 +02:00			`; This one should fail to fuse, but -regalloc=greedy isn't even trying. Instead`
			`; it produces:`
			`; callq test_vd`
			`; movapd (%rsp), %xmm1 # 16-byte Reload`
			`; hsubpd %xmm0, %xmm1`
			`; movapd %xmm1, %xmm0`
			`; addq $24, %rsp`
			`; ret`
			`; RABasic still tries to fold this one.`

Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`define <2 x double> @z0(<2 x double> %f) {`
Don't use special heuristics for nodes with no data predecessors unless they actually have data successors, and likewise for nodes with no data successors unless they actually have data precessors. llvm-svn: 64327 2009-02-11 22:29:39 +01:00			`%y = call <2 x double> @test_vd(<2 x double> %f)`
Fix several more entries in the x86 reload/remat folding tables. llvm-svn: 42162 2007-09-20 16:17:21 +02:00			`%t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)`
			`ret <2 x double> %t`
			`}`