llvm-mirror/test/CodeGen/X86/fp-fast.ll
Sanjay Patel, commit 6b15a1a605: [x86] Add a reassociation optimization to increase ILP via the MachineCombiner pass
This is a reimplementation of D9780 at the machine instruction level rather than the DAG.

Use the MachineCombiner pass to reassociate scalar single-precision AVX additions (just a
starting point; see the TODO comments) to increase ILP when it's safe to do so.

The code is closely based on the existing MachineCombiner optimization that is implemented
for AArch64.

This patch should not cause the kind of spilling tragedy that led to the reversion of r236031.

Differential Revision: http://reviews.llvm.org/D10321

llvm-svn: 239486
2015-06-10 20:32:21 +00:00
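
A rough sketch of the transformation, drawn from the reassociate_adds tests in this
file (the IR value names below are illustrative, not part of the patch): a linear
chain of dependent adds is rebalanced so that the first two additions no longer
depend on each other, shortening the critical path.

; Input IR: each fadd depends on the previous result (critical path = 3 adds).
;   %t0 = fadd float %x0, %x1
;   %t1 = fadd float %t0, %x2
;   %t2 = fadd float %t1, %x3
;
; AVX code without reassociation: three serially dependent vaddss instructions.
;   vaddss %xmm1, %xmm0, %xmm0
;   vaddss %xmm2, %xmm0, %xmm0
;   vaddss %xmm3, %xmm0, %xmm0
;
; AVX code after the MachineCombiner reassociation: the first two vaddss are
; independent and can issue in parallel (critical path = 2 adds).
;   vaddss %xmm1, %xmm0, %xmm0
;   vaddss %xmm3, %xmm2, %xmm1
;   vaddss %xmm1, %xmm0, %xmm0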

; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
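
; test1-test5: with -enable-unsafe-fp-math, sums of scaled copies of %a (for example
; (a + a) + (a + a) in test1) are expected to fold into a single multiply by a
; constant loaded from the constant pool.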
define float @test1(float %a) {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%t1 = fadd float %a, %a
%r = fadd float %t1, %t1
ret float %r
}
define float @test2(float %a) {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%t1 = fmul float 4.0, %a
%t2 = fadd float %a, %a
%r = fadd float %t1, %t2
ret float %r
}
define float @test3(float %a) {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%t1 = fmul float %a, 4.0
%t2 = fadd float %a, %a
%r = fadd float %t1, %t2
ret float %r
}
define float @test4(float %a) {
; CHECK-LABEL: test4:
; CHECK: # BB#0:
; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%t1 = fadd float %a, %a
%t2 = fmul float 4.0, %a
%r = fadd float %t1, %t2
ret float %r
}
define float @test5(float %a) {
; CHECK-LABEL: test5:
; CHECK: # BB#0:
; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%t1 = fadd float %a, %a
%t2 = fmul float %a, 4.0
%r = fadd float %t1, %t2
ret float %r
}
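
; test6 and test7: 2*a - (a + a) should simplify to +0.0, which is materialized
; with a self-xor (vxorps) rather than any arithmetic.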
define float @test6(float %a) {
; CHECK-LABEL: test6:
; CHECK: # BB#0:
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%t1 = fmul float 2.0, %a
%t2 = fadd float %a, %a
%r = fsub float %t1, %t2
ret float %r
}
define float @test7(float %a) {
; CHECK-LABEL: test7:
; CHECK: # BB#0:
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%t1 = fmul float %a, 2.0
%t2 = fadd float %a, %a
%r = fsub float %t1, %t2
ret float %r
}
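
; test8 and test9: a*0 + a should simplify back to %a itself, so no instructions
; are expected before the return (the result is already in %xmm0).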
define float @test8(float %a) {
; CHECK-LABEL: test8:
; CHECK: # BB#0:
; CHECK-NEXT: retq
%t1 = fmul float %a, 0.0
%t2 = fadd float %a, %t1
ret float %t2
}
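
; test10 and test11: adding a value to its own negation (formed by fsub from -0.0)
; should simplify to +0.0, again materialized with vxorps.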
define float @test9(float %a) {
; CHECK-LABEL: test9:
; CHECK: # BB#0:
; CHECK-NEXT: retq
%t1 = fmul float 0.0, %a
%t2 = fadd float %t1, %a
ret float %t2
}
define float @test10(float %a) {
; CHECK-LABEL: test10:
; CHECK: # BB#0:
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%t1 = fsub float -0.0, %a
%t2 = fadd float %a, %t1
ret float %t2
}
define float @test11(float %a) {
; CHECK-LABEL: test11:
; CHECK: # BB#0:
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%t1 = fsub float -0.0, %a
%t2 = fadd float %t1, %a
ret float %t2
}
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds1:
; CHECK: # BB#0:
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
ret float %t2
}
define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds2:
; CHECK: # BB#0:
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %t1, %x3
ret float %t2
}
define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds3:
; CHECK: # BB#0:
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %x3, %t1
ret float %t2
}
define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds4:
; CHECK: # BB#0:
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
ret float %t2
}
; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; CHECK-LABEL: reassociate_adds5:
; CHECK: # BB#0:
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm5, %xmm4, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm7, %xmm6, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
%t3 = fadd float %t2, %x4
%t4 = fadd float %t3, %x5
%t5 = fadd float %t4, %x6
%t6 = fadd float %t5, %x7
ret float %t6
}