llvm-mirror/test/CodeGen/X86/pr31088.ll
commit 0524badb10 by Craig Topper: [X86] Custom lower ISD::FP16_TO_FP and ISD::FP_TO_FP16 on f16c targets instead of using isel patterns.
We need to use vector instructions for these operations. Previously
we handled this with isel patterns that used extra instructions
and copies to handle the conversions.

Now we use custom lowering to emit the conversions. This allows
them to be pattern matched and optimized on their own. For
example, we can now emit vpextrw to store the result if it's going
directly to memory.
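
For illustration, a minimal IR sketch (hypothetical function, not part of
this test) where the converted result goes straight to memory; with this
lowering an f16c target can emit vcvtps2ph followed by a vpextrw store,
with no round trip through a GPR:

define void @store_f2h(float %x, half* %p) nounwind {
  ; fptrunc lowers to ISD::FP_TO_FP16; the converted result can be
  ; written with vpextrw $0 directly to (%p)
  %h = fptrunc float %x to half
  store half %h, half* %p
  ret void
}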

I've forced the upper elements of the input to VCVTPH2PS to zero to
keep some code similar. Zeroes will be needed for strictfp. I've added
a DAG combine for (fp16_to_fp (fp_to_fp16 X)) so that no extra
instructions appear between the pair, keeping the output closer to the
previous codegen.
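
As a sketch of the pattern that combine targets (hypothetical function,
not part of this test), a trunc-then-extend pair produces exactly
(fp16_to_fp (fp_to_fp16 X)) during lowering:

define float @f2h_h2f_roundtrip(float %x) nounwind {
  ; fptrunc+fpext lowers to fp_to_fp16 followed by fp16_to_fp; the
  ; combine keeps vcvtps2ph/vcvtph2ps back to back in the vector domain
  %h = fptrunc float %x to half
  %f = fpext half %h to float
  ret float %f
}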

This is a step towards strictfp support for f16 conversions.
2020-02-10 22:01:48 -08:00


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C
define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
; X86-LABEL: ir_fadd_v1f16:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: movl %esi, (%esp)
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: addl $12, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: ir_fadd_v1f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: subq $16, %rsp
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: movzwl %si, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movzwl %bx, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: addq $16, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; F16C-LABEL: ir_fadd_v1f16:
; F16C: # %bb.0:
; F16C-NEXT: movzwl %si, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: movzwl %di, %eax
; F16C-NEXT: vmovd %eax, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: # kill: def $ax killed $ax killed $eax
; F16C-NEXT: retq
%retval = fadd <1 x half> %arg0, %arg1
ret <1 x half> %retval
}
define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
; X86-LABEL: ir_fadd_v2f16:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $80, %esp
; X86-NEXT: movzwl 8(%ebp), %esi
; X86-NEXT: movzwl 12(%ebp), %edi
; X86-NEXT: movzwl 20(%ebp), %ebx
; X86-NEXT: movzwl 16(%ebp), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; X86-NEXT: movl %ebx, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; X86-NEXT: movl %edi, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: movl %esi, (%esp)
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: movdqa {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: pextrw $1, %xmm0, %edx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: # kill: def $dx killed $dx killed $edx
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: ir_fadd_v2f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %rbx
; X64-NEXT: subq $32, %rsp
; X64-NEXT: movl %edx, %ebx
; X64-NEXT: movl %esi, %ebp
; X64-NEXT: movl %edi, %r14d
; X64-NEXT: movzwl %cx, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movzwl %bp, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movw %ax, {{[0-9]+}}(%rsp)
; X64-NEXT: movzwl %bx, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movzwl %r14w, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movw %ax, {{[0-9]+}}(%rsp)
; X64-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: pextrw $1, %xmm0, %edx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: # kill: def $dx killed $dx killed $edx
; X64-NEXT: addq $32, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r14
; X64-NEXT: popq %rbp
; X64-NEXT: retq
;
; F16C-LABEL: ir_fadd_v2f16:
; F16C: # %bb.0:
; F16C-NEXT: movzwl %cx, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: movzwl %si, %eax
; F16C-NEXT: vmovd %eax, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vpextrw $0, %xmm0, -{{[0-9]+}}(%rsp)
; F16C-NEXT: movzwl %dx, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: movzwl %di, %eax
; F16C-NEXT: vmovd %eax, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vpextrw $0, %xmm0, -{{[0-9]+}}(%rsp)
; F16C-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: vpextrw $1, %xmm0, %edx
; F16C-NEXT: # kill: def $ax killed $ax killed $eax
; F16C-NEXT: # kill: def $dx killed $dx killed $edx
; F16C-NEXT: retq
%retval = fadd <2 x half> %arg0, %arg1
ret <2 x half> %retval
}