Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-11-26 12:43:36 +01:00)
Commit 7065f0a696
loadRegFromStackSlot()/storeRegToStackSlot() can generate aligned access
instructions for stack slots even if the stack is unaligned, based on the
assumption that the stack can be realigned. However, this doesn't work for
fixed slots, which are e.g. used for spilling XMM registers in a non-leaf
function with `__attribute__((preserve_all))`.

When compiling such code with `-mstack-alignment=8`, this causes general
protection faults. Fix it by only considering stack realignment for
non-fixed slots.

Note that this changes the output of three existing tests which spill AVX
registers, since AVX requires higher alignment than the ABI provides on
stack frame entry.

Reviewed By: rnk, jyknight

Differential Revision: https://reviews.llvm.org/D73126
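To make the scenario concrete, here is a minimal sketch (illustrative only; the IR below is hypothetical and is not part of the patch or of the test file that follows): a preserve_all function that is not a leaf has to save and restore the XMM registers its inner call clobbers, and those callee-saved spills live in fixed stack slots, so with only 8-byte stack alignment they cannot be emitted as aligned vector moves on the assumption that the frame can be realigned.

; Hypothetical reproducer sketch, not taken from the patch: the
; callee-saved vector spills around the call below land in fixed stack
; slots and must respect the incoming (8-byte) stack alignment.
declare void @clobber_vectors()

define preserve_allcc void @spills_into_fixed_slots() {
entry:
  call void @clobber_vectors()
  ret void
}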
521 lines
24 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=X32
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=skx | FileCheck %s -check-prefix=X32
; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=knl | FileCheck %s -check-prefix=WIN32
; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=skx | FileCheck %s -check-prefix=WIN32
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck %s -check-prefixes=WIN64,WIN64-KNL
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=skx | FileCheck %s -check-prefixes=WIN64,WIN64-SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefixes=X64,X64-KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -check-prefixes=X64,X64-SKX

declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
declare i32 @func_int(i32, i32)

;test calling conventions - input parameters
define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
; X32-LABEL: testf16_inp:
; X32: ## %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $192, %esp
; X32-NEXT: vaddps %zmm1, %zmm0, %zmm0
; X32-NEXT: leal {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: calll _func_float16_ptr
; X32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN32-LABEL: testf16_inp:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: movl %esp, %ebp
; WIN32-NEXT: andl $-64, %esp
; WIN32-NEXT: subl $128, %esp
; WIN32-NEXT: vaddps %zmm1, %zmm0, %zmm0
; WIN32-NEXT: movl %esp, %eax
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: calll _func_float16_ptr
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: vaddps (%esp), %zmm0, %zmm0
; WIN32-NEXT: movl %ebp, %esp
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
;
; WIN64-LABEL: testf16_inp:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rbp
; WIN64-NEXT: subq $176, %rsp
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT: andq $-64, %rsp
; WIN64-NEXT: vmovaps (%rcx), %zmm0
; WIN64-NEXT: vaddps (%rdx), %zmm0, %zmm0
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT: callq func_float16_ptr
; WIN64-NEXT: vaddps {{[0-9]+}}(%rsp), %zmm0, %zmm0
; WIN64-NEXT: leaq 48(%rbp), %rsp
; WIN64-NEXT: popq %rbp
; WIN64-NEXT: retq
;
; X64-LABEL: testf16_inp:
; X64: ## %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: andq $-64, %rsp
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: callq _func_float16_ptr
; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
; X64-NEXT: leaq -16(%rbp), %rsp
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %rbp
; X64-NEXT: retq
  %y = alloca <16 x float>, align 64
  %x = fadd <16 x float> %a, %b
  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
  %2 = load <16 x float>, <16 x float>* %y, align 16
  %3 = fadd <16 x float> %2, %1
  ret <16 x float> %3
}

;test calling conventions - preserved registers

define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
; X32-LABEL: testf16_regs:
; X32: ## %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $256, %esp ## imm = 0x100
; X32-NEXT: vmovaps %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; X32-NEXT: vaddps %zmm1, %zmm0, %zmm0
; X32-NEXT: leal {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: calll _func_float16_ptr
; X32-NEXT: vaddps {{[-0-9]+}}(%e{{[sb]}}p), %zmm0, %zmm0 ## 64-byte Folded Reload
; X32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN32-LABEL: testf16_regs:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: movl %esp, %ebp
; WIN32-NEXT: andl $-64, %esp
; WIN32-NEXT: subl $192, %esp
; WIN32-NEXT: vmovaps %zmm1, (%esp) # 64-byte Spill
; WIN32-NEXT: vaddps %zmm1, %zmm0, %zmm0
; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: calll _func_float16_ptr
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: vaddps (%esp), %zmm0, %zmm0 # 64-byte Folded Reload
; WIN32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
; WIN32-NEXT: movl %ebp, %esp
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
;
; WIN64-LABEL: testf16_regs:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rbp
; WIN64-NEXT: subq $176, %rsp
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT: andq $-64, %rsp
; WIN64-NEXT: vmovaps (%rdx), %zmm16
; WIN64-NEXT: vaddps (%rcx), %zmm16, %zmm0
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT: callq func_float16_ptr
; WIN64-NEXT: vaddps %zmm16, %zmm0, %zmm0
; WIN64-NEXT: vaddps {{[0-9]+}}(%rsp), %zmm0, %zmm0
; WIN64-NEXT: leaq 48(%rbp), %rsp
; WIN64-NEXT: popq %rbp
; WIN64-NEXT: retq
;
; X64-LABEL: testf16_regs:
; X64: ## %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: andq $-64, %rsp
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vmovaps %zmm1, %zmm16
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: callq _func_float16_ptr
; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0
; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
; X64-NEXT: leaq -16(%rbp), %rsp
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %rbp
; X64-NEXT: retq
  %y = alloca <16 x float>, align 64
  %x = fadd <16 x float> %a, %b
  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
  %2 = load <16 x float>, <16 x float>* %y, align 16
  %3 = fadd <16 x float> %1, %b
  %4 = fadd <16 x float> %2, %3
  ret <16 x float> %4
}

; test calling conventions - prolog and epilog
define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
; X32-LABEL: test_prolog_epilog:
; X32: ## %bb.0:
; X32-NEXT: subl $12, %esp
; X32-NEXT: calll _func_float16
; X32-NEXT: addl $12, %esp
; X32-NEXT: retl
;
; WIN32-LABEL: test_prolog_epilog:
; WIN32: # %bb.0:
; WIN32-NEXT: calll _func_float16
; WIN32-NEXT: retl
;
; WIN64-KNL-LABEL: test_prolog_epilog:
; WIN64-KNL: # %bb.0:
; WIN64-KNL-NEXT: pushq %rbp
; WIN64-KNL-NEXT: subq $1264, %rsp # imm = 0x4F0
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovups %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: andq $-64, %rsp
; WIN64-KNL-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
; WIN64-KNL-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-KNL-NEXT: callq func_float16
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
; WIN64-KNL-NEXT: leaq 1136(%rbp), %rsp
; WIN64-KNL-NEXT: popq %rbp
; WIN64-KNL-NEXT: retq
;
; WIN64-SKX-LABEL: test_prolog_epilog:
; WIN64-SKX: # %bb.0:
; WIN64-SKX-NEXT: pushq %rbp
; WIN64-SKX-NEXT: subq $1264, %rsp # imm = 0x4F0
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; WIN64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; WIN64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; WIN64-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovups %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: andq $-64, %rsp
; WIN64-SKX-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
; WIN64-SKX-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-SKX-NEXT: callq func_float16
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 8-byte Reload
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 8-byte Reload
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 8-byte Reload
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 8-byte Reload
; WIN64-SKX-NEXT: leaq 1136(%rbp), %rsp
; WIN64-SKX-NEXT: popq %rbp
; WIN64-SKX-NEXT: retq
;
; X64-KNL-LABEL: test_prolog_epilog:
; X64-KNL: ## %bb.0:
; X64-KNL-NEXT: pushq %rsi
; X64-KNL-NEXT: subq $1072, %rsp ## imm = 0x430
; X64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; X64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; X64-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; X64-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; X64-KNL-NEXT: vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm23, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm22, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill
; X64-KNL-NEXT: callq _func_float16
; X64-KNL-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm24 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm26 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm27 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm28 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm30 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm31 ## 64-byte Reload
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; X64-KNL-NEXT: addq $1072, %rsp ## imm = 0x430
; X64-KNL-NEXT: popq %rsi
; X64-KNL-NEXT: retq
;
; X64-SKX-LABEL: test_prolog_epilog:
; X64-SKX: ## %bb.0:
; X64-SKX-NEXT: pushq %rsi
; X64-SKX-NEXT: subq $1072, %rsp ## imm = 0x430
; X64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-SKX-NEXT: vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm23, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm22, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill
; X64-SKX-NEXT: callq _func_float16
; X64-SKX-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm24 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm26 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm27 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm28 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm30 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm31 ## 64-byte Reload
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
; X64-SKX-NEXT: addq $1072, %rsp ## imm = 0x430
; X64-SKX-NEXT: popq %rsi
; X64-SKX-NEXT: retq
  %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
  ret <16 x float> %c
}

declare <16 x float> @func_float16_mask(<16 x float>, <16 x i1>)

define <16 x float> @testf16_inp_mask(<16 x float> %a, i16 %mask) {
; X32-LABEL: testf16_inp_mask:
; X32: ## %bb.0:
; X32-NEXT: subl $12, %esp
; X32-NEXT: .cfi_def_cfa_offset 16
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: calll _func_float16_mask
; X32-NEXT: addl $12, %esp
; X32-NEXT: retl
;
; WIN32-LABEL: testf16_inp_mask:
; WIN32: # %bb.0:
; WIN32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; WIN32-NEXT: calll _func_float16_mask
; WIN32-NEXT: retl
;
; WIN64-KNL-LABEL: testf16_inp_mask:
; WIN64-KNL: # %bb.0:
; WIN64-KNL-NEXT: subq $40, %rsp
; WIN64-KNL-NEXT: .seh_stackalloc 40
; WIN64-KNL-NEXT: .seh_endprologue
; WIN64-KNL-NEXT: # kill: def $dx killed $dx def $edx
; WIN64-KNL-NEXT: vmovaps (%rcx), %zmm0
; WIN64-KNL-NEXT: kmovw %edx, %k1
; WIN64-KNL-NEXT: callq func_float16_mask
; WIN64-KNL-NEXT: nop
; WIN64-KNL-NEXT: addq $40, %rsp
; WIN64-KNL-NEXT: retq
; WIN64-KNL-NEXT: .seh_endproc
;
; WIN64-SKX-LABEL: testf16_inp_mask:
; WIN64-SKX: # %bb.0:
; WIN64-SKX-NEXT: subq $40, %rsp
; WIN64-SKX-NEXT: .seh_stackalloc 40
; WIN64-SKX-NEXT: .seh_endprologue
; WIN64-SKX-NEXT: # kill: def $dx killed $dx def $edx
; WIN64-SKX-NEXT: vmovaps (%rcx), %zmm0
; WIN64-SKX-NEXT: kmovd %edx, %k1
; WIN64-SKX-NEXT: callq func_float16_mask
; WIN64-SKX-NEXT: nop
; WIN64-SKX-NEXT: addq $40, %rsp
; WIN64-SKX-NEXT: retq
; WIN64-SKX-NEXT: .seh_endproc
;
; X64-KNL-LABEL: testf16_inp_mask:
; X64-KNL: ## %bb.0:
; X64-KNL-NEXT: pushq %rbp
; X64-KNL-NEXT: .cfi_def_cfa_offset 16
; X64-KNL-NEXT: pushq %r13
; X64-KNL-NEXT: .cfi_def_cfa_offset 24
; X64-KNL-NEXT: pushq %r12
; X64-KNL-NEXT: .cfi_def_cfa_offset 32
; X64-KNL-NEXT: .cfi_offset %r12, -32
; X64-KNL-NEXT: .cfi_offset %r13, -24
; X64-KNL-NEXT: .cfi_offset %rbp, -16
; X64-KNL-NEXT: kmovw %edi, %k1
; X64-KNL-NEXT: callq _func_float16_mask
; X64-KNL-NEXT: popq %r12
; X64-KNL-NEXT: popq %r13
; X64-KNL-NEXT: popq %rbp
; X64-KNL-NEXT: retq
;
; X64-SKX-LABEL: testf16_inp_mask:
; X64-SKX: ## %bb.0:
; X64-SKX-NEXT: pushq %rbp
; X64-SKX-NEXT: .cfi_def_cfa_offset 16
; X64-SKX-NEXT: pushq %r13
; X64-SKX-NEXT: .cfi_def_cfa_offset 24
; X64-SKX-NEXT: pushq %r12
; X64-SKX-NEXT: .cfi_def_cfa_offset 32
; X64-SKX-NEXT: .cfi_offset %r12, -32
; X64-SKX-NEXT: .cfi_offset %r13, -24
; X64-SKX-NEXT: .cfi_offset %rbp, -16
; X64-SKX-NEXT: kmovd %edi, %k1
; X64-SKX-NEXT: callq _func_float16_mask
; X64-SKX-NEXT: popq %r12
; X64-SKX-NEXT: popq %r13
; X64-SKX-NEXT: popq %rbp
; X64-SKX-NEXT: retq
  %imask = bitcast i16 %mask to <16 x i1>
  %1 = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1> %imask)
  ret <16 x float> %1
}

define intel_ocl_bicc <16 x float> @test_prolog_epilog_with_mask(<16 x float> %a, <16 x i32> %x1, <16 x i32>%x2, <16 x i1> %mask) nounwind {
; X32-LABEL: test_prolog_epilog_with_mask:
; X32: ## %bb.0:
; X32-NEXT: subl $12, %esp
; X32-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
; X32-NEXT: kxorw %k1, %k0, %k1
; X32-NEXT: calll _func_float16_mask
; X32-NEXT: addl $12, %esp
; X32-NEXT: retl
;
; WIN32-LABEL: test_prolog_epilog_with_mask:
; WIN32: # %bb.0:
; WIN32-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
; WIN32-NEXT: kxorw %k1, %k0, %k1
; WIN32-NEXT: calll _func_float16_mask
; WIN32-NEXT: retl
;
; WIN64-LABEL: test_prolog_epilog_with_mask:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
; WIN64-NEXT: kxorw %k1, %k0, %k1
; WIN64-NEXT: callq func_float16_mask
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: retq
;
; X64-LABEL: test_prolog_epilog_with_mask:
; X64: ## %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
; X64-NEXT: kxorw %k1, %k0, %k1
; X64-NEXT: callq _func_float16_mask
; X64-NEXT: popq %rax
; X64-NEXT: retq
  %cmp_res = icmp eq <16 x i32>%x1, %x2
  %mask1 = xor <16 x i1> %cmp_res, %mask
  %c = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1>%mask1)
  ret <16 x float> %c
}