llvm-mirror/test/CodeGen/X86/arg-copy-elide-win64.ll
Craig Topper a84e232f6d [X86] Disable argument copy elision for arguments passed via pointers
Summary:
If you pass two 1024-bit vectors in IR with AVX2 on Windows 64, both vectors will be split into four 256-bit pieces. The four pieces of the first argument will be passed indirectly using 4 GPRs. The second argument will be passed via pointers in memory.

The PartOffsets stored for the second argument are all in terms of its original 1024-bit size, so the PartOffsets for the pieces are 32 bytes apart. If we consider it for copy elision, we'll only load an 8-byte pointer, but we'll advance the address by 32 bytes. The stack object size we create for the first part is probably wrong too.
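
To make the arithmetic concrete (an illustrative sketch, not part of the original commit message): a <16 x double> argument is 1024 bits, i.e. 128 bytes, so splitting it into four 256-bit <4 x double> pieces gives PartOffsets of 0, 32, 64, and 96 bytes. On Win64 each piece of the second argument arrives as an 8-byte pointer in the parameter area, so the pointer slots are only 8 bytes apart, and stepping through them by the 32-byte PartOffsets would read the wrong locations. The function name below is hypothetical; the scenario mirrors the test that follows.

; Sketch (assumed lowering, not taken from the patch): two 1024-bit vector arguments on Win64.
; The second argument's four <4 x double> pieces are passed as four 8-byte pointers on the
; stack, even though their PartOffsets (0, 32, 64, 96) describe 32-byte strides within the
; original 128-byte vector, which is why copy elision must be disabled for such arguments.
define void @two_wide_args(<16 x double> %a, <16 x double> %b) {
  ret void
}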

This issue was encountered by ISPC. I'm working on getting a reduced test case, but wanted to go ahead and get feedback on the fix.

Reviewers: rnk

Reviewed By: rnk

Subscribers: dbabokin, llvm-commits, hiraditya

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60801

llvm-svn: 358817
2019-04-20 15:26:44 +00:00


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-windows-msvc -mattr=avx2 | FileCheck %s
; Make sure we don't try to copy elide these arguments since they will be
; passed indirectly.
define void @baz(<16 x double> %arg, <16 x double> %arg1) #0 {
; CHECK-LABEL: baz:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: subq $368, %rsp # imm = 0x170
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; CHECK-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: andq $-128, %rsp
; CHECK-NEXT: movq 288(%rbp), %rax
; CHECK-NEXT: vmovaps (%rax), %ymm0
; CHECK-NEXT: movq 296(%rbp), %rax
; CHECK-NEXT: vmovaps (%rax), %ymm1
; CHECK-NEXT: movq 304(%rbp), %rax
; CHECK-NEXT: vmovaps (%rax), %ymm2
; CHECK-NEXT: movq 312(%rbp), %rax
; CHECK-NEXT: vmovaps (%rax), %ymm3
; CHECK-NEXT: vmovaps (%rcx), %ymm4
; CHECK-NEXT: vmovaps (%rdx), %ymm5
; CHECK-NEXT: vmovaps (%r8), %ymm6
; CHECK-NEXT: vmovaps (%r9), %ymm7
; CHECK-NEXT: vmovaps %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps %ymm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps %ymm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps %ymm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps %ymm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; CHECK-NEXT: leaq 240(%rbp), %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
%tmp = alloca <16 x double>
%tmp2 = alloca <16 x double>
store <16 x double> %arg, <16 x double>* %tmp
store <16 x double> %arg1, <16 x double>* %tmp2
ret void
}
attributes #0 = { nounwind }