[X86] Disable argument copy elision for arguments passed via pointers
Summary:
If you pass two 1024-bit vectors in IR with AVX2 on Windows 64, both vectors will be split into four 256-bit pieces each. The four pieces of the first argument will be passed indirectly using 4 GPRs. The second argument will be passed via pointers in memory.

The PartOffsets stored for the second argument are all in terms of its original 1024-bit size, so the PartOffsets for its pieces are 32 bytes apart. If we consider that argument for copy elision, we will only load an 8-byte pointer, but we will advance the address by 32 bytes. The stack object size we create for the first part is probably wrong too.

This issue was encountered by ISPC. I'm working on getting a reduced test case, but wanted to go ahead and get feedback on the fix.

Reviewers: rnk

Reviewed By: rnk

Subscribers: dbabokin, llvm-commits, hiraditya

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60801

llvm-svn: 358817
commit a84e232f6d
parent 3de4f211cc
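A minimal IR sketch of the problematic shape, assuming an AVX2 Windows 64 target. Only the @baz signature comes from the test updated below; the global @g, the function body, and the attribute string are illustrative assumptions, not part of the change.

; Two 1024-bit vector arguments with AVX2 on Win64. Each <16 x double> is
; split into four 256-bit pieces; the first argument's pieces are passed
; indirectly through rcx/rdx/r8/r9, the second argument's pieces through
; pointer arguments passed on the stack.
target triple = "x86_64-pc-windows-msvc"

; Hypothetical global, used only to keep both arguments live.
@g = external global <16 x double>

define void @baz(<16 x double> %arg, <16 x double> %arg1) #0 {
  %sum = fadd <16 x double> %arg, %arg1
  store <16 x double> %sum, <16 x double>* @g
  ret void
}

attributes #0 = { nounwind "target-features"="+avx2" }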
@@ -3011,7 +3011,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
   }
 
   // This is an argument in memory. We might be able to perform copy elision.
-  if (Flags.isCopyElisionCandidate()) {
+  // If the argument is passed directly in memory without any extension, then we
+  // can perform copy elision. Large vector types, for example, may be passed
+  // indirectly by pointer.
+  if (Flags.isCopyElisionCandidate() &&
+      VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
     EVT ArgVT = Ins[i].ArgVT;
     SDValue PartAddr;
     if (Ins[i].PartOffset == 0) {
@@ -13,12 +13,12 @@ define void @baz(<16 x double> %arg, <16 x double> %arg1) #0 {
 ; CHECK-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    andq $-128, %rsp
 ; CHECK-NEXT:    movq 288(%rbp), %rax
-; CHECK-NEXT:    movq 320(%rbp), %r10
 ; CHECK-NEXT:    vmovaps (%rax), %ymm0
-; CHECK-NEXT:    vmovaps (%r10), %ymm1
-; CHECK-NEXT:    movq 352(%rbp), %rax
+; CHECK-NEXT:    movq 296(%rbp), %rax
+; CHECK-NEXT:    vmovaps (%rax), %ymm1
+; CHECK-NEXT:    movq 304(%rbp), %rax
 ; CHECK-NEXT:    vmovaps (%rax), %ymm2
-; CHECK-NEXT:    movq 384(%rbp), %rax
+; CHECK-NEXT:    movq 312(%rbp), %rax
 ; CHECK-NEXT:    vmovaps (%rax), %ymm3
 ; CHECK-NEXT:    vmovaps (%rcx), %ymm4
 ; CHECK-NEXT:    vmovaps (%rdx), %ymm5