mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] Disable copy elision in LowerMemArgument for scalarized vectors when the loc VT is a different size than the original element.
For example, a v4f16 argument is scalarized to 4 i32 values, so the values are spread out in memory (with padding between them) instead of being packed tightly as in the original vector. Fixes PR47000.
This commit is contained in:
parent
ce39c28b26
commit
a04fa612fc
@ -3203,13 +3203,23 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
|
||||
return DAG.getFrameIndex(FI, PtrVT);
|
||||
}
|
||||
|
||||
EVT ArgVT = Ins[i].ArgVT;
|
||||
|
||||
// If this is a vector that has been split into multiple parts, and the
|
||||
// scalar size of the parts doesn't match the vector element size, then we can't
|
||||
// elide the copy. The parts will have padding between them instead of being
|
||||
// packed like a vector.
|
||||
bool ScalarizedAndExtendedVector =
|
||||
ArgVT.isVector() && !VA.getLocVT().isVector() &&
|
||||
VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
|
||||
|
||||
// This is an argument in memory. We might be able to perform copy elision.
|
||||
// If the argument is passed directly in memory without any extension, then we
|
||||
// can perform copy elision. Large vector types, for example, may be passed
|
||||
// indirectly by pointer.
|
||||
if (Flags.isCopyElisionCandidate() &&
|
||||
VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
|
||||
EVT ArgVT = Ins[i].ArgVT;
|
||||
VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
|
||||
!ScalarizedAndExtendedVector) {
|
||||
SDValue PartAddr;
|
||||
if (Ins[i].PartOffset == 0) {
|
||||
// If this is a one-part value or the first part of a multi-part value,
|
||||
|
@ -16,17 +16,15 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
|
||||
; CHECK-NEXT: movl %eax, %ecx
|
||||
; CHECK-NEXT: movw 176(%esp), %dx
|
||||
; CHECK-NEXT: movw 172(%esp), %si
|
||||
; CHECK-NEXT: movw 164(%esp), %di
|
||||
; CHECK-NEXT: movw 166(%esp), %bx
|
||||
; CHECK-NEXT: movw 168(%esp), %di
|
||||
; CHECK-NEXT: movw 164(%esp), %bx
|
||||
; CHECK-NEXT: movw 160(%esp), %bp
|
||||
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: movw 156(%esp), %ax
|
||||
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
|
||||
; CHECK-NEXT: movw 152(%esp), %ax
|
||||
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
|
||||
; CHECK-NEXT: movw 148(%esp), %ax
|
||||
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
|
||||
; CHECK-NEXT: movw 150(%esp), %ax
|
||||
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
|
||||
; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
|
||||
; CHECK-NEXT: movw %ax, 112(%esp)
|
||||
; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
|
||||
; CHECK-NEXT: movw %ax, 114(%esp)
|
||||
@ -35,8 +33,8 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
|
||||
; CHECK-NEXT: movw %bp, 118(%esp)
|
||||
; CHECK-NEXT: movw %dx, 110(%esp)
|
||||
; CHECK-NEXT: movw %si, 108(%esp)
|
||||
; CHECK-NEXT: movw %bx, 106(%esp)
|
||||
; CHECK-NEXT: movw %di, 104(%esp)
|
||||
; CHECK-NEXT: movw %di, 106(%esp)
|
||||
; CHECK-NEXT: movw %bx, 104(%esp)
|
||||
; CHECK-NEXT: movzwl 118(%esp), %edx
|
||||
; CHECK-NEXT: movzwl 116(%esp), %esi
|
||||
; CHECK-NEXT: movzwl 114(%esp), %edi
|
||||
|
Loading…
x
Reference in New Issue
Block a user