1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[SystemZ] Avoid inserting same value after replication

A minor improvement of buildVector() that skips creating an
INSERT_VECTOR_ELT for a Value which has already been used for the
REPLICATE.

Review: Ulrich Weigand
https://reviews.llvm.org/D54315

llvm-svn: 346504
This commit is contained in:
Jonas Paulsson 2018-11-09 15:44:28 +00:00
parent b56d4d7977
commit ec1b2ec29d
2 changed files with 23 additions and 1 deletions

View File

@ -4479,6 +4479,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// Constants with undefs to get a full vector constant and use that
// as the starting point.
SDValue Result;
SDValue ReplicatedVal;
if (NumConstants > 0) {
for (unsigned I = 0; I < NumElements; ++I)
if (!Constants[I].getNode())
@ -4500,6 +4501,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
if (LoadElIdx != UINT_MAX) {
Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]);
Done[LoadElIdx] = true;
ReplicatedVal = Elems[LoadElIdx];
} else {
// Try to use VLVGP.
unsigned I1 = NumElements / 2 - 1;
@ -4520,7 +4522,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// Use VLVGx to insert the other elements.
for (unsigned I = 0; I < NumElements; ++I)
if (!Done[I] && !Elems[I].isUndef())
if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
DAG.getConstant(I, DL, MVT::i32));
return Result;

View File

@ -0,0 +1,20 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test that a loaded value which is replicated is not also inserted into any
; elements.
; CHECK: vlvgp %v0, %r0, %r0
; CHECK-NEXT: vrepf %v24, %v0, 1
; CHECK-NOT: vlvgf %v24, %r0, 1
; CHECK-NOT: vlvgf %v24, %r0, 2
; Builds a <4 x i32> whose lanes 0, 1 and 2 all hold the same loaded value
; (%tmp) and whose lane 3 holds %arg, stores the loaded value to %dst, and
; returns the vector.
define <4 x i32> @fun(i32 %arg, i32* %dst) {
; The single loaded scalar that is placed in three of the four lanes.
%tmp = load i32, i32* undef
%tmp8 = insertelement <4 x i32> undef, i32 %tmp, i32 0
%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp, i32 1
%tmp10 = insertelement <4 x i32> %tmp9, i32 %tmp, i32 2
; Lane 3 is the only lane that receives a different value.
%tmp11 = insertelement <4 x i32> %tmp10, i32 %arg, i32 3
; Gives %tmp an additional use outside the vector being built.
; NOTE(review): presumably this extra use is what steers lowering onto the
; code path under test -- confirm against buildVector().
store i32 %tmp, i32* %dst
ret <4 x i32> %tmp11
}