mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[SystemZ] Replicate the load with most uses in buildVector()
Iterate over all elements and count the number of uses among them for each used load. Then make sure to REPLICATE the load which has the most uses in order to minimize the number of needed element insertions. Review: Ulrich Weigand https://reviews.llvm.org/D54322 llvm-svn: 346637
This commit is contained in:
parent
eaae0059be
commit
a41af2e2bb
@ -4490,18 +4490,21 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
|
||||
// avoid a false dependency on any previous contents of the vector
|
||||
// register.
|
||||
|
||||
// Use a VLREP if at least one element is a load.
|
||||
unsigned LoadElIdx = UINT_MAX;
|
||||
// Use a VLREP if at least one element is a load. Make sure to replicate
|
||||
// the load with the most elements having its value.
|
||||
std::map<const SDNode*, unsigned> UseCounts;
|
||||
SDNode *LoadMaxUses = nullptr;
|
||||
for (unsigned I = 0; I < NumElements; ++I)
|
||||
if (Elems[I].getOpcode() == ISD::LOAD &&
|
||||
cast<LoadSDNode>(Elems[I])->isUnindexed()) {
|
||||
LoadElIdx = I;
|
||||
break;
|
||||
SDNode *Ld = Elems[I].getNode();
|
||||
UseCounts[Ld]++;
|
||||
if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
|
||||
LoadMaxUses = Ld;
|
||||
}
|
||||
if (LoadElIdx != UINT_MAX) {
|
||||
Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]);
|
||||
Done[LoadElIdx] = true;
|
||||
ReplicatedVal = Elems[LoadElIdx];
|
||||
if (LoadMaxUses != nullptr) {
|
||||
ReplicatedVal = SDValue(LoadMaxUses, 0);
|
||||
Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
|
||||
} else {
|
||||
// Try to use VLVGP.
|
||||
unsigned I1 = NumElements / 2 - 1;
|
||||
|
19
test/CodeGen/SystemZ/vec-move-20.ll
Normal file
19
test/CodeGen/SystemZ/vec-move-20.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
; Test that a vector which is built with elements from two loads replicates the
|
||||
; load with the most elements having its value.
|
||||
|
||||
; CHECK: vlef
|
||||
; CHECK-NOT: vlvgf
|
||||
|
||||
define void @update(i32* %src1, i32* %src2, <4 x i32>* %dst) {
|
||||
bb:
|
||||
%tmp = load i32, i32* %src1
|
||||
%tmp1 = load i32, i32* %src2
|
||||
%tmp2 = insertelement <4 x i32> undef, i32 %tmp, i32 0
|
||||
%tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
|
||||
%tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
|
||||
%tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
|
||||
store <4 x i32> %tmp5, <4 x i32>* %dst
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user