1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

[DAGCombine] Permit combining of shuffles of equivalent splat BUILD_VECTORs

combineShuffleOfScalars is very conservative about shuffled BUILD_VECTORs that can be combined together.

This patch adds one additional case - if both BUILD_VECTORs represent splats of the same scalar value but with different UNDEF elements, then we should create a single splat BUILD_VECTOR, sharing only the UNDEF elements defined by the shuffle mask.

Differential Revision: https://reviews.llvm.org/D38696

llvm-svn: 316331
This commit is contained in:
Simon Pilgrim 2017-10-23 15:48:08 +00:00
parent cde259879b
commit 93f91e166d
2 changed files with 17 additions and 9 deletions

View File

@ -15453,7 +15453,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting BUILD_VECTOR would have duplicate
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
@ -15477,6 +15477,15 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return SDValue();
}
// If both inputs are splats of the same value then we can safely merge this
// to a single BUILD_VECTOR with undef elements based on the shuffle mask.
bool IsSplat = false;
auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
if (BV0 && BV1)
if (SDValue Splat0 = BV0->getSplatValue())
IsSplat = (Splat0 == BV1->getSplatValue());
SmallVector<SDValue, 8> Ops;
SmallSet<SDValue, 16> DuplicateOps;
for (int M : SVN->getMask()) {
@ -15495,11 +15504,12 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
}
}
// Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
// fine, but it's likely to generate low-quality code if the target can't
// reconstruct an appropriate shuffle.
// Don't duplicate a non-constant BUILD_VECTOR operand unless we're
// generating a splat; semantically, this is fine, but it's likely to
// generate low-quality code if the target can't reconstruct an appropriate
// shuffle.
if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
if (!DuplicateOps.insert(Op).second)
if (!IsSplat && !DuplicateOps.insert(Op).second)
return SDValue();
Ops.push_back(Op);

View File

@ -853,14 +853,12 @@ define <4 x double> @broadcast_shuffle1032(double* %p) {
; X32-LABEL: broadcast_shuffle1032:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: vbroadcastsd (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: broadcast_shuffle1032:
; X64: ## BB#0:
; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: vbroadcastsd (%rdi), %ymm0
; X64-NEXT: retq
%1 = load double, double* %p
%2 = insertelement <2 x double> undef, double %1, i32 1