1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

[DAGCombine] Preserve shuffles when one of the vector operands is constant

Summary:
Do *not* perform combines such as:

    vector_shuffle<4,1,2,3>(build_vector(Ud, C0, C1 C2), scalar_to_vector(X))
    ->
    build_vector(X, C0, C1, C2)

Keeping the shuffle allows lowering the constant build_vector to a materialized
constant vector (such as a vector-load from the constant-pool or some other idiom).

Reviewers: delena, igorb, spatel, mkuper, andreadb, RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D25524

llvm-svn: 285063
This commit is contained in:
Zvi Rackover 2016-10-25 12:14:19 +00:00
parent f8f8e59cdd
commit d92eb1132c
2 changed files with 101 additions and 88 deletions

View File

@ -834,6 +834,13 @@ static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
return false;
}
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
static bool isAnyConstantBuildVector(const SDNode *N) {
return ISD::isBuildVectorOfConstantSDNodes(N) ||
ISD::isBuildVectorOfConstantFPSDNodes(N);
}
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1) {
EVT VT = N0.getValueType();
@ -13744,6 +13751,71 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
// This combine is done in the following cases:
// 1. Both N0,N1 are BUILD_VECTOR's composed of constants or undefs.
// 2. Only one of N0,N1 is a BUILD_VECTOR composed of constants or undefs -
// Combine iff that node is ALL_ZEROS. We prefer not to combine a
// BUILD_VECTOR of all constants to allow efficient materialization of
// constant vectors, but the ALL_ZEROS is an exception because
// zero-extension matching seems to rely on having BUILD_VECTOR nodes with
// zero padding between elements. FIXME: Eliminate this exception for
// ALL_ZEROS constant vectors.
// 3. Neither N0,N1 are composed of only constants.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI) {
EVT VT = SVN->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = SVN->getOperand(0);
SDValue N1 = SVN->getOperand(1);
if (!N0->hasOneUse() || !N1->hasOneUse())
return SDValue();
// If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
// discussed above.
if (!N1.isUndef()) {
bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
return SDValue();
if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
return SDValue();
}
SmallVector<SDValue, 8> Ops;
for (int M : SVN->getMask()) {
SDValue Op = DAG.getUNDEF(VT.getScalarType());
if (M >= 0) {
int Idx = M < (int)NumElts ? M : M - NumElts;
SDValue &S = (M < (int)NumElts ? N0 : N1);
if (S.getOpcode() == ISD::BUILD_VECTOR) {
Op = S.getOperand(Idx);
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
if (Idx == 0)
Op = S.getOperand(0);
} else {
// Operand can't be combined - bail out.
return SDValue();
}
}
Ops.push_back(Op);
}
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
EVT SVT = VT.getScalarType();
if (SVT.isInteger())
for (SDValue &Op : Ops)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT != VT.getScalarType())
for (SDValue &Op : Ops)
Op = TLI.isZExtFree(Op.getValueType(), SVT)
? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
: DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@ -13859,40 +13931,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
SmallVector<SDValue, 8> Ops;
for (int M : SVN->getMask()) {
SDValue Op = DAG.getUNDEF(VT.getScalarType());
if (M >= 0) {
int Idx = M % NumElts;
SDValue &S = (M < (int)NumElts ? N0 : N1);
if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
Op = S.getOperand(Idx);
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
if (Idx == 0)
Op = S.getOperand(0);
} else {
// Operand can't be combined - bail out.
break;
}
}
Ops.push_back(Op);
}
if (Ops.size() == VT.getVectorNumElements()) {
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
EVT SVT = VT.getScalarType();
if (SVT.isInteger())
for (SDValue &Op : Ops)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT != VT.getScalarType())
for (SDValue &Op : Ops)
Op = TLI.isZExtFree(Op.getValueType(), SVT)
? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
: DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
return DAG.getBuildVector(VT, SDLoc(N), Ops);
}
}
if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
return Res;
// If this shuffle only has a single input that is a bitcasted shuffle,
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output

View File

@ -2839,36 +2839,26 @@ define void @combine_scalar_load_with_blend_with_zero(double* %a0, <4 x float>*
define <4 x float> @combine_constant_insertion_v4f32(float %f) {
; SSE2-LABEL: combine_constant_insertion_v4f32:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: movaps {{.*#+}} xmm1 = <u,4,5,3>
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_constant_insertion_v4f32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: movaps {{.*#+}} xmm1 = <u,4,5,3>
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_constant_insertion_v4f32:
; SSE41: # BB#0:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_constant_insertion_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
; AVX-NEXT: retq
%a0 = insertelement <4 x float> undef, float %f, i32 0
%ret = shufflevector <4 x float> %a0, <4 x float> <float undef, float 4.0, float 5.0, float 3.0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@ -2878,53 +2868,35 @@ define <4 x float> @combine_constant_insertion_v4f32(float %f) {
define <4 x i32> @combine_constant_insertion_v4i32(i32 %f) {
; SSE2-LABEL: combine_constant_insertion_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movl $30, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: movl $4, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movl $5, %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movd %edi, %xmm1
; SSE2-NEXT: movaps {{.*#+}} xmm0 = <u,4,5,30>
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_constant_insertion_v4i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movl $30, %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: movl $4, %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movl $5, %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movd %edi, %xmm1
; SSSE3-NEXT: movaps {{.*#+}} xmm0 = <u,4,5,30>
; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_constant_insertion_v4i32:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: movl $4, %eax
; SSE41-NEXT: pinsrd $1, %eax, %xmm0
; SSE41-NEXT: movl $5, %eax
; SSE41-NEXT: pinsrd $2, %eax, %xmm0
; SSE41-NEXT: movl $30, %eax
; SSE41-NEXT: pinsrd $3, %eax, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_constant_insertion_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: movl $4, %eax
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT: movl $5, %eax
; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT: movl $30, %eax
; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: combine_constant_insertion_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_constant_insertion_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
; AVX2-NEXT: retq
%a0 = insertelement <4 x i32> undef, i32 %f, i32 0
%ret = shufflevector <4 x i32> %a0, <4 x i32> <i32 undef, i32 4, i32 5, i32 30>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x i32> %ret