1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86] XFormVExtractWithShuffleIntoLoad - handle shuffle mask scaling

If the target shuffle mask comes from a wider element type, scale the mask to the extracted vector's element count so that the extraction can still peek through the shuffle.

Fixes the regression mentioned in rL368307

llvm-svn: 368308
This commit is contained in:
Simon Pilgrim 2019-08-08 16:05:23 +00:00
parent d43cdb1e90
commit eb76021ab0
2 changed files with 29 additions and 16 deletions

View File

@ -34757,8 +34757,9 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
/// folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
/// shuffles have been custom lowered so we need to handle those here.
static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
static SDValue
XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@ -34770,13 +34771,17 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT OriginalVT = InVec.getValueType();
unsigned NumOriginalElts = OriginalVT.getVectorNumElements();
// Peek through bitcasts, don't duplicate a load with other uses.
InVec = peekThroughOneUseBitcasts(InVec);
EVT CurrentVT = InVec.getValueType();
if (!CurrentVT.isVector() ||
CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
if (!CurrentVT.isVector())
return SDValue();
unsigned NumCurrentElts = CurrentVT.getVectorNumElements();
if ((NumOriginalElts % NumCurrentElts) != 0)
return SDValue();
if (!isTargetShuffle(InVec.getOpcode()))
@ -34793,10 +34798,17 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
ShuffleOps, ShuffleMask, UnaryShuffle))
return SDValue();
unsigned Scale = NumOriginalElts / NumCurrentElts;
if (Scale > 1) {
SmallVector<int, 16> ScaledMask;
scaleShuffleMask<int>(Scale, ShuffleMask, ScaledMask);
ShuffleMask = std::move(ScaledMask);
}
assert(ShuffleMask.size() == NumOriginalElts && "Shuffle mask size mismatch");
// Select the input vector, guarding against an out-of-range extract index.
unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
int Idx = (Elt > (int)NumOriginalElts) ? SM_SentinelUndef : ShuffleMask[Elt];
if (Idx == SM_SentinelZero)
return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
@ -34809,8 +34821,9 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; }))
return SDValue();
assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] : ShuffleOps[1];
assert(0 <= Idx && Idx < (int)(2 * NumOriginalElts) &&
"Shuffle index out of range");
SDValue LdNode = (Idx < (int)NumOriginalElts) ? ShuffleOps[0] : ShuffleOps[1];
// If inputs to shuffle are the same for both ops, then allow 2 uses
unsigned AllowedUses =
@ -34830,7 +34843,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
// If there's a bitcast before the shuffle, check if the load type and
@ -34848,10 +34861,11 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
SDLoc dl(N);
// Create the shuffle node, taking into account the case that it's a unary shuffle.
SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1];
Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle,
ShuffleMask);
Shuffle = DAG.getBitcast(OriginalVT, Shuffle);
SDValue Shuffle = UnaryShuffle ? DAG.getUNDEF(OriginalVT)
: DAG.getBitcast(OriginalVT, ShuffleOps[1]);
Shuffle = DAG.getVectorShuffle(OriginalVT, dl,
DAG.getBitcast(OriginalVT, ShuffleOps[0]),
Shuffle, ShuffleMask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
EltNo);
}

View File

@ -285,13 +285,12 @@ define float @extract_lane_insertps_5123(<4 x float> %a0, <4 x float> *%p1) {
define float @extract_lane_insertps_6123(<4 x float> %a0, <4 x float> *%p1) {
; SSE-LABEL: extract_lane_insertps_6123:
; SSE: # %bb.0:
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: extract_lane_insertps_6123:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
%a1 = load <4 x float>, <4 x float> *%p1
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 128)