
[X86] Simplify the code at the end of lowerShuffleAsBroadcast.

The original code could create a bitcast from f64 to i64 and back
on 32-bit targets. This only worked because getBitcast was able to
fold the casts away and avoid leaving the illegal i64 type.

Now we handle the scalar case directly by broadcasting with the
scalar type as the element type and then bitcasting to the final
VT. This works because we ensure the scalar type is the same size
as the final VT's element type, so no casts to i64 are created.
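
As an illustration (not part of the commit), here is a minimal
standalone C++ sketch of the type math on the new scalar path.
SimpleVT and getBroadcastVT are made-up stand-ins for LLVM's MVT
and MVT::getVectorVT, not the actual API:

#include <cassert>
#include <cstdio>

// Toy model of a SelectionDAG value type: an element type name, its
// width in bits, and an element count (0 denotes a scalar).
struct SimpleVT {
  const char *EltName;
  unsigned EltBits;
  unsigned NumElts;
};

// Mirrors the shape of the new code's
//   MVT::getVectorVT(V.getSimpleValueType(), VT.getVectorNumElements()):
// broadcast in the scalar's own type, with VT's element count.
SimpleVT getBroadcastVT(SimpleVT Scalar, SimpleVT VT) {
  assert(Scalar.NumElts == 0 && "expected a scalar");
  assert(Scalar.EltBits == VT.EltBits && "scalar must match VT element size");
  return {Scalar.EltName, Scalar.EltBits, VT.NumElts};
}

int main() {
  SimpleVT F64 = {"f64", 64, 0};   // scalar being broadcast
  SimpleVT V4I64 = {"i64", 64, 4}; // final shuffle type VT
  SimpleVT BroadcastVT = getBroadcastVT(F64, V4I64);
  // The broadcast happens as v4f64 and a single vector bitcast yields
  // v4i64, so no scalar f64 -> i64 bitcast is ever created.
  printf("broadcast as %ux%s, then bitcast to %ux%s\n", BroadcastVT.NumElts,
         BroadcastVT.EltName, V4I64.NumElts, V4I64.EltName);
  return 0;
}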

For the vector case, we cast V to VT or to a subvector type of VT
and then do the broadcast.
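
A similar sketch for the vector-path type math; the concrete types
(a 128-bit source feeding a v8f32 broadcast) are a hypothetical
example, not taken from the commit:

#include <cstdio>

int main() {
  unsigned VBits = 128;     // V.getValueSizeInBits(): 128-bit source vector
  unsigned NumEltBits = 32; // VT's element size, e.g. f32
  unsigned VTNumElts = 8;   // VT is v8f32, i.e. 256 bits

  // Mirrors the new code: CastVT keeps VT's element type but may have
  // fewer elements than VT when the source is narrower than VT.
  unsigned NumSrcElts = VBits / NumEltBits; // 4, so CastVT is v4f32
  printf("bitcast V to v%uf32, then broadcast to v%uf32\n", NumSrcElts,
         VTNumElts);
  return 0;
}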

I think this all matches what we generated before, just in a more
readable way.
Author: Craig Topper
Date:   2020-03-04 20:38:44 -08:00
Parent: 7d0a2e9be8
Commit: cac4f78ec3


@@ -12995,8 +12995,6 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
             DL, VT, V, BroadcastIdx, Subtarget, DAG))
       return TruncBroadcast;
 
-  MVT BroadcastVT = VT;
-
   // Also check the simpler case, where we can directly reuse the scalar.
   if (!BitCastSrc &&
       ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
@@ -13015,7 +13013,7 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
     // Reduce the vector load and shuffle to a broadcasted scalar load.
     LoadSDNode *Ld = cast<LoadSDNode>(V);
     SDValue BaseAddr = Ld->getOperand(1);
-    EVT SVT = BroadcastVT.getScalarType();
+    EVT SVT = VT.getScalarType();
     unsigned Offset = BroadcastIdx * SVT.getStoreSize();
     assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
     SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
@@ -13024,7 +13022,7 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
     // than MOVDDUP.
     // FIXME: Should we add VBROADCAST_LOAD isel patterns for pre-AVX?
     if (Opcode == X86ISD::VBROADCAST) {
-      SDVTList Tys = DAG.getVTList(BroadcastVT, MVT::Other);
+      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
       SDValue Ops[] = {Ld->getChain(), NewAddr};
       V = DAG.getMemIntrinsicNode(
           X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SVT,
@@ -13068,38 +13066,26 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
     V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
                     DAG.getBitcast(MVT::f64, V));
 
-  // Bitcast back to the same scalar type as BroadcastVT.
-  if (V.getValueType().getScalarType() != BroadcastVT.getScalarType()) {
-    assert(NumEltBits == BroadcastVT.getScalarSizeInBits() &&
-           "Unexpected vector element size");
-    MVT ExtVT;
-    if (V.getValueType().isVector()) {
-      unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
-      ExtVT = MVT::getVectorVT(BroadcastVT.getScalarType(), NumSrcElts);
-    } else {
-      ExtVT = BroadcastVT.getScalarType();
-    }
-    V = DAG.getBitcast(ExtVT, V);
-  }
-
-  // 32-bit targets need to load i64 as a f64 and then bitcast the result.
-  if (!Subtarget.is64Bit() && V.getValueType() == MVT::i64) {
-    V = DAG.getBitcast(MVT::f64, V);
-    unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
-    BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
+  // If this is a scalar, do the broadcast on this type and bitcast.
+  if (!V.getValueType().isVector()) {
+    assert(V.getScalarValueSizeInBits() == NumEltBits &&
+           "Unexpected scalar size");
+    MVT BroadcastVT = MVT::getVectorVT(V.getSimpleValueType(),
+                                       VT.getVectorNumElements());
+    return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
   }
 
   // We only support broadcasting from 128-bit vectors to minimize the
   // number of patterns we need to deal with in isel. So extract down to
   // 128-bits, removing as many bitcasts as possible.
-  if (V.getValueSizeInBits() > 128) {
-    MVT ExtVT = V.getSimpleValueType().getScalarType();
-    ExtVT = MVT::getVectorVT(ExtVT, 128 / ExtVT.getScalarSizeInBits());
+  if (V.getValueSizeInBits() > 128)
     V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
-    V = DAG.getBitcast(ExtVT, V);
-  }
 
-  return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
+  // Otherwise cast V to a vector with the same element type as VT, but
+  // possibly narrower than VT. Then perform the broadcast.
+  unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
+  MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), NumSrcElts);
+  return DAG.getNode(Opcode, DL, VT, DAG.getBitcast(CastVT, V));
 }
 
 // Check for whether we can use INSERTPS to perform the shuffle. We only use