
[X86] Remove isel patterns for (X86VBroadcast (i16 (trunc (i32 (load))))). Replace with a DAG combine to form VBROADCAST_LOAD.

isTypeDesirableForOp prevents loads from being shrunk to i16 by DAG
combine, so we can't just match the broadcast and a scalar load.
Instead, look for broadcast+truncate+load and form a vbroadcast_load
during DAG combine. This replaces what was previously done as an isel
pattern, and I think it also fixes things so we won't change the size
of a volatile load. But my main motivation is just to clean up our
isel patterns.
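
For context, here is a minimal sketch of the kind of source that hits this path (a hypothetical example, not taken from the commit; the function name is made up): broadcasting the low 16 bits of a 32-bit load under AVX2 reaches the DAG as a broadcast of (i16 (trunc (i32 (load)))), because DAG combine declines to shrink the load to i16.

#include <immintrin.h>

// Hypothetical illustration: with AVX2 enabled, the splat below should be
// selected as vpbroadcastw. Previously the removed isel patterns folded the
// load; with this change, the new DAG combine forms a VBROADCAST_LOAD instead.
__m256i broadcast_low16(const int *p) {
  return _mm256_set1_epi16((short)*p);
}

The LN->isSimple() check in the new combine means volatile or atomic loads keep their original width, which the old isel patterns couldn't guarantee.
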
Craig Topper 2020-03-10 00:00:26 -07:00
parent aa2a6dc76f
commit bf3b71f64b
3 changed files with 22 additions and 10 deletions

lib/Target/X86/X86ISelLowering.cpp

@@ -35171,6 +35171,28 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
      return N; // Return N so it doesn't get rechecked!
    }

    // Due to isTypeDesirableForOp, we won't always shrink a load truncated to
    // i16. So shrink it ourselves if we can make a broadcast_load.
    if (SrcVT == MVT::i16 && Src.getOpcode() == ISD::TRUNCATE &&
        Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
        Src.getOperand(0).hasOneUse()) {
      assert(Subtarget.hasAVX2() && "Expected AVX2");
      LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0));
      if (LN->isSimple()) {
        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
        SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
        SDValue BcastLd =
            DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
                                    MVT::i16, LN->getPointerInfo(),
                                    LN->getAlignment(),
                                    LN->getMemOperand()->getFlags());
        DCI.CombineTo(N.getNode(), BcastLd);
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(LN);
        return N; // Return N so it doesn't get rechecked!
      }
    }

    // vbroadcast(vzload X) -> vbroadcast_load X
    if (Src.getOpcode() == X86ISD::VZEXT_LOAD && Src.hasOneUse()) {
      MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);

lib/Target/X86/X86InstrAVX512.td

@@ -1426,10 +1426,6 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256rm addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ128rm addr:$src)>;
@@ -1446,8 +1442,6 @@ let Predicates = [HasVLX, HasBWI] in {
let Predicates = [HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZrm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
                     (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZrm addr:$src)>;

lib/Target/X86/X86InstrSSE.td

@@ -7517,10 +7517,6 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastl
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWrm addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWYrm addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWrm addr:$src)>;