mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[X86] Remove isel patterns for (X86VBroadcast (i16 (trunc (i32 (load))))). Replace with a DAG combine to form VBROADCAST_LOAD.
isTypeDesirableForOp prevents DAG combine from shrinking loads to i16. Because of this, we can't simply match a broadcast of a scalar i16 load. Instead, look for broadcast+truncate+load and form a vbroadcast_load during DAG combine. This replaces what was previously done as an isel pattern and, I think, fixes it so that we won't change the size of a volatile load. But my main motivation is just to clean up our isel patterns.
This commit is contained in:
parent
aa2a6dc76f
commit
bf3b71f64b
@ -35171,6 +35171,28 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
||||
return N; // Return N so it doesn't get rechecked!
|
||||
}
|
||||
|
||||
// Due to isTypeDesirableForOp, we won't always shrink a load truncated to
|
||||
// i16. So shrink it ourselves if we can make a broadcast_load.
|
||||
if (SrcVT == MVT::i16 && Src.getOpcode() == ISD::TRUNCATE &&
|
||||
Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
|
||||
Src.getOperand(0).hasOneUse()) {
|
||||
assert(Subtarget.hasAVX2() && "Expected AVX2");
|
||||
LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0));
|
||||
if (LN->isSimple()) {
|
||||
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
|
||||
SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
|
||||
SDValue BcastLd =
|
||||
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
|
||||
MVT::i16, LN->getPointerInfo(),
|
||||
LN->getAlignment(),
|
||||
LN->getMemOperand()->getFlags());
|
||||
DCI.CombineTo(N.getNode(), BcastLd);
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
|
||||
DCI.recursivelyDeleteUnusedNodes(LN);
|
||||
return N; // Return N so it doesn't get rechecked!
|
||||
}
|
||||
}
|
||||
|
||||
// vbroadcast(vzload X) -> vbroadcast_load X
|
||||
if (Src.getOpcode() == X86ISD::VZEXT_LOAD && Src.hasOneUse()) {
|
||||
MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);
|
||||
|
@ -1426,10 +1426,6 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
|
||||
let Predicates = [HasVLX, HasBWI] in {
|
||||
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
|
||||
// This means we'll encounter truncated i32 loads; match that here.
|
||||
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
|
||||
(VPBROADCASTWZ128rm addr:$src)>;
|
||||
def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
|
||||
(VPBROADCASTWZ256rm addr:$src)>;
|
||||
def : Pat<(v8i16 (X86VBroadcast
|
||||
(i16 (trunc (i32 (extloadi16 addr:$src)))))),
|
||||
(VPBROADCASTWZ128rm addr:$src)>;
|
||||
@ -1446,8 +1442,6 @@ let Predicates = [HasVLX, HasBWI] in {
|
||||
let Predicates = [HasBWI] in {
|
||||
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
|
||||
// This means we'll encounter truncated i32 loads; match that here.
|
||||
def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
|
||||
(VPBROADCASTWZrm addr:$src)>;
|
||||
def : Pat<(v32i16 (X86VBroadcast
|
||||
(i16 (trunc (i32 (extloadi16 addr:$src)))))),
|
||||
(VPBROADCASTWZrm addr:$src)>;
|
||||
|
@ -7517,10 +7517,6 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastl
|
||||
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
|
||||
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
|
||||
// This means we'll encounter truncated i32 loads; match that here.
|
||||
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
|
||||
(VPBROADCASTWrm addr:$src)>;
|
||||
def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
|
||||
(VPBROADCASTWYrm addr:$src)>;
|
||||
def : Pat<(v8i16 (X86VBroadcast
|
||||
(i16 (trunc (i32 (extloadi16 addr:$src)))))),
|
||||
(VPBROADCASTWrm addr:$src)>;
|
||||
|
Loading…
Reference in New Issue
Block a user