
[X86] Add DAG combine to turn (vzext_movl (vbroadcast_load)) -> vzext_load.

If we're zeroing the other elements then we don't need the broadcast.
Craig Topper 2020-03-08 00:15:26 -08:00
parent bf96dd7026
commit c5b36c0e65
2 changed files with 26 additions and 6 deletions
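Why the broadcast is redundant: X86ISD::VBROADCAST_LOAD splats the loaded scalar into every lane, and X86ISD::VZEXT_MOVL then keeps only lane 0 and zeroes the rest, so the pair computes exactly what X86ISD::VZEXT_LOAD produces in one step: the scalar in lane 0 with all upper lanes zero. Below is a minimal sketch of that equivalence using SSE2 intrinsics (the helper names `before` and `after` are illustrative, not part of the patch):

  #include <immintrin.h>

  // Old pattern: splat the loaded scalar, then zero every lane but lane 0.
  __m128i before(const long long *p) {
    __m128i b = _mm_set1_epi64x(*p); // vbroadcast_load: splat mem into all lanes
    return _mm_move_epi64(b);        // vzext_movl: keep lane 0, zero the rest
  }

  // New pattern: one zero-extending scalar load (movq).
  __m128i after(const long long *p) {
    return _mm_loadl_epi64((const __m128i *)p); // vzext_load
  }

Both functions return the same value for any p, which is why the broadcast can be dropped when its only use is the zeroing move.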

lib/Target/X86/X86ISelLowering.cpp

@@ -35965,9 +35965,30 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
                                     VT.getVectorElementType(),
                                     LN->getPointerInfo(),
                                     LN->getAlignment(),
-                                    MachineMemOperand::MOLoad);
+                                    LN->getMemOperand()->getFlags());
         DCI.CombineTo(N, VZLoad);
         DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
-        return VZLoad;
+        DCI.recursivelyDeleteUnusedNodes(LN);
+        return SDValue(N, 0);
       }
     }
 
+    // If this is a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the
+    // broadcast and can just use a VZEXT_LOAD.
+    // FIXME: Is there some way to do this with SimplifyDemandedVectorElts?
+    if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
+        N->getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD) {
+      auto *LN = cast<MemSDNode>(N->getOperand(0));
+      if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) {
+        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+        SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+        SDValue VZLoad =
+            DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                    LN->getMemoryVT(), LN->getMemOperand());
+        DCI.CombineTo(N, VZLoad);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+        DCI.recursivelyDeleteUnusedNodes(LN);
+        return SDValue(N, 0);
+      }
+    }
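Two guards keep the new combine conservative: the VBROADCAST_LOAD must have no use other than the VZEXT_MOVL (hasOneUse), since any other user still needs the splatted value, and the result's scalar size must match the broadcast's memory type, so the VZEXT_LOAD reads exactly the element being kept. The same hunk also touches the adjacent pre-existing combine: it now preserves the original load's memory-operand flags instead of hardcoding MachineMemOperand::MOLoad, and deletes the dead load with recursivelyDeleteUnusedNodes rather than returning the replacement node directly.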

test/CodeGen/X86/vector-extend-inreg.ll

@@ -71,17 +71,16 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
 ; X32-AVX-NEXT:    andl $-128, %esp
 ; X32-AVX-NEXT:    subl $384, %esp # imm = 0x180
 ; X32-AVX-NEXT:    movl 40(%ebp), %ecx
-; X32-AVX-NEXT:    vpbroadcastq 32(%ebp), %ymm0
-; X32-AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, (%esp)
-; X32-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    leal (%ecx,%ecx), %eax
 ; X32-AVX-NEXT:    andl $31, %eax
 ; X32-AVX-NEXT:    movl 128(%esp,%eax,4), %eax
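The codegen diff shows the effect: the vpbroadcastq of 32(%ebp) followed by a vmovq to zero the upper lanes collapses into a single vmovsd, which loads the 64-bit value and zero-fills the rest of the register. The two vmovdqa stores of %ymm0 become vmovaps, presumably because the value is now produced in the floating-point domain.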