mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[X86] Add DAG combine to turn (vzext_movl (vbroadcast_load)) -> vzext_load.
If we're zeroing the other elements then we don't need the broadcast.
This commit is contained in:
parent
bf96dd7026
commit
c5b36c0e65
@ -35965,9 +35965,30 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
|
||||
VT.getVectorElementType(),
|
||||
LN->getPointerInfo(),
|
||||
LN->getAlignment(),
|
||||
MachineMemOperand::MOLoad);
|
||||
LN->getMemOperand()->getFlags());
|
||||
DCI.CombineTo(N, VZLoad);
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
|
||||
return VZLoad;
|
||||
DCI.recursivelyDeleteUnusedNodes(LN);
|
||||
return SDValue(N, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the broadcast and
|
||||
// can just use a VZEXT_LOAD.
|
||||
// FIXME: Is there some way to do this with SimplifyDemandedVectorElts?
|
||||
if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
|
||||
N->getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD) {
|
||||
auto *LN = cast<MemSDNode>(N->getOperand(0));
|
||||
if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) {
|
||||
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
|
||||
SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
|
||||
SDValue VZLoad =
|
||||
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
|
||||
LN->getMemoryVT(), LN->getMemOperand());
|
||||
DCI.CombineTo(N, VZLoad);
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
|
||||
DCI.recursivelyDeleteUnusedNodes(LN);
|
||||
return SDValue(N, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -71,17 +71,16 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
|
||||
; X32-AVX-NEXT: andl $-128, %esp
|
||||
; X32-AVX-NEXT: subl $384, %esp # imm = 0x180
|
||||
; X32-AVX-NEXT: movl 40(%ebp), %ecx
|
||||
; X32-AVX-NEXT: vpbroadcastq 32(%ebp), %ymm0
|
||||
; X32-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; X32-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||
; X32-AVX-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%esp)
|
||||
; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
|
||||
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||
; X32-AVX-NEXT: vmovaps %ymm1, (%esp)
|
||||
; X32-AVX-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%esp)
|
||||
; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
|
||||
; X32-AVX-NEXT: leal (%ecx,%ecx), %eax
|
||||
; X32-AVX-NEXT: andl $31, %eax
|
||||
; X32-AVX-NEXT: movl 128(%esp,%eax,4), %eax
|
||||
|
Loading…
Reference in New Issue
Block a user