1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 11:42:57 +01:00

[X86][AVX] Fold vt1 concat_vectors(vt2 undef, vt2 broadcast(x)) --> vt1 broadcast(x)

If we're not inserting the broadcast into the lowest subvector, then we can avoid the insertion by just performing a larger broadcast.

Avoids a regression when we enable AVX1 broadcasts in shuffle combining

llvm-svn: 352742
This commit is contained in:
Simon Pilgrim 2019-01-31 11:15:05 +00:00
parent e6a4e7131e
commit 4fa00a57a9
2 changed files with 9 additions and 9 deletions

View File

@ -41593,6 +41593,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
// If this is subv_broadcast insert into both halves, use a larger
// subv_broadcast.
// TODO - handle X86ISD::VBROADCAST as well?
if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2)
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
SubVec.getOperand(0));
@ -41614,11 +41615,14 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
SubVec2, Vec.getOperand(2));
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec,
N->getOperand(2));
}
}
}
// If this is a broadcast insert into an upper undef, use a larger broadcast.
if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));
return SDValue();
}

View File

@ -289,31 +289,27 @@ define <8 x i32> @elt7_v8i32(i32 %x) {
;
; X32AVX2-LABEL: elt7_v8i32:
; X32AVX2: # %bb.0:
; X32AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X32AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X32AVX2-NEXT: retl
;
; X64AVX2-LABEL: elt7_v8i32:
; X64AVX2: # %bb.0:
; X64AVX2-NEXT: vmovd %edi, %xmm0
; X64AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; X64AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; X64AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64AVX2-NEXT: retq
;
; X32AVX512F-LABEL: elt7_v8i32:
; X32AVX512F: # %bb.0:
; X32AVX512F-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X32AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32AVX512F-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32AVX512F-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X32AVX512F-NEXT: retl
;
; X64AVX512F-LABEL: elt7_v8i32:
; X64AVX512F: # %bb.0:
; X64AVX512F-NEXT: vmovd %edi, %xmm0
; X64AVX512F-NEXT: vpbroadcastd %xmm0, %xmm0
; X64AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64AVX512F-NEXT: vpbroadcastd %xmm0, %ymm0
; X64AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64AVX512F-NEXT: retq
%ins = insertelement <8 x i32> <i32 42, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %x, i32 7