mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-21 20:12:56 +02:00
[X86] Add a combine to recognize when we have two insert subvectors that together write the whole vector, but the starting vector isn't undef.
In this case we should replace the starting vector with undef. llvm-svn: 312462
This commit is contained in:
parent
86c2b6cc78
commit
a18221589e
@ -35750,6 +35750,18 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
|
||||
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
|
||||
getZeroVector(OpVT, Subtarget, DAG, dl), SubVec2,
|
||||
Vec.getOperand(2));
|
||||
|
||||
// If we are inserting into both halves of the vector, the starting
|
||||
// vector should be undef. If it isn't, make it so. Only do this if the
|
||||
// early insert has no other uses.
|
||||
// TODO: Should this be a generic DAG combine?
|
||||
if (!Vec.getOperand(0).isUndef() && Vec.hasOneUse()) {
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
|
||||
SubVec2, Vec.getOperand(2));
|
||||
DCI.AddToWorklist(Vec.getNode());
|
||||
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec, Idx);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -758,7 +758,6 @@ define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind {
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
|
||||
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm2
|
||||
; AVX-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
|
||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
|
@ -473,7 +473,6 @@ define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
|
||||
; AVX1-NEXT: xorl %eax, %eax
|
||||
; AVX1-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
|
||||
@ -485,7 +484,6 @@ define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
|
||||
; AVX2-NEXT: xorl %eax, %eax
|
||||
; AVX2-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
|
||||
|
Loading…
Reference in New Issue
Block a user