mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[X86] Improve (vzmovl (insert_subvector)) combine to handle a bitcast between the vzmovl and insert
This combine tries to shrink a vzmovl if its input is an insert_subvector. This patch improves it to turn (vzmovl (bitcast (insert_subvector))) into (insert_subvector (vzmovl (bitcast))), potentially allowing the bitcast to be folded with a load.
This commit is contained in:
parent
6d167c498f
commit
c3746fcd71
@ -36576,16 +36576,21 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
|
||||
// insert into a zero vector. This helps get VZEXT_MOVL closer to
|
||||
// scalar_to_vectors where 256/512 are canonicalized to an insert and a
|
||||
// 128-bit scalar_to_vector. This reduces the number of isel patterns.
|
||||
if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps() &&
|
||||
N->getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR &&
|
||||
N->getOperand(0).hasOneUse() &&
|
||||
N->getOperand(0).getOperand(0).isUndef() &&
|
||||
isNullConstant(N->getOperand(0).getOperand(2))) {
|
||||
SDValue In = N->getOperand(0).getOperand(1);
|
||||
SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, In.getValueType(), In);
|
||||
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT,
|
||||
getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl),
|
||||
Movl, N->getOperand(0).getOperand(2));
|
||||
if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps()) {
|
||||
SDValue V = peekThroughOneUseBitcasts(N->getOperand(0));
|
||||
|
||||
if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.hasOneUse() &&
|
||||
V.getOperand(0).isUndef() && isNullConstant(V.getOperand(2))) {
|
||||
SDValue In = V.getOperand(1);
|
||||
MVT SubVT =
|
||||
MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
|
||||
In.getValueSizeInBits() / VT.getScalarSizeInBits());
|
||||
In = DAG.getBitcast(SubVT, In);
|
||||
SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, SubVT, In);
|
||||
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT,
|
||||
getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl),
|
||||
Movl, V.getOperand(2));
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@ -5,12 +5,12 @@ define void @endless_loop() {
|
||||
; CHECK-LABEL: endless_loop:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vmovaps (%eax), %xmm0
|
||||
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
|
||||
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3,4,5,6],ymm0[7]
|
||||
; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
|
||||
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
|
||||
; CHECK-NEXT: vmovaps %ymm0, (%eax)
|
||||
; CHECK-NEXT: vmovaps %ymm1, (%eax)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
|
@ -687,7 +687,6 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
;
|
||||
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
@ -866,8 +865,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64_load128(<4 x float>* %x) s
|
||||
; AVX512DQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX512DQ-32: # %bb.0:
|
||||
; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512DQ-32-NEXT: vmovdqa (%eax), %xmm0
|
||||
; AVX512DQ-32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512DQ-32-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-32-NEXT: vzeroupper
|
||||
@ -875,8 +873,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64_load128(<4 x float>* %x) s
|
||||
;
|
||||
; AVX512DQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX512DQ-64: # %bb.0:
|
||||
; AVX512DQ-64-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512DQ-64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512DQ-64-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-64-NEXT: vzeroupper
|
||||
@ -1201,7 +1198,6 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
;
|
||||
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
@ -1528,8 +1524,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64_load128(<4 x float>* %x) s
|
||||
; AVX512DQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX512DQ-32: # %bb.0:
|
||||
; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512DQ-32-NEXT: vmovdqa (%eax), %xmm0
|
||||
; AVX512DQ-32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512DQ-32-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-32-NEXT: vzeroupper
|
||||
@ -1537,8 +1532,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64_load128(<4 x float>* %x) s
|
||||
;
|
||||
; AVX512DQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX512DQ-64: # %bb.0:
|
||||
; AVX512DQ-64-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512DQ-64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512DQ-64-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-64-NEXT: vzeroupper
|
||||
|
Loading…
x
Reference in New Issue
Block a user