mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] combineScalarToVector - handle (v2i64 scalar_to_vector(aextload)) as well as (v2i64 scalar_to_vector(aext))
We already fold (v2i64 scalar_to_vector(aext)) -> (v2i64 bitcast(v4i32 scalar_to_vector(x))); this adds support for similar aextload cases and also handles v2f64 cases that wrap the i64 extension behind bitcasts. Fixes the remaining issue with PR39016
This commit is contained in:
parent
56cd330ee7
commit
eefd5fecb7
@ -48038,13 +48038,25 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
|
||||
Src.getOperand(1));
|
||||
|
||||
// Reduce v2i64 to v4i32 if we don't need the upper bits.
|
||||
// TODO: Move to DAGCombine?
|
||||
if (VT == MVT::v2i64 && Src.getOpcode() == ISD::ANY_EXTEND &&
|
||||
Src.getValueType() == MVT::i64 && Src.hasOneUse() &&
|
||||
Src.getOperand(0).getScalarValueSizeInBits() <= 32)
|
||||
// TODO: Move to DAGCombine/SimplifyDemandedBits?
|
||||
if (VT == MVT::v2i64 || VT == MVT::v2f64) {
|
||||
auto IsAnyExt64 = [](SDValue Op) {
|
||||
if (Op.getValueType() != MVT::i64 || !Op.hasOneUse())
|
||||
return SDValue();
|
||||
if (Op.getOpcode() == ISD::ANY_EXTEND &&
|
||||
Op.getOperand(0).getScalarValueSizeInBits() <= 32)
|
||||
return Op.getOperand(0);
|
||||
if (auto *Ld = dyn_cast<LoadSDNode>(Op))
|
||||
if (Ld->getExtensionType() == ISD::EXTLOAD &&
|
||||
Ld->getMemoryVT().getScalarSizeInBits() <= 32)
|
||||
return Op;
|
||||
return SDValue();
|
||||
};
|
||||
if (SDValue ExtSrc = IsAnyExt64(peekThroughOneUseBitcasts(Src)))
|
||||
return DAG.getBitcast(
|
||||
VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
|
||||
DAG.getAnyExtOrTrunc(Src.getOperand(0), DL, MVT::i32)));
|
||||
DAG.getAnyExtOrTrunc(ExtSrc, DL, MVT::i32)));
|
||||
}
|
||||
|
||||
// Combine (v2i64 (scalar_to_vector (i64 (bitconvert (mmx))))) to MOVQ2DQ.
|
||||
if (VT == MVT::v2i64 && Src.getOpcode() == ISD::BITCAST &&
|
||||
|
@ -204,9 +204,8 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
|
||||
;
|
||||
; AVX1-LABEL: ext_i4_4i64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; AVX1-NEXT: vmovq %rdi, %xmm0
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vmovd %edi, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
@ -432,9 +431,8 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
|
||||
;
|
||||
; AVX1-LABEL: ext_i8_8i64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; AVX1-NEXT: vmovq %rdi, %xmm0
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vmovd %edi, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
|
@ -260,9 +260,8 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
|
||||
;
|
||||
; AVX1-LABEL: ext_i4_4i64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; AVX1-NEXT: vmovq %rdi, %xmm0
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vmovd %edi, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
@ -555,9 +554,8 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
|
||||
;
|
||||
; AVX1-LABEL: ext_i8_8i64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; AVX1-NEXT: vmovq %rdi, %xmm0
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vmovd %edi, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
|
@ -18,8 +18,6 @@ define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) n
|
||||
; X64-NEXT: addl %edx, %esi
|
||||
; X64-NEXT: movslq %esi, %rax
|
||||
; X64-NEXT: movl (%rdi,%rax), %eax
|
||||
; X64-NEXT: movq %rax, %xmm0
|
||||
; X64-NEXT: movd %xmm0, %eax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%tmp7 = mul i32 %idxY, %ref_frame_stride ; <i32> [#uses=2]
|
||||
|
@ -3570,7 +3570,7 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16_i64(i16* %ptr) {
|
||||
; SSE-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movzwl (%rdi), %eax
|
||||
; SSE-NEXT: movq %rax, %xmm0
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; SSE-NEXT: retq
|
||||
@ -3578,7 +3578,7 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16_i64(i16* %ptr) {
|
||||
; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movzwl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovq %rax, %xmm0
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; AVX1-NEXT: retq
|
||||
@ -3591,7 +3591,7 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16_i64(i16* %ptr) {
|
||||
; XOPAVX1-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: movzwl (%rdi), %eax
|
||||
; XOPAVX1-NEXT: vmovq %rax, %xmm0
|
||||
; XOPAVX1-NEXT: vmovd %eax, %xmm0
|
||||
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
|
||||
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; XOPAVX1-NEXT: retq
|
||||
|
@ -7683,7 +7683,7 @@ define <16 x i16> @insert_dup_mem_v16i16_sext_i16_i64(i16* %ptr) {
|
||||
; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movzwl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovq %rax, %xmm0
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
@ -7697,7 +7697,7 @@ define <16 x i16> @insert_dup_mem_v16i16_sext_i16_i64(i16* %ptr) {
|
||||
; XOPAVX1-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: movzwl (%rdi), %eax
|
||||
; XOPAVX1-NEXT: vmovq %rax, %xmm0
|
||||
; XOPAVX1-NEXT: vmovd %eax, %xmm0
|
||||
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
|
||||
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
|
Loading…
x
Reference in New Issue
Block a user