[DAG] reduceBuildVecExtToExtBuildVec - don't combine if it would break a splat.

reduceBuildVecExtToExtBuildVec was breaking a splat(zext(x)) pattern into buildvector(x, 0, x, 0, ..), resulting in much more complex insert+shuffle codegen.

We already go to some lengths to avoid this in SimplifyDemandedVectorElts etc. when we encounter splat buildvectors.

It should be OK to fold all splat(aext(x)) patterns - we might need to tighten this if we find a case where we mustn't introduce a buildvector(x, undef, x, undef, ..), but I can't find one.

Fixes PR46461.
Simon Pilgrim 2020-06-27 11:03:57 +01:00
parent 78d47ed6bf
commit 652fd662fd
3 changed files with 23 additions and 29 deletions
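
The failure mode is easiest to see at the byte level. Below is a minimal standalone sketch (plain host C++, not SelectionDAG code; the value 0x1234 is arbitrary) of why the rewrite destroys the splat: on a little-endian target, splat(zext(i16 x)) to v4i32 has exactly the same bytes as the v8i16 buildvector(x, 0, x, 0, ..), which is no longer a splat and so can no longer be lowered with a single broadcast.

#include <cstdint>
#include <cstring>
#include <iostream>

int main() {
  uint16_t x = 0x1234;

  // splat(zext(x)) into 4 x i32: every wide lane holds the same value.
  uint32_t wide[4];
  for (uint32_t &lane : wide)
    lane = x;

  // Reinterpret the same 16 bytes as 8 x i16, as the combine does
  // (little-endian host assumed, matching the isLE path in the code).
  uint16_t narrow[8];
  std::memcpy(narrow, wide, sizeof(wide));

  bool splat = true;
  for (uint16_t lane : narrow)
    splat = splat && (lane == narrow[0]);

  // Prints 0: the narrow view is (x, 0, x, 0, ..), not a splat.
  std::cout << "narrow view is a splat: " << splat << "\n";
}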

@@ -17908,6 +17908,11 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
   if (!ValidTypes)
     return SDValue();
 
+  // If we already have a splat buildvector, then don't fold it if it means
+  // introducing zeros.
+  if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
+    return SDValue();
+
   bool isLE = DAG.getDataLayout().isLittleEndian();
   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
   assert(ElemRatio > 1 && "Invalid element size ratio");
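
For reference, here is a simplified model of the splat test this early-out relies on (assumed semantics only; the real DAG.isSplatValue operates on SDValues, not plain containers). With AllowUndefs set, undef lanes match any value, so buildvector(x, undef, x, undef, ..) still counts as a splat, which is why the commit message only worries about folds that introduce zeros.

#include <iostream>
#include <optional>
#include <vector>

// std::nullopt stands in for an undef vector element.
bool isSplatModel(const std::vector<std::optional<int>> &Elts,
                  bool AllowUndefs) {
  std::optional<int> Seen;
  for (const auto &E : Elts) {
    if (!E) {             // undef lane
      if (!AllowUndefs)
        return false;
      continue;           // with AllowUndefs, undef matches any splat value
    }
    if (!Seen)
      Seen = E;           // first defined lane fixes the splat value
    else if (*E != *Seen)
      return false;
  }
  return true;            // all defined lanes agree
}

int main() {
  // splat(aext(x)): undef high lanes keep the splat property.
  std::cout << isSplatModel({7, std::nullopt, 7, std::nullopt}, true) << "\n"; // 1
  // splat(zext(x)): zero high lanes destroy it (for x != 0).
  std::cout << isSplatModel({7, 0, 7, 0}, true) << "\n"; // 0
}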

@@ -10,9 +10,9 @@ define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
 ; AVX512CD-NEXT: vpmovsxwq %xmm0, %zmm0
 ; AVX512CD-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512CD-NEXT: kmovw %k0, %eax
-; AVX512CD-NEXT: movzbl %al, %ecx
-; AVX512CD-NEXT: vmovd %ecx, %xmm0
-; AVX512CD-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; AVX512CD-NEXT: movzbl %al, %eax
+; AVX512CD-NEXT: vmovq %rax, %xmm0
+; AVX512CD-NEXT: vpbroadcastq %xmm0, %xmm0
 ; AVX512CD-NEXT: vzeroupper
 ; AVX512CD-NEXT: retq
 ;
@@ -157,12 +157,8 @@ define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
 ; AVX512CD-NEXT: vpmovsxwd %ymm0, %zmm0
 ; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512CD-NEXT: kmovw %k0, %eax
-; AVX512CD-NEXT: kmovw %k0, %ecx
 ; AVX512CD-NEXT: vmovd %eax, %xmm0
-; AVX512CD-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
-; AVX512CD-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; AVX512CD-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; AVX512CD-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512CD-NEXT: vpbroadcastd %xmm0, %ymm0
 ; AVX512CD-NEXT: retq
 ;
 ; AVX512VLCDBW-LABEL: test_mm256_epi32:

@@ -723,18 +723,16 @@ define <16 x i8> @test_buildvector_v16i8_register_zero_2(i8 %a2, i8 %a3, i8 %a6,
   ret <16 x i8> %ins15
 }
 
-; TODO - PR46461 - reduceBuildVecExtToExtBuildVec is breaking the splat(zero_extend)
-; pattern, resulting in th BUILD_VECTOR lowering to individual insertions into zero vector.
+; PR46461 - Don't let reduceBuildVecExtToExtBuildVec break splat(zero_extend) patterns,
+; resulting in the BUILD_VECTOR lowering to individual insertions into a zero vector.
 define void @PR46461(i16 %x, <16 x i32>* %y) {
 ; SSE-LABEL: PR46461:
 ; SSE: # %bb.0:
-; SSE-NEXT: shrl %edi
-; SSE-NEXT: andl $32767, %edi # imm = 0x7FFF
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pinsrw $2, %edi, %xmm0
-; SSE-NEXT: pinsrw $4, %edi, %xmm0
-; SSE-NEXT: pinsrw $6, %edi, %xmm0
+; SSE-NEXT: movzwl %di, %eax
+; SSE-NEXT: shrl %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; SSE-NEXT: movdqa %xmm0, 48(%rsi)
 ; SSE-NEXT: movdqa %xmm0, 32(%rsi)
 ; SSE-NEXT: movdqa %xmm0, 16(%rsi)
@@ -743,12 +741,10 @@ define void @PR46461(i16 %x, <16 x i32>* %y) {
 ;
 ; AVX1-LABEL: PR46461:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: shrl %edi
-; AVX1-NEXT: andl $32767, %edi # imm = 0x7FFF
-; AVX1-NEXT: vmovd %edi, %xmm0
-; AVX1-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
-; AVX1-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
-; AVX1-NEXT: vpinsrw $6, %edi, %xmm0, %xmm0
+; AVX1-NEXT: movzwl %di, %eax
+; AVX1-NEXT: shrl %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: vmovaps %ymm0, 32(%rsi)
 ; AVX1-NEXT: vmovaps %ymm0, (%rsi)
@@ -757,13 +753,10 @@ define void @PR46461(i16 %x, <16 x i32>* %y) {
 ;
 ; AVX2-LABEL: PR46461:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: shrl %edi
-; AVX2-NEXT: andl $32767, %edi # imm = 0x7FFF
-; AVX2-NEXT: vmovd %edi, %xmm0
-; AVX2-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
-; AVX2-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
-; AVX2-NEXT: vpinsrw $6, %edi, %xmm0, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: movzwl %di, %eax
+; AVX2-NEXT: shrl %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
 ; AVX2-NEXT: vmovdqa %ymm0, 32(%rsi)
 ; AVX2-NEXT: vmovdqa %ymm0, (%rsi)
 ; AVX2-NEXT: vzeroupper