mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[DAG] reduceBuildVecExtToExtBuildVec - don't combine if it would break a splat.
reduceBuildVecExtToExtBuildVec was breaking a splat(zext(x)) pattern into buildvector(x, 0, x, 0, ..) resulting in much more complex insert+shuffle codegen. We already go to some lengths to avoid this in SimplifyDemandedVectorElts etc. when we encounter splat buildvectors. It should be OK to fold all splat(aext(x)) patterns - we might need to tighten this if we find a case where we mustn't introduce a buildvector(x, undef, x, undef, ..) but I can't find one. Fixes PR46461.
This commit is contained in:
parent
78d47ed6bf
commit
652fd662fd
@ -17908,6 +17908,11 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
|
||||
if (!ValidTypes)
|
||||
return SDValue();
|
||||
|
||||
// If we already have a splat buildvector, then don't fold it if it means
|
||||
// introducing zeros.
|
||||
if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
|
||||
return SDValue();
|
||||
|
||||
bool isLE = DAG.getDataLayout().isLittleEndian();
|
||||
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
|
||||
assert(ElemRatio > 1 && "Invalid element size ratio");
|
||||
|
@ -10,9 +10,9 @@ define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
|
||||
; AVX512CD-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; AVX512CD-NEXT: vptestmq %zmm0, %zmm0, %k0
|
||||
; AVX512CD-NEXT: kmovw %k0, %eax
|
||||
; AVX512CD-NEXT: movzbl %al, %ecx
|
||||
; AVX512CD-NEXT: vmovd %ecx, %xmm0
|
||||
; AVX512CD-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
||||
; AVX512CD-NEXT: movzbl %al, %eax
|
||||
; AVX512CD-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512CD-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
; AVX512CD-NEXT: vzeroupper
|
||||
; AVX512CD-NEXT: retq
|
||||
;
|
||||
@ -157,12 +157,8 @@ define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
|
||||
; AVX512CD-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512CD-NEXT: kmovw %k0, %eax
|
||||
; AVX512CD-NEXT: kmovw %k0, %ecx
|
||||
; AVX512CD-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512CD-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
|
||||
; AVX512CD-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
|
||||
; AVX512CD-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
||||
; AVX512CD-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX512CD-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
; AVX512CD-NEXT: retq
|
||||
;
|
||||
; AVX512VLCDBW-LABEL: test_mm256_epi32:
|
||||
|
@ -723,18 +723,16 @@ define <16 x i8> @test_buildvector_v16i8_register_zero_2(i8 %a2, i8 %a3, i8 %a6,
|
||||
ret <16 x i8> %ins15
|
||||
}
|
||||
|
||||
; TODO - PR46461 - reduceBuildVecExtToExtBuildVec is breaking the splat(zero_extend)
|
||||
; pattern, resulting in the BUILD_VECTOR lowering to individual insertions into a zero vector.
|
||||
; PR46461 - Don't let reduceBuildVecExtToExtBuildVec break splat(zero_extend) patterns,
|
||||
; resulting in the BUILD_VECTOR lowering to individual insertions into a zero vector.
|
||||
|
||||
define void @PR46461(i16 %x, <16 x i32>* %y) {
|
||||
; SSE-LABEL: PR46461:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: shrl %edi
|
||||
; SSE-NEXT: andl $32767, %edi # imm = 0x7FFF
|
||||
; SSE-NEXT: movd %edi, %xmm0
|
||||
; SSE-NEXT: pinsrw $2, %edi, %xmm0
|
||||
; SSE-NEXT: pinsrw $4, %edi, %xmm0
|
||||
; SSE-NEXT: pinsrw $6, %edi, %xmm0
|
||||
; SSE-NEXT: movzwl %di, %eax
|
||||
; SSE-NEXT: shrl %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; SSE-NEXT: movdqa %xmm0, 48(%rsi)
|
||||
; SSE-NEXT: movdqa %xmm0, 32(%rsi)
|
||||
; SSE-NEXT: movdqa %xmm0, 16(%rsi)
|
||||
@ -743,12 +741,10 @@ define void @PR46461(i16 %x, <16 x i32>* %y) {
|
||||
;
|
||||
; AVX1-LABEL: PR46461:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: shrl %edi
|
||||
; AVX1-NEXT: andl $32767, %edi # imm = 0x7FFF
|
||||
; AVX1-NEXT: vmovd %edi, %xmm0
|
||||
; AVX1-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpinsrw $6, %edi, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movzwl %di, %eax
|
||||
; AVX1-NEXT: shrl %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmovaps %ymm0, 32(%rsi)
|
||||
; AVX1-NEXT: vmovaps %ymm0, (%rsi)
|
||||
@ -757,13 +753,10 @@ define void @PR46461(i16 %x, <16 x i32>* %y) {
|
||||
;
|
||||
; AVX2-LABEL: PR46461:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: shrl %edi
|
||||
; AVX2-NEXT: andl $32767, %edi # imm = 0x7FFF
|
||||
; AVX2-NEXT: vmovd %edi, %xmm0
|
||||
; AVX2-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpinsrw $6, %edi, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: movzwl %di, %eax
|
||||
; AVX2-NEXT: shrl %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vmovdqa %ymm0, 32(%rsi)
|
||||
; AVX2-NEXT: vmovdqa %ymm0, (%rsi)
|
||||
; AVX2-NEXT: vzeroupper
|
||||
|
Loading…
x
Reference in New Issue
Block a user