mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
[X86][AVX] computeKnownBitsForTargetNode - add X86ISD::VBROADCAST handling for scalar sources
The target shuffle code handles vector sources, but X86ISD::VBROADCAST can also accept a scalar source for splatting. Suggested by @craig.topper on PR49658.
This commit is contained in:
parent
ee17e81726
commit
db3cbc0a8e
@ -34303,6 +34303,14 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
|
||||
Known = Known.trunc(BitWidth);
|
||||
break;
|
||||
}
|
||||
case X86ISD::VBROADCAST: {
|
||||
SDValue Src = Op.getOperand(0);
|
||||
if (!Src.getSimpleValueType().isVector()) {
|
||||
Known = DAG.computeKnownBits(Src, Depth + 1);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case X86ISD::ANDNP: {
|
||||
KnownBits Known2;
|
||||
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
|
||||
|
@ -324,24 +324,17 @@ define <8 x i32> @PR49658(i32* %ptr, i32 %mul) {
|
||||
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movq $-2097152, %rax # imm = 0xFFE00000
|
||||
; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm2
|
||||
; AVX2-NEXT: .p2align 4, 0x90
|
||||
; AVX2-NEXT: .LBB7_1: # %loop
|
||||
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
|
||||
; AVX2-NEXT: vpmuludq %ymm4, %ymm1, %ymm5
|
||||
; AVX2-NEXT: vpmuludq %ymm4, %ymm2, %ymm4
|
||||
; AVX2-NEXT: vpsllq $32, %ymm4, %ymm4
|
||||
; AVX2-NEXT: vpaddq %ymm4, %ymm5, %ymm4
|
||||
; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm5
|
||||
; AVX2-NEXT: vpmuludq %ymm3, %ymm2, %ymm3
|
||||
; AVX2-NEXT: vpsllq $32, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpaddq %ymm3, %ymm5, %ymm3
|
||||
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm5 = ymm4[2,3],ymm3[2,3]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
|
||||
; AVX2-NEXT: vshufps {{.*#+}} ymm3 = ymm3[1,3],ymm5[1,3],ymm3[5,7],ymm5[5,7]
|
||||
; AVX2-NEXT: vpaddd %ymm0, %ymm3, %ymm0
|
||||
; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
|
||||
; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm2
|
||||
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm4 = ymm3[2,3],ymm2[2,3]
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
|
||||
; AVX2-NEXT: vshufps {{.*#+}} ymm2 = ymm2[1,3],ymm4[1,3],ymm2[5,7],ymm4[5,7]
|
||||
; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
|
||||
; AVX2-NEXT: subq $-128, %rax
|
||||
; AVX2-NEXT: jne .LBB7_1
|
||||
; AVX2-NEXT: # %bb.2: # %end
|
||||
@ -353,18 +346,14 @@ define <8 x i32> @PR49658(i32* %ptr, i32 %mul) {
|
||||
; AVX512VL-NEXT: vpbroadcastq %rax, %zmm1
|
||||
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: movq $-2097152, %rax # imm = 0xFFE00000
|
||||
; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm2
|
||||
; AVX512VL-NEXT: .p2align 4, 0x90
|
||||
; AVX512VL-NEXT: .LBB7_1: # %loop
|
||||
; AVX512VL-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} zmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; AVX512VL-NEXT: vpmuludq %zmm3, %zmm1, %zmm4
|
||||
; AVX512VL-NEXT: vpmuludq %zmm3, %zmm2, %zmm3
|
||||
; AVX512VL-NEXT: vpsllq $32, %zmm3, %zmm3
|
||||
; AVX512VL-NEXT: vpaddq %zmm3, %zmm4, %zmm3
|
||||
; AVX512VL-NEXT: vpsrlq $32, %zmm3, %zmm3
|
||||
; AVX512VL-NEXT: vpmovqd %zmm3, %ymm3
|
||||
; AVX512VL-NEXT: vpaddd %ymm0, %ymm3, %ymm0
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} zmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; AVX512VL-NEXT: vpmuludq %zmm2, %zmm1, %zmm2
|
||||
; AVX512VL-NEXT: vpsrlq $32, %zmm2, %zmm2
|
||||
; AVX512VL-NEXT: vpmovqd %zmm2, %ymm2
|
||||
; AVX512VL-NEXT: vpaddd %ymm0, %ymm2, %ymm0
|
||||
; AVX512VL-NEXT: subq $-128, %rax
|
||||
; AVX512VL-NEXT: jne .LBB7_1
|
||||
; AVX512VL-NEXT: # %bb.2: # %end
|
||||
@ -380,7 +369,7 @@ define <8 x i32> @PR49658(i32* %ptr, i32 %mul) {
|
||||
; AVX512DQVL-NEXT: .LBB7_1: # %loop
|
||||
; AVX512DQVL-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; AVX512DQVL-NEXT: vpmovzxdq {{.*#+}} zmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; AVX512DQVL-NEXT: vpmullq %zmm2, %zmm1, %zmm2
|
||||
; AVX512DQVL-NEXT: vpmuludq %zmm2, %zmm1, %zmm2
|
||||
; AVX512DQVL-NEXT: vpsrlq $32, %zmm2, %zmm2
|
||||
; AVX512DQVL-NEXT: vpmovqd %zmm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpaddd %ymm0, %ymm2, %ymm0
|
||||
|
Loading…
Reference in New Issue
Block a user