mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86][SSE] LowerVectorAllZeroTest - add support for >256-bit vectors
Reduce by repeatedly splitting the vector and ORing the two halves together until we reach the target size for PTEST/MOVMSK_PCMPEQ (256 bits with AVX, otherwise 128 bits). There might be some cases where AVX512 can perform this with 512-bit vectors, but so far I haven't encountered any such pattern that reaches LowerVectorAllZeroTest. Prep work for D81547.
This commit is contained in:
parent
35f84c1504
commit
085e5b1dfa
@ -21366,9 +21366,9 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
|
||||
}) &&
|
||||
"Reduction source vector mismatch");
|
||||
|
||||
// Quit if not 128/256-bit vector.
|
||||
// Quit if less than 128-bits or not splittable to 128/256-bit vector.
|
||||
EVT VT = VecIns[0].getValueType();
|
||||
if (!VT.is128BitVector() && !VT.is256BitVector())
|
||||
if (VT.getSizeInBits() < 128 || !isPowerOf2_32(VT.getSizeInBits()))
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(Op);
|
||||
@ -21382,18 +21382,28 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
|
||||
VecIns.push_back(DAG.getNode(ISD::OR, DL, VT, LHS, RHS));
|
||||
}
|
||||
|
||||
SDValue V = VecIns.back();
|
||||
|
||||
// Split down to 128/256-bit vector.
|
||||
unsigned TestSize = Subtarget.hasAVX()? 256 : 128;
|
||||
while (VT.getSizeInBits() > TestSize) {
|
||||
auto Split = DAG.SplitVector(V, DL);
|
||||
VT = Split.first.getValueType();
|
||||
V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second);
|
||||
}
|
||||
|
||||
X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE,
|
||||
DL, MVT::i8);
|
||||
|
||||
bool UsePTEST = Subtarget.hasSSE41();
|
||||
if (UsePTEST) {
|
||||
MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
|
||||
SDValue V = DAG.getBitcast(TestVT, VecIns.back());
|
||||
V = DAG.getBitcast(TestVT, V);
|
||||
return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, V, V);
|
||||
}
|
||||
|
||||
SDValue Result = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8,
|
||||
DAG.getBitcast(MVT::v16i8, VecIns.back()),
|
||||
DAG.getBitcast(MVT::v16i8, V),
|
||||
getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
|
||||
Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
|
||||
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
|
||||
|
@ -148,24 +148,9 @@ define i32 @veccond512(<16 x i32> %input) {
|
||||
;
|
||||
; AVX512-LABEL: veccond512:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm1
|
||||
; AVX512-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512-NEXT: vmovq %xmm2, %rcx
|
||||
; AVX512-NEXT: orq %rax, %rcx
|
||||
; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm3
|
||||
; AVX512-NEXT: vmovq %xmm3, %rax
|
||||
; AVX512-NEXT: orq %rcx, %rax
|
||||
; AVX512-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX512-NEXT: orq %rax, %rcx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; AVX512-NEXT: vpextrq $1, %xmm2, %rdx
|
||||
; AVX512-NEXT: orq %rax, %rdx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; AVX512-NEXT: orq %rdx, %rax
|
||||
; AVX512-NEXT: vpextrq $1, %xmm0, %rdx
|
||||
; AVX512-NEXT: orq %rax, %rdx
|
||||
; AVX512-NEXT: orq %rcx, %rdx
|
||||
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vptest %ymm0, %ymm0
|
||||
; AVX512-NEXT: je .LBB2_2
|
||||
; AVX512-NEXT: # %bb.1: # %if-true-block
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
@ -283,25 +268,10 @@ define i32 @vectest512(<16 x i32> %input) {
|
||||
;
|
||||
; AVX512-LABEL: vectest512:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm1
|
||||
; AVX512-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512-NEXT: vmovq %xmm2, %rcx
|
||||
; AVX512-NEXT: orq %rax, %rcx
|
||||
; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm3
|
||||
; AVX512-NEXT: vmovq %xmm3, %rax
|
||||
; AVX512-NEXT: orq %rcx, %rax
|
||||
; AVX512-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX512-NEXT: orq %rax, %rcx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; AVX512-NEXT: vpextrq $1, %xmm2, %rdx
|
||||
; AVX512-NEXT: orq %rax, %rdx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; AVX512-NEXT: orq %rdx, %rax
|
||||
; AVX512-NEXT: vpextrq $1, %xmm0, %rdx
|
||||
; AVX512-NEXT: orq %rax, %rdx
|
||||
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
; AVX512-NEXT: orq %rcx, %rdx
|
||||
; AVX512-NEXT: vptest %ymm0, %ymm0
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
@ -410,24 +380,9 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
|
||||
; AVX512-LABEL: vecsel512:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: movl %edi, %eax
|
||||
; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm1
|
||||
; AVX512-NEXT: vmovq %xmm1, %rcx
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512-NEXT: vmovq %xmm2, %rdx
|
||||
; AVX512-NEXT: orq %rcx, %rdx
|
||||
; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm3
|
||||
; AVX512-NEXT: vmovq %xmm3, %rcx
|
||||
; AVX512-NEXT: orq %rdx, %rcx
|
||||
; AVX512-NEXT: vmovq %xmm0, %rdx
|
||||
; AVX512-NEXT: orq %rcx, %rdx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm2, %rdi
|
||||
; AVX512-NEXT: orq %rcx, %rdi
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %rcx
|
||||
; AVX512-NEXT: orq %rdi, %rcx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm0, %rdi
|
||||
; AVX512-NEXT: orq %rcx, %rdi
|
||||
; AVX512-NEXT: orq %rdx, %rdi
|
||||
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vptest %ymm0, %ymm0
|
||||
; AVX512-NEXT: cmovel %esi, %eax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
|
Loading…
x
Reference in New Issue
Block a user