1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

[X86][SSE] Add OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1))) -> MOVMSK+CMP reduction combine

llvm-svn: 375463
This commit is contained in:
Simon Pilgrim 2019-10-21 22:36:31 +00:00
parent afb57e058c
commit ed58374dc3
2 changed files with 30 additions and 18 deletions

View File

@ -39527,6 +39527,24 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
DAG.getBitcast(MVT::v4f32, N1)));
}
// Match any-of bool scalar reductions into a bitcast/movmsk + cmp.
// The i1 OR-reduction is replaced by a single "mask != 0" comparison on an
// integer mask built from the source vector.
// TODO: Support multiple SrcOps.
if (VT == MVT::i1) {
SmallVector<SDValue, 2> SrcOps;
// Only a reduction with exactly one source operand is handled (see TODO).
// NOTE(review): matchScalarReduction is defined outside this hunk; presumably
// it fills SrcOps with the vector(s) the OR'ed elements were extracted from -
// confirm against its definition.
if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) &&
SrcOps.size() == 1) {
SDLoc dl(N);
unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
// Integer type that is NumElts bits wide, i.e. one bit per vector element.
EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
// A null Mask means no mask node was produced; in that case nothing is
// returned here and control continues with the code below this block.
if (Mask) {
// Despite its name, AllBits is the all-zero constant (getNullValue): the
// any-of result is true exactly when Mask != 0.
APInt AllBits = APInt::getNullValue(NumElts);
return DAG.getSetCC(dl, MVT::i1, Mask,
DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE);
}
}
}
if (DCI.isBeforeLegalizeOps())
return SDValue();

View File

@ -4513,21 +4513,17 @@ define i1 @movmsk_or_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: movmskpd %xmm1, %ecx
; SSE2-NEXT: xorl $3, %ecx
; SSE2-NEXT: movl %ecx, %eax
; SSE2-NEXT: shrb %al
; SSE2-NEXT: orb %cl, %al
; SSE2-NEXT: movmskpd %xmm1, %eax
; SSE2-NEXT: xorb $3, %al
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_or_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovmskpd %xmm0, %ecx
; AVX-NEXT: xorl $3, %ecx
; AVX-NEXT: movl %ecx, %eax
; AVX-NEXT: shrb %al
; AVX-NEXT: orb %cl, %al
; AVX-NEXT: vmovmskpd %xmm0, %eax
; AVX-NEXT: xorb $3, %al
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; KNL-LABEL: movmsk_or_v2i64:
@ -4668,19 +4664,17 @@ define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: movmsk_or_v2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: cmplepd %xmm0, %xmm1
; SSE2-NEXT: movmskpd %xmm1, %ecx
; SSE2-NEXT: movl %ecx, %eax
; SSE2-NEXT: shrb %al
; SSE2-NEXT: orb %cl, %al
; SSE2-NEXT: movmskpd %xmm1, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_or_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovmskpd %xmm0, %ecx
; AVX-NEXT: movl %ecx, %eax
; AVX-NEXT: shrb %al
; AVX-NEXT: orb %cl, %al
; AVX-NEXT: vmovmskpd %xmm0, %eax
; AVX-NEXT: testb %al, %al
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; KNL-LABEL: movmsk_or_v2f64: