1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[X86] Move avx512 code that forces zeros to the false side of vselects above a check for legal types.

This helps this transform occur earlier so we can fold the not
with setcc. If we delay it until after type legalization we might
have introduced instructions to widen the mask if the vselect was
widened. This can prevent the not from making it to the setcc.

We could of course add more DAG combines to handle that, but
moving this earlier is easier.
This commit is contained in:
Craig Topper 2020-02-17 21:32:29 -08:00
parent c1b2f58c10
commit 982740ed98
2 changed files with 35 additions and 10 deletions

View File

@ -38161,8 +38161,6 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
assert(CondVT.isVector() && "Vector select expects a vector selector!");
// Check if the first operand is all zeros and Cond type is vXi1.
// This situation only applies to avx512.
// TODO: Use isNullOrNullSplat() to distinguish constants with undefs?
// TODO: Can we assert that both operands are not zeros (because that should
// get simplified at node creation time)?
@ -38177,14 +38175,6 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
return DAG.getConstant(0, DL, VT);
}
if (TValIsAllZeros && !FValIsAllZeros && Subtarget.hasAVX512() &&
Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
// Vselect cond, op1, op2 = Vselect not(cond), op2, op1
return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
}
// To use the condition operand as a bitwise mask, it must have elements that
// are the same size as the select elements. Ie, the condition operand must
// have already been promoted from the IR select condition type <N x i1>.
@ -38928,6 +38918,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
// Check if the first operand is all zeros and Cond type is vXi1.
// If this an avx512 target we can improve the use of zero masking by
// swapping the operands and inverting the condition.
if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&
Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
ISD::isBuildVectorAllZeros(LHS.getNode()) &&
!ISD::isBuildVectorAllZeros(RHS.getNode())) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
// Vselect cond, op1, op2 = Vselect not(cond), op2, op1
return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
}
// Early exit check
if (!TLI.isTypeLegal(VT))
return SDValue();

View File

@ -1567,3 +1567,25 @@ entry:
%3 = select <8 x i1> %2, <8 x i64> %y, <8 x i64> %z
ret <8 x i64> %3
}
define <2 x i32> @narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind {
; AVX512-LABEL: narrow_cmp_select_reverse:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
; AVX512-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x29,0xc2]
; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x25,0xc0,0x0f]
; AVX512-NEXT: vpshufd $232, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x70,0xc0,0xe8]
; AVX512-NEXT: ## xmm0 = xmm0[0,2,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: narrow_cmp_select_reverse:
; SKX: ## %bb.0:
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; SKX-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <2 x i64> %x, zeroinitializer
%res = select <2 x i1> %mask, <2 x i32> zeroinitializer, <2 x i32> %y
ret <2 x i32> %res
}