mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[X86] Move avx512 code that forces zeros to the false side of vselects above a check for legal types.
This helps the transform occur earlier so that the NOT can be folded into the setcc. If we delay it until after type legalization, widening the vselect may already have introduced instructions to widen the mask, which can prevent the NOT from reaching the setcc. We could of course add more DAG combines to handle that, but moving this earlier is easier.
This commit is contained in:
parent
c1b2f58c10
commit
982740ed98
@ -38161,8 +38161,6 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
assert(CondVT.isVector() && "Vector select expects a vector selector!");
|
||||
|
||||
// Check if the first operand is all zeros and Cond type is vXi1.
|
||||
// This situation only applies to avx512.
|
||||
// TODO: Use isNullOrNullSplat() to distinguish constants with undefs?
|
||||
// TODO: Can we assert that both operands are not zeros (because that should
|
||||
// get simplified at node creation time)?
|
||||
@ -38177,14 +38175,6 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
|
||||
return DAG.getConstant(0, DL, VT);
|
||||
}
|
||||
|
||||
if (TValIsAllZeros && !FValIsAllZeros && Subtarget.hasAVX512() &&
|
||||
Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) {
|
||||
// Invert the cond to not(cond) : xor(op,allones)=not(op)
|
||||
SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
|
||||
// Vselect cond, op1, op2 = Vselect not(cond), op2, op1
|
||||
return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
|
||||
}
|
||||
|
||||
// To use the condition operand as a bitwise mask, it must have elements that
|
||||
// are the same size as the select elements. I.e., the condition operand must
|
||||
// have already been promoted from the IR select condition type <N x i1>.
|
||||
@ -38928,6 +38918,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the first operand is all zeros and Cond type is vXi1.
|
||||
// If this is an AVX512 target we can improve the use of zero masking by
|
||||
// swapping the operands and inverting the condition.
|
||||
if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&
|
||||
Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
|
||||
ISD::isBuildVectorAllZeros(LHS.getNode()) &&
|
||||
!ISD::isBuildVectorAllZeros(RHS.getNode())) {
|
||||
// Invert the cond to not(cond) : xor(op,allones)=not(op)
|
||||
SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
|
||||
// Vselect cond, op1, op2 = Vselect not(cond), op2, op1
|
||||
return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
|
||||
}
|
||||
|
||||
// Early exit check
|
||||
if (!TLI.isTypeLegal(VT))
|
||||
return SDValue();
|
||||
|
@ -1567,3 +1567,25 @@ entry:
|
||||
%3 = select <8 x i1> %2, <8 x i64> %y, <8 x i64> %z
|
||||
ret <8 x i64> %3
|
||||
}
|
||||
|
||||
define <2 x i32> @narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind {
|
||||
; AVX512-LABEL: narrow_cmp_select_reverse:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
|
||||
; AVX512-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x29,0xc2]
|
||||
; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x25,0xc0,0x0f]
|
||||
; AVX512-NEXT: vpshufd $232, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x70,0xc0,0xe8]
|
||||
; AVX512-NEXT: ## xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0xc1]
|
||||
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
|
||||
; AVX512-NEXT: retq ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: narrow_cmp_select_reverse:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
|
||||
; SKX-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
|
||||
; SKX-NEXT: retq ## encoding: [0xc3]
|
||||
%mask = icmp eq <2 x i64> %x, zeroinitializer
|
||||
%res = select <2 x i1> %mask, <2 x i32> zeroinitializer, <2 x i32> %y
|
||||
ret <2 x i32> %res
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user