diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d60a0c7e51b..fee2b19794d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -38161,8 +38161,6 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, assert(CondVT.isVector() && "Vector select expects a vector selector!"); - // Check if the first operand is all zeros and Cond type is vXi1. - // This situation only applies to avx512. // TODO: Use isNullOrNullSplat() to distinguish constants with undefs? // TODO: Can we assert that both operands are not zeros (because that should // get simplified at node creation time)? @@ -38177,14 +38175,6 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, return DAG.getConstant(0, DL, VT); } - if (TValIsAllZeros && !FValIsAllZeros && Subtarget.hasAVX512() && - Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) { - // Invert the cond to not(cond) : xor(op,allones)=not(op) - SDValue CondNew = DAG.getNOT(DL, Cond, CondVT); - // Vselect cond, op1, op2 = Vselect not(cond), op2, op1 - return DAG.getSelect(DL, VT, CondNew, RHS, LHS); - } - // To use the condition operand as a bitwise mask, it must have elements that // are the same size as the select elements. Ie, the condition operand must // have already been promoted from the IR select condition type . @@ -38928,6 +38918,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, } } + // Check if the first operand is all zeros and Cond type is vXi1. + // If this is an avx512 target we can improve the use of zero masking by + // swapping the operands and inverting the condition. 
+ if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() && + Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 && + ISD::isBuildVectorAllZeros(LHS.getNode()) && + !ISD::isBuildVectorAllZeros(RHS.getNode())) { + // Invert the cond to not(cond) : xor(op,allones)=not(op) + SDValue CondNew = DAG.getNOT(DL, Cond, CondVT); + // Vselect cond, op1, op2 = Vselect not(cond), op2, op1 + return DAG.getSelect(DL, VT, CondNew, RHS, LHS); + } + // Early exit check if (!TLI.isTypeLegal(VT)) return SDValue(); diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index 843e37e6c45..8b9a7a03d50 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -1567,3 +1567,25 @@ entry: %3 = select <8 x i1> %2, <8 x i64> %y, <8 x i64> %z ret <8 x i64> %3 } + +define <2 x i32> @narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind { +; AVX512-LABEL: narrow_cmp_select_reverse: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2] +; AVX512-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x29,0xc2] +; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x25,0xc0,0x0f] +; AVX512-NEXT: vpshufd $232, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x70,0xc0,0xe8] +; AVX512-NEXT: ## xmm0 = xmm0[0,2,2,3] +; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0xc1] +; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; AVX512-NEXT: retq ## encoding: [0xc3] +; +; SKX-LABEL: narrow_cmp_select_reverse: +; SKX: ## %bb.0: +; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8] +; SKX-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1] +; SKX-NEXT: retq ## encoding: [0xc3] + %mask = icmp eq <2 x i64> %x, zeroinitializer + %res = select <2 x i1> %mask, <2 x i32> zeroinitializer, <2 x i32> %y + ret <2 x i32> %res +}