diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 19ef12b6ae2..42988688111 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3560,6 +3560,10 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { if (isNullConstant(N1)) return N0; + // fold (sub_sat x, y) -> (and x, ~y) for bool types. + if (VT.getScalarType() == MVT::i1) + return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, N1, VT)); + return SDValue(); } diff --git a/test/CodeGen/AArch64/ssub_sat_vec.ll b/test/CodeGen/AArch64/ssub_sat_vec.ll index c5a55f23913..6ee1f3523f1 100644 --- a/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -287,14 +287,9 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; CHECK-LABEL: v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.16b, v0.16b, #7 -; CHECK-NEXT: shl v1.16b, v1.16b, #7 -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 -; CHECK-NEXT: sshr v1.16b, v1.16b, #7 -; CHECK-NEXT: shl v1.16b, v1.16b, #7 -; CHECK-NEXT: shl v0.16b, v0.16b, #7 -; CHECK-NEXT: sqsub v0.16b, v0.16b, v1.16b -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: movi v2.16b, #1 +; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/test/CodeGen/AArch64/usub_sat_vec.ll b/test/CodeGen/AArch64/usub_sat_vec.ll index a361314126a..a0ab8040e8f 100644 --- a/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/test/CodeGen/AArch64/usub_sat_vec.ll @@ -275,9 +275,8 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; CHECK-LABEL: v16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v2.16b, #1 -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: uqsub v0.16b, v0.16b, v1.16b +; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/test/CodeGen/X86/ssub_sat_vec.ll b/test/CodeGen/X86/ssub_sat_vec.ll index 91198d0bf98..484a8bba8fd 100644 --- a/test/CodeGen/X86/ssub_sat_vec.ll +++ b/test/CodeGen/X86/ssub_sat_vec.ll @@ -575,62 +575,31 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; SSE-LABEL: v16i1: ; SSE: # %bb.0: -; SSE-NEXT: psllw $7, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: psllw $7, %xmm0 -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: psubsb %xmm1, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pcmpgtb %xmm0, %xmm1 -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: xorps {{.*}}(%rip), %xmm1 +; SSE-NEXT: andps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: v16i1: ; AVX1: # %bb.0: -; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: v16i1: ; AVX2: # %bb.0: -; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: v16i1: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k1 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: v16i1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX512BW-NEXT: vpmovb2m %xmm0, %k0 -; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0 -; AVX512BW-NEXT: vpmovb2m %xmm0, %k1 -; AVX512BW-NEXT: kandnw %k0, %k1, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %xmm0 +; AVX512BW-NEXT: vpternlogq $96, {{.*}}(%rip), %xmm1, %xmm0 ; AVX512BW-NEXT: retq %z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/test/CodeGen/X86/usub_sat_vec.ll b/test/CodeGen/X86/usub_sat_vec.ll index 247f09d0464..263ce19dd9b 100644 --- a/test/CodeGen/X86/usub_sat_vec.ll +++ b/test/CodeGen/X86/usub_sat_vec.ll @@ -501,49 +501,31 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; SSE-LABEL: v16i1: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: psubusb %xmm1, %xmm0 +; SSE-NEXT: xorps {{.*}}(%rip), %xmm1 +; SSE-NEXT: andps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: v16i1: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: v16i1: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: v16i1: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k1 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: v16i1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX512BW-NEXT: vpmovb2m %xmm0, %k0 -; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0 -; AVX512BW-NEXT: vpmovb2m %xmm0, %k1 -; AVX512BW-NEXT: kandnw %k0, %k1, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %xmm0 +; AVX512BW-NEXT: vpternlogq $96, {{.*}}(%rip), %xmm1, %xmm0 ; AVX512BW-NEXT: retq %z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z