From 998d22c33264f03e80c8c4ccde5fcafe05441f7b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 13 Feb 2021 15:01:48 +0000 Subject: [PATCH] [DAG] Fold i1/vXi1 saddsat/uaddsat(x,y) -> or(x,y) Alive2: https://alive2.llvm.org/ce/z/FzcrpH --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 + test/CodeGen/AArch64/sadd_sat_vec.ll | 9 +-- test/CodeGen/AArch64/uadd_sat_vec.ll | 8 +- test/CodeGen/X86/avx512-mask-op.ll | 94 ++++-------------------- test/CodeGen/X86/sadd_sat_vec.ll | 60 ++------------- test/CodeGen/X86/uadd_sat_vec.ll | 59 ++------------- 6 files changed, 32 insertions(+), 202 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 42988688111..31d78a6fd67 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2540,6 +2540,10 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) { if (isNullConstant(N1)) return N0; + // fold (add_sat x, y) -> (or x, y) for bool types. + if (VT.getScalarType() == MVT::i1) + return DAG.getNode(ISD::OR, DL, VT, N0, N1); + // If it cannot overflow, transform into an add. if (Opcode == ISD::UADDSAT) if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) diff --git a/test/CodeGen/AArch64/sadd_sat_vec.ll b/test/CodeGen/AArch64/sadd_sat_vec.ll index 1ae1ee43bee..2f3cae1aa07 100644 --- a/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -286,14 +286,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; CHECK-LABEL: v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.16b, v0.16b, #7 -; CHECK-NEXT: shl v1.16b, v1.16b, #7 -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 -; CHECK-NEXT: sshr v1.16b, v1.16b, #7 -; CHECK-NEXT: shl v1.16b, v1.16b, #7 -; CHECK-NEXT: shl v0.16b, v0.16b, #7 -; CHECK-NEXT: sqadd v0.16b, v0.16b, v1.16b -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/test/CodeGen/AArch64/uadd_sat_vec.ll b/test/CodeGen/AArch64/uadd_sat_vec.ll index 5f92f713573..43a32b43b58 100644 --- a/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -285,13 +285,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; CHECK-LABEL: v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.16b, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: shl v1.16b, v1.16b, #7 -; CHECK-NEXT: shl v0.16b, v0.16b, #7 -; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ushr v0.16b, v0.16b, #7 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index 684bebaa85d..9b81809962a 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -5472,48 +5472,17 @@ define i1 @test_v1i1_mul(i1 %x, i1 %y) { } define <1 x i1> @uadd_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind { -; KNL-LABEL: uadd_sat_v1i1: -; KNL: ## %bb.0: -; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kmovw %esi, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: ## kill: def $al killed $al killed $eax -; KNL-NEXT: retq -; -; SKX-LABEL: uadd_sat_v1i1: -; SKX: ## %bb.0: -; SKX-NEXT: kmovd %edi, %k0 -; SKX-NEXT: kmovd %esi, %k1 -; SKX-NEXT: korw %k1, %k0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: ## kill: def $al killed $al killed $eax -; SKX-NEXT: retq -; -; AVX512BW-LABEL: uadd_sat_v1i1: -; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: kmovd %edi, %k0 -; AVX512BW-NEXT: kmovd %esi, %k1 -; AVX512BW-NEXT: korw %k1, %k0, %k0 -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax -; AVX512BW-NEXT: retq -; -; AVX512DQ-LABEL: uadd_sat_v1i1: -; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: kmovw %edi, %k0 -; AVX512DQ-NEXT: kmovw %esi, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax -; AVX512DQ-NEXT: retq +; CHECK-LABEL: uadd_sat_v1i1: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax +; CHECK-NEXT: retq ; ; X86-LABEL: uadd_sat_v1i1: ; X86: ## %bb.0: -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; X86-NEXT: korw %k1, %k0, %k0 -; X86-NEXT: kmovd %k0, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: ## kill: def $al killed $al killed $eax ; X86-NEXT: retl %z = call <1 x i1> @llvm.uadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y) @@ -5572,48 +5541,17 @@ define <1 x i1> @usub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind { declare <1 x i1> @llvm.usub.sat.v1i1(<1 x i1> %x, <1 x i1> %y) define <1 x i1> @sadd_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind { -; KNL-LABEL: sadd_sat_v1i1: -; KNL: ## %bb.0: -; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kmovw %esi, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: ## kill: def $al killed $al killed $eax -; KNL-NEXT: retq -; -; SKX-LABEL: sadd_sat_v1i1: -; SKX: ## %bb.0: -; SKX-NEXT: kmovd %edi, %k0 -; SKX-NEXT: kmovd %esi, %k1 -; SKX-NEXT: korw %k1, %k0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: ## kill: def $al killed $al killed $eax -; SKX-NEXT: retq -; -; AVX512BW-LABEL: sadd_sat_v1i1: -; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: kmovd %edi, %k0 -; AVX512BW-NEXT: kmovd %esi, %k1 -; AVX512BW-NEXT: korw %k1, %k0, %k0 -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax -; AVX512BW-NEXT: retq -; -; AVX512DQ-LABEL: sadd_sat_v1i1: -; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: kmovw %edi, %k0 -; AVX512DQ-NEXT: kmovw %esi, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax -; AVX512DQ-NEXT: retq +; CHECK-LABEL: sadd_sat_v1i1: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax +; CHECK-NEXT: retq ; ; X86-LABEL: sadd_sat_v1i1: ; X86: ## %bb.0: -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; X86-NEXT: korw %k1, %k0, %k0 -; X86-NEXT: kmovd %k0, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: ## kill: def $al killed $al killed $eax ; X86-NEXT: retl %z = call <1 x i1> @llvm.sadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y) diff --git a/test/CodeGen/X86/sadd_sat_vec.ll b/test/CodeGen/X86/sadd_sat_vec.ll index d5fdfe9b4ae..9eacc459b3f 100644 --- a/test/CodeGen/X86/sadd_sat_vec.ll +++ b/test/CodeGen/X86/sadd_sat_vec.ll @@ -579,63 +579,13 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; SSE-LABEL: v16i1: ; SSE: # %bb.0: -; SSE-NEXT: psllw $7, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: psllw $7, %xmm0 -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: paddsb %xmm1, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pcmpgtb %xmm0, %xmm1 -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: v16i1: -; AVX1: # %bb.0: -; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: v16i1: -; AVX2: # %bb.0: -; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v16i1: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v16i1: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovb2m %xmm1, %k0 -; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX512BW-NEXT: vpmovb2m %xmm0, %k1 -; AVX512BW-NEXT: korw %k0, %k1, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %xmm0 -; AVX512BW-NEXT: retq +; AVX-LABEL: v16i1: +; AVX: # %bb.0: +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z } diff --git a/test/CodeGen/X86/uadd_sat_vec.ll b/test/CodeGen/X86/uadd_sat_vec.ll index 633238f0b1e..5624dfd8084 100644 --- a/test/CodeGen/X86/uadd_sat_vec.ll +++ b/test/CodeGen/X86/uadd_sat_vec.ll @@ -509,62 +509,13 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; SSE-LABEL: v16i1: ; SSE: # %bb.0: -; SSE-NEXT: psllw $7, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: psllw $7, %xmm0 -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: paddusb %xmm1, %xmm0 -; SSE-NEXT: psrlw $7, %xmm0 -; SSE-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: v16i1: -; AVX1: # %bb.0: -; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0 -; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: v16i1: -; AVX2: # %bb.0: -; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0 -; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v16i1: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v16i1: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovb2m %xmm1, %k0 -; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX512BW-NEXT: vpmovb2m %xmm0, %k1 -; AVX512BW-NEXT: korw %k0, %k1, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %xmm0 -; AVX512BW-NEXT: retq +; AVX-LABEL: v16i1: +; AVX: # %bb.0: +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z }