[X86] Update SSE/AVX ABS intrinsics to emit llvm.abs.* (PR46851)

We're now getting close to having the necessary analyses/combines etc. for the new generic llvm.abs.* intrinsics. This patch updates the SSE/AVX ABS vector intrinsics to emit the generic equivalents instead of the icmp+sub+select code pattern.

Differential Revision: https://reviews.llvm.org/D87101
2024-11-25 12:12:47 +01:00 · 2020-09-07 13:44:35 +01:00 · 2020-09-07 13:44:35 +01:00 · 2a377c9521
commit 2a377c9521
parent beb0cd66c2
2 changed files with 18 additions and 30 deletions
--- a/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@@ -10,13 +10,11 @@ define <4 x i64> @test_mm256_abs_epi8(<4 x i64> %a0) {
 ; CHECK-NEXT:    vpabsb %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
  %arg = bitcast <4 x i64> %a0 to <32 x i8>
-  %sub = sub <32 x i8> zeroinitializer, %arg
-  %cmp = icmp sgt <32 x i8> %arg, zeroinitializer
-  %sel = select <32 x i1> %cmp, <32 x i8> %arg, <32 x i8> %sub
-  %res = bitcast <32 x i8> %sel to <4 x i64>
+  %abs = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %arg, i1 false)
+  %res = bitcast <32 x i8> %abs to <4 x i64>
  ret <4 x i64> %res
 }
-declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
+declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1) nounwind readnone

 define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a0) {
 ; CHECK-LABEL: test_mm256_abs_epi16:
@@ -24,13 +22,11 @@ define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a0) {
 ; CHECK-NEXT:    vpabsw %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
  %arg = bitcast <4 x i64> %a0 to <16 x i16>
-  %sub = sub <16 x i16> zeroinitializer, %arg
-  %cmp = icmp sgt <16 x i16> %arg, zeroinitializer
-  %sel = select <16 x i1> %cmp, <16 x i16> %arg, <16 x i16> %sub
-  %res = bitcast <16 x i16> %sel to <4 x i64>
+  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %arg, i1 false)
+  %res = bitcast <16 x i16> %abs to <4 x i64>
  ret <4 x i64> %res
 }
-declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1) nounwind readnone

 define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a0) {
 ; CHECK-LABEL: test_mm256_abs_epi32:
@@ -38,13 +34,11 @@ define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a0) {
 ; CHECK-NEXT:    vpabsd %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
  %arg = bitcast <4 x i64> %a0 to <8 x i32>
-  %sub = sub <8 x i32> zeroinitializer, %arg
-  %cmp = icmp sgt <8 x i32> %arg, zeroinitializer
-  %sel = select <8 x i1> %cmp, <8 x i32> %arg, <8 x i32> %sub
-  %res = bitcast <8 x i32> %sel to <4 x i64>
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %arg, i1 false)
+  %res = bitcast <8 x i32> %abs to <4 x i64>
  ret <4 x i64> %res
 }
-declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1) nounwind readnone

 define <4 x i64> @test_mm256_add_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
 ; CHECK-LABEL: test_mm256_add_epi8:
--- a/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll
@@ -19,13 +19,11 @@ define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
 ; AVX-NEXT:    vpabsb %xmm0, %xmm0
 ; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <16 x i8>
-  %sub = sub <16 x i8> zeroinitializer, %arg
-  %cmp = icmp sgt <16 x i8> %arg, zeroinitializer
-  %sel = select <16 x i1> %cmp, <16 x i8> %arg, <16 x i8> %sub
-  %res = bitcast <16 x i8> %sel to <2 x i64>
+  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %arg, i1 false)
+  %res = bitcast <16 x i8> %abs to <2 x i64>
  ret <2 x i64> %res
 }
-declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1) nounwind readnone

 define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
 ; SSE-LABEL: test_mm_abs_epi16:
@@ -38,13 +36,11 @@ define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
 ; AVX-NEXT:    vpabsw %xmm0, %xmm0
 ; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <8 x i16>
-  %sub = sub <8 x i16> zeroinitializer, %arg
-  %cmp = icmp sgt <8 x i16> %arg, zeroinitializer
-  %sel = select <8 x i1> %cmp, <8 x i16> %arg, <8 x i16> %sub
-  %res = bitcast <8 x i16> %sel to <2 x i64>
+  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %arg, i1 false)
+  %res = bitcast <8 x i16> %abs to <2 x i64>
  ret <2 x i64> %res
 }
-declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) nounwind readnone

 define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
 ; SSE-LABEL: test_mm_abs_epi32:
@@ -57,13 +53,11 @@ define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
 ; AVX-NEXT:    vpabsd %xmm0, %xmm0
 ; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <4 x i32>
-  %sub = sub <4 x i32> zeroinitializer, %arg
-  %cmp = icmp sgt <4 x i32> %arg, zeroinitializer
-  %sel = select <4 x i1> %cmp, <4 x i32> %arg, <4 x i32> %sub
-  %res = bitcast <4 x i32> %sel to <2 x i64>
+  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
+  %res = bitcast <4 x i32> %abs to <2 x i64>
  ret <2 x i64> %res
 }
-declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) nounwind readnone

 define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
 ; SSE-LABEL: test_mm_alignr_epi8: