[PowerPC] Exploit VSX neg, abs and nabs for f32

xsnegdp, xsabsdp and xsnabsdp can also be used to operate on f32 operands. This patch adds the missing patterns, since we prefer VSX instructions when they are available. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D75344
2025-01-31 12:41:49 +01:00 · 2020-05-12 14:29:40 +08:00 · 2020-05-12 14:29:40 +08:00 · e1d61b7961
commit e1d61b7961
parent bd45b479b7
5 changed files with 34 additions and 10 deletions
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@ -2602,6 +2602,16 @@ def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
 def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
          (v2f64 (XVMINDP $src1, $src2))>;

+// f32 abs
+def : Pat<(f32 (fabs f32:$S)),
+          (f32 (COPY_TO_REGCLASS (XSABSDP
+               (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
+// f32 nabs
+def : Pat<(f32 (fneg (fabs f32:$S))),
+          (f32 (COPY_TO_REGCLASS (XSNABSDP
+               (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
 // f32 Min.
 def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
          (f32 FpMinMax.F32Min)>;
@ -2999,6 +3009,14 @@ def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C),
 def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C),
          (XSNMSUBASP $C, $A, $B)>;

+// f32 neg
+// Although XSNEGDP is available on P7, we only select it starting from P8 so
+// that FNMSUBS can still be selected for the fneg-fmsub pattern on P7. (The
+// VSX version, XSNMSUBASP, is only available since P8.)
+def : Pat<(f32 (fneg f32:$S)),
+          (f32 (COPY_TO_REGCLASS (XSNEGDP
+               (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
 // Instructions for converting float to i32 feeding a store.
 def : Pat<(PPCstore_scal_int_from_vsr
            (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4),
--- a/test/CodeGen/PowerPC/float-logic-ops.ll
+++ b/test/CodeGen/PowerPC/float-logic-ops.ll
@ -5,7 +5,7 @@
 define float @absf(float %a) {
 ; CHECK-LABEL: absf:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fabs f1, f1
+; CHECK-NEXT:    xsabsdp f1, f1
 ; CHECK-NEXT:    blr
 entry:
  %conv = bitcast float %a to i32
@ -80,7 +80,7 @@ entry:
 define float @negf(float %a) {
 ; CHECK-LABEL: negf:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fneg f1, f1
+; CHECK-NEXT:    xsnegdp f1, f1
 ; CHECK-NEXT:    blr
 entry:
  %conv = bitcast float %a to i32
@ -127,7 +127,7 @@ entry:
 define float @nabsf(float %a) {
 ; CHECK-LABEL: nabsf:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fnabs f1, f1
+; CHECK-NEXT:    xsnabsdp f1, f1
 ; CHECK-NEXT:    blr
 entry:
  %conv = bitcast float %a to i32
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@ -198,6 +198,9 @@ define float @test_XSNMADDASP(float %A, float %B, float %C) {
 	ret float %F
 ; CHECK-P8-LABEL: test_XSNMADDASP:
 ; CHECK-P8: xsnmaddasp
+
+; CHECK-VSX-LABEL: test_XSNMADDASP:
+; CHECK-VSX: fnmadds
 }

 define float @test_XSNMSUBASP(float %A, float %B, float %C) {
@ -208,4 +211,7 @@ define float @test_XSNMSUBASP(float %A, float %B, float %C) {
 	ret float %F
 ; CHECK-P8-LABEL: test_XSNMSUBASP:
 ; CHECK-P8: xsnmsubasp
+
+; CHECK-VSX-LABEL: test_XSNMSUBASP:
+; CHECK-VSX: fnmsubs
 }
--- a/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/test/CodeGen/PowerPC/fmf-propagation.ll
@ -280,8 +280,8 @@ define float @fmul_fma_fast2(float %x) {
 define float @sqrt_afn_ieee(float %x) #0 {
 ; FMF-LABEL: sqrt_afn_ieee:
 ; FMF:       # %bb.0:
+; FMF-NEXT:    xsabsdp 0, 1
 ; FMF-NEXT:    addis 3, 2, .LCPI10_2@toc@ha
-; FMF-NEXT:    fabs 0, 1
 ; FMF-NEXT:    lfs 2, .LCPI10_2@toc@l(3)
 ; FMF-NEXT:    fcmpu 0, 0, 2
 ; FMF-NEXT:    xxlxor 0, 0, 0
@ -303,8 +303,8 @@ define float @sqrt_afn_ieee(float %x) #0 {
 ;
 ; GLOBAL-LABEL: sqrt_afn_ieee:
 ; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsabsdp 0, 1
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI10_2@toc@ha
-; GLOBAL-NEXT:    fabs 0, 1
 ; GLOBAL-NEXT:    lfs 2, .LCPI10_2@toc@l(3)
 ; GLOBAL-NEXT:    fcmpu 0, 0, 2
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
@ -418,8 +418,8 @@ define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
 define float @sqrt_fast_ieee(float %x) #0 {
 ; FMF-LABEL: sqrt_fast_ieee:
 ; FMF:       # %bb.0:
+; FMF-NEXT:    xsabsdp 0, 1
 ; FMF-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
-; FMF-NEXT:    fabs 0, 1
 ; FMF-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
 ; FMF-NEXT:    fcmpu 0, 0, 2
 ; FMF-NEXT:    xxlxor 0, 0, 0
@ -440,8 +440,8 @@ define float @sqrt_fast_ieee(float %x) #0 {
 ;
 ; GLOBAL-LABEL: sqrt_fast_ieee:
 ; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsabsdp 0, 1
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
-; GLOBAL-NEXT:    fabs 0, 1
 ; GLOBAL-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
 ; GLOBAL-NEXT:    fcmpu 0, 0, 2
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
--- a/test/CodeGen/PowerPC/fsub-fneg.ll
+++ b/test/CodeGen/PowerPC/fsub-fneg.ll
@ -8,9 +8,9 @@
 define double @neg_ext_op1_extra_use(float %x, double %y) nounwind {
 ; CHECK-LABEL: neg_ext_op1_extra_use:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xsadddp 0, 2, 1
-; CHECK-NEXT:    fneg 1, 1
-; CHECK-NEXT:    xsdivdp 1, 1, 0
+; CHECK-NEXT:    xsnegdp 0, 1
+; CHECK-NEXT:    xsadddp 1, 2, 1
+; CHECK-NEXT:    xsdivdp 1, 0, 1
 ; CHECK-NEXT:    blr
  %t1 = fsub float -0.0, %x
  %t2 = fpext float %t1 to double