Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2025-01-31 12:41:49 +01:00

[RISCV] Add patterns for scalable-vector fabs & fcopysign

The patterns mostly follow the scalar counterparts, save for some extra
optimizations to match the vector/scalar forms.

The patch adds a DAGCombine for ISD::FCOPYSIGN to try and reorder
ISD::FNEG around any ISD::FP_EXTEND or ISD::FP_TRUNC of the second
operand. This helps us achieve better codegen to match vfsgnjn.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D96028
This commit is contained in:
Fraser Cormack 2021-02-04 12:07:59 +00:00
parent cc791cffbb
commit 7d1ce513fb
4 changed files with 1699 additions and 1 deletion

View File

@ -497,6 +497,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
};
if (Subtarget.hasStdExtZfh())
@ -604,6 +605,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZbp()) {
setTargetDAGCombine(ISD::OR);
}
if (Subtarget.hasStdExtV())
setTargetDAGCombine(ISD::FCOPYSIGN);
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
@ -2966,6 +2969,30 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
case ISD::FCOPYSIGN: {
EVT VT = N->getValueType(0);
// This combine only applies to vector FCOPYSIGN nodes.
if (!VT.isVector())
break;
// There is a form of VFSGNJ (vfsgnjn) which injects the negated sign of its
// second operand. Try and bubble any FNEG up past the FP_EXTEND/FP_ROUND of
// the sign operand so that form can be matched. Only a non-truncating
// FP_ROUND (trunc flag == 0) is handled; truncating rounds (TRUNC=1) are
// left unmodified.
SDValue In2 = N->getOperand(1);
// Avoid cases where the extend/round has multiple uses, as duplicating
// those is typically more expensive than removing a fneg.
if (!In2.hasOneUse())
break;
if (In2.getOpcode() != ISD::FP_EXTEND &&
(In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
break;
In2 = In2.getOperand(0);
// Only rewrite when the extended/rounded value is itself a negation.
if (In2.getOpcode() != ISD::FNEG)
break;
SDLoc DL(N);
// Rebuild the extend/round at the result type, then re-apply the FNEG on
// top of it:
//   fcopysign X, (fpext/fpround (fneg Y))
//     -> fcopysign X, (fneg (fpext/fpround Y))
SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
}
}
return SDValue();

View File

@ -642,11 +642,32 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>("PseudoVFSQRT_V_"# vti.LMul.MX)
vti.RegClass:$rs2, vti.AVL, vti.SEW)>;
// 14.12. Vector Floating-Point Sign-Injection Instructions
// fabs: vfsgnjx with the same vector for both operands XORs the sign bit
// with itself, clearing it.
def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
(!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX)
vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.SEW)>;
// Handle fneg with VFSGNJN using the same input for both operands.
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.SEW)>;
// fcopysign: plain vector/vector sign injection.
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.SEW)>;
// fcopysign with a splatted scalar sign source: use the vector/scalar form
// to avoid materializing the splat.
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (splat_vector vti.ScalarRegClass:$rs2)))),
(!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.SEW)>;
// fcopysign of a negated sign source: fold the fneg into VFSGNJN.
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg vti.RegClass:$rs2)))),
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.SEW)>;
// Same fneg fold for the splatted-scalar sign source (vector/scalar VFSGNJN).
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg (splat_vector vti.ScalarRegClass:$rs2))))),
(!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.SEW)>;
}
// 14.11. Vector Floating-Point Compare Instructions

View File

@ -0,0 +1,185 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s

; These tests check that scalable-vector llvm.fabs.* intrinsics lower to a
; single vfsgnjx.vv (sign-injection XOR of a value with itself, which clears
; the sign bit) on both RV32 and RV64, across the element type / LMUL
; combinations below.

; --- f16 element types (nxv1..nxv32) ---
declare <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half>)
define <vscale x 1 x half> @vfabs_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-LABEL: vfabs_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half> %v)
ret <vscale x 1 x half> %r
}
declare <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half>)
define <vscale x 2 x half> @vfabs_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-LABEL: vfabs_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %v)
ret <vscale x 2 x half> %r
}
declare <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half>)
define <vscale x 4 x half> @vfabs_nxv4f16(<vscale x 4 x half> %v) {
; CHECK-LABEL: vfabs_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %v)
ret <vscale x 4 x half> %r
}
declare <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half>)
define <vscale x 8 x half> @vfabs_nxv8f16(<vscale x 8 x half> %v) {
; CHECK-LABEL: vfabs_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %v)
ret <vscale x 8 x half> %r
}
declare <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half>)
define <vscale x 16 x half> @vfabs_nxv16f16(<vscale x 16 x half> %v) {
; CHECK-LABEL: vfabs_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> %v)
ret <vscale x 16 x half> %r
}
declare <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half>)
define <vscale x 32 x half> @vfabs_nxv32f16(<vscale x 32 x half> %v) {
; CHECK-LABEL: vfabs_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half> %v)
ret <vscale x 32 x half> %r
}

; --- f32 element types (nxv1..nxv16) ---
declare <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float>)
define <vscale x 1 x float> @vfabs_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: vfabs_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float> %v)
ret <vscale x 1 x float> %r
}
declare <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float>)
define <vscale x 2 x float> @vfabs_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: vfabs_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %v)
ret <vscale x 2 x float> %r
}
declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
define <vscale x 4 x float> @vfabs_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: vfabs_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %v)
ret <vscale x 4 x float> %r
}
declare <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float>)
define <vscale x 8 x float> @vfabs_nxv8f32(<vscale x 8 x float> %v) {
; CHECK-LABEL: vfabs_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float> %v)
ret <vscale x 8 x float> %r
}
declare <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float>)
define <vscale x 16 x float> @vfabs_nxv16f32(<vscale x 16 x float> %v) {
; CHECK-LABEL: vfabs_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float> %v)
ret <vscale x 16 x float> %r
}

; --- f64 element types (nxv1..nxv8) ---
declare <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double>)
define <vscale x 1 x double> @vfabs_nxv1f64(<vscale x 1 x double> %v) {
; CHECK-LABEL: vfabs_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double> %v)
ret <vscale x 1 x double> %r
}
declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
define <vscale x 2 x double> @vfabs_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: vfabs_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %v)
ret <vscale x 2 x double> %r
}
declare <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double>)
define <vscale x 4 x double> @vfabs_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: vfabs_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %v)
ret <vscale x 4 x double> %r
}
declare <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double>)
define <vscale x 8 x double> @vfabs_nxv8f64(<vscale x 8 x double> %v) {
; CHECK-LABEL: vfabs_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double> %v)
ret <vscale x 8 x double> %r
}

File diff suppressed because it is too large Load Diff