[RISCV] Custom lower ISD::VSCALE.

This patch custom lowers ISD::VSCALE into a csrr vlenb followed by a shift right by 3 followed by a multiply by the scale amount. I've added computeKnownBits support to indicate that the csrr vlenb always produces 3 trailing zero bits so the shift right is "exact". This allows the shift and multiply sequence to be nicely optimized into a single shift or removed completely when the scale amount is a power of 2. The non power of 2 case multiplying by 24 is still producing suboptimal code. We could remove the right shift and use a multiply by 3. Hopefully we can improve DAG combine to fix that since it's not unique to this sequence. This replaces D94144. Reviewed By: HsiangKai Differential Revision: https://reviews.llvm.org/D94249
2025-01-31 20:51:52 +01:00 · 2021-01-13 17:14:45 -08:00 · 2021-01-13 17:14:45 -08:00 · acb413fc74
commit acb413fc74
parent 903410342b
6 changed files with 170 additions and 0 deletions
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@ -32,6 +32,7 @@
 #include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"

@ -350,6 +351,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

+    setOperationAction(ISD::VSCALE, XLenVT, Custom);
+
    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
@ -686,6 +689,17 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
  }
  case ISD::SPLAT_VECTOR:
    return lowerSPLATVECTOR(Op, DAG);
+  case ISD::VSCALE: {
+    MVT VT = Op.getSimpleValueType();
+    SDLoc DL(Op);
+    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
+    // We define our scalable vector types for lmul=1 to use a 64 bit known
+    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
+    // vscale as VLENB / 8.
+    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
+                                 DAG.getConstant(3, DL, VT));
+    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
+  }
  }
 }

@ -1867,6 +1881,30 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
  return true;
 }

+void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+                                                        KnownBits &Known,
+                                                        const APInt &DemandedElts,
+                                                        const SelectionDAG &DAG,
+                                                        unsigned Depth) const {
+  unsigned Opc = Op.getOpcode();
+  assert((Opc >= ISD::BUILTIN_OP_END ||
+          Opc == ISD::INTRINSIC_WO_CHAIN ||
+          Opc == ISD::INTRINSIC_W_CHAIN ||
+          Opc == ISD::INTRINSIC_VOID) &&
+         "Should use MaskedValueIsZero if you don't know whether Op"
+         " is a target node!");
+
+  Known.resetAll();
+  switch (Opc) {
+  default: break;
+  case RISCVISD::READ_VLENB:
+    // We assume VLENB is at least 8 bytes, so its low 3 bits are known zero.
+    // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
+    Known.Zero.setLowBits(3);
+    break;
+  }
+}
+
 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
@ -3540,6 +3578,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
+  NODE_NAME_CASE(READ_VLENB)
  }
  // clang-format on
  return nullptr;
--- a/lib/Target/RISCV/RISCVISelLowering.h
+++ b/lib/Target/RISCV/RISCVISelLowering.h
@ -85,6 +85,8 @@ enum NodeType : unsigned {
  // Splats an i64 scalar to a vector type (with element type i64) where the
  // scalar is a sign-extended i32.
  SPLAT_VECTOR_I64,
+  // Read VLENB CSR
+  READ_VLENB,
 };
 } // namespace RISCVISD

@ -123,6 +125,11 @@ public:

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

+  void computeKnownBitsForTargetNode(const SDValue Op,
+                                     KnownBits &Known,
+                                     const APInt &DemandedElts,
+                                     const SelectionDAG &DAG,
+                                     unsigned Depth) const override;
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
--- a/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@ -17,6 +17,8 @@
 def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
                           SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>,
                                                SDTCisInt<1>]>>;
+def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
+                              SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;

 // X0 has special meaning for vsetvl/vsetvli.
 //  rd | rs1 |   AVL value | Effect on vl
@ -2397,6 +2399,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def PseudoVMV8R_V : VPseudo<VMV8R_V, V_M8, (outs VRM8:$vd), (ins VRM8:$vs2)>;
 }

+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
+  def PseudoReadVLENB : Pseudo<(outs GPR:$rd), (ins),
+                               [(set GPR:$rd, (riscv_read_vlenb))]>;
+}
+
 //===----------------------------------------------------------------------===//
 // 6. Configuration-Setting Instructions
 //===----------------------------------------------------------------------===//
--- a/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/lib/Target/RISCV/RISCVMCInstLower.cpp
@ -210,4 +210,13 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
    if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
      OutMI.addOperand(MCOp);
  }
+
+  if (OutMI.getOpcode() == RISCV::PseudoReadVLENB) {
+    OutMI.setOpcode(RISCV::CSRRS);
+    OutMI.addOperand(MCOperand::createImm(
+        RISCVSysReg::lookupSysRegByName("VLENB")->Encoding));
+    OutMI.addOperand(MCOperand::createReg(RISCV::X0));
+    return;
+  }
+
 }
--- a/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll
+++ b/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll
@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -mattr=+m,+experimental-v < %s \
+; RUN:    | FileCheck %s
+
+define i32 @vscale_zero() nounwind {
+; CHECK-LABEL: vscale_zero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mv a0, zero
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i32 @llvm.vscale.i32()
+  %1 = mul i32 %0, 0
+  ret i32 %1
+}
+
+define i32 @vscale_one() nounwind {
+; CHECK-LABEL: vscale_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i32 @llvm.vscale.i32()
+  %1 = mul i32 %0, 1
+  ret i32 %1
+}
+
+define i32 @vscale_uimmpow2xlen() nounwind {
+; CHECK-LABEL: vscale_uimmpow2xlen:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i32 @llvm.vscale.i32()
+  %1 = mul i32 %0, 64
+  ret i32 %1
+}
+
+define i32 @vscale_non_pow2() nounwind {
+; CHECK-LABEL: vscale_non_pow2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a1, zero, 24
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i32 @llvm.vscale.i32()
+  %1 = mul i32 %0, 24
+  ret i32 %1
+}
+
+declare i32 @llvm.vscale.i32()
--- a/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll
+++ b/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll
@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v < %s \
+; RUN:    | FileCheck %s
+
+define i64 @vscale_zero() nounwind {
+; CHECK-LABEL: vscale_zero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mv a0, zero
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = mul i64 %0, 0
+  ret i64 %1
+}
+
+define i64 @vscale_one() nounwind {
+; CHECK-LABEL: vscale_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = mul i64 %0, 1
+  ret i64 %1
+}
+
+define i64 @vscale_uimmpow2xlen() nounwind {
+; CHECK-LABEL: vscale_uimmpow2xlen:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = mul i64 %0, 64
+  ret i64 %1
+}
+
+define i64 @vscale_non_pow2() nounwind {
+; CHECK-LABEL: vscale_non_pow2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a1, zero, 24
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = mul i64 %0, 24
+  ret i64 %1
+}
+
+declare i64 @llvm.vscale.i64()