
[RISCV] Custom lower fshl/fshr with Zbt extension.

We need to add a mask to the shift amount for these operations
to use the FSR/FSL instructions. We were previously doing this
in isel patterns, but custom lowering will make the mask
visible to optimizations earlier.
Craig Topper 2021-01-31 17:37:44 -08:00
parent e4e7688ff2
commit ebb87abce3
3 changed files with 44 additions and 23 deletions
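
For context, a rough standalone RV32 model (mine, not part of the commit and not the LLVM implementation) of why the mask matters: the fshl/fshr intrinsics use the shift amount modulo XLen, while the draft-Zbt fsl/fsr instructions read log2(XLen)+1 bits and swap their data operands when the extra bit is set, so an unmasked shift amount such as 40 gives a different result.

// Illustrative RV32 model only; XLen, fshl and fsl are local helpers here,
// not LLVM or RISC-V spec code.
#include <cassert>
#include <cstdint>

static const unsigned XLen = 32;

// fshl intrinsic semantics: the shift amount is used modulo XLen.
uint32_t fshl(uint32_t a, uint32_t b, unsigned c) {
  unsigned s = c % XLen;
  return s ? (a << s) | (b >> (XLen - s)) : a;
}

// fsl instruction semantics as described in the draft bitmanip spec:
// the low log2(XLen)+1 bits of the shift amount are read, and values
// of XLen or more swap the two data operands.
uint32_t fsl(uint32_t rs1, unsigned rs2, uint32_t rs3) {
  unsigned s = rs2 & (2 * XLen - 1);
  uint32_t a = rs1, b = rs3;
  if (s >= XLen) { s -= XLen; a = rs3; b = rs1; }
  return s ? (a << s) | (b >> (XLen - s)) : a;
}

int main() {
  uint32_t a = 0x12345678, b = 0x9abcdef0;
  // Masked to 5 bits, fsl agrees with fshl.
  assert(fsl(a, 40 & (XLen - 1), b) == fshl(a, b, 40));
  // Unmasked, the extra shift-amount bit flips the operands.
  assert(fsl(a, 40, b) != fshl(a, b, 40));
  return 0;
}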


@@ -251,8 +251,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   }
   if (Subtarget.hasStdExtZbt()) {
-    setOperationAction(ISD::FSHL, XLenVT, Legal);
-    setOperationAction(ISD::FSHR, XLenVT, Legal);
+    setOperationAction(ISD::FSHL, XLenVT, Custom);
+    setOperationAction(ISD::FSHR, XLenVT, Custom);
     setOperationAction(ISD::SELECT, XLenVT, Legal);
     if (Subtarget.is64Bit()) {
@@ -729,6 +729,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                        DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
   }
+  case ISD::FSHL:
+  case ISD::FSHR: {
+    MVT VT = Op.getSimpleValueType();
+    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
+    SDLoc DL(Op);
+    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
+    // use log2(XLen) bits. Mask the shift amount accordingly.
+    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
+    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
+                                DAG.getConstant(ShAmtWidth, DL, VT));
+    unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
+    return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
+  }
   case ISD::TRUNCATE: {
     SDLoc DL(Op);
     EVT VT = Op.getValueType();
@@ -2159,6 +2172,20 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     }
     break;
   }
+  case RISCVISD::FSL:
+  case RISCVISD::FSR: {
+    // Only the lower log2(BitWidth)+1 bits of the shift amount are read.
+    SDValue ShAmt = N->getOperand(2);
+    unsigned BitWidth = ShAmt.getValueSizeInBits();
+    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
+    APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
+    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
+      if (N->getOpcode() != ISD::DELETED_NODE)
+        DCI.AddToWorklist(N);
+      return SDValue(N, 0);
+    }
+    break;
+  }
   case RISCVISD::FSLW:
   case RISCVISD::FSRW: {
     // Only the lower 32 bits of Values and lower 6 bits of shift amount are
@@ -4103,6 +4130,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(RORW)
   NODE_NAME_CASE(FSLW)
   NODE_NAME_CASE(FSRW)
+  NODE_NAME_CASE(FSL)
+  NODE_NAME_CASE(FSR)
   NODE_NAME_CASE(FMV_H_X)
   NODE_NAME_CASE(FMV_X_ANYEXTH)
   NODE_NAME_CASE(FMV_W_X_RV64)
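
As a small aside (not part of the diff), the demanded-bits mask built in the new RISCVISD::FSL/FSR combine, (BitWidth * 2) - 1, works out to 6 low bits on RV32 and 7 on RV64; a minimal sketch with a hypothetical helper name:

// Hypothetical standalone helper mirroring the mask computed above;
// fslFsrShAmtMask is an illustration, not an LLVM function.
#include <cassert>
#include <cstdint>

uint64_t fslFsrShAmtMask(unsigned BitWidth) {
  // Keeps exactly the low log2(BitWidth)+1 bits of the shift amount.
  return static_cast<uint64_t>(BitWidth) * 2 - 1;
}

int main() {
  assert(fslFsrShAmtMask(32) == 0x3f); // RV32: 6 bits demanded
  assert(fslFsrShAmtMask(64) == 0x7f); // RV64: 7 bits demanded
  return 0;
}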


@@ -53,6 +53,10 @@ enum NodeType : unsigned {
   // instructions.
   ROLW,
   RORW,
+  // RV64IB/RV32IB funnel shifts, with the semantics of the named RISC-V
+  // instructions, but the same operand order as fshl/fshr intrinsics.
+  FSR,
+  FSL,
   // RV64IB funnel shifts, with the semantics of the named RISC-V instructions,
   // but the same operand order as fshl/fshr intrinsics.
   FSRW,


@@ -21,6 +21,8 @@ def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>;
 def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>;
 def riscv_fslw : SDNode<"RISCVISD::FSLW", SDTIntShiftDOp>;
 def riscv_fsrw : SDNode<"RISCVISD::FSRW", SDTIntShiftDOp>;
+def riscv_fsl : SDNode<"RISCVISD::FSL", SDTIntShiftDOp>;
+def riscv_fsr : SDNode<"RISCVISD::FSR", SDTIntShiftDOp>;

 def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
   let Name = "UImmLog2XLenHalf";
@@ -776,31 +778,17 @@ def : Pat<(select GPR:$rs2, GPR:$rs1, GPR:$rs3),
 // instructions use different orders. fshl will return its first operand for
 // shift of zero, fshr will return its second operand. fsl and fsr both return
 // $rs1 so the patterns need to have different operand orders.
-//
-// fshl and fshr only read the lower log2(xlen) bits of the shift amount, but
-// fsl/fsr instructions read log2(xlen)+1 bits. DAG combine may have removed
-// an AND mask on the shift amount that we need to add back to avoid a one in
-// the extra bit.
-// FIXME: If we can prove that the extra bit in the shift amount is zero, we
-// don't need this mask.
-let Predicates = [HasStdExtZbt, IsRV32] in {
-def : Pat<(fshl GPR:$rs1, GPR:$rs3, GPR:$rs2),
-          (FSL GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
-def : Pat<(fshr GPR:$rs3, GPR:$rs1, GPR:$rs2),
-          (FSR GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
-}
-let Predicates = [HasStdExtZbt, IsRV64] in {
-def : Pat<(fshl GPR:$rs1, GPR:$rs3, GPR:$rs2),
-          (FSL GPR:$rs1, (ANDI GPR:$rs2, 63), GPR:$rs3)>;
-def : Pat<(fshr GPR:$rs3, GPR:$rs1, GPR:$rs2),
-          (FSR GPR:$rs1, (ANDI GPR:$rs2, 63), GPR:$rs3)>;
-}
 let Predicates = [HasStdExtZbt] in {
-def : Pat<(fshr GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
+def : Pat<(riscv_fsl GPR:$rs1, GPR:$rs3, GPR:$rs2),
+          (FSL GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsr GPR:$rs3, GPR:$rs1, GPR:$rs2),
+          (FSR GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsr GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
           (FSRI GPR:$rs1, GPR:$rs3, uimmlog2xlen:$shamt)>;
 // We can use FSRI for fshl by immediate if we subtract the immediate from
 // XLen and swap the operands.
-def : Pat<(fshl GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
+def : Pat<(riscv_fsl GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
           (FSRI GPR:$rs1, GPR:$rs3, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
 } // Predicates = [HasStdExtZbt]
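
For reference, a quick standalone check (again mine, not part of the commit) of the identity the last pattern relies on: fshl(a, b, k) equals fshr(a, b, XLen - k) for 0 < k < XLen, which is why an fshl by immediate can be emitted as FSRI with the immediate subtracted from XLen and the register operands swapped.

// Illustrative only; fshl/fshr below model the LLVM intrinsics' semantics.
#include <cassert>
#include <cstdint>

static const unsigned XLen = 32;

uint32_t fshl(uint32_t a, uint32_t b, unsigned c) {
  unsigned s = c % XLen;
  return s ? (a << s) | (b >> (XLen - s)) : a;
}

uint32_t fshr(uint32_t a, uint32_t b, unsigned c) {
  unsigned s = c % XLen;
  return s ? (a << (XLen - s)) | (b >> s) : b;
}

int main() {
  uint32_t a = 0x12345678, b = 0x9abcdef0;
  for (unsigned k = 1; k < XLen; ++k)
    assert(fshl(a, b, k) == fshr(a, b, XLen - k));
  return 0;
}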