1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[RISCV] Custom lower ISD::VSCALE.

This patch custom lowers ISD::VSCALE into a csrr vlenb followed
by a shift right by 3 followed by a multiply by the scale amount.

I've added computeKnownBits support to indicate that the csrr vlenb
always produces 3 trailng bits of 0s so the shift right is "exact".
This allows the shift and multiply sequence to be nicely optimized
into a single shift or removed completely when the scale amount is
a power of 2.

The non power of 2 case multiplying by 24 is still producing
suboptimal code. We could remove the right shift and use a
multiply by 3. Hopefully we can improve DAG combine to fix that
since it's not unique to this sequence.

This replaces D94144.

Reviewed By: HsiangKai

Differential Revision: https://reviews.llvm.org/D94249
This commit is contained in:
Craig Topper 2021-01-13 17:14:45 -08:00
parent 903410342b
commit acb413fc74
6 changed files with 170 additions and 0 deletions

View File

@ -32,6 +32,7 @@
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@ -350,6 +351,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtV()) {
setBooleanVectorContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::VSCALE, XLenVT, Custom);
// RVV intrinsics may have illegal operands.
// We also need to custom legalize vmv.x.s.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
@ -686,6 +689,17 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::SPLAT_VECTOR:
return lowerSPLATVECTOR(Op, DAG);
case ISD::VSCALE: {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
// We define our scalable vector types for lmul=1 to use a 64 bit known
// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
// vscale as VLENB / 8.
SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
DAG.getConstant(3, DL, VT));
return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
}
}
}
@ -1867,6 +1881,30 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
return true;
}
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned Opc = Op.getOpcode();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
Known.resetAll();
switch (Opc) {
default: break;
case RISCVISD::READ_VLENB:
// We assume VLENB is at least 8 bytes.
// FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
Known.Zero.setLowBits(3);
break;
}
}
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
@ -3540,6 +3578,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(GORCIW)
NODE_NAME_CASE(VMV_X_S)
NODE_NAME_CASE(SPLAT_VECTOR_I64)
NODE_NAME_CASE(READ_VLENB)
}
// clang-format on
return nullptr;

View File

@ -85,6 +85,8 @@ enum NodeType : unsigned {
// Splats an i64 scalar to a vector type (with element type i64) where the
// scalar is a sign-extended i32.
SPLAT_VECTOR_I64,
// Read VLENB CSR
READ_VLENB,
};
} // namespace RISCVISD
@ -123,6 +125,11 @@ public:
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const override;
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
const APInt &DemandedElts,
const SelectionDAG &DAG,

View File

@ -17,6 +17,8 @@
def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>,
SDTCisInt<1>]>>;
def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
// X0 has special meaning for vsetvl/vsetvli.
// rd | rs1 | AVL value | Effect on vl
@ -2397,6 +2399,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
def PseudoVMV8R_V : VPseudo<VMV8R_V, V_M8, (outs VRM8:$vd), (ins VRM8:$vs2)>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
def PseudoReadVLENB : Pseudo<(outs GPR:$rd), (ins),
[(set GPR:$rd, (riscv_read_vlenb))]>;
}
//===----------------------------------------------------------------------===//
// 6. Configuration-Setting Instructions
//===----------------------------------------------------------------------===//

View File

@ -210,4 +210,13 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
OutMI.addOperand(MCOp);
}
if (OutMI.getOpcode() == RISCV::PseudoReadVLENB) {
OutMI.setOpcode(RISCV::CSRRS);
OutMI.addOperand(MCOperand::createImm(
RISCVSysReg::lookupSysRegByName("VLENB")->Encoding));
OutMI.addOperand(MCOperand::createReg(RISCV::X0));
return;
}
}

View File

@ -0,0 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple riscv32 -mattr=+m,+experimental-v < %s \
; RUN: | FileCheck %s
define i32 @vscale_zero() nounwind {
; CHECK-LABEL: vscale_zero:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a0, zero
; CHECK-NEXT: ret
entry:
%0 = call i32 @llvm.vscale.i32()
%1 = mul i32 %0, 0
ret i32 %1
}
define i32 @vscale_one() nounwind {
; CHECK-LABEL: vscale_one:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: ret
entry:
%0 = call i32 @llvm.vscale.i32()
%1 = mul i32 %0, 1
ret i32 %1
}
define i32 @vscale_uimmpow2xlen() nounwind {
; CHECK-LABEL: vscale_uimmpow2xlen:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: ret
entry:
%0 = call i32 @llvm.vscale.i32()
%1 = mul i32 %0, 64
ret i32 %1
}
define i32 @vscale_non_pow2() nounwind {
; CHECK-LABEL: vscale_non_pow2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: addi a1, zero, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
entry:
%0 = call i32 @llvm.vscale.i32()
%1 = mul i32 %0, 24
ret i32 %1
}
declare i32 @llvm.vscale.i32()

View File

@ -0,0 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v < %s \
; RUN: | FileCheck %s
define i64 @vscale_zero() nounwind {
; CHECK-LABEL: vscale_zero:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a0, zero
; CHECK-NEXT: ret
entry:
%0 = call i64 @llvm.vscale.i64()
%1 = mul i64 %0, 0
ret i64 %1
}
define i64 @vscale_one() nounwind {
; CHECK-LABEL: vscale_one:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: ret
entry:
%0 = call i64 @llvm.vscale.i64()
%1 = mul i64 %0, 1
ret i64 %1
}
define i64 @vscale_uimmpow2xlen() nounwind {
; CHECK-LABEL: vscale_uimmpow2xlen:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: ret
entry:
%0 = call i64 @llvm.vscale.i64()
%1 = mul i64 %0, 64
ret i64 %1
}
define i64 @vscale_non_pow2() nounwind {
; CHECK-LABEL: vscale_non_pow2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: addi a1, zero, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
entry:
%0 = call i64 @llvm.vscale.i64()
%1 = mul i64 %0, 24
ret i64 %1
}
declare i64 @llvm.vscale.i64()