mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[RISCV] Custom lower ISD::VSCALE.
This patch custom lowers ISD::VSCALE into a csrr vlenb followed by a shift right by 3 followed by a multiply by the scale amount. I've added computeKnownBits support to indicate that the csrr vlenb always produces 3 trailing bits of 0s so the shift right is "exact". This allows the shift and multiply sequence to be nicely optimized into a single shift or removed completely when the scale amount is a power of 2. The non power of 2 case multiplying by 24 is still producing suboptimal code. We could remove the right shift and use a multiply by 3. Hopefully we can improve DAG combine to fix that since it's not unique to this sequence. This replaces D94144. Reviewed By: HsiangKai Differential Revision: https://reviews.llvm.org/D94249
This commit is contained in:
parent
903410342b
commit
acb413fc74
@ -32,6 +32,7 @@
|
||||
#include "llvm/IR/IntrinsicsRISCV.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/KnownBits.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
@ -350,6 +351,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
|
||||
if (Subtarget.hasStdExtV()) {
|
||||
setBooleanVectorContents(ZeroOrOneBooleanContent);
|
||||
|
||||
setOperationAction(ISD::VSCALE, XLenVT, Custom);
|
||||
|
||||
// RVV intrinsics may have illegal operands.
|
||||
// We also need to custom legalize vmv.x.s.
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
|
||||
@ -686,6 +689,17 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
|
||||
}
|
||||
case ISD::SPLAT_VECTOR:
|
||||
return lowerSPLATVECTOR(Op, DAG);
|
||||
case ISD::VSCALE: {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
SDLoc DL(Op);
|
||||
SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
|
||||
// We define our scalable vector types for lmul=1 to use a 64 bit known
|
||||
// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
|
||||
// vscale as VLENB / 8.
|
||||
SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
|
||||
DAG.getConstant(3, DL, VT));
|
||||
return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1867,6 +1881,30 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
|
||||
return true;
|
||||
}
|
||||
|
||||
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
|
||||
KnownBits &Known,
|
||||
const APInt &DemandedElts,
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth) const {
|
||||
unsigned Opc = Op.getOpcode();
|
||||
assert((Opc >= ISD::BUILTIN_OP_END ||
|
||||
Opc == ISD::INTRINSIC_WO_CHAIN ||
|
||||
Opc == ISD::INTRINSIC_W_CHAIN ||
|
||||
Opc == ISD::INTRINSIC_VOID) &&
|
||||
"Should use MaskedValueIsZero if you don't know whether Op"
|
||||
" is a target node!");
|
||||
|
||||
Known.resetAll();
|
||||
switch (Opc) {
|
||||
default: break;
|
||||
case RISCVISD::READ_VLENB:
|
||||
// We assume VLENB is at least 8 bytes.
|
||||
// FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
|
||||
Known.Zero.setLowBits(3);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
|
||||
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
|
||||
unsigned Depth) const {
|
||||
@ -3540,6 +3578,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(GORCIW)
|
||||
NODE_NAME_CASE(VMV_X_S)
|
||||
NODE_NAME_CASE(SPLAT_VECTOR_I64)
|
||||
NODE_NAME_CASE(READ_VLENB)
|
||||
}
|
||||
// clang-format on
|
||||
return nullptr;
|
||||
|
@ -85,6 +85,8 @@ enum NodeType : unsigned {
|
||||
// Splats an i64 scalar to a vector type (with element type i64) where the
|
||||
// scalar is a sign-extended i32.
|
||||
SPLAT_VECTOR_I64,
|
||||
// Read VLENB CSR
|
||||
READ_VLENB,
|
||||
};
|
||||
} // namespace RISCVISD
|
||||
|
||||
@ -123,6 +125,11 @@ public:
|
||||
|
||||
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
||||
|
||||
void computeKnownBitsForTargetNode(const SDValue Op,
|
||||
KnownBits &Known,
|
||||
const APInt &DemandedElts,
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth) const override;
|
||||
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
|
||||
const APInt &DemandedElts,
|
||||
const SelectionDAG &DAG,
|
||||
|
@ -17,6 +17,8 @@
|
||||
def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
|
||||
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>,
|
||||
SDTCisInt<1>]>>;
|
||||
def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
|
||||
SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
|
||||
|
||||
// X0 has special meaning for vsetvl/vsetvli.
|
||||
// rd | rs1 | AVL value | Effect on vl
|
||||
@ -2397,6 +2399,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
|
||||
def PseudoVMV8R_V : VPseudo<VMV8R_V, V_M8, (outs VRM8:$vd), (ins VRM8:$vs2)>;
|
||||
}
|
||||
|
||||
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
|
||||
def PseudoReadVLENB : Pseudo<(outs GPR:$rd), (ins),
|
||||
[(set GPR:$rd, (riscv_read_vlenb))]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 6. Configuration-Setting Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -210,4 +210,13 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
|
||||
if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
|
||||
OutMI.addOperand(MCOp);
|
||||
}
|
||||
|
||||
if (OutMI.getOpcode() == RISCV::PseudoReadVLENB) {
|
||||
OutMI.setOpcode(RISCV::CSRRS);
|
||||
OutMI.addOperand(MCOperand::createImm(
|
||||
RISCVSysReg::lookupSysRegByName("VLENB")->Encoding));
|
||||
OutMI.addOperand(MCOperand::createReg(RISCV::X0));
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
54
test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll
Normal file
54
test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple riscv32 -mattr=+m,+experimental-v < %s \
|
||||
; RUN: | FileCheck %s
|
||||
|
||||
define i32 @vscale_zero() nounwind {
|
||||
; CHECK-LABEL: vscale_zero:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: mv a0, zero
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call i32 @llvm.vscale.i32()
|
||||
%1 = mul i32 %0, 0
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @vscale_one() nounwind {
|
||||
; CHECK-LABEL: vscale_one:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: srli a0, a0, 3
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call i32 @llvm.vscale.i32()
|
||||
%1 = mul i32 %0, 1
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @vscale_uimmpow2xlen() nounwind {
|
||||
; CHECK-LABEL: vscale_uimmpow2xlen:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: slli a0, a0, 3
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call i32 @llvm.vscale.i32()
|
||||
%1 = mul i32 %0, 64
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @vscale_non_pow2() nounwind {
|
||||
; CHECK-LABEL: vscale_non_pow2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: srli a0, a0, 3
|
||||
; CHECK-NEXT: addi a1, zero, 24
|
||||
; CHECK-NEXT: mul a0, a0, a1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call i32 @llvm.vscale.i32()
|
||||
%1 = mul i32 %0, 24
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
declare i32 @llvm.vscale.i32()
|
54
test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll
Normal file
54
test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v < %s \
|
||||
; RUN: | FileCheck %s
|
||||
|
||||
define i64 @vscale_zero() nounwind {
|
||||
; CHECK-LABEL: vscale_zero:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: mv a0, zero
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call i64 @llvm.vscale.i64()
|
||||
%1 = mul i64 %0, 0
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i64 @vscale_one() nounwind {
|
||||
; CHECK-LABEL: vscale_one:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: srli a0, a0, 3
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call i64 @llvm.vscale.i64()
|
||||
%1 = mul i64 %0, 1
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i64 @vscale_uimmpow2xlen() nounwind {
|
||||
; CHECK-LABEL: vscale_uimmpow2xlen:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: slli a0, a0, 3
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call i64 @llvm.vscale.i64()
|
||||
%1 = mul i64 %0, 64
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i64 @vscale_non_pow2() nounwind {
|
||||
; CHECK-LABEL: vscale_non_pow2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: srli a0, a0, 3
|
||||
; CHECK-NEXT: addi a1, zero, 24
|
||||
; CHECK-NEXT: mul a0, a0, a1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call i64 @llvm.vscale.i64()
|
||||
%1 = mul i64 %0, 24
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
declare i64 @llvm.vscale.i64()
|
Loading…
x
Reference in New Issue
Block a user