
[RISCV] Initial support of LoopVectorizer for RISC-V Vector.

Define an option -riscv-vector-bits-max to specify the maximum vector
bits for the vectorizer. The loop vectorizer will use this value to check
whether it is safe to use whole vector registers to vectorize the loop.

This is not the optimal solution for loop vectorization with scalable
vectors: it assumes the whole vector register will be used to vectorize
the code. Where possible, we should instead configure vl to perform the
vectorization rather than using whole vector registers.

We only consider LMUL = 1 in this patch.
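
A minimal sketch of the sizing rule this implies, for illustration only
(the standalone helper below is hypothetical and not part of the patch):

#include <cassert>
#include <cstdio>

// Mirrors the RISCV::RVVBitsPerBlock constant added in this patch: the known
// part of a scalable RVV vector type is a 64-bit block scaled by vscale.
constexpr unsigned RVVBitsPerBlock = 64;

// With LMUL = 1, a single vector register is the upper bound, so the maximum
// vscale is the assumed maximum vector length divided by the block size.
unsigned maxVScale(unsigned MaxVectorBits) {
  assert(MaxVectorBits % RVVBitsPerBlock == 0 &&
         "assume the maximum vector length is a multiple of 64");
  return MaxVectorBits / RVVBitsPerBlock;
}

int main() {
  // e.g. -riscv-vector-bits-max=512 yields a maximum vscale of 512 / 64 = 8.
  std::printf("max vscale = %u\n", maxVScale(512));
  return 0;
}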

This patch is just initial work on the loop vectorizer for RISC-V Vector.

Differential Revision: https://reviews.llvm.org/D95659
Hsiangkai Wang 2021-01-27 15:00:46 +08:00
parent 6fd72535e1
commit cac4ae8ee1
8 changed files with 96 additions and 0 deletions

View File

@@ -5102,6 +5102,22 @@ bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

View File

@@ -340,6 +340,13 @@ public:
                                          Value *NewVal, Value *Mask,
                                          AtomicOrdering Ord) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;

private:
  void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                        const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -394,6 +401,11 @@ private:
  bool useRVVForFixedLengthVectorVT(MVT VT) const;
};

namespace RISCV {
// We use 64 bits as the known part in the scalable vector types.
static constexpr unsigned RVVBitsPerBlock = 64;
} // namespace RISCV

namespace RISCVVIntrinsicsTable {

struct RISCVVIntrinsicInfo {

View File

@@ -39,6 +39,11 @@ static cl::opt<unsigned> RVVVectorLMULMax(
             "Fractional LMUL values are not supported."),
    cl::init(8), cl::Hidden);

static cl::opt<unsigned> VectorBitsMax(
    "riscv-vector-bits-max",
    cl::desc("Assume RISC-V vector registers are at most this big"),
    cl::init(0), cl::Hidden);

void RISCVSubtarget::anchor() {}

RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
@@ -62,6 +67,11 @@ RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
  return *this;
}

unsigned RISCVSubtarget::getMaxVectorSizeInBits() const {
  assert(HasStdExtV && "Tried to get vector length without V support!");
  return VectorBitsMax;
}

RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
                               StringRef TuneCPU, StringRef FS,
                               StringRef ABIName, const TargetMachine &TM)

View File

@@ -134,6 +134,7 @@ public:
    assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
    return UserReservedRegister[i];
  }

  unsigned getMaxVectorSizeInBits() const;

protected:
  // GlobalISel related APIs.

View File

@@ -120,3 +120,17 @@ bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }
}

Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
  // The V specification makes no assumption about the maximum vector length,
  // so we use the value specified by the user as the maximum vector length.
  // This function uses that assumed maximum vector length to compute the
  // maximum vscale for the LoopVectorizer.
  // If the user does not specify a maximum vector length, we have no way to
  // know whether it is safe for the LoopVectorizer to vectorize the loop.
  // We only consider using a single vector register (LMUL = 1) to vectorize.
  unsigned MaxVectorSizeInBits = ST->getMaxVectorSizeInBits();
  if (ST->hasStdExtV() && MaxVectorSizeInBits != 0)
    return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock;
  return BaseT::getMaxVScale();
}
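
For illustration: with -riscv-vector-bits-max=512, as in the test added
below, getMaxVScale() returns 512 / 64 = 8, so the requested
<vscale x 2 x i32> vectorization factor spans at most 8 * 2 * 32 = 512 bits,
which still fits in a single LMUL = 1 register of the assumed maximum size.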

View File

@@ -49,6 +49,8 @@ public:
                          Type *Ty, TTI::TargetCostKind CostKind);

  bool shouldExpandReduction(const IntrinsicInst *II) const;

  bool supportsScalableVectors() const { return ST->hasStdExtV(); }
  Optional<unsigned> getMaxVScale() const;
};

} // end namespace llvm

View File

@@ -0,0 +1,4 @@
config.suffixes = ['.ll']

if not 'RISCV' in config.root.targets:
    config.unsupported = True

View File

@@ -0,0 +1,37 @@
; RUN: opt -mtriple=riscv64 -mattr=+m,+experimental-v -loop-vectorize \
; RUN:   -riscv-vector-bits-max=512 -S < %s 2>&1 \
; RUN:   | FileCheck %s

; void test(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(2, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 64] = a[i] + b[i];
;   }
; }
;
; CHECK: <vscale x 2 x i32>

define void @test(i32* %a, i32* %b) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
  %1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %2 = add nuw nsw i64 %iv, 64
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6

exit:
  ret void
}

!6 = !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.width", i32 2}
!8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}