[RISCV] Permit larger RVV stacks and stack offsets

This patch teaches the compiler to generate code that handles larger RVV stack sizes and stack offsets, i.e. those that resolve to an amount larger than 2047 vector registers in size. Previously the compiler asserted on such large values because it could only materialize the constant by feeding it to the 12-bit immediate of an `ADDI` instruction. The compiler can now materialize this amount into a temporary register before continuing with the computation. A test case for this scenario is included; it also checks that the temporary register used to materialize the amount doesn't require an additional spill slot beyond what we're already reserving for RVV code. Reviewed By: rogfer01 Differential Revision: https://reviews.llvm.org/D104727
2024-11-22 10:42:39 +01:00 · 2021-06-22 18:08:52 +01:00 · 2021-06-22 18:08:52 +01:00 · 98c72058c5
commit 98c72058c5
parent 1badfbbb03
2 changed files with 100 additions and 5 deletions
--- a/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/lib/Target/RISCV/RISCVInstrInfo.cpp
@ -1473,8 +1473,8 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,

  Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
-  assert(isInt<12>(NumOfVReg) &&
-         "Expect the number of vector registers within 12-bits.");
+  assert(isInt<32>(NumOfVReg) &&
+         "Expect the number of vector registers within 32-bits.");
  if (isPowerOf2_32(NumOfVReg)) {
    uint32_t ShiftAmount = Log2_32(NumOfVReg);
    if (ShiftAmount == 0)
@ -1502,9 +1502,12 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
        .addReg(VL, RegState::Kill);
  } else {
    Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-    BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), N)
-        .addReg(RISCV::X0)
-        .addImm(NumOfVReg);
+    if (!isInt<12>(NumOfVReg))
+      movImm(MBB, II, DL, N, NumOfVReg);
+    else
+      BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), N)
+          .addReg(RISCV::X0)
+          .addImm(NumOfVReg);
    if (!MF.getSubtarget<RISCVSubtarget>().hasStdExtM())
      MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
          MF.getFunction(),
--- a/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
+++ b/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
@ -0,0 +1,92 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+# RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v -start-before=prologepilog -o - \
+# RUN:     -verify-machineinstrs %s | FileCheck %s
+--- |
+  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+  target triple = "riscv64"
+
+  define void @spillslot() {
+  ; CHECK-LABEL: spillslot:
+  ; CHECK:       # %bb.0:
+  ; CHECK-NEXT:    addi sp, sp, -2032
+  ; CHECK-NEXT:    .cfi_def_cfa_offset 2032
+  ; CHECK-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
+  ; CHECK-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
+  ; CHECK-NEXT:    .cfi_offset ra, -8
+  ; CHECK-NEXT:    .cfi_offset s0, -16
+  ; CHECK-NEXT:    addi s0, sp, 2032
+  ; CHECK-NEXT:    .cfi_def_cfa s0, 0
+  ; CHECK-NEXT:    addi sp, sp, -272
+  ; CHECK-NEXT:    sd a0, 8(sp)
+  ; CHECK-NEXT:    csrr a0, vlenb
+  ; CHECK-NEXT:    sd a1, 0(sp)
+  ; CHECK-NEXT:    lui a1, 1
+  ; CHECK-NEXT:    addiw a1, a1, -1024
+  ; CHECK-NEXT:    mul a0, a0, a1
+  ; CHECK-NEXT:    ld a1, 0(sp)
+  ; CHECK-NEXT:    sub sp, sp, a0
+  ; CHECK-NEXT:    andi sp, sp, -128
+  ; CHECK-NEXT:    lui a0, 1
+  ; CHECK-NEXT:    addiw a0, a0, -1808
+  ; CHECK-NEXT:    add a0, sp, a0
+  ; CHECK-NEXT:    vs1r.v v25, (a0) # Unknown-size Folded Spill
+  ; CHECK-NEXT:    ld a0, 8(sp)
+  ; CHECK-NEXT:    call spillslot@plt
+  ; CHECK-NEXT:    lui a0, 1
+  ; CHECK-NEXT:    addiw a0, a0, -1792
+  ; CHECK-NEXT:    sub sp, s0, a0
+  ; CHECK-NEXT:    addi sp, sp, 272
+  ; CHECK-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
+  ; CHECK-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+  ; CHECK-NEXT:    addi sp, sp, 2032
+  ; CHECK-NEXT:    ret
+    ret void
+  }
+
+...
+---
+name:            spillslot
+alignment:       4
+tracksRegLiveness: false
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    128
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 2048, alignment: 128,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: 0, size: 24576, alignment: 8,
+      stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body:             |
+  bb.0:
+    liveins: $x1, $x5, $x6, $x7, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x28, $x29, $x30, $x31, $v25
+
+    PseudoVSPILL_M1 killed renamable $v25, %stack.1 :: (store unknown-size into %stack.1, align 8)
+    ; This is here just to make all the eligible registers live at this point.
+    ; This way when we replace the frame index %stack.1 with its actual address
+    ; we have to allocate two virtual registers to compute it.
+    ; A later run of the register scavenger won't find available registers
+    ; either so it will have to spill two to the emergency spill slots
+    ; required for this RVV computation.
+    PseudoCALL target-flags(riscv-plt) @spillslot, csr_ilp32_lp64, implicit-def $x1, implicit-def $x2, implicit $x1, implicit $x5, implicit $x6, implicit $x7, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x28, implicit $x29, implicit $x30, implicit $x31
+    PseudoRET
+...