1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00

[RISCV] Permit larger RVV stacks and stack offsets

This patch teaches the compiler to generate code to handle larger RVV
stack sizes and stack offsets which resolve to an amount larger than 2047
vector registers in size.

The previous behaviour was asserting on such large values as it was only
able to materialize the constant by feeding it to the 12-bit immediate
of an `ADDI` instruction. The compiler can now materialize this amount
into a temporary register before continuing with the computation.

A test case for this scenario is included which also checks that the
temporary register used to materialize the amount doesn't require an
additional spill slot over what we're already reserving for RVV code.

Reviewed By: rogfer01

Differential Revision: https://reviews.llvm.org/D104727
This commit is contained in:
Fraser Cormack 2021-06-22 18:08:52 +01:00
parent 1badfbbb03
commit 98c72058c5
2 changed files with 100 additions and 5 deletions

View File

@ -1473,8 +1473,8 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
assert(isInt<12>(NumOfVReg) &&
"Expect the number of vector registers within 12-bits.");
assert(isInt<32>(NumOfVReg) &&
"Expect the number of vector registers within 32-bits.");
if (isPowerOf2_32(NumOfVReg)) {
uint32_t ShiftAmount = Log2_32(NumOfVReg);
if (ShiftAmount == 0)
@ -1502,9 +1502,12 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.addReg(VL, RegState::Kill);
} else {
Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), N)
.addReg(RISCV::X0)
.addImm(NumOfVReg);
if (!isInt<12>(NumOfVReg))
movImm(MBB, II, DL, N, NumOfVReg);
else
BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), N)
.addReg(RISCV::X0)
.addImm(NumOfVReg);
if (!MF.getSubtarget<RISCVSubtarget>().hasStdExtM())
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
MF.getFunction(),

View File

@ -0,0 +1,92 @@
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
# RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v -start-before=prologepilog -o - \
# RUN: -verify-machineinstrs %s | FileCheck %s
--- |
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"
define void @spillslot() {
; CHECK-LABEL: spillslot:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -2032
; CHECK-NEXT: .cfi_def_cfa_offset 2032
; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: addi s0, sp, 2032
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: addi sp, sp, -272
; CHECK-NEXT: sd a0, 8(sp)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: sd a1, 0(sp)
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: addiw a1, a1, -1024
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ld a1, 0(sp)
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: andi sp, sp, -128
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addiw a0, a0, -1808
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: ld a0, 8(sp)
; CHECK-NEXT: call spillslot@plt
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addiw a0, a0, -1792
; CHECK-NEXT: sub sp, s0, a0
; CHECK-NEXT: addi sp, sp, 272
; CHECK-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 2032
; CHECK-NEXT: ret
ret void
}
...
---
name: spillslot
alignment: 4
tracksRegLiveness: false
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 128
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: default, offset: 0, size: 2048, alignment: 128,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: 0, size: 24576, alignment: 8,
stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
body: |
bb.0:
liveins: $x1, $x5, $x6, $x7, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x28, $x29, $x30, $x31, $v25
; Spill $v25 into %stack.1, the scalable-vector spill slot (stack id 1).
PseudoVSPILL_M1 killed renamable $v25, %stack.1 :: (store unknown-size into %stack.1, align 8)
; This is here just to make all the eligible registers live at this point.
; This way when we replace the frame index %stack.1 with its actual address
; we have to allocate two virtual registers to compute it.
; A later run of the register scavenger won't find available registers
; either so it will have to spill two to the emergency spill slots
; required for this RVV computation.
PseudoCALL target-flags(riscv-plt) @spillslot, csr_ilp32_lp64, implicit-def $x1, implicit-def $x2, implicit $x1, implicit $x5, implicit $x6, implicit $x7, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x28, implicit $x29, implicit $x30, implicit $x31
PseudoRET
...