mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[RISCV] Permit larger RVV stacks and stack offsets
This patch teaches the compiler to generate code that handles larger RVV stack sizes and stack offsets, i.e. those that resolve to an amount larger than 2047 vector registers in size. Previously the compiler asserted on such large values, as it was only able to materialize the constant by feeding it to the 12-bit immediate of an `ADDI` instruction. The compiler can now materialize this amount into a temporary register before continuing with the computation. A test case for this scenario is included, which also checks that the temporary register used to materialize the amount doesn't require an additional spill slot beyond what we're already reserving for RVV code. Reviewed By: rogfer01. Differential Revision: https://reviews.llvm.org/D104727
This commit is contained in:
parent
1badfbbb03
commit
98c72058c5
@ -1473,8 +1473,8 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
|
||||
|
||||
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
|
||||
assert(isInt<12>(NumOfVReg) &&
|
||||
"Expect the number of vector registers within 12-bits.");
|
||||
assert(isInt<32>(NumOfVReg) &&
|
||||
"Expect the number of vector registers within 32-bits.");
|
||||
if (isPowerOf2_32(NumOfVReg)) {
|
||||
uint32_t ShiftAmount = Log2_32(NumOfVReg);
|
||||
if (ShiftAmount == 0)
|
||||
@ -1502,9 +1502,12 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
|
||||
.addReg(VL, RegState::Kill);
|
||||
} else {
|
||||
Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), N)
|
||||
.addReg(RISCV::X0)
|
||||
.addImm(NumOfVReg);
|
||||
if (!isInt<12>(NumOfVReg))
|
||||
movImm(MBB, II, DL, N, NumOfVReg);
|
||||
else
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), N)
|
||||
.addReg(RISCV::X0)
|
||||
.addImm(NumOfVReg);
|
||||
if (!MF.getSubtarget<RISCVSubtarget>().hasStdExtM())
|
||||
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
|
||||
MF.getFunction(),
|
||||
|
92
test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
Normal file
92
test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
Normal file
@ -0,0 +1,92 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
# RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v -start-before=prologepilog -o - \
|
||||
# RUN: -verify-machineinstrs %s | FileCheck %s
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
|
||||
target triple = "riscv64"
|
||||
|
||||
define void @spillslot() {
|
||||
; CHECK-LABEL: spillslot:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi sp, sp, -2032
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 2032
|
||||
; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: .cfi_offset ra, -8
|
||||
; CHECK-NEXT: .cfi_offset s0, -16
|
||||
; CHECK-NEXT: addi s0, sp, 2032
|
||||
; CHECK-NEXT: .cfi_def_cfa s0, 0
|
||||
; CHECK-NEXT: addi sp, sp, -272
|
||||
; CHECK-NEXT: sd a0, 8(sp)
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: sd a1, 0(sp)
|
||||
; CHECK-NEXT: lui a1, 1
|
||||
; CHECK-NEXT: addiw a1, a1, -1024
|
||||
; CHECK-NEXT: mul a0, a0, a1
|
||||
; CHECK-NEXT: ld a1, 0(sp)
|
||||
; CHECK-NEXT: sub sp, sp, a0
|
||||
; CHECK-NEXT: andi sp, sp, -128
|
||||
; CHECK-NEXT: lui a0, 1
|
||||
; CHECK-NEXT: addiw a0, a0, -1808
|
||||
; CHECK-NEXT: add a0, sp, a0
|
||||
; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
|
||||
; CHECK-NEXT: ld a0, 8(sp)
|
||||
; CHECK-NEXT: call spillslot@plt
|
||||
; CHECK-NEXT: lui a0, 1
|
||||
; CHECK-NEXT: addiw a0, a0, -1792
|
||||
; CHECK-NEXT: sub sp, s0, a0
|
||||
; CHECK-NEXT: addi sp, sp, 272
|
||||
; CHECK-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi sp, sp, 2032
|
||||
; CHECK-NEXT: ret
|
||||
ret void
|
||||
}
|
||||
|
||||
...
|
||||
---
|
||||
name: spillslot
|
||||
alignment: 4
|
||||
tracksRegLiveness: false
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 128
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
hasTailCall: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack:
|
||||
- { id: 0, name: '', type: default, offset: 0, size: 2048, alignment: 128,
|
||||
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 1, name: '', type: spill-slot, offset: 0, size: 24576, alignment: 8,
|
||||
stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x1, $x5, $x6, $x7, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x28, $x29, $x30, $x31, $v25
|
||||
|
||||
PseudoVSPILL_M1 killed renamable $v25, %stack.1 :: (store unknown-size into %stack.1, align 8)
|
||||
; This is here just to make all the eligible registers live at this point.
|
||||
; This way when we replace the frame index %stack.1 with its actual address
|
||||
; we have to allocate two virtual registers to compute it.
|
||||
; A later run of the register scavenger won't find available registers
|
||||
; either so it will have to spill two to the emergency spill slots
|
||||
; required for this RVV computation.
|
||||
PseudoCALL target-flags(riscv-plt) @spillslot, csr_ilp32_lp64, implicit-def $x1, implicit-def $x2, implicit $x1, implicit $x5, implicit $x6, implicit $x7, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x28, implicit $x29, implicit $x30, implicit $x31
|
||||
PseudoRET
|
||||
...
|
Loading…
Reference in New Issue
Block a user