mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
6ac0148c30
Since the beginning, the offset of a frame index has been consistently interpreted backwards. It was treated as an offset from the scratch wave offset register, with that register acting as a frame register. The correct interpretation is the offset from the SP on entry to the function, before the prolog. Frame index elimination should then select either SP or another register as an FP. Treat the scratch wave offset on kernel entry as the pre-incremented SP. Rely more heavily on the standard hasFP and frame pointer elimination logic, and clean up the private reservation code. This saves a copy in most callee functions. The kernel prolog emission code is still kind of a mess, relying on checking the uses of physical registers, which I would prefer to eliminate. Currently selection directly emits MUBUF instructions, which require using a reference to some register. Use the register chosen for SP, and then ignore this later. This should probably be cleaned up to use pseudos that don't refer to any specific base register until frame index elimination. Add a workaround for shaders using large numbers of SGPRs. I'm not sure these cases were ever working correctly, since as far as I can tell the logic for figuring out which SGPR is the scratch wave offset doesn't match up with the shader input initialization in the shader programming guide. llvm-svn: 362661
73 lines
2.6 KiB
YAML
73 lines
2.6 KiB
YAML
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-dce-in-ra=0 -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s
|
|
# https://bugs.llvm.org/show_bug.cgi?id=33620
|
|
|
|
---
# This would assert due to the empty live interval created for %9
# on the last S_NOP with an undef subreg use.
#
# Expected output, per the directives below: the value defined by
# V_MAC_F32_e32 in bb.0 is spilled to %stack.0 right after its definition
# and reloaded into a fresh virtual register immediately before the S_NOP
# that reads its sub1 lane. The final S_NOP keeps its undef sub0 use.

# CHECK-LABEL: name: expecting_non_empty_interval

# CHECK: undef %7.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %7.sub1, implicit $exec
# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: undef %5.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
# CHECK-NEXT: dead %3:vgpr_32 = V_MUL_F32_e32 0, %5.sub1, implicit $exec

# CHECK: S_NOP 0, implicit %6.sub1
# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: S_NOP 0, implicit %8.sub1
# CHECK-NEXT: S_NOP 0, implicit undef %9.sub0

name: expecting_non_empty_interval
tracksRegLiveness: true
# The expected spill/restore operands above use scratchRSrcReg and
# stackPtrOffsetReg from this mapping; scratchWaveOffsetReg does not
# appear in them.
machineFunctionInfo:
  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
  scratchWaveOffsetReg: $sgpr4
  stackPtrOffsetReg: $sgpr32
body: |
  bb.0:
    successors: %bb.1

    undef %0.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %0.sub1, implicit $exec
    undef %2.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
    dead %3:vgpr_32 = V_MUL_F32_e32 0, %2.sub1, implicit $exec

  bb.1:
    S_NOP 0, implicit %2.sub1
    S_NOP 0, implicit %0.sub1
    S_NOP 0, implicit undef %0.sub0

...
|
|
# Similar assert which happens when trying to rematerialize.
|
|
# https://bugs.llvm.org/show_bug.cgi?id=33884
|
|
---
# Rematerialization variant of the empty-live-interval case.
#
# Expected output, per the directives below: no MOV is emitted ahead of the
# 1786773504 move, and the `V_MOV_B32_e32 0` that the input defines in bb.0
# is instead emitted in bb.1 directly before the S_NOP that reads its sub2
# lane. The undef sub0 use stays in place between the two defined uses.

# CHECK-LABEL: name: rematerialize_empty_interval_has_reference

# CHECK-NOT: MOV
# CHECK: undef %1.sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec

# CHECK: bb.1:
# CHECK-NEXT: S_NOP 0, implicit %1.sub2
# CHECK-NEXT: S_NOP 0, implicit undef %4.sub0
# CHECK-NEXT: undef %2.sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: S_NOP 0, implicit %2.sub2
name: rematerialize_empty_interval_has_reference
tracksRegLiveness: true
machineFunctionInfo:
  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
  scratchWaveOffsetReg: $sgpr4
  stackPtrOffsetReg: $sgpr32
body: |
  bb.0:
    successors: %bb.1

    undef %0.sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
    undef %1.sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec

  bb.1:
    S_NOP 0, implicit %1.sub2
    S_NOP 0, implicit undef %0.sub0
    S_NOP 0, implicit %0.sub2

...