mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
90cca807df
This matters for byval uses outside of the entry block, which appear as copies. Previously, the only folding done was during selection, which could not see the underlying frame index. For any uses outside the entry block, the frame index was materialized in the entry block relative to the global scratch wave offset. This may produce worse code in cases where the offset ends up not fitting in the MUBUF offset field. A better heuristic would be helpful for extreme frames. llvm-svn: 364185
135 lines
5.1 KiB
YAML
135 lines
5.1 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
|
|
|
|
---
# The BUFFER_LOAD's resource operand is %0, a copy of
# $sgpr12_sgpr13_sgpr14_sgpr15, rather than this function's scratchRSrcReg
# ($sgpr96_sgpr97_sgpr98_sgpr99); its soffset operand is %1 (S_MOV_B32 0).
# The GCN checks expect the V_MOV_B32_e32 of %stack.0 to remain the load's
# address operand, while the soffset operand becomes the immediate 0.
|
|
name: no_fold_fi_non_stack_rsrc_soffset
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
maxAlignment: 4
|
|
localFrameSize: 4
|
|
stack:
|
|
- { id: 0, size: 4, alignment: 4, local-offset: 0 }
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
|
|
scratchWaveOffsetReg: '$sgpr6'
|
|
frameOffsetReg: '$sgpr6'
|
|
stackPtrOffsetReg: '$sgpr6'
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr12_sgpr13_sgpr14_sgpr15
|
|
|
|
; GCN-LABEL: name: no_fold_fi_non_stack_rsrc_soffset
|
|
; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
|
|
; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
|
|
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
|
|
; GCN: SI_RETURN_TO_EPILOG $vgpr0
|
|
%0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
|
|
%1:sreg_32_xm0 = S_MOV_B32 0
|
|
%2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
%3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, 0, implicit $exec
|
|
$vgpr0 = COPY %3
|
|
SI_RETURN_TO_EPILOG $vgpr0
|
|
|
|
...
|
|
|
|
---
# The BUFFER_LOAD's resource operand is %0, a copy of
# $sgpr12_sgpr13_sgpr14_sgpr15, rather than this function's scratchRSrcReg
# ($sgpr96_sgpr97_sgpr98_sgpr99); its soffset operand is $sgpr32, the same
# register named by stackPtrOffsetReg.
# The GCN checks expect the load to be unchanged: the V_MOV_B32_e32 of
# %stack.0 stays as the address operand and soffset stays $sgpr32.
|
|
name: no_fold_fi_non_stack_rsrc
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
maxAlignment: 4
|
|
localFrameSize: 4
|
|
stack:
|
|
- { id: 0, size: 4, alignment: 4, local-offset: 0 }
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
|
|
scratchWaveOffsetReg: '$sgpr6'
|
|
frameOffsetReg: '$sgpr6'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr12_sgpr13_sgpr14_sgpr15
|
|
|
|
; GCN-LABEL: name: no_fold_fi_non_stack_rsrc
|
|
; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
|
|
; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
|
|
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
|
|
; GCN: SI_RETURN_TO_EPILOG $vgpr0
|
|
%0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
|
|
%2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
%3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
|
$vgpr0 = COPY %3
|
|
SI_RETURN_TO_EPILOG $vgpr0
|
|
|
|
...
|
|
|
|
# Offset is from global scratch wave offset.
# The store and load use scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3) as the
# resource and $sgpr33 (scratchWaveOffsetReg) as soffset, with the address
# coming from a V_MOV_B32_e32 of %stack.0.
# The GCN checks expect %stack.0 to appear directly as the address operand of
# both instructions and the soffset operand to become $sgpr32
# (stackPtrOffsetReg).
|
|
---
|
|
name: fold_fi_mubuf_scratch_scratch_wave_offset
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
maxAlignment: 4
|
|
localFrameSize: 4
|
|
stack:
|
|
- { id: 0, size: 4, alignment: 4, local-offset: 0 }
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
|
scratchWaveOffsetReg: '$sgpr33'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
body: |
|
|
bb.0:
|
|
|
|
; GCN-LABEL: name: fold_fi_mubuf_scratch_scratch_wave_offset
|
|
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
|
|
; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
|
; GCN: S_ENDPGM 0, implicit $vgpr0
|
|
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
%1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
|
|
|
|
BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, 0, implicit $exec
|
|
%2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, 0, implicit $exec
|
|
$vgpr0 = COPY %2
|
|
S_ENDPGM 0, implicit $vgpr0
|
|
|
|
...
|
|
|
|
---
# The store and load use scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3) as the
# resource and $sgpr32 (stackPtrOffsetReg) as soffset, with the address
# coming from a V_MOV_B32_e32 of %stack.0.
# The GCN checks expect %stack.0 to appear directly as the address operand of
# both instructions, with soffset still $sgpr32.
# NOTE(review): the "no_fold" prefix in the name does not match these
# autogenerated checks, which show the frame index folded into the memory
# instructions -- confirm whether the name or the expectation is stale.
|
|
name: no_fold_fi_mubuf_scratch_sp_offset
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
maxAlignment: 4
|
|
localFrameSize: 4
|
|
stack:
|
|
- { id: 0, size: 4, alignment: 4, local-offset: 0 }
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
|
scratchWaveOffsetReg: '$sgpr33'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
body: |
|
|
bb.0:
|
|
|
|
; GCN-LABEL: name: no_fold_fi_mubuf_scratch_sp_offset
|
|
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
|
|
; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
|
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
|
; GCN: S_ENDPGM 0, implicit $vgpr0
|
|
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
%1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
|
|
|
|
BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
|
%2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
|
$vgpr0 = COPY %2
|
|
S_ENDPGM 0, implicit $vgpr0
|
|
|
|
...
|