mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
59de807f62
This is the groundwork required to implement strictfp. For now, this should be NFC for regular instructions (many instructions just gain an extra use of a reserved register). Regalloc won't rematerialize instructions with reads of physical registers, but we were suffering from that anyway with the exec reads. Should add it for all the related FP uses (possibly with some extras). I did not add it to either the gpr index mode instructions (or every single VALU instruction) since it's a ridiculous feature already modeled as an arbitrary side effect. Also work towards marking instructions with FP exceptions. This doesn't actually set the bit yet since this would start to change codegen. It seems nofpexcept is currently not implied from the regular IR FP operations. Add it to some MIR tests where I think it might matter.
118 lines
5.8 KiB
YAML
118 lines
5.8 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck -check-prefix GCN %s
|
|
#
# The llc invocation above runs only the simple-register-coalescing pass on the
# _amdgpu_ps_main function below (gfx900, amdpal), with -verify-machineinstrs
# enabled, and pipes the resulting MIR to FileCheck using the GCN prefix.
#
|
|
---
|
|
name: _amdgpu_ps_main
|
|
alignment: 1
|
|
tracksRegLiveness: true
|
|
# Virtual registers declared up front. %1 and %2 are both sreg_32_xm0 and each
# names the other as its preferred-register.
registers:
|
|
- { id: 0, class: sgpr_128 }
|
|
- { id: 1, class: sreg_32_xm0, preferred-register: '%2' }
|
|
- { id: 2, class: sreg_32_xm0, preferred-register: '%1' }
|
|
# Argument registers: the private segment buffer is in $sgpr0..$sgpr3 and the
# private segment wave byte offset is in $sgpr33.
machineFunctionInfo:
|
|
argumentInfo:
|
|
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
|
|
privateSegmentWaveByteOffset: { reg: '$sgpr33' }
|
|
# The block scalar that follows holds the autogenerated check lines first and
# then the input machine basic blocks bb.0 through bb.6.
body: |
|
|
; GCN-LABEL: name: _amdgpu_ps_main
|
|
; GCN: bb.0:
|
|
; GCN: successors: %bb.1(0x80000000)
|
|
; GCN: %3:vgpr_32 = nofpexcept V_TRUNC_F32_e32 undef %4:vgpr_32, implicit $mode, implicit $exec
|
|
; GCN: %5:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
|
|
; GCN: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 4, %5, implicit $exec
|
|
; GCN: undef %11.sub0:vreg_128 = V_MUL_LO_I32 [[V_LSHRREV_B32_e32_]], 3, implicit $exec
|
|
; GCN: %11.sub3:vreg_128 = COPY %11.sub0
|
|
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
|
|
; GCN: bb.1:
|
|
; GCN: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
|
; GCN: [[COPY:%[0-9]+]]:vreg_128 = COPY %11
|
|
; GCN: %11.sub3:vreg_128 = V_ADD_U32_e32 target-flags(amdgpu-rel32-lo) 1, [[COPY]].sub3, implicit $exec
|
|
; GCN: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[S_ADD_I32_]], 1, implicit-def dead $scc
|
|
; GCN: S_CMP_LT_U32 [[S_ADD_I32_]], 3, implicit-def $scc
|
|
; GCN: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
|
|
; GCN: S_BRANCH %bb.2
|
|
; GCN: bb.2:
|
|
; GCN: successors: %bb.5(0x40000000), %bb.3(0x40000000)
|
|
; GCN: S_CBRANCH_SCC1 %bb.5, implicit undef $scc
|
|
; GCN: S_BRANCH %bb.3
|
|
; GCN: bb.3:
|
|
; GCN: successors: %bb.4(0x80000000)
|
|
; GCN: dead %16:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]].sub3, undef %17:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
|
|
; GCN: dead %18:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
|
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
|
|
; GCN: dead %20:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; GCN: bb.4:
|
|
; GCN: successors: %bb.4(0x7c000000), %bb.6(0x04000000)
|
|
; GCN: $vcc = COPY [[S_AND_B64_]]
|
|
; GCN: S_CBRANCH_VCCNZ %bb.4, implicit killed $vcc
|
|
; GCN: S_BRANCH %bb.6
|
|
; GCN: bb.5:
|
|
; GCN: %21:vgpr_32 = nofpexcept V_MUL_F32_e32 target-flags(amdgpu-gotprel) 0, %11.sub0, implicit $mode, implicit $exec
|
|
; GCN: %22:vgpr_32 = nofpexcept V_MIN_F32_e32 1106771968, %21, implicit $mode, implicit $exec
|
|
; GCN: %23:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32 0, %22, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN: %24:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32 0, %23, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN: %25:vgpr_32 = nofpexcept V_MAD_F32 0, %24, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN: %26:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, %25, 0, undef %27:vgpr_32, 0, 0, implicit $mode, implicit $exec
|
|
; GCN: EXP_DONE 0, %26, undef %28:vgpr_32, undef %29:vgpr_32, undef %30:vgpr_32, -1, -1, 15, implicit $exec
|
|
; GCN: S_ENDPGM 0
|
|
; GCN: bb.6:
|
|
; GCN: S_ENDPGM 0
|
|
; Entry block. Builds an integer from an undef float (V_TRUNC_F32, V_CVT_U32_F32,
; shift right by 4, multiply by 3) and copies that one value (%51) into both
; sub0 and sub3 of the 128-bit tuple %52. Also sets the scalar %9 to 0.
bb.0:
|
|
%10:vgpr_32 = nofpexcept V_TRUNC_F32_e32 undef %11:vgpr_32, implicit $mode, implicit $exec
|
|
%12:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 killed %10, implicit $mode, implicit $exec
|
|
%50:vgpr_32 = V_LSHRREV_B32_e32 4, killed %12, implicit $exec
|
|
%51:vgpr_32 = V_MUL_LO_I32 killed %50, 3, implicit $exec
|
|
; sub0 is defined with 'undef' because no other lane of %52 has a value yet.
undef %52.sub0:vreg_128 = COPY %51
|
|
%52.sub3:vreg_128 = COPY %51
|
|
%9:sreg_32_xm0 = S_MOV_B32 0
|
|
; %70 and %71 hand the scalar and the tuple to bb.1, which copies them back
; out at its top; bb.1 redefines both at its bottom.
%70:sreg_32_xm0 = COPY killed %9
|
|
%71:vreg_128 = COPY killed %52
|
|
|
|
; Loop block: lists itself and bb.2 as successors. Each pass adds 1 to sub3 of
; the tuple and 1 to the scalar counter, compares the new counter against 3 with
; S_CMP_LT_U32, and branches back to bb.1 through S_CBRANCH_SCC1.
bb.1:
|
|
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
|

|
; Pick up the values carried in %71 / %70 (written by bb.0 or by the previous
; pass through this block).
%53:vreg_128 = COPY killed %71
|
|
%1:sreg_32_xm0 = COPY killed %70
|
|
%57:vgpr_32 = V_ADD_U32_e32 target-flags(amdgpu-rel32-lo) 1, %53.sub3, implicit $exec
|
|
; %55 is a full copy of %53 whose sub3 is then overwritten with the incremented
; value. %53 is not killed here: bb.3 still reads %53.sub3.
%55:vreg_128 = COPY %53
|
|
%55.sub3:vreg_128 = COPY killed %57
|
|
%2:sreg_32_xm0 = S_ADD_I32 killed %1, 1, implicit-def dead $scc
|
|
S_CMP_LT_U32 %2, 3, implicit-def $scc
|
|
; %55 is copied without 'killed' because bb.5 reads %55.sub0 later.
%54:vreg_128 = COPY %55
|
|
; Write the updated counter and tuple back into the carried registers.
%70:sreg_32_xm0 = COPY killed %2
|
|
%71:vreg_128 = COPY killed %54
|
|
S_CBRANCH_SCC1 %bb.1, implicit killed $scc
|
|
S_BRANCH %bb.2
|
|
|
|
; Dispatch block: S_CBRANCH_SCC1 on an undef $scc targets bb.5, and the
; unconditional S_BRANCH targets bb.3.
bb.2:
|
|
S_CBRANCH_SCC1 %bb.5, implicit undef $scc
|
|
S_BRANCH %bb.3
|
|
|
|
; Holds the final (killed) use of %53: its sub3 is an operand of a buffer load
; whose result is dead. Also defines %36, which bb.4 copies into $vcc.
bb.3:
|
|
dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
|
|
dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
|
%36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
|
|
dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
; Self-loop block: lists itself and bb.6 as successors. Copies %36 (defined in
; bb.3) into $vcc, then S_CBRANCH_VCCNZ targets bb.4 and S_BRANCH targets bb.6.
bb.4:
|
|
successors: %bb.4(0x7c000000), %bb.6(0x04000000)
|
|

|
$vcc = COPY %36
|
|
S_CBRANCH_VCCNZ %bb.4, implicit killed $vcc
|
|
S_BRANCH %bb.6
|
|
|
|
; Floating-point chain starting from %55.sub0 (the final use of %55). Every FP
; instruction here is flagged nofpexcept and carries an implicit $mode use next
; to the implicit $exec use. The result is exported with EXP_DONE and the
; program ends.
bb.5:
|
|
%39:vgpr_32 = nofpexcept V_MUL_F32_e32 target-flags(amdgpu-gotprel) 0, killed %55.sub0, implicit $mode, implicit $exec
|
|
%41:vgpr_32 = nofpexcept V_MIN_F32_e32 1106771968, killed %39, implicit $mode, implicit $exec
|
|
; The first two V_MAD_F32 also carry the nnan/arcp/contract/reassoc flags; the
; third carries only nofpexcept.
%42:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32 0, killed %41, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%43:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32 0, killed %42, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%44:vgpr_32 = nofpexcept V_MAD_F32 0, killed %43, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%45:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, killed %44, 0, undef %46:vgpr_32, 0, 0, implicit $mode, implicit $exec
|
|
; Only the first data operand is a real value; the other three are undef.
EXP_DONE 0, killed %45, undef %47:vgpr_32, undef %48:vgpr_32, undef %49:vgpr_32, -1, -1, 15, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
; Exit block targeted by the S_BRANCH in bb.4: just ends the program.
bb.6:
|
|
S_ENDPGM 0
|
|
|
|
...
|