mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
59de807f62
This is the groundwork required to implement strictfp. For now, this should be NFC for regular instructions (many instructions just gain an extra use of a reserved register). Regalloc won't rematerialize instructions with reads of physical registers, but we were suffering from that anyway with the exec reads. Should add it for all the related FP uses (possibly with some extras). I did not add it to either the gpr index mode instructions (or every single VALU instruction) since it's a ridiculous feature already modeled as an arbitrary side effect. Also work towards marking instructions with FP exceptions. This doesn't actually set the bit yet since this would start to change codegen. It seems nofpexcept is currently not implied from the regular IR FP operations. Add it to some MIR tests where I think it might matter.
286 lines
12 KiB
YAML
286 lines
12 KiB
YAML
# RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-vgpr-index-mode -run-pass=greedy -stress-regalloc=16 -o - %s | FileCheck -check-prefixes=GCN %s
|
|
|
|
# An interval for a register that was partially defined was split, creating
|
|
# a new use (a COPY) which was reached by the undef point. In particular,
|
|
# there was a subrange of the new register which was reached by an "undef"
|
|
# point. When the code in extendSegmentsToUses verified value numbers between
|
|
# the new and the old live ranges, it did not account for this kind of a
|
|
# situation and asserted expecting the old value to exist. For a PHI node
|
|
# it is legal to have a missing predecessor value as long as the end of
|
|
# the predecessor is jointly dominated by the undefs.
|
|
#
|
|
# A simplified form of this can be illustrated as
|
|
#
|
|
# bb.1:
|
|
# %0:vreg_64 = IMPLICIT_DEF
|
|
# ...
|
|
# S_CBRANCH_SCC1 %bb.2, implicit $vcc
|
|
# S_BRANCH %bb.3
|
|
#
|
|
# bb.2:
|
|
# ; predecessors: %bb.1, %bb.4
|
|
# dead %1:vreg_64 = COPY %0:vreg_64 ; This is the point of the inserted split
|
|
# ...
|
|
# S_BRANCH %bb.5
|
|
#
|
|
# bb.3:
|
|
# ; predecessors: %bb.1
|
|
# undef %0.sub0:vreg_64 = COPY %123:sreg_32 ; undef point for %0.sub1
|
|
# ...
|
|
# S_BRANCH %bb.4
|
|
#
|
|
# bb.4:
|
|
# ; predecessors: %bb.3
|
|
# ...
|
|
# S_BRANCH %bb.2
|
|
#
|
|
# This test exposes this scenario, which previously caused an assert.
|
|
|
|
---
|
|
# Function under test. The scratch resource register and stack pointer register
# declared below are the same ones named in the SI_SPILL_V128_SAVE check lines
# in the body.
name: _amdgpu_ps_main
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
|
stackPtrOffsetReg: $sgpr32
|
|
# Each live-in physical VGPR is paired with the virtual register that bb.0
# copies it into.
liveins:
|
|
- { reg: '$vgpr2', virtual-reg: '%0' }
|
|
- { reg: '$vgpr3', virtual-reg: '%1' }
|
|
- { reg: '$vgpr4', virtual-reg: '%2' }
|
|
body: |
|
|
bb.0:
|
|
successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
liveins: $vgpr2, $vgpr3, $vgpr4
|
|
; Entry block: copy the three live-in VGPRs into %2, %1 and %0, then branch
; to bb.2 on an undef $scc (otherwise fall through to bb.1).
%2:vgpr_32 = COPY $vgpr4
|
|
%1:vgpr_32 = COPY $vgpr3
|
|
%0:vgpr_32 = COPY $vgpr2
|
|
S_CBRANCH_SCC0 %bb.2, implicit undef $scc
|
|
|
|
bb.1:
|
|
successors: %bb.5(0x80000000)
|
|
; Partial definition of %3: sub0, sub1 and sub2 are written, sub3 is not.
undef %3.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
%3.sub1:vreg_128 = COPY %3.sub0
|
|
%3.sub2:vreg_128 = COPY %3.sub0
|
|
S_BRANCH %bb.5
|
|
|
|
bb.2:
|
|
successors: %bb.3(0x40000000), %bb.4(0x40000000)
|
|
; Pure control flow: branch to bb.4 on an undef $scc, otherwise fall through
; to bb.3.
S_CBRANCH_SCC0 %bb.4, implicit undef $scc
|
|
|
|
bb.3:
|
|
successors: %bb.5(0x80000000)
|
|
; Partial definition of %3 that differs from bb.1: only sub0 and sub1 are
; written on this path.
undef %3.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
%3.sub1:vreg_128 = COPY %3.sub0
|
|
S_BRANCH %bb.5
|
|
|
|
bb.4:
|
|
successors: %bb.5(0x80000000)
|
|
; Third path into bb.5: the whole of %3 is an IMPLICIT_DEF here. There is no
; terminator, so control falls through to bb.5.
%3:vreg_128 = IMPLICIT_DEF
|
|
|
|
bb.5:
|
|
successors: %bb.6(0x40000000), %bb.22(0x40000000)
|
|
; Join point of bb.1, bb.3 and bb.4. Initialises %4 to 0, then either skips
; to bb.22 or enters bb.6; %4 is overwritten in bb.23 and read in bb.24.
%4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
S_CBRANCH_SCC1 %bb.22, implicit undef $scc
|
|
S_BRANCH %bb.6
|
|
|
|
bb.6:
|
|
successors: %bb.8(0x40000000), %bb.11(0x40000000)
|
|
; Defines values that later blocks read: %5, %9.sub1, %10.sub0, all eight
; sub-registers of %11, the image sample result %12, and %14.
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
dead %6:vgpr_32 = V_MUL_F32_e32 0, undef %7:vgpr_32, implicit $mode, implicit $exec
|
|
dead %8:vgpr_32 = V_MUL_F32_e32 0, %2, implicit $mode, implicit $exec
|
|
undef %9.sub1:vreg_64 = V_MUL_F32_e32 0, %1, implicit $mode, implicit $exec
|
|
undef %10.sub0:vreg_128 = V_MUL_F32_e32 0, %0, implicit $mode, implicit $exec
|
|
; %11.sub0 is set to 0 and then copied into sub1..sub7.
undef %11.sub0:sgpr_256 = S_MOV_B32 0
|
|
%11.sub1:sgpr_256 = COPY %11.sub0
|
|
%11.sub2:sgpr_256 = COPY %11.sub0
|
|
%11.sub3:sgpr_256 = COPY %11.sub0
|
|
%11.sub4:sgpr_256 = COPY %11.sub0
|
|
%11.sub5:sgpr_256 = COPY %11.sub0
|
|
%11.sub6:sgpr_256 = COPY %11.sub0
|
|
%11.sub7:sgpr_256 = COPY %11.sub0
|
|
%12:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %9, %11, undef %13:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
|
|
%14:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
|
; %15 gets a full IMPLICIT_DEF here; bb.7, bb.9 and bb.10 each rewrite only
; sub0..sub2 of it, and the check lines in those blocks expect a V128 spill.
%15:vreg_128 = IMPLICIT_DEF
|
|
S_CBRANCH_SCC1 %bb.8, implicit undef $scc
|
|
S_BRANCH %bb.11
|
|
|
|
bb.7:
|
|
successors: %bb.13(0x80000000)
|
|
|
|
; In reality we are checking that this code doesn't assert when splitting
|
|
; and inserting a spill. Here we just check that, at the point where the error
|
|
; occurred, we see a correctly generated spill.
|
|
; GCN-LABEL: bb.7:
|
|
; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
|
|
|
|
; Rewrites %15.sub0..sub2 (sub3 is not written in this block) and turns %5
; into an IMPLICIT_DEF before joining bb.13.
undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
%15.sub1:vreg_128 = COPY %15.sub0
|
|
%15.sub2:vreg_128 = COPY %15.sub0
|
|
%5:vgpr_32 = IMPLICIT_DEF
|
|
S_BRANCH %bb.13
|
|
|
|
bb.8:
|
|
successors: %bb.9(0x40000000), %bb.10(0x40000000)
|
|
; Pure control flow: branch to bb.10 on an undef $scc, otherwise fall through
; to bb.9. Both successors rewrite %15.sub0..sub2.
S_CBRANCH_SCC0 %bb.10, implicit undef $scc
|
|
|
|
bb.9:
|
|
successors: %bb.12(0x80000000)
|
|
|
|
; Same expectation as in bb.7: a 128-bit VGPR spill to %stack.1 must appear
; in this block.
; GCN-LABEL: bb.9:
|
|
; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
|
|
|
|
; Rewrites %15.sub0..sub2 starting from the constant 0; sub3 is not written.
undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
%15.sub1:vreg_128 = COPY %15.sub0
|
|
%15.sub2:vreg_128 = COPY %15.sub0
|
|
S_BRANCH %bb.12
|
|
|
|
bb.10:
|
|
successors: %bb.12(0x80000000)
|
|
|
|
; Same expectation as in bb.7 and bb.9: a 128-bit VGPR spill to %stack.1 must
; appear in this block.
; GCN-LABEL: bb.10:
|
|
; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
|
|
|
|
; Like bb.9, but the value copied into sub0..sub2 is 2143289344 (0x7FC00000)
; instead of 0.
undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec
|
|
%15.sub1:vreg_128 = COPY %15.sub0
|
|
%15.sub2:vreg_128 = COPY %15.sub0
|
|
S_BRANCH %bb.12
|
|
|
|
bb.11:
|
|
successors: %bb.7(0x40000000), %bb.13(0x40000000)
|
|
; Reached from bb.6 (where %14 is -1) and from bb.12 (where %14 is 0).
; Compares %14 against 0, ANDs the result with $exec, and branches to bb.7
; when $vcc is non-zero, otherwise to bb.13.
%16:sreg_64 = V_CMP_NE_U32_e64 0, %14, implicit $exec
|
|
%17:sreg_64 = S_AND_B64 $exec, %16, implicit-def dead $scc
|
|
$vcc = COPY %17
|
|
S_CBRANCH_VCCNZ %bb.7, implicit $vcc
|
|
S_BRANCH %bb.13
|
|
|
|
bb.12:
|
|
successors: %bb.11(0x80000000)
|
|
; Join of bb.9 and bb.10: overwrite %14 with 0 and %5 with -1, then continue
; to bb.11, which tests %14.
%14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
%5:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
|
S_BRANCH %bb.11
|
|
|
|
bb.13:
|
|
successors: %bb.15(0x40000000), %bb.14(0x40000000)
|
|
|
|
; Long straight-line run of FP arithmetic that reads %10.sub0 and %12.sub0
; (both defined in bb.6) and produces %40, which bb.15 consumes.
; NOTE(review): presumably this block exists to keep many virtual registers
; live at once under -stress-regalloc=16 - confirm.
%18:vgpr_32 = V_MAD_F32 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $mode, implicit $exec
|
|
%19:vgpr_32 = V_MAD_F32 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%20:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sgpr_128, 1040, 0, 0 :: (dereferenceable invariant load 16)
|
|
%22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $mode, implicit $exec
|
|
%23:vgpr_32 = V_MAD_F32 0, %18, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%24:vgpr_32 = COPY %20.sub3
|
|
%25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $mode, implicit $exec
|
|
%26:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sgpr_128, 1056, 0, 0 :: (dereferenceable invariant load 16)
|
|
%28:vgpr_32 = V_MAD_F32 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $mode, implicit $exec
|
|
%30:vgpr_32 = V_RCP_F32_e32 %29, implicit $mode, implicit $exec
|
|
%25:vgpr_32 = V_MAC_F32_e32 0, %18, %25, implicit $mode, implicit $exec
|
|
%31:vgpr_32 = V_MAD_F32 0, target-flags(amdgpu-gotprel) 0, 0, %12.sub0, 0, %24, 0, 0, implicit $mode, implicit $exec
|
|
%32:vgpr_32 = V_ADD_F32_e32 %25, %31, implicit $mode, implicit $exec
|
|
%33:vgpr_32 = V_MUL_F32_e32 %22, %30, implicit $mode, implicit $exec
|
|
%34:vgpr_32 = V_MUL_F32_e32 %23, %30, implicit $mode, implicit $exec
|
|
%35:vgpr_32 = V_MUL_F32_e32 %32, %30, implicit $mode, implicit $exec
|
|
%36:vgpr_32 = V_MUL_F32_e32 0, %34, implicit $mode, implicit $exec
|
|
%36:vgpr_32 = V_MAC_F32_e32 0, %33, %36, implicit $mode, implicit $exec
|
|
%37:vgpr_32 = V_MAD_F32 0, %35, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; The branch condition is derived from %5, which is written in bb.6, bb.7
; and bb.12.
%38:sreg_64_xexec = V_CMP_NE_U32_e64 0, %5, implicit $exec
|
|
%39:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %38, implicit $exec
|
|
V_CMP_NE_U32_e32 1, %39, implicit-def $vcc, implicit $exec
|
|
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
|
|
%40:vgpr_32 = V_ADD_F32_e32 %36, %37, implicit $mode, implicit $exec
|
|
; Branch to bb.15 when $vcc is zero; otherwise fall through to bb.14.
S_CBRANCH_VCCZ %bb.15, implicit $vcc
|
|
|
|
bb.14:
|
|
successors: %bb.17(0x80000000)
|
|
; Forwarding block: no computation, just an unconditional branch to bb.17.
S_BRANCH %bb.17
|
|
|
|
bb.15:
|
|
successors: %bb.16(0x40000000), %bb.18(0x40000000)
|
|
; Builds a lane mask from two FP compares on %41 (LE with operand 0, GE with
; operand 1065353216) and combines them into %46.
%41:vgpr_32 = V_MAD_F32 0, %40, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%42:sreg_64 = V_CMP_LE_F32_e64 0, 0, 0, %41, 0, implicit $mode, implicit $exec
|
|
%43:sreg_64 = V_CMP_GE_F32_e64 0, 1065353216, 0, %41, 0, implicit $mode, implicit $exec
|
|
%44:sreg_64 = S_AND_B64 %43, %43, implicit-def dead $scc
|
|
%45:sreg_64 = S_AND_B64 %42, %42, implicit-def dead $scc
|
|
%46:sreg_64 = S_AND_B64 %45, %44, implicit-def dead $scc
|
|
; Keep the current $exec in %47 (bb.18 ORs it back into $exec), then narrow
; $exec to %47 & %46 before the masked branch.
%47:sreg_64 = COPY $exec, implicit-def $exec
|
|
%48:sreg_64 = S_AND_B64 %47, %46, implicit-def dead $scc
|
|
$exec = S_MOV_B64_term %48
|
|
SI_MASK_BRANCH %bb.18, implicit $exec
|
|
S_BRANCH %bb.16
|
|
|
|
bb.16:
|
|
successors: %bb.18(0x80000000)
|
|
; Forwarding block: no computation, just an unconditional branch to bb.18.
S_BRANCH %bb.18
|
|
|
|
bb.17:
|
|
successors: %bb.21(0x40000000), %bb.23(0x40000000)
|
|
; Tests %5 against 0 and branches to bb.21 when the masked result is
; non-zero, otherwise to bb.23. %51 is a full IMPLICIT_DEF here; bb.21
; rewrites only %51.sub0, and bb.23 reads %51.sub0.
%49:sreg_64 = V_CMP_NE_U32_e64 0, %5, implicit $exec
|
|
%50:sreg_64 = S_AND_B64 $exec, %49, implicit-def dead $scc
|
|
%51:vreg_128 = IMPLICIT_DEF
|
|
$vcc = COPY %50
|
|
S_CBRANCH_VCCNZ %bb.21, implicit $vcc
|
|
S_BRANCH %bb.23
|
|
|
|
bb.18:
|
|
successors: %bb.20(0x40000000), %bb.19(0x40000000)
|
|
; ORs %47 (the $exec value saved in bb.15) back into $exec.
$exec = S_OR_B64 $exec, %47, implicit-def $scc
|
|
; Reads %3.sub0, %3.sub1 and %3.sub2. Of the three definitions of %3, bb.1
; writes sub0..sub2, bb.3 writes only sub0 and sub1, and bb.4 is a full
; IMPLICIT_DEF.
%52:vgpr_32 = V_MAD_F32 0, %3.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 1, %3.sub0, 0, 0, implicit $mode, implicit $exec
|
|
%53:vgpr_32 = V_MUL_F32_e32 -2147483648, %3.sub1, implicit $mode, implicit $exec
|
|
%53:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel32-hi) 1065353216, %3.sub2, %53, implicit $mode, implicit $exec
|
|
%54:vgpr_32 = V_MUL_F32_e32 %53, %53, implicit $mode, implicit $exec
|
|
%54:vgpr_32 = V_MAC_F32_e32 %52, %52, %54, implicit $mode, implicit $exec
|
|
%55:vgpr_32 = V_SQRT_F32_e32 %54, implicit $mode, implicit $exec
|
|
; %5 is reset to 0 here; bb.17 tests it afterwards.
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
%56:vgpr_32 = V_MOV_B32_e32 981668463, implicit $exec
|
|
%57:sreg_64 = V_CMP_NGT_F32_e64 0, %55, 0, %56, 0, implicit $mode, implicit $exec
|
|
%58:sreg_64 = S_AND_B64 $exec, %57, implicit-def dead $scc
|
|
$vcc = COPY %58
|
|
; Branch to bb.20 when $vcc is zero; otherwise fall through to bb.19.
S_CBRANCH_VCCZ %bb.20, implicit $vcc
|
|
|
|
bb.19:
|
|
successors: %bb.17(0x80000000)
|
|
; Forwarding block: no computation, just an unconditional branch to bb.17.
S_BRANCH %bb.17
|
|
|
|
bb.20:
|
|
successors: %bb.17(0x80000000)
|
|
; Forwarding block: no computation, just an unconditional branch to bb.17.
S_BRANCH %bb.17
|
|
|
|
bb.21:
|
|
successors: %bb.23(0x80000000)
|
|
; Partial redefinition of %51: only sub0 is written (from the scalar
; constant 0 in %59); the other sub-registers are not written in this block.
%59:sreg_32 = S_MOV_B32 0
|
|
undef %51.sub0:vreg_128 = COPY %59
|
|
S_BRANCH %bb.23
|
|
|
|
bb.22:
|
|
successors: %bb.24(0x80000000)
|
|
; Forwarding block reached from bb.5 and bb.23: unconditional branch to the
; final block bb.24.
S_BRANCH %bb.24
|
|
|
|
bb.23:
|
|
successors: %bb.22(0x80000000)
|
|
; Consumer block for the partially defined registers: it reads %15.sub0,
; %15.sub1, %15.sub2 and %51.sub0, and finally overwrites %4, which bb.24
; reads.
undef %60.sub1:vreg_64 = V_CVT_I32_F32_e32 %1, implicit $mode, implicit $exec
|
|
%60.sub0:vreg_64 = V_CVT_I32_F32_e32 %0, implicit $mode, implicit $exec
|
|
; %61.sub0 is set to 0 and then copied into sub1..sub7, the same pattern
; used for %11 in bb.6.
undef %61.sub0:sgpr_256 = S_MOV_B32 0
|
|
%61.sub1:sgpr_256 = COPY %61.sub0
|
|
%61.sub2:sgpr_256 = COPY %61.sub0
|
|
%61.sub3:sgpr_256 = COPY %61.sub0
|
|
%61.sub4:sgpr_256 = COPY %61.sub0
|
|
%61.sub5:sgpr_256 = COPY %61.sub0
|
|
%61.sub6:sgpr_256 = COPY %61.sub0
|
|
%61.sub7:sgpr_256 = COPY %61.sub0
|
|
%62:vgpr_32 = V_MOV_B32_e32 1033100696, implicit $exec
|
|
%63:vgpr_32 = V_MUL_F32_e32 1060575065, %15.sub1, implicit $mode, implicit $exec
|
|
%63:vgpr_32 = V_MAC_F32_e32 1046066128, %15.sub0, %63, implicit $mode, implicit $exec
|
|
%64:vgpr_32 = IMAGE_LOAD_V1_V2 %60, %61, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
|
|
%64:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel) 0, %51.sub0, %64, implicit $mode, implicit $exec
|
|
%65:vgpr_32 = V_MUL_F32_e32 0, %64, implicit $mode, implicit $exec
|
|
%66:vgpr_32 = V_MUL_F32_e32 0, %65, implicit $mode, implicit $exec
|
|
%67:vgpr_32 = V_MAD_F32 0, %66, 0, %62, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%63:vgpr_32 = V_MAC_F32_e32 %15.sub2, %62, %63, implicit $mode, implicit $exec
|
|
%4:vgpr_32 = V_ADD_F32_e32 %63, %67, implicit $mode, implicit $exec
|
|
S_BRANCH %bb.22
|
|
|
|
bb.24:
|
|
; Final block: scales %4 (written in bb.5 or bb.23), packs the result into
; %69, exports it, and ends the program.
%68:vgpr_32 = V_MUL_F32_e32 0, %4, implicit $mode, implicit $exec
|
|
%69:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %70:vgpr_32, 0, %68, 0, 0, implicit $mode, implicit $exec
|
|
EXP 0, undef %71:vgpr_32, %69, undef %72:vgpr_32, undef %73:vgpr_32, -1, -1, 15, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|