mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
654464aac1
When splitting a live interval with subranges, only insert copies for the lanes that are live at the point of the split. This avoids some unnecessary copies and fixes a problem where copying dead lanes was generating MIR that failed verification. The test case for this is test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir.

Without this fix, some earlier live range splitting would create %430:

%430 [256r,848r:0)[848r,2584r:1) 0@256r 1@848r L0000000000000003 [848r,2584r:0) 0@848r L0000000000000030 [256r,2584r:0) 0@256r weight:1.480938e-03
...
256B undef %430.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %20.sub1:vreg_128, implicit $exec
...
848B %430.sub0:vreg_128 = V_AND_B32_e32 %92:sreg_32, %20.sub1:vreg_128, implicit $exec
...
2584B %431:vreg_128 = COPY %430:vreg_128

Then RAGreedy::tryLocalSplit would split %430 into %432 and %433 just before 848B giving:

%432 [256r,844r:0) 0@256r L0000000000000030 [256r,844r:0) 0@256r weight:3.066802e-03
%433 [844r,848r:0)[848r,2584r:1) 0@844r 1@848r L0000000000000030 [844r,2584r:0) 0@844r L0000000000000003 [844r,844d:0)[848r,2584r:1) 0@844r 1@848r weight:2.831776e-03
...
256B undef %432.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %20.sub1:vreg_128, implicit $exec
...
844B undef %433.sub0:vreg_128 = COPY %432.sub0:vreg_128 {
       internal %433.sub2:vreg_128 = COPY %432.sub2:vreg_128
848B }
     %433.sub0:vreg_128 = V_AND_B32_e32 %92:sreg_32, %20.sub1:vreg_128, implicit $exec
...
2584B %431:vreg_128 = COPY %433:vreg_128

Note that the copy from %432 to %433 at 844B is a curious bundle-without-a-BUNDLE-instruction that SplitKit creates deliberately, and it includes a copy of .sub0 which is not live at this point, and that causes it to fail verification:

*** Bad machine code: No live subrange at use ***
- function: zextload_global_v64i16_to_v64i64
- basic block: %bb.0 (0x7faed48) [0B;2848B)
- instruction: 844B undef %433.sub0:vreg_128 = COPY %432.sub0:vreg_128
- operand 1: %432.sub0:vreg_128
- interval: %432 [256r,844r:0) 0@256r L0000000000000030 [256r,844r:0) 0@256r weight:3.066802e-03
- at: 844B

Using real bundles with a BUNDLE instruction might also fix this problem, but the current fix is less invasive and also avoids some unnecessary copies.

https://bugs.llvm.org/show_bug.cgi?id=47492

Differential Revision: https://reviews.llvm.org/D87757
241 lines
11 KiB
YAML
241 lines
11 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefix=RA %s
|
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy,virtregrewriter,post-RA-sched -o - -verify-machineinstrs %s | FileCheck -check-prefix=VR %s
|
|
|
|
# MIR document header for the single test function, splitkit_copy_bundle.
# The RUN lines at the top of the file run this function through the greedy
# register allocator (RA prefix) and through greedy + virtregrewriter +
# post-RA-sched (VR prefix), both with -verify-machineinstrs.
---
|
|
name: splitkit_copy_bundle
|
|
# The function declares that register liveness is tracked.
tracksRegLiveness: true
|
|
# Target-specific function info; the two keys below belong to this mapping.
# NOTE(review): presumably these name the scratch resource descriptor and the
# stack pointer offset registers for the AMDGPU backend -- confirm.
machineFunctionInfo:
|
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
# Everything after this key is the machine IR body (a YAML block scalar).
body: |
|
|
; RA-LABEL: name: splitkit_copy_bundle
|
|
; RA: bb.0:
|
|
; RA: successors: %bb.1(0x80000000)
|
|
; RA: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; RA: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; RA: undef %5.sub1:sgpr_1024 = S_MOV_B32 -1
|
|
; RA: %5.sub0:sgpr_1024 = S_MOV_B32 -1
|
|
; RA: undef %4.sub0_sub1:sgpr_1024 = COPY %5.sub0_sub1
|
|
; RA: undef %3.sub0:sgpr_1024 = S_MOV_B32 0
|
|
; RA: bb.1:
|
|
; RA: successors: %bb.2(0x80000000)
|
|
; RA: undef %6.sub0_sub1:sgpr_1024 = COPY %4.sub0_sub1
|
|
; RA: %6.sub2:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub3:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub4:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub5:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub6:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub7:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub8:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub9:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub10:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub11:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub12:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub13:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub14:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub15:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub16:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub17:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub18:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub19:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub20:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub21:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub22:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub23:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub24:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub25:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub26:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub27:sgpr_1024 = COPY %6.sub1
|
|
; RA: %6.sub28:sgpr_1024 = COPY %6.sub0
|
|
; RA: %6.sub29:sgpr_1024 = COPY %6.sub1
|
|
; RA: undef %4.sub0_sub1:sgpr_1024 = COPY %6.sub0_sub1
|
|
; RA: %3.sub1:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub2:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub3:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub4:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub5:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub6:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub7:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub8:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub9:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub10:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub11:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub12:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub13:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub14:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub15:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub16:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub17:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub18:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub19:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub20:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub21:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub22:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub23:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub24:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub25:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub26:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub27:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub28:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub29:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub30:sgpr_1024 = COPY %3.sub0
|
|
; RA: %3.sub31:sgpr_1024 = COPY %3.sub0
|
|
; RA: bb.2:
|
|
; RA: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
; RA: S_NOP 0, csr_amdgpu_highregs, implicit [[DEF]], implicit [[DEF1]]
|
|
; RA: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
|
|
; RA: S_BRANCH %bb.2
|
|
; VR-LABEL: name: splitkit_copy_bundle
|
|
; VR: bb.0:
|
|
; VR: successors: %bb.1(0x80000000)
|
|
; VR: renamable $sgpr69 = S_MOV_B32 -1
|
|
; VR: renamable $sgpr68 = S_MOV_B32 -1
|
|
; VR: renamable $sgpr36 = S_MOV_B32 0
|
|
; VR: renamable $sgpr34_sgpr35 = IMPLICIT_DEF
|
|
; VR: renamable $sgpr70_sgpr71 = IMPLICIT_DEF
|
|
; VR: bb.1:
|
|
; VR: successors: %bb.2(0x80000000)
|
|
; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x000000000000000F, $sgpr34_sgpr35, $sgpr70_sgpr71
|
|
; VR: renamable $sgpr40_sgpr41 = COPY killed renamable $sgpr68_sgpr69
|
|
; VR: renamable $sgpr42 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr43 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr44 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr45 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr46 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr47 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr48 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr49 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr50 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr51 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr52 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr53 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr54 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr55 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr56 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr57 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr58 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr59 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr60 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr61 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr62 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr63 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr64 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr65 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr66 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr67 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr68 = COPY renamable $sgpr40
|
|
; VR: renamable $sgpr69 = COPY renamable $sgpr41
|
|
; VR: renamable $sgpr68_sgpr69 = COPY killed renamable $sgpr40_sgpr41
|
|
; VR: renamable $sgpr37 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr38 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr39 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr40 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr41 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr42 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr43 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr44 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr45 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr46 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr47 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr48 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr49 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr50 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr51 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr52 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr53 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr54 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr55 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr56 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr57 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr58 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr59 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr60 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr61 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr62 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr63 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr64 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr65 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr66 = COPY renamable $sgpr36
|
|
; VR: renamable $sgpr67 = COPY renamable $sgpr36
|
|
; VR: bb.2:
|
|
; VR: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x000000000000000F, $sgpr34_sgpr35, $sgpr70_sgpr71
|
|
; VR: S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr34_sgpr35, implicit renamable $sgpr70_sgpr71
|
|
; VR: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
|
|
; VR: S_BRANCH %bb.2
|
|
; bb.0: entry block that creates every value used by the rest of the test.
;  - %0 and %1 are sreg_64 values produced by IMPLICIT_DEF; they are only
;    consumed as implicit operands of the S_NOP in bb.2.
;  - %2 and %3 are sgpr_1024 tuples. Only %2.sub1, %2.sub0 and %3.sub0 are
;    written here; the first write to each tuple carries the `undef` flag
;    because the remaining subregisters hold no value yet.
bb.0:
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:sreg_64 = IMPLICIT_DEF
|
|
undef %2.sub1:sgpr_1024 = S_MOV_B32 -1
|
|
%2.sub0:sgpr_1024 = S_MOV_B32 -1
|
|
undef %3.sub0:sgpr_1024 = S_MOV_B32 0
|
|
|
|
; bb.1: fills the two 1024-bit tuples one 32-bit subregister at a time.
; %2.sub2 through %2.sub29 are written by alternating copies: even
; subregisters copy %2.sub0 and odd subregisters copy %2.sub1.
; %2.sub30 and %2.sub31 are never written anywhere in this function.
; The instruction order is the stimulus for the allocator and the CHECK lines
; are autogenerated from it, so regenerate them with
; utils/update_mir_test_checks.py after any change here.
bb.1:
|
|
%2.sub2:sgpr_1024 = COPY %2.sub0
|
|
%2.sub3:sgpr_1024 = COPY %2.sub1
|
|
%2.sub4:sgpr_1024 = COPY %2.sub0
|
|
%2.sub5:sgpr_1024 = COPY %2.sub1
|
|
%2.sub6:sgpr_1024 = COPY %2.sub0
|
|
%2.sub7:sgpr_1024 = COPY %2.sub1
|
|
%2.sub8:sgpr_1024 = COPY %2.sub0
|
|
%2.sub9:sgpr_1024 = COPY %2.sub1
|
|
%2.sub10:sgpr_1024 = COPY %2.sub0
|
|
%2.sub11:sgpr_1024 = COPY %2.sub1
|
|
%2.sub12:sgpr_1024 = COPY %2.sub0
|
|
%2.sub13:sgpr_1024 = COPY %2.sub1
|
|
%2.sub14:sgpr_1024 = COPY %2.sub0
|
|
%2.sub15:sgpr_1024 = COPY %2.sub1
|
|
%2.sub16:sgpr_1024 = COPY %2.sub0
|
|
%2.sub17:sgpr_1024 = COPY %2.sub1
|
|
%2.sub18:sgpr_1024 = COPY %2.sub0
|
|
%2.sub19:sgpr_1024 = COPY %2.sub1
|
|
%2.sub20:sgpr_1024 = COPY %2.sub0
|
|
%2.sub21:sgpr_1024 = COPY %2.sub1
|
|
%2.sub22:sgpr_1024 = COPY %2.sub0
|
|
%2.sub23:sgpr_1024 = COPY %2.sub1
|
|
%2.sub24:sgpr_1024 = COPY %2.sub0
|
|
%2.sub25:sgpr_1024 = COPY %2.sub1
|
|
%2.sub26:sgpr_1024 = COPY %2.sub0
|
|
%2.sub27:sgpr_1024 = COPY %2.sub1
|
|
%2.sub28:sgpr_1024 = COPY %2.sub0
|
|
%2.sub29:sgpr_1024 = COPY %2.sub1
|
|
; Every remaining subregister of %3 (sub1 through sub31) is a copy of %3.sub0.
%3.sub1:sgpr_1024 = COPY %3.sub0
|
|
%3.sub2:sgpr_1024 = COPY %3.sub0
|
|
%3.sub3:sgpr_1024 = COPY %3.sub0
|
|
%3.sub4:sgpr_1024 = COPY %3.sub0
|
|
%3.sub5:sgpr_1024 = COPY %3.sub0
|
|
%3.sub6:sgpr_1024 = COPY %3.sub0
|
|
%3.sub7:sgpr_1024 = COPY %3.sub0
|
|
%3.sub8:sgpr_1024 = COPY %3.sub0
|
|
%3.sub9:sgpr_1024 = COPY %3.sub0
|
|
%3.sub10:sgpr_1024 = COPY %3.sub0
|
|
%3.sub11:sgpr_1024 = COPY %3.sub0
|
|
%3.sub12:sgpr_1024 = COPY %3.sub0
|
|
%3.sub13:sgpr_1024 = COPY %3.sub0
|
|
%3.sub14:sgpr_1024 = COPY %3.sub0
|
|
%3.sub15:sgpr_1024 = COPY %3.sub0
|
|
%3.sub16:sgpr_1024 = COPY %3.sub0
|
|
%3.sub17:sgpr_1024 = COPY %3.sub0
|
|
%3.sub18:sgpr_1024 = COPY %3.sub0
|
|
%3.sub19:sgpr_1024 = COPY %3.sub0
|
|
%3.sub20:sgpr_1024 = COPY %3.sub0
|
|
%3.sub21:sgpr_1024 = COPY %3.sub0
|
|
%3.sub22:sgpr_1024 = COPY %3.sub0
|
|
%3.sub23:sgpr_1024 = COPY %3.sub0
|
|
%3.sub24:sgpr_1024 = COPY %3.sub0
|
|
%3.sub25:sgpr_1024 = COPY %3.sub0
|
|
%3.sub26:sgpr_1024 = COPY %3.sub0
|
|
%3.sub27:sgpr_1024 = COPY %3.sub0
|
|
%3.sub28:sgpr_1024 = COPY %3.sub0
|
|
%3.sub29:sgpr_1024 = COPY %3.sub0
|
|
%3.sub30:sgpr_1024 = COPY %3.sub0
|
|
%3.sub31:sgpr_1024 = COPY %3.sub0
|
|
|
|
; bb.2: loop tail. The S_NOP reads %0 and %1 through implicit operands and
; also carries the csr_amdgpu_highregs register-mask operand.
; NOTE(review): the regmask presumably limits which physical registers may
; hold values that are live across this instruction, which is what pushes the
; greedy allocator into splitting the wide tuples -- confirm against the
; AMDGPU calling-convention definitions.
; Control then branches back to bb.1 when the (undef) $vcc condition is taken,
; and otherwise branches unconditionally to bb.2 itself.
bb.2:
|
|
S_NOP 0, implicit %0, implicit %1, csr_amdgpu_highregs
|
|
S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
|
|
S_BRANCH %bb.2
|
|
|
|
...
|