1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00
llvm-mirror/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
Jay Foad 654464aac1 [SplitKit] Only copy live lanes
When splitting a live interval with subranges, only insert copies for
the lanes that are live at the point of the split. This avoids some
unnecessary copies and fixes a problem where copying dead lanes was
generating MIR that failed verification. The test case for this is
test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir.

Without this fix, some earlier live range splitting would create %430:

%430 [256r,848r:0)[848r,2584r:1)  0@256r 1@848r L0000000000000003 [848r,2584r:0)  0@848r L0000000000000030 [256r,2584r:0)  0@256r weight:1.480938e-03
...
256B     undef %430.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %20.sub1:vreg_128, implicit $exec
...
848B     %430.sub0:vreg_128 = V_AND_B32_e32 %92:sreg_32, %20.sub1:vreg_128, implicit $exec
...
2584B    %431:vreg_128 = COPY %430:vreg_128

Then RAGreedy::tryLocalSplit would split %430 into %432 and %433 just
before 848B giving:

%432 [256r,844r:0)  0@256r L0000000000000030 [256r,844r:0)  0@256r weight:3.066802e-03
%433 [844r,848r:0)[848r,2584r:1)  0@844r 1@848r L0000000000000030 [844r,2584r:0)  0@844r L0000000000000003 [844r,844d:0)[848r,2584r:1)  0@844r 1@848r weight:2.831776e-03
...
256B     undef %432.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %20.sub1:vreg_128, implicit $exec
...
844B     undef %433.sub0:vreg_128 = COPY %432.sub0:vreg_128 {
           internal %433.sub2:vreg_128 = COPY %432.sub2:vreg_128
848B     }
  %433.sub0:vreg_128 = V_AND_B32_e32 %92:sreg_32, %20.sub1:vreg_128, implicit $exec
...
2584B    %431:vreg_128 = COPY %433:vreg_128

Note that the copy from %432 to %433 at 844B is a curious
bundle-without-a-BUNDLE-instruction that SplitKit creates deliberately,
and it includes a copy of .sub0 which is not live at this point, and
that causes it to fail verification:

*** Bad machine code: No live subrange at use ***
- function:    zextload_global_v64i16_to_v64i64
- basic block: %bb.0  (0x7faed48) [0B;2848B)
- instruction: 844B    undef %433.sub0:vreg_128 = COPY %432.sub0:vreg_128
- operand 1:   %432.sub0:vreg_128
- interval:    %432 [256r,844r:0)  0@256r L0000000000000030 [256r,844r:0)  0@256r weight:3.066802e-03
- at:          844B

Using real bundles with a BUNDLE instruction might also fix this
problem, but the current fix is less invasive and also avoids some
unnecessary copies.

https://bugs.llvm.org/show_bug.cgi?id=47492

Differential Revision: https://reviews.llvm.org/D87757
2020-09-17 09:26:11 +01:00

241 lines
11 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefix=RA %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy,virtregrewriter,post-RA-sched -o - -verify-machineinstrs %s | FileCheck -check-prefix=VR %s
# Register-allocation test for live-range splitting of sgpr_1024 registers.
# The lit commands at the top of this file run the greedy allocator on its own
# (checked with the RA prefix) and greedy followed by virtregrewriter and
# post-RA-sched (checked with the VR prefix), both with -verify-machineinstrs.
#
# Keys nested under machineFunctionInfo and everything inside the `body: |`
# literal block must stay indented; without that indentation this document is
# not valid YAML and the MIR parser cannot load it.
---
name: splitkit_copy_bundle
tracksRegLiveness: true
machineFunctionInfo:
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  ; RA-LABEL: name: splitkit_copy_bundle
  ; RA: bb.0:
  ; RA: successors: %bb.1(0x80000000)
  ; RA: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
  ; RA: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
  ; RA: undef %5.sub1:sgpr_1024 = S_MOV_B32 -1
  ; RA: %5.sub0:sgpr_1024 = S_MOV_B32 -1
  ; RA: undef %4.sub0_sub1:sgpr_1024 = COPY %5.sub0_sub1
  ; RA: undef %3.sub0:sgpr_1024 = S_MOV_B32 0
  ; RA: bb.1:
  ; RA: successors: %bb.2(0x80000000)
  ; RA: undef %6.sub0_sub1:sgpr_1024 = COPY %4.sub0_sub1
  ; RA: %6.sub2:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub3:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub4:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub5:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub6:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub7:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub8:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub9:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub10:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub11:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub12:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub13:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub14:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub15:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub16:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub17:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub18:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub19:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub20:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub21:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub22:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub23:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub24:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub25:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub26:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub27:sgpr_1024 = COPY %6.sub1
  ; RA: %6.sub28:sgpr_1024 = COPY %6.sub0
  ; RA: %6.sub29:sgpr_1024 = COPY %6.sub1
  ; RA: undef %4.sub0_sub1:sgpr_1024 = COPY %6.sub0_sub1
  ; RA: %3.sub1:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub2:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub3:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub4:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub5:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub6:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub7:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub8:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub9:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub10:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub11:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub12:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub13:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub14:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub15:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub16:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub17:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub18:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub19:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub20:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub21:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub22:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub23:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub24:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub25:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub26:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub27:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub28:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub29:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub30:sgpr_1024 = COPY %3.sub0
  ; RA: %3.sub31:sgpr_1024 = COPY %3.sub0
  ; RA: bb.2:
  ; RA: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; RA: S_NOP 0, csr_amdgpu_highregs, implicit [[DEF]], implicit [[DEF1]]
  ; RA: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
  ; RA: S_BRANCH %bb.2
  ; VR-LABEL: name: splitkit_copy_bundle
  ; VR: bb.0:
  ; VR: successors: %bb.1(0x80000000)
  ; VR: renamable $sgpr69 = S_MOV_B32 -1
  ; VR: renamable $sgpr68 = S_MOV_B32 -1
  ; VR: renamable $sgpr36 = S_MOV_B32 0
  ; VR: renamable $sgpr34_sgpr35 = IMPLICIT_DEF
  ; VR: renamable $sgpr70_sgpr71 = IMPLICIT_DEF
  ; VR: bb.1:
  ; VR: successors: %bb.2(0x80000000)
  ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x000000000000000F, $sgpr34_sgpr35, $sgpr70_sgpr71
  ; VR: renamable $sgpr40_sgpr41 = COPY killed renamable $sgpr68_sgpr69
  ; VR: renamable $sgpr42 = COPY renamable $sgpr40
  ; VR: renamable $sgpr43 = COPY renamable $sgpr41
  ; VR: renamable $sgpr44 = COPY renamable $sgpr40
  ; VR: renamable $sgpr45 = COPY renamable $sgpr41
  ; VR: renamable $sgpr46 = COPY renamable $sgpr40
  ; VR: renamable $sgpr47 = COPY renamable $sgpr41
  ; VR: renamable $sgpr48 = COPY renamable $sgpr40
  ; VR: renamable $sgpr49 = COPY renamable $sgpr41
  ; VR: renamable $sgpr50 = COPY renamable $sgpr40
  ; VR: renamable $sgpr51 = COPY renamable $sgpr41
  ; VR: renamable $sgpr52 = COPY renamable $sgpr40
  ; VR: renamable $sgpr53 = COPY renamable $sgpr41
  ; VR: renamable $sgpr54 = COPY renamable $sgpr40
  ; VR: renamable $sgpr55 = COPY renamable $sgpr41
  ; VR: renamable $sgpr56 = COPY renamable $sgpr40
  ; VR: renamable $sgpr57 = COPY renamable $sgpr41
  ; VR: renamable $sgpr58 = COPY renamable $sgpr40
  ; VR: renamable $sgpr59 = COPY renamable $sgpr41
  ; VR: renamable $sgpr60 = COPY renamable $sgpr40
  ; VR: renamable $sgpr61 = COPY renamable $sgpr41
  ; VR: renamable $sgpr62 = COPY renamable $sgpr40
  ; VR: renamable $sgpr63 = COPY renamable $sgpr41
  ; VR: renamable $sgpr64 = COPY renamable $sgpr40
  ; VR: renamable $sgpr65 = COPY renamable $sgpr41
  ; VR: renamable $sgpr66 = COPY renamable $sgpr40
  ; VR: renamable $sgpr67 = COPY renamable $sgpr41
  ; VR: renamable $sgpr68 = COPY renamable $sgpr40
  ; VR: renamable $sgpr69 = COPY renamable $sgpr41
  ; VR: renamable $sgpr68_sgpr69 = COPY killed renamable $sgpr40_sgpr41
  ; VR: renamable $sgpr37 = COPY renamable $sgpr36
  ; VR: renamable $sgpr38 = COPY renamable $sgpr36
  ; VR: renamable $sgpr39 = COPY renamable $sgpr36
  ; VR: renamable $sgpr40 = COPY renamable $sgpr36
  ; VR: renamable $sgpr41 = COPY renamable $sgpr36
  ; VR: renamable $sgpr42 = COPY renamable $sgpr36
  ; VR: renamable $sgpr43 = COPY renamable $sgpr36
  ; VR: renamable $sgpr44 = COPY renamable $sgpr36
  ; VR: renamable $sgpr45 = COPY renamable $sgpr36
  ; VR: renamable $sgpr46 = COPY renamable $sgpr36
  ; VR: renamable $sgpr47 = COPY renamable $sgpr36
  ; VR: renamable $sgpr48 = COPY renamable $sgpr36
  ; VR: renamable $sgpr49 = COPY renamable $sgpr36
  ; VR: renamable $sgpr50 = COPY renamable $sgpr36
  ; VR: renamable $sgpr51 = COPY renamable $sgpr36
  ; VR: renamable $sgpr52 = COPY renamable $sgpr36
  ; VR: renamable $sgpr53 = COPY renamable $sgpr36
  ; VR: renamable $sgpr54 = COPY renamable $sgpr36
  ; VR: renamable $sgpr55 = COPY renamable $sgpr36
  ; VR: renamable $sgpr56 = COPY renamable $sgpr36
  ; VR: renamable $sgpr57 = COPY renamable $sgpr36
  ; VR: renamable $sgpr58 = COPY renamable $sgpr36
  ; VR: renamable $sgpr59 = COPY renamable $sgpr36
  ; VR: renamable $sgpr60 = COPY renamable $sgpr36
  ; VR: renamable $sgpr61 = COPY renamable $sgpr36
  ; VR: renamable $sgpr62 = COPY renamable $sgpr36
  ; VR: renamable $sgpr63 = COPY renamable $sgpr36
  ; VR: renamable $sgpr64 = COPY renamable $sgpr36
  ; VR: renamable $sgpr65 = COPY renamable $sgpr36
  ; VR: renamable $sgpr66 = COPY renamable $sgpr36
  ; VR: renamable $sgpr67 = COPY renamable $sgpr36
  ; VR: bb.2:
  ; VR: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x000000000000000F, $sgpr34_sgpr35, $sgpr70_sgpr71
  ; VR: S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr34_sgpr35, implicit renamable $sgpr70_sgpr71
  ; VR: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
  ; VR: S_BRANCH %bb.2

  ; Input MIR. %2 and %3 are sgpr_1024 virtual registers that are written one
  ; subregister at a time. In the expected allocator output above, %2 has been
  ; split and the copies that were inserted move only sub0_sub1.
  bb.0:
    %0:sreg_64 = IMPLICIT_DEF
    %1:sreg_64 = IMPLICIT_DEF
    undef %2.sub1:sgpr_1024 = S_MOV_B32 -1
    %2.sub0:sgpr_1024 = S_MOV_B32 -1
    undef %3.sub0:sgpr_1024 = S_MOV_B32 0

  ; Loop body: fills %2.sub2-sub29 from %2.sub0/%2.sub1 and %3.sub1-sub31
  ; from %3.sub0.
  bb.1:
    %2.sub2:sgpr_1024 = COPY %2.sub0
    %2.sub3:sgpr_1024 = COPY %2.sub1
    %2.sub4:sgpr_1024 = COPY %2.sub0
    %2.sub5:sgpr_1024 = COPY %2.sub1
    %2.sub6:sgpr_1024 = COPY %2.sub0
    %2.sub7:sgpr_1024 = COPY %2.sub1
    %2.sub8:sgpr_1024 = COPY %2.sub0
    %2.sub9:sgpr_1024 = COPY %2.sub1
    %2.sub10:sgpr_1024 = COPY %2.sub0
    %2.sub11:sgpr_1024 = COPY %2.sub1
    %2.sub12:sgpr_1024 = COPY %2.sub0
    %2.sub13:sgpr_1024 = COPY %2.sub1
    %2.sub14:sgpr_1024 = COPY %2.sub0
    %2.sub15:sgpr_1024 = COPY %2.sub1
    %2.sub16:sgpr_1024 = COPY %2.sub0
    %2.sub17:sgpr_1024 = COPY %2.sub1
    %2.sub18:sgpr_1024 = COPY %2.sub0
    %2.sub19:sgpr_1024 = COPY %2.sub1
    %2.sub20:sgpr_1024 = COPY %2.sub0
    %2.sub21:sgpr_1024 = COPY %2.sub1
    %2.sub22:sgpr_1024 = COPY %2.sub0
    %2.sub23:sgpr_1024 = COPY %2.sub1
    %2.sub24:sgpr_1024 = COPY %2.sub0
    %2.sub25:sgpr_1024 = COPY %2.sub1
    %2.sub26:sgpr_1024 = COPY %2.sub0
    %2.sub27:sgpr_1024 = COPY %2.sub1
    %2.sub28:sgpr_1024 = COPY %2.sub0
    %2.sub29:sgpr_1024 = COPY %2.sub1
    %3.sub1:sgpr_1024 = COPY %3.sub0
    %3.sub2:sgpr_1024 = COPY %3.sub0
    %3.sub3:sgpr_1024 = COPY %3.sub0
    %3.sub4:sgpr_1024 = COPY %3.sub0
    %3.sub5:sgpr_1024 = COPY %3.sub0
    %3.sub6:sgpr_1024 = COPY %3.sub0
    %3.sub7:sgpr_1024 = COPY %3.sub0
    %3.sub8:sgpr_1024 = COPY %3.sub0
    %3.sub9:sgpr_1024 = COPY %3.sub0
    %3.sub10:sgpr_1024 = COPY %3.sub0
    %3.sub11:sgpr_1024 = COPY %3.sub0
    %3.sub12:sgpr_1024 = COPY %3.sub0
    %3.sub13:sgpr_1024 = COPY %3.sub0
    %3.sub14:sgpr_1024 = COPY %3.sub0
    %3.sub15:sgpr_1024 = COPY %3.sub0
    %3.sub16:sgpr_1024 = COPY %3.sub0
    %3.sub17:sgpr_1024 = COPY %3.sub0
    %3.sub18:sgpr_1024 = COPY %3.sub0
    %3.sub19:sgpr_1024 = COPY %3.sub0
    %3.sub20:sgpr_1024 = COPY %3.sub0
    %3.sub21:sgpr_1024 = COPY %3.sub0
    %3.sub22:sgpr_1024 = COPY %3.sub0
    %3.sub23:sgpr_1024 = COPY %3.sub0
    %3.sub24:sgpr_1024 = COPY %3.sub0
    %3.sub25:sgpr_1024 = COPY %3.sub0
    %3.sub26:sgpr_1024 = COPY %3.sub0
    %3.sub27:sgpr_1024 = COPY %3.sub0
    %3.sub28:sgpr_1024 = COPY %3.sub0
    %3.sub29:sgpr_1024 = COPY %3.sub0
    %3.sub30:sgpr_1024 = COPY %3.sub0
    %3.sub31:sgpr_1024 = COPY %3.sub0

  ; NOTE(review): the csr_amdgpu_highregs register mask on the S_NOP presumably
  ; limits which SGPRs can stay live across it and so forces the split; confirm.
  bb.2:
    S_NOP 0, implicit %0, implicit %1, csr_amdgpu_highregs
    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
    S_BRANCH %bb.2
...