1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00
llvm-mirror/test/CodeGen/AMDGPU/memory_clause.mir
Matt Arsenault f1ba4465de AMDGPU: Use kill instruction to hint soft clause live ranges
Previously we would use a bundle to hint the register allocator to not
overwrite the pointers in a sequence of loads to avoid breaking soft
clauses. This bundling was based on a fuzzy register pressure
heuristic, so we could not guarantee using more registers than are
really available. This would result in register allocator failing on
unsatisfiable bundles. Use a kill to artificially extend the live
ranges, so we can always succeed at register allocation even if it
means extra spills in the worst case.

This seems to capture most of the benefit of the bundle while avoiding
most of the risk presented by the bundle. However the lit tests do
show a handful of regressions. In some cases with sequences of
volatile loads, unused load components end up getting reallocated to
the next load which forces a wait between. There are also a few small
scheduling regressions where a hazard used to be avoided, and one
spill torture test which for some reason nearly doubles the stack
usage. There is also a bit of noise from leftover kills (it may make
sense for post-RA pseudos to strip all of these out).
2021-02-26 18:26:40 -05:00

549 lines
24 KiB
YAML

# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass=si-form-memory-clauses %s -o - | FileCheck -check-prefix=GCN %s
# GCN-LABEL: {{^}}name: vector_clause{{$}}
# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
# GCN-NEXT: %5:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %5, %1, 0, 0, 0, 0, 0, implicit $exec
---
name: vector_clause
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
%5:vreg_64 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %5, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %5, %2, 16, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %5, %3, 32, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %5, %4, 48, 0, 0, 0, 0, implicit $exec
...
# This would be a valid soft clause, but there's no need for a KILL
# since the pointer uses are live beyond the end the clause.
# GCN-LABEL: {{^}}name: vector_clause_no_kill{{$}}
# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, 0, implicit $exec
---
name: vector_clause_no_kill
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_full{{$}}
# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3{{$}}
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, 0, implicit $exec
---
name: subreg_full
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_128 }
- { id: 1, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
%1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
%1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
%1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_part{{$}}
# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3{{$}}
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, 0, implicit $exec
---
name: subreg_part
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_128 }
- { id: 1, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
%1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
%1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: dead{{$}}
# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
---
name: dead
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_dead{{$}}
# GCN: undef %2.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %2.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
# GCN-NEXT: GLOBAL_STORE_DWORD %1, %2.sub0, 0, 0, 0, 0, 0, implicit $exec
---
name: subreg_dead
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
undef %2.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, 0, implicit $exec
dead %2.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %1, %2.sub0, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: kill{{$}}
# GCN: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
# GCN-NEXT: KILL %0{{$}}
---
name: kill
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
%2:vreg_64 = IMPLICIT_DEF
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %2, %3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %2, %4, 16, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: indirect{{$}}
# GCN: %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
---
name: indirect
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_128 }
- { id: 3, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: stack{{$}}
# GCN: %0:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, 0, implicit $exec
---
name: stack
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
stack:
- { id: 0, type: default, offset: 0, size: 64, alignment: 8 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: overflow_counter{{$}}
# GCN: dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
# GCN-NEXT: dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %1, 60, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %18:vgpr_32 = GLOBAL_LOAD_DWORD %1, 64, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
---
name: overflow_counter
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec
%3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec
%4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, 0, implicit $exec
%5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, 0, implicit $exec
%6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, 0, implicit $exec
%7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, 0, implicit $exec
%8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, 0, implicit $exec
%9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, 0, implicit $exec
%10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, 0, implicit $exec
%11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, 0, implicit $exec
%12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, 0, implicit $exec
%13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, 0, implicit $exec
%14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, 0, implicit $exec
%15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, 0, implicit $exec
%16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, 0, implicit $exec
%17:vgpr_32 = GLOBAL_LOAD_DWORD %1, 60, 0, 0, 0, 0, implicit $exec
%18:vgpr_32 = GLOBAL_LOAD_DWORD %1, 64, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: reg_pressure{{$}}
# GCN: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
# GCN-NEXT: dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 80, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 96, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
---
name: reg_pressure
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
%5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
%6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, 0, implicit $exec
%7:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 80, 0, 0, 0, 0, implicit $exec
%8:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 96, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: image_clause{{$}}
# GCN: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL undef %2:sgpr_128{{$}}
# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
---
name: image_clause
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: sgpr_256 }
- { id: 2, class: sgpr_128 }
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
- { id: 5, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
%4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
%5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
...
# GCN-LABEL: {{^}}name: mixed_clause{{$}}
# GCN: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
# GCN-NEXT: KILL %2{{$}}
# GCN-NEXT: KILL %0{{$}}
---
name: mixed_clause
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: sgpr_256 }
- { id: 2, class: sgpr_128 }
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
- { id: 5, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = IMPLICIT_DEF
%3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
%5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: atomic{{$}}
# GCN: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: dead %2:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, -1, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: dead %3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, -1, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: S_ENDPGM 0
---
name: atomic
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, -1, 0, 0, implicit $exec, implicit $flat_scr
%3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, -1, 0, 0, implicit $exec, implicit $flat_scr
FLAT_ATOMIC_ADD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_ATOMIC_ADD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
# One of the pointers has an additional use after the clause, but one
# doesn't. Only the final use should be killed.
# GCN-LABEL: {{^}}name: ptr_use_after_clause{{$}}
# GCN: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 48, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
# GCN-NEXT: S_NOP 0, implicit %0
---
name: ptr_use_after_clause
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
%5:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 48, 0, 0, 0, 0, implicit $exec
S_NOP 0, implicit %0
...
# Only part of the register is really live past the clause.
# GCN-LABEL: {{^}}name: ptr_use_after_clause_subreg{{$}}
# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3{{$}}
# GCN-NEXT: S_NOP 0, implicit %0.sub0_sub1{{$}}
---
name: ptr_use_after_clause_subreg
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, 0, 0, 0, implicit $exec
S_NOP 0, implicit %0.sub0_sub1
...
# More complex situation where only some of the use subregisters live
# beyond the clause.
# GCN-LABEL: {{^}}name: ptr_use_after_clause_subreg_multi{{$}}
# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub3_sub4, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub6_sub7, 48, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3_sub4, %0.sub7{{$}}
# GCN-NEXT: S_NOP 0, implicit %0.sub0_sub1, implicit %0.sub5_sub6
---
name: ptr_use_after_clause_subreg_multi
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%0:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub3_sub4, 32, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub6_sub7, 48, 0, 0, 0, 0, implicit $exec
S_NOP 0, implicit %0.sub0_sub1, implicit %0.sub5_sub6
...
# Have subranges, but none of them are killed
# GCN-LABEL: {{^}}name: no_killed_subranges{{$}}
# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: S_NOP 0, implicit %0.sub0_sub1{{$}}
# GCN-NEXT: S_NOP 0, implicit %0.sub2_sub3{{$}}
---
name: no_killed_subranges
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, 0, 0, 0, implicit $exec
S_NOP 0, implicit %0.sub0_sub1
S_NOP 0, implicit %0.sub2_sub3
...
# sub2 is undef at entry to the soft clause. It should not be have its
# live range extended.
# GCN-LABEL: name: no_killed_undef_subrange_use
# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0.sub0_sub1{{$}}
# GCN-NEXT: %0.sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
# GCN-NEXT: S_NOP 0, implicit %0.sub2
---
name: no_killed_undef_subrange_use
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
undef %0.sub0_sub1:vreg_128 = COPY $vgpr0_vgpr1
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
%0.sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
S_NOP 0, implicit %0.sub2
...
# Make sure intervening implicit_defs are not treated as breaking the
# clause
#
# GCN-LABEL: {{^}}name: implicit_def_no_break{{$}}
# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %3:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 48, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %3{{$}}
# GCN-NEXT: KILL %0{{$}}
---
name: implicit_def_no_break
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
%3:vreg_64 = IMPLICIT_DEF
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 32, 0, 0, 0, 0, implicit $exec
%5:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 48, 0, 0, 0, 0, implicit $exec
S_NOP 0, implicit %1, implicit %2, implicit %4, implicit %5
...
# GCN-LABEL: {{^}}name: kill_part_subreg{{$}}
# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %0.sub0_sub1_sub2, %0.sub3
---
name: kill_part_subreg
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
%1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
%1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
S_NOP 0, implicit %1
...
# GCN-LABEL: {{^}}name: mem_clause_sreg256_used_stack{{$}}
# GCN: undef %0.sub7:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 8, 0, 0
# GCN-NEXT: dead %0.sub3:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 24, 0, 0
# GCN-NEXT: KILL undef %1
---
name: mem_clause_sreg256_used_stack
stack:
- { id: 0, type: default, offset: 0, size: 40, alignment: 8 }
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
undef %0.sub7:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 8, 0, 0
%0.sub3:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 24, 0, 0
S_ENDPGM 0
...