1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
llvm-mirror/test/CodeGen/AMDGPU/flat-load-clustering.mir
Stanislav Mekhanoshin fbfa163a41 Allow target to decide when to cluster loads/stores in misched
MachineScheduler when clustering loads or stores checks if base
pointers point to the same memory. This check is done through
comparison of base registers of two memory instructions. This
works fine when instructions have separate offset operand. If
they require a full calculated pointer such instructions can
never be clustered according to such logic.

Changed shouldClusterMemOps to accept base registers as well and
let it decide what to do about it.

Differential Revision: https://reviews.llvm.org/D37698

llvm-svn: 313208
2017-09-13 22:20:47 +00:00

78 lines
3.3 KiB
YAML

# RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -run-pass machine-scheduler -o - %s | FileCheck -check-prefix=GCN %s
# CGN-LABEL: name: flat_load_clustering
# GCN: FLAT_LOAD_DWORD
# GCN-NEXT: FLAT_LOAD_DWORD
--- |
define amdgpu_kernel void @flat_load_clustering(i32 addrspace(1)* nocapture %arg, i32 addrspace(2)* nocapture readonly %arg1) {
bb:
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%gep1 = getelementptr inbounds i32, i32 addrspace(2)* %arg1, i64 %idxprom
%load1 = load i32, i32 addrspace(2)* %gep1, align 4
%gep2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %idxprom
%gep34 = getelementptr inbounds i32, i32 addrspace(2)* %gep1, i64 4
%load2 = load i32, i32 addrspace(2)* %gep34, align 4
%gep4 = getelementptr inbounds i32, i32 addrspace(1)* %gep2, i64 4
store i32 %load1, i32 addrspace(1)* %gep2, align 4
store i32 %load2, i32 addrspace(1)* %gep4, align 4
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x()
...
---
name: flat_load_clustering
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: sgpr_64 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: sreg_64_xexec }
- { id: 4, class: sreg_64_xexec }
- { id: 5, class: vgpr_32 }
- { id: 6, class: vgpr_32 }
- { id: 7, class: vgpr_32 }
- { id: 8, class: vgpr_32 }
- { id: 9, class: vreg_64 }
- { id: 10, class: vreg_64 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vreg_64 }
- { id: 13, class: vreg_64 }
liveins:
- { reg: '%vgpr0', virtual-reg: '%0' }
- { reg: '%sgpr4_sgpr5', virtual-reg: '%1' }
body: |
bb.0.bb:
liveins: %vgpr0, %sgpr4_sgpr5
%1 = COPY %sgpr4_sgpr5
%0 = COPY %vgpr0
%3 = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%4 = S_LOAD_DWORDX2_IMM %1, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%7 = V_LSHLREV_B32_e32 2, %0, implicit %exec
%2 = V_MOV_B32_e32 0, implicit %exec
undef %12.sub0 = V_ADD_I32_e32 %4.sub0, %7, implicit-def %vcc, implicit %exec
%11 = COPY %4.sub1
%12.sub1 = V_ADDC_U32_e32 %11, %2, implicit-def dead %vcc, implicit killed %vcc, implicit %exec
%5 = FLAT_LOAD_DWORD %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.gep1)
undef %9.sub0 = V_ADD_I32_e32 %3.sub0, %7, implicit-def %vcc, implicit %exec
%8 = COPY %3.sub1
%9.sub1 = V_ADDC_U32_e32 %8, %2, implicit-def dead %vcc, implicit killed %vcc, implicit %exec
undef %13.sub0 = V_ADD_I32_e32 16, %12.sub0, implicit-def %vcc, implicit %exec
%13.sub1 = V_ADDC_U32_e32 %12.sub1, %2, implicit-def dead %vcc, implicit killed %vcc, implicit %exec
%6 = FLAT_LOAD_DWORD %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.gep34)
undef %10.sub0 = V_ADD_I32_e32 16, %9.sub0, implicit-def %vcc, implicit %exec
%10.sub1 = V_ADDC_U32_e32 %9.sub1, %2, implicit-def dead %vcc, implicit killed %vcc, implicit %exec
FLAT_STORE_DWORD %9, %5, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.gep2)
FLAT_STORE_DWORD %10, %6, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.gep4)
S_ENDPGM
...