mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
AMDGPU/GlobalISel: Select atomic loads
A new check for an explicitly atomic MMO is needed to avoid incorrectly matching patterns for non-atomic loads. llvm-svn: 371418
This commit is contained in:
parent
f2ca76077f
commit
4ec23178ea
@ -27,6 +27,7 @@ class GINodeEquiv<Instruction i, SDNode node> {
|
||||
// (ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE) but GlobalISel
|
||||
// stores this information in the MachineMemoryOperand.
|
||||
bit CheckMMOIsNonAtomic = 0;
|
||||
bit CheckMMOIsAtomic = 0;
|
||||
|
||||
// SelectionDAG has one node for all loads and uses predicates to
|
||||
// differentiate them. GlobalISel on the other hand uses separate opcodes.
|
||||
@ -143,6 +144,11 @@ def : GINodeEquiv<G_ICMP, setcc> {
|
||||
// G_STORE with a non-atomic MachineMemOperand.
|
||||
def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = 1; }
|
||||
|
||||
// G_LOAD with an atomic MachineMemOperand.
def : GINodeEquiv<G_LOAD, atomic_load> {
|
||||
// Explicitly leave the non-atomic MMO check off for this equivalence.
let CheckMMOIsNonAtomic = 0;
|
||||
// Require the MMO to be atomic so this does not match plain loads.
let CheckMMOIsAtomic = 1;
|
||||
}
|
||||
|
||||
def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap>;
|
||||
def : GINodeEquiv<G_ATOMICRMW_XCHG, atomic_swap>;
|
||||
def : GINodeEquiv<G_ATOMICRMW_ADD, atomic_load_add>;
|
||||
|
@ -46,12 +46,19 @@ def gi_smrd_sgpr :
|
||||
GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
|
||||
GIComplexPatternEquiv<SMRDSgpr>;
|
||||
|
||||
// FIXME: Why are the atomic versions separated?
|
||||
def gi_flat_offset :
|
||||
GIComplexOperandMatcher<s64, "selectFlatOffset">,
|
||||
GIComplexPatternEquiv<FLATOffset>;
|
||||
def gi_flat_offset_signed :
|
||||
GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
|
||||
GIComplexPatternEquiv<FLATOffsetSigned>;
|
||||
// GlobalISel equivalent of the FLATAtomic complex pattern. Uses the same
// "selectFlatOffset" matcher function as gi_flat_offset; only the
// SelectionDAG pattern it stands in for differs.
def gi_flat_atomic :
|
||||
GIComplexOperandMatcher<s64, "selectFlatOffset">,
|
||||
GIComplexPatternEquiv<FLATAtomic>;
|
||||
// GlobalISel equivalent of the FLATSignedAtomic complex pattern. Uses the same
// "selectFlatOffsetSigned" matcher function as gi_flat_offset_signed; only the
// SelectionDAG pattern it stands in for differs.
def gi_flat_signed_atomic :
|
||||
GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
|
||||
GIComplexPatternEquiv<FLATSignedAtomic>;
|
||||
|
||||
def gi_mubuf_scratch_offset :
|
||||
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
|
||||
@ -69,6 +76,7 @@ def gi_ds_1addr_1offset :
|
||||
// SelectionDAG. The GISel selector can just insert m0 initialization
|
||||
// directly before selecting a glue-less load, so hide this
|
||||
// distinction.
|
||||
|
||||
def : GINodeEquiv<G_LOAD, AMDGPUld_glue> {
|
||||
let CheckMMOIsNonAtomic = 1;
|
||||
}
|
||||
@ -77,6 +85,11 @@ def : GINodeEquiv<G_STORE, AMDGPUst_glue> {
|
||||
let CheckMMOIsNonAtomic = 1;
|
||||
}
|
||||
|
||||
// G_LOAD with an atomic MachineMemOperand, mapped to the AMDGPU glue variant
// of the SelectionDAG atomic load node.
def : GINodeEquiv<G_LOAD, AMDGPUatomic_ld_glue> {
|
||||
// Override the existing GINodeEquiv field with `let` (rather than
// re-declaring it with `bit`), matching the other equivalences.
let CheckMMOIsAtomic = 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap_glue>;
|
||||
def : GINodeEquiv<G_ATOMICRMW_XCHG, atomic_swap_glue>;
|
||||
|
@ -456,16 +456,16 @@ def load_align16_local_m0 : PatFrag<(ops node:$ptr),
|
||||
|
||||
} // End IsLoad = 1
|
||||
|
||||
let AddressSpaces = LoadAddress_local.AddrSpaces in {
|
||||
|
||||
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
|
||||
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
|
||||
(atomic_load_32_glue node:$ptr)> {
|
||||
let IsAtomic = 1;
|
||||
let MemoryVT = i32;
|
||||
}
|
||||
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
|
||||
(atomic_load_64_glue node:$ptr)> {
|
||||
let IsAtomic = 1;
|
||||
(atomic_load_64_glue node:$ptr)> {
|
||||
let MemoryVT = i64;
|
||||
}
|
||||
|
||||
} // End let AddressSpaces = LoadAddress_local.AddrSpaces
|
||||
|
||||
|
||||
|
@ -73,7 +73,7 @@ define i128 @sequence_sizes([8 x i8] %in) {
|
||||
}
|
||||
|
||||
; Just to make sure we don't accidentally emit a normal load/store.
|
||||
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %2:gpr(s64) = G_LOAD %0:gpr(p0) :: (load seq_cst 8 from %ir.addr) (in function: atomic_ops)
|
||||
; FALLBACK-WITH-REPORT-ERR: cannot select: G_STORE %1:gpr(s64), %0:gpr64sp(p0) :: (store unordered 8 into %ir.addr) (in function: atomic_ops)
|
||||
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for atomic_ops
|
||||
; FALLBACK-WITH-REPORT-LABEL: atomic_ops:
|
||||
define i64 @atomic_ops(i64* %addr) {
|
||||
|
308
test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
Normal file
308
test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
Normal file
@ -0,0 +1,308 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_s32_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
|
||||
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
|
||||
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_v2s16_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_v2s16_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 4)
|
||||
; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
; GFX9-LABEL: name: load_atomic_flat_v2s16_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 4)
|
||||
; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_p3_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_p3_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst 4)
|
||||
; GFX7: $vgpr0 = COPY [[LOAD]](p3)
|
||||
; GFX9-LABEL: name: load_atomic_flat_p3_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst 4)
|
||||
; GFX9: $vgpr0 = COPY [[LOAD]](p3)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p3) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_s64_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_s64_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_v2s32_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_v2s32_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_v4s16_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_v4s16_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
; GFX9-LABEL: name: load_atomic_flat_v4s16_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_p1_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_p1_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst 8)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
; GFX9-LABEL: name: load_atomic_flat_p1_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst 8)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p1) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_p0_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_p0_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst 8)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
|
||||
; GFX9-LABEL: name: load_atomic_flat_p0_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst 8)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p0) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_s32_seq_cst_gep_m2048
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
|
||||
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
|
||||
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
|
||||
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
|
||||
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
|
||||
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
|
||||
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -2048
|
||||
%2:vgpr(p0) = G_GEP %0, %1
|
||||
%3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 0)
|
||||
$vgpr0 = COPY %3
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_s32_seq_cst_gep_4095
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
|
||||
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
|
||||
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
|
||||
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
|
||||
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 4095
|
||||
%2:vgpr(p0) = G_GEP %0, %1
|
||||
%3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 0)
|
||||
$vgpr0 = COPY %3
|
||||
|
||||
...
|
||||
|
@ -0,0 +1,363 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_s32_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_global_s32_seq_cst
|
||||
; GFX6: liveins: $vgpr0_vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
|
||||
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
|
||||
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_v2s16_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_global_v2s16_seq_cst
|
||||
; GFX6: liveins: $vgpr0_vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
; GFX7-LABEL: name: load_atomic_global_v2s16_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1)
|
||||
; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
; GFX9-LABEL: name: load_atomic_global_v2s16_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1)
|
||||
; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_p3_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_global_p3_seq_cst
|
||||
; GFX6: liveins: $vgpr0_vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[LOAD]](p3)
|
||||
; GFX7-LABEL: name: load_atomic_global_p3_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1)
|
||||
; GFX7: $vgpr0 = COPY [[LOAD]](p3)
|
||||
; GFX9-LABEL: name: load_atomic_global_p3_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1)
|
||||
; GFX9: $vgpr0 = COPY [[LOAD]](p3)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p3) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_s64_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_global_s64_seq_cst
|
||||
; GFX6: liveins: $vgpr0_vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
; GFX7-LABEL: name: load_atomic_global_s64_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
; GFX9-LABEL: name: load_atomic_global_s64_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_v2s32_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_global_v2s32_seq_cst
|
||||
; GFX6: liveins: $vgpr0_vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_v4s16_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_global_v4s16_seq_cst
|
||||
; GFX6: liveins: $vgpr0_vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
; GFX7-LABEL: name: load_atomic_global_v4s16_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
; GFX9-LABEL: name: load_atomic_global_v4s16_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_p1_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_global_p1_seq_cst
|
||||
; GFX6: liveins: $vgpr0_vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
; GFX7-LABEL: name: load_atomic_global_p1_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
; GFX9-LABEL: name: load_atomic_global_p1_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p1) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_p0_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_global_p0_seq_cst
|
||||
; GFX6: liveins: $vgpr0_vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
|
||||
; GFX7-LABEL: name: load_atomic_global_p0_seq_cst
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
|
||||
; GFX9-LABEL: name: load_atomic_global_p0_seq_cst
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p0) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_s32_seq_cst_gep_m2048
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
|
||||
; GFX6: liveins: $vgpr0_vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 -2048
|
||||
; GFX6: [[GEP:%[0-9]+]]:vgpr(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[GEP]](p1) :: (load seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
|
||||
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
|
||||
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
|
||||
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
|
||||
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
|
||||
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
|
||||
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -2048
|
||||
%2:vgpr(p1) = G_GEP %0, %1
|
||||
%3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 1)
|
||||
$vgpr0 = COPY %3
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_s32_seq_cst_gep_4095
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
|
||||
; GFX6: liveins: $vgpr0_vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4095
|
||||
; GFX6: [[GEP:%[0-9]+]]:vgpr(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[GEP]](p1) :: (load seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
|
||||
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
|
||||
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
|
||||
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
|
||||
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 4095
|
||||
%2:vgpr(p1) = G_GEP %0, %1
|
||||
%3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 1)
|
||||
$vgpr0 = COPY %3
|
||||
|
||||
...
|
||||
|
314
test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir
Normal file
314
test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir
Normal file
@ -0,0 +1,314 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_local_s32_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_local_s32_seq_cst
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3)
|
||||
; GFX6: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX7-LABEL: name: load_atomic_local_s32_seq_cst
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX9-LABEL: name: load_atomic_local_s32_seq_cst
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 4, addrspace 3)
|
||||
; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_local_v2s16_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_local_v2s16_seq_cst
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
|
||||
; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
; GFX7-LABEL: name: load_atomic_local_v2s16_seq_cst
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
; GFX9-LABEL: name: load_atomic_local_v2s16_seq_cst
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
|
||||
; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_local_p3_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_local_p3_seq_cst
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
|
||||
; GFX6: $vgpr0 = COPY [[LOAD]](p3)
|
||||
; GFX7-LABEL: name: load_atomic_local_p3_seq_cst
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[LOAD]](p3)
|
||||
; GFX9-LABEL: name: load_atomic_local_p3_seq_cst
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
|
||||
; GFX9: $vgpr0 = COPY [[LOAD]](p3)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(p3) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_local_s64_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_local_s64_seq_cst
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
|
||||
; GFX7-LABEL: name: load_atomic_local_s64_seq_cst
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
|
||||
; GFX9-LABEL: name: load_atomic_local_s64_seq_cst
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 8, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_local_v2s32_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_local_v2s32_seq_cst
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
; GFX7-LABEL: name: load_atomic_local_v2s32_seq_cst
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
; GFX9-LABEL: name: load_atomic_local_v2s32_seq_cst
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_local_v4s16_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_local_v4s16_seq_cst
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
; GFX7-LABEL: name: load_atomic_local_v4s16_seq_cst
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
; GFX9-LABEL: name: load_atomic_local_v4s16_seq_cst
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_local_p1_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_local_p1_seq_cst
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
; GFX7-LABEL: name: load_atomic_local_p1_seq_cst
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
; GFX9-LABEL: name: load_atomic_local_p1_seq_cst
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(p1) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_local_p0_seq_cst
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_local_p0_seq_cst
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
|
||||
; GFX7-LABEL: name: load_atomic_local_p0_seq_cst
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
|
||||
; GFX9-LABEL: name: load_atomic_local_p0_seq_cst
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(p0) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_local_s32_seq_cst_gep_65535
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 %2, 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3)
|
||||
; GFX6: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX7-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX9-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst 4, addrspace 3)
|
||||
; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 65535
|
||||
%2:vgpr(p3) = G_GEP %0, %1
|
||||
%3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 3)
|
||||
$vgpr0 = COPY %3
|
||||
|
||||
...
|
@ -1644,79 +1644,3 @@ body: |
|
||||
$vgpr0 = COPY %3
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_s32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_s32
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4)
|
||||
; GFX7: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; GFX8-LABEL: name: load_atomic_flat_s32
|
||||
; GFX8: liveins: $vgpr0_vgpr1
|
||||
; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4)
|
||||
; GFX8: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; GFX9-LABEL: name: load_atomic_flat_s32
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4)
|
||||
; GFX9: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; GFX10-LABEL: name: load_atomic_flat_s32
|
||||
; GFX10: liveins: $vgpr0_vgpr1
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4)
|
||||
; GFX10: $vgpr0 = COPY [[LOAD]](s32)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load monotonic 4, align 4, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_flat_s64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_flat_s64
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
; GFX8-LABEL: name: load_atomic_flat_s64
|
||||
; GFX8: liveins: $vgpr0_vgpr1
|
||||
; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8)
|
||||
; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
; GFX9-LABEL: name: load_atomic_flat_s64
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
; GFX10-LABEL: name: load_atomic_flat_s64
|
||||
; GFX10: liveins: $vgpr0_vgpr1
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8)
|
||||
; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_LOAD %0 :: (load monotonic 8, align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
@ -1584,79 +1584,3 @@ body: |
|
||||
$vgpr0 = COPY %3
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_s32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_global_s32
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1)
|
||||
; GFX7: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; GFX8-LABEL: name: load_atomic_global_s32
|
||||
; GFX8: liveins: $vgpr0_vgpr1
|
||||
; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1)
|
||||
; GFX8: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; GFX9-LABEL: name: load_atomic_global_s32
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1)
|
||||
; GFX9: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; GFX10-LABEL: name: load_atomic_global_s32
|
||||
; GFX10: liveins: $vgpr0_vgpr1
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1)
|
||||
; GFX10: $vgpr0 = COPY [[LOAD]](s32)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load monotonic 4, align 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_atomic_global_s64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX7-LABEL: name: load_atomic_global_s64
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
; GFX8-LABEL: name: load_atomic_global_s64
|
||||
; GFX8: liveins: $vgpr0_vgpr1
|
||||
; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1)
|
||||
; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
; GFX9-LABEL: name: load_atomic_global_s64
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
; GFX10-LABEL: name: load_atomic_global_s64
|
||||
; GFX10: liveins: $vgpr0_vgpr1
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1)
|
||||
; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_LOAD %0 :: (load monotonic 8, align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
@ -3614,6 +3614,10 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
|
||||
}
|
||||
if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsNonAtomic"))
|
||||
InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("NotAtomic");
|
||||
else if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsAtomic")) {
|
||||
InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
|
||||
"Unordered", AtomicOrderingMMOPredicateMatcher::AO_OrStronger);
|
||||
}
|
||||
|
||||
if (Src->isLeaf()) {
|
||||
Init *SrcInit = Src->getLeafValue();
|
||||
|
Loading…
Reference in New Issue
Block a user