1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

AMDGPU: Start selecting flat instruction offsets

llvm-svn: 305201
This commit is contained in:
Matt Arsenault 2017-06-12 16:53:51 +00:00
parent a57541fa02
commit 8857889fd1
5 changed files with 216 additions and 72 deletions

View File

@ -138,8 +138,10 @@ private:
bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
SDValue &ImmOffset, SDValue &VOffset) const;
bool SelectFlat(SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
@ -1314,16 +1316,37 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
return true;
}
bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
int64_t OffsetVal = 0;
if (Subtarget->hasFlatInstOffsets() &&
CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue();
if (isUInt<12>(COffsetVal)) {
Addr = N0;
OffsetVal = COffsetVal;
}
}
VAddr = Addr;
Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i16);
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
return true;
}
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
return SelectFlatOffset(Addr, VAddr, Offset, SLC);
}
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue &Offset, bool &Imm) const {

View File

@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
def FLATOffset : ComplexPattern<i64, 3, "SelectFlat", [], [], -10>;
//===----------------------------------------------------------------------===//
// FLAT classes
@ -338,31 +339,31 @@ def flat_truncstorei16 : flat_st <truncstorei16>;
// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr)),
(inst $addr, 0, 0, 0)
(vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
(inst $vaddr, $offset, 0, $slc)
>;
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr)),
(inst $addr, 0, 1, 0)
(vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
(inst $vaddr, $offset, 1, $slc)
>;
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
(node vt:$data, i64:$addr),
(inst $addr, $data, 0, 0, 0)
(node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)),
(inst $vaddr, $data, $offset, 0, $slc)
>;
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
// atomic store follows atomic binop convention so the address comes
// first.
(node i64:$addr, vt:$data),
(inst $addr, $data, 0, 1, 0)
(node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
(inst $vaddr, $data, $offset, 1, $slc)
>;
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : Pat <
(vt (node i64:$addr, data_vt:$data)),
(inst $addr, $data, 0, 0)
(vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
(inst $vaddr, $data, $offset, $slc)
>;
let Predicates = [isCIVI] in {

View File

@ -12,7 +12,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
; CHECK: DebugProps:
; CHECK: DebuggerABIVersion: [ 1, 0 ]
; CHECK: ReservedNumVGPRs: 4
; CHECK: ReservedFirstVGPR: 11
; GFX700: ReservedFirstVGPR: 11
; GFX800: ReservedFirstVGPR: 11
; GFX9: ReservedFirstVGPR: 14
; CHECK: PrivateSegmentBufferSGPR: 0
; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11
define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 {

View File

@ -1,6 +1,7 @@
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck %s
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s
; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.
@ -172,6 +173,55 @@ define amdgpu_kernel void @flat_scratch_multidword_store() {
ret void
}
; CHECK-LABEL: {{^}}store_flat_i8_max_offset:
; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}}
define amdgpu_kernel void @store_flat_i8_max_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
store volatile i8 %x, i8 addrspace(4)* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}store_flat_i8_max_offset_p1:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr, i8 %x) #0 {
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
store volatile i8 %x, i8 addrspace(4)* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}store_flat_i8_neg_offset:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @store_flat_i8_neg_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
store volatile i8 %x, i8 addrspace(4)* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}load_flat_i8_max_offset:
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
define amdgpu_kernel void @load_flat_i8_max_offset(i8 addrspace(4)* %fptr) #0 {
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
%val = load volatile i8, i8 addrspace(4)* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}load_flat_i8_max_offset_p1:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr) #0 {
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
%val = load volatile i8, i8 addrspace(4)* %fptr.offset
ret void
}
; CHECK-LABEL: {{^}}load_flat_i8_neg_offset:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @load_flat_i8_neg_offset(i8 addrspace(4)* %fptr) #0 {
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
%val = load volatile i8, i8 addrspace(4)* %fptr.offset
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #3 = { nounwind readnone }

View File

@ -1,8 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; GCN-LABEL: {{^}}atomic_add_i32_offset:
; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -10,8 +12,28 @@ entry:
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
define amdgpu_kernel void @atomic_add_i32_max_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 1023
%val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
%val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@ -22,7 +44,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -32,7 +55,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@ -82,7 +106,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_and_i32_offset:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -91,7 +116,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
; GCN: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@ -102,7 +128,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -112,7 +139,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; GCN: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@ -162,7 +190,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_sub_i32_offset:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -171,7 +200,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@ -182,7 +212,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -192,7 +223,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@ -242,7 +274,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_max_i32_offset:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -251,7 +284,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@ -262,7 +296,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -272,7 +307,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@ -322,7 +358,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umax_i32_offset:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -331,7 +368,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@ -342,7 +380,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -352,7 +391,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@ -402,7 +442,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_min_i32_offset:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -411,7 +452,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@ -422,7 +464,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -432,7 +475,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@ -482,7 +526,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umin_i32_offset:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -491,7 +536,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@ -502,7 +548,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -512,7 +559,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@ -562,7 +610,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_or_i32_offset:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -571,7 +620,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@ -582,7 +632,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -592,7 +643,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@ -642,7 +694,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -651,7 +704,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@ -662,7 +716,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -672,7 +727,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@ -724,7 +780,8 @@ entry:
; CMP_SWAP
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(4)* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -733,7 +790,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i32 %old) {
entry:
@ -745,7 +803,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index, i32 %old) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -755,7 +814,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index, i32 %old) {
entry:
@ -808,7 +868,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xor_i32_offset:
; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -817,7 +878,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@ -828,7 +890,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@ -838,7 +901,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@ -888,7 +952,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_load_i32_offset:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
@ -909,7 +974,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out, i64 %index) {
entry:
@ -932,7 +998,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_store_i32_offset:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(4)* %out) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@ -949,7 +1016,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(4)* %out, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index