mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
AMDGPU: Start selecting flat instruction offsets
llvm-svn: 305201
This commit is contained in:
parent
a57541fa02
commit
8857889fd1
@ -138,8 +138,10 @@ private:
|
||||
bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
|
||||
SDValue &ImmOffset, SDValue &VOffset) const;
|
||||
|
||||
bool SelectFlat(SDValue Addr, SDValue &VAddr,
|
||||
SDValue &Offset, SDValue &SLC) const;
|
||||
bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
|
||||
SDValue &Offset, SDValue &SLC) const;
|
||||
bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
|
||||
SDValue &Offset, SDValue &SLC) const;
|
||||
|
||||
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
|
||||
bool &Imm) const;
|
||||
@ -1314,16 +1316,37 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
|
||||
SDValue &VAddr,
|
||||
SDValue &Offset,
|
||||
SDValue &SLC) const {
|
||||
// Splits a flat address into the operands of a flat memory instruction.
//   Addr   - address expression being selected.
//   VAddr  - out: address operand (the base when a constant was folded,
//            otherwise Addr unchanged).
//   Offset - out: i16 target constant holding the folded immediate, or 0.
//   SLC    - out: i1 target constant, always 0 here.
// Always returns true.
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
|
||||
SDValue &VAddr,
|
||||
SDValue &Offset,
|
||||
SDValue &SLC) const {
|
||||
int64_t OffsetVal = 0;
|
||||
|
||||
// Only fold (base + constant) when the subtarget reports flat instruction
// offsets; otherwise the whole address stays in VAddr with a zero offset.
if (Subtarget->hasFlatInstOffsets() &&
|
||||
CurDAG->isBaseWithConstantOffset(Addr)) {
|
||||
SDValue N0 = Addr.getOperand(0);
|
||||
SDValue N1 = Addr.getOperand(1);
|
||||
// Read zero-extended: a negative constant becomes a very large unsigned
// value and is therefore rejected by the 12-bit range check below.
uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue();
|
||||
// Accept only constants that fit in an unsigned 12-bit field (0..4095).
if (isUInt<12>(COffsetVal)) {
|
||||
Addr = N0;
|
||||
OffsetVal = COffsetVal;
|
||||
}
|
||||
}
|
||||
|
||||
VAddr = Addr;
|
||||
// NOTE(review): in this diff rendering the next assignment looks like the
// pre-change line that the one after it replaces - confirm against the repo.
Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i16);
|
||||
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
|
||||
SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Address selector named by the FLATAtomic complex pattern. It currently adds
// no behavior of its own: every operand is forwarded to SelectFlatOffset and
// that function's result is returned unchanged.
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
|
||||
SDValue &VAddr,
|
||||
SDValue &Offset,
|
||||
SDValue &SLC) const {
|
||||
return SelectFlatOffset(Addr, VAddr, Offset, SLC);
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
|
||||
SDValue &Offset, bool &Imm) const {
|
||||
|
||||
|
@ -7,7 +7,8 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
|
||||
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
|
||||
// Complex pattern for flat addresses that may fold an immediate offset; yields
// three operands (vaddr, offset, slc). It is bound to SelectFlatOffset because
// this change replaces the former SelectFlat selector in AMDGPUDAGToDAGISel
// with SelectFlatOffset, so the old name would be a dangling reference as soon
// as a pattern uses FLATOffset.
def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset", [], [], -10>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FLAT classes
|
||||
@ -338,31 +339,31 @@ def flat_truncstorei16 : flat_st <truncstorei16>;
|
||||
|
||||
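// Patterns for flat memory operations. The address is matched through the FLATAtomic complex pattern, which supplies the vaddr, offset and slc operands.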
|
||||
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(vt (node i64:$addr)),
|
||||
(inst $addr, 0, 0, 0)
|
||||
(vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
|
||||
(inst $vaddr, $offset, 0, $slc)
|
||||
>;
|
||||
|
||||
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(vt (node i64:$addr)),
|
||||
(inst $addr, 0, 1, 0)
|
||||
(vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
|
||||
(inst $vaddr, $offset, 1, $slc)
|
||||
>;
|
||||
|
||||
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(node vt:$data, i64:$addr),
|
||||
(inst $addr, $data, 0, 0, 0)
|
||||
(node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)),
|
||||
(inst $vaddr, $data, $offset, 0, $slc)
|
||||
>;
|
||||
|
||||
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
// atomic store follows atomic binop convention so the address comes
|
||||
// first.
|
||||
(node i64:$addr, vt:$data),
|
||||
(inst $addr, $data, 0, 1, 0)
|
||||
(node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
|
||||
(inst $vaddr, $data, $offset, 1, $slc)
|
||||
>;
|
||||
|
||||
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
|
||||
ValueType data_vt = vt> : Pat <
|
||||
(vt (node i64:$addr, data_vt:$data)),
|
||||
(inst $addr, $data, 0, 0)
|
||||
(vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
|
||||
(inst $vaddr, $data, $offset, $slc)
|
||||
>;
|
||||
|
||||
let Predicates = [isCIVI] in {
|
||||
|
@ -12,7 +12,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
|
||||
; CHECK: DebugProps:
|
||||
; CHECK: DebuggerABIVersion: [ 1, 0 ]
|
||||
; CHECK: ReservedNumVGPRs: 4
|
||||
; CHECK: ReservedFirstVGPR: 11
|
||||
; GFX700: ReservedFirstVGPR: 11
|
||||
; GFX800: ReservedFirstVGPR: 11
|
||||
; GFX9: ReservedFirstVGPR: 14
|
||||
; CHECK: PrivateSegmentBufferSGPR: 0
|
||||
; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11
|
||||
define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 {
|
||||
|
@ -1,6 +1,7 @@
|
||||
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s
|
||||
|
||||
; Disable optimizations in case there are optimizations added that
|
||||
; specialize away generic pointer accesses.
|
||||
@ -172,6 +173,55 @@ define amdgpu_kernel void @flat_scratch_multidword_store() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Byte offset 4095 is the largest value that fits an unsigned 12-bit
; immediate. The GFX9 run expects it in the store's offset field; the CIVI
; runs expect a store with no offset operand.
; CHECK-LABEL: {{^}}store_flat_i8_max_offset:
|
||||
; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
|
||||
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}}
|
||||
define amdgpu_kernel void @store_flat_i8_max_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
|
||||
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
|
||||
store volatile i8 %x, i8 addrspace(4)* %fptr.offset
|
||||
ret void
|
||||
}
|
||||
|
||||
; Byte offset 4096 is one past the unsigned 12-bit maximum, so every run
; expects a store with no offset operand.
; CHECK-LABEL: {{^}}store_flat_i8_max_offset_p1:
|
||||
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr, i8 %x) #0 {
|
||||
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
|
||||
store volatile i8 %x, i8 addrspace(4)* %fptr.offset
|
||||
ret void
|
||||
}
|
||||
|
||||
; A negative byte offset (-2) must not be folded: every run expects a store
; with no offset operand.
; CHECK-LABEL: {{^}}store_flat_i8_neg_offset:
|
||||
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @store_flat_i8_neg_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
|
||||
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
|
||||
store volatile i8 %x, i8 addrspace(4)* %fptr.offset
|
||||
ret void
|
||||
}
|
||||
|
||||
; Byte offset 4095 is the largest value that fits an unsigned 12-bit
; immediate. The GFX9 run expects it in the load's offset field; the CIVI
; runs expect a load with no offset operand.
; CHECK-LABEL: {{^}}load_flat_i8_max_offset:
|
||||
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
|
||||
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
|
||||
define amdgpu_kernel void @load_flat_i8_max_offset(i8 addrspace(4)* %fptr) #0 {
|
||||
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
|
||||
%val = load volatile i8, i8 addrspace(4)* %fptr.offset
|
||||
ret void
|
||||
}
|
||||
|
||||
; Byte offset 4096 is one past the unsigned 12-bit maximum, so every run
; expects a load with no offset operand.
; CHECK-LABEL: {{^}}load_flat_i8_max_offset_p1:
|
||||
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
|
||||
define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr) #0 {
|
||||
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
|
||||
%val = load volatile i8, i8 addrspace(4)* %fptr.offset
|
||||
ret void
|
||||
}
|
||||
|
||||
; A negative byte offset (-2) must not be folded: every run expects a load
; with no offset operand.
; CHECK-LABEL: {{^}}load_flat_i8_neg_offset:
|
||||
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
|
||||
define amdgpu_kernel void @load_flat_i8_neg_offset(i8 addrspace(4)* %fptr) #0 {
|
||||
%fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
|
||||
%val = load volatile i8, i8 addrspace(4)* %fptr.offset
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind convergent }
|
||||
attributes #3 = { nounwind readnone }
|
||||
|
@ -1,8 +1,10 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_add_i32_offset:
|
||||
; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -10,8 +12,28 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; i32 element index 1023 is byte offset 4092, the largest dword-aligned offset
; below 4096. The GFX9 run expects it in the atomic's offset field; the CIVI
; runs expect no offset operand.
; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
|
||||
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
|
||||
define amdgpu_kernel void @atomic_add_i32_max_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 1023
|
||||
%val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; i32 element index 1024 is byte offset 4096, one past the unsigned 12-bit
; maximum, so every run expects the atomic with no offset operand.
; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
|
||||
; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
|
||||
%val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
|
||||
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
@ -22,7 +44,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
|
||||
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -32,7 +55,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
@ -82,7 +106,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_and_i32_offset:
|
||||
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -91,7 +116,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
|
||||
; GCN: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
@ -102,7 +128,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
|
||||
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -112,7 +139,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
@ -162,7 +190,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_sub_i32_offset:
|
||||
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -171,7 +200,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
|
||||
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
@ -182,7 +212,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
|
||||
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -192,7 +223,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
@ -242,7 +274,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_max_i32_offset:
|
||||
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -251,7 +284,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
|
||||
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
@ -262,7 +296,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
|
||||
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -272,7 +307,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
@ -322,7 +358,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_umax_i32_offset:
|
||||
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -331,7 +368,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
|
||||
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
@ -342,7 +380,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
|
||||
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -352,7 +391,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
@ -402,7 +442,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_min_i32_offset:
|
||||
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -411,7 +452,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
|
||||
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
@ -422,7 +464,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
|
||||
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -432,7 +475,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
@ -482,7 +526,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_umin_i32_offset:
|
||||
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -491,7 +536,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
|
||||
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
@ -502,7 +548,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
|
||||
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -512,7 +559,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
@ -562,7 +610,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_or_i32_offset:
|
||||
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -571,7 +620,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
|
||||
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
@ -582,7 +632,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
|
||||
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -592,7 +643,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
@ -642,7 +694,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -651,7 +704,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
|
||||
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
@ -662,7 +716,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -672,7 +727,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
@ -724,7 +780,8 @@ entry:
|
||||
; CMP_SWAP
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
|
||||
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(4)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -733,7 +790,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
|
||||
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
|
||||
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -745,7 +803,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
|
||||
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index, i32 %old) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -755,7 +814,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
|
||||
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
|
||||
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
|
||||
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index, i32 %old) {
|
||||
entry:
|
||||
@ -808,7 +868,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_xor_i32_offset:
|
||||
; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(4)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -817,7 +878,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
|
||||
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
@ -828,7 +890,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
|
||||
; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||
; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
@ -838,7 +901,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
|
||||
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
@ -888,7 +952,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_i32_offset:
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
|
||||
entry:
|
||||
@ -909,7 +974,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
|
||||
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
|
||||
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out, i64 %index) {
|
||||
entry:
|
||||
@ -932,7 +998,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_i32_offset:
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16 glc{{$}}
|
||||
define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(4)* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
|
||||
@ -949,7 +1016,8 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
|
||||
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
|
||||
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16 glc{{$}}
|
||||
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(4)* %out, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
|
||||
|
Loading…
Reference in New Issue
Block a user