AMDGPU: Fix using wrong offsets for global atomic fadd intrinsics
Global instructions have signed offsets.
parent 544267f834
commit aa9d3db2ef
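In practice, the change routes the no-return global fadd patterns through FLATOffsetSigned, so a negative pointer offset can be folded into the instruction's signed immediate offset field. A minimal sketch of the case the new tests below exercise (the kernel name is illustrative; the intrinsic signature is the one the tests use):

declare void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)*, float)

; A getelementptr of -1 float element is a byte offset of -4, which should
; now select to: global_atomic_add_f32 v[...], v..., off offset:-4
define amdgpu_kernel void @fadd_global_offneg4_sketch(float addrspace(1)* %ptr, float %data) {
  %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %p, float %data)
  ret void
}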
@@ -783,6 +783,11 @@ class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt
   (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;
 
+class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+  (node (FLATOffsetSigned i64:$vaddr, i16:$offset), vt:$data),
+  (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
+>;
+
 class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                            ValueType data_vt = vt> : GCNPat <
   (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data)),
@@ -971,8 +976,8 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
 
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_fadd_global_noret, v2f16>;
+def : FlatSignedAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
+def : FlatSignedAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_fadd_global_noret, v2f16>;
 
 } // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
@@ -54,6 +54,15 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4:
+; GCN: global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:-4
+define amdgpu_kernel void @global_atomic_add_f32_offneg4(float addrspace(1)* %ptr, float %data) {
+main_body:
+  %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
+  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %p, float %data)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16:
 ; GCN: global_atomic_pk_add_f16 v[{{[0-9:]+}}], v{{[0-9]+}}, off
 define amdgpu_kernel void @global_atomic_pk_add_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
@@ -71,6 +80,15 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
+; GCN: global_atomic_pk_add_f16 v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:-4
+define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
+main_body:
+  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -1
+  call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
+  ret void
+}
+
 ; Make sure this artificially selects with an incorrect subtarget, but
 ; the feature set.
 ; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
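The GCN FileCheck prefixes above imply a lit RUN line at the top of the test file. That line is not part of this excerpt, so the following is a typical form rather than the file's actual line (gfx908 being the first subtarget that provides global_atomic_add_f32):

; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s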