mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[AMDGPU] Do not check denorm for LDS FP atomic with unsafe flag
This is already how global and flat atomics are handled.
Differential Revision: https://reviews.llvm.org/D102366
This commit is contained in:
parent
91a232a867
commit
dc20bc576c
@ -12156,8 +12156,15 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
|
||||
// to round-to-nearest-even.
|
||||
// The only exception is DS_ADD_F64 which never flushes regardless of mode.
|
||||
if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomics()) {
|
||||
return (Ty->isDoubleTy() && !fpModeMatchesGlobalFPAtomicMode(RMW)) ?
|
||||
AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
|
||||
if (!Ty->isDoubleTy())
|
||||
return AtomicExpansionKind::None;
|
||||
|
||||
return (fpModeMatchesGlobalFPAtomicMode(RMW) ||
|
||||
RMW->getFunction()
|
||||
->getFnAttribute("amdgpu-unsafe-fp-atomics")
|
||||
.getValueAsString() == "true")
|
||||
? AtomicExpansionKind::None
|
||||
: AtomicExpansionKind::CmpXChg;
|
||||
}
|
||||
|
||||
return AtomicExpansionKind::CmpXChg;
|
||||
|
@ -957,11 +957,28 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(double addrspac
|
||||
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush:
|
||||
; GFX90A: ; %bb.0: ; %main_body
|
||||
; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
%ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double addrspace(3)* %ptr) #4 {
|
||||
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
|
||||
; GFX90A: ; %bb.0: ; %main_body
|
||||
; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24
|
||||
; GFX90A-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX90A-NEXT: ds_read_b64 v[0:1], v0
|
||||
; GFX90A-NEXT: BB51_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: BB52_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f64 v[2:3], v[0:1], 4.0
|
||||
@ -973,7 +990,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(double addrspac
|
||||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB51_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz BB52_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
@ -996,5 +1013,36 @@ main_body:
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define double @local_atomic_fadd_f64_rtn_ieee_unsafe(double addrspace(3)* %ptr, double %data) #2 {
|
||||
; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
|
||||
; GFX90A: ; %bb.0: ; %main_body
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_setpc_b64 s[30:31]
|
||||
main_body:
|
||||
%ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0)
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define double @local_atomic_fadd_f64_rtn_ieee_safe(double addrspace(3)* %ptr, double %data) #3 {
|
||||
; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
|
||||
; GFX90A: ; %bb.0: ; %main_body
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_setpc_b64 s[30:31]
|
||||
main_body:
|
||||
%ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0)
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" }
|
||||
attributes #2 = { "denormal-fp-math"="ieee,ieee" "amdgpu-unsafe-fp-atomics"="true" }
|
||||
attributes #3 = { "denormal-fp-math"="ieee,ieee" }
|
||||
attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
|
||||
|
Loading…
Reference in New Issue
Block a user