mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 18:42:46 +02:00
Copy syncscope when expanding atomicrmw into cmpxchg loop
Fixes: SWDEV-280070 Differential Revision: https://reviews.llvm.org/D99902
This commit is contained in:
parent
36fcdf34d3
commit
87c12ebe40
@ -23,7 +23,7 @@ class Value;
|
||||
/// /* OUT */ %success, /* OUT */ %new_loaded)
|
||||
using CreateCmpXchgInstFun =
|
||||
function_ref<void(IRBuilder<> &, Value *, Value *, Value *, Align,
|
||||
AtomicOrdering, Value *&, Value *&)>;
|
||||
AtomicOrdering, SyncScope::ID, Value *&, Value *&)>;
|
||||
|
||||
/// Expand an atomic RMW instruction into a loop utilizing
|
||||
/// cmpxchg. You'll want to make sure your target machine likes cmpxchg
|
||||
|
@ -97,7 +97,7 @@ namespace {
|
||||
AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
|
||||
static Value *insertRMWCmpXchgLoop(
|
||||
IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
|
||||
AtomicOrdering MemOpOrder,
|
||||
AtomicOrdering MemOpOrder, SyncScope::ID SSID,
|
||||
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
|
||||
CreateCmpXchgInstFun CreateCmpXchg);
|
||||
bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
|
||||
@ -467,8 +467,8 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
|
||||
|
||||
static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
|
||||
Value *Loaded, Value *NewVal, Align AddrAlign,
|
||||
AtomicOrdering MemOpOrder, Value *&Success,
|
||||
Value *&NewLoaded) {
|
||||
AtomicOrdering MemOpOrder, SyncScope::ID SSID,
|
||||
Value *&Success, Value *&NewLoaded) {
|
||||
Type *OrigTy = NewVal->getType();
|
||||
|
||||
// This code can go away when cmpxchg supports FP types.
|
||||
@ -483,7 +483,7 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
|
||||
|
||||
Value *Pair = Builder.CreateAtomicCmpXchg(
|
||||
Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
|
||||
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
|
||||
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
|
||||
Success = Builder.CreateExtractValue(Pair, 1, "success");
|
||||
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
|
||||
|
||||
@ -768,6 +768,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
|
||||
void AtomicExpand::expandPartwordAtomicRMW(
|
||||
AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
|
||||
AtomicOrdering MemOpOrder = AI->getOrdering();
|
||||
SyncScope::ID SSID = AI->getSyncScopeID();
|
||||
|
||||
IRBuilder<> Builder(AI);
|
||||
|
||||
@ -788,7 +789,8 @@ void AtomicExpand::expandPartwordAtomicRMW(
|
||||
if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
|
||||
OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
|
||||
PMV.AlignedAddrAlignment, MemOpOrder,
|
||||
PerformPartwordOp, createCmpXchgInstFun);
|
||||
SSID, PerformPartwordOp,
|
||||
createCmpXchgInstFun);
|
||||
} else {
|
||||
assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
|
||||
OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
|
||||
@ -1392,7 +1394,7 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
|
||||
|
||||
Value *AtomicExpand::insertRMWCmpXchgLoop(
|
||||
IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
|
||||
AtomicOrdering MemOpOrder,
|
||||
AtomicOrdering MemOpOrder, SyncScope::ID SSID,
|
||||
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
|
||||
CreateCmpXchgInstFun CreateCmpXchg) {
|
||||
LLVMContext &Ctx = Builder.getContext();
|
||||
@ -1440,7 +1442,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
|
||||
MemOpOrder == AtomicOrdering::Unordered
|
||||
? AtomicOrdering::Monotonic
|
||||
: MemOpOrder,
|
||||
Success, NewLoaded);
|
||||
SSID, Success, NewLoaded);
|
||||
assert(Success && NewLoaded);
|
||||
|
||||
Loaded->addIncoming(NewLoaded, LoopBB);
|
||||
@ -1477,7 +1479,7 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
|
||||
IRBuilder<> Builder(AI);
|
||||
Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
|
||||
Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
|
||||
AI->getOrdering(),
|
||||
AI->getOrdering(), AI->getSyncScopeID(),
|
||||
[&](IRBuilder<> &Builder, Value *Loaded) {
|
||||
return performAtomicOp(AI->getOperation(), Builder, Loaded,
|
||||
AI->getValOperand());
|
||||
@ -1628,11 +1630,11 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
|
||||
expandAtomicRMWToCmpXchg(
|
||||
I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
|
||||
Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
|
||||
Value *&Success, Value *&NewLoaded) {
|
||||
SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
|
||||
// Create the CAS instruction normally...
|
||||
AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
|
||||
Addr, Loaded, NewVal, Alignment, MemOpOrder,
|
||||
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
|
||||
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
|
||||
Success = Builder.CreateExtractValue(Pair, 1, "success");
|
||||
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
|
||||
|
||||
|
76
test/CodeGen/AMDGPU/expand-atomicrmw-syncscope.ll
Normal file
76
test/CodeGen/AMDGPU/expand-atomicrmw-syncscope.ll
Normal file
@ -0,0 +1,76 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; Check that syncscope is copied from atomicrmw to cmpxchg during expansion.
|
||||
; There should be no scc unless we have system scope.
|
||||
|
||||
; GCN-LABEL: {{^}}expand_atomicrmw_agent:
|
||||
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
|
||||
define void @expand_atomicrmw_agent(float addrspace(1)* nocapture %arg) {
|
||||
entry:
|
||||
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("agent") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}expand_atomicrmw_workgroup:
|
||||
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
|
||||
define void @expand_atomicrmw_workgroup(float addrspace(1)* nocapture %arg) {
|
||||
entry:
|
||||
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("workgroup") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}expand_atomicrmw_wavefront:
|
||||
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
|
||||
define void @expand_atomicrmw_wavefront(float addrspace(1)* nocapture %arg) {
|
||||
entry:
|
||||
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("wavefront") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}expand_atomicrmw_agent_one_as:
|
||||
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
|
||||
define void @expand_atomicrmw_agent_one_as(float addrspace(1)* nocapture %arg) {
|
||||
entry:
|
||||
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("agent-one-as") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}expand_atomicrmw_workgroup_one_as:
|
||||
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
|
||||
define void @expand_atomicrmw_workgroup_one_as(float addrspace(1)* nocapture %arg) {
|
||||
entry:
|
||||
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("workgroup-one-as") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}expand_atomicrmw_wavefront_one_as:
|
||||
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
|
||||
define void @expand_atomicrmw_wavefront_one_as(float addrspace(1)* nocapture %arg) {
|
||||
entry:
|
||||
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("wavefront-one-as") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}expand_atomicrmw_singlethread_one_as:
|
||||
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
|
||||
define void @expand_atomicrmw_singlethread_one_as(float addrspace(1)* nocapture %arg) {
|
||||
entry:
|
||||
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("singlethread-one-as") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}expand_atomicrmw_one_as:
|
||||
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc scc{{$}}
|
||||
define void @expand_atomicrmw_one_as(float addrspace(1)* nocapture %arg) {
|
||||
entry:
|
||||
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("one-as") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}expand_atomicrmw_system:
|
||||
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc scc{{$}}
|
||||
define void @expand_atomicrmw_system(float addrspace(1)* nocapture %arg) {
|
||||
entry:
|
||||
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 monotonic, align 4
|
||||
ret void
|
||||
}
|
@ -474,7 +474,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc scc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -505,11 +505,8 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(double addrspa
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc scc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_wbinvl1_vol
|
||||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
|
||||
@ -596,7 +593,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr,
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
|
||||
; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc scc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -704,12 +701,13 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc scc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_wbinvl1_vol
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
|
||||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
@ -783,12 +781,13 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
|
||||
; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc scc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_wbinvl1_vol
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
|
||||
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
|
@ -184,11 +184,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(float addrspace(1)* %
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc scc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_wbinvl1_vol
|
||||
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
|
||||
@ -369,11 +366,8 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(float addrspace(1)*
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc scc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_wbinvl1_vol
|
||||
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
|
||||
@ -524,7 +518,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX900-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX900-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX900-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX900-NEXT: buffer_wbinvl1_vol
|
||||
@ -550,7 +544,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
||||
; GFX908-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX908-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX908-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: buffer_wbinvl1_vol
|
||||
@ -577,7 +571,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc scc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -605,7 +599,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX10-NEXT: v_add_f32_e32 v0, 4.0, v1
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -13,7 +13,7 @@ define float @test_atomicrmw_fadd_f32_flat(float* %ptr, float %value) {
|
||||
; CI-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
|
||||
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -30,7 +30,7 @@ define float @test_atomicrmw_fadd_f32_flat(float* %ptr, float %value) {
|
||||
; GFX9-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
|
||||
; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -47,7 +47,7 @@ define float @test_atomicrmw_fadd_f32_flat(float* %ptr, float %value) {
|
||||
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
|
||||
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -69,7 +69,7 @@ define float @test_atomicrmw_fadd_f32_global(float addrspace(1)* %ptr, float %va
|
||||
; CI-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -86,7 +86,7 @@ define float @test_atomicrmw_fadd_f32_global(float addrspace(1)* %ptr, float %va
|
||||
; GFX9-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -103,7 +103,7 @@ define float @test_atomicrmw_fadd_f32_global(float addrspace(1)* %ptr, float %va
|
||||
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -125,7 +125,7 @@ define void @test_atomicrmw_fadd_f32_global_no_use_ieee(float addrspace(1)* %ptr
|
||||
; CI-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -142,7 +142,7 @@ define void @test_atomicrmw_fadd_f32_global_no_use_ieee(float addrspace(1)* %ptr
|
||||
; GFX9-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -159,7 +159,7 @@ define void @test_atomicrmw_fadd_f32_global_no_use_ieee(float addrspace(1)* %ptr
|
||||
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -181,7 +181,7 @@ define void @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(float addrspace(
|
||||
; CI-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -198,7 +198,7 @@ define void @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(float addrspace(
|
||||
; GFX9-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -207,7 +207,7 @@ define void @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(float addrspace(
|
||||
; GFX9-NEXT: ret void
|
||||
;
|
||||
; GFX908-LABEL: @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(
|
||||
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd float addrspace(1)* [[PTR:%.*]], float [[VALUE:%.*]] seq_cst
|
||||
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd float addrspace(1)* [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
|
||||
; GFX908-NEXT: ret void
|
||||
;
|
||||
%res = atomicrmw fadd float addrspace(1)* %ptr, float %value seq_cst
|
||||
@ -224,7 +224,7 @@ define float @test_atomicrmw_fadd_f32_local(float addrspace(3)* %ptr, float %val
|
||||
; CI-NEXT: [[TMP2:%.*]] = bitcast float addrspace(3)* [[PTR]] to i32 addrspace(3)*
|
||||
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(3)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(3)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
|
||||
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
@ -233,11 +233,11 @@ define float @test_atomicrmw_fadd_f32_local(float addrspace(3)* %ptr, float %val
|
||||
; CI-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
; GFX9-LABEL: @test_atomicrmw_fadd_f32_local(
|
||||
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd float addrspace(3)* [[PTR:%.*]], float [[VALUE:%.*]] seq_cst
|
||||
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd float addrspace(3)* [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
|
||||
; GFX9-NEXT: ret float [[RES]]
|
||||
;
|
||||
; GFX908-LABEL: @test_atomicrmw_fadd_f32_local(
|
||||
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd float addrspace(3)* [[PTR:%.*]], float [[VALUE:%.*]] seq_cst
|
||||
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd float addrspace(3)* [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
|
||||
; GFX908-NEXT: ret float [[RES]]
|
||||
;
|
||||
%res = atomicrmw fadd float addrspace(3)* %ptr, float %value seq_cst
|
||||
@ -246,15 +246,15 @@ define float @test_atomicrmw_fadd_f32_local(float addrspace(3)* %ptr, float %val
|
||||
|
||||
define half @test_atomicrmw_fadd_f16_flat(half* %ptr, half %value) {
|
||||
; CI-LABEL: @test_atomicrmw_fadd_f16_flat(
|
||||
; CI-NEXT: [[RES:%.*]] = atomicrmw fadd half* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
|
||||
; CI-NEXT: [[RES:%.*]] = atomicrmw fadd half* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 2
|
||||
; CI-NEXT: ret half [[RES]]
|
||||
;
|
||||
; GFX9-LABEL: @test_atomicrmw_fadd_f16_flat(
|
||||
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd half* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
|
||||
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd half* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 2
|
||||
; GFX9-NEXT: ret half [[RES]]
|
||||
;
|
||||
; GFX908-LABEL: @test_atomicrmw_fadd_f16_flat(
|
||||
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd half* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
|
||||
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd half* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 2
|
||||
; GFX908-NEXT: ret half [[RES]]
|
||||
;
|
||||
%res = atomicrmw fadd half* %ptr, half %value seq_cst
|
||||
@ -263,15 +263,15 @@ define half @test_atomicrmw_fadd_f16_flat(half* %ptr, half %value) {
|
||||
|
||||
define half @test_atomicrmw_fadd_f16_global(half addrspace(1)* %ptr, half %value) {
|
||||
; CI-LABEL: @test_atomicrmw_fadd_f16_global(
|
||||
; CI-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
|
||||
; CI-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 2
|
||||
; CI-NEXT: ret half [[RES]]
|
||||
;
|
||||
; GFX9-LABEL: @test_atomicrmw_fadd_f16_global(
|
||||
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
|
||||
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 2
|
||||
; GFX9-NEXT: ret half [[RES]]
|
||||
;
|
||||
; GFX908-LABEL: @test_atomicrmw_fadd_f16_global(
|
||||
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
|
||||
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 2
|
||||
; GFX908-NEXT: ret half [[RES]]
|
||||
;
|
||||
%res = atomicrmw fadd half addrspace(1)* %ptr, half %value seq_cst
|
||||
@ -280,15 +280,15 @@ define half @test_atomicrmw_fadd_f16_global(half addrspace(1)* %ptr, half %value
|
||||
|
||||
define half @test_atomicrmw_fadd_f16_local(half addrspace(3)* %ptr, half %value) {
|
||||
; CI-LABEL: @test_atomicrmw_fadd_f16_local(
|
||||
; CI-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(3)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
|
||||
; CI-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(3)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 2
|
||||
; CI-NEXT: ret half [[RES]]
|
||||
;
|
||||
; GFX9-LABEL: @test_atomicrmw_fadd_f16_local(
|
||||
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(3)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
|
||||
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(3)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 2
|
||||
; GFX9-NEXT: ret half [[RES]]
|
||||
;
|
||||
; GFX908-LABEL: @test_atomicrmw_fadd_f16_local(
|
||||
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(3)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
|
||||
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(3)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 2
|
||||
; GFX908-NEXT: ret half [[RES]]
|
||||
;
|
||||
%res = atomicrmw fadd half addrspace(3)* %ptr, half %value seq_cst
|
||||
@ -305,7 +305,7 @@ define double @test_atomicrmw_fadd_f64_flat(double* %ptr, double %value) {
|
||||
; CI-NEXT: [[TMP2:%.*]] = bitcast double* [[PTR]] to i64*
|
||||
; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
|
||||
; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
|
||||
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
|
||||
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
|
||||
; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
|
||||
@ -322,7 +322,7 @@ define double @test_atomicrmw_fadd_f64_flat(double* %ptr, double %value) {
|
||||
; GFX9-NEXT: [[TMP2:%.*]] = bitcast double* [[PTR]] to i64*
|
||||
; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
|
||||
; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
|
||||
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
|
||||
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
|
||||
; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
|
||||
@ -339,7 +339,7 @@ define double @test_atomicrmw_fadd_f64_flat(double* %ptr, double %value) {
|
||||
; GFX908-NEXT: [[TMP2:%.*]] = bitcast double* [[PTR]] to i64*
|
||||
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
|
||||
; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
|
||||
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
|
||||
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
|
||||
; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
|
||||
@ -361,7 +361,7 @@ define double @test_atomicrmw_fadd_f64_global(double addrspace(1)* %ptr, double
|
||||
; CI-NEXT: [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)*
|
||||
; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
|
||||
; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
|
||||
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
|
||||
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
|
||||
; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
|
||||
@ -378,7 +378,7 @@ define double @test_atomicrmw_fadd_f64_global(double addrspace(1)* %ptr, double
|
||||
; GFX9-NEXT: [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)*
|
||||
; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
|
||||
; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
|
||||
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
|
||||
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
|
||||
; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
|
||||
@ -395,7 +395,7 @@ define double @test_atomicrmw_fadd_f64_global(double addrspace(1)* %ptr, double
|
||||
; GFX908-NEXT: [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)*
|
||||
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
|
||||
; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
|
||||
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
|
||||
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
|
||||
; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
|
||||
@ -417,7 +417,7 @@ define double @test_atomicrmw_fadd_f64_local(double addrspace(3)* %ptr, double %
|
||||
; CI-NEXT: [[TMP2:%.*]] = bitcast double addrspace(3)* [[PTR]] to i64 addrspace(3)*
|
||||
; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
|
||||
; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(3)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(3)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
|
||||
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
|
||||
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
|
||||
; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
|
||||
@ -434,7 +434,7 @@ define double @test_atomicrmw_fadd_f64_local(double addrspace(3)* %ptr, double %
|
||||
; GFX9-NEXT: [[TMP2:%.*]] = bitcast double addrspace(3)* [[PTR]] to i64 addrspace(3)*
|
||||
; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
|
||||
; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(3)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(3)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
|
||||
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
|
||||
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
|
||||
; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
|
||||
@ -451,7 +451,7 @@ define double @test_atomicrmw_fadd_f64_local(double addrspace(3)* %ptr, double %
|
||||
; GFX908-NEXT: [[TMP2:%.*]] = bitcast double addrspace(3)* [[PTR]] to i64 addrspace(3)*
|
||||
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
|
||||
; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(3)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(3)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
|
||||
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
|
||||
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
|
||||
; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
|
||||
@ -463,4 +463,116 @@ define double @test_atomicrmw_fadd_f64_local(double addrspace(3)* %ptr, double %
|
||||
ret double %res
|
||||
}
|
||||
|
||||
define float @test_atomicrmw_fadd_f32_global_agent(float addrspace(1)* %ptr, float %value) {
|
||||
; CI-LABEL: @test_atomicrmw_fadd_f32_global_agent(
|
||||
; CI-NEXT: [[TMP1:%.*]] = load float, float addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; CI: atomicrmw.start:
|
||||
; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
|
||||
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
|
||||
; CI-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") monotonic monotonic, align 4
|
||||
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; CI: atomicrmw.end:
|
||||
; CI-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
; GFX9-LABEL: @test_atomicrmw_fadd_f32_global_agent(
|
||||
; GFX9-NEXT: [[TMP1:%.*]] = load float, float addrspace(1)* [[PTR:%.*]], align 4
|
||||
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; GFX9: atomicrmw.start:
|
||||
; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
|
||||
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
|
||||
; GFX9-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") monotonic monotonic, align 4
|
||||
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; GFX9: atomicrmw.end:
|
||||
; GFX9-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
; GFX908-LABEL: @test_atomicrmw_fadd_f32_global_agent(
|
||||
; GFX908-NEXT: [[TMP1:%.*]] = load float, float addrspace(1)* [[PTR:%.*]], align 4
|
||||
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; GFX908: atomicrmw.start:
|
||||
; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
|
||||
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
|
||||
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") monotonic monotonic, align 4
|
||||
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; GFX908: atomicrmw.end:
|
||||
; GFX908-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
%res = atomicrmw fadd float addrspace(1)* %ptr, float %value syncscope("agent") monotonic
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @test_atomicrmw_fadd_f32_global_one_as(float addrspace(1)* %ptr, float %value) {
|
||||
; CI-LABEL: @test_atomicrmw_fadd_f32_global_one_as(
|
||||
; CI-NEXT: [[TMP1:%.*]] = load float, float addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; CI: atomicrmw.start:
|
||||
; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
|
||||
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
|
||||
; CI-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("one-as") monotonic monotonic, align 4
|
||||
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; CI: atomicrmw.end:
|
||||
; CI-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
; GFX9-LABEL: @test_atomicrmw_fadd_f32_global_one_as(
|
||||
; GFX9-NEXT: [[TMP1:%.*]] = load float, float addrspace(1)* [[PTR:%.*]], align 4
|
||||
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; GFX9: atomicrmw.start:
|
||||
; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
|
||||
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
|
||||
; GFX9-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("one-as") monotonic monotonic, align 4
|
||||
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; GFX9: atomicrmw.end:
|
||||
; GFX9-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
; GFX908-LABEL: @test_atomicrmw_fadd_f32_global_one_as(
|
||||
; GFX908-NEXT: [[TMP1:%.*]] = load float, float addrspace(1)* [[PTR:%.*]], align 4
|
||||
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; GFX908: atomicrmw.start:
|
||||
; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
|
||||
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
|
||||
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
|
||||
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
|
||||
; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
|
||||
; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("one-as") monotonic monotonic, align 4
|
||||
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
|
||||
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; GFX908: atomicrmw.end:
|
||||
; GFX908-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
%res = atomicrmw fadd float addrspace(1)* %ptr, float %value syncscope("one-as") monotonic
|
||||
ret float %res
|
||||
}
|
||||
|
||||
attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
|
||||
|
Loading…
Reference in New Issue
Block a user