mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[AMDGPU] fix LDS f32 intrinsics
- Use a qualified pointer address space in the intrinsics class to avoid the .f32 name mangling.
- Rename the overly generic "atomic" intrinsic names to "ds".
- Add the missing intrinsics to AMDGPUTTIImpl::getTgtMemIntrinsic.

Reviewed by: b-sumner
Differential Revision: https://reviews.llvm.org/D42383
llvm-svn: 323516
This commit is contained in:
parent
22e0ea0640
commit
28862346c2
@ -295,10 +295,10 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
|
|||||||
def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
|
def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
|
||||||
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
|
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
|
||||||
|
|
||||||
class AMDGPUAtomicF32Intrin<string clang_builtin> :
|
class AMDGPULDSF32Intrin<string clang_builtin> :
|
||||||
GCCBuiltin<clang_builtin>,
|
GCCBuiltin<clang_builtin>,
|
||||||
Intrinsic<[llvm_float_ty],
|
Intrinsic<[llvm_float_ty],
|
||||||
[LLVMAnyPointerType<llvm_float_ty>,
|
[LLVMQualPointerType<llvm_float_ty, 3>,
|
||||||
llvm_float_ty,
|
llvm_float_ty,
|
||||||
llvm_i32_ty, // ordering
|
llvm_i32_ty, // ordering
|
||||||
llvm_i32_ty, // scope
|
llvm_i32_ty, // scope
|
||||||
@ -306,9 +306,9 @@ class AMDGPUAtomicF32Intrin<string clang_builtin> :
|
|||||||
[IntrArgMemOnly, NoCapture<0>]
|
[IntrArgMemOnly, NoCapture<0>]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def int_amdgcn_atomic_fadd : AMDGPUAtomicF32Intrin<"__builtin_amdgcn_ds_fadd">;
|
def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fadd">;
|
||||||
def int_amdgcn_atomic_fmin : AMDGPUAtomicF32Intrin<"__builtin_amdgcn_ds_fmin">;
|
def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmin">;
|
||||||
def int_amdgcn_atomic_fmax : AMDGPUAtomicF32Intrin<"__builtin_amdgcn_ds_fmax">;
|
def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmax">;
|
||||||
|
|
||||||
class AMDGPUImageLoad<bit NoMem = 0> : Intrinsic <
|
class AMDGPUImageLoad<bit NoMem = 0> : Intrinsic <
|
||||||
[llvm_anyfloat_ty], // vdata(VGPR)
|
[llvm_anyfloat_ty], // vdata(VGPR)
|
||||||
|
@ -292,7 +292,10 @@ bool AMDGPUTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
|
|||||||
MemIntrinsicInfo &Info) const {
|
MemIntrinsicInfo &Info) const {
|
||||||
switch (Inst->getIntrinsicID()) {
|
switch (Inst->getIntrinsicID()) {
|
||||||
case Intrinsic::amdgcn_atomic_inc:
|
case Intrinsic::amdgcn_atomic_inc:
|
||||||
case Intrinsic::amdgcn_atomic_dec: {
|
case Intrinsic::amdgcn_atomic_dec:
|
||||||
|
case Intrinsic::amdgcn_ds_fadd:
|
||||||
|
case Intrinsic::amdgcn_ds_fmin:
|
||||||
|
case Intrinsic::amdgcn_ds_fmax: {
|
||||||
auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
|
auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
|
||||||
auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
|
auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
|
||||||
if (!Ordering || !Volatile)
|
if (!Ordering || !Volatile)
|
||||||
@ -475,9 +478,9 @@ static bool isIntrinsicSourceOfDivergence(const IntrinsicInst *I) {
|
|||||||
case Intrinsic::r600_read_tidig_z:
|
case Intrinsic::r600_read_tidig_z:
|
||||||
case Intrinsic::amdgcn_atomic_inc:
|
case Intrinsic::amdgcn_atomic_inc:
|
||||||
case Intrinsic::amdgcn_atomic_dec:
|
case Intrinsic::amdgcn_atomic_dec:
|
||||||
case Intrinsic::amdgcn_atomic_fadd:
|
case Intrinsic::amdgcn_ds_fadd:
|
||||||
case Intrinsic::amdgcn_atomic_fmin:
|
case Intrinsic::amdgcn_ds_fmin:
|
||||||
case Intrinsic::amdgcn_atomic_fmax:
|
case Intrinsic::amdgcn_ds_fmax:
|
||||||
case Intrinsic::amdgcn_image_atomic_swap:
|
case Intrinsic::amdgcn_image_atomic_swap:
|
||||||
case Intrinsic::amdgcn_image_atomic_add:
|
case Intrinsic::amdgcn_image_atomic_add:
|
||||||
case Intrinsic::amdgcn_image_atomic_sub:
|
case Intrinsic::amdgcn_image_atomic_sub:
|
||||||
|
@ -566,9 +566,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||||||
switch (IntrID) {
|
switch (IntrID) {
|
||||||
case Intrinsic::amdgcn_atomic_inc:
|
case Intrinsic::amdgcn_atomic_inc:
|
||||||
case Intrinsic::amdgcn_atomic_dec:
|
case Intrinsic::amdgcn_atomic_dec:
|
||||||
case Intrinsic::amdgcn_atomic_fadd:
|
case Intrinsic::amdgcn_ds_fadd:
|
||||||
case Intrinsic::amdgcn_atomic_fmin:
|
case Intrinsic::amdgcn_ds_fmin:
|
||||||
case Intrinsic::amdgcn_atomic_fmax: {
|
case Intrinsic::amdgcn_ds_fmax: {
|
||||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||||
Info.memVT = MVT::getVT(CI.getType());
|
Info.memVT = MVT::getVT(CI.getType());
|
||||||
Info.ptrVal = CI.getOperand(0);
|
Info.ptrVal = CI.getOperand(0);
|
||||||
@ -807,9 +807,9 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
|
|||||||
switch (II->getIntrinsicID()) {
|
switch (II->getIntrinsicID()) {
|
||||||
case Intrinsic::amdgcn_atomic_inc:
|
case Intrinsic::amdgcn_atomic_inc:
|
||||||
case Intrinsic::amdgcn_atomic_dec:
|
case Intrinsic::amdgcn_atomic_dec:
|
||||||
case Intrinsic::amdgcn_atomic_fadd:
|
case Intrinsic::amdgcn_ds_fadd:
|
||||||
case Intrinsic::amdgcn_atomic_fmin:
|
case Intrinsic::amdgcn_ds_fmin:
|
||||||
case Intrinsic::amdgcn_atomic_fmax: {
|
case Intrinsic::amdgcn_ds_fmax: {
|
||||||
Value *Ptr = II->getArgOperand(0);
|
Value *Ptr = II->getArgOperand(0);
|
||||||
AccessTy = II->getType();
|
AccessTy = II->getType();
|
||||||
Ops.push_back(Ptr);
|
Ops.push_back(Ptr);
|
||||||
@ -4827,9 +4827,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||||||
switch (IntrID) {
|
switch (IntrID) {
|
||||||
case Intrinsic::amdgcn_atomic_inc:
|
case Intrinsic::amdgcn_atomic_inc:
|
||||||
case Intrinsic::amdgcn_atomic_dec:
|
case Intrinsic::amdgcn_atomic_dec:
|
||||||
case Intrinsic::amdgcn_atomic_fadd:
|
case Intrinsic::amdgcn_ds_fadd:
|
||||||
case Intrinsic::amdgcn_atomic_fmin:
|
case Intrinsic::amdgcn_ds_fmin:
|
||||||
case Intrinsic::amdgcn_atomic_fmax: {
|
case Intrinsic::amdgcn_ds_fmax: {
|
||||||
MemSDNode *M = cast<MemSDNode>(Op);
|
MemSDNode *M = cast<MemSDNode>(Op);
|
||||||
unsigned Opc;
|
unsigned Opc;
|
||||||
switch (IntrID) {
|
switch (IntrID) {
|
||||||
@ -4839,13 +4839,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||||||
case Intrinsic::amdgcn_atomic_dec:
|
case Intrinsic::amdgcn_atomic_dec:
|
||||||
Opc = AMDGPUISD::ATOMIC_DEC;
|
Opc = AMDGPUISD::ATOMIC_DEC;
|
||||||
break;
|
break;
|
||||||
case Intrinsic::amdgcn_atomic_fadd:
|
case Intrinsic::amdgcn_ds_fadd:
|
||||||
Opc = AMDGPUISD::ATOMIC_LOAD_FADD;
|
Opc = AMDGPUISD::ATOMIC_LOAD_FADD;
|
||||||
break;
|
break;
|
||||||
case Intrinsic::amdgcn_atomic_fmin:
|
case Intrinsic::amdgcn_ds_fmin:
|
||||||
Opc = AMDGPUISD::ATOMIC_LOAD_FMIN;
|
Opc = AMDGPUISD::ATOMIC_LOAD_FMIN;
|
||||||
break;
|
break;
|
||||||
case Intrinsic::amdgcn_atomic_fmax:
|
case Intrinsic::amdgcn_ds_fmax:
|
||||||
Opc = AMDGPUISD::ATOMIC_LOAD_FMAX;
|
Opc = AMDGPUISD::ATOMIC_LOAD_FMAX;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -261,9 +261,9 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
|
|||||||
switch (II->getIntrinsicID()) {
|
switch (II->getIntrinsicID()) {
|
||||||
case Intrinsic::amdgcn_atomic_inc:
|
case Intrinsic::amdgcn_atomic_inc:
|
||||||
case Intrinsic::amdgcn_atomic_dec:
|
case Intrinsic::amdgcn_atomic_dec:
|
||||||
case Intrinsic::amdgcn_atomic_fadd:
|
case Intrinsic::amdgcn_ds_fadd:
|
||||||
case Intrinsic::amdgcn_atomic_fmin:
|
case Intrinsic::amdgcn_ds_fmin:
|
||||||
case Intrinsic::amdgcn_atomic_fmax: {
|
case Intrinsic::amdgcn_ds_fmax: {
|
||||||
const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4));
|
const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4));
|
||||||
if (!IsVolatile || !IsVolatile->isZero())
|
if (!IsVolatile || !IsVolatile->isZero())
|
||||||
return false;
|
return false;
|
||||||
@ -292,9 +292,9 @@ void InferAddressSpaces::collectRewritableIntrinsicOperands(
|
|||||||
case Intrinsic::objectsize:
|
case Intrinsic::objectsize:
|
||||||
case Intrinsic::amdgcn_atomic_inc:
|
case Intrinsic::amdgcn_atomic_inc:
|
||||||
case Intrinsic::amdgcn_atomic_dec:
|
case Intrinsic::amdgcn_atomic_dec:
|
||||||
case Intrinsic::amdgcn_atomic_fadd:
|
case Intrinsic::amdgcn_ds_fadd:
|
||||||
case Intrinsic::amdgcn_atomic_fmin:
|
case Intrinsic::amdgcn_ds_fmin:
|
||||||
case Intrinsic::amdgcn_atomic_fmax:
|
case Intrinsic::amdgcn_ds_fmax:
|
||||||
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
|
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
|
||||||
PostorderStack, Visited);
|
PostorderStack, Visited);
|
||||||
break;
|
break;
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
|
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
|
||||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
|
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
|
||||||
|
|
||||||
declare float @llvm.amdgcn.atomic.fadd.f32(float addrspace(3)* nocapture, float, i32, i32, i1)
|
declare float @llvm.amdgcn.ds.fadd(float addrspace(3)* nocapture, float, i32, i32, i1)
|
||||||
declare float @llvm.amdgcn.atomic.fmin.f32(float addrspace(3)* nocapture, float, i32, i32, i1)
|
declare float @llvm.amdgcn.ds.fmin(float addrspace(3)* nocapture, float, i32, i32, i1)
|
||||||
declare float @llvm.amdgcn.atomic.fmax.f32(float addrspace(3)* nocapture, float, i32, i32, i1)
|
declare float @llvm.amdgcn.ds.fmax(float addrspace(3)* nocapture, float, i32, i32, i1)
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}lds_atomic_fadd_f32:
|
; GCN-LABEL: {{^}}lds_ds_fadd:
|
||||||
; VI-DAG: s_mov_b32 m0
|
; VI-DAG: s_mov_b32 m0
|
||||||
; GFX9-NOT: m0
|
; GFX9-NOT: m0
|
||||||
; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
|
; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
|
||||||
@ -13,20 +13,20 @@ declare float @llvm.amdgcn.atomic.fmax.f32(float addrspace(3)* nocapture, float,
|
|||||||
; GCN: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
|
; GCN: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
|
||||||
; GCN: s_waitcnt lgkmcnt(1)
|
; GCN: s_waitcnt lgkmcnt(1)
|
||||||
; GCN: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
|
; GCN: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
|
||||||
define amdgpu_kernel void @lds_atomic_fadd_f32(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
|
define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
|
||||||
%idx.add = add nuw i32 %idx, 4
|
%idx.add = add nuw i32 %idx, 4
|
||||||
%shl0 = shl i32 %idx.add, 3
|
%shl0 = shl i32 %idx.add, 3
|
||||||
%shl1 = shl i32 %idx.add, 4
|
%shl1 = shl i32 %idx.add, 4
|
||||||
%ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
|
%ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
|
||||||
%ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
|
%ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
|
||||||
%a1 = call float @llvm.amdgcn.atomic.fadd.f32(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false)
|
%a1 = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false)
|
||||||
%a2 = call float @llvm.amdgcn.atomic.fadd.f32(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false)
|
%a2 = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false)
|
||||||
%a3 = call float @llvm.amdgcn.atomic.fadd.f32(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false)
|
%a3 = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false)
|
||||||
store float %a3, float addrspace(1)* %out
|
store float %a3, float addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}lds_atomic_fmin_f32:
|
; GCN-LABEL: {{^}}lds_ds_fmin:
|
||||||
; VI-DAG: s_mov_b32 m0
|
; VI-DAG: s_mov_b32 m0
|
||||||
; GFX9-NOT: m0
|
; GFX9-NOT: m0
|
||||||
; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
|
; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
|
||||||
@ -34,20 +34,20 @@ define amdgpu_kernel void @lds_atomic_fadd_f32(float addrspace(1)* %out, float a
|
|||||||
; GCN: ds_min_f32 [[V3:v[0-9]+]], [[V0]] offset:64
|
; GCN: ds_min_f32 [[V3:v[0-9]+]], [[V0]] offset:64
|
||||||
; GCN: s_waitcnt lgkmcnt(1)
|
; GCN: s_waitcnt lgkmcnt(1)
|
||||||
; GCN: ds_min_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
|
; GCN: ds_min_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
|
||||||
define amdgpu_kernel void @lds_atomic_fmin_f32(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
|
define amdgpu_kernel void @lds_ds_fmin(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
|
||||||
%idx.add = add nuw i32 %idx, 4
|
%idx.add = add nuw i32 %idx, 4
|
||||||
%shl0 = shl i32 %idx.add, 3
|
%shl0 = shl i32 %idx.add, 3
|
||||||
%shl1 = shl i32 %idx.add, 4
|
%shl1 = shl i32 %idx.add, 4
|
||||||
%ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
|
%ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
|
||||||
%ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
|
%ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
|
||||||
%a1 = call float @llvm.amdgcn.atomic.fmin.f32(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false)
|
%a1 = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false)
|
||||||
%a2 = call float @llvm.amdgcn.atomic.fmin.f32(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false)
|
%a2 = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false)
|
||||||
%a3 = call float @llvm.amdgcn.atomic.fmin.f32(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false)
|
%a3 = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false)
|
||||||
store float %a3, float addrspace(1)* %out
|
store float %a3, float addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}lds_atomic_fmax_f32:
|
; GCN-LABEL: {{^}}lds_ds_fmax:
|
||||||
; VI-DAG: s_mov_b32 m0
|
; VI-DAG: s_mov_b32 m0
|
||||||
; GFX9-NOT: m0
|
; GFX9-NOT: m0
|
||||||
; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
|
; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
|
||||||
@ -55,15 +55,15 @@ define amdgpu_kernel void @lds_atomic_fmin_f32(float addrspace(1)* %out, float a
|
|||||||
; GCN: ds_max_f32 [[V3:v[0-9]+]], [[V0]] offset:64
|
; GCN: ds_max_f32 [[V3:v[0-9]+]], [[V0]] offset:64
|
||||||
; GCN: s_waitcnt lgkmcnt(1)
|
; GCN: s_waitcnt lgkmcnt(1)
|
||||||
; GCN: ds_max_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
|
; GCN: ds_max_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
|
||||||
define amdgpu_kernel void @lds_atomic_fmax_f32(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
|
define amdgpu_kernel void @lds_ds_fmax(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
|
||||||
%idx.add = add nuw i32 %idx, 4
|
%idx.add = add nuw i32 %idx, 4
|
||||||
%shl0 = shl i32 %idx.add, 3
|
%shl0 = shl i32 %idx.add, 3
|
||||||
%shl1 = shl i32 %idx.add, 4
|
%shl1 = shl i32 %idx.add, 4
|
||||||
%ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
|
%ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
|
||||||
%ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
|
%ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
|
||||||
%a1 = call float @llvm.amdgcn.atomic.fmax.f32(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false)
|
%a1 = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false)
|
||||||
%a2 = call float @llvm.amdgcn.atomic.fmax.f32(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false)
|
%a2 = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false)
|
||||||
%a3 = call float @llvm.amdgcn.atomic.fmax.f32(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false)
|
%a3 = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false)
|
||||||
store float %a3, float addrspace(1)* %out
|
store float %a3, float addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user