mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
AMDGPU: Update tests to use modern buffer intrinsics
This commit is contained in:
parent
f9eef6e26a
commit
104f9f96d4
@ -77,13 +77,13 @@ entry:
|
||||
%e = getelementptr [2 x i32], [2 x i32] addrspace(5)* %v1a, i32 0, i32 %idx
|
||||
%x = load i32, i32 addrspace(5)* %e
|
||||
%xf = bitcast i32 %x to float
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %xf, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %xf, <4 x i32> undef, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "amdgpu-git-ptr-high"="0x1234" }
|
||||
|
||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg)
|
||||
|
||||
|
||||
; Check we have CS_NUM_USED_VGPRS in PAL metadata.
|
||||
|
@ -49,12 +49,12 @@ else:
|
||||
|
||||
end:
|
||||
%r = phi float [ %v.if, %if ], [ %v.else, %else ]
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %r, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %r, <4 x i32> undef, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind writeonly }
|
||||
|
@ -29,7 +29,7 @@ for.body:
|
||||
br i1 %cc, label %mid.loop, label %for.end
|
||||
|
||||
mid.loop:
|
||||
%v = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %tid, i32 %i, i1 false, i1 false)
|
||||
%v = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %tid, i32 %i, i32 0, i32 0)
|
||||
%cc2 = fcmp oge float %v, 0.0
|
||||
br i1 %cc2, label %end.loop, label %for.end
|
||||
|
||||
@ -48,7 +48,7 @@ end:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #0
|
||||
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
||||
|
||||
attributes #0 = { nounwind readonly }
|
||||
|
@ -9,8 +9,8 @@
|
||||
;CHECK: v_add_f32_e32
|
||||
define amdgpu_ps float @test1(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src0, %src1
|
||||
%out.0 = call float @llvm.amdgcn.softwqm.f32(float %out)
|
||||
ret float %out.0
|
||||
@ -25,8 +25,8 @@ main_body:
|
||||
;CHECK: v_add_f32_e32
|
||||
define amdgpu_ps float @test2(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src0, %src1
|
||||
%out.0 = bitcast float %out to i32
|
||||
%out.1 = call i32 @llvm.amdgcn.softwqm.i32(i32 %out.0)
|
||||
@ -45,10 +45,10 @@ main_body:
|
||||
;CHECK: v_add_f32_e32
|
||||
define amdgpu_ps float @test_softwqm1(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%temp = fadd float %src0, %src1
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %temp, <4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %temp, <4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %temp, %temp
|
||||
%out.0 = call float @llvm.amdgcn.softwqm.f32(float %out)
|
||||
ret float %out.0
|
||||
@ -67,11 +67,11 @@ main_body:
|
||||
;CHECK: v_add_f32_e32
|
||||
define amdgpu_ps float @test_softwqm2(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%temp = fadd float %src0, %src1
|
||||
%temp.0 = call float @llvm.amdgcn.wqm.f32(float %temp)
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %temp.0, <4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %temp.0, <4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %temp, %temp
|
||||
%out.0 = call float @llvm.amdgcn.softwqm.f32(float %out)
|
||||
ret float %out.0
|
||||
@ -89,9 +89,9 @@ main_body:
|
||||
;CHECK-NOT: s_wqm_b64
|
||||
define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %src0, <4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %src0, <4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%temp = fadd float %src0, %src1
|
||||
%temp.0 = call float @llvm.amdgcn.wwm.f32(float %temp)
|
||||
%out = fadd float %temp.0, %temp.0
|
||||
@ -115,14 +115,14 @@ main_body:
|
||||
br i1 %cmp, label %IF, label %ELSE
|
||||
|
||||
IF:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src0, %src1
|
||||
%data.if = call float @llvm.amdgcn.softwqm.f32(float %out)
|
||||
br label %END
|
||||
|
||||
ELSE:
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i32 0, i32 0)
|
||||
br label %END
|
||||
|
||||
END:
|
||||
@ -157,14 +157,14 @@ main_body:
|
||||
br i1 %cmp, label %IF, label %ELSE
|
||||
|
||||
IF:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src0, %src1
|
||||
%data.if = call float @llvm.amdgcn.softwqm.f32(float %out)
|
||||
br label %END
|
||||
|
||||
ELSE:
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data.sample, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %data.sample, <4 x i32> undef, i32 %c, i32 0, i32 0, i32 0)
|
||||
br label %END
|
||||
|
||||
END:
|
||||
@ -172,9 +172,9 @@ END:
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
|
||||
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2
|
||||
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
|
||||
declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #2
|
||||
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #2
|
||||
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #3
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
|
||||
declare void @llvm.amdgcn.kill(i1) #1
|
||||
|
@ -22,7 +22,7 @@ loop:
|
||||
br i1 %tmp27, label %then, label %endif
|
||||
|
||||
then: ; preds = %bb
|
||||
call void @llvm.amdgcn.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i1 false, i1 false) #1
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
|
||||
br label %endif
|
||||
|
||||
endif: ; preds = %bb28, %bb
|
||||
@ -49,7 +49,7 @@ loop:
|
||||
%tmp23phi = phi i32 [ %tmp23, %loop ], [ 0, %entry ]
|
||||
%tmp23 = add nuw i32 %tmp23phi, 1
|
||||
%tmp27 = icmp ult i32 %arg, %tmp23
|
||||
call void @llvm.amdgcn.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i1 false, i1 false) #1
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
|
||||
br i1 %tmp27, label %loop, label %loopexit
|
||||
|
||||
loopexit:
|
||||
@ -76,7 +76,7 @@ loop:
|
||||
br i1 %tmp27, label %then, label %endif
|
||||
|
||||
then: ; preds = %bb
|
||||
call void @llvm.amdgcn.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i1 false, i1 false) #1
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
|
||||
br label %endif
|
||||
|
||||
endif: ; preds = %bb28, %bb
|
||||
@ -86,8 +86,6 @@ loopexit:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
|
||||
|
||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #3
|
||||
|
||||
attributes #3 = { nounwind writeonly }
|
||||
|
||||
attributes #0 = { nounwind writeonly }
|
||||
|
@ -42,7 +42,7 @@
|
||||
; W32: v_mov_b32_e32 v0, [[RES]]
|
||||
|
||||
define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
|
||||
%call = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i1 zeroext false, i1 zeroext false) #1
|
||||
%call = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) #1
|
||||
ret float %call
|
||||
}
|
||||
|
||||
@ -128,8 +128,8 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
|
||||
|
||||
define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, float addrspace(1)* %out0, float addrspace(1)* %out1) #0 {
|
||||
entry:
|
||||
%val0 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i1 zeroext false, i1 zeroext false) #1
|
||||
%val1 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %j, i32 %c, i32 0, i1 zeroext false, i1 zeroext false) #1
|
||||
%val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) #1
|
||||
%val1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %j, i32 %c, i32 0, i32 0, i32 0) #1
|
||||
store volatile float %val0, float addrspace(1)* %out0
|
||||
store volatile float %val1, float addrspace(1)* %out1
|
||||
ret void
|
||||
@ -317,13 +317,13 @@ entry:
|
||||
define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, float addrspace(1)* %in, float addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" ()
|
||||
%val0 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %i, i32 %live.out.reg, i32 0, i1 zeroext false, i1 zeroext false) #1
|
||||
%val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %live.out.reg, i32 0, i32 0, i32 0) #1
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%cmp = icmp eq i32 %idx, 0
|
||||
br i1 %cmp, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
%val1 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %j, i32 %live.out.reg, i32 0, i1 zeroext false, i1 zeroext false) #1
|
||||
%val1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %j, i32 %live.out.reg, i32 0, i32 0, i32 0) #1
|
||||
br label %bb2
|
||||
|
||||
bb2:
|
||||
@ -333,7 +333,7 @@ bb2:
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1
|
||||
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
@ -9,7 +9,7 @@ define amdgpu_vs float @test_none(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i1 0, i1 0)
|
||||
%tmp7 = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i32 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
@ -19,7 +19,7 @@ define amdgpu_vs float @test_idxen(<4 x i32> addrspace(4)* inreg %base, i32 %i)
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i1 0, i1 0)
|
||||
%tmp7 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i32 0, i32 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
@ -29,7 +29,7 @@ define amdgpu_vs float @test_offen(<4 x i32> addrspace(4)* inreg %base, i32 %i)
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 undef, i1 0, i1 0)
|
||||
%tmp7 = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i32 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
@ -39,10 +39,12 @@ define amdgpu_vs float @test_both(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i1 0, i1 0)
|
||||
%tmp7 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i32 0, i32 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
|
||||
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #1
|
||||
declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) #1
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
@ -8,7 +8,7 @@
|
||||
; CHECK: s_cbranch_vccnz
|
||||
define amdgpu_ps float @main(<4 x i32> inreg %rsrc) {
|
||||
main_body:
|
||||
%v = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 0, i1 true, i1 false)
|
||||
%v = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 0, i32 1)
|
||||
%cc = fcmp une float %v, 1.000000e+00
|
||||
br i1 %cc, label %if, label %else
|
||||
|
||||
@ -22,7 +22,6 @@ else:
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #0
|
||||
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg) #0
|
||||
|
||||
attributes #0 = { nounwind readonly }
|
||||
|
@ -39,9 +39,10 @@ entry:
|
||||
|
||||
bb0:
|
||||
%desc = load <4 x i32>, <4 x i32>* %arg, align 8
|
||||
tail call void @llvm.amdgcn.buffer.store.f32(float undef, <4 x i32> %desc, i32 0, i32 undef, i1 zeroext false, i1 zeroext false)
|
||||
tail call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> %desc, i32 undef, i32 0, i32 0)
|
||||
br label %bb0
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind writeonly
|
||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1 immarg, i1 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
|
||||
|
||||
attributes #0 = { nounwind writeonly }
|
||||
|
@ -38,7 +38,7 @@ bb:
|
||||
%tmp16 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp15, align 16, !tbaa !0
|
||||
%tmp17 = add i32 %arg5, %arg7
|
||||
%tmp16.cast = bitcast <4 x i32> %tmp16 to <4 x i32>
|
||||
%tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp16.cast, i32 %tmp17, i32 0, i1 false, i1 false)
|
||||
%tmp18 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp16.cast, i32 %tmp17, i32 0, i32 0, i32 0)
|
||||
%tmp19 = extractelement <4 x float> %tmp18, i32 0
|
||||
%tmp20 = extractelement <4 x float> %tmp18, i32 1
|
||||
%tmp21 = extractelement <4 x float> %tmp18, i32 2
|
||||
@ -489,7 +489,7 @@ declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -20,7 +20,7 @@ main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 0
|
||||
%tmp10 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, !tbaa !0
|
||||
%tmp10.cast = bitcast <16 x i8> %tmp10 to <4 x i32>
|
||||
%tmp11 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp10.cast, i32 %arg6, i32 0, i1 false, i1 false)
|
||||
%tmp11 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp10.cast, i32 %arg6, i32 0, i32 0, i32 0)
|
||||
%tmp12 = extractelement <4 x float> %tmp11, i32 0
|
||||
%tmp13 = extractelement <4 x float> %tmp11, i32 1
|
||||
call void @llvm.amdgcn.s.barrier() #1
|
||||
@ -29,7 +29,7 @@ main_body:
|
||||
%tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 1
|
||||
%tmp17 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp16, !tbaa !0
|
||||
%tmp17.cast = bitcast <16 x i8> %tmp17 to <4 x i32>
|
||||
%tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp17.cast, i32 %arg6, i32 0, i1 false, i1 false)
|
||||
%tmp18 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp17.cast, i32 %arg6, i32 0, i32 0, i32 0)
|
||||
%tmp19 = extractelement <4 x float> %tmp18, i32 0
|
||||
%tmp20 = extractelement <4 x float> %tmp18, i32 1
|
||||
%tmp21 = extractelement <4 x float> %tmp18, i32 2
|
||||
@ -56,7 +56,7 @@ main_body:
|
||||
%tmp11 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, align 16, !tbaa !0
|
||||
%tmp12 = add i32 %arg5, %arg7
|
||||
%tmp11.cast = bitcast <16 x i8> %tmp11 to <4 x i32>
|
||||
%tmp13 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp11.cast, i32 %tmp12, i32 0, i1 false, i1 false)
|
||||
%tmp13 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp11.cast, i32 %tmp12, i32 0, i32 0, i32 0)
|
||||
%tmp14 = extractelement <4 x float> %tmp13, i32 0
|
||||
%tmp15 = extractelement <4 x float> %tmp13, i32 1
|
||||
%tmp16 = extractelement <4 x float> %tmp13, i32 2
|
||||
@ -65,7 +65,7 @@ main_body:
|
||||
%tmp19 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp18, align 16, !tbaa !0
|
||||
%tmp20 = add i32 %arg5, %arg7
|
||||
%tmp19.cast = bitcast <16 x i8> %tmp19 to <4 x i32>
|
||||
%tmp21 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp19.cast, i32 %tmp20, i32 0, i1 false, i1 false)
|
||||
%tmp21 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp19.cast, i32 %tmp20, i32 0, i32 0, i32 0)
|
||||
%tmp22 = extractelement <4 x float> %tmp21, i32 0
|
||||
%tmp23 = extractelement <4 x float> %tmp21, i32 1
|
||||
%tmp24 = extractelement <4 x float> %tmp21, i32 2
|
||||
@ -76,7 +76,7 @@ main_body:
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #2
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -721,7 +721,7 @@ main_body:
|
||||
br i1 %cc, label %endif, label %if
|
||||
|
||||
if:
|
||||
%src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
|
||||
%src = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src, %src
|
||||
%out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
|
||||
%out.1 = fadd float %src, %out.0
|
||||
@ -758,8 +758,8 @@ main_body:
|
||||
; GFX1064: s_and_b64 exec, exec, s[{{[0-9:]+}}]
|
||||
define amdgpu_ps float @test_wqm2(i32 inreg %idx0, i32 inreg %idx1) #0 {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src0, %src1
|
||||
%out.0 = bitcast float %out to i32
|
||||
%out.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %out.0)
|
||||
@ -1108,7 +1108,7 @@ declare float @llvm.amdgcn.wwm.f32(float)
|
||||
declare i32 @llvm.amdgcn.wqm.i32(i32)
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32)
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32)
|
||||
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1)
|
||||
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg)
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)
|
||||
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32)
|
||||
declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32)
|
||||
|
@ -32,7 +32,7 @@ main_body:
|
||||
%inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0)
|
||||
%inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0)
|
||||
%inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0)
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
ret <4 x float> %tex
|
||||
}
|
||||
|
||||
@ -49,11 +49,11 @@ main_body:
|
||||
;CHECK: .size test3
|
||||
define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float %c) {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%tex.1 = bitcast <4 x float> %tex to <4 x i32>
|
||||
%tex.2 = extractelement <4 x i32> %tex.1, i32 0
|
||||
|
||||
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %tex, <4 x i32> undef, i32 %tex.2, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %tex, <4 x i32> undef, i32 %tex.2, i32 0, i32 0, i32 0)
|
||||
|
||||
ret <4 x float> %tex
|
||||
}
|
||||
@ -77,7 +77,7 @@ main_body:
|
||||
%inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0)
|
||||
%inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0)
|
||||
%inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0)
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%tex.0 = extractelement <4 x float> %tex, i32 0
|
||||
%tex.1 = extractelement <4 x float> %tex, i32 1
|
||||
%tex.2 = extractelement <4 x float> %tex, i32 2
|
||||
@ -102,11 +102,11 @@ define amdgpu_ps <4 x float> @test4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp
|
||||
main_body:
|
||||
%c.1 = mul i32 %c, %d
|
||||
|
||||
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> undef, <4 x i32> undef, i32 %c.1, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> undef, <4 x i32> undef, i32 %c.1, i32 0, i32 0, i32 0)
|
||||
%c.1.bc = bitcast i32 %c.1 to float
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
ret <4 x float> %dtex
|
||||
}
|
||||
|
||||
@ -122,8 +122,8 @@ main_body:
|
||||
;CHECK-NOT: v_mov_b32_e32
|
||||
define amdgpu_ps float @test5(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src0, %src1
|
||||
%out.0 = call float @llvm.amdgcn.wqm.f32(float %out)
|
||||
ret float %out.0
|
||||
@ -138,8 +138,8 @@ main_body:
|
||||
;CHECK: v_add_f32_e32
|
||||
define amdgpu_ps float @test6(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src0, %src1
|
||||
%out.0 = bitcast float %out to i32
|
||||
%out.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %out.0)
|
||||
@ -156,8 +156,8 @@ main_body:
|
||||
;CHECK: v_add_f32_e32
|
||||
define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src0, %src1
|
||||
%out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
|
||||
ret float %out.0
|
||||
@ -172,8 +172,8 @@ main_body:
|
||||
;CHECK: v_add_{{[iu]}}32_e32
|
||||
define amdgpu_ps float @test_wwm2(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%src0.0 = bitcast float %src0 to i32
|
||||
%src1.0 = bitcast float %src1 to i32
|
||||
%out = add i32 %src0.0, %src1.0
|
||||
@ -201,7 +201,7 @@ main_body:
|
||||
br i1 %cc, label %endif, label %if
|
||||
|
||||
if:
|
||||
%src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
|
||||
%src = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src, %src
|
||||
%out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
|
||||
%out.1 = fadd float %src, %out.0
|
||||
@ -230,7 +230,7 @@ main_body:
|
||||
br i1 %cc, label %endif, label %if
|
||||
|
||||
if:
|
||||
%src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
|
||||
%src = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i32 0, i32 0)
|
||||
%out = fadd float %src, %src
|
||||
%out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
|
||||
br label %endif
|
||||
@ -252,9 +252,9 @@ endif:
|
||||
;CHECK: s_wqm_b64 exec, exec
|
||||
define amdgpu_ps float @test_wwm5(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %src0, <4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %src0, <4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%temp = fadd float %src1, %src1
|
||||
%temp.0 = call float @llvm.amdgcn.wwm.f32(float %temp)
|
||||
%out = fadd float %temp.0, %temp.0
|
||||
@ -341,13 +341,13 @@ endloop:
|
||||
;CHECK: v_add_{{[iu]}}32_e32
|
||||
define amdgpu_ps void @test_set_inactive1(i32 inreg %idx) {
|
||||
main_body:
|
||||
%src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
|
||||
%src = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i32 0, i32 0)
|
||||
%src.0 = bitcast float %src to i32
|
||||
%src.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src.0, i32 0)
|
||||
%out = add i32 %src.1, %src.1
|
||||
%out.0 = call i32 @llvm.amdgcn.wwm.i32(i32 %out)
|
||||
%out.1 = bitcast i32 %out.0 to float
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %out.1, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %out.1, <4 x i32> undef, i32 %idx, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -359,15 +359,15 @@ main_body:
|
||||
;CHECK: buffer_load_dword
|
||||
define amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
%src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
%src1.0 = bitcast float %src1 to i32
|
||||
%src1.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src1.0, i32 undef)
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
%src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
|
||||
%src0.0 = bitcast float %src0 to i32
|
||||
%src0.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %src0.0)
|
||||
%out = add i32 %src0.1, %src1.1
|
||||
%out.0 = bitcast i32 %out to float
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %out.0, <4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %out.0, <4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -396,14 +396,14 @@ main_body:
|
||||
|
||||
IF:
|
||||
%c.bc = bitcast i32 %c to float
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%data.if = extractelement <4 x float> %dtex, i32 0
|
||||
br label %END
|
||||
|
||||
ELSE:
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i32 0, i32 0)
|
||||
br label %END
|
||||
|
||||
END:
|
||||
@ -439,14 +439,14 @@ main_body:
|
||||
|
||||
IF:
|
||||
%c.bc = bitcast i32 %c to float
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%data.if = extractelement <4 x float> %dtex, i32 0
|
||||
br label %END
|
||||
|
||||
ELSE:
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i32 0, i32 0)
|
||||
br label %END
|
||||
|
||||
END:
|
||||
@ -472,16 +472,16 @@ define amdgpu_ps <4 x float> @test_control_flow_2(<8 x i32> inreg %rsrc, <4 x i3
|
||||
main_body:
|
||||
%idx.1 = extractelement <3 x i32> %idx, i32 0
|
||||
%data.1 = extractelement <2 x float> %data, i32 0
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i32 0, i32 0)
|
||||
|
||||
; The load that determines the branch (and should therefore be WQM) is
|
||||
; surrounded by stores that require disabled WQM.
|
||||
%idx.2 = extractelement <3 x i32> %idx, i32 1
|
||||
%z = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx.2, i32 0, i1 0, i1 0)
|
||||
%z = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx.2, i32 0, i32 0, i32 0)
|
||||
|
||||
%idx.3 = extractelement <3 x i32> %idx, i32 2
|
||||
%data.3 = extractelement <2 x float> %data, i32 1
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data.3, <4 x i32> undef, i32 %idx.3, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %data.3, <4 x i32> undef, i32 %idx.3, i32 0, i32 0, i32 0)
|
||||
|
||||
%cc = fcmp ogt float %z, 0.0
|
||||
br i1 %cc, label %IF, label %ELSE
|
||||
@ -497,7 +497,7 @@ ELSE:
|
||||
END:
|
||||
%coord.END = phi i32 [ %coord.IF, %IF ], [ %coord.ELSE, %ELSE ]
|
||||
%coord.END.bc = bitcast i32 %coord.END to float
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
ret <4 x float> %tex
|
||||
}
|
||||
|
||||
@ -514,11 +514,11 @@ END:
|
||||
;CHECK-DAG: store
|
||||
define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %coord) {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%dtex.1 = extractelement <4 x float> %dtex, i32 0
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %dtex.1, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %dtex.1, <4 x i32> undef, i32 %idx, i32 0, i32 0, i32 0)
|
||||
|
||||
%cc = fcmp ogt float %dtex.1, 0.0
|
||||
br i1 %cc, label %IF, label %ELSE
|
||||
@ -556,14 +556,14 @@ main_body:
|
||||
br i1 %cond, label %IF, label %END
|
||||
|
||||
IF:
|
||||
%data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0)
|
||||
%data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> undef, i32 0, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i32 0, i32 0)
|
||||
br label %END
|
||||
|
||||
END:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
ret <4 x float> %dtex
|
||||
}
|
||||
|
||||
@ -584,20 +584,20 @@ END:
|
||||
;CHECK: image_sample
|
||||
define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, float %coord, float %coord2, float %z) {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%idx.0 = extractelement <2 x i32> %idx, i32 0
|
||||
%data.0 = extractelement <2 x float> %data, i32 0
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i32 0, i32 0)
|
||||
|
||||
%z.cmp = fcmp olt float %z, 0.0
|
||||
call void @llvm.amdgcn.kill(i1 %z.cmp)
|
||||
|
||||
%idx.1 = extractelement <2 x i32> %idx, i32 1
|
||||
%data.1 = extractelement <2 x float> %data, i32 1
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0)
|
||||
%tex2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i32 0, i32 0)
|
||||
%tex2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%tex2.0 = extractelement <4 x float> %tex2, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex2.0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex2.0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%out = fadd <4 x float> %tex, %dtex
|
||||
|
||||
ret <4 x float> %out
|
||||
@ -617,11 +617,11 @@ main_body:
|
||||
; CHECK: v_cmpx_
|
||||
define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, float %coord, float %coord2, float %z) {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i32 0)
|
||||
|
||||
%z.cmp = fcmp olt float %z, 0.0
|
||||
call void @llvm.amdgcn.kill(i1 %z.cmp)
|
||||
@ -675,7 +675,7 @@ loop:
|
||||
|
||||
body:
|
||||
%c.iv0 = extractelement <4 x float> %c.iv, i32 0
|
||||
%c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #0
|
||||
%ctr.next = fadd float %ctr.iv, 2.0
|
||||
br label %loop
|
||||
|
||||
@ -706,18 +706,18 @@ define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind {
|
||||
entry:
|
||||
%array = alloca [32 x i32], align 4, addrspace(5)
|
||||
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i32 0)
|
||||
|
||||
%s.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 0
|
||||
store volatile i32 %a, i32 addrspace(5)* %s.gep, align 4
|
||||
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i32 0, i32 0)
|
||||
|
||||
%c.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 %idx
|
||||
%c = load i32, i32 addrspace(5)* %c.gep, align 4
|
||||
%c.bc = bitcast i32 %c to float
|
||||
%t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
|
||||
%t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i32 0)
|
||||
|
||||
ret void
|
||||
}
|
||||
@ -734,9 +734,9 @@ entry:
|
||||
; CHECK: s_and_b64 exec, exec, [[LIVE]]
|
||||
; CHECK-NOT: exec
|
||||
define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #0
|
||||
ret <4 x float> %dtex
|
||||
}
|
||||
|
||||
@ -748,9 +748,9 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
|
||||
; CHECK-NOT: exec
|
||||
define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
|
||||
entry:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #0
|
||||
%cc = icmp sgt i32 %c, 0
|
||||
br i1 %cc, label %if, label %else
|
||||
|
||||
@ -782,16 +782,16 @@ main_body:
|
||||
br i1 %cc, label %if, label %else
|
||||
|
||||
if:
|
||||
%r.if = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%r.if = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #0
|
||||
br label %end
|
||||
|
||||
else:
|
||||
%r.else = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.0, float bitcast (i32 1 to float), <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%r.else = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.0, float bitcast (i32 1 to float), <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #0
|
||||
br label %end
|
||||
|
||||
end:
|
||||
%r = phi <4 x float> [ %r.if, %if ], [ %r.else, %else ]
|
||||
call void @llvm.amdgcn.buffer.store.f32(float 1.0, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float 1.0, <4 x i32> undef, i32 %idx, i32 0, i32 0, i32 0)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
@ -806,9 +806,9 @@ end:
|
||||
define amdgpu_ps float @test_wwm_within_wqm(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) {
|
||||
main_body:
|
||||
%c.bc = bitcast i32 %c to float
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
|
||||
%cmp = icmp eq i32 %z, 0
|
||||
br i1 %cmp, label %IF, label %ENDIF
|
||||
|
||||
@ -828,10 +828,15 @@ ENDIF:
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
||||
declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
|
||||
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2
|
||||
|
||||
declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #2
|
||||
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #2
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg) #2
|
||||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #2
|
||||
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #3
|
||||
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #3
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #3
|
||||
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
|
||||
declare void @llvm.amdgcn.kill(i1) #1
|
||||
|
Loading…
Reference in New Issue
Block a user