1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00
llvm-mirror/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
Jay Foad 7622b45921 [AMDGPU] Remove dubious logic in bidirectional list scheduler
Summary:
pickNodeBidirectional tried to compare the best top candidate and the
best bottom candidate by examining TopCand.Reason and BotCand.Reason.
This is unsound because, after calling pickNodeFromQueue, Cand.Reason
does not reflect the most important reason why Cand was chosen. Rather
it reflects the most recent reason why it beat some other potential
candidate, which could have been for some low priority tie breaker
reason.

I have seen this cause problems where TopCand is a good candidate, but
because TopCand.Reason is ORDER (which is very low priority) it is
repeatedly ignored in favour of a mediocre BotCand. This is not how
bidirectional scheduling is supposed to work.

To fix this I changed the code to always compare TopCand and BotCand
directly, like the generic implementation of pickNodeBidirectional does.
This removes some uncommented AMDGPU-specific logic; if this logic turns
out to be important then perhaps it could be moved into an override of
tryCandidate instead.

Graphics shader benchmarking on gfx10 shows a lot more positive than
negative effects from this change.

Reviewers: arsenm, tstellar, rampitec, kzhuravl, vpykhtin, dstuttard, tpr, atrick, MatzeB

Subscribers: jvesely, wdng, nhaehnle, yaxunl, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68338
2020-02-28 21:35:34 +00:00

2393 lines
123 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s
; Baseline 1D sample: a single float coordinate (%s), first operand i32 15,
; and the three trailing operands (i1 0, i32 0, i32 0) all zero.
; All three llc configurations are expected to save exec, apply s_wqm, AND exec
; with the saved copy, and then issue one image_sample with dmask:0xf that
; writes v[0:3]. The gfx1010 configuration additionally pins
; dim:SQ_RSRC_IMG_1D and the instruction encodings.
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample through the {<4 x float>, i32} returning form of the intrinsic,
; with the second-to-last operand set to 1 (presumably the texfailctrl operand
; - confirm against IntrinsicsAMDGPU.td).
; The expected image_sample carries the tfe modifier and returns five dwords
; in v[0:4]; v0-v4 are expected to be written with 0 before the sample.
; The extra i32 is stored through %out and the four floats are returned.
define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[16:17], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v5, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[16:17]
; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[14:15], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v5, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v6, s12
; GFX6789-NEXT: v_mov_b32_e32 v7, s13
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v4, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: global_store_dword v[6:7], v4, off
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s14, exec_lo ; encoding: [0x7e,0x03,0x8e,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v6, s12 ; encoding: [0x0c,0x02,0x0c,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v7, s13 ; encoding: [0x0d,0x02,0x0e,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; encoding: [0x7e,0x0e,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0f,0x81,0xf0,0x05,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
; Field 0 is the sampled vector, field 1 is the extra i32 returned by this form.
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
; Storing the extra dword keeps it live, so the fifth result register is checked.
store i32 %v.err, i32 addrspace(1)* %out, align 4
ret <4 x float> %v.vec
}
; dmask narrowing with the tfe form, channel 0 only: the IR requests i32 15 but
; uses only element 0 of the sampled vector plus the extra i32.
; The expected image_sample has dmask:0x1 and a two-dword result v[0:1], both
; of which are expected to be written with 0 beforehand.
; NOTE(review): %out is declared but never referenced in this function; the
; _2/_3/_4 variants omit it. Confirm whether that is intentional.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0x81,0xf0,0x02,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
%res.vec = extractvalue {<4 x float>,i32} %v, 0
; Only channel 0 of the sampled vector is consumed.
%res.f = extractelement <4 x float> %res.vec, i32 0
%res.err = extractvalue {<4 x float>,i32} %v, 1
; The extra i32 is returned as a float lane, so it must stay live after narrowing.
%res.errf = bitcast i32 %res.err to float
%res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
%res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
ret <2 x float> %res
}
; dmask narrowing with the tfe form, channel 1 only: the IR requests i32 15 but
; uses only element 1 of the sampled vector plus the extra i32.
; The expected image_sample has dmask:0x2 and a two-dword result v[0:1], both
; of which are expected to be written with 0 beforehand.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x02,0x81,0xf0,0x02,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
%res.vec = extractvalue {<4 x float>,i32} %v, 0
; Only channel 1 of the sampled vector is consumed.
%res.f = extractelement <4 x float> %res.vec, i32 1
%res.err = extractvalue {<4 x float>,i32} %v, 1
; The extra i32 is returned as a float lane, so it must stay live after narrowing.
%res.errf = bitcast i32 %res.err to float
%res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
%res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
ret <2 x float> %res
}
; dmask narrowing with the tfe form, channel 2 only: the IR requests i32 15 but
; uses only element 2 of the sampled vector plus the extra i32.
; The expected image_sample has dmask:0x4 and a two-dword result v[0:1], both
; of which are expected to be written with 0 beforehand.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x04,0x81,0xf0,0x02,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
%res.vec = extractvalue {<4 x float>,i32} %v, 0
; Only channel 2 of the sampled vector is consumed.
%res.f = extractelement <4 x float> %res.vec, i32 2
%res.err = extractvalue {<4 x float>,i32} %v, 1
; The extra i32 is returned as a float lane, so it must stay live after narrowing.
%res.errf = bitcast i32 %res.err to float
%res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
%res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
ret <2 x float> %res
}
; dmask narrowing with the tfe form, channel 3 only: the IR requests i32 15 but
; uses only element 3 of the sampled vector plus the extra i32.
; The expected image_sample has dmask:0x8 and a two-dword result v[0:1], both
; of which are expected to be written with 0 beforehand.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x08,0x81,0xf0,0x02,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
%res.vec = extractvalue {<4 x float>,i32} %v, 0
; Only channel 3 of the sampled vector is consumed.
%res.f = extractelement <4 x float> %res.vec, i32 3
%res.err = extractvalue {<4 x float>,i32} %v, 1
; The extra i32 is returned as a float lane, so it must stay live after narrowing.
%res.errf = bitcast i32 %res.err to float
%res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
%res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
ret <2 x float> %res
}
; dmask narrowing with the tfe form, channels 0 and 1: the IR requests i32 15
; but uses only elements 0 and 1 of the sampled vector plus the extra i32.
; The expected image_sample has dmask:0x3 and a three-dword result v[0:2], all
; of which are expected to be written with 0 beforehand.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x03,0x81,0xf0,0x03,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
%res.vec = extractvalue {<4 x float>,i32} %v, 0
; Only channels 0 and 1 of the sampled vector are consumed.
%res.f1 = extractelement <4 x float> %res.vec, i32 0
%res.f2 = extractelement <4 x float> %res.vec, i32 1
%res.err = extractvalue {<4 x float>,i32} %v, 1
; The extra i32 is returned as a float lane, so it must stay live after narrowing.
%res.errf = bitcast i32 %res.err to float
%res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
%res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
; Lane 3 of the returned vector is left undef.
%res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
ret <4 x float> %res
}
; dmask narrowing with the tfe form, non-adjacent channels 1 and 3: the IR
; requests i32 15 but uses only elements 1 and 3 plus the extra i32.
; The expected image_sample has dmask:0xa and a three-dword result v[0:2], all
; of which are expected to be written with 0 beforehand.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0a,0x81,0xf0,0x03,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
%res.vec = extractvalue {<4 x float>,i32} %v, 0
; Only channels 1 and 3 of the sampled vector are consumed.
%res.f1 = extractelement <4 x float> %res.vec, i32 1
%res.f2 = extractelement <4 x float> %res.vec, i32 3
%res.err = extractvalue {<4 x float>,i32} %v, 1
; The extra i32 is returned as a float lane, so it must stay live after narrowing.
%res.errf = bitcast i32 %res.err to float
%res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
%res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
; Lane 3 of the returned vector is left undef.
%res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
ret <4 x float> %res
}
; dmask narrowing with the tfe form, channels 0, 2 and 3: the IR requests
; i32 15 but uses only elements 0, 2 and 3 plus the extra i32.
; The expected image_sample has dmask:0xd and a four-dword result v[0:3], all
; of which are expected to be written with 0 beforehand.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v4, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0d,0x81,0xf0,0x04,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
%res.vec = extractvalue {<4 x float>,i32} %v, 0
; Only channels 0, 2 and 3 of the sampled vector are consumed.
%res.f1 = extractelement <4 x float> %res.vec, i32 0
%res.f2 = extractelement <4 x float> %res.vec, i32 2
%res.f3 = extractelement <4 x float> %res.vec, i32 3
%res.err = extractvalue {<4 x float>,i32} %v, 1
; The extra i32 is returned as a float lane, so it must stay live after narrowing.
%res.errf = bitcast i32 %res.err to float
%res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
%res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
%res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2
%res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3
ret <4 x float> %res
}
; Counterpart of sample_1d_tfe with the second-to-last operand set to 2
; instead of 1 (presumably the texfailctrl operand - confirm against
; IntrinsicsAMDGPU.td).
; The expected image_sample carries the lwe modifier rather than tfe and still
; returns five dwords in v[0:4], with v0-v4 written with 0 beforehand.
; The extra i32 is stored through %out and the four floats are returned.
define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[16:17], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v5, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[16:17]
; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_lwe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[14:15], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v5, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v6, s12
; GFX6789-NEXT: v_mov_b32_e32 v7, s13
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v4, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: global_store_dword v[6:7], v4, off
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_lwe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s14, exec_lo ; encoding: [0x7e,0x03,0x8e,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v6, s12 ; encoding: [0x0c,0x02,0x0c,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v7, s13 ; encoding: [0x0d,0x02,0x0e,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; encoding: [0x7e,0x0e,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; encoding: [0x00,0x0f,0x82,0xf0,0x05,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
; Field 0 is the sampled vector, field 1 is the extra i32 returned by this form.
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
; Storing the extra dword keeps it live, so the fifth result register is checked.
store i32 %v.err, i32 addrspace(1)* %out, align 4
ret <4 x float> %v.vec
}
; 2D sample: two float coordinates (%s, %t), first operand i32 15, trailing
; operands all zero.
; The expected image_sample takes its address from the register pair v[0:1]
; and writes v[0:3] with dmask:0xf; the gfx1010 configuration additionally
; expects dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; VERDE-LABEL: sample_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 3D sample: three float coordinates (%s, %t, %r), first operand i32 15,
; trailing operands all zero.
; The expected image_sample takes its address from v[0:2] and writes v[0:3]
; with dmask:0xf; the gfx1010 configuration additionally expects
; dim:SQ_RSRC_IMG_3D.
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
; VERDE-LABEL: sample_3d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_3d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; Cube sample: three float operands (%s, %t, %face), first operand i32 15,
; trailing operands all zero.
; The expected image_sample takes its address from v[0:2] and writes v[0:3]
; with dmask:0xf. The verde and gfx900 configurations expect the da modifier,
; while the gfx1010 configuration expects dim:SQ_RSRC_IMG_CUBE instead.
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; VERDE-LABEL: sample_cube:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cube:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cube:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D-array sample: calls llvm.amdgcn.image.sample.1darray with dmask 15 and the
; address operands (%s, %slice). The VERDE/GFX6789 checks expect image_sample on
; v[0:1] with the "da" modifier; the GFX10 checks expect dim:SQ_RSRC_IMG_1D_ARRAY.
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
; VERDE-LABEL: sample_1darray:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1darray:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x20,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D-array sample: calls llvm.amdgcn.image.sample.2darray with dmask 15 and the
; address operands (%s, %t, %slice). The VERDE/GFX6789 checks expect image_sample
; on v[0:2] with the "da" modifier; the GFX10 checks expect dim:SQ_RSRC_IMG_2D_ARRAY.
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_2darray:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_2darray:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_2darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample with a compare operand: calls llvm.amdgcn.image.sample.c.1d with
; dmask 15 and the address operands (%zcompare, %s). All three check prefixes
; expect image_sample_c on v[0:1]; the GFX10 checks add dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; VERDE-LABEL: sample_c_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa0,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D sample with a compare operand: calls llvm.amdgcn.image.sample.c.2d with
; dmask 15 and the address operands (%zcompare, %s, %t). All three check prefixes
; expect image_sample_c on v[0:2]; the GFX10 checks add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; VERDE-LABEL: sample_c_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa0,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample with a clamp operand: calls llvm.amdgcn.image.sample.cl.1d with
; dmask 15 and the address operands (%s, %clamp). All three check prefixes
; expect image_sample_cl on v[0:1]; the GFX10 checks add dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
; VERDE-LABEL: sample_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x84,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D sample with a clamp operand: calls llvm.amdgcn.image.sample.cl.2d with
; dmask 15 and the address operands (%s, %t, %clamp). All three check prefixes
; expect image_sample_cl on v[0:2]; the GFX10 checks add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x84,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample with compare and clamp operands: calls
; llvm.amdgcn.image.sample.c.cl.1d with dmask 15 and the address operands
; (%zcompare, %s, %clamp). All three check prefixes expect image_sample_c_cl on
; v[0:2]; the GFX10 checks add dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
; VERDE-LABEL: sample_c_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa4,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D sample with compare and clamp operands: calls
; llvm.amdgcn.image.sample.c.cl.2d with dmask 15 and the address operands
; (%zcompare, %s, %t, %clamp). All three check prefixes expect image_sample_c_cl
; on v[0:3]; the GFX10 checks add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa4,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample with a bias operand: calls llvm.amdgcn.image.sample.b.1d with
; dmask 15 and the address operands (%bias, %s). All three check prefixes
; expect image_sample_b on v[0:1]; the GFX10 checks add dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
; VERDE-LABEL: sample_b_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x94,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D sample with a bias operand: calls llvm.amdgcn.image.sample.b.2d with
; dmask 15 and the address operands (%bias, %s, %t). All three check prefixes
; expect image_sample_b on v[0:2]; the GFX10 checks add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
; VERDE-LABEL: sample_b_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x94,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample with bias and compare operands: calls
; llvm.amdgcn.image.sample.c.b.1d with dmask 15 and the address operands
; (%bias, %zcompare, %s). All three check prefixes expect image_sample_c_b on
; v[0:2]; the GFX10 checks add dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
; VERDE-LABEL: sample_c_b_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb4,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D sample with bias and compare operands: calls
; llvm.amdgcn.image.sample.c.b.2d with dmask 15 and the address operands
; (%bias, %zcompare, %s, %t). All three check prefixes expect image_sample_c_b
; on v[0:3]; the GFX10 checks add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
; VERDE-LABEL: sample_c_b_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb4,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample with bias and clamp operands: calls
; llvm.amdgcn.image.sample.b.cl.1d with dmask 15 and the address operands
; (%bias, %s, %clamp). All three check prefixes expect image_sample_b_cl on
; v[0:2]; the GFX10 checks add dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
; VERDE-LABEL: sample_b_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x98,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D sample with bias and clamp operands: calls
; llvm.amdgcn.image.sample.b.cl.2d with dmask 15 and the address operands
; (%bias, %s, %t, %clamp). All three check prefixes expect image_sample_b_cl on
; v[0:3]; the GFX10 checks add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_b_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x98,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample with bias, compare and clamp operands: calls
; llvm.amdgcn.image.sample.c.b.cl.1d with dmask 15 and the address operands
; (%bias, %zcompare, %s, %clamp). All three check prefixes expect
; image_sample_c_b_cl on v[0:3]; the GFX10 checks add dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
; VERDE-LABEL: sample_c_b_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb8,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D sample with bias, compare and clamp operands: calls
; llvm.amdgcn.image.sample.c.b.cl.2d with dmask 15 and the address operands
; (%bias, %zcompare, %s, %t, %clamp). All three check prefixes expect
; image_sample_c_b_cl; the GFX10 checks add dim:SQ_RSRC_IMG_2D.
; NOTE(review): five address operands are passed but the expected address range
; is the 8-register tuple v[0:7] - presumably rounded up to an available
; register-tuple size; confirm against the backend before hand-editing.
define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_b_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb8,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample with explicit derivative operands: calls
; llvm.amdgcn.image.sample.d.1d with dmask 15 and the address operands
; (%dsdh, %dsdv, %s). All three check prefixes expect image_sample_d on v[0:2]
; as the first checked instruction, with no exec save / s_wqm sequence before
; it; the GFX10 checks add dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_d_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D sample with explicit derivative operands: calls
; llvm.amdgcn.image.sample.d.2d with dmask 15 and the address operands
; (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t). All three check prefixes expect
; image_sample_d with the address range v[0:7] and no exec save / s_wqm
; sequence before it; the GFX10 checks add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_d_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample with compare and explicit derivative operands: calls
; llvm.amdgcn.image.sample.c.d.1d with dmask 15 and the address operands
; (%zcompare, %dsdh, %dsdv, %s). All three check prefixes expect
; image_sample_c_d on v[0:3] with no exec save / s_wqm sequence before it; the
; GFX10 checks add dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_c_d_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D sample with compare and explicit derivative operands: calls
; llvm.amdgcn.image.sample.c.d.2d with dmask 15 and the address operands
; (%zcompare, %dsdh, %dtdh, %dsdv, %dtdv, %s, %t). All three check prefixes
; expect image_sample_c_d with the address range v[0:7] and no exec save /
; s_wqm sequence before it; the GFX10 checks add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_c_d_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 1D sample with explicit derivative and clamp operands: calls
; llvm.amdgcn.image.sample.d.cl.1d with dmask 15 and the address operands
; (%dsdh, %dsdv, %s, %clamp). All three check prefixes expect image_sample_d_cl
; on v[0:3] with no exec save / s_wqm sequence before it; the GFX10 checks add
; dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_d_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; 2D sample with explicit derivative and clamp operands: calls
; llvm.amdgcn.image.sample.d.cl.2d with dmask 15 and the address operands
; (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp). All three check prefixes expect
; image_sample_d_cl with the address range v[0:7] and no exec save / s_wqm
; sequence before it; the GFX10 checks add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_d_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.d.cl on a 1D image with all four channels enabled (dmask 15).
; The call passes five float operands (%zcompare, %dsdh, %dsdv, %s, %clamp);
; the expected asm names the eight-register range v[0:7] for them.
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_c_d_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.d.cl on a 2D image with all four channels enabled (dmask 15).
; The call passes eight float operands (%zcompare, four derivatives, %s, %t,
; %clamp); every run expects them in v[0:7].
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_d_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.cd on a 1D image with all four channels enabled (dmask 15).
; The call passes three float operands (%dsdh, %dsdv, %s); every run expects
; a single image_sample_cd that reads them from v[0:2].
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_cd_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cd_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.cd on a 2D image with all four channels enabled (dmask 15).
; The call passes six float operands (four derivatives, %s, %t); the expected
; asm names the eight-register range v[0:7] for them.
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_cd_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cd_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.cd on a 1D image with all four channels enabled (dmask 15).
; The call passes four float operands (%zcompare, %dsdh, %dsdv, %s); every
; run expects a single image_sample_c_cd that reads them from v[0:3].
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_c_cd_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cd_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.cd on a 2D image with all four channels enabled (dmask 15).
; The call passes seven float operands (%zcompare, four derivatives, %s, %t);
; the expected asm names the eight-register range v[0:7] for them.
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_c_cd_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cd_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.cd.cl on a 1D image with all four channels enabled (dmask 15).
; The call passes four float operands (%dsdh, %dsdv, %s, %clamp); every run
; expects a single image_sample_cd_cl that reads them from v[0:3].
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_cd_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cd_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.cd.cl on a 2D image with all four channels enabled (dmask 15).
; The call passes seven float operands (four derivatives, %s, %t, %clamp);
; the expected asm names the eight-register range v[0:7] for them.
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_cd_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cd_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.cd.cl on a 1D image with all four channels enabled (dmask 15).
; The call passes five float operands (%zcompare, %dsdh, %dsdv, %s, %clamp);
; the expected asm names the eight-register range v[0:7] for them.
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_c_cd_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cd_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.cd.cl on a 2D image with all four channels enabled (dmask 15).
; The call passes eight float operands (%zcompare, four derivatives, %s, %t,
; %clamp); every run expects them in v[0:7].
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_cd_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cd_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.l on a 1D image with all four channels enabled (dmask 15).
; The call passes two float operands (%s, %lod); every run expects a single
; image_sample_l that reads them from v[0:1].
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
; VERDE-LABEL: sample_l_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_l_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_l_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x90,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.l on a 2D image with all four channels enabled (dmask 15).
; The call passes three float operands (%s, %t, %lod); every run expects a
; single image_sample_l that reads them from v[0:2].
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; VERDE-LABEL: sample_l_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_l_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_l_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x90,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.l on a 1D image with all four channels enabled (dmask 15).
; The call passes three float operands (%zcompare, %s, %lod); every run
; expects a single image_sample_c_l that reads them from v[0:2].
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
; VERDE-LABEL: sample_c_l_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_l_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb0,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.l on a 2D image with all four channels enabled (dmask 15).
; The call passes four float operands (%zcompare, %s, %t, %lod); every run
; expects a single image_sample_c_l that reads them from v[0:3].
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
; VERDE-LABEL: sample_c_l_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_l_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb0,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.lz on a 1D image with all four channels enabled (dmask 15).
; The only float operand is %s, so every run expects a single image_sample_lz
; whose address operand is the lone register v0.
define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_lz_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_lz_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_lz_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x9c,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.lz on a 2D image with all four channels enabled (dmask 15).
; The call passes two float operands (%s, %t); every run expects a single
; image_sample_lz that reads them from v[0:1].
define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; VERDE-LABEL: sample_lz_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_lz_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_lz_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x9c,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.lz on a 1D image with all four channels enabled (dmask 15).
; The call passes two float operands (%zcompare, %s); every run expects a
; single image_sample_c_lz that reads them from v[0:1].
define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; VERDE-LABEL: sample_c_lz_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_lz_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_lz_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xbc,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.lz on a 2D image with all four channels enabled (dmask 15).
; The call passes three float operands (%zcompare, %s, %t); every run expects
; a single image_sample_c_lz that reads them from v[0:2].
define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; VERDE-LABEL: sample_c_lz_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_lz_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_lz_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xbc,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; sample.c.d.o on a 2D array image returning a single float (dmask 4).
; The call passes nine operands (i32 %offset plus eight floats); the expected
; asm writes one result register v0 and names v[0:15] as the address range.
; The verde and gfx900 expectations carry the "da" modifier; the gfx1010 one
; carries dim:SQ_RSRC_IMG_2D_ARRAY instead.
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_c_d_o_2darray_V1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V1:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x04,0xe8,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret float %v
}
; Single-channel (dmask 4) sample.c.d.o on a 2D array image, using the
; {float, i32} return form with the second-to-last i32 operand set to 1.
; The float member is returned; the i32 member is stored to %out.
; Expected asm in every run: the destination register pair is set to zero
; before the sample, the sample carries the "tfe" modifier, and the second
; result register is stored (buffer_store_dword on verde, global_store_dword
; on gfx900 and gfx1010). The gfx1010 expectation lists the address registers
; individually ([v10, v9, v2, ...]) rather than as one contiguous range.
define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) {
; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v9, 0
; VERDE-NEXT: v_mov_b32_e32 v10, v9
; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: v_mov_b32_e32 v0, v9
; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: v_mov_b32_e32 v9, 0
; GFX6789-NEXT: v_mov_b32_e32 v10, v9
; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
; GFX6789-NEXT: v_mov_b32_e32 v0, s12
; GFX6789-NEXT: v_mov_b32_e32 v1, s13
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: global_store_dword v[0:1], v10, off
; GFX6789-NEXT: v_mov_b32_e32 v0, v9
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v10, v0 ; encoding: [0x00,0x03,0x14,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v9, v1 ; encoding: [0x01,0x03,0x12,0x7e]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x04,0xe9,0xf0,0x0a,0x00,0x40,0x00,0x09,0x02,0x03,0x04,0x05,0x06,0x07,0x08]
; GFX10-NEXT: v_mov_b32_e32 v2, s12 ; encoding: [0x0c,0x02,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v3, s13 ; encoding: [0x0d,0x02,0x06,0x7e]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: global_store_dword v[2:3], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x02,0x01,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
%v.vec = extractvalue {float, i32} %v, 0
%v.err = extractvalue {float, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
ret float %v.vec
}
; sample.c.d.o on a 2D array image returning <2 x float> (dmask 6).
; Same nine operands as the single-channel variant (i32 %offset plus eight
; floats); the expected asm writes the result pair v[0:1] and names v[0:15]
; as the address range.
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_c_d_o_2darray_V2:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V2:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V2:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x06,0xe8,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <2 x float> %v
}
; Two-channel (dmask 6) sample.c.d.o on a 2D array image, using the
; {<2 x float>, i32} return form with the second-to-last i32 operand set to 1.
; The two floats go into elements 0 and 1 of the returned <4 x float>, the
; i32 member is bitcast to float into element 2, and element 3 is left undef.
; Expected asm in every run: three destination registers are set to zero
; before the sample and the sample carries the "tfe" modifier. The gfx1010
; expectation lists the address registers individually ([v11, v10, v9, ...]).
define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v9, 0
; VERDE-NEXT: v_mov_b32_e32 v10, v9
; VERDE-NEXT: v_mov_b32_e32 v11, v9
; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: v_mov_b32_e32 v0, v9
; VERDE-NEXT: v_mov_b32_e32 v1, v10
; VERDE-NEXT: v_mov_b32_e32 v2, v11
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: v_mov_b32_e32 v9, 0
; GFX6789-NEXT: v_mov_b32_e32 v10, v9
; GFX6789-NEXT: v_mov_b32_e32 v11, v9
; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: v_mov_b32_e32 v0, v9
; GFX6789-NEXT: v_mov_b32_e32 v1, v10
; GFX6789-NEXT: v_mov_b32_e32 v2, v11
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v11, v0 ; encoding: [0x00,0x03,0x16,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; encoding: [0x02,0x03,0x12,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v10, v1 ; encoding: [0x01,0x03,0x14,0x7e]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x06,0xe9,0xf0,0x0b,0x00,0x40,0x00,0x0a,0x09,0x03,0x04,0x05,0x06,0x07,0x08]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
%v.vec = extractvalue {<2 x float>, i32} %v, 0
%v.f1 = extractelement <2 x float> %v.vec, i32 0
%v.f2 = extractelement <2 x float> %v.vec, i32 1
%v.err = extractvalue {<2 x float>, i32} %v, 1
%v.errf = bitcast i32 %v.err to float
%res.0 = insertelement <4 x float> undef, float %v.f1, i32 0
%res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1
%res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2
ret <4 x float> %res.2
}
; Plain sample on a 1D image (dmask 15) with the i1 operand set to 1; the
; expected image_sample in every run carries the "unorm" modifier.
; The expectations also pin the exec handling around the sample: exec is
; copied to a scalar register, s_wqm is applied to exec, and exec is then
; ANDed with the saved copy before the image_sample is issued (64-bit forms
; on verde and gfx900, 32-bit exec_lo forms on gfx1010).
define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_unorm:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_unorm:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_unorm:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
ret <4 x float> %v
}
; Plain sample on a 1D image (dmask 15) with the final i32 operand set to 1;
; the expected image_sample in every run carries the "glc" modifier.
; As with the unorm variant, the expectations pin the exec save / s_wqm /
; s_and sequence that precedes the image_sample.
define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_glc:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_glc:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_glc:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x2f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
ret <4 x float> %v
}
; Checks a 1D image sample whose final i32 operand is 2. All three run
; configurations are expected to emit image_sample with the "slc" modifier
; (and without "glc"); the gfx10 run additionally pins the encoding bytes.
; (Presumably the final operand is a cache-policy bitfield with bit 1 = slc;
; confirm against the AMDGPU intrinsic documentation.)
define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_slc:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_slc:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_slc:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; encoding: [0x00,0x0f,0x80,0xf2,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
; dmask 15 requests all four channels; the i1 flag is off; final operand = 2.
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
ret <4 x float> %v
}
; Checks a 1D image sample whose final i32 operand is 3. All three run
; configurations are expected to emit image_sample with both the "glc" and
; "slc" modifiers; the gfx10 run additionally pins the encoding bytes.
; (Presumably the final operand is a cache-policy bitfield, bit 0 = glc and
; bit 1 = slc; confirm against the AMDGPU intrinsic documentation.)
define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_glc_slc:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_glc_slc:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_glc_slc:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc ; encoding: [0x00,0x2f,0x80,0xf2,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
; dmask 15 requests all four channels; the i1 flag is off; final operand = 3.
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
ret <4 x float> %v
}
; Write-mask narrowing, element 0 only. The intrinsic is called with dmask 15
; (all four channels) but only result element 0 is returned, so the expected
; ISA uses dmask:0x1 and a single destination register (v0).
define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_0:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_0:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: adjust_writemask_sample_0:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Only element 0 of the sampled vector is live.
%elt0 = extractelement <4 x float> %r, i32 0
ret float %elt0
}
; Write-mask narrowing, elements 0 and 1. The intrinsic is called with dmask 15
; but only result elements 0 and 1 are returned, so the expected ISA uses
; dmask:0x3 and a two-register destination (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_01:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_01:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: adjust_writemask_sample_01:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Keep elements 0 and 1; elements 2 and 3 are dead.
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
ret <2 x float> %out
}
; Write-mask narrowing, elements 0, 1 and 2. The intrinsic is called with
; dmask 15 but element 3 is never used, so the expected ISA uses dmask:0x7 and
; a three-register destination (v[0:2]).
define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_012:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_012:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: adjust_writemask_sample_012:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Keep elements 0, 1 and 2; element 3 is dead.
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
ret <3 x float> %out
}
; Write-mask narrowing, elements 1 and 2. The intrinsic is called with dmask 15
; but only result elements 1 and 2 are returned, so the expected ISA uses
; dmask:0x6 and packs the two results into v[0:1].
define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_12:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_12:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: adjust_writemask_sample_12:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x06,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Keep elements 1 and 2; elements 0 and 3 are dead.
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
ret <2 x float> %out
}
; Write-mask narrowing, non-adjacent elements 0 and 3. The intrinsic is called
; with dmask 15 but only result elements 0 and 3 are returned, so the expected
; ISA uses dmask:0x9 and packs the two results into v[0:1].
define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_03:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_03:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: adjust_writemask_sample_03:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x09,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Keep elements 0 and 3; elements 1 and 2 are dead.
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
ret <2 x float> %out
}
; Write-mask narrowing, non-adjacent elements 1 and 3. The intrinsic is called
; with dmask 15 but only result elements 1 and 3 are returned, so the expected
; ISA uses dmask:0xa and packs the two results into v[0:1].
define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_13:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_13:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: adjust_writemask_sample_13:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0a,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Keep elements 1 and 3; elements 0 and 2 are dead.
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
ret <2 x float> %out
}
; Write-mask narrowing, elements 1, 2 and 3. The intrinsic is called with
; dmask 15 but element 0 is never used, so the expected ISA uses dmask:0xe and
; packs the three results into v[0:2].
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_123:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_123:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: adjust_writemask_sample_123:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0e,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Keep elements 1, 2 and 3; element 0 is dead.
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
ret <3 x float> %out
}
; Degenerate write mask. The intrinsic is called with dmask 0, i.e. no channel
; is requested. The expected output for every run configuration contains no
; image_sample instruction and no exec-mask setup, only the return.
define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_none_enabled:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: adjust_writemask_sample_none_enabled:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
main_body:
; dmask operand is 0 here, unlike the other write-mask tests.
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %r
}
; Write-mask narrowing starting from a partial mask. The intrinsic is called
; with dmask 14 (0b1110) and only result elements 0 and 1 are returned; the
; expected ISA narrows the mask to dmask:0x6 (0b0110) with destination v[0:1].
; (Presumably result elements are packed in order of the enabled channels, so
; elements 0 and 1 correspond to channels 1 and 2 -- confirm against the
; AMDGPU intrinsic documentation.)
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_123_to_12:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: adjust_writemask_sample_123_to_12:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x06,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
; dmask operand is 14 here, not 15.
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Keep elements 0 and 1 of the result vector.
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
ret <2 x float> %out
}
; Write-mask narrowing starting from a partial, non-contiguous mask. The
; intrinsic is called with dmask 11 (0b1011) and only result elements 1 and 2
; are returned; the expected ISA narrows the mask to dmask:0xa (0b1010) with
; destination v[0:1].
; (Presumably result elements are packed in order of the enabled channels, so
; elements 1 and 2 correspond to channels 1 and 3 -- confirm against the
; AMDGPU intrinsic documentation.)
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_013_to_13:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: adjust_writemask_sample_013_to_13:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0a,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
; dmask operand is 11 here, not 15.
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Keep elements 1 and 2 of the result vector.
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
ret <2 x float> %out
}
; Declarations of the llvm.amdgcn.image.sample.* intrinsic variants.
; Every declaration starts with an i32 operand (passed as the dmask in the
; tests visible in this part of the file) and ends with the same operand tail,
; (<8 x i32>, <4 x i32>, i1, i32, i32). The float operands in between vary in
; number per variant. All declarations carry attribute group #1.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; The c.d.o.2darray variants below take two leading i32 operands and are
; declared with four different return types: float, {float, i32},
; <2 x float> and {<2 x float>, i32}.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; Attribute groups. #1 (nounwind readonly) is the group attached to every
; intrinsic declaration in this list.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }