mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
7622b45921
Summary: pickNodeBidirectional tried to compare the best top candidate and the best bottom candidate by examining TopCand.Reason and BotCand.Reason. This is unsound because, after calling pickNodeFromQueue, Cand.Reason does not reflect the most important reason why Cand was chosen. Rather it reflects the most recent reason why it beat some other potential candidate, which could have been for some low priority tie breaker reason. I have seen this cause problems where TopCand is a good candidate, but because TopCand.Reason is ORDER (which is very low priority) it is repeatedly ignored in favour of a mediocre BotCand. This is not how bidirectional scheduling is supposed to work. To fix this I changed the code to always compare TopCand and BotCand directly, like the generic implementation of pickNodeBidirectional does. This removes some uncommented AMDGPU-specific logic; if this logic turns out to be important then perhaps it could be moved into an override of tryCandidate instead. Graphics shader benchmarking on gfx10 shows a lot more positive than negative effects from this change. Reviewers: arsenm, tstellar, rampitec, kzhuravl, vpykhtin, dstuttard, tpr, atrick, MatzeB Subscribers: jvesely, wdng, nhaehnle, yaxunl, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68338
2393 lines
123 KiB
LLVM
2393 lines
123 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s
|
|
|
|
; Basic 1D sample: calls the 1d image-sample intrinsic with a dmask operand of
; 15 and returns the <4 x float> result unchanged. On all three targets the
; expected code saves exec, enters whole-quad mode, restores exec, issues one
; image_sample with dmask:0xf into v[0:3], and waits on vmcnt. The gfx1010 run
; additionally expects the dim:SQ_RSRC_IMG_1D modifier and instruction
; encodings.
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 1D sample returning {<4 x float>, i32}: the call passes 1 in its
; second-to-last operand and the expected image_sample carries the tfe
; modifier (presumably the texture-fail-control operand; confirm against the
; AMDGPU intrinsic documentation). The i32 member is stored to %out and the
; vector member is returned. The expected code zero-initialises v0..v4 before
; the sample (v0 is set to 0 and copied into v1..v4), samples into v[0:4], and
; stores v4: buffer_store_dword on verde, global_store_dword on gfx900 and
; gfx1010.
define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[16:17], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v5, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[16:17]
; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[14:15], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v5, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v6, s12
; GFX6789-NEXT: v_mov_b32_e32 v7, s13
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v4, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: global_store_dword v[6:7], v4, off
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s14, exec_lo ; encoding: [0x7e,0x03,0x8e,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v6, s12 ; encoding: [0x0c,0x02,0x0c,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v7, s13 ; encoding: [0x0d,0x02,0x0e,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; encoding: [0x7e,0x0e,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0f,0x81,0xf0,0x05,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}
|
|
|
|
; TFE sample where only element 0 of the sampled vector and the i32 status
; member are used; they are packed into a <2 x float> return value. The call
; itself still requests dmask 15, but the expected image_sample is narrowed to
; dmask:0x1 writing v[0:1], with v0 and v1 zero-initialised beforehand.
; Note that %out is declared in the signature but not referenced in the body.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0x81,0xf0,0x02,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 0
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where only element 1 of the sampled vector and the i32 status
; member are used; they are packed into a <2 x float> return value. The call
; requests dmask 15, but the expected image_sample is narrowed to dmask:0x2
; writing v[0:1], with v0 and v1 zero-initialised beforehand.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x02,0x81,0xf0,0x02,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 1
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where only element 2 of the sampled vector and the i32 status
; member are used; they are packed into a <2 x float> return value. The call
; requests dmask 15, but the expected image_sample is narrowed to dmask:0x4
; writing v[0:1], with v0 and v1 zero-initialised beforehand.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x04,0x81,0xf0,0x02,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 2
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where only element 3 of the sampled vector and the i32 status
; member are used; they are packed into a <2 x float> return value. The call
; requests dmask 15, but the expected image_sample is narrowed to dmask:0x8
; writing v[0:1], with v0 and v1 zero-initialised beforehand.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x08,0x81,0xf0,0x02,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where elements 0 and 1 of the sampled vector plus the i32 status
; member are used; they fill lanes 0..2 of the <4 x float> return value (lane 3
; stays undef). The call requests dmask 15, but the expected image_sample is
; narrowed to dmask:0x3 writing v[0:2], with v0..v2 zero-initialised
; beforehand.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x03,0x81,0xf0,0x03,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 0
  %res.f2 = extractelement <4 x float> %res.vec, i32 1
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
  ret <4 x float> %res
}
|
|
|
|
; TFE sample where elements 1 and 3 of the sampled vector plus the i32 status
; member are used; they fill lanes 0..2 of the <4 x float> return value (lane 3
; stays undef). The call requests dmask 15, but the expected image_sample is
; narrowed to dmask:0xa writing v[0:2], with v0..v2 zero-initialised
; beforehand.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0a,0x81,0xf0,0x03,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 1
  %res.f2 = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
  ret <4 x float> %res
}
|
|
|
|
; TFE sample where elements 0, 2 and 3 of the sampled vector plus the i32
; status member are used; together they fill all four lanes of the
; <4 x float> return value. The call requests dmask 15, but the expected
; image_sample is narrowed to dmask:0xd writing v[0:3], with v0..v3
; zero-initialised beforehand.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v4, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0d,0x81,0xf0,0x04,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 0
  %res.f2 = extractelement <4 x float> %res.vec, i32 2
  %res.f3 = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2
  %res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3
  ret <4 x float> %res
}
|
|
|
|
; Same shape as the tfe test, but the call passes 2 in its second-to-last
; operand and the expected image_sample carries the lwe modifier instead
; (presumably the LWE bit of the texture-fail-control operand; confirm against
; the AMDGPU intrinsic documentation). The i32 member is stored to %out and
; the vector member is returned. The expected code zero-initialises v0..v4,
; samples into v[0:4], and stores v4: buffer_store_dword on verde,
; global_store_dword on gfx900 and gfx1010.
define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[16:17], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v5, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[16:17]
; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_lwe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[14:15], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v5, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v6, s12
; GFX6789-NEXT: v_mov_b32_e32 v7, s13
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v4, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: global_store_dword v[6:7], v4, off
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_lwe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s14, exec_lo ; encoding: [0x7e,0x03,0x8e,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v6, s12 ; encoding: [0x0c,0x02,0x0c,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v7, s13 ; encoding: [0x0d,0x02,0x0e,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; encoding: [0x7e,0x0e,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; encoding: [0x00,0x0f,0x82,0xf0,0x05,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}
|
|
|
|
; Basic 2D sample: calls the 2d image-sample intrinsic with coordinates %s and
; %t and a dmask operand of 15, returning the <4 x float> result unchanged.
; The expected image_sample reads its address from v[0:1] and writes v[0:3]
; with dmask:0xf; the gfx1010 run additionally expects dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; VERDE-LABEL: sample_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Basic 3D sample: calls the 3d image-sample intrinsic with coordinates %s, %t
; and %r and a dmask operand of 15, returning the <4 x float> result
; unchanged. The expected image_sample reads its address from v[0:2] and
; writes v[0:3] with dmask:0xf; the gfx1010 run additionally expects
; dim:SQ_RSRC_IMG_3D.
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
; VERDE-LABEL: sample_3d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_3d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
; GFX10-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; sample_cube: passes %s, %t, %face to llvm.amdgcn.image.sample.cube (leading
; operand i32 15; the checks show dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample with address v[0:2].
; The verde and gfx900 runs expect the `da` modifier; the gfx1010 run expects
; dim:SQ_RSRC_IMG_CUBE instead and also checks the encoding.
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
|
|
; VERDE-LABEL: sample_cube:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_cube:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_cube:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_1darray: passes %s, %slice to llvm.amdgcn.image.sample.1darray (leading
; operand i32 15; the checks show dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample with address v[0:1].
; The verde and gfx900 runs expect the `da` modifier; the gfx1010 run expects
; dim:SQ_RSRC_IMG_1D_ARRAY instead and also checks the encoding.
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
|
|
; VERDE-LABEL: sample_1darray:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1darray:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_1darray:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x20,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_2darray: passes %s, %t, %slice to llvm.amdgcn.image.sample.2darray
; (leading operand i32 15; the checks show dmask:0xf) and returns the
; <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample with address v[0:2].
; The verde and gfx900 runs expect the `da` modifier; the gfx1010 run expects
; dim:SQ_RSRC_IMG_2D_ARRAY instead and also checks the encoding.
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
|
|
; VERDE-LABEL: sample_2darray:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_2darray:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_2darray:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_1d: passes %zcompare, %s to llvm.amdgcn.image.sample.c.1d (leading
; operand i32 15; the checks show dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_c with address v[0:1];
; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_1D and the encoding.
define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
|
|
; VERDE-LABEL: sample_c_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa0,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_2d: passes %zcompare, %s, %t to llvm.amdgcn.image.sample.c.2d
; (leading operand i32 15; the checks show dmask:0xf) and returns the
; <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_c with address v[0:2];
; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_2D and the encoding.
define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
|
|
; VERDE-LABEL: sample_c_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa0,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_cl_1d: passes %s, %clamp to llvm.amdgcn.image.sample.cl.1d (leading
; operand i32 15; the checks show dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_cl with address v[0:1];
; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_1D and the encoding.
define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
|
|
; VERDE-LABEL: sample_cl_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_cl_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_cl_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x84,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_cl_2d: passes %s, %t, %clamp to llvm.amdgcn.image.sample.cl.2d (leading
; operand i32 15; the checks show dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_cl with address v[0:2];
; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_2D and the encoding.
define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
|
|
; VERDE-LABEL: sample_cl_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_cl_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_cl_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x84,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_cl_1d: passes %zcompare, %s, %clamp to
; llvm.amdgcn.image.sample.c.cl.1d (leading operand i32 15; the checks show
; dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_c_cl with address
; v[0:2]; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_1D and the encoding.
define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
|
|
; VERDE-LABEL: sample_c_cl_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_cl_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_cl_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa4,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_cl_2d: passes %zcompare, %s, %t, %clamp to
; llvm.amdgcn.image.sample.c.cl.2d (leading operand i32 15; the checks show
; dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_c_cl with address
; v[0:3]; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_2D and the encoding.
define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
|
|
; VERDE-LABEL: sample_c_cl_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_cl_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_cl_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa4,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_b_1d: passes %bias, %s to llvm.amdgcn.image.sample.b.1d (leading
; operand i32 15; the checks show dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_b with address v[0:1];
; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_1D and the encoding.
define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
|
|
; VERDE-LABEL: sample_b_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_b_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_b_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x94,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_b_2d: passes %bias, %s, %t to llvm.amdgcn.image.sample.b.2d (leading
; operand i32 15; the checks show dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_b with address v[0:2];
; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_2D and the encoding.
define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
|
|
; VERDE-LABEL: sample_b_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_b_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_b_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x94,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_b_1d: passes %bias, %zcompare, %s to llvm.amdgcn.image.sample.c.b.1d
; (leading operand i32 15; the checks show dmask:0xf) and returns the
; <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_c_b with address
; v[0:2]; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_1D and the encoding.
define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
|
|
; VERDE-LABEL: sample_c_b_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_b_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_b_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb4,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_b_2d: passes %bias, %zcompare, %s, %t to
; llvm.amdgcn.image.sample.c.b.2d (leading operand i32 15; the checks show
; dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_c_b with address
; v[0:3]; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_2D and the encoding.
define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
|
|
; VERDE-LABEL: sample_c_b_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_b_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_b_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb4,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_b_cl_1d: passes %bias, %s, %clamp to llvm.amdgcn.image.sample.b.cl.1d
; (leading operand i32 15; the checks show dmask:0xf) and returns the
; <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_b_cl with address
; v[0:2]; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_1D and the encoding.
define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
|
|
; VERDE-LABEL: sample_b_cl_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_b_cl_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_b_cl_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x98,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_b_cl_2d: passes %bias, %s, %t, %clamp to
; llvm.amdgcn.image.sample.b.cl.2d (leading operand i32 15; the checks show
; dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_b_cl with address
; v[0:3]; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_2D and the encoding.
define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
|
|
; VERDE-LABEL: sample_b_cl_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_b_cl_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_b_cl_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x98,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_b_cl_1d: passes %bias, %zcompare, %s, %clamp to
; llvm.amdgcn.image.sample.c.b.cl.1d (leading operand i32 15; the checks show
; dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_c_b_cl with address
; v[0:3]; the gfx1010 run additionally checks dim:SQ_RSRC_IMG_1D and the encoding.
define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
|
|
; VERDE-LABEL: sample_c_b_cl_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_b_cl_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_b_cl_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb8,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_b_cl_2d: passes %bias, %zcompare, %s, %t, %clamp to
; llvm.amdgcn.image.sample.c.b.cl.2d (leading operand i32 15; the checks show
; dmask:0xf) and returns the <4 x float> result.
; Expected code: s_wqm exec setup, then one image_sample_c_b_cl; the gfx1010 run
; additionally checks dim:SQ_RSRC_IMG_2D and the encoding.
; NOTE(review): the checked address tuple is v[0:7] although only five floats
; are passed; presumably the address is padded up to an available register
; tuple size -- confirm against the backend before relying on this.
define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
|
|
; VERDE-LABEL: sample_c_b_cl_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_b_cl_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_b_cl_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb8,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_d_1d: passes %dsdh, %dsdv, %s to llvm.amdgcn.image.sample.d.1d (leading
; operand i32 15; the checks show dmask:0xf) and returns the <4 x float> result.
; Expected code: one image_sample_d with address v[0:2] followed by s_waitcnt;
; the checks contain no s_wqm / exec-mask instructions for this function.
; The gfx1010 run additionally checks dim:SQ_RSRC_IMG_1D and the encoding.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
|
|
; VERDE-LABEL: sample_d_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_d_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_d_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_d_2d: passes %dsdh, %dtdh, %dsdv, %dtdv, %s, %t to
; llvm.amdgcn.image.sample.d.2d (leading operand i32 15; the checks show
; dmask:0xf) and returns the <4 x float> result.
; Expected code: one image_sample_d followed by s_waitcnt; the checks contain
; no s_wqm / exec-mask instructions for this function. The gfx1010 run
; additionally checks dim:SQ_RSRC_IMG_2D and the encoding.
; NOTE(review): the checked address tuple is v[0:7] although only six floats
; are passed; presumably the address is padded up to an available register
; tuple size -- confirm against the backend before relying on this.
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
|
|
; VERDE-LABEL: sample_d_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_d_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_d_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.d.1d with dmask 15. The four float operands
; (%zcompare, %dsdh, %dsdv, %s) are expected in v[0:3] of a single
; image_sample_c_d, followed by a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_c_d_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.d.2d with dmask 15 and seven float
; operands. The checked address range is v[0:7], one register wider than the
; operand count.
; NOTE(review): presumably rounded up to an available register-tuple size - confirm.
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_c_d_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.d.cl.1d with dmask 15. The four float
; operands (%dsdh, %dsdv, %s, %clamp) are expected in v[0:3] of a single
; image_sample_d_cl, followed by a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_d_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.d.cl.2d with dmask 15 and seven float
; operands. The checked address range is v[0:7], one register wider than the
; operand count.
; NOTE(review): presumably rounded up to an available register-tuple size - confirm.
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_d_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.d.cl.1d with dmask 15 and five float
; operands. The checked address range is v[0:7], three registers wider than the
; operand count.
; NOTE(review): presumably rounded up to an available register-tuple size - confirm.
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_c_d_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.d.cl.2d with dmask 15. The eight float
; operands are expected in v[0:7] of a single image_sample_c_d_cl, followed by a
; wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_d_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.cd.1d with dmask 15. The three float operands
; (%dsdh, %dsdv, %s) are expected in v[0:2] of a single image_sample_cd,
; followed by a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_cd_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cd_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.cd.2d with dmask 15 and six float operands.
; The checked address range is v[0:7], wider than the six operands.
; NOTE(review): presumably rounded up to an available register-tuple size - confirm.
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_cd_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cd_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.cd.1d with dmask 15. The four float
; operands (%zcompare, %dsdh, %dsdv, %s) are expected in v[0:3] of a single
; image_sample_c_cd, followed by a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_c_cd_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cd_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.cd.2d with dmask 15 and seven float
; operands. The checked address range is v[0:7], one register wider than the
; operand count.
; NOTE(review): presumably rounded up to an available register-tuple size - confirm.
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_c_cd_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cd_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.cd.cl.1d with dmask 15. The four float
; operands (%dsdh, %dsdv, %s, %clamp) are expected in v[0:3] of a single
; image_sample_cd_cl, followed by a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_cd_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cd_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.cd.cl.2d with dmask 15 and seven float
; operands. The checked address range is v[0:7], one register wider than the
; operand count.
; NOTE(review): presumably rounded up to an available register-tuple size - confirm.
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_cd_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cd_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.cd.cl.1d with dmask 15 and five float
; operands. The checked address range is v[0:7], three registers wider than the
; operand count.
; NOTE(review): presumably rounded up to an available register-tuple size - confirm.
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_c_cd_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cd_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.cd.cl.2d with dmask 15. The eight float
; operands are expected in v[0:7] of a single image_sample_c_cd_cl, followed by
; a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_cd_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cd_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.l.1d with dmask 15. The two float operands
; (%s, %lod) are expected in v[0:1] of a single image_sample_l, followed by a
; wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
; VERDE-LABEL: sample_l_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_l_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_l_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x90,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.l.2d with dmask 15. The three float operands
; (%s, %t, %lod) are expected in v[0:2] of a single image_sample_l, followed by
; a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; VERDE-LABEL: sample_l_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_l_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_l_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x90,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.l.1d with dmask 15. The three float
; operands (%zcompare, %s, %lod) are expected in v[0:2] of a single
; image_sample_c_l, followed by a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
; VERDE-LABEL: sample_c_l_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_l_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb0,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.l.2d with dmask 15. The four float operands
; (%zcompare, %s, %t, %lod) are expected in v[0:3] of a single image_sample_c_l,
; followed by a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
; VERDE-LABEL: sample_c_l_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_l_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb0,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.lz.1d with dmask 15. The single float operand
; (%s) is expected in v0 of a single image_sample_lz, followed by a wait on
; vmcnt(0).
define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_lz_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_lz_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_lz_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x9c,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.lz.2d with dmask 15. The two float operands
; (%s, %t) are expected in v[0:1] of a single image_sample_lz, followed by a
; wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; VERDE-LABEL: sample_lz_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_lz_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_lz_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x9c,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.lz.1d with dmask 15. The two float operands
; (%zcompare, %s) are expected in v[0:1] of a single image_sample_c_lz, followed
; by a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; VERDE-LABEL: sample_c_lz_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_lz_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_lz_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xbc,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.lz.2d with dmask 15. The three float
; operands (%zcompare, %s, %t) are expected in v[0:2] of a single
; image_sample_c_lz, followed by a wait on vmcnt(0).
define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; VERDE-LABEL: sample_c_lz_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_lz_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_lz_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xbc,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers @llvm.amdgcn.image.sample.c.d.o.2darray returning a single float with
; dmask 4. The nine address operands (i32 %offset plus eight floats) are checked
; as v[0:15] and the result as v0. The verde and gfx900 runs expect the `da`
; modifier; the gfx1010 run expects dim:SQ_RSRC_IMG_2D_ARRAY instead.
; NOTE(review): v[0:15] is wider than nine operands - presumably rounded up to
; an available register-tuple size; confirm.
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_c_d_o_2darray_V1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V1:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x04,0xe8,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}
|
|
|
|
; TFE variant of sample_c_d_o_2darray_V1: the intrinsic is called with its
; second-to-last i32 argument set to 1 and returns {float, i32}. Element 0 is
; returned from the shader and element 1 is stored to %out.
; Checked codegen in every run:
;   - both result registers are set to zero before the sample (v_mov of 0, then
;     a copy of that register);
;   - the sample carries the `tfe` modifier and writes a two-register result;
;   - the second result register is stored (buffer_store_dword on verde,
;     global_store_dword on gfx900 and gfx1010).
; The gfx1010 run additionally expects the address operands as a bracketed
; per-register list rather than one contiguous range.
define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) {
; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v9, 0
; VERDE-NEXT: v_mov_b32_e32 v10, v9
; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: v_mov_b32_e32 v0, v9
; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: v_mov_b32_e32 v9, 0
; GFX6789-NEXT: v_mov_b32_e32 v10, v9
; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
; GFX6789-NEXT: v_mov_b32_e32 v0, s12
; GFX6789-NEXT: v_mov_b32_e32 v1, s13
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: global_store_dword v[0:1], v10, off
; GFX6789-NEXT: v_mov_b32_e32 v0, v9
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v10, v0 ; encoding: [0x00,0x03,0x14,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v9, v1 ; encoding: [0x01,0x03,0x12,0x7e]
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
; GFX10-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x04,0xe9,0xf0,0x0a,0x00,0x40,0x00,0x09,0x02,0x03,0x04,0x05,0x06,0x07,0x08]
; GFX10-NEXT: v_mov_b32_e32 v2, s12 ; encoding: [0x0c,0x02,0x04,0x7e]
; GFX10-NEXT: v_mov_b32_e32 v3, s13 ; encoding: [0x0d,0x02,0x06,0x7e]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: global_store_dword v[2:3], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x02,0x01,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %v.vec = extractvalue {float, i32} %v, 0
  %v.err = extractvalue {float, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret float %v.vec
}
|
|
|
|
; sample_c_d_o_2darray_V2: calls the image.sample.c.d.o.2darray intrinsic with
; a first operand (dmask) of 6 and a <2 x float> result. All three run lines
; expect a single image_sample_c_d_o with dmask:0x6 that writes v[0:1] and
; takes its address from v[0:15], followed only by a wait on vmcnt. The array
; dimension shows up as the "da" modifier on verde/gfx900 and as
; dim:SQ_RSRC_IMG_2D_ARRAY on gfx1010.
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
|
|
; VERDE-LABEL: sample_c_d_o_2darray_V2:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_d_o_2darray_V2:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_d_o_2darray_V2:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x06,0xe8,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <2 x float> %v
|
|
}
|
|
|
|
; sample_c_d_o_2darray_V2_tfe: the same dmask-6 sample as
; sample_c_d_o_2darray_V2, but the second-to-last intrinsic operand is 1 and
; the intrinsic returns {<2 x float>, i32}. Expected code: the destination
; grows to three registers, each written with zero before the sample, and the
; instruction carries the "tfe" modifier.
;   * verde/gfx900: the result lands in v[9:11] and is copied down to v0..v2
;     after the wait on vmcnt.
;   * gfx1010: the first three address registers are moved aside to
;     v11/v10/v9 and the address is passed as a register list, so the result
;     is written to v[0:2] directly.
; The extra i32 (%v.err) is bitcast to float and returned in element 2 of the
; <4 x float> result; element 3 is left undef.
define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
|
|
; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: v_mov_b32_e32 v9, 0
|
|
; VERDE-NEXT: v_mov_b32_e32 v10, v9
|
|
; VERDE-NEXT: v_mov_b32_e32 v11, v9
|
|
; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: v_mov_b32_e32 v0, v9
|
|
; VERDE-NEXT: v_mov_b32_e32 v1, v10
|
|
; VERDE-NEXT: v_mov_b32_e32 v2, v11
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: v_mov_b32_e32 v9, 0
|
|
; GFX6789-NEXT: v_mov_b32_e32 v10, v9
|
|
; GFX6789-NEXT: v_mov_b32_e32 v11, v9
|
|
; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: v_mov_b32_e32 v0, v9
|
|
; GFX6789-NEXT: v_mov_b32_e32 v1, v10
|
|
; GFX6789-NEXT: v_mov_b32_e32 v2, v11
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v11, v0 ; encoding: [0x00,0x03,0x16,0x7e]
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; encoding: [0x02,0x03,0x12,0x7e]
|
|
; GFX10-NEXT: v_mov_b32_e32 v10, v1 ; encoding: [0x01,0x03,0x14,0x7e]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
|
|
; GFX10-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x06,0xe9,0xf0,0x0b,0x00,0x40,0x00,0x0a,0x09,0x03,0x04,0x05,0x06,0x07,0x08]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
|
|
%v.vec = extractvalue {<2 x float>, i32} %v, 0
|
|
%v.f1 = extractelement <2 x float> %v.vec, i32 0
|
|
%v.f2 = extractelement <2 x float> %v.vec, i32 1
|
|
%v.err = extractvalue {<2 x float>, i32} %v, 1
|
|
%v.errf = bitcast i32 %v.err to float
|
|
%res.0 = insertelement <4 x float> undef, float %v.f1, i32 0
|
|
%res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1
|
|
%res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2
|
|
ret <4 x float> %res.2
|
|
}
|
|
|
|
; sample_1d_unorm: 1D sample with dmask 15 and the i1 operand set to 1; the
; expected image_sample carries the "unorm" modifier. The checks also pin the
; exec save / s_wqm / s_and sequence emitted ahead of the sample (64-bit exec
; saved in s[12:13] on verde/gfx900, exec_lo saved in s12 on gfx1010).
define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: sample_1d_unorm:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1d_unorm:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_1d_unorm:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_1d_glc: 1D sample with dmask 15 whose last i32 operand is 1; the
; expected image_sample carries the "glc" modifier on every target. The
; checks also pin the exec save / s_wqm / s_and sequence ahead of the sample.
define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: sample_1d_glc:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1d_glc:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_1d_glc:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x2f,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_1d_slc: 1D sample with dmask 15 whose last i32 operand is 2; the
; expected image_sample carries the "slc" modifier on every target. The
; checks also pin the exec save / s_wqm / s_and sequence ahead of the sample.
define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: sample_1d_slc:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1d_slc:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_1d_slc:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; encoding: [0x00,0x0f,0x80,0xf2,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_1d_glc_slc: 1D sample with dmask 15 whose last i32 operand is 3; the
; expected image_sample carries both the "glc" and "slc" modifiers on every
; target. The checks also pin the exec save / s_wqm / s_and sequence ahead of
; the sample.
define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: sample_1d_glc_slc:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1d_glc_slc:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_1d_glc_slc:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc ; encoding: [0x00,0x2f,0x80,0xf2,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; adjust_writemask_sample_0: the intrinsic is called with dmask 15 but only
; element 0 of the <4 x float> result is returned. The expected image_sample
; is narrowed to dmask:0x1 with a single-register destination (v0).
define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_0:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_0:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: adjust_writemask_sample_0:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%elt0 = extractelement <4 x float> %r, i32 0
|
|
ret float %elt0
|
|
}
|
|
|
|
; adjust_writemask_sample_01: the intrinsic is called with dmask 15 but only
; elements 0 and 1 of the result are kept by the shufflevector. The expected
; image_sample is narrowed to dmask:0x3 with a two-register destination
; (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_01:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_01:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: adjust_writemask_sample_01:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_012: the intrinsic is called with dmask 15 but only
; elements 0, 1 and 2 of the result are kept by the shufflevector. The
; expected image_sample is narrowed to dmask:0x7 with a three-register
; destination (v[0:2]).
define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_012:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_012:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: adjust_writemask_sample_012:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
ret <3 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_12: the intrinsic is called with dmask 15 but only
; elements 1 and 2 of the result are kept by the shufflevector. The expected
; image_sample is narrowed to dmask:0x6 with a two-register destination
; (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_12:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_12:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: adjust_writemask_sample_12:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x06,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_03: the intrinsic is called with dmask 15 but only
; elements 0 and 3 of the result are kept by the shufflevector. The expected
; image_sample is narrowed to dmask:0x9 with a two-register destination
; (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_03:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_03:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: adjust_writemask_sample_03:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x09,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_13: the intrinsic is called with dmask 15 but only
; elements 1 and 3 of the result are kept by the shufflevector. The expected
; image_sample is narrowed to dmask:0xa with a two-register destination
; (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_13:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_13:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: adjust_writemask_sample_13:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0a,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_123: the intrinsic is called with dmask 15 but only
; elements 1, 2 and 3 of the result are kept by the shufflevector. The
; expected image_sample is narrowed to dmask:0xe with a three-register
; destination (v[0:2]).
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_123:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_123:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: adjust_writemask_sample_123:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0e,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
|
|
ret <3 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_none_enabled: the intrinsic is called with a dmask
; of 0 and its full <4 x float> result is returned. The checks expect no
; image_sample (and no exec/wqm setup) at all: the entry block goes straight
; to the shader epilog comment, with only the implicit-def of vcc_hi in
; between on gfx1010.
define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_none_enabled:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: adjust_writemask_sample_none_enabled:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %r
|
|
}
|
|
|
|
; adjust_writemask_sample_123_to_12: the intrinsic is called with dmask 14
; (0b1110) and only result elements 0 and 1 are kept by the shufflevector.
; The expected image_sample is narrowed to dmask:0x6 (0b0110), i.e. the two
; lowest bits that were set in the requested mask, with a two-register
; destination (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_123_to_12:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: adjust_writemask_sample_123_to_12:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x06,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_013_to_13: the intrinsic is called with dmask 11
; (0b1011) and only result elements 1 and 2 are kept by the shufflevector.
; The expected image_sample is narrowed to dmask:0xa (0b1010), i.e. the
; second and third set bits of the requested mask, with a two-register
; destination (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_013_to_13:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: adjust_writemask_sample_013_to_13:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
|
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
|
|
; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0a,0x80,0xf0,0x00,0x00,0x40,0x00]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; Declarations of the image.sample.d intrinsic variants (d, c.d, d.cl, c.d.cl),
; each in a 1d and a 2d form. All of them return <4 x float> and carry
; attribute group #1. Every signature has the same frame: a leading i32, a run
; of float operands, then <8 x i32>, <4 x i32>, i1, i32, i32. Only the number
; of float operands differs between variants:
;   d.1d = 3, d.2d = 6, c.d.1d = 4, c.d.2d = 7,
;   d.cl.1d = 4, d.cl.2d = 7, c.d.cl.1d = 5, c.d.cl.2d = 8.
; NOTE(review): the meaning of the individual float operands is not visible
; here - check the call sites or the intrinsic definitions before relying on
; the operand order.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; Declarations of the image.sample.cd intrinsic variants (cd, c.cd, cd.cl,
; c.cd.cl), each in a 1d and a 2d form. All of them return <4 x float> and
; carry attribute group #1. Every signature has the same frame: a leading i32,
; a run of float operands, then <8 x i32>, <4 x i32>, i1, i32, i32. Only the
; number of float operands differs between variants:
;   cd.1d = 3, cd.2d = 6, c.cd.1d = 4, c.cd.2d = 7,
;   cd.cl.1d = 4, cd.cl.2d = 7, c.cd.cl.1d = 5, c.cd.cl.2d = 8.
; NOTE(review): the meaning of the individual float operands is not visible
; here - check the call sites or the intrinsic definitions before relying on
; the operand order.
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; Declarations of the image.sample.l intrinsic variants (l and c.l), each in a
; 1d and a 2d form. All of them return <4 x float> and carry attribute group
; #1. These names have two type suffixes (v4f32.f32). Every signature has the
; same frame: a leading i32, a run of float operands, then <8 x i32>,
; <4 x i32>, i1, i32, i32. Float operand counts:
;   l.1d = 2, l.2d = 3, c.l.1d = 3, c.l.2d = 4.
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; Declarations of the image.sample.lz intrinsic variants (lz and c.lz), each in
; a 1d and a 2d form. All of them return <4 x float> and carry attribute group
; #1. These names have two type suffixes (v4f32.f32). Every signature has the
; same frame: a leading i32, a run of float operands, then <8 x i32>,
; <4 x i32>, i1, i32, i32. Float operand counts:
;   lz.1d = 1, lz.2d = 2, c.lz.1d = 2, c.lz.2d = 3.
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; Four declarations of image.sample.c.d.o.2darray that share one parameter list
; (i32, i32, eight floats, <8 x i32>, <4 x i32>, i1, i32, i32) and attribute
; group #1, and differ only in return type. The first type suffix of each name
; tracks that return type:
;   f32      -> float
;   f32i32   -> {float, i32}
;   v2f32    -> <2 x float>
;   v2f32i32 -> {<2 x float>, i32}
; NOTE(review): the struct-returning forms presumably pair the sampled data
; with an extra i32 status word - confirm against the functions that call them.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; Attribute groups referenced by number elsewhere in this file.
; #0: nounwind only.
; NOTE(review): users of #0 are not visible in this section of the file.
attributes #0 = { nounwind }
|
|
; #1: nounwind + readonly. The image.sample intrinsic declarations in this
; file reference this group.
attributes #1 = { nounwind readonly }
|
|
; #2: nounwind + readnone.
; NOTE(review): users of #2 are not visible in this section of the file.
attributes #2 = { nounwind readnone }
|