mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
d1f23a1772
Support for XNACK and SRAMECC is not static on some GPUs. We must be able to differentiate between different scenarios for these dynamic subtarget features. The possible settings are: (1) Unsupported: the GPU has no support for XNACK/SRAMECC; (2) Any: the preference is unspecified, so use conservative settings that can run anywhere; (3) Off: request support for XNACK/SRAMECC Off; (4) On: request support for XNACK/SRAMECC On. GCNSubtarget will track the four options based on the following criteria. If the subtarget does not support XNACK/SRAMECC, we say the setting is "Unsupported". If no subtarget features for XNACK/SRAMECC are requested, we must support "Any" mode. If the subtarget features XNACK/SRAMECC exist in the feature string when initializing the subtarget, the settings are "On/Off". The defaults are updated to be conservatively correct, meaning that if no setting for XNACK or SRAMECC is explicitly requested, defaults will be used which generate code that can be run anywhere. This corresponds to the "Any" setting. Differential Revision: https://reviews.llvm.org/D85882
7324 lines
326 KiB
LLVM
7324 lines
326 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-SI %s
|
|
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-HSA %s
|
|
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-VI %s
|
|
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck --check-prefix=EG %s
|
|
|
|
; Scalar i16 copy: loads one i16 through %in (addrspace(4)) and stores it
; unchanged through %out (addrspace(1)).
; The autogenerated checks below expect a 16-bit memory load on every run
; line (buffer_load_ushort, flat_load_ushort, or VTX_READ_16) followed by a
; 16-bit store (buffer_store_short, flat_store_short, or MEM_RAT MSKOR).
define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
|
|
; GCN-NOHSA-SI-LABEL: constant_load_i16:
|
|
; GCN-NOHSA-SI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_load_i16:
|
|
; GCN-HSA: ; %bb.0: ; %entry
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-HSA-NEXT: flat_store_short v[0:1], v2
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_load_i16:
|
|
; GCN-NOHSA-VI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_load_i16:
|
|
; EG: ; %bb.0: ; %entry
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
|
|
; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
|
|
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
|
|
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
|
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
|
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
|
|
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: MOV T0.Y, 0.0,
|
|
; EG-NEXT: MOV * T0.Z, 0.0,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
entry:
|
|
%ld = load i16, i16 addrspace(4)* %in
|
|
store i16 %ld, i16 addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; <2 x i16> copy: loads a 32-bit vector through %in (addrspace(4)) and stores
; it unchanged through %out (addrspace(1)).
; The autogenerated checks below expect the whole vector to be fetched with a
; single 32-bit load (s_load_dword on the amdgcn runs, VTX_READ_32 on the r600
; run) and written back with a single dword store.
define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) {
|
|
; GCN-NOHSA-SI-LABEL: constant_load_v2i16:
|
|
; GCN-NOHSA-SI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dword s4, s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_load_v2i16:
|
|
; GCN-HSA: ; %bb.0: ; %entry
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: s_load_dword s0, s[2:3], 0x0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
|
|
; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_load_v2i16:
|
|
; GCN-NOHSA-VI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-VI-NEXT: s_load_dword s0, s[2:3], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_load_v2i16:
|
|
; EG: ; %bb.0: ; %entry
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
entry:
|
|
%ld = load <2 x i16>, <2 x i16> addrspace(4)* %in
|
|
store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; <3 x i16> copy: loads a three-element i16 vector through %in (addrspace(4))
; and stores it unchanged through %out (addrspace(1)).
; On the amdgcn runs the autogenerated checks expect one s_load_dwordx2 and a
; split store: a short store at byte offset 4 plus a dword store at offset 0
; (the HSA run forms the +4 address with s_add_u32 / s_addc_u32).
; On the r600 run they expect three VTX_READ_16 fetches and two stores
; (STORE_RAW and MSKOR).
define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
|
|
; GCN-NOHSA-SI-LABEL: constant_load_v3i16:
|
|
; GCN-NOHSA-SI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_load_v3i16:
|
|
; GCN-HSA: ; %bb.0: ; %entry
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_add_u32 s4, s0, 4
|
|
; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s2
|
|
; GCN-HSA-NEXT: flat_store_short v[2:3], v4
|
|
; GCN-HSA-NEXT: flat_store_dword v[0:1], v5
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_load_v3i16:
|
|
; GCN-NOHSA-VI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s3
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s2
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dword v1, off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_load_v3i16:
|
|
; EG: ; %bb.0: ; %entry
|
|
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 2 @6
|
|
; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
|
|
; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
|
|
; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
|
|
; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
|
|
; EG-NEXT: ALU clause starting at 12:
|
|
; EG-NEXT: MOV * T5.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 13:
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
|
; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
|
|
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
|
|
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
|
|
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
|
; EG-NEXT: LSHL T5.X, T2.W, PV.W,
|
|
; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: MOV T5.Y, 0.0,
|
|
; EG-NEXT: MOV * T5.Z, 0.0,
|
|
; EG-NEXT: LSHR T8.X, T0.W, literal.x,
|
|
; EG-NEXT: LSHL T0.W, T7.X, literal.y,
|
|
; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: OR_INT T6.X, PV.W, PS,
|
|
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
entry:
|
|
%ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
|
|
store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; <4 x i16> copy: loads a 64-bit vector through %in (addrspace(4)) and stores
; it unchanged through %out (addrspace(1)).
; The autogenerated checks below expect a single 64-bit load (s_load_dwordx2
; on the amdgcn runs, VTX_READ_64 on the r600 run) and a single 64-bit store.
define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) {
|
|
; GCN-NOHSA-SI-LABEL: constant_load_v4i16:
|
|
; GCN-NOHSA-SI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_load_v4i16:
|
|
; GCN-HSA: ; %bb.0: ; %entry
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_load_v4i16:
|
|
; GCN-NOHSA-VI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_load_v4i16:
|
|
; EG: ; %bb.0: ; %entry
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
entry:
|
|
%ld = load <4 x i16>, <4 x i16> addrspace(4)* %in
|
|
store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; <8 x i16> copy: loads a 128-bit vector through %in (addrspace(4)) and stores
; it unchanged through %out (addrspace(1)).
; The autogenerated checks below expect a single 128-bit load (s_load_dwordx4
; on the amdgcn runs, VTX_READ_128 on the r600 run) and a single 128-bit store.
define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) {
|
|
; GCN-NOHSA-SI-LABEL: constant_load_v8i16:
|
|
; GCN-NOHSA-SI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_load_v8i16:
|
|
; GCN-HSA: ; %bb.0: ; %entry
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_load_v8i16:
|
|
; GCN-NOHSA-VI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_load_v8i16:
|
|
; EG: ; %bb.0: ; %entry
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
entry:
|
|
%ld = load <8 x i16>, <8 x i16> addrspace(4)* %in
|
|
store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; <16 x i16> copy: loads a 256-bit vector through %in (addrspace(4)) and
; stores it unchanged through %out (addrspace(1)).
; On the amdgcn runs the autogenerated checks expect one s_load_dwordx8 and
; two 128-bit stores (byte offsets 16 and 0). On the r600 run they expect two
; VTX_READ_128 fetches (offsets 16 and 0) and two STORE_RAW writes.
define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) {
|
|
; GCN-NOHSA-SI-LABEL: constant_load_v16i16:
|
|
; GCN-NOHSA-SI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_load_v16i16:
|
|
; GCN-HSA: ; %bb.0: ; %entry
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
|
|
; GCN-HSA-NEXT: s_add_u32 s10, s8, 16
|
|
; GCN-HSA-NEXT: s_addc_u32 s11, s9, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s11
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_load_v16i16:
|
|
; GCN-NOHSA-VI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s1
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[0:7], s[2:3], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s2
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s3
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_load_v16i16:
|
|
; EG: ; %bb.0: ; %entry
|
|
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @8
|
|
; EG-NEXT: ALU 3, @13, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
|
|
; EG-NEXT: ALU 1, @17, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @10
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 8:
|
|
; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1
|
|
; EG-NEXT: Fetch clause starting at 10:
|
|
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 12:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 13:
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR * T2.X, PV.W, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; EG-NEXT: ALU clause starting at 17:
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
entry:
|
|
%ld = load <16 x i16>, <16 x i16> addrspace(4)* %in
|
|
store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Under-aligned <16 x i16> load: reads a 256-bit vector through %ptr0
; (addrspace(4)) with only 2-byte alignment, then stores it with align 32 to
; an undef addrspace(1) pointer, so only the load side has a real address.
; The kernel carries attribute group #0, whose definition is outside this
; part of the file.
; The autogenerated checks below differ per run line: the two non-HSA amdgcn
; runs expect sixteen buffer_load_ushort loads recombined pairwise with
; v_lshlrev_b32 / v_or_b32, the HSA run expects two flat_load_dwordx4, and the
; r600 run expects two VTX_READ_128 fetches.
define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_load_v16i16_align2:
|
|
; GCN-NOHSA-SI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:2
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:6
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:10
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v6, off, s[0:3], 0 offset:12
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v7, off, s[0:3], 0 offset:14
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v8, off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v9, off, s[0:3], 0 offset:18
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v10, off, s[0:3], 0 offset:20
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v11, off, s[0:3], 0 offset:22
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v12, off, s[0:3], 0 offset:24
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v13, off, s[0:3], 0 offset:26
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v14, off, s[0:3], 0 offset:28
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v15, off, s[0:3], 0 offset:30
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(8)
|
|
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
|
|
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v16, 16, v3
|
|
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v17, 16, v2
|
|
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v18, 16, v1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v15, 16, v15
|
|
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v13, 16, v13
|
|
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v11, 16, v11
|
|
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v9, 16, v9
|
|
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v3, v7, v6
|
|
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v2, v16, v5
|
|
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v1, v17, v4
|
|
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v0, v18, v0
|
|
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v7, v15, v14
|
|
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v6, v13, v12
|
|
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v5, v11, v10
|
|
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v4, v9, v8
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_load_v16i16_align2:
|
|
; GCN-HSA: ; %bb.0: ; %entry
|
|
; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
|
|
; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
|
|
; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
|
|
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_load_v16i16_align2:
|
|
; GCN-NOHSA-VI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:2
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:6
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:10
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v6, off, s[0:3], 0 offset:12
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v7, off, s[0:3], 0 offset:14
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v8, off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v9, off, s[0:3], 0 offset:18
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v10, off, s[0:3], 0 offset:20
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v11, off, s[0:3], 0 offset:22
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v12, off, s[0:3], 0 offset:24
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v13, off, s[0:3], 0 offset:26
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v14, off, s[0:3], 0 offset:28
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v15, off, s[0:3], 0 offset:30
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(14)
|
|
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v18, 16, v1
|
|
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v0, v18, v0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(12)
|
|
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v17, 16, v2
|
|
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v1, v17, v4
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(10)
|
|
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v16, 16, v3
|
|
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v2, v16, v5
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8)
|
|
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
|
|
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v3, v7, v6
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(6)
|
|
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v9, 16, v9
|
|
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v4, v9, v8
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4)
|
|
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v11, 16, v11
|
|
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v5, v11, v10
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2)
|
|
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v13, 16, v13
|
|
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v6, v13, v12
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v15, 16, v15
|
|
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v7, v15, v14
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_load_v16i16_align2:
|
|
; EG: ; %bb.0: ; %entry
|
|
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @8
|
|
; EG-NEXT: ALU 1, @13, KC0[], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
|
|
; EG-NEXT: TEX 0 @10
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 8:
|
|
; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1
|
|
; EG-NEXT: Fetch clause starting at 10:
|
|
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 12:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Y,
|
|
; EG-NEXT: ALU clause starting at 13:
|
|
; EG-NEXT: MOV * T2.X, literal.x,
|
|
; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
|
|
entry:
|
|
%ld = load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2
|
|
store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32
|
|
ret void
|
|
}
|
|
|
|
; Zero-extending load: reads one i16 through %in (addrspace(4)), zero-extends
; it to i32, and stores the result through %out (addrspace(1)).
; The autogenerated checks below expect the extension to be folded into the
; load: an unsigned 16-bit load (buffer_load_ushort, flat_load_ushort, or
; VTX_READ_16) feeds a dword store directly, with no separate extend
; instruction in between.
; The body has no explicit entry label, so the block comments in the checks
; read "%bb.0:" without a trailing "%entry".
define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_i16_to_i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_i16_to_i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
%a = load i16, i16 addrspace(4)* %in
|
|
%ext = zext i16 %a to i32
|
|
store i32 %ext, i32 addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Sign-extending load: reads one i16 through %in (addrspace(4)), sign-extends
; it to i32, and stores the result through %out (addrspace(1)).
; On the amdgcn runs the autogenerated checks expect the extension to be
; folded into a signed 16-bit load (buffer_load_sshort / flat_load_sshort)
; feeding a dword store. On the r600 run they expect a VTX_READ_16 fetch
; followed by a BFE_INT with literal 16 before the store.
define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_i16_to_i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_i16_to_i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
|
|
%a = load i16, i16 addrspace(4)* %in
|
|
%ext = sext i16 %a to i32
|
|
store i32 %ext, i32 addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: loads a <1 x i16> vector through the addrspace(4) pointer %in,
; zero-extends it to <1 x i32> and stores the result through the addrspace(1)
; pointer %out.
; The prefixed comment lines inside the body are the expected llc output for
; each llc invocation at the top of the file (autogenerated by
; utils/update_llc_test_checks.py); regenerate them instead of hand-editing.
; Expected lowering per those lines: an unsigned 16-bit memory load
; (buffer_load_ushort or flat_load_ushort) on the GCN targets, and a
; VTX_READ_16 with no separate extension instruction on the r600 target.
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v1i16_to_v1i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; IR under test: <1 x i16> load -> zext to <1 x i32> -> vector store.
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
|
|
%ext = zext <1 x i16> %load to <1 x i32>
|
|
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: loads a <1 x i16> vector through the addrspace(4) pointer %in,
; sign-extends it to <1 x i32> and stores the result through the addrspace(1)
; pointer %out.
; The prefixed comment lines inside the body are the expected llc output for
; each llc invocation at the top of the file (autogenerated by
; utils/update_llc_test_checks.py); regenerate them instead of hand-editing.
; Expected lowering per those lines: a signed 16-bit memory load
; (buffer_load_sshort or flat_load_sshort) on the GCN targets, and
; VTX_READ_16 followed by BFE_INT on the r600 target.
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v1i16_to_v1i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
|
|
; IR under test: <1 x i16> load -> sext to <1 x i32> -> vector store.
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
|
|
%ext = sext <1 x i16> %load to <1 x i32>
|
|
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: loads a <2 x i16> vector through the addrspace(4) pointer %in,
; zero-extends it to <2 x i32> and stores the result through the addrspace(1)
; pointer %out.
; The prefixed comment lines inside the body are the expected llc output for
; each llc invocation at the top of the file (autogenerated by
; utils/update_llc_test_checks.py); regenerate them instead of hand-editing.
; Expected lowering per those lines: on the GCN targets one s_load_dword of
; the packed pair, then s_lshr_b32 by 16 for the high element and s_and_b32
; with 0xffff for the low element; on the r600 target a VTX_READ_32 followed
; by LSHR by 16 and AND_INT with 65535.
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s2, 0xffff
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s4
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16
|
|
; GCN-HSA-NEXT: s_and_b32 s1, s2, 0xffff
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_load_dword s2, s[2:3], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s2, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s2, 0xffff
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s1
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s0
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v2i16_to_v2i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T4.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: LSHR * T4.Y, T4.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T4.X, T4.X, literal.x,
|
|
; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
|
|
; IR under test: <2 x i16> load -> zext to <2 x i32> -> vector store.
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
|
|
%ext = zext <2 x i16> %load to <2 x i32>
|
|
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; TODO: We should use ASHR instead of LSHR + BFE
|
|
; Test kernel: loads a <2 x i16> vector through the addrspace(4) pointer %in,
; sign-extends it to <2 x i32> and stores the result through the addrspace(1)
; pointer %out.
; The prefixed comment lines inside the body are the expected llc output for
; each llc invocation at the top of the file (autogenerated by
; utils/update_llc_test_checks.py); regenerate them instead of hand-editing.
; Expected lowering per those lines: on the GCN targets one s_load_dword of
; the packed pair, then s_ashr_i32 by 16 for the high element and
; s_sext_i32_i16 for the low element; on the r600 target a VTX_READ_32, a
; BFE_INT for the low element, and LSHR followed by BFE_INT for the high one.
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s4, s2, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s4
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_ashr_i32 s0, s2, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s1, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_load_dword s2, s[2:3], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s0, s2, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s1, s2
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s1
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s0
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v2i16_to_v2i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T4.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T0.W, T4.X, literal.x,
|
|
; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
|
|
; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; IR under test: <2 x i16> load -> sext to <2 x i32> -> vector store.
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
|
|
%ext = sext <2 x i16> %load to <2 x i32>
|
|
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: loads a <3 x i16> vector through the addrspace(4) pointer %in,
; zero-extends it to <3 x i32> and stores the result through the addrspace(1)
; pointer %out. This kernel carries no #0 attribute group reference.
; The prefixed comment lines inside the body are the expected llc output for
; each llc invocation at the top of the file (autogenerated by
; utils/update_llc_test_checks.py); regenerate them instead of hand-editing.
; Expected lowering per those lines: on the GCN targets one s_load_dwordx2,
; then s_lshr_b32 by 16 and s_and_b32 against a 0xffff mask held in an SGPR.
; The SI output splits the store into a dword at offset:8 plus a dwordx2,
; while the HSA and VI outputs use a single dwordx3 store. The r600 output
; issues three VTX_READ_16 fetches and two MEM_RAT_CACHELESS stores.
define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v3i16_to_v3i32:
|
|
; GCN-NOHSA-SI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, 0xffff
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s6
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v3i16_to_v3i32:
|
|
; GCN-HSA: ; %bb.0: ; %entry
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_mov_b32 s6, 0xffff
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s1
|
|
; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_lshr_b32 s2, s0, 16
|
|
; GCN-HSA-NEXT: s_and_b32 s1, s1, s6
|
|
; GCN-HSA-NEXT: s_and_b32 s0, s0, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1
|
|
; GCN-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v3i16_to_v3i32:
|
|
; GCN-NOHSA-VI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s8
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v3i16_to_v3i32:
|
|
; EG: ; %bb.0: ; %entry
|
|
; EG-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 2 @6
|
|
; EG-NEXT: ALU 2, @17, KC0[], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1
|
|
; EG-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1
|
|
; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1
|
|
; EG-NEXT: ALU clause starting at 12:
|
|
; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: MOV * T1.X, KC0[2].Z,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
; EG-NEXT: ALU clause starting at 17:
|
|
; EG-NEXT: LSHR T4.X, T0.W, literal.x,
|
|
; EG-NEXT: MOV * T3.Y, T1.X,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; IR under test: <3 x i16> load -> zext to <3 x i32> -> vector store.
; The explicit block label below matches the "%entry" annotation on the
; %bb.0 lines above.
entry:
|
|
%ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
|
|
%ext = zext <3 x i16> %ld to <3 x i32>
|
|
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: loads a <3 x i16> vector through the addrspace(4) pointer %in,
; sign-extends it to <3 x i32> and stores the result through the addrspace(1)
; pointer %out. This kernel carries no #0 attribute group reference.
; The prefixed comment lines inside the body are the expected llc output for
; each llc invocation at the top of the file (autogenerated by
; utils/update_llc_test_checks.py); regenerate them instead of hand-editing.
; Expected lowering per those lines: on the GCN targets one s_load_dwordx2,
; then s_ashr_i32 by 16 and s_sext_i32_i16 for the extensions. The SI output
; splits the store into a dword at offset:8 plus a dwordx2, while the HSA and
; VI outputs use a single dwordx3 store. The r600 output issues three
; VTX_READ_16 fetches, each extended with BFE_INT.
define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v3i16_to_v3i32:
|
|
; GCN-NOHSA-SI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s6, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s6
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v3i16_to_v3i32:
|
|
; GCN-HSA: ; %bb.0: ; %entry
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s1
|
|
; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_ashr_i32 s2, s0, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s1, s1
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1
|
|
; GCN-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v3i16_to_v3i32:
|
|
; GCN-NOHSA-VI: ; %bb.0: ; %entry
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s6, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v3i16_to_v3i32:
|
|
; EG: ; %bb.0: ; %entry
|
|
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 2 @6
|
|
; EG-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1
|
|
; EG-NEXT: VTX_READ_16 T2.X, T0.X, 4, #1
|
|
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 12:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 13:
|
|
; EG-NEXT: BFE_INT * T0.Y, T1.X, 0.0, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
|
|
; EG-NEXT: BFE_INT T2.X, T2.X, 0.0, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44)
|
|
; EG-NEXT: LSHR * T3.X, PV.W, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; IR under test: <3 x i16> load -> sext to <3 x i32> -> vector store.
; The explicit block label below matches the "%entry" annotation on the
; %bb.0 lines above.
entry:
|
|
%ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
|
|
%ext = sext <3 x i16> %ld to <3 x i32>
|
|
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; v4i16 is naturally 8 byte aligned
|
|
; TODO: This should use LD, but for some reason there are redundant MOVs
|
|
; Test kernel: loads a <4 x i16> vector through the addrspace(4) pointer %in,
; zero-extends it to <4 x i32> and stores the result through the addrspace(1)
; pointer %out.
; The prefixed comment lines inside the body are the expected llc output for
; each llc invocation at the top of the file (autogenerated by
; utils/update_llc_test_checks.py); regenerate them instead of hand-editing.
; Expected lowering per those lines: on the GCN targets one s_load_dwordx2,
; two s_lshr_b32 by 16 for the odd elements, two s_and_b32 against a 0xffff
; mask held in an SGPR for the even elements, and a single dwordx4 store. The
; r600 output uses one VTX_READ_64, several register MOVs, then LSHR by 16
; and AND_INT with 65535.
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_mov_b32 s6, 0xffff
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_lshr_b32 s2, s1, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s3, s0, 16
|
|
; GCN-HSA-NEXT: s_and_b32 s1, s1, s6
|
|
; GCN-HSA-NEXT: s_and_b32 s0, s0, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s8
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s6
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v4i16_to_v4i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T5.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: MOV T2.X, T5.X,
|
|
; EG-NEXT: MOV * T3.X, T5.Y,
|
|
; EG-NEXT: MOV T0.Y, PV.X,
|
|
; EG-NEXT: MOV * T0.Z, PS,
|
|
; EG-NEXT: LSHR * T5.W, PV.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT * T5.Z, T0.Z, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR * T5.Y, T0.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T5.X, T0.Y, literal.x,
|
|
; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
|
|
; IR under test: <4 x i16> load -> zext to <4 x i32> -> vector store.
%load = load <4 x i16>, <4 x i16> addrspace(4)* %in
|
|
%ext = zext <4 x i16> %load to <4 x i32>
|
|
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; v4i16 is naturally 8 byte aligned
|
|
; TODO: This should use LD, but for some reason there are redundant MOVs
|
|
; TODO: We should use ASHR instead of LSHR + BFE
|
|
; Test kernel: loads a <4 x i16> vector through the addrspace(4) pointer %in,
; sign-extends it to <4 x i32> and stores the result through the addrspace(1)
; pointer %out.
; The prefixed comment lines inside the body are the expected llc output for
; each llc invocation at the top of the file (autogenerated by
; utils/update_llc_test_checks.py); regenerate them instead of hand-editing.
; Expected lowering per those lines: on the GCN targets one s_load_dwordx2
; and s_sext_i32_i16 for the even elements. For the odd elements the SI and
; HSA outputs combine s_ashr_i32 by 16 with a 64-bit s_ashr_i64 by 48, while
; the VI output uses two s_ashr_i32 by 16. The r600 output uses one
; VTX_READ_64, several register MOVs, BFE_INT for the even elements and
; LSHR followed by BFE_INT for the odd ones.
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s8, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[4:5], 48
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s8
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[2:3], s[0:1], 48
|
|
; GCN-HSA-NEXT: s_ashr_i32 s4, s0, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s1, s1
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s6, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s7, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s6
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v4i16_to_v4i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T5.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: MOV T2.X, T5.X,
|
|
; EG-NEXT: MOV * T3.X, T5.Y,
|
|
; EG-NEXT: MOV T0.Y, PV.X,
|
|
; EG-NEXT: MOV * T0.Z, PS,
|
|
; EG-NEXT: BFE_INT * T5.Z, PV.Z, 0.0, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T5.X, T0.Y, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T0.W, T0.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T5.W, PV.W, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T0.W, T0.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: BFE_INT * T5.Y, PS, 0.0, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; IR under test: <4 x i16> load -> sext to <4 x i32> -> vector store.
%load = load <4 x i16>, <4 x i16> addrspace(4)* %in
|
|
%ext = sext <4 x i16> %load to <4 x i32>
|
|
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; v8i16 is naturally 16 byte aligned
|
|
; TODO: These should use LSHR instead of BFE_UINT
|
|
; TODO: This should use DST, but for some reason there are redundant MOVs
|
|
; Test kernel: load <8 x i16> through the addrspace(4) pointer %in, zero-extend
; every lane to i32, and store the <8 x i32> result through the addrspace(1)
; pointer %out.
;
; The check blocks below use the FileCheck prefixes declared by the RUN lines
; at the top of the file (GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI and EG) and are
; autogenerated by utils/update_llc_test_checks.py; regenerate them instead of
; editing them by hand.
;
; Expected GCN shape: a single s_load_dwordx4 fetches the packed halfwords,
; the high half of each dword is extracted with s_lshr_b32 by 16, the low half
; with s_and_b32 against a 0xffff mask, and the result is written with two
; dwordx4 stores. The EG checks show the same split with LSHR by 16 and
; AND_INT with 65535.
; NOTE(review): attribute group #0 is defined outside this excerpt.
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, 0xffff
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s9, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s11, s7, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s8
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s8
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s8
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s8
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s12
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s10
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_mov_b32 s8, 0xffff
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_lshr_b32 s2, s7, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s3, s6, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s9, s5, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s10, s4, 16
|
|
; GCN-HSA-NEXT: s_and_b32 s7, s7, s8
|
|
; GCN-HSA-NEXT: s_and_b32 s6, s6, s8
|
|
; GCN-HSA-NEXT: s_and_b32 s5, s5, s8
|
|
; GCN-HSA-NEXT: s_and_b32 s4, s4, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s8
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s8
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s8
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s10
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s9
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v8i16_to_v8i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T7.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: LSHR * T8.W, T7.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT * T8.Z, T7.Y, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T8.Y, T7.X, literal.x,
|
|
; EG-NEXT: LSHR * T9.W, T7.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T8.X, T7.X, literal.x,
|
|
; EG-NEXT: AND_INT T9.Z, T7.W, literal.x,
|
|
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
|
|
; EG-NEXT: LSHR * T9.Y, T7.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T9.X, T7.Z, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR * T10.X, PV.W, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; IR under test: whole-vector load, lane-wise zext, whole-vector store.
%load = load <8 x i16>, <8 x i16> addrspace(4)* %in
|
|
%ext = zext <8 x i16> %load to <8 x i32>
|
|
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; v8i16 is naturally 16 byte aligned
|
|
; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT
|
|
; TODO: This should use DST, but for some reason there are redundant MOVs
|
|
; Test kernel: load <8 x i16> through the addrspace(4) pointer %in, sign-extend
; every lane to i32, and store the <8 x i32> result through the addrspace(1)
; pointer %out.
;
; The check blocks below use the FileCheck prefixes declared by the RUN lines
; at the top of the file (GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI and EG) and are
; autogenerated by utils/update_llc_test_checks.py; regenerate them instead of
; editing them by hand.
;
; Expected GCN shape: a single s_load_dwordx4 fetches the packed halfwords,
; the high half of each dword is sign-extended with s_ashr_i32 by 16, the low
; half with s_sext_i32_i16, and the result is written with two dwordx4 stores.
; The EG checks use BFE_INT with a width literal of 16 for the low halves and
; LSHR by 16 followed by BFE_INT for the high halves.
; NOTE(review): attribute group #0 is defined outside this excerpt.
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s8, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s9, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s10, s7, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s11, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s10
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s8
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_ashr_i32 s2, s7, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s3, s6, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s8, s5, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s9, s4, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s10, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s11, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s8, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s9, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s10
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s8
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v8i16_to_v8i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T7.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T0.W, T7.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T0.Z, T7.W, literal.x,
|
|
; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y,
|
|
; EG-NEXT: LSHR T1.Z, T7.Z, literal.y,
|
|
; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T10.X, PS, literal.x,
|
|
; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; IR under test: whole-vector load, lane-wise sext, whole-vector store.
%load = load <8 x i16>, <8 x i16> addrspace(4)* %in
|
|
%ext = sext <8 x i16> %load to <8 x i32>
|
|
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: load <16 x i16> through the addrspace(4) pointer %in,
; zero-extend every lane to i32, and store the <16 x i32> result through the
; addrspace(1) pointer %out.
;
; The check blocks below use the FileCheck prefixes declared by the RUN lines
; at the top of the file (GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI and EG) and are
; autogenerated by utils/update_llc_test_checks.py; regenerate them instead of
; editing them by hand.
;
; Expected GCN shape: a single s_load_dwordx8 fetches the packed halfwords,
; the high half of each dword is extracted with s_lshr_b32 by 16, the low half
; with s_and_b32 against a 0xffff mask, and the result is written with four
; dwordx4 stores (byte offsets 48, 32, 16 and 0 from %out). The EG checks use
; two VTX_READ_128 fetches, LSHR by 16 / AND_INT with 65535, and four
; MEM_RAT_CACHELESS stores.
; NOTE(review): attribute group #0 is defined outside this excerpt.
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, 0xffff
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s1, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s0, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s3, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s2, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s17, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s7, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s1, s1, s12
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s0, s0, s12
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s3, s3, s12
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s2, s2, s12
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s12
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s12
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s12
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s12
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s20
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s19
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s18
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s17
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s16
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s3
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s15
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s14
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_mov_b32 s12, 0xffff
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_lshr_b32 s2, s11, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s3, s10, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s13, s5, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s14, s4, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s15, s7, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s16, s6, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s17, s9, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s18, s8, 16
|
|
; GCN-HSA-NEXT: s_and_b32 s11, s11, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s10, s10, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s5, s5, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s4, s4, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s7, s7, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s6, s6, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s9, s9, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s8, s8, s12
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s18
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s17
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s14
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s12, 0xffff
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s19, s11, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s10, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, s12
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s17, s9, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s8, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s19
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s18
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s14
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v16i16_to_v16i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 1 @8
|
|
; EG-NEXT: ALU 35, @13, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 8:
|
|
; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1
|
|
; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1
|
|
; EG-NEXT: ALU clause starting at 12:
|
|
; EG-NEXT: MOV * T11.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 13:
|
|
; EG-NEXT: LSHR * T13.W, T12.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT * T13.Z, T12.Y, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T13.Y, T12.X, literal.x,
|
|
; EG-NEXT: LSHR * T14.W, T12.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T13.X, T12.X, literal.x,
|
|
; EG-NEXT: AND_INT T14.Z, T12.W, literal.x,
|
|
; EG-NEXT: LSHR * T12.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
|
|
; EG-NEXT: LSHR T14.Y, T12.Z, literal.x,
|
|
; EG-NEXT: LSHR * T15.W, T11.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T14.X, T12.Z, literal.x,
|
|
; EG-NEXT: AND_INT T15.Z, T11.Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T16.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T15.Y, T11.X, literal.y,
|
|
; EG-NEXT: LSHR T17.W, T11.W, literal.y,
|
|
; EG-NEXT: AND_INT * T15.X, T11.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T17.Z, T11.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44)
|
|
; EG-NEXT: LSHR T11.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T17.Y, T11.Z, literal.y,
|
|
; EG-NEXT: AND_INT * T17.X, T11.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR * T18.X, PV.W, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; IR under test: whole-vector load, lane-wise zext, whole-vector store.
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
|
|
%ext = zext <16 x i16> %load to <16 x i32>
|
|
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: load <16 x i16> through the addrspace(4) pointer %in,
; sign-extend every lane to i32, and store the <16 x i32> result through the
; addrspace(1) pointer %out.
;
; The check blocks below use the FileCheck prefixes declared by the RUN lines
; at the top of the file (GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI and EG) and are
; autogenerated by utils/update_llc_test_checks.py; regenerate them instead of
; editing them by hand.
;
; Expected GCN shape: a single s_load_dwordx8 fetches the packed halfwords,
; the high half of each dword is sign-extended with s_ashr_i32 by 16, the low
; half with s_sext_i32_i16, and the result is written with four dwordx4 stores
; (byte offsets 48, 32, 16 and 0 from %out). The EG checks use two
; VTX_READ_128 fetches, BFE_INT with a width literal of 16 (preceded by LSHR
; by 16 for the high halves), and four MEM_RAT_CACHELESS stores.
; NOTE(review): attribute group #0 is defined outside this excerpt.
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s12, s1, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s13, s0, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s1, s1
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s0, s0
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s14, s3, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s15, s2, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s3, s3
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s2, s2
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s16, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s17, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s18, s7, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s19, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s16
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s3
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s14
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s12
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_ashr_i32 s2, s11, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s3, s10, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s12, s5, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s13, s4, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s14, s7, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s15, s6, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s16, s9, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s17, s8, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s16
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s14
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s12
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s18, s11, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s19, s10, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s16, s9, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s17, s8, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s18
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s14, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s15, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s16
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s12, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s13, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s14
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s12
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v16i16_to_v16i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 1 @8
|
|
; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 8:
|
|
; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
|
|
; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 12:
|
|
; EG-NEXT: MOV * T11.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 13:
|
|
; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T14.X, PV.W, literal.x,
|
|
; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T0.Y, T12.W, literal.x,
|
|
; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR T0.W, T12.Y, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, T11.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T1.Y, T11.W, literal.x,
|
|
; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, T11.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T0.Z, T12.X, literal.x,
|
|
; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44)
|
|
; EG-NEXT: LSHR T11.X, PS, literal.x,
|
|
; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y,
|
|
; EG-NEXT: LSHR T0.Z, T12.Z, literal.y,
|
|
; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T12.X, PS, literal.x,
|
|
; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; IR under test: whole-vector load, lane-wise sext, whole-vector store.
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
|
|
%ext = sext <16 x i16> %load to <16 x i32>
|
|
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Test: load a <32 x i16> vector through the addrspace(4) pointer %in,
; zero-extend every element to i32, and store the resulting <32 x i32> vector
; through the addrspace(1) pointer %out.
;
; The GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI and EG groups below are the FileCheck
; expectations for the four RUN lines at the top of this file. As the NOTE at
; the top of the file says, they are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script rather than
; editing them by hand.
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, 0xffff
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s1, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s0, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s3, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s2, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s7, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s27, s1, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s28, s0, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s29, s3, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s30, s2, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s31, s9, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s33, s8, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s34, s11, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s35, s10, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s36, s13, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s37, s12, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s38, s15, s18
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s18, s14, s18
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s9, s9, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s8, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s11, s11, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s10, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s13, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s12, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s15, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s14, 16
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s14
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s38
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s15
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s37
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s12
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s36
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s35
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s10
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s34
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s33
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s8
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s31
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s26
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s24
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s23
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s22
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s29
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s28
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s20
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s27
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s19
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_mov_b32 s20, 0xffff
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_and_b32 s34, s17, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s35, s16, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s36, s19, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s21, s5, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s22, s4, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s23, s7, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s24, s6, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s25, s9, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s26, s8, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s27, s11, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s28, s10, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s29, s13, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s30, s12, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s31, s15, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s33, s14, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s20, s18, s20
|
|
; GCN-HSA-NEXT: s_lshr_b32 s17, s17, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s16, s16, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s19, s19, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s18, s18, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s5, s5, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s4, s4, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s7, s7, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s6, s6, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s9, s9, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s8, s8, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s11, s11, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s10, s10, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s13, s13, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s12, s12, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s15, s15, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s14, s14, 16
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s18
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s36
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s19
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s35
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s16
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s34
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s17
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s33
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s14
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s31
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s12
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s29
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s27
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s25
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s24
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s23
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s21
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s20, 0xffff
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[4:19], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s19, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s37, s18, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s19, s19, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s18, s18, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s17, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s35, s16, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s17, s17, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s16, s16, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s36
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s31, s15, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s33, s14, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s15, s15, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s14, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s35
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s34
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s29, s13, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s12, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s13, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s12, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s33
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s31
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s27, s11, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s10, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s30
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s29
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s25, s9, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s8, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s28
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s23, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s26
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s25
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s21, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s24
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s23
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s22
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v32i16_to_v32i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 3 @12
|
|
; EG-NEXT: ALU 71, @21, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T19.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T20.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 12:
|
|
; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1
|
|
; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 48, #1
|
|
; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1
|
|
; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1
|
|
; EG-NEXT: ALU clause starting at 20:
|
|
; EG-NEXT: MOV * T19.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 21:
|
|
; EG-NEXT: LSHR * T23.W, T20.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT * T23.Z, T20.Y, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T23.Y, T20.X, literal.x,
|
|
; EG-NEXT: LSHR * T24.W, T20.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T23.X, T20.X, literal.x,
|
|
; EG-NEXT: AND_INT T24.Z, T20.W, literal.x,
|
|
; EG-NEXT: LSHR * T20.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
|
|
; EG-NEXT: LSHR T24.Y, T20.Z, literal.x,
|
|
; EG-NEXT: LSHR * T25.W, T19.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T24.X, T20.Z, literal.x,
|
|
; EG-NEXT: AND_INT T25.Z, T19.Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T26.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T25.Y, T19.X, literal.y,
|
|
; EG-NEXT: LSHR T27.W, T19.W, literal.y,
|
|
; EG-NEXT: AND_INT * T25.X, T19.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T27.Z, T19.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44)
|
|
; EG-NEXT: LSHR T19.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T27.Y, T19.Z, literal.y,
|
|
; EG-NEXT: LSHR T28.W, T22.Y, literal.y,
|
|
; EG-NEXT: AND_INT * T27.X, T19.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T28.Z, T22.Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44)
|
|
; EG-NEXT: LSHR T29.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T28.Y, T22.X, literal.y,
|
|
; EG-NEXT: LSHR T30.W, T22.W, literal.y,
|
|
; EG-NEXT: AND_INT * T28.X, T22.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T30.Z, T22.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44)
|
|
; EG-NEXT: LSHR T22.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T30.Y, T22.Z, literal.y,
|
|
; EG-NEXT: LSHR T31.W, T21.Y, literal.y,
|
|
; EG-NEXT: AND_INT * T30.X, T22.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T31.Z, T21.Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43)
|
|
; EG-NEXT: LSHR T32.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T31.Y, T21.X, literal.y,
|
|
; EG-NEXT: LSHR T33.W, T21.W, literal.y,
|
|
; EG-NEXT: AND_INT * T31.X, T21.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T33.Z, T21.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43)
|
|
; EG-NEXT: LSHR T21.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T33.Y, T21.Z, literal.y,
|
|
; EG-NEXT: AND_INT * T33.X, T21.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR * T34.X, PV.W, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
|
|
%ext = zext <32 x i16> %load to <32 x i32>
|
|
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s18, s1, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s19, s0, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s20, s1
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s21, s0
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s22, s3, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s23, s2, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s24, s3
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s25, s2
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s26, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s27, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s28, s7, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s29, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s30, s9, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s31, s8, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s11, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s34, s10, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s35, s13, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s36, s12, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s13, s13
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s12, s12
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s37, s15, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s38, s14, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s15, s15
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s14, s14
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s37
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s36
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s35
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s31
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s30
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s28
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s26
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s25
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s23
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s22
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s21
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_ashr_i32 s33, s17, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s34, s16, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s35, s19, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s36, s18, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s20, s5, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s21, s4, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s22, s7, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s23, s6, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s24, s9, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s25, s8, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s26, s11, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s27, s10, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s28, s13, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s29, s12, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s30, s15, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s31, s14, 16
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s16, s16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s19, s19
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s18, s18
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s17, s17
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s36
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s35
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s34
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s17
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s33
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s15, s15
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s14, s14
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s13, s13
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s12, s12
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[4:19], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s35, s19, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s36, s18, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s19, s19
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s18, s18
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s17, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s34, s16, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s17, s17
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s16, s16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s36
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s30, s15, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s31, s14, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s15, s15
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s14, s14
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s28, s13, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s29, s12, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s13, s13
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s12, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s30
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s26, s11, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s27, s10, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s28
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s24, s9, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s25, s8, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s26
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s22, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s23, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s24
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s20, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s21, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s22
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s20
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v32i16_to_v32i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 8, @20, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 3 @12
|
|
; EG-NEXT: ALU 73, @29, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T24.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T22.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T28.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T26.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 12:
|
|
; EG-NEXT: VTX_READ_128 T23.XYZW, T22.X, 16, #1
|
|
; EG-NEXT: VTX_READ_128 T24.XYZW, T22.X, 32, #1
|
|
; EG-NEXT: VTX_READ_128 T25.XYZW, T22.X, 0, #1
|
|
; EG-NEXT: VTX_READ_128 T22.XYZW, T22.X, 48, #1
|
|
; EG-NEXT: ALU clause starting at 20:
|
|
; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T20.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
|
|
; EG-NEXT: LSHR T21.X, PV.W, literal.x,
|
|
; EG-NEXT: MOV * T22.X, KC0[2].Z,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; EG-NEXT: ALU clause starting at 29:
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T26.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
|
|
; EG-NEXT: LSHR T27.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T0.W, T22.W, literal.y,
|
|
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T28.X, PS, literal.x,
|
|
; EG-NEXT: LSHR T0.Y, T22.Y, literal.y,
|
|
; EG-NEXT: BFE_INT T29.Z, T25.Y, 0.0, literal.y, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR T1.W, T24.W, literal.y,
|
|
; EG-NEXT: LSHR * T2.W, T24.Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: BFE_INT T29.X, T25.X, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T1.Y, T23.W, literal.x,
|
|
; EG-NEXT: BFE_INT T30.Z, T25.W, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR T3.W, T23.Y, literal.x,
|
|
; EG-NEXT: LSHR * T4.W, T25.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T30.X, T25.Z, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T2.Y, T25.W, literal.x,
|
|
; EG-NEXT: BFE_INT T31.Z, T23.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T29.W, PS, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T4.W, T25.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T31.X, T23.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T29.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T32.Z, T23.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T4.W, T25.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T32.X, T23.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T30.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T25.Z, T24.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T31.W, T3.W, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T3.W, T23.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T25.X, T24.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T31.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T33.Z, T24.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T32.W, T1.Y, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T3.W, T23.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T33.X, T24.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T32.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T23.Z, T22.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T25.W, T2.W, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T2.W, T24.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T23.X, T22.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T25.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T34.Z, T22.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T33.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR * T1.W, T24.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T34.X, T22.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T33.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T0.Z, T22.X, literal.x,
|
|
; EG-NEXT: BFE_INT T23.W, T0.Y, 0.0, literal.x,
|
|
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 96(1.345247e-43)
|
|
; EG-NEXT: LSHR T22.X, PS, literal.x,
|
|
; EG-NEXT: BFE_INT T23.Y, PV.Z, 0.0, literal.y,
|
|
; EG-NEXT: LSHR T0.Z, T22.Z, literal.y,
|
|
; EG-NEXT: BFE_INT T34.W, T0.W, 0.0, literal.y,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T24.X, PS, literal.x,
|
|
; EG-NEXT: BFE_INT * T34.Y, PV.Z, 0.0, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
|
|
%ext = sext <32 x i16> %load to <32 x i32>
|
|
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v64i16_to_v64i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s20, 0xffff
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x10
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s7, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s9, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s8, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s27, s11, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s10, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s29, s13, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s12, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s31, s15, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s33, s14, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s17, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s35, s16, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s52, s19, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s53, s18, 16
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s12, s12, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s14, s14, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s17, s17, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s16, s16, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s19, s19, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s18, s18, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s54, s37, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s55, s36, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s56, s39, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s57, s38, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s58, s41, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s59, s40, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s60, s43, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s61, s42, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s62, s45, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s63, s44, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s64, s47, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s65, s46, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s66, s49, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s67, s48, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s68, s51, s20
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s20, s50, s20
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s37, s37, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s36, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s39, s39, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s38, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s41, s41, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s40, s40, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s42, s42, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s45, s45, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s44, s44, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s47, s47, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s46, s46, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s49, s49, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s48, s48, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s51, s51, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s50, s50, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s43, s43, 16
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s20
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s50
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s68
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s51
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s67
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s48
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s66
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s49
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s65
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s46
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s64
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s47
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s63
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s44
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s62
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s45
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s61
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s42
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s60
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s59
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s43
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s40
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s58
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s41
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s57
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s56
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s39
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s55
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s36
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s54
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s37
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s53
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s52
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s35
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s34
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s33
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s31
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s30
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s29
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s28
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s27
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s26
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s24
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s23
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s22
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v64i16_to_v64i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_mov_b32 s37, 0xffff
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_lshr_b32 s20, s5, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s21, s4, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s22, s7, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s23, s6, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s24, s9, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s25, s8, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s26, s11, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s27, s10, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s28, s13, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s29, s12, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s30, s15, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s31, s14, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s33, s17, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s34, s16, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s35, s19, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s36, s18, 16
|
|
; GCN-HSA-NEXT: s_and_b32 s38, s5, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s39, s4, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s40, s7, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s41, s6, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s42, s9, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s43, s8, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s44, s11, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s45, s10, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s46, s13, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s47, s12, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s48, s15, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s49, s14, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s50, s17, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s51, s16, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s52, s19, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s53, s18, s37
|
|
; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x10
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_and_b32 s59, s8, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s60, s11, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s61, s10, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s62, s13, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s63, s12, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s64, s15, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s65, s14, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s66, s17, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s67, s16, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s68, s19, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s54, s5, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s55, s4, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s56, s7, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s57, s6, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s58, s9, s37
|
|
; GCN-HSA-NEXT: s_and_b32 s37, s18, s37
|
|
; GCN-HSA-NEXT: s_lshr_b32 s9, s9, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s11, s11, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s10, s10, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s13, s13, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s12, s12, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s15, s15, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s14, s14, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s17, s17, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s16, s16, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s19, s19, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s18, s18, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s5, s5, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s4, s4, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s7, s7, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s6, s6, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s8, s8, 16
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v31, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v30, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s67
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s66
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s17
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v35, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v34, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s61
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v18, s60
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v19, s11
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s37
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s18
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s68
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s19
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s65
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s14
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s15
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v12, s63
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v13, s12
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v14, s62
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v15, s13
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v20, s59
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s57
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v21, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v22, s58
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v23, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s56
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s55
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s4
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s54
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s53
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v12, s51
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s36
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s52
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s35
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v13, s34
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v14, s50
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v15, s33
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s49
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s47
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s46
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s45
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s44
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s43
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s42
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s41
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s40
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s39
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s38
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v64i16_to_v64i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s20, 0xffff
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x40
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s68, s19, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s51, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s53, s50, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s55, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s57, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s9, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s59, s8, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s60, s11, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s61, s10, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s62, s13, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s63, s12, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s64, s15, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s65, s14, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s66, s17, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s67, s16, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s19, s19, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s21, s37, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s22, s37, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s23, s36, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s24, s36, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s25, s39, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s26, s39, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s27, s38, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s28, s38, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s29, s41, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s30, s41, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s31, s40, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s33, s40, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s43, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s35, s43, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s42, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s37, s42, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s38, s45, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s39, s45, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s40, s44, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s41, s44, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s42, s47, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s43, s47, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s46, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s45, s46, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s46, s49, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s47, s49, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s49, s48, 16
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s48, s48, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s51, s51, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s50, s50, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s20, s18, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s18, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s17, s17, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s16, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s18
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s68
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s19
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s15, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s14, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s67
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s66
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s13, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s12, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s65
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s14
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s64
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s63
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s62
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s61
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s60
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s59
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s58
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s57
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s56
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s55
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s54
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s50
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s53
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s51
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s52
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s47
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s46
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s45
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s44
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s43
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s42
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s41
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s40
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s39
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s38
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s37
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s36
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s35
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s34
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s33
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s30
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s29
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s25
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s22
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v64i16_to_v64i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 3 @22
|
|
; EG-NEXT: ALU 55, @39, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 3 @30
|
|
; EG-NEXT: ALU 87, @95, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T49.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T50.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T61.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T51.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T52.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T37.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T48.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T38.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T46.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T39.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T43.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T36.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 22:
|
|
; EG-NEXT: VTX_READ_128 T36.XYZW, T35.X, 0, #1
|
|
; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 48, #1
|
|
; EG-NEXT: VTX_READ_128 T38.XYZW, T35.X, 32, #1
|
|
; EG-NEXT: VTX_READ_128 T39.XYZW, T35.X, 16, #1
|
|
; EG-NEXT: Fetch clause starting at 30:
|
|
; EG-NEXT: VTX_READ_128 T49.XYZW, T35.X, 112, #1
|
|
; EG-NEXT: VTX_READ_128 T50.XYZW, T35.X, 96, #1
|
|
; EG-NEXT: VTX_READ_128 T51.XYZW, T35.X, 80, #1
|
|
; EG-NEXT: VTX_READ_128 T52.XYZW, T35.X, 64, #1
|
|
; EG-NEXT: ALU clause starting at 38:
|
|
; EG-NEXT: MOV * T35.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 39:
|
|
; EG-NEXT: LSHR * T40.W, T36.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT * T40.Z, T36.Y, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T40.Y, T36.X, literal.x,
|
|
; EG-NEXT: LSHR * T41.W, T36.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T40.X, T36.X, literal.x,
|
|
; EG-NEXT: AND_INT T41.Z, T36.W, literal.x,
|
|
; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
|
|
; EG-NEXT: LSHR T41.Y, T36.Z, literal.x,
|
|
; EG-NEXT: LSHR * T42.W, T39.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T41.X, T36.Z, literal.x,
|
|
; EG-NEXT: AND_INT T42.Z, T39.Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T43.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T42.Y, T39.X, literal.y,
|
|
; EG-NEXT: LSHR T44.W, T39.W, literal.y,
|
|
; EG-NEXT: AND_INT * T42.X, T39.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T44.Z, T39.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44)
|
|
; EG-NEXT: LSHR T39.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T44.Y, T39.Z, literal.y,
|
|
; EG-NEXT: LSHR T45.W, T38.Y, literal.y,
|
|
; EG-NEXT: AND_INT * T44.X, T39.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T45.Z, T38.Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44)
|
|
; EG-NEXT: LSHR T46.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T45.Y, T38.X, literal.y,
|
|
; EG-NEXT: LSHR T47.W, T38.W, literal.y,
|
|
; EG-NEXT: AND_INT * T45.X, T38.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T47.Z, T38.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44)
|
|
; EG-NEXT: LSHR T38.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T47.Y, T38.Z, literal.y,
|
|
; EG-NEXT: AND_INT * T47.X, T38.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x,
|
|
; EG-NEXT: LSHR * T35.W, T37.Y, literal.y,
|
|
; EG-NEXT: 80(1.121039e-43), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T48.X, PV.W, literal.x,
|
|
; EG-NEXT: AND_INT * T35.Z, T37.Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
|
|
; EG-NEXT: ALU clause starting at 95:
|
|
; EG-NEXT: LSHR T35.Y, T37.X, literal.x,
|
|
; EG-NEXT: LSHR * T53.W, T37.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T35.X, T37.X, literal.x,
|
|
; EG-NEXT: AND_INT T53.Z, T37.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43)
|
|
; EG-NEXT: LSHR T37.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T53.Y, T37.Z, literal.y,
|
|
; EG-NEXT: LSHR T54.W, T52.Y, literal.y,
|
|
; EG-NEXT: AND_INT * T53.X, T37.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T54.Z, T52.Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43)
|
|
; EG-NEXT: LSHR T55.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T54.Y, T52.X, literal.y,
|
|
; EG-NEXT: LSHR T56.W, T52.W, literal.y,
|
|
; EG-NEXT: AND_INT * T54.X, T52.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T56.Z, T52.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43)
|
|
; EG-NEXT: LSHR T52.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T56.Y, T52.Z, literal.y,
|
|
; EG-NEXT: LSHR T57.W, T51.Y, literal.y,
|
|
; EG-NEXT: AND_INT * T56.X, T52.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T57.Z, T51.Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 144(2.017870e-43)
|
|
; EG-NEXT: LSHR T58.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T57.Y, T51.X, literal.y,
|
|
; EG-NEXT: LSHR T59.W, T51.W, literal.y,
|
|
; EG-NEXT: AND_INT * T57.X, T51.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T59.Z, T51.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43)
|
|
; EG-NEXT: LSHR T51.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T59.Y, T51.Z, literal.y,
|
|
; EG-NEXT: LSHR T60.W, T50.Y, literal.y,
|
|
; EG-NEXT: AND_INT * T59.X, T51.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T60.Z, T50.Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43)
|
|
; EG-NEXT: LSHR T61.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T60.Y, T50.X, literal.y,
|
|
; EG-NEXT: LSHR T62.W, T50.W, literal.y,
|
|
; EG-NEXT: AND_INT * T60.X, T50.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T62.Z, T50.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43)
|
|
; EG-NEXT: LSHR T50.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T62.Y, T50.Z, literal.y,
|
|
; EG-NEXT: LSHR T63.W, T49.Y, literal.y,
|
|
; EG-NEXT: AND_INT * T62.X, T50.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T63.Z, T49.Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43)
|
|
; EG-NEXT: LSHR T64.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T63.Y, T49.X, literal.y,
|
|
; EG-NEXT: LSHR T65.W, T49.W, literal.y,
|
|
; EG-NEXT: AND_INT * T63.X, T49.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T65.Z, T49.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 224(3.138909e-43)
|
|
; EG-NEXT: LSHR T49.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T65.Y, T49.Z, literal.y,
|
|
; EG-NEXT: AND_INT * T65.X, T49.Z, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR * T66.X, PV.W, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
%load = load <64 x i16>, <64 x i16> addrspace(4)* %in
|
|
%ext = zext <64 x i16> %load to <64 x i32>
|
|
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v64i16_to_v64i32:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x10
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s20, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s21, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s22, s7, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s23, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s24, s9, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s25, s8, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s26, s11, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s27, s10, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s28, s13, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s29, s12, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s13, s13
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s12, s12
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s30, s15, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s31, s14, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s15, s15
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s14, s14
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s17, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s34, s16, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s17, s17
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s16, s16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s35, s19, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s52, s18, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s19, s19
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s18, s18
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s53, s37, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s54, s36, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s37, s37
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s36, s36
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s55, s39, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s56, s38, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s39, s39
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s38, s38
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s57, s41, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s58, s40, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s41, s41
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s40, s40
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s59, s42, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s60, s43
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s42, s42
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s61, s45, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s62, s44, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s45, s45
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s44, s44
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s63, s47, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s64, s46, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s47, s47
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s46, s46
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s65, s49, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s66, s48, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s49, s49
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s48, s48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s67, s51, 16
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s68, s50, 16
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s51, s51
|
|
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s50, s50
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s43, s43, 16
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s50
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s68
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s51
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s67
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s48
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s66
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s49
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s65
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s46
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s64
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s47
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s63
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s44
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s62
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s45
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s61
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s42
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s59
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s60
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s40
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s43
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s58
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s41
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s57
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s38
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s56
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s39
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s55
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s54
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s37
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s53
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s52
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s35
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s31
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s30
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s28
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s26
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s25
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s24
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s23
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s22
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s21
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s20
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v64i16_to_v64i32:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_ashr_i32 s20, s5, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s21, s4, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s22, s5
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s23, s4
|
|
; GCN-HSA-NEXT: s_ashr_i32 s24, s7, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s25, s6, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s26, s7
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s27, s6
|
|
; GCN-HSA-NEXT: s_ashr_i32 s28, s9, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s29, s8, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s30, s9
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s31, s8
|
|
; GCN-HSA-NEXT: s_ashr_i32 s33, s11, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s34, s10, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s35, s11
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s36, s10
|
|
; GCN-HSA-NEXT: s_ashr_i32 s37, s13, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s38, s12, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s39, s13
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s40, s12
|
|
; GCN-HSA-NEXT: s_ashr_i32 s41, s15, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s42, s14, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s43, s15
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s44, s14
|
|
; GCN-HSA-NEXT: s_ashr_i32 s45, s17, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s46, s16, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s47, s17
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s48, s16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s49, s19, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s50, s18, 16
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s51, s19
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s52, s18
|
|
; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x10
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_ashr_i32 s57, s9, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s59, s11, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s60, s10, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s61, s13, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s62, s12, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s63, s15, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s64, s14, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s65, s17, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s66, s16, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s67, s19, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s68, s18, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s53, s5, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s54, s4, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s55, s7, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s56, s6, 16
|
|
; GCN-HSA-NEXT: s_ashr_i32 s58, s8, 16
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v31, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v30, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s17, s17
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s16, s16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s66
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s17
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s65
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v35, s3
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v34, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s60
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v18, s11
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v19, s59
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19]
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s19, s19
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s18, s18
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s13, s13
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s12, s12
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s15, s15
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s14, s14
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s68
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s67
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[0:3]
|
|
; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s14
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s15
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s63
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v12, s12
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v13, s62
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v14, s13
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v15, s61
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v20, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v21, s58
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v22, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v23, s57
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s56
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s55
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s54
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s52
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s53
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v12, s48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s50
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s51
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s49
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v13, s46
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v14, s47
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v15, s45
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s44
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s42
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s43
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s41
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s40
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s38
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s39
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s37
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s36
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s34
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s35
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s33
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s31
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s30
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s27
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s26
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s23
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s22
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v64i16_to_v64i32:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x40
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s67, s19, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s68, s18, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s19, s19
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s18, s18
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s65, s17, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s66, s16, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s17, s17
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s16, s16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s68
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s67
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s63, s15, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s64, s14, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s15, s15
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s14, s14
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s66
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s65
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s61, s13, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s62, s12, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s13, s13
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s12, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s64
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s59, s11, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s60, s10, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s62
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s61
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s57, s9, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s58, s8, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s60
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s55, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s56, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s58
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s53, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s54, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s56
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s55
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s52, s50, 16
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s20, s37, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s22, s37
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s24, s39, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s26, s39
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s28, s41, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s30, s41
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s43, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s35, s43
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s37, s45, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s39, s45
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s41, s47, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s43, s47
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s45, s49, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s47, s49
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s49, s51, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s51, s51
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s50, s50
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s54
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s53
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s21, s36, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s23, s36
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s25, s38, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s27, s38
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s29, s40, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s31, s40
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s34, s42, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s36, s42
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s38, s44, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s40, s44
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s42, s46, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s44, s46
|
|
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s46, s48, 16
|
|
; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s48, s48
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s50
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s52
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s51
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s49
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s46
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s47
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s45
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s44
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s42
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s43
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s40
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s38
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s39
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s37
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s35
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s31
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s30
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s28
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s27
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s24
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s23
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s22
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s20
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v64i16_to_v64i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 17, @38, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 7 @22
|
|
; EG-NEXT: ALU 75, @56, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: ALU 71, @132, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T48.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T41.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T56.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T55.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T54.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T53.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T52.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T51.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T50.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T49.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T40.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T39.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T38.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 22:
|
|
; EG-NEXT: VTX_READ_128 T42.XYZW, T41.X, 16, #1
|
|
; EG-NEXT: VTX_READ_128 T43.XYZW, T41.X, 32, #1
|
|
; EG-NEXT: VTX_READ_128 T44.XYZW, T41.X, 0, #1
|
|
; EG-NEXT: VTX_READ_128 T45.XYZW, T41.X, 48, #1
|
|
; EG-NEXT: VTX_READ_128 T46.XYZW, T41.X, 64, #1
|
|
; EG-NEXT: VTX_READ_128 T47.XYZW, T41.X, 80, #1
|
|
; EG-NEXT: VTX_READ_128 T48.XYZW, T41.X, 96, #1
|
|
; EG-NEXT: VTX_READ_128 T41.XYZW, T41.X, 112, #1
|
|
; EG-NEXT: ALU clause starting at 38:
|
|
; EG-NEXT: LSHR T35.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T36.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
|
|
; EG-NEXT: LSHR T37.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
|
|
; EG-NEXT: LSHR T38.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
|
|
; EG-NEXT: LSHR T39.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
|
|
; EG-NEXT: LSHR T40.X, PV.W, literal.x,
|
|
; EG-NEXT: MOV * T41.X, KC0[2].Z,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; EG-NEXT: ALU clause starting at 56:
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T49.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
|
|
; EG-NEXT: LSHR T50.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43)
|
|
; EG-NEXT: LSHR T51.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43)
|
|
; EG-NEXT: LSHR T52.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43)
|
|
; EG-NEXT: LSHR T53.X, PV.W, literal.x,
|
|
; EG-NEXT: LSHR T0.Y, T41.W, literal.y,
|
|
; EG-NEXT: LSHR T0.Z, T41.Y, literal.y,
|
|
; EG-NEXT: LSHR T0.W, T48.W, literal.y, BS:VEC_120/SCL_212
|
|
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T54.X, PS, literal.x,
|
|
; EG-NEXT: LSHR T1.Y, T48.Y, literal.y,
|
|
; EG-NEXT: LSHR T1.Z, T47.W, literal.y,
|
|
; EG-NEXT: LSHR T1.W, T47.Y, literal.y, BS:VEC_120/SCL_212
|
|
; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T55.X, PS, literal.x,
|
|
; EG-NEXT: LSHR T2.Y, T46.W, literal.y,
|
|
; EG-NEXT: LSHR T2.Z, T46.Y, literal.y,
|
|
; EG-NEXT: LSHR T2.W, T45.W, literal.y, BS:VEC_120/SCL_212
|
|
; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T56.X, PS, literal.x,
|
|
; EG-NEXT: LSHR T3.Y, T45.Y, literal.y,
|
|
; EG-NEXT: BFE_INT T57.Z, T44.Y, 0.0, literal.y, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR T3.W, T43.W, literal.y,
|
|
; EG-NEXT: LSHR * T4.W, T43.Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: BFE_INT T57.X, T44.X, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T4.Y, T42.W, literal.x,
|
|
; EG-NEXT: BFE_INT T58.Z, T44.W, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR T5.W, T42.Y, literal.x,
|
|
; EG-NEXT: LSHR * T6.W, T44.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T58.X, T44.Z, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T5.Y, T44.W, literal.x,
|
|
; EG-NEXT: BFE_INT T59.Z, T42.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T57.W, PS, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T6.W, T44.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T59.X, T42.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T57.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T60.Z, T42.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T58.W, PV.Y, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T6.W, T44.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T60.X, T42.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T58.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T44.Z, T43.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T59.W, T5.W, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T5.W, T42.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T44.X, T43.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T59.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T61.Z, T43.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T60.W, T4.Y, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T5.W, T42.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T61.X, T43.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T60.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T42.Z, T45.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT * T44.W, T4.W, 0.0, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: ALU clause starting at 132:
|
|
; EG-NEXT: LSHR * T4.W, T43.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T42.X, T45.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T44.Y, PV.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T62.Z, T45.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T61.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR * T3.W, T43.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T62.X, T45.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T61.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T43.Z, T46.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T42.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR * T3.W, T45.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T43.X, T46.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T42.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T63.Z, T46.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T62.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR * T2.W, T45.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T63.X, T46.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T62.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T45.Z, T47.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T43.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR * T2.W, T46.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T45.X, T47.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T43.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T64.Z, T47.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T63.W, T2.Y, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T2.W, T46.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T64.X, T47.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T63.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T46.Z, T48.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T45.W, T1.W, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, T47.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T46.X, T48.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T45.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T65.Z, T48.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T64.W, T1.Z, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, T47.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T65.X, T48.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T64.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T47.Z, T41.Y, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T46.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR * T1.W, T48.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T47.X, T41.X, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T46.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T66.Z, T41.W, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T65.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: LSHR * T0.W, T48.Z, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T66.X, T41.Z, 0.0, literal.x,
|
|
; EG-NEXT: BFE_INT T65.Y, PS, 0.0, literal.x,
|
|
; EG-NEXT: LSHR T1.Z, T41.X, literal.x,
|
|
; EG-NEXT: BFE_INT T47.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 224(3.138909e-43)
|
|
; EG-NEXT: LSHR T41.X, PS, literal.x,
|
|
; EG-NEXT: BFE_INT T47.Y, PV.Z, 0.0, literal.y,
|
|
; EG-NEXT: LSHR T0.Z, T41.Z, literal.y,
|
|
; EG-NEXT: BFE_INT T66.W, T0.Y, 0.0, literal.y,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T48.X, PS, literal.x,
|
|
; EG-NEXT: BFE_INT * T66.Y, PV.Z, 0.0, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
%load = load <64 x i16>, <64 x i16> addrspace(4)* %in
|
|
%ext = sext <64 x i16> %load to <64 x i32>
|
|
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Zero-extending scalar load test: reads one i16 from %in (addrspace(4)),
; zero-extends it to i64 and stores the result to %out (addrspace(1)).
; What the assertions below record for each run:
;   - GCN-NOHSA-SI and GCN-HSA runs expect an unsigned-short load
;     (buffer_load_ushort / flat_load_ushort) with the second stored dword
;     set by a v_mov_b32 of 0.
;   - The GCN-NOHSA-VI run expects the same load plus an explicit
;     v_and_b32 with 0xffff before the store.
;   - The EG run expects VTX_READ_16 into T0.X and "MOV T0.Y, 0.0".
define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i64:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_i16_to_i64:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, 0
|
|
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i64:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_i16_to_i64:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: MOV * T0.Y, 0.0,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; IR under test; the assertions above are checked against the llc output for it.
%a = load i16, i16 addrspace(4)* %in
|
|
%ext = zext i16 %a to i64
|
|
store i64 %ext, i64 addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; FIXME: Need to optimize this sequence to avoid extra bfe:
|
|
; t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
|
|
; t31: i64 = any_extend t28
|
|
; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
|
|
; TODO: These could be expanded earlier using ASHR 15
|
|
; Sign-extending scalar load test: reads one i16 from %in (addrspace(4)),
; sign-extends it to i64 and stores the result to %out (addrspace(1)).
; In every run recorded below, the second stored component is produced by an
; arithmetic shift right by 31 of the first one (v_ashrrev_i32 / ASHR).
;   - GCN-NOHSA-SI and GCN-HSA runs expect a signed-short load
;     (buffer_load_sshort / flat_load_sshort).
;   - The GCN-NOHSA-VI run expects buffer_load_ushort followed by
;     "v_bfe_i32 v0, v0, 0, 16".
;   - The EG run expects VTX_READ_16 followed by BFE_INT with literal 16.
; NOTE(review): the VI v_bfe_i32 is presumably the "extra bfe" that the FIXME
; preceding this function refers to -- confirm.
define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i64:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_i16_to_i64:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
|
; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i64:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16
|
|
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_i16_to_i64:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
|
|
; EG-NEXT: ASHR * T0.Y, PV.X, literal.x,
|
|
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
; IR under test; the assertions above are checked against the llc output for it.
%a = load i16, i16 addrspace(4)* %in
|
|
%ext = sext i16 %a to i64
|
|
store i64 %ext, i64 addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Zero-extending single-element vector load test: reads <1 x i16> from %in
; (addrspace(4)), zero-extends it to <1 x i64> and stores it to %out
; (addrspace(1)).
; What the assertions below record for each run:
;   - GCN-NOHSA-SI and GCN-HSA runs expect an unsigned-short load
;     (buffer_load_ushort / flat_load_ushort) with the second stored dword
;     set by a v_mov_b32 of 0.
;   - The GCN-NOHSA-VI run expects the same load plus an explicit
;     v_and_b32 with 0xffff before the store.
;   - The EG run expects VTX_READ_16 into T0.X and "MOV T0.Y, 0.0".
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i64:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i64:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, 0
|
|
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i64:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v1i16_to_v1i64:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: MOV * T0.Y, 0.0,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; IR under test; the assertions above are checked against the llc output for it.
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
|
|
%ext = zext <1 x i16> %load to <1 x i64>
|
|
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Sign-extending single-element vector load test: reads <1 x i16> from %in
; (addrspace(4)), sign-extends it to <1 x i64> and stores it to %out
; (addrspace(1)).
; In every run recorded below, the second stored component is produced by an
; arithmetic shift right by 31 of the first one (v_ashrrev_i32 / ASHR).
;   - GCN-NOHSA-SI and GCN-HSA runs expect a signed-short load
;     (buffer_load_sshort / flat_load_sshort).
;   - The GCN-NOHSA-VI run expects buffer_load_ushort followed by
;     "v_bfe_i32 v0, v0, 0, 16".
;   - The EG run expects VTX_READ_16 followed by BFE_INT with literal 16.
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i64:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
|
|
; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i64:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
|
; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i64:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
|
|
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16
|
|
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v1i16_to_v1i64:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
|
|
; EG-NEXT: ASHR * T0.Y, PV.X, literal.x,
|
|
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
; IR under test; the assertions above are checked against the llc output for it.
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
|
|
%ext = sext <1 x i16> %load to <1 x i64>
|
|
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Zero-extending two-element vector load test: reads <2 x i16> from %in
; (addrspace(4)), zero-extends each element to i64 and stores the <2 x i64>
; result to %out (addrspace(1)).
; What the assertions below record:
;   - All three GCN runs expect a single s_load_dword for the data, an
;     s_lshr_b32 by 16 and an s_and_b32 with 0xffff to form v2 and v0, and
;     zeros in v1 and v3 of the stored v[0:3].
;   - The EG run expects VTX_READ_32, an LSHR by 16 into T4.Z, an AND_INT with
;     65535 into T4.X, and "MOV ..., 0.0" for T4.Y and T4.W.
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i64:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s2, 0xffff
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i64:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: s_load_dword s0, s[2:3], 0x0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_lshr_b32 s1, s0, 16
|
|
; GCN-HSA-NEXT: s_and_b32 s0, s0, 0xffff
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i64:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s4, 0xffff
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v2i16_to_v2i64:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T4.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: LSHR * T4.Z, T4.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T4.X, T4.X, literal.x,
|
|
; EG-NEXT: MOV T4.Y, 0.0,
|
|
; EG-NEXT: MOV T4.W, 0.0,
|
|
; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
|
|
; IR under test; the assertions above are checked against the llc output for it.
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
|
|
%ext = zext <2 x i16> %load to <2 x i64>
|
|
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Sign-extending two-element vector load test: reads <2 x i16> from %in
; (addrspace(4)), sign-extends each element to i64 and stores the <2 x i64>
; result to %out (addrspace(1)).
; What the assertions below record:
;   - All three GCN runs expect a single s_load_dword for the data, an
;     s_lshr_b32 by 16, and two s_bfe_i64 instructions (operand 0x100000)
;     whose 64-bit results are copied into the stored v[0:3].
;   - The EG run expects VTX_READ_32, then ASHR by 16 into T4.Z and BFE_INT
;     (literal 16) into T4.X, with T4.W and T4.Y each produced by an ASHR by 31.
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i64:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i64:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: s_load_dword s0, s[2:3], 0x0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_lshr_b32 s2, s0, 16
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i64:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v2i16_to_v2i64:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T4.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: ASHR * T4.W, T4.X, literal.x,
|
|
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
; EG-NEXT: ASHR * T4.Z, T4.X, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T4.X, T4.X, 0.0, literal.x,
|
|
; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
|
|
; EG-NEXT: ASHR * T4.Y, PV.X, literal.x,
|
|
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
; IR under test; the assertions above are checked against the llc output for it.
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
|
|
%ext = sext <2 x i16> %load to <2 x i64>
|
|
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: loads a <4 x i16> vector through the addrspace(4) pointer
; %in, zero-extends every lane to i64 and stores the resulting <4 x i64>
; through the addrspace(1) pointer %out.
; The per-target assertions in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, 0xffff
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s0, s3, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s1, s2, 16
; GCN-NOHSA-SI-NEXT: s_and_b32 s3, s3, s8
; GCN-NOHSA-SI-NEXT: s_and_b32 s2, s2, s8
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s3
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s0
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s1
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i64:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_mov_b32 s6, 0xffff
; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_lshr_b32 s4, s3, 16
; GCN-HSA-NEXT: s_lshr_b32 s5, s2, 16
; GCN-HSA-NEXT: s_and_b32 s7, s2, s6
; GCN-HSA-NEXT: s_and_b32 s2, s3, s6
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s5, s8
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16
; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s4, s8
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s7
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-VI-NEXT: s_nop 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v4i16_to_v4i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T5.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: MOV T2.X, T5.X,
; EG-NEXT: MOV * T3.X, T5.Y,
; EG-NEXT: MOV T0.Y, PV.X,
; EG-NEXT: MOV * T0.Z, PS,
; EG-NEXT: LSHR * T5.Z, PV.Z, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: AND_INT T5.X, T0.Z, literal.x,
; EG-NEXT: MOV T5.Y, 0.0,
; EG-NEXT: LSHR T6.Z, T0.Y, literal.y,
; EG-NEXT: AND_INT * T6.X, T0.Y, literal.x,
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT: MOV T6.Y, 0.0,
; EG-NEXT: MOV T5.W, 0.0,
; EG-NEXT: MOV * T6.W, 0.0,
; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR * T8.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  ; Input IR: widen each of the four i16 lanes to i64 with zext.
  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
  %ext = zext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}
|
|
|
|
; Kernel under test: loads a <4 x i16> vector through the addrspace(4) pointer
; %in, sign-extends every lane to i64 and stores the resulting <4 x i64>
; through the addrspace(1) pointer %out.
; The per-target assertions in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s5
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s4, 16
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[4:5], 0x100000
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i64:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_mov_b32 s4, s3
; GCN-HSA-NEXT: s_lshr_b32 s6, s2, 16
; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[2:3], 0x100000
; GCN-HSA-NEXT: s_ashr_i64 s[2:3], s[2:3], 48
; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s5
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s5, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-VI-NEXT: s_nop 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v4i16_to_v4i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T5.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: MOV T2.X, T5.X,
; EG-NEXT: MOV * T3.X, T5.Y,
; EG-NEXT: MOV T0.Y, PS,
; EG-NEXT: MOV * T0.Z, PV.X,
; EG-NEXT: ASHR * T5.W, PV.Z, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x,
; EG-NEXT: ASHR T5.Z, T0.Z, literal.y,
; EG-NEXT: ASHR * T7.W, T0.Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T5.X, T0.Z, 0.0, literal.x,
; EG-NEXT: ASHR * T7.Z, T0.Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T7.X, T0.Y, 0.0, literal.x,
; EG-NEXT: ASHR T5.Y, PV.X, literal.y,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
; EG-NEXT: LSHR T8.X, PV.W, literal.x,
; EG-NEXT: ASHR * T7.Y, PV.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
  ; Input IR: widen each of the four i16 lanes to i64 with sext.
  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}
|
|
|
|
; Kernel under test: loads an <8 x i16> vector through the addrspace(4) pointer
; %in, zero-extends every lane to i64 and stores the resulting <8 x i64>
; through the addrspace(1) pointer %out.
; The per-target assertions in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s0, s9, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s1, s11, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s3, s10, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s8, 16
; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, s2
; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, s2
; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s2
; GCN-NOHSA-SI-NEXT: s_and_b32 s2, s9, s2
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s1
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:48
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s0
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s3
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:32
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i64:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_mov_b32 s8, 0xffff
; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_lshr_b32 s9, s5, 16
; GCN-HSA-NEXT: s_lshr_b32 s2, s7, 16
; GCN-HSA-NEXT: s_lshr_b32 s10, s6, 16
; GCN-HSA-NEXT: s_lshr_b32 s11, s4, 16
; GCN-HSA-NEXT: s_and_b32 s3, s7, s8
; GCN-HSA-NEXT: s_and_b32 s4, s4, s8
; GCN-HSA-NEXT: s_and_b32 s6, s6, s8
; GCN-HSA-NEXT: s_and_b32 s5, s5, s8
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s3
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s4, s8
; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s5, s8
; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s6, s8
; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s7, s8
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s7, 16
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s11
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-VI-NEXT: s_nop 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s9
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v8i16_to_v8i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 10:
; EG-NEXT: MOV * T7.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: LSHR * T8.Z, T7.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: AND_INT T8.X, T7.W, literal.x,
; EG-NEXT: MOV T8.Y, 0.0,
; EG-NEXT: LSHR T9.Z, T7.Z, literal.y,
; EG-NEXT: AND_INT * T9.X, T7.Z, literal.x,
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT: MOV T9.Y, 0.0,
; EG-NEXT: LSHR * T10.Z, T7.Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: AND_INT T10.X, T7.Y, literal.x,
; EG-NEXT: MOV T10.Y, 0.0,
; EG-NEXT: LSHR T7.Z, T7.X, literal.y,
; EG-NEXT: AND_INT * T7.X, T7.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT: MOV T7.Y, 0.0,
; EG-NEXT: MOV T8.W, 0.0,
; EG-NEXT: MOV * T9.W, 0.0,
; EG-NEXT: MOV T10.W, 0.0,
; EG-NEXT: MOV * T7.W, 0.0,
; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T12.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: LSHR T13.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT: LSHR * T14.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  ; Input IR: widen each of the eight i16 lanes to i64 with zext.
  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}
|
|
|
|
; Kernel under test: loads an <8 x i16> vector through the addrspace(4) pointer
; %in, sign-extends every lane to i64 and stores the resulting <8 x i64>
; through the addrspace(1) pointer %out.
; The per-target assertions in the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s7
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s5
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s6, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s4, 16
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[4:5], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s16
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s17
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s14
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s15
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i64:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_mov_b32 s2, s7
; GCN-HSA-NEXT: s_mov_b32 s8, s5
; GCN-HSA-NEXT: s_lshr_b32 s10, s6, 16
; GCN-HSA-NEXT: s_lshr_b32 s12, s4, 16
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000
; GCN-HSA-NEXT: s_ashr_i64 s[6:7], s[6:7], 48
; GCN-HSA-NEXT: s_ashr_i64 s[4:5], s[4:5], 48
; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s16
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s5
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s5, 16
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-VI-NEXT: s_nop 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v8i16_to_v8i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 10:
; EG-NEXT: MOV * T7.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T9.X, PV.W, literal.x,
; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y,
; EG-NEXT: ASHR * T10.W, T7.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T11.X, PV.W, literal.x,
; EG-NEXT: ASHR T10.Z, T7.X, literal.y,
; EG-NEXT: ASHR * T12.W, T7.Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T10.X, T7.X, 0.0, literal.x,
; EG-NEXT: ASHR T12.Z, T7.Y, literal.x,
; EG-NEXT: ASHR * T13.W, T7.Z, literal.y,
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
; EG-NEXT: BFE_INT T12.X, T7.Y, 0.0, literal.x,
; EG-NEXT: ASHR T10.Y, PV.X, literal.y,
; EG-NEXT: ASHR T13.Z, T7.Z, literal.x,
; EG-NEXT: ASHR * T14.W, T7.W, literal.y,
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
; EG-NEXT: BFE_INT T13.X, T7.Z, 0.0, literal.x,
; EG-NEXT: ASHR T12.Y, PV.X, literal.y,
; EG-NEXT: ASHR * T14.Z, T7.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
; EG-NEXT: BFE_INT T14.X, T7.W, 0.0, literal.x,
; EG-NEXT: ASHR T13.Y, PV.X, literal.y,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T7.X, PV.W, literal.x,
; EG-NEXT: ASHR * T14.Y, PV.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
  ; Input IR: widen each of the eight i16 lanes to i64 with sext.
  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
  %ext = sext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}
|
|
|
|
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i64:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[14:15], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, 0xffff
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s12
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s13
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s7, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s11, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s9, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s17, s8, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s10, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s14
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s14
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, s14
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, s14
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s14
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s14
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, s14
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s14
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s9
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s16
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i64:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_mov_b32 s12, 0xffff
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_lshr_b32 s13, s5, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s14, s7, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s15, s11, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s2, s9, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s16, s8, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s17, s10, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s18, s6, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s19, s4, 16
|
|
; GCN-HSA-NEXT: s_and_b32 s3, s9, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s4, s4, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s6, s6, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s10, s10, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s8, s8, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s5, s5, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s7, s7, s12
|
|
; GCN-HSA-NEXT: s_and_b32 s11, s11, s12
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s3
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s11
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s16
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s17
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i64:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s12, 0xffff
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s4, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s5, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s15, s6, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s16, s7, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s17, s8, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s18, s9, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s19, s10, s12
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s11, s12
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s11, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s9, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s19
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s8, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s17
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s15
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s13
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v16i16_to_v16i64:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 1 @12
|
|
; EG-NEXT: ALU 62, @17, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 12:
|
|
; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
|
|
; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 16:
|
|
; EG-NEXT: MOV * T11.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 17:
|
|
; EG-NEXT: LSHR * T13.Z, T12.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T13.X, T12.W, literal.x,
|
|
; EG-NEXT: MOV T13.Y, 0.0,
|
|
; EG-NEXT: LSHR T14.Z, T12.Z, literal.y,
|
|
; EG-NEXT: AND_INT * T14.X, T12.Z, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T14.Y, 0.0,
|
|
; EG-NEXT: LSHR * T15.Z, T12.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T15.X, T12.Y, literal.x,
|
|
; EG-NEXT: MOV T15.Y, 0.0,
|
|
; EG-NEXT: LSHR T12.Z, T12.X, literal.y,
|
|
; EG-NEXT: AND_INT * T12.X, T12.X, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T12.Y, 0.0,
|
|
; EG-NEXT: LSHR * T16.Z, T11.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T16.X, T11.W, literal.x,
|
|
; EG-NEXT: MOV T16.Y, 0.0,
|
|
; EG-NEXT: LSHR T17.Z, T11.Z, literal.y,
|
|
; EG-NEXT: AND_INT * T17.X, T11.Z, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T17.Y, 0.0,
|
|
; EG-NEXT: LSHR * T18.Z, T11.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T18.X, T11.Y, literal.x,
|
|
; EG-NEXT: MOV T18.Y, 0.0,
|
|
; EG-NEXT: LSHR T11.Z, T11.X, literal.y,
|
|
; EG-NEXT: AND_INT * T11.X, T11.X, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T11.Y, 0.0,
|
|
; EG-NEXT: MOV T13.W, 0.0,
|
|
; EG-NEXT: MOV * T14.W, 0.0,
|
|
; EG-NEXT: MOV T15.W, 0.0,
|
|
; EG-NEXT: MOV * T12.W, 0.0,
|
|
; EG-NEXT: MOV T16.W, 0.0,
|
|
; EG-NEXT: MOV * T17.W, 0.0,
|
|
; EG-NEXT: MOV T18.W, 0.0,
|
|
; EG-NEXT: MOV * T11.W, 0.0,
|
|
; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T20.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
|
|
; EG-NEXT: LSHR T21.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
|
|
; EG-NEXT: LSHR T22.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
|
|
; EG-NEXT: LSHR T23.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
|
|
; EG-NEXT: LSHR T24.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
|
|
; EG-NEXT: LSHR T25.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
|
|
; EG-NEXT: LSHR * T26.X, PV.W, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
|
|
%ext = zext <16 x i16> %load to <16 x i64>
|
|
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: loads a <16 x i16> vector through the addrspace(4)
; pointer %in, sign-extends every element to i64, and stores the resulting
; <16 x i64> vector through the addrspace(1) pointer %out.
;
; The assertions that follow come in four groups, one per FileCheck prefix
; declared by the RUN lines at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file), so
; regenerate them with that script rather than editing them by hand.
;
; NOTE(review): attribute group #0 is defined outside this excerpt; its
; contents are not visible here.
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i64:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, s11
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, s9
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s16, s7
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, s5
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s10, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s8, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[6:7], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[8:9], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[10:11], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[8:9], s[8:9], 48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[10:11], s[10:11], 48
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s37
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s34
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s35
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s30
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s31
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s28
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s29
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s22
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s23
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s24
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s25
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s26
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s27
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i64:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_mov_b32 s2, s11
|
|
; GCN-HSA-NEXT: s_mov_b32 s12, s9
|
|
; GCN-HSA-NEXT: s_mov_b32 s14, s7
|
|
; GCN-HSA-NEXT: s_mov_b32 s16, s5
|
|
; GCN-HSA-NEXT: s_lshr_b32 s18, s10, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s20, s8, 16
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[10:11], 0x100000
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[10:11], s[10:11], 48
|
|
; GCN-HSA-NEXT: s_lshr_b32 s22, s6, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s24, s4, 16
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[4:5], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[28:29], s[6:7], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[30:31], s[8:9], 0x100000
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[8:9], s[8:9], 48
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[4:5], s[4:5], 48
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[6:7], s[6:7], 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[24:25], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[22:23], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
|
|
; GCN-HSA-NEXT: s_add_u32 s22, s0, 0x70
|
|
; GCN-HSA-NEXT: s_addc_u32 s23, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s8
|
|
; GCN-HSA-NEXT: s_add_u32 s8, s0, 0x50
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s22
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s9
|
|
; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s23
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s12
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s13
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s8
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-HSA-NEXT: s_add_u32 s6, s0, 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_nop 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-HSA-NEXT: s_add_u32 s4, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5
|
|
; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
|
|
; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x60
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
|
|
; GCN-HSA-NEXT: s_add_u32 s4, s0, 64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s34
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s35
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s19
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
|
|
; GCN-HSA-NEXT: s_add_u32 s4, s0, 32
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s21
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i64:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[10:11], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[10:11], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s11
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[10:11], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s11, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[8:9], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s8, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[8:9], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s9
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[8:9], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s9, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[6:7], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[6:7], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s9
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s14, s5
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[4:5], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v16i16_to_v16i64:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 1 @12
|
|
; EG-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 12:
|
|
; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
|
|
; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 16:
|
|
; EG-NEXT: MOV * T11.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 17:
|
|
; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T14.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
|
|
; EG-NEXT: LSHR T15.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
|
|
; EG-NEXT: LSHR T16.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
|
|
; EG-NEXT: LSHR T17.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
|
|
; EG-NEXT: LSHR T18.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: ASHR * T19.W, T11.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
|
|
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T20.X, PV.W, literal.x,
|
|
; EG-NEXT: ASHR T19.Z, T11.X, literal.y,
|
|
; EG-NEXT: ASHR * T21.W, T11.Y, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T19.X, T11.X, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T21.Z, T11.Y, literal.x,
|
|
; EG-NEXT: ASHR * T22.W, T11.Z, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T21.X, T11.Y, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T19.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T22.Z, T11.Z, literal.x,
|
|
; EG-NEXT: ASHR * T23.W, T11.W, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T22.X, T11.Z, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T21.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T23.Z, T11.W, literal.x,
|
|
; EG-NEXT: ASHR * T24.W, T12.X, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T23.X, T11.W, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T22.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T24.Z, T12.X, literal.x,
|
|
; EG-NEXT: ASHR * T11.W, T12.Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T24.X, T12.X, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T23.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T11.Z, T12.Y, literal.x,
|
|
; EG-NEXT: ASHR * T25.W, T12.Z, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T11.X, T12.Y, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T24.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T25.Z, T12.Z, literal.x,
|
|
; EG-NEXT: ASHR * T26.W, T12.W, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T25.X, T12.Z, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T11.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR * T26.Z, T12.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T26.X, T12.W, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T25.Y, PV.X, literal.y,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T12.X, PV.W, literal.x,
|
|
; EG-NEXT: ASHR * T26.Y, PV.X, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
|
|
; IR under test: one vector load, one sext, one vector store.
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
|
|
%ext = sext <16 x i16> %load to <16 x i64>
|
|
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i64:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s5, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s7, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s9, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s11, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s13, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s15, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s17, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s27, s19, 16
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s28, s4, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s29, s6, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s30, s8, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s31, s10, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s33, s12, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s34, s14, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s35, s16, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s36, s18, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s17, s17, s2
|
|
; GCN-NOHSA-SI-NEXT: s_and_b32 s19, s19, s2
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s18, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s16, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s14, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s12, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s10, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s8, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s6, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s4, 16
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s19
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s27
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s17
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s26
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s15
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s25
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s13
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s23
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s9
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s22
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s21
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s35
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s16
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s34
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s14
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s33
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s31
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s29
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s28
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i64:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_mov_b32 s20, 0xffff
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_and_b32 s21, s4, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s22, s6, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s23, s8, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s24, s10, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s25, s12, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s26, s14, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s27, s16, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s28, s18, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s29, s5, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s30, s7, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s31, s9, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s33, s11, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s34, s13, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s35, s15, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s36, s17, s20
|
|
; GCN-HSA-NEXT: s_and_b32 s20, s19, s20
|
|
; GCN-HSA-NEXT: s_lshr_b32 s19, s19, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s5, s5, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s7, s7, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s9, s9, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s11, s11, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s13, s13, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s15, s15, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s17, s17, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s18, s18, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s16, s16, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s14, s14, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s12, s12, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s10, s10, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s8, s8, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s6, s6, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s4, s4, 16
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s36
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s17
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s35
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s34
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3]
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s33
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s31
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s29
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s27
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s16
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s25
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s24
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s23
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8
|
|
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s21
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i64:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s20, 0xffff
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[4:19], s[6:7], 0x0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s21, s4, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s22, s5, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s23, s6, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s24, s7, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s25, s8, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s26, s9, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s27, s10, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s28, s11, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s29, s12, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s30, s13, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s31, s14, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s33, s15, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s34, s16, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s35, s17, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s36, s18, s20
|
|
; GCN-NOHSA-VI-NEXT: s_and_b32 s20, s19, s20
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s19, s19, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s18, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s17, s17, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s18
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s16, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s35
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s15, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s34
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s14, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s33
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s13, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s31
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s12, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s11, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s29
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s9, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s27
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s8, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s7, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s25
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s23
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s21
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_zextload_v32i16_to_v32i64:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 2 @22
|
|
; EG-NEXT: ALU 33, @31, KC0[], KC1[]
|
|
; EG-NEXT: TEX 0 @28
|
|
; EG-NEXT: ALU 92, @65, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 22:
|
|
; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1
|
|
; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 16, #1
|
|
; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1
|
|
; EG-NEXT: Fetch clause starting at 28:
|
|
; EG-NEXT: VTX_READ_128 T29.XYZW, T19.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 30:
|
|
; EG-NEXT: MOV * T19.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 31:
|
|
; EG-NEXT: LSHR * T23.Z, T20.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T23.X, T20.W, literal.x,
|
|
; EG-NEXT: MOV T23.Y, 0.0,
|
|
; EG-NEXT: LSHR T24.Z, T20.Z, literal.y,
|
|
; EG-NEXT: AND_INT * T24.X, T20.Z, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T24.Y, 0.0,
|
|
; EG-NEXT: LSHR * T25.Z, T20.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T25.X, T20.Y, literal.x,
|
|
; EG-NEXT: MOV T25.Y, 0.0,
|
|
; EG-NEXT: LSHR T20.Z, T20.X, literal.y,
|
|
; EG-NEXT: AND_INT * T20.X, T20.X, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T20.Y, 0.0,
|
|
; EG-NEXT: LSHR * T26.Z, T22.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T26.X, T22.W, literal.x,
|
|
; EG-NEXT: MOV T26.Y, 0.0,
|
|
; EG-NEXT: LSHR T27.Z, T22.Z, literal.y,
|
|
; EG-NEXT: AND_INT * T27.X, T22.Z, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T27.Y, 0.0,
|
|
; EG-NEXT: LSHR * T28.Z, T22.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T28.X, T22.Y, literal.x,
|
|
; EG-NEXT: MOV T28.Y, 0.0,
|
|
; EG-NEXT: LSHR T22.Z, T22.X, literal.y,
|
|
; EG-NEXT: AND_INT * T22.X, T22.X, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T22.Y, 0.0,
|
|
; EG-NEXT: LSHR * T19.Z, T21.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: ALU clause starting at 65:
|
|
; EG-NEXT: AND_INT T19.X, T21.W, literal.x,
|
|
; EG-NEXT: MOV T19.Y, 0.0,
|
|
; EG-NEXT: LSHR T30.Z, T21.Z, literal.y,
|
|
; EG-NEXT: AND_INT * T30.X, T21.Z, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T30.Y, 0.0,
|
|
; EG-NEXT: LSHR * T31.Z, T21.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T31.X, T21.Y, literal.x,
|
|
; EG-NEXT: MOV T31.Y, 0.0,
|
|
; EG-NEXT: LSHR T21.Z, T21.X, literal.y,
|
|
; EG-NEXT: AND_INT * T21.X, T21.X, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T21.Y, 0.0,
|
|
; EG-NEXT: LSHR * T32.Z, T29.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T32.X, T29.W, literal.x,
|
|
; EG-NEXT: MOV T32.Y, 0.0,
|
|
; EG-NEXT: LSHR T33.Z, T29.Z, literal.y,
|
|
; EG-NEXT: AND_INT * T33.X, T29.Z, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T33.Y, 0.0,
|
|
; EG-NEXT: LSHR * T34.Z, T29.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T34.X, T29.Y, literal.x,
|
|
; EG-NEXT: MOV T34.Y, 0.0,
|
|
; EG-NEXT: LSHR T29.Z, T29.X, literal.y,
|
|
; EG-NEXT: AND_INT * T29.X, T29.X, literal.x,
|
|
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
|
; EG-NEXT: MOV T29.Y, 0.0,
|
|
; EG-NEXT: MOV T23.W, 0.0,
|
|
; EG-NEXT: MOV * T24.W, 0.0,
|
|
; EG-NEXT: MOV T25.W, 0.0,
|
|
; EG-NEXT: MOV * T20.W, 0.0,
|
|
; EG-NEXT: MOV T26.W, 0.0,
|
|
; EG-NEXT: MOV * T27.W, 0.0,
|
|
; EG-NEXT: MOV T28.W, 0.0,
|
|
; EG-NEXT: MOV * T22.W, 0.0,
|
|
; EG-NEXT: MOV T19.W, 0.0,
|
|
; EG-NEXT: MOV * T30.W, 0.0,
|
|
; EG-NEXT: MOV T31.W, 0.0,
|
|
; EG-NEXT: MOV * T21.W, 0.0,
|
|
; EG-NEXT: MOV T32.W, 0.0,
|
|
; EG-NEXT: MOV * T33.W, 0.0,
|
|
; EG-NEXT: MOV T34.W, 0.0,
|
|
; EG-NEXT: MOV * T29.W, 0.0,
|
|
; EG-NEXT: LSHR T35.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T36.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
|
|
; EG-NEXT: LSHR T37.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
|
|
; EG-NEXT: LSHR T38.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
|
|
; EG-NEXT: LSHR T39.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
|
|
; EG-NEXT: LSHR T40.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
|
|
; EG-NEXT: LSHR T41.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
|
|
; EG-NEXT: LSHR T42.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43)
|
|
; EG-NEXT: LSHR T43.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43)
|
|
; EG-NEXT: LSHR T44.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43)
|
|
; EG-NEXT: LSHR T45.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43)
|
|
; EG-NEXT: LSHR T46.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43)
|
|
; EG-NEXT: LSHR T47.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43)
|
|
; EG-NEXT: LSHR T48.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43)
|
|
; EG-NEXT: LSHR T49.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43)
|
|
; EG-NEXT: LSHR * T50.X, PV.W, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
|
|
%ext = zext <32 x i16> %load to <32 x i64>
|
|
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
|
|
; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i64:
|
|
; GCN-NOHSA-SI: ; %bb.0:
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, s23
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s21
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s19
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s24, s17
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s26, s15
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s28, s13
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s30, s11
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s34, s9
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s22, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s20, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s40, s18, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s42, s16, 16
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s46, s14, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s48, s12, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s50, s10, 16
|
|
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s52, s8, 16
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[8:9], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[54:55], s[10:11], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[56:57], s[12:13], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[58:59], s[14:15], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[60:61], s[16:17], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[62:63], s[18:19], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[64:65], s[20:21], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[66:67], s[22:23], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[8:9], s[8:9], 48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[10:11], s[10:11], 48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[14:15], s[14:15], 48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[16:17], s[16:17], 48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[18:19], s[18:19], 48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[20:21], s[20:21], 48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[22:23], s[22:23], 48
|
|
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[12:13], s[12:13], 48
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s3
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s22
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s23
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s44
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s45
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s20
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s21
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[28:29], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[26:27], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[34:35], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[30:31], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s7
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s18
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s19
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s24
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s25
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s16
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s17
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s22
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s23
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s14
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s15
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s20
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s21
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s12
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s13
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[52:53], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[50:51], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[48:49], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[46:47], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[42:43], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[40:41], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[38:39], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[36:37], 0x100000
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s28
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s26
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s66
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s67
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s64
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s65
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s62
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s63
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s60
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s61
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s58
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s59
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s56
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s57
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v24, s54
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, s55
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
|
|
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s22
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s23
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s20
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s21
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s18
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s19
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s16
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s17
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s14
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s15
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v26, s12
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, s13
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
; GCN-NOHSA-SI-NEXT: s_endpgm
|
|
;
|
|
; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64:
|
|
; GCN-HSA: ; %bb.0:
|
|
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x0
|
|
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-HSA-NEXT: s_mov_b32 s8, s51
|
|
; GCN-HSA-NEXT: s_mov_b32 s34, s49
|
|
; GCN-HSA-NEXT: s_mov_b32 s52, s47
|
|
; GCN-HSA-NEXT: s_mov_b32 s54, s45
|
|
; GCN-HSA-NEXT: s_mov_b32 s56, s43
|
|
; GCN-HSA-NEXT: s_mov_b32 s58, s41
|
|
; GCN-HSA-NEXT: s_mov_b32 s60, s39
|
|
; GCN-HSA-NEXT: s_mov_b32 s62, s37
|
|
; GCN-HSA-NEXT: s_lshr_b32 s30, s46, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s24, s44, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s20, s42, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s16, s40, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s12, s38, 16
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
|
|
; GCN-HSA-NEXT: s_lshr_b32 s64, s50, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s66, s48, 16
|
|
; GCN-HSA-NEXT: s_lshr_b32 s68, s36, 16
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[36:37], 0x100000
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[28:29], s[36:37], 48
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[38:39], 0x100000
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[36:37], s[38:39], 48
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[40:41], 0x100000
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[38:39], s[40:41], 48
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[40:41], s[42:43], 48
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[42:43], 0x100000
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[42:43], s[44:45], 48
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[44:45], 0x100000
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[44:45], s[46:47], 48
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[46:47], 0x100000
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[46:47], s[48:49], 48
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[48:49], 0x100000
|
|
; GCN-HSA-NEXT: s_ashr_i64 s[48:49], s[50:51], 48
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[50:51], 0x100000
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s49
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[68:69], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[48:49], s[66:67], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[50:51], s[64:65], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x100000
|
|
; GCN-HSA-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x100000
|
|
; GCN-HSA-NEXT: s_add_u32 s64, s0, 0xf0
|
|
; GCN-HSA-NEXT: s_addc_u32 s65, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s34
|
|
; GCN-HSA-NEXT: s_add_u32 s34, s0, 0xd0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s35
|
|
; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v24, s34
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v25, s35
|
|
; GCN-HSA-NEXT: s_add_u32 s34, s0, 0xb0
|
|
; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v26, s34
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v27, s35
|
|
; GCN-HSA-NEXT: s_add_u32 s34, s0, 0x90
|
|
; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v28, s34
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v29, s35
|
|
; GCN-HSA-NEXT: s_add_u32 s34, s0, 0x70
|
|
; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v30, s34
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v31, s35
|
|
; GCN-HSA-NEXT: s_add_u32 s34, s0, 0x50
|
|
; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v32, s34
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v33, s35
|
|
; GCN-HSA-NEXT: s_add_u32 s34, s0, 48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s46
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s47
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7]
|
|
; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v24, s34
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v25, s35
|
|
; GCN-HSA-NEXT: s_add_u32 s34, s0, 16
|
|
; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s52
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s53
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s44
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s45
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v22, s64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s26
|
|
; GCN-HSA-NEXT: s_add_u32 s26, s0, 0xe0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s27
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v12, s54
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v13, s55
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v14, s42
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v15, s43
|
|
; GCN-HSA-NEXT: s_addc_u32 s27, s1, 0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v23, s65
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v12, s22
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s56
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s57
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v18, s40
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v19, s41
|
|
; GCN-HSA-NEXT: s_add_u32 s22, s0, 0xc0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v34, s34
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v13, s23
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s26
|
|
; GCN-HSA-NEXT: s_addc_u32 s23, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v18, s22
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v20, s58
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v21, s59
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s60
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v22, s38
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v23, s39
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s61
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s36
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s37
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s62
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s63
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v35, s35
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s28
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s29
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s50
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s51
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s27
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v14, s48
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v15, s49
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v19, s23
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11]
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18
|
|
; GCN-HSA-NEXT: s_add_u32 s18, s0, 0xa0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19
|
|
; GCN-HSA-NEXT: s_addc_u32 s19, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s18
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s30
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s31
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s19
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_nop 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14
|
|
; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x80
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15
|
|
; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s14
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s24
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s25
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s15
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_nop 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10
|
|
; GCN-HSA-NEXT: s_add_u32 s10, s0, 0x60
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s11
|
|
; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s21
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_nop 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
|
|
; GCN-HSA-NEXT: s_add_u32 s6, s0, 64
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s7
|
|
; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s16
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s17
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_nop 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-HSA-NEXT: s_add_u32 s4, s0, 32
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
|
|
; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9
|
|
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
|
|
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
|
; GCN-HSA-NEXT: s_endpgm
|
|
;
|
|
; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i64:
|
|
; GCN-NOHSA-VI: ; %bb.0:
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[12:27], s[10:11], 0x0
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, 0xf000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, -1
|
|
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s66, s27
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s68, s27, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[64:65], s[26:27], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s26, 16
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s60, s25
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s25, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s66
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s67
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s68
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s69
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:240
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[58:59], s[24:25], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s24, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s64
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s65
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:224
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s54, s23
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s23, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s60
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s61
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s62
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:208
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[22:23], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s22, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s58
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s59
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s24
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s25
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:192
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s48, s21
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s50, s21, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s54
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s55
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s56
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:176
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[20:21], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s20, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s52
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s53
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s22
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s23
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:160
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s42, s19
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s19, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s50
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s51
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:144
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[40:41], s[18:19], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s18, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s46
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s47
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s20
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:128
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s36, s17
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s38, s17, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s42
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s43
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s44
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s45
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[16:17], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s16, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s40
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s41
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s18
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s19
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s30, s15
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s15, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s38
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s39
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[14:15], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s14, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s12, 16
|
|
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s13, 16
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[0:1], s[12:13], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_mov_b32 s12, s13
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
|
|
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s28
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s29
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
|
|
; GCN-NOHSA-VI-NEXT: s_nop 0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6
|
|
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7
|
|
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
|
|
; GCN-NOHSA-VI-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: constant_sextload_v32i16_to_v32i64:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @22
|
|
; EG-NEXT: ALU 55, @31, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 2 @24
|
|
; EG-NEXT: ALU 74, @87, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T34.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T33.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T29.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T25.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 22:
|
|
; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1
|
|
; EG-NEXT: Fetch clause starting at 24:
|
|
; EG-NEXT: VTX_READ_128 T38.XYZW, T19.X, 48, #1
|
|
; EG-NEXT: VTX_READ_128 T39.XYZW, T19.X, 32, #1
|
|
; EG-NEXT: VTX_READ_128 T40.XYZW, T19.X, 16, #1
|
|
; EG-NEXT: ALU clause starting at 30:
|
|
; EG-NEXT: MOV * T19.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 31:
|
|
; EG-NEXT: LSHR T21.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: LSHR T22.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
|
|
; EG-NEXT: LSHR T23.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
|
|
; EG-NEXT: LSHR T24.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
|
|
; EG-NEXT: LSHR T25.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
|
|
; EG-NEXT: LSHR T26.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
|
|
; EG-NEXT: LSHR T27.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
|
|
; EG-NEXT: LSHR T28.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43)
|
|
; EG-NEXT: LSHR T29.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43)
|
|
; EG-NEXT: LSHR T30.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43)
|
|
; EG-NEXT: LSHR T31.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43)
|
|
; EG-NEXT: LSHR T32.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43)
|
|
; EG-NEXT: LSHR T33.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43)
|
|
; EG-NEXT: LSHR T34.X, PV.W, literal.x,
|
|
; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y,
|
|
; EG-NEXT: ASHR * T35.W, T20.X, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43)
|
|
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T36.X, PV.W, literal.x,
|
|
; EG-NEXT: ASHR T35.Z, T20.X, literal.y,
|
|
; EG-NEXT: ASHR * T37.W, T20.Y, literal.z,
|
|
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
|
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T35.X, T20.X, 0.0, literal.x,
|
|
; EG-NEXT: ASHR * T37.Z, T20.Y, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: BFE_INT T37.X, T20.Y, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T35.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR * T19.W, T20.Z, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: ALU clause starting at 87:
|
|
; EG-NEXT: ASHR T19.Z, T20.Z, literal.x,
|
|
; EG-NEXT: ASHR * T41.W, T20.W, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T19.X, T20.Z, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T37.Y, T37.X, literal.y,
|
|
; EG-NEXT: ASHR T41.Z, T20.W, literal.x,
|
|
; EG-NEXT: ASHR * T42.W, T40.X, literal.y, BS:VEC_120/SCL_212
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T41.X, T20.W, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T19.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T42.Z, T40.X, literal.x,
|
|
; EG-NEXT: ASHR * T20.W, T40.Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T42.X, T40.X, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T41.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T20.Z, T40.Y, literal.x,
|
|
; EG-NEXT: ASHR * T43.W, T40.Z, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T20.X, T40.Y, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T42.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T43.Z, T40.Z, literal.x,
|
|
; EG-NEXT: ASHR * T44.W, T40.W, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T43.X, T40.Z, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T20.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T44.Z, T40.W, literal.x,
|
|
; EG-NEXT: ASHR * T45.W, T39.X, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T44.X, T40.W, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T43.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T45.Z, T39.X, literal.x,
|
|
; EG-NEXT: ASHR * T40.W, T39.Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T45.X, T39.X, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T44.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T40.Z, T39.Y, literal.x,
|
|
; EG-NEXT: ASHR * T46.W, T39.Z, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T40.X, T39.Y, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T45.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T46.Z, T39.Z, literal.x,
|
|
; EG-NEXT: ASHR * T47.W, T39.W, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T46.X, T39.Z, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T40.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T47.Z, T39.W, literal.x,
|
|
; EG-NEXT: ASHR * T48.W, T38.X, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T47.X, T39.W, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T46.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T48.Z, T38.X, literal.x,
|
|
; EG-NEXT: ASHR * T39.W, T38.Y, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T48.X, T38.X, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T47.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T39.Z, T38.Y, literal.x,
|
|
; EG-NEXT: ASHR * T49.W, T38.Z, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T39.X, T38.Y, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T48.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR T49.Z, T38.Z, literal.x,
|
|
; EG-NEXT: ASHR * T50.W, T38.W, literal.y,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T49.X, T38.Z, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T39.Y, PV.X, literal.y,
|
|
; EG-NEXT: ASHR * T50.Z, T38.W, literal.x,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: BFE_INT T50.X, T38.W, 0.0, literal.x,
|
|
; EG-NEXT: ASHR T49.Y, PV.X, literal.y,
|
|
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
|
|
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
|
|
; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T38.X, PV.W, literal.x,
|
|
; EG-NEXT: ASHR * T50.Y, PV.X, literal.y,
|
|
; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
|
|
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
|
|
%ext = sext <32 x i16> %load to <32 x i64>
|
|
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; The two v64i16 -> v64i64 extending-load tests below are commented out because they trigger undefined-register machine verifier errors.
|
|
|
|
; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
|
|
; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
|
|
; %ext = zext <64 x i16> %load to <64 x i64>
|
|
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
|
|
; ret void
|
|
; }
|
|
|
|
; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
|
|
; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
|
|
; %ext = sext <64 x i16> %load to <64 x i64>
|
|
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
|
|
; ret void
|
|
; }
|
|
|
|
; Attribute group #0: applies `nounwind` to any function declared with `#0`
; (the disabled v64i16 -> v64i64 kernels above reference it).
attributes #0 = { nounwind }
|