mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AMDGPU: Add some baseline immediate encoding test changes
Add some encoding checks and add a few new cases.
This commit is contained in:
parent
c60db04871
commit
56837a55b3
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,11 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
|
||||
; FIXME: Merge into imm.ll
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(<2 x i16> addrspace(1)* %out) #0 {
|
||||
store <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> addrspace(1)* %out
|
||||
@ -12,7 +13,7 @@ define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(<2 x i16> addrspace(1)
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_0.0_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half 0.0, half 0.0>, <2 x half> addrspace(1)* %out
|
||||
@ -20,7 +21,7 @@ define amdgpu_kernel void @store_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_imm_neg_0.0_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_imm_neg_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half -0.0, half -0.0>, <2 x half> addrspace(1)* %out
|
||||
@ -28,7 +29,7 @@ define amdgpu_kernel void @store_imm_neg_0.0_v2f16(<2 x half> addrspace(1)* %out
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_0.5_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half 0.5, half 0.5>, <2 x half> addrspace(1)* %out
|
||||
@ -36,7 +37,7 @@ define amdgpu_kernel void @store_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_m_0.5_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half -0.5, half -0.5>, <2 x half> addrspace(1)* %out
|
||||
@ -44,7 +45,7 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(<2 x half> addrspace(1)*
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_1.0_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half 1.0, half 1.0>, <2 x half> addrspace(1)* %out
|
||||
@ -52,7 +53,7 @@ define amdgpu_kernel void @store_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_m_1.0_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half -1.0, half -1.0>, <2 x half> addrspace(1)* %out
|
||||
@ -60,7 +61,7 @@ define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(<2 x half> addrspace(1)*
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_2.0_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half 2.0, half 2.0>, <2 x half> addrspace(1)* %out
|
||||
@ -68,7 +69,7 @@ define amdgpu_kernel void @store_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_m_2.0_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half -2.0, half -2.0>, <2 x half> addrspace(1)* %out
|
||||
@ -76,7 +77,7 @@ define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(<2 x half> addrspace(1)*
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_4.0_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half 4.0, half 4.0>, <2 x half> addrspace(1)* %out
|
||||
@ -84,7 +85,7 @@ define amdgpu_kernel void @store_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_m_4.0_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half -4.0, half -4.0>, <2 x half> addrspace(1)* %out
|
||||
@ -92,7 +93,7 @@ define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(<2 x half> addrspace(1)*
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half 0xH3118, half 0xH3118>, <2 x half> addrspace(1)* %out
|
||||
@ -100,7 +101,7 @@ define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(<2 x half> addrspace(1
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_v2f16:
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118 ; encoding
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
|
||||
store <2 x half> <half 0xHB118, half 0xHB118>, <2 x half> addrspace(1)* %out
|
||||
@ -117,7 +118,7 @@ define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_0.0_v2f16:
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0 ; encoding
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
@ -137,8 +138,12 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %ou
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16:
|
||||
; GFX10: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x0f,0xcc,0x02,0xe0,0x01,0x08]
|
||||
; GFX10: buffer_store_dword [[REG]]
|
||||
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x8f,0xd3,0x04,0xe0,0x01,0x08]
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
@ -158,8 +163,12 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %ou
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16:
|
||||
; GFX10: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x0f,0xcc,0x02,0xe2,0x01,0x08]
|
||||
; GFX10: buffer_store_dword [[REG]]
|
||||
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x8f,0xd3,0x04,0xe2,0x01,0x08]
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
@ -180,7 +189,7 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)*
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_1.0_v2f16:
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0] ; encoding
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
@ -201,7 +210,7 @@ define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %ou
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_v2f16:
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0] ; encoding
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
|
||||
@ -223,7 +232,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)*
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_2.0_v2f16:
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0] ; encoding
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
@ -244,7 +253,7 @@ define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %ou
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_v2f16:
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0] ; encoding
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
@ -265,7 +274,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)*
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_4.0_v2f16:
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0] ; encoding
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
@ -286,7 +295,7 @@ define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %ou
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_v2f16:
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0] ; encoding
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
@ -325,12 +334,14 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}commute_add_literal_v2f16:
|
||||
; GFX10: v_pk_add_f16 v0, 0x6400, v0 op_sel_hi:[0,1] ; encoding: [0x00,0x00,0x0f,0xcc,0xff,0x00,0x02,0x10,0x00,0x64,0x00,0x00]
|
||||
|
||||
; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0]{{$}}
|
||||
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400 ; encoding
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x8f,0xd3,0x00,0x09,0x00,0x08]
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; VI-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400{{$}}
|
||||
; VI-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400 ; encoding
|
||||
; VI-DAG: buffer_load_dword
|
||||
; VI-NOT: and
|
||||
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}
|
||||
@ -348,17 +359,17 @@ define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %o
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_1_v2f16:
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0] ; encoding
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
|
||||
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1{{$}}
|
||||
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1 ; encoding
|
||||
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
|
||||
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
|
||||
|
||||
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1{{$}}
|
||||
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1 ; encoding
|
||||
; VI: v_or_b32
|
||||
; VI: buffer_store_dword
|
||||
define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
|
||||
@ -369,18 +380,18 @@ define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out,
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_2_v2f16:
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0] ; encoding
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
|
||||
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2{{$}}
|
||||
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2 ; encoding
|
||||
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
|
||||
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
|
||||
|
||||
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2{{$}}
|
||||
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2 ; encoding
|
||||
; VI: v_or_b32
|
||||
; VI: buffer_store_dword
|
||||
define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
|
||||
@ -391,18 +402,18 @@ define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out,
|
||||
|
||||
; GCN-LABEL: {{^}}add_inline_imm_16_v2f16:
|
||||
; GFX9: s_load_dword [[VAL:s[0-9]+]]
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0]{{$}}
|
||||
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0] ; encoding
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
|
||||
; FIXME: Shouldn't need right shift and SDWA, also extra copy
|
||||
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
|
||||
; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16{{$}}
|
||||
; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16 ; encoding
|
||||
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
|
||||
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
|
||||
|
||||
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 16{{$}}
|
||||
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 16 ; encoding
|
||||
; VI: v_or_b32
|
||||
; VI: buffer_store_dword
|
||||
define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
|
||||
@ -417,7 +428,7 @@ define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]]
|
||||
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], -1{{$}}
|
||||
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], -1 ; encoding
|
||||
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
|
||||
; VI: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
|
||||
@ -434,7 +445,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %
|
||||
; GFX9: buffer_store_dword [[REG]]
|
||||
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]]
|
||||
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfffefffe{{$}}
|
||||
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfffefffe ; encoding
|
||||
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
|
||||
; VI: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
|
||||
@ -452,7 +463,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %
|
||||
|
||||
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]]
|
||||
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfff0fff0{{$}}
|
||||
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfff0fff0 ; encoding
|
||||
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
|
||||
; VI: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
|
||||
@ -505,4 +516,86 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Integer multiply of %x by the constant <2 x half> <0.5, 0.5> bitcast to
; <2 x i16>. The GFX9 and GFX10 checks expect the constant to be printed as
; the 0.5 operand of v_pk_mul_lo_u16 with op_sel_hi:[1,0]; the GFX10 check
; also matches an eight-byte encoding but accepts any hex value per byte.
; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
|
||||
; GFX9: v_pk_mul_lo_u16 v0, v0, 0.5 op_sel_hi:[1,0]
|
||||
|
||||
; GFX10: v_pk_mul_lo_u16 v0, v0, 0.5 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
|
||||
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
|
||||
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
|
||||
ret <2 x i16> %y
|
||||
}
|
||||
|
||||
; Integer multiply of %x by the constant <2 x half> <-0.5, -0.5> bitcast to
; <2 x i16>. The GFX9 and GFX10 checks expect the constant to be printed as
; the -0.5 operand of v_pk_mul_lo_u16 with op_sel_hi:[1,0]; the GFX10 check
; also matches an eight-byte encoding but accepts any hex value per byte.
; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
|
||||
; GFX9: v_pk_mul_lo_u16 v0, v0, -0.5 op_sel_hi:[1,0]
|
||||
|
||||
; GFX10: v_pk_mul_lo_u16 v0, v0, -0.5 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
|
||||
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
|
||||
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
|
||||
ret <2 x i16> %y
|
||||
}
|
||||
|
||||
; Integer multiply of %x by the constant <2 x half> <1.0, 1.0> bitcast to
; <2 x i16>. The GFX9 and GFX10 checks expect the constant to be printed as
; the 1.0 operand of v_pk_mul_lo_u16 with op_sel_hi:[1,0]; the GFX10 check
; also matches an eight-byte encoding but accepts any hex value per byte.
; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
|
||||
; GFX9: v_pk_mul_lo_u16 v0, v0, 1.0 op_sel_hi:[1,0]
|
||||
|
||||
; GFX10: v_pk_mul_lo_u16 v0, v0, 1.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
|
||||
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
|
||||
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
|
||||
ret <2 x i16> %y
|
||||
}
|
||||
|
||||
; Integer multiply of %x by the constant <2 x half> <-1.0, -1.0> bitcast to
; <2 x i16>. The GFX9 and GFX10 checks expect the constant to be printed as
; the -1.0 operand of v_pk_mul_lo_u16 with op_sel_hi:[1,0]; the GFX10 check
; also matches an eight-byte encoding but accepts any hex value per byte.
; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
|
||||
; GFX9: v_pk_mul_lo_u16 v0, v0, -1.0 op_sel_hi:[1,0]
|
||||
|
||||
; GFX10: v_pk_mul_lo_u16 v0, v0, -1.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
|
||||
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
|
||||
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
|
||||
ret <2 x i16> %y
|
||||
}
|
||||
|
||||
; Left shift where the value being shifted is the constant
; <2 x half> <2.0, 2.0> bitcast to <2 x i16>, and %x is the shift amount.
; The GFX9 and GFX10 checks expect v_pk_lshlrev_b16 with the constant printed
; as the trailing 2.0 operand and op_sel_hi:[1,0]; the GFX10 check also
; matches an eight-byte encoding but accepts any hex value per byte.
; GCN-LABEL: {{^}}shl_inline_imm_2.0_v2i16:
|
||||
; GFX9: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel_hi:[1,0]
|
||||
|
||||
; GFX10: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
|
||||
define <2 x i16> @shl_inline_imm_2.0_v2i16(<2 x i16> %x) {
|
||||
%y = shl <2 x i16> bitcast (<2 x half> <half 2.0, half 2.0> to <2 x i16>), %x
|
||||
ret <2 x i16> %y
|
||||
}
|
||||
|
||||
; Left shift where the value being shifted is the constant
; <2 x half> <-2.0, -2.0> bitcast to <2 x i16>, and %x is the shift amount.
; The GFX9 and GFX10 checks expect v_pk_lshlrev_b16 with the constant printed
; as the trailing -2.0 operand and op_sel_hi:[1,0]; the GFX10 check also
; matches an eight-byte encoding but accepts any hex value per byte.
; GCN-LABEL: {{^}}shl_inline_imm_neg_2.0_v2i16:
|
||||
; GFX9: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel_hi:[1,0]
|
||||
|
||||
; GFX10: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
|
||||
define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
|
||||
%y = shl <2 x i16> bitcast (<2 x half> <half -2.0, half -2.0> to <2 x i16>), %x
|
||||
ret <2 x i16> %y
|
||||
}
|
||||
|
||||
; Integer multiply of %x by the constant <2 x half> <4.0, 4.0> bitcast to
; <2 x i16>. The GFX9 and GFX10 checks expect the constant to be printed as
; the 4.0 operand of v_pk_mul_lo_u16 with op_sel_hi:[1,0]; the GFX10 check
; also matches an eight-byte encoding but accepts any hex value per byte.
; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
|
||||
; GFX9: v_pk_mul_lo_u16 v0, v0, 4.0 op_sel_hi:[1,0]
|
||||
|
||||
; GFX10: v_pk_mul_lo_u16 v0, v0, 4.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
|
||||
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
|
||||
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
|
||||
ret <2 x i16> %y
|
||||
|
||||
}
|
||||
|
||||
; Integer multiply of %x by the constant <2 x half> <-4.0, -4.0> bitcast to
; <2 x i16>. The GFX9 and GFX10 checks expect the constant to be printed as
; the -4.0 operand of v_pk_mul_lo_u16 with op_sel_hi:[1,0]; the GFX10 check
; also matches an eight-byte encoding but accepts any hex value per byte.
; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
|
||||
; GFX9: v_pk_mul_lo_u16 v0, v0, -4.0 op_sel_hi:[1,0]
|
||||
|
||||
; GFX10: v_pk_mul_lo_u16 v0, v0, -4.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
|
||||
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
|
||||
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
|
||||
ret <2 x i16> %y
|
||||
}
|
||||
|
||||
; Integer multiply of %x by the constant <2 x half> <0xH3118, 0xH3118>
; bitcast to <2 x i16>. The GFX9 and GFX10 checks expect that constant to be
; printed as the 0.15915494 operand of v_pk_mul_lo_u16 with op_sel_hi:[1,0];
; the GFX10 check also matches an eight-byte encoding but accepts any hex
; value per byte.
; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
|
||||
; GFX9: v_pk_mul_lo_u16 v0, v0, 0.15915494 op_sel_hi:[1,0]
|
||||
|
||||
; GFX10: v_pk_mul_lo_u16 v0, v0, 0.15915494 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
|
||||
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
|
||||
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
|
||||
ret <2 x i16> %y
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
Loading…
x
Reference in New Issue
Block a user