1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
Sanjay Patel 6fae7ed86a [DAGCombiner] allow narrowing of add followed by truncate
trunc (add X, C ) --> add (trunc X), C'

If we're throwing away the top bits of an 'add' instruction, do it in the narrow destination type.
This makes the truncate-able opcode list identical to the sibling transform done in IR (in instcombine).

This change used to show regressions for x86, but those are gone after D55494. 
This gets us closer to deleting the x86 custom function (combineTruncatedArithmetic) 
that does almost the same thing.

Differential Revision: https://reviews.llvm.org/D55866

llvm-svn: 350006
2018-12-22 17:10:31 +00:00

320 lines
11 KiB
LLVM

; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; FIXME: Merge into imm.ll
; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_i16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
store volatile i16 -32768, i16 addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_0.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
store half 0.0, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_imm_neg_0.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
store half -0.0, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_0.5_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3800{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
store half 0.5, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_m_0.5_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb800{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
store half -0.5, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_1.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
store half 1.0, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_m_1.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
store half -1.0, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_2.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
store half 2.0, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_m_2.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
store half -2.0, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_4.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4400{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
store half 4.0, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_m_4.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc400{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
store half -4.0, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3118{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
store half 0xH3118, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb118{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
store half 0xHB118, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}store_literal_imm_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c00
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
store half 4096.0, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_0.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, 0.0
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_0.5_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, 0.5
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, -0.5
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_1.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, 1.0
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, -1.0
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_2.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, 2.0
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, -2.0
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_4.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, 4.0
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, -4.0
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_f16:
; VI: buffer_load_ushort [[VAL:v[0-9]+]]
; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
%x = load half, half addrspace(1)* %in
%y = fadd half %x, 0.5
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}commute_add_literal_f16:
; VI: buffer_load_ushort [[VAL:v[0-9]+]]
; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0x6400, [[VAL]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
%x = load half, half addrspace(1)* %in
%y = fadd half %x, 1024.0
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_1_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 1{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, 0xH0001
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_2_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 2{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, 0xH0002
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_16_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 16{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, 0xH0010
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_neg_1_f16:
; VI: v_add_u16_e32 [[REG:v[0-9]+]], -1, [[REG:v[0-9]+]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
%x = load i16, i16 addrspace(1)* %in
%y = add i16 %x, -1
%ybc = bitcast i16 %y to half
store half %ybc, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_neg_2_f16:
; VI: v_add_u16_e32 [[REG:v[0-9]+]], -2, [[REG:v[0-9]+]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
%x = load i16, i16 addrspace(1)* %in
%y = add i16 %x, -2
%ybc = bitcast i16 %y to half
store half %ybc, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_neg_16_f16:
; VI: v_add_u16_e32 [[REG:v[0-9]+]], -16, [[REG:v[0-9]+]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
%x = load i16, i16 addrspace(1)* %in
%y = add i16 %x, -16
%ybc = bitcast i16 %y to half
store half %ybc, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_63_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 63
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, 0xH003F
store half %y, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}add_inline_imm_64_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 64
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) {
%y = fadd half %x, 0xH0040
store half %y, half addrspace(1)* %out
ret void
}