Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2024-11-25 12:12:47 +01:00.
Commit 248de533d0: the high 16 bits of 16-bit operation results used to be consistently zeroed pre-gfx9, but gfx9 complicated the situation: now some instructions still zero them and some do not. This change also picks up a few cases that the existing patterns fail to optimize away; some cases are handled with instruction patterns, but some get through. In particular, this improves the integer cases.
287 lines, 8.1 KiB, LLVM
; RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX900 %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX906 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
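
; The tests below check when the zero-extension of a 16-bit result can be
; folded away, i.e. when the masking v_and_b32 with 0xffff is omitted because
; the 16-bit instruction is expected to already zero the high 16 bits of its
; 32-bit destination register.
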
; GCN-LABEL: {{^}}shl_i16:
; GCN: v_lshlrev_b16{{[_e32]*}} [[OP:v[0-9]+]],
; GCN-NEXT: s_setpc_b64
define i16 @shl_i16(i16 %x, i16 %y) {
  %res = shl i16 %x, %y
  ret i16 %res
}

; GCN-LABEL: {{^}}lshr_i16:
; GCN: v_lshrrev_b16{{[_e32]*}} [[OP:v[0-9]+]],
; GCN-NEXT: s_setpc_b64
define i16 @lshr_i16(i16 %x, i16 %y) {
  %res = lshr i16 %x, %y
  ret i16 %res
}

; GCN-LABEL: {{^}}ashr_i16:
; GCN: v_ashrrev_i16{{[_e32]*}} [[OP:v[0-9]+]],
; GCN-NEXT: s_setpc_b64
define i16 @ashr_i16(i16 %x, i16 %y) {
  %res = ashr i16 %x, %y
  ret i16 %res
}

; GCN-LABEL: {{^}}add_u16:
; GCN: v_add_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]],
; GCN-NEXT: s_setpc_b64
define i16 @add_u16(i16 %x, i16 %y) {
  %res = add i16 %x, %y
  ret i16 %res
}

; GCN-LABEL: {{^}}sub_u16:
; GCN: v_sub_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]],
; GCN-NEXT: s_setpc_b64
define i16 @sub_u16(i16 %x, i16 %y) {
  %res = sub i16 %x, %y
  ret i16 %res
}

; GCN-LABEL: {{^}}mul_lo_u16:
; GCN: v_mul_lo_u16{{[_e32]*}} [[OP:v[0-9]+]],
; GCN-NEXT: s_setpc_b64
define i16 @mul_lo_u16(i16 %x, i16 %y) {
  %res = mul i16 %x, %y
  ret i16 %res
}

; GCN-LABEL: {{^}}min_u16:
; GCN: v_min_u16{{[_e32]*}} [[OP:v[0-9]+]],
; GCN-NEXT: s_setpc_b64
define i16 @min_u16(i16 %x, i16 %y) {
  %cmp = icmp ule i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; GCN-LABEL: {{^}}min_i16:
; GCN: v_min_i16{{[_e32]*}} [[OP:v[0-9]+]],
; GCN-NEXT: s_setpc_b64
define i16 @min_i16(i16 %x, i16 %y) {
  %cmp = icmp sle i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; GCN-LABEL: {{^}}max_u16:
; GCN: v_max_u16{{[_e32]*}} [[OP:v[0-9]+]],
; GCN-NEXT: s_setpc_b64
define i16 @max_u16(i16 %x, i16 %y) {
  %cmp = icmp uge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; GCN-LABEL: {{^}}max_i16:
; GCN: v_max_i16{{[_e32]*}} [[OP:v[0-9]+]],
; GCN-NEXT: s_setpc_b64
define i16 @max_i16(i16 %x, i16 %y) {
  %cmp = icmp sge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}
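
; For the zero-extended variants below, GFX8 and GFX9 expect no masking
; instruction after the 16-bit op, while GFX10 expects an explicit
; v_and_b32 with 0xffff before returning the i32 result.
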
; GCN-LABEL: {{^}}shl_i16_zext_i32:
; GCN: v_lshlrev_b16{{[_e32]*}} [[OP:v[0-9]+]],
; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
; GCN-NEXT: s_setpc_b64
define i32 @shl_i16_zext_i32(i16 %x, i16 %y) {
  %res = shl i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}lshr_i16_zext_i32:
; GCN: v_lshrrev_b16{{[_e32]*}} [[OP:v[0-9]+]],
; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
; GCN-NEXT: s_setpc_b64
define i32 @lshr_i16_zext_i32(i16 %x, i16 %y) {
  %res = lshr i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}ashr_i16_zext_i32:
; GCN: v_ashrrev_i16{{[_e32]*}} [[OP:v[0-9]+]],
; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
; GCN-NEXT: s_setpc_b64
define i32 @ashr_i16_zext_i32(i16 %x, i16 %y) {
  %res = ashr i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}add_u16_zext_i32:
; GCN: v_add_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]],
; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
; GCN-NEXT: s_setpc_b64
define i32 @add_u16_zext_i32(i16 %x, i16 %y) {
  %res = add i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}sub_u16_zext_i32:
; GCN: v_sub_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]],
; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
; GCN-NEXT: s_setpc_b64
define i32 @sub_u16_zext_i32(i16 %x, i16 %y) {
  %res = sub i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}mul_lo_u16_zext_i32:
; GCN: v_mul_lo_u16{{[_e32]*}} [[OP:v[0-9]+]],
; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
; GCN-NEXT: s_setpc_b64
define i32 @mul_lo_u16_zext_i32(i16 %x, i16 %y) {
  %res = mul i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}min_u16_zext_i32:
; GCN: v_min_u16{{[_e32]*}} [[OP:v[0-9]+]],
; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
; GCN-NEXT: s_setpc_b64
define i32 @min_u16_zext_i32(i16 %x, i16 %y) {
  %cmp = icmp ule i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}min_i16_zext_i32:
; GCN: v_min_i16{{[_e32]*}} [[OP:v[0-9]+]],
; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
; GCN-NEXT: s_setpc_b64
define i32 @min_i16_zext_i32(i16 %x, i16 %y) {
  %cmp = icmp sle i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}max_u16_zext_i32:
; GCN: v_max_u16{{[_e32]*}} [[OP:v[0-9]+]],
; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
; GCN-NEXT: s_setpc_b64
define i32 @max_u16_zext_i32(i16 %x, i16 %y) {
  %cmp = icmp uge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}max_i16_zext_i32:
; GCN: v_max_i16{{[_e32]*}} [[OP:v[0-9]+]],
; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
; GCN-NEXT: s_setpc_b64
define i32 @max_i16_zext_i32(i16 %x, i16 %y) {
  %cmp = icmp sge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}
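
; For the f16 cases the checks differ per target: GFX8 never expects the
; 0xffff mask, GFX9 expects it only for some instructions (the fma and
; div_fixup cases below, but not fadd), and GFX10 expects it everywhere.
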
; GCN-LABEL: {{^}}zext_fadd_f16:
; GFX8: v_add_f16_e32 [[ADD:v[0-9]+]], v0, v1
; GFX8-NEXT: s_setpc_b64

; GFX9: v_add_f16_e32 [[ADD:v[0-9]+]], v0, v1
; GFX9-NEXT: s_setpc_b64

; GFX10: v_add_f16_e32 [[ADD:v[0-9]+]], v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, [[ADD]]
define i32 @zext_fadd_f16(half %x, half %y) {
  %add = fadd half %x, %y
  %cast = bitcast half %add to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}zext_fma_f16:
; GFX8: v_fma_f16 [[FMA:v[0-9]+]], v0, v1, v2
; GFX8-NEXT: s_setpc_b64

; GFX9: v_fma_f16 [[FMA:v[0-9]+]], v0, v1, v2
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, [[FMA]]

; GFX10: v_fmac_f16_e32 [[FMA:v[0-9]+]], v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, [[FMA]]
define i32 @zext_fma_f16(half %x, half %y, half %z) {
  %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
  %cast = bitcast half %fma to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; GCN-LABEL: {{^}}zext_div_fixup_f16:
; GFX8: v_div_fixup_f16 v0, v0, v1, v2
; GFX8-NEXT: s_setpc_b64

; GFX9: v_div_fixup_f16 v0, v0, v1, v2
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0

; GFX10: v_div_fixup_f16 v0, v0, v1, v2
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
define i32 @zext_div_fixup_f16(half %x, half %y, half %z) {
  %div.fixup = call half @llvm.amdgcn.div.fixup.f16(half %x, half %y, half %z)
  %cast = bitcast half %div.fixup to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; We technically could eliminate the and on gfx9 here but we don't try
; to inspect the source of the fptrunc. We're only worried about cases
; that lower to v_fma_mix* instructions.

; GCN-LABEL: {{^}}zext_fptrunc_f16:
; GFX8: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64

; GFX9: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64

; GFX10: v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
define i32 @zext_fptrunc_f16(float %x) {
  %fptrunc = fptrunc float %x to half
  %cast = bitcast half %fptrunc to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}
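
; When the fptrunc source is an fma, GFX906 and GFX10 select v_fma_mixlo_f16
; and still expect the 0xffff mask, while GFX8 and GFX900 go through
; v_fma_f32 + v_cvt_f16_f32 and do not.
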
; GCN-LABEL: {{^}}zext_fptrunc_fma_f16:
; GFX8: v_fma_f32 v0, v0, v1, v2
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64

; GFX900: v_fma_f32 v0, v0, v1, v2
; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX900-NEXT: s_setpc_b64

; GFX906: v_fma_mixlo_f16 v0, v0, v1, v2
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0

; GFX10: v_fma_mixlo_f16 v0, v0, v1, v2
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
define i32 @zext_fptrunc_fma_f16(float %x, float %y, float %z) {
  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
  %fptrunc = fptrunc float %fma to half
  %cast = bitcast half %fptrunc to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

declare half @llvm.amdgcn.div.fixup.f16(half, half, half)
declare half @llvm.fma.f16(half, half, half)
declare float @llvm.fma.f32(float, float, float)