1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 12:43:36 +01:00
llvm-mirror/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
Petar Avramovic 495d2a275a AMDGPU/GlobalISel: Add integer med3 combines
Add signed and unsigned integer versions of the med3 combine.
The source pattern is min(max(Val, K0), K1) or max(min(Val, K1), K0),
where K0 and K1 are constants and K0 <= K1. The destination is the med3
variant whose signedness matches that of the min/max in the source.

Differential Revision: https://reviews.llvm.org/D90050
2021-04-27 11:52:23 +02:00

128 lines
4.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; Signed clamp spelled smin(smax(%a, -12), 17): the variable is the first
; operand of the max and the max result is the first operand of the min.
; The expected output is a single v_med3_i32 with bounds -12 and 17.
define i32 @test_min_max_ValK0_K1_i32(i32 %a) {
; GFX10-LABEL: test_min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %lower_bounded = call i32 @llvm.smax.i32(i32 %a, i32 -12)
  %clamped = call i32 @llvm.smin.i32(i32 %lower_bounded, i32 17)
  ret i32 %clamped
}
; Signed clamp spelled smin(smax(-12, %a), 17): the constant is the first
; operand of the max and the max result is the first operand of the min.
; The expected output is a single v_med3_i32 with bounds -12 and 17.
; NOTE(review): the name says "ValK0" and has no "test_" prefix, yet the smax
; call below takes the constant first; "test_min_max_K0Val_K1_i32" would
; describe the operand order. Left unchanged to keep the symbol stable.
define i32 @min_max_ValK0_K1_i32(i32 %a) {
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %at_least_k0 = call i32 @llvm.smax.i32(i32 -12, i32 %a)
  %median = call i32 @llvm.smin.i32(i32 %at_least_k0, i32 17)
  ret i32 %median
}
; Signed clamp spelled smin(17, smax(%a, -12)): the variable is the first
; operand of the max and the constant is the first operand of the min.
; The expected output is a single v_med3_i32 with bounds -12 and 17.
define i32 @test_min_K1max_ValK0__i32(i32 %a) {
; GFX10-LABEL: test_min_K1max_ValK0__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %inner_max = call i32 @llvm.smax.i32(i32 %a, i32 -12)
  %result = call i32 @llvm.smin.i32(i32 17, i32 %inner_max)
  ret i32 %result
}
; Signed clamp spelled smin(17, smax(-12, %a)): the constant is the first
; operand of both the max and the min.
; The expected output is a single v_med3_i32 with bounds -12 and 17.
define i32 @test_min_K1max_K0Val__i32(i32 %a) {
; GFX10-LABEL: test_min_K1max_K0Val__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %floor_applied = call i32 @llvm.smax.i32(i32 -12, i32 %a)
  %mid = call i32 @llvm.smin.i32(i32 17, i32 %floor_applied)
  ret i32 %mid
}
; Signed clamp spelled smax(smin(%a, 17), -12): the variable is the first
; operand of the min and the min result is the first operand of the max.
; The expected output is a single v_med3_i32 with bounds -12 and 17.
define i32 @test_max_min_ValK1_K0_i32(i32 %a) {
; GFX10-LABEL: test_max_min_ValK1_K0_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %upper_bounded = call i32 @llvm.smin.i32(i32 %a, i32 17)
  %clamped = call i32 @llvm.smax.i32(i32 %upper_bounded, i32 -12)
  ret i32 %clamped
}
; Signed clamp spelled smax(smin(17, %a), -12): the constant is the first
; operand of the min and the min result is the first operand of the max.
; The expected output is a single v_med3_i32 with bounds -12 and 17.
define i32 @test_max_min_K1Val_K0_i32(i32 %a) {
; GFX10-LABEL: test_max_min_K1Val_K0_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %at_most_k1 = call i32 @llvm.smin.i32(i32 17, i32 %a)
  %median = call i32 @llvm.smax.i32(i32 %at_most_k1, i32 -12)
  ret i32 %median
}
; Signed clamp spelled smax(-12, smin(%a, 17)): the variable is the first
; operand of the min and the constant is the first operand of the max.
; The expected output is a single v_med3_i32 with bounds -12 and 17.
define i32 @test_max_K0min_ValK1__i32(i32 %a) {
; GFX10-LABEL: test_max_K0min_ValK1__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %inner_min = call i32 @llvm.smin.i32(i32 %a, i32 17)
  %result = call i32 @llvm.smax.i32(i32 -12, i32 %inner_min)
  ret i32 %result
}
; Signed clamp spelled smax(-12, smin(17, %a)): the constant is the first
; operand of both the min and the max.
; The expected output is a single v_med3_i32 with bounds -12 and 17.
define i32 @test_max_K0min_K1Val__i32(i32 %a) {
; GFX10-LABEL: test_max_K0min_K1Val__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %ceiling_applied = call i32 @llvm.smin.i32(i32 17, i32 %a)
  %mid = call i32 @llvm.smax.i32(i32 -12, i32 %ceiling_applied)
  ret i32 %mid
}
; Packed <2 x i16> form of smax(splat -12, smin(splat 17, %a)).
; The expected output keeps the separate v_pk_min_i16 / v_pk_max_i16 pair;
; no med3 instruction appears for this vector type.
define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) {
; GFX10-LABEL: test_max_K0min_K1Val__v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_min_i16 v0, 17, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_max_i16 v0, -12, v0 op_sel_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vec_min = call <2 x i16> @llvm.smin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
  %vec_clamped = call <2 x i16> @llvm.smax.v2i16(<2 x i16> <i16 -12, i16 -12>, <2 x i16> %vec_min)
  ret <2 x i16> %vec_clamped
}
; smin(smax(%a, -12), 17) in an amdgpu_ps function whose argument is inreg.
; The expected output keeps the scalar s_max_i32 / s_min_i32 pair; no med3
; instruction appears here.
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
; GFX10-LABEL: test_uniform_min_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_max_i32 s0, s2, -12
; GFX10-NEXT: s_min_i32 s0, s0, 17
; GFX10-NEXT: ; return to shader part epilog
  %scalar_max = call i32 @llvm.smax.i32(i32 %a, i32 -12)
  %scalar_clamped = call i32 @llvm.smin.i32(i32 %scalar_max, i32 17)
  ret i32 %scalar_clamped
}
; Signed min/max intrinsics called by the functions in this file, grouped by
; intrinsic: scalar i32 overload first, then the <2 x i16> overload.
declare i32 @llvm.smax.i32(i32, i32)
declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)
declare i32 @llvm.smin.i32(i32, i32)
declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)