mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
8450c87080
Begin transitioning the X86 vector code to recognise sub(umax(a,b) ,b) or sub(a,umin(a,b)) USUBSAT patterns to make it more generic and available to all targets. This initial patch just moves the basic umin/umax patterns to DAG, removing some vector-only checks on the way - these are some of the patterns that the legalizer will try to expand back to so we can be reasonably relaxed about matching these pre-legalization. We can handle the trunc(sub(..))) variants as well, which helps with patterns where we were promoting to a wider type to detect overflow/saturation. The remaining x86 code requires some cleanup first - some of it isn't actually tested etc. I also need to resurrect D25987. Differential Revision: https://reviews.llvm.org/D96413
72 lines
2.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

declare i4 @llvm.usub.sat.i4(i4, i4)
declare i8 @llvm.usub.sat.i8(i8, i8)
declare i16 @llvm.usub.sat.i16(i16, i16)
declare i32 @llvm.usub.sat.i32(i32, i32)
declare i64 @llvm.usub.sat.i64(i64, i64)
; i32: usub.sat of x against a computed subtrahend (y * z). Lowers to
; subs + csel on the unsigned-lower (lo) flag, clamping to zero on borrow.
define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
; CHECK-LABEL: func32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul w8, w1, w2
; CHECK-NEXT:    subs w8, w0, w8
; CHECK-NEXT:    csel w0, wzr, w8, lo
; CHECK-NEXT:    ret
  %prod = mul i32 %y, %z
  %sat = call i32 @llvm.usub.sat.i32(i32 %x, i32 %prod)
  ret i32 %sat
}
|
|
|
|
; i64: usub.sat of x against a computed subtrahend (y * z), matching the
; other widths in this file. Previously the intrinsic was called with %z,
; leaving %a dead and never exercising the sub-of-product pattern.
; NOTE(review): CHECK lines updated by hand to the expected i64 analogue of
; func32 — re-run utils/update_llc_test_checks.py to confirm.
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-LABEL: func64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x8, x1, x2
; CHECK-NEXT:    subs x8, x0, x8
; CHECK-NEXT:    csel x0, xzr, x8, lo
; CHECK-NEXT:    ret
  %a = mul i64 %y, %z
  %tmp = call i64 @llvm.usub.sat.i64(i64 %x, i64 %a)
  ret i64 %tmp
}
|
|
|
|
; i16: both operands are zero-extended (and #0xffff / uxth) before the
; subs + csel saturation sequence, since i16 is promoted to w-regs.
define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; CHECK-LABEL: func16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xffff
; CHECK-NEXT:    mul w9, w1, w2
; CHECK-NEXT:    subs w8, w8, w9, uxth
; CHECK-NEXT:    csel w0, wzr, w8, lo
; CHECK-NEXT:    ret
  %prod = mul i16 %y, %z
  %sat = call i16 @llvm.usub.sat.i16(i16 %x, i16 %prod)
  ret i16 %sat
}
|
|
|
|
; i8: same shape as func16 but masking to 8 bits (and #0xff / uxtb).
define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; CHECK-LABEL: func8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xff
; CHECK-NEXT:    mul w9, w1, w2
; CHECK-NEXT:    subs w8, w8, w9, uxtb
; CHECK-NEXT:    csel w0, wzr, w8, lo
; CHECK-NEXT:    ret
  %prod = mul i8 %y, %z
  %sat = call i8 @llvm.usub.sat.i8(i8 %x, i8 %prod)
  ret i8 %sat
}
|
|
|
|
; i4: non-power-of-two width; no uxt* extend form exists for 4 bits, so
; both operands are masked with explicit `and #0xf` before subs + csel.
define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; CHECK-LABEL: func4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul w9, w1, w2
; CHECK-NEXT:    and w8, w0, #0xf
; CHECK-NEXT:    and w9, w9, #0xf
; CHECK-NEXT:    subs w8, w8, w9
; CHECK-NEXT:    csel w0, wzr, w8, lo
; CHECK-NEXT:    ret
  %prod = mul i4 %y, %z
  %sat = call i4 @llvm.usub.sat.i4(i4 %x, i4 %prod)
  ret i4 %sat
}
|