mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
a66bd4e046
This is a simple hack based on what's proposed in D37686, but we can extend it if needed in follow-ups. It gets us most of the FMF functionality that we want without adding any state bits to the flags. It also intentionally leaves out non-FMF flags (nsw, etc.) to minimize the patch. It should provide a superset of the functionality from D46563 - the extra tests show propagation and codegen diffs for fcmp, vecreduce, and FP libcalls. The PPC log2() test shows the limits of this most basic approach - we only applied 'afn' to the last node created for the call. AFAIK, there aren't any libcall optimizations based on the flags currently, so that shouldn't make any difference. Differential Revision: https://reviews.llvm.org/D46854 llvm-svn: 332358
37 lines
1.2 KiB
LLVM
37 lines
1.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell | FileCheck %s

; Vector min/max intrinsics called by the test functions in this file.
declare <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y)
declare <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y)

; llvm.minnum on <4 x double>, called with the 'fast' flag set. The assertions
; below expect the call to be emitted as a single vminpd followed by the return.
define <4 x double> @via_minnum(<4 x double> %x, <4 x double> %y) {
; CHECK-LABEL: via_minnum:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %z = call fast <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}

; llvm.maxnum on <4 x double>, called with the 'fast' flag set. The assertions
; below expect the call to be emitted as a single vmaxpd followed by the return.
define <4 x double> @via_maxnum(<4 x double> %x, <4 x double> %y) {
; CHECK-LABEL: via_maxnum:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %z = call fast <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}

; Minimum written as 'fcmp ule' + 'select', with no fast-math flags on either
; instruction. The assertions below still expect a single vminpd, but with its
; two source registers in the opposite order from via_minnum.
; NOTE(review): presumably the swapped order is what keeps the unordered (NaN)
; case correct -- 'ule' is true there, so %x must be the value produced --
; confirm against the MINPD/VMINPD definition.
define <4 x double> @via_fcmp(<4 x double> %x, <4 x double> %y) {
; CHECK-LABEL: via_fcmp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %c = fcmp ule <4 x double> %x, %y
  %z = select <4 x i1> %c, <4 x double> %x, <4 x double> %y
  ret <4 x double> %z
}