mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
a66bd4e046
This is a simple hack based on what's proposed in D37686, but we can extend it if needed in follow-ups. It gets us most of the FMF functionality that we want without adding any state bits to the flags. It also intentionally leaves out non-FMF flags (nsw, etc) to minimize the patch. It should provide a superset of the functionality from D46563 - the extra tests show propagation and codegen diffs for fcmp, vecreduce, and FP libcalls. The PPC log2() test shows the limits of this most basic approach - we only applied 'afn' to the last node created for the call. AFAIK, there aren't any libcall optimizations based on the flags currently, so that shouldn't make any difference. Differential Revision: https://reviews.llvm.org/D46854 llvm-svn: 332358
32 lines
1.2 KiB
LLVM
32 lines
1.2 KiB
LLVM
; The module below targets aarch64--linux-gnu, so the AArch64 backend (not the
; 32-bit ARM one) is what llc needs in order to run this test.
; REQUIRES: aarch64-registered-target
|
|
; REQUIRES: asserts
|
|
; RUN: llc -o /dev/null %s -debug-only=legalize-types 2>&1 | FileCheck %s
|
|
|
|
; This test checks that when v4f64 gets broken down to two v2f64 it maintains
|
|
; the "nnan" and "reassoc" flags.
|
|
|
|
; CHECK: Legalizing node: [[VFOUR:t.*]]: v4f64 = BUILD_VECTOR
|
|
; CHECK-NEXT: Analyzing result type: v4f64
|
|
; CHECK-NEXT: Split node result: [[VFOUR]]: v4f64 = BUILD_VECTOR
|
|
|
|
; CHECK: Legalizing node: [[VTWO:t.*]]: v2f64 = BUILD_VECTOR
|
|
; CHECK: Legally typed node: [[VTWO]]: v2f64 = BUILD_VECTOR
|
|
; CHECK: Legalizing node: t26: v2f64 = fmaxnum nnan reassoc [[VTWO]], [[VTWO]]
|
|
|
|
; Module-level target configuration: the data layout string and the
; aarch64 Linux triple that llc uses when compiling this file.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
target triple = "aarch64--linux-gnu"
|
|
|
|
|
|
; Function Attrs: norecurse nounwind
|
|
; Builds a <4 x double> with every lane set to 1.0 and reduces it with the
; fmax vector-reduce intrinsic; the call carries the nnan and reassoc flags.
; NOTE(review): attribute group #1 is referenced here but no definition is
; visible in this file - confirm it is intentionally left undefined.
; NOTE(review): the FileCheck patterns pin a specific DAG node number (t26),
; so reordering or adding instructions here may require updating them.
define fastcc double @test() unnamed_addr #1 {
|
|
entry:
|
|
; Insert 1.0 into lanes 0 through 3, one lane per instruction.
%0 = insertelement <4 x double> undef, double 1.0, i32 0
|
|
%1 = insertelement <4 x double> %0, double 1.0, i32 1
|
|
%2 = insertelement <4 x double> %1, double 1.0, i32 2
|
|
%3 = insertelement <4 x double> %2, double 1.0, i32 3
|
|
; The fast-math flags on this call are the ones the FileCheck patterns expect
; to find on the v2f64 fmaxnum node.
%4 = call nnan reassoc double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double> %3)
|
|
ret double %4
|
|
}
|
|
|
|
; Declaration of the fmax vector-reduce intrinsic called by @test: it takes a
; <4 x double> and returns a double.
declare double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double>)
|