mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
5f91239343
This patch uses the mechanism from D62995 to strengthen the definitions of the reduction intrinsics by letting the scalar result/accumulator type be overloaded from the vector element type. For example: ; The LLVM LangRef specifies that the scalar result must equal the ; vector element type, but this is not checked/enforced by LLVM. declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a) This patch changes that into: declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a) Which has the type-constraint more explicit and causes LLVM to check the result type with the vector element type. Reviewers: RKSimon, arsenm, rnk, greened, aemerson Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D62996 llvm-svn: 363240
32 lines
1.2 KiB
LLVM
32 lines
1.2 KiB
LLVM
; REQUIRES: arm-registered-target
|
|
; REQUIRES: asserts
|
|
; RUN: llc -o /dev/null %s -debug-only=legalize-types 2>&1 | FileCheck %s
|
|
|
|
; This test checks that when v4f64 gets broken down to two v2f64 it maintains
|
|
; the "nnan" and "reassoc" fast-math flags.
|
|
|
|
; CHECK: Legalizing node: [[VFOUR:t.*]]: v4f64 = BUILD_VECTOR
|
|
; CHECK-NEXT: Analyzing result type: v4f64
|
|
; CHECK-NEXT: Split node result: [[VFOUR]]: v4f64 = BUILD_VECTOR
|
|
|
|
; CHECK: Legalizing node: [[VTWO:t.*]]: v2f64 = BUILD_VECTOR
|
|
; CHECK: Legally typed node: [[VTWO]]: v2f64 = BUILD_VECTOR
|
|
; CHECK: Legalizing node: t26: v2f64 = fmaxnum nnan reassoc [[VTWO]], [[VTWO]]
|
|
|
|
; Module-level target configuration: the data layout string and an AArch64
; Linux triple. The llc invocation at the top of the file passes no explicit
; triple option, so these module settings are the ones visible to it.
; NOTE(review): the lit requirement at the top of the file names
; arm-registered-target while the triple here is aarch64 -- confirm intended.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
target triple = "aarch64--linux-gnu"
|
|
|
|
|
|
; Function Attrs: norecurse nounwind
|
|
; @test takes no arguments and returns a double. It fills a <4 x double> with
; 1.0 in every lane, reduces it with the fmax reduction intrinsic, and returns
; the scalar result. The call carries the nnan and reassoc fast-math flags,
; which the FileCheck patterns earlier in this file expect to see on the
; v2f64 fmaxnum node.
; NOTE(review): "#1" references an attribute group whose definition is not
; visible in this section -- confirm it is defined or accepted by the parser.
define fastcc double @test() unnamed_addr #1 {
|
|
entry:
|
|
; Populate lanes 0..3 one at a time, starting from an undef vector.
%0 = insertelement <4 x double> undef, double 1.0, i32 0
|
|
%1 = insertelement <4 x double> %0, double 1.0, i32 1
|
|
%2 = insertelement <4 x double> %1, double 1.0, i32 2
|
|
%3 = insertelement <4 x double> %2, double 1.0, i32 3
|
|
; Reduce the fully populated vector; the fast-math flags on this call are
; what the test expects to survive the v4f64 -> v2f64 split.
%4 = call nnan reassoc double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %3)
|
|
ret double %4
|
|
}
|
|
|
|
; Declaration of the fmax vector reduction intrinsic called by @test: it takes
; a <4 x double> and returns a double.
declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
|