mirror of https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
2c86671523
As discussed on llvm-dev:
http://lists.llvm.org/pipermail/llvm-dev/2020-April/140729.html

This is hopefully the final remaining showstopper before we can remove the 'experimental' from the reduction intrinsics.

No behavior was specified for the FP min/max reductions, so we have a mess of different interpretations.

There are a few potential options for the semantics of these max/min ops. I think this is the simplest based on current behavior/implementation: make the reductions inherit from the existing llvm.maxnum/minnum intrinsics. These correspond to libm fmax/fmin, and those are similar to the (now deprecated?) IEEE-754 maxNum/minNum functions (NaNs are treated as missing data). So the default expansion creates calls to libm functions.

Another option would be to inherit from llvm.maximum/minimum (NaNs propagate), but most targets just crash in codegen when given those nodes because no default expansion was ever implemented AFAICT.

We could also just assume 'nnan' semantics by default (we are already assuming 'nsz' semantics in the maxnum/minnum intrinsics), but some targets (AArch64, PowerPC) support the more defined behavior, so it doesn't make much sense to not allow a tighter spec. Fast-math-flags ('nnan') can be used to loosen the semantics.

(Note that D67507 was proposed to update the LangRef to acknowledge the more recent IEEE-754 2019 standard, but that patch seems to have stalled. If we do update based on the new standard, the reduction instructions can seamlessly inherit from whatever updates are made to the max/min intrinsics.)

x86 sees a regression here on 'nnan' tests because we have underlying, longstanding bugs in FMF creation/propagation. Those need to be fixed apart from this change (for example: https://llvm.org/PR35538). The expansion sequence before this patch may not have been correct.

Differential Revision: https://reviews.llvm.org/D87391
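To make the chosen semantics concrete, here is a minimal illustrative sketch (not code from this patch; the function name is made up) of a maxnum-based expansion of a <2 x double> fmax reduction:

    declare double @llvm.maxnum.f64(double, double)

    define double @fmax_v2f64_sketch(<2 x double> %vec) {
      %e0 = extractelement <2 x double> %vec, i32 0
      %e1 = extractelement <2 x double> %vec, i32 1
      ; maxnum has libm fmax semantics: a NaN input is treated as missing data
      %r = call double @llvm.maxnum.f64(double %e0, double %e1)
      ret double %r
    }

Under these semantics, reducing <NaN, 1.0> yields 1.0; under llvm.maximum/minimum semantics it would yield NaN.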
319 lines
14 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -expand-reductions -S | FileCheck %s
; Tests without a target which should expand all reductions
declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)

declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)

declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)

declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)

declare i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8>)

define i64 @add_i64(<2 x i64> %vec) {
; CHECK-LABEL: @add_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @mul_i64(<2 x i64> %vec) {
; CHECK-LABEL: @mul_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @and_i64(<2 x i64> %vec) {
; CHECK-LABEL: @and_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @or_i64(<2 x i64> %vec) {
; CHECK-LABEL: @or_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @xor_i64(<2 x i64> %vec) {
; CHECK-LABEL: @xor_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define float @fadd_f32(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd fast float 0.000000e+00, [[TMP0]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
  ret float %r
}

define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_accum(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd fast float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define float @fadd_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd float undef, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT: [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict_accum(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT: [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

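; Note: the 'fast' reduction calls above may be reassociated, so they expand
; to the log2 shuffle-tree pattern; the flag-less '_strict' calls must stay
; ordered, so they expand to a sequential chain of scalar ops instead.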
define float @fmul_f32(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul fast float 1.000000e+00, [[TMP0]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_accum(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul fast float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul float undef, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT: [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict_accum(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT: [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define i64 @smax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smax_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @smin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smin_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @umax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umax_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @umin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umin_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; FIXME: Expand using maxnum intrinsic?

define double @fmax_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmax_f64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[R:%.*]] = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT: ret double [[R]]
;
entry:
  %r = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %vec)
  ret double %r
}

; FIXME: Expand using minnum intrinsic?

define double @fmin_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmin_f64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[R:%.*]] = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT: ret double [[R]]
;
entry:
  %r = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %vec)
  ret double %r
}

; FIXME: Why is this not expanded?

; Test when the vector size is not a power of two.
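; (Likely because the pass's shuffle-based halving expansion assumes a
; power-of-two element count, so the <3 x i8> case is left for the backend.)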
define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-LABEL: @test_v3i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B:%.*]] = call i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
; CHECK-NEXT: ret i8 [[B]]
;
entry:
  %b = call i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8> %a)
  ret i8 %b
}