1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00
llvm-mirror/test/CodeGen/Generic/expand-experimental-reductions.ll
Sander de Smalen 5f91239343 Improve reduction intrinsics by overloading result value.
This patch uses the mechanism from D62995 to strengthen the
definitions of the reduction intrinsics by letting the scalar
result/accumulator type be overloaded from the vector element type.

For example:

  ; The LLVM LangRef specifies that the scalar result must equal the
  ; vector element type, but this is not checked/enforced by LLVM.
  declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a)

This patch changes that into:

  declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a)

Which has the type-constraint more explicit and causes LLVM to check
the result type with the vector element type.

Reviewers: RKSimon, arsenm, rnk, greened, aemerson

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D62996

llvm-svn: 363240
2019-06-13 09:37:38 +00:00

306 lines
14 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -expand-reductions -S | FileCheck %s
; Tests without a target which should expand all reductions
declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
define i64 @add_i64(<2 x i64> %vec) {
; CHECK-LABEL: @add_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %vec)
ret i64 %r
}
define i64 @mul_i64(<2 x i64> %vec) {
; CHECK-LABEL: @mul_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %vec)
ret i64 %r
}
define i64 @and_i64(<2 x i64> %vec) {
; CHECK-LABEL: @and_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %vec)
ret i64 %r
}
define i64 @or_i64(<2 x i64> %vec) {
; CHECK-LABEL: @or_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %vec)
ret i64 %r
}
define i64 @xor_i64(<2 x i64> %vec) {
; CHECK-LABEL: @xor_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %vec)
ret i64 %r
}
define float @fadd_f32(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float 0.000000e+00, [[TMP0]]
; CHECK-NEXT: ret float [[TMP1]]
;
entry:
%r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
ret float %r
}
define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_accum(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float %accum, [[TMP0]]
; CHECK-NEXT: ret float [[TMP1]]
;
entry:
%r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
ret float %r
}
define float @fadd_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd float undef, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT: [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float undef, <4 x float> %vec)
ret float %r
}
define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict_accum(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT: [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
ret float %r
}
define float @fmul_f32(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float 1.000000e+00, [[TMP0]]
; CHECK-NEXT: ret float [[TMP1]]
;
entry:
%r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
ret float %r
}
define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_accum(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float %accum, [[TMP0]]
; CHECK-NEXT: ret float [[TMP1]]
;
entry:
%r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
ret float %r
}
define float @fmul_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul float undef, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT: [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float undef, <4 x float> %vec)
ret float %r
}
define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict_accum(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT: [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
ret float %r
}
define i64 @smax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smax_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec)
ret i64 %r
}
define i64 @smin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smin_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec)
ret i64 %r
}
define i64 @umax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umax_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec)
ret i64 %r
}
define i64 @umin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umin_i64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec)
ret i64 %r
}
define double @fmax_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmax_f64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <2 x double> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select fast <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: ret double [[TMP0]]
;
entry:
%r = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %vec)
ret double %r
}
define double @fmin_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmin_f64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <2 x double> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select fast <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: ret double [[TMP0]]
;
entry:
%r = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %vec)
ret double %r
}