1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[RISCV] Define vector widening reduction intrinsic.

Define vwredsumu/vwredsum/vfwredosum/vfwredsum

We work with @rogfer01 from BSC to come out this patch.

Authored-by: Roger Ferrer Ibanez <rofirrim@gmail.com>
Co-Authored-by: Zakk Chen <zakk.chen@sifive.com>

Differential Revision: https://reviews.llvm.org/D93807
This commit is contained in:
Monk Chiang 2020-12-26 21:21:46 +08:00
parent 5dfa98f2df
commit 20dc5d0c2f
6 changed files with 302 additions and 0 deletions

View File

@ -663,11 +663,17 @@ let TargetPrefix = "riscv" in {
defm vredmaxu : RISCVReduction;
defm vredmax : RISCVReduction;
defm vwredsumu : RISCVReduction;
defm vwredsum : RISCVReduction;
defm vfredosum : RISCVReduction;
defm vfredsum : RISCVReduction;
defm vfredmin : RISCVReduction;
defm vfredmax : RISCVReduction;
defm vfwredsum : RISCVReduction;
defm vfwredosum : RISCVReduction;
def int_riscv_vmand: RISCVBinaryAAANoMask;
def int_riscv_vmnand: RISCVBinaryAAANoMask;
def int_riscv_vmandnot: RISCVBinaryAAANoMask;

View File

@ -1799,6 +1799,22 @@ multiclass VPatReductionV_VS<string intrinsic, string instruction, bit IsFloat =
}
}
multiclass VPatReductionW_VS<string intrinsic, string instruction, bit IsFloat = 0> {
foreach vti = !if(IsFloat, AllFloatVectors, AllIntegerVectors) in
{
defvar wtiSEW = !mul(vti.SEW, 2);
if !le(wtiSEW, 64) then {
defvar wtiM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # wtiSEW # "M1");
defm : VPatTernary<intrinsic, instruction, "VS",
wtiM1.Vector, vti.Vector,
wtiM1.Vector, vti.Mask,
vti.SEW, vti.LMul,
wtiM1.RegClass, vti.RegClass,
wtiM1.RegClass>;
}
}
}
//===----------------------------------------------------------------------===//
// Pseudo instructions and patterns.
//===----------------------------------------------------------------------===//
@ -2138,6 +2154,12 @@ defm PseudoVREDMINU : VPseudoReductionV_VS;
defm PseudoVREDMIN : VPseudoReductionV_VS;
defm PseudoVREDMAXU : VPseudoReductionV_VS;
defm PseudoVREDMAX : VPseudoReductionV_VS;
//===----------------------------------------------------------------------===//
// 15.2. Vector Widening Integer Reduction Instructions
//===----------------------------------------------------------------------===//
defm PseudoVWREDSUMU : VPseudoReductionV_VS;
defm PseudoVWREDSUM : VPseudoReductionV_VS;
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
@ -2148,6 +2170,12 @@ defm PseudoVFREDOSUM : VPseudoReductionV_VS;
defm PseudoVFREDSUM : VPseudoReductionV_VS;
defm PseudoVFREDMIN : VPseudoReductionV_VS;
defm PseudoVFREDMAX : VPseudoReductionV_VS;
//===----------------------------------------------------------------------===//
// 15.4. Vector Widening Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
defm PseudoVFWREDSUM : VPseudoReductionV_VS;
defm PseudoVFWREDOSUM : VPseudoReductionV_VS;
} // Predicates = [HasStdExtV, HasStdExtF]
//===----------------------------------------------------------------------===//
@ -2630,6 +2658,12 @@ defm "" : VPatReductionV_VS<"int_riscv_vredminu", "PseudoVREDMINU">;
defm "" : VPatReductionV_VS<"int_riscv_vredmin", "PseudoVREDMIN">;
defm "" : VPatReductionV_VS<"int_riscv_vredmaxu", "PseudoVREDMAXU">;
defm "" : VPatReductionV_VS<"int_riscv_vredmax", "PseudoVREDMAX">;
//===----------------------------------------------------------------------===//
// 15.2. Vector Widening Integer Reduction Instructions
//===----------------------------------------------------------------------===//
defm "" : VPatReductionW_VS<"int_riscv_vwredsumu", "PseudoVWREDSUMU">;
defm "" : VPatReductionW_VS<"int_riscv_vwredsum", "PseudoVWREDSUM">;
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
@ -2640,6 +2674,12 @@ defm "" : VPatReductionV_VS<"int_riscv_vfredosum", "PseudoVFREDOSUM", /*IsFloat=
defm "" : VPatReductionV_VS<"int_riscv_vfredsum", "PseudoVFREDSUM", /*IsFloat=*/1>;
defm "" : VPatReductionV_VS<"int_riscv_vfredmin", "PseudoVFREDMIN", /*IsFloat=*/1>;
defm "" : VPatReductionV_VS<"int_riscv_vfredmax", "PseudoVFREDMAX", /*IsFloat=*/1>;
//===----------------------------------------------------------------------===//
// 15.4. Vector Widening Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
defm "" : VPatReductionW_VS<"int_riscv_vfwredsum", "PseudoVFWREDSUM", /*IsFloat=*/1>;
defm "" : VPatReductionW_VS<"int_riscv_vfwredosum", "PseudoVFWREDOSUM", /*IsFloat=*/1>;
} // Predicates = [HasStdExtV, HasStdExtF]
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,43 @@
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \
; RUN: --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
<vscale x 2 x float>,
<vscale x 32 x half>,
<vscale x 2 x float>,
i32);
define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
%a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
<vscale x 2 x float> %0,
<vscale x 32 x half> %1,
<vscale x 2 x float> %2,
i32 %3)
ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1(
<vscale x 2 x float>,
<vscale x 32 x half>,
<vscale x 2 x float>,
<vscale x 32 x i1>,
i32);
define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
%a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1(
<vscale x 2 x float> %0,
<vscale x 32 x half> %1,
<vscale x 2 x float> %2,
<vscale x 32 x i1> %3,
i32 %4)
ret <vscale x 2 x float> %a
}

View File

@ -0,0 +1,85 @@
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
; RUN: --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
<vscale x 2 x float>,
<vscale x 32 x half>,
<vscale x 2 x float>,
i64);
define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
%a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
<vscale x 2 x float> %0,
<vscale x 32 x half> %1,
<vscale x 2 x float> %2,
i64 %3)
ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1(
<vscale x 2 x float>,
<vscale x 32 x half>,
<vscale x 2 x float>,
<vscale x 32 x i1>,
i64);
define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
%a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1(
<vscale x 2 x float> %0,
<vscale x 32 x half> %1,
<vscale x 2 x float> %2,
<vscale x 32 x i1> %3,
i64 %4)
ret <vscale x 2 x float> %a
}
declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv16f32(
<vscale x 1 x double>,
<vscale x 16 x float>,
<vscale x 1 x double>,
i64);
define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu
; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
%a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv16f32(
<vscale x 1 x double> %0,
<vscale x 16 x float> %1,
<vscale x 1 x double> %2,
i64 %3)
ret <vscale x 1 x double> %a
}
declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32.nxv16i1(
<vscale x 1 x double>,
<vscale x 16 x float>,
<vscale x 1 x double>,
<vscale x 16 x i1>,
i64);
define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu
; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
%a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32.nxv16i1(
<vscale x 1 x double> %0,
<vscale x 16 x float> %1,
<vscale x 1 x double> %2,
<vscale x 16 x i1> %3,
i64 %4)
ret <vscale x 1 x double> %a
}

View File

@ -0,0 +1,43 @@
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \
; RUN: --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16(
<vscale x 2 x float>,
<vscale x 32 x half>,
<vscale x 2 x float>,
i32);
define <vscale x 2 x float> @intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
%a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16(
<vscale x 2 x float> %0,
<vscale x 32 x half> %1,
<vscale x 2 x float> %2,
i32 %3)
ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1(
<vscale x 2 x float>,
<vscale x 32 x half>,
<vscale x 2 x float>,
<vscale x 32 x i1>,
i32);
define <vscale x 2 x float> @intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
%a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1(
<vscale x 2 x float> %0,
<vscale x 32 x half> %1,
<vscale x 2 x float> %2,
<vscale x 32 x i1> %3,
i32 %4)
ret <vscale x 2 x float> %a
}

View File

@ -0,0 +1,85 @@
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
; RUN: --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16(
<vscale x 2 x float>,
<vscale x 32 x half>,
<vscale x 2 x float>,
i64);
define <vscale x 2 x float> @intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
%a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16(
<vscale x 2 x float> %0,
<vscale x 32 x half> %1,
<vscale x 2 x float> %2,
i64 %3)
ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1(
<vscale x 2 x float>,
<vscale x 32 x half>,
<vscale x 2 x float>,
<vscale x 32 x i1>,
i64);
define <vscale x 2 x float> @intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
%a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1(
<vscale x 2 x float> %0,
<vscale x 32 x half> %1,
<vscale x 2 x float> %2,
<vscale x 32 x i1> %3,
i64 %4)
ret <vscale x 2 x float> %a
}
declare <vscale x 1 x double> @llvm.riscv.vfwredsum.nxv1f64.nxv16f32(
<vscale x 1 x double>,
<vscale x 16 x float>,
<vscale x 1 x double>,
i64);
define <vscale x 1 x double> @intrinsic_vfwredsum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredsum_vs_nxv1f64_nxv16f32_nxv1f64
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu
; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
%a = call <vscale x 1 x double> @llvm.riscv.vfwredsum.nxv1f64.nxv16f32(
<vscale x 1 x double> %0,
<vscale x 16 x float> %1,
<vscale x 1 x double> %2,
i64 %3)
ret <vscale x 1 x double> %a
}
declare <vscale x 1 x double> @llvm.riscv.vfwredsum.mask.nxv1f64.nxv16f32.nxv16i1(
<vscale x 1 x double>,
<vscale x 16 x float>,
<vscale x 1 x double>,
<vscale x 16 x i1>,
i64);
define <vscale x 1 x double> @intrinsic_vfwredsum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
entry:
; CHECK-LABEL: intrinsic_vfwredsum_mask_vs_nxv1f64_nxv16f32_nxv1f64
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu
; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
%a = call <vscale x 1 x double> @llvm.riscv.vfwredsum.mask.nxv1f64.nxv16f32.nxv16i1(
<vscale x 1 x double> %0,
<vscale x 16 x float> %1,
<vscale x 1 x double> %2,
<vscale x 16 x i1> %3,
i64 %4)
ret <vscale x 1 x double> %a
}