[RISCV] Define vector widening reduction intrinsic.

Define vwredsumu/vwredsum/vfwredosum/vfwredsum We work with @rogfer01 from BSC to come out this patch. Authored-by: Roger Ferrer Ibanez <rofirrim@gmail.com> Co-Authored-by: Zakk Chen <zakk.chen@sifive.com> Differential Revision: https://reviews.llvm.org/D93807
2024-11-25 20:23:11 +01:00 · 2020-12-26 21:21:46 +08:00 · 2020-12-26 21:21:46 +08:00 · 20dc5d0c2f
commit 20dc5d0c2f
parent 5dfa98f2df
6 changed files with 302 additions and 0 deletions
--- a/include/llvm/IR/IntrinsicsRISCV.td
+++ b/include/llvm/IR/IntrinsicsRISCV.td
@ -663,11 +663,17 @@ let TargetPrefix = "riscv" in {
  defm vredmaxu : RISCVReduction;
  defm vredmax : RISCVReduction;

+  defm vwredsumu : RISCVReduction;
+  defm vwredsum : RISCVReduction;
+
  defm vfredosum : RISCVReduction;
  defm vfredsum : RISCVReduction;
  defm vfredmin : RISCVReduction;
  defm vfredmax : RISCVReduction;

+  defm vfwredsum : RISCVReduction;
+  defm vfwredosum : RISCVReduction;
+
  def int_riscv_vmand: RISCVBinaryAAANoMask;
  def int_riscv_vmnand: RISCVBinaryAAANoMask;
  def int_riscv_vmandnot: RISCVBinaryAAANoMask;
--- a/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@ -1799,6 +1799,22 @@ multiclass VPatReductionV_VS<string intrinsic, string instruction, bit IsFloat =
  }
 }

+multiclass VPatReductionW_VS<string intrinsic, string instruction, bit IsFloat = 0> {
+  foreach vti = !if(IsFloat, AllFloatVectors, AllIntegerVectors) in
+  {
+    defvar wtiSEW = !mul(vti.SEW, 2);
+    if !le(wtiSEW, 64) then {
+      defvar wtiM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # wtiSEW # "M1");
+      defm : VPatTernary<intrinsic, instruction, "VS",
+                         wtiM1.Vector, vti.Vector,
+                         wtiM1.Vector, vti.Mask,
+                         vti.SEW, vti.LMul,
+                         wtiM1.RegClass, vti.RegClass,
+                         wtiM1.RegClass>;
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Pseudo instructions and patterns.
 //===----------------------------------------------------------------------===//
@ -2138,6 +2154,12 @@ defm PseudoVREDMINU    : VPseudoReductionV_VS;
 defm PseudoVREDMIN     : VPseudoReductionV_VS;
 defm PseudoVREDMAXU    : VPseudoReductionV_VS;
 defm PseudoVREDMAX     : VPseudoReductionV_VS;
+
+//===----------------------------------------------------------------------===//
+// 15.2. Vector Widening Integer Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVWREDSUMU   : VPseudoReductionV_VS;
+defm PseudoVWREDSUM    : VPseudoReductionV_VS;
 } // Predicates = [HasStdExtV]

 let Predicates = [HasStdExtV, HasStdExtF] in {
@ -2148,6 +2170,12 @@ defm PseudoVFREDOSUM   : VPseudoReductionV_VS;
 defm PseudoVFREDSUM    : VPseudoReductionV_VS;
 defm PseudoVFREDMIN    : VPseudoReductionV_VS;
 defm PseudoVFREDMAX    : VPseudoReductionV_VS;
+
+//===----------------------------------------------------------------------===//
+// 15.4. Vector Widening Floating-Point Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm PseudoVFWREDSUM   : VPseudoReductionV_VS;
+defm PseudoVFWREDOSUM  : VPseudoReductionV_VS;
 } // Predicates = [HasStdExtV, HasStdExtF]

 //===----------------------------------------------------------------------===//
@ -2630,6 +2658,12 @@ defm "" : VPatReductionV_VS<"int_riscv_vredminu", "PseudoVREDMINU">;
 defm "" : VPatReductionV_VS<"int_riscv_vredmin", "PseudoVREDMIN">;
 defm "" : VPatReductionV_VS<"int_riscv_vredmaxu", "PseudoVREDMAXU">;
 defm "" : VPatReductionV_VS<"int_riscv_vredmax", "PseudoVREDMAX">;
+
+//===----------------------------------------------------------------------===//
+// 15.2. Vector Widening Integer Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatReductionW_VS<"int_riscv_vwredsumu", "PseudoVWREDSUMU">;
+defm "" : VPatReductionW_VS<"int_riscv_vwredsum", "PseudoVWREDSUM">;
 } // Predicates = [HasStdExtV]

 let Predicates = [HasStdExtV, HasStdExtF] in {
@ -2640,6 +2674,12 @@ defm "" : VPatReductionV_VS<"int_riscv_vfredosum", "PseudoVFREDOSUM", /*IsFloat=
 defm "" : VPatReductionV_VS<"int_riscv_vfredsum", "PseudoVFREDSUM", /*IsFloat=*/1>;
 defm "" : VPatReductionV_VS<"int_riscv_vfredmin", "PseudoVFREDMIN", /*IsFloat=*/1>;
 defm "" : VPatReductionV_VS<"int_riscv_vfredmax", "PseudoVFREDMAX", /*IsFloat=*/1>;
+
+//===----------------------------------------------------------------------===//
+// 15.4. Vector Widening Floating-Point Reduction Instructions
+//===----------------------------------------------------------------------===//
+defm "" : VPatReductionW_VS<"int_riscv_vfwredsum", "PseudoVFWREDSUM", /*IsFloat=*/1>;
+defm "" : VPatReductionW_VS<"int_riscv_vfwredosum", "PseudoVFWREDOSUM", /*IsFloat=*/1>;
 } // Predicates = [HasStdExtV, HasStdExtF]

 //===----------------------------------------------------------------------===//
--- a/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll
+++ b/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll
@ -0,0 +1,43 @@
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
+  <vscale x 2 x float>,
+  <vscale x 32 x half>,
+  <vscale x 2 x float>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
+; CHECK:       vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
+    <vscale x 2 x float> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 2 x float> %2,
+    i32 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1(
+  <vscale x 2 x float>,
+  <vscale x 32 x half>,
+  <vscale x 2 x float>,
+  <vscale x 32 x i1>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
+; CHECK:       vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1(
+    <vscale x 2 x float> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 2 x float> %2,
+    <vscale x 32 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 2 x float> %a
+}
--- a/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll
+++ b/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll
@ -0,0 +1,85 @@
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
+  <vscale x 2 x float>,
+  <vscale x 32 x half>,
+  <vscale x 2 x float>,
+  i64);
+
+define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
+; CHECK:       vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
+    <vscale x 2 x float> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 2 x float> %2,
+    i64 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1(
+  <vscale x 2 x float>,
+  <vscale x 32 x half>,
+  <vscale x 2 x float>,
+  <vscale x 32 x i1>,
+  i64);
+
+define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
+; CHECK:       vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1(
+    <vscale x 2 x float> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 2 x float> %2,
+    <vscale x 32 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv16f32(
+  <vscale x 1 x double>,
+  <vscale x 16 x float>,
+  <vscale x 1 x double>,
+  i64);
+
+define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu
+; CHECK:       vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv16f32(
+    <vscale x 1 x double> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 1 x double> %2,
+    i64 %3)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32.nxv16i1(
+  <vscale x 1 x double>,
+  <vscale x 16 x float>,
+  <vscale x 1 x double>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu
+; CHECK:       vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32.nxv16i1(
+    <vscale x 1 x double> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 1 x double> %2,
+    <vscale x 16 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 1 x double> %a
+}
--- a/test/CodeGen/RISCV/rvv/vfwredsum-rv32.ll
+++ b/test/CodeGen/RISCV/rvv/vfwredsum-rv32.ll
@ -0,0 +1,43 @@
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16(
+  <vscale x 2 x float>,
+  <vscale x 32 x half>,
+  <vscale x 2 x float>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
+; CHECK:       vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16(
+    <vscale x 2 x float> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 2 x float> %2,
+    i32 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1(
+  <vscale x 2 x float>,
+  <vscale x 32 x half>,
+  <vscale x 2 x float>,
+  <vscale x 32 x i1>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
+; CHECK:       vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1(
+    <vscale x 2 x float> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 2 x float> %2,
+    <vscale x 32 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 2 x float> %a
+}
--- a/test/CodeGen/RISCV/rvv/vfwredsum-rv64.ll
+++ b/test/CodeGen/RISCV/rvv/vfwredsum-rv64.ll
@ -0,0 +1,85 @@
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16(
+  <vscale x 2 x float>,
+  <vscale x 32 x half>,
+  <vscale x 2 x float>,
+  i64);
+
+define <vscale x 2 x float> @intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
+; CHECK:       vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16(
+    <vscale x 2 x float> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 2 x float> %2,
+    i64 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1(
+  <vscale x 2 x float>,
+  <vscale x 32 x half>,
+  <vscale x 2 x float>,
+  <vscale x 32 x i1>,
+  i64);
+
+define <vscale x 2 x float> @intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu
+; CHECK:       vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1(
+    <vscale x 2 x float> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 2 x float> %2,
+    <vscale x 32 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfwredsum.nxv1f64.nxv16f32(
+  <vscale x 1 x double>,
+  <vscale x 16 x float>,
+  <vscale x 1 x double>,
+  i64);
+
+define <vscale x 1 x double> @intrinsic_vfwredsum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredsum_vs_nxv1f64_nxv16f32_nxv1f64
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu
+; CHECK:       vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+  %a = call <vscale x 1 x double> @llvm.riscv.vfwredsum.nxv1f64.nxv16f32(
+    <vscale x 1 x double> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 1 x double> %2,
+    i64 %3)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfwredsum.mask.nxv1f64.nxv16f32.nxv16i1(
+  <vscale x 1 x double>,
+  <vscale x 16 x float>,
+  <vscale x 1 x double>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 1 x double> @intrinsic_vfwredsum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+entry:
+; CHECK-LABEL: intrinsic_vfwredsum_mask_vs_nxv1f64_nxv16f32_nxv1f64
+; CHECK:       vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu
+; CHECK:       vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t
+  %a = call <vscale x 1 x double> @llvm.riscv.vfwredsum.mask.nxv1f64.nxv16f32.nxv16i1(
+    <vscale x 1 x double> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 1 x double> %2,
+    <vscale x 16 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 1 x double> %a
+}