Add support for VSX scalar single-precision arithmetic in the PPC target

http://reviews.llvm.org/D9891 Following up on the VSX single-precision loads and stores added earlier, this adds support for elementary arithmetic operations on single-precision values in VSX registers. These instructions use the new VSSRC register class. Instructions added: xsaddsp, xsdivsp, xsmulsp, xsresp, xsrsqrtesp, xssqrtsp, xssubsp. llvm-svn: 237937
2024-10-19 11:02:59 +02:00 · 2015-05-21 19:32:49 +00:00 · 2015-05-21 19:32:49 +00:00 · 78592ebe3a
commit 78592ebe3a
parent dda3f1317e
5 changed files with 306 additions and 46 deletions
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@ -1000,10 +1000,12 @@ let isCommutable = 1 in {
                          [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
                                                    v4i32:$XB)))]>;
  } // isCommutable
  def XXLORC : XX3Form<60, 170,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                       "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
                       [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
  // VSX scalar loads introduced in ISA 2.07
  let mayLoad = 1 in {
    def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
@ -1026,6 +1028,7 @@ def XXLORC : XX3Form<60, 170,
                          "stxsiwx $XT, $dst", IIC_LdStSTFD,
                          [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
  } // mayStore
  def : Pat<(f64 (extloadf32 xoaddr:$src)),
            (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
  def : Pat<(f64 (fextend f32:$src)),
@ -1042,6 +1045,39 @@ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
            (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
          (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
  // VSX Elementary Scalar FP arithmetic (SP)
  let isCommutable = 1 in {
    def XSADDSP : XX3Form<60, 0,
                          (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                          "xsaddsp $XT, $XA, $XB", IIC_VecFP,
                          [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>;
    def XSMULSP : XX3Form<60, 16,
                          (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                          "xsmulsp $XT, $XA, $XB", IIC_VecFP,
                          [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
  } // isCommutable
  def XSDIVSP : XX3Form<60, 24,
                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                        "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
                        [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>;
  def XSRESP : XX2Form<60, 26,
                        (outs vssrc:$XT), (ins vssrc:$XB),
                        "xsresp $XT, $XB", IIC_VecFP,
                        [(set f32:$XT, (PPCfre f32:$XB))]>;
  def XSSQRTSP : XX2Form<60, 11,
                        (outs vssrc:$XT), (ins vssrc:$XB),
                        "xssqrtsp $XT, $XB", IIC_FPSqrtS,
                        [(set f32:$XT, (fsqrt f32:$XB))]>;
  def XSRSQRTESP : XX2Form<60, 10,
                           (outs vssrc:$XT), (ins vssrc:$XB),
                           "xsrsqrtesp $XT, $XB", IIC_VecFP,
                           [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
  def XSSUBSP : XX3Form<60, 8,
                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                        "xssubsp $XT, $XA, $XB", IIC_VecFP,
                        [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
 } // AddedComplexity = 400
 } // HasP8Vector
--- a/test/CodeGen/PowerPC/vsx-elementary-arith.ll
+++ b/test/CodeGen/PowerPC/vsx-elementary-arith.ll
@ -0,0 +1,120 @@
 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
@a = global float 3.000000e+00, align 4
@b = global float 4.000000e+00, align 4
@c = global double 3.000000e+00, align 8
@d = global double 4.000000e+00, align 8
 ; Function Attrs: nounwind
 define float @emit_xsaddsp() {
 entry:
  %0 = load float, float* @a, align 4
  %1 = load float, float* @b, align 4
  %add = fadd float %0, %1
  ret float %add
 ; CHECK-LABEL: @emit_xsaddsp
 ; CHECK: xsaddsp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define float @emit_xssubsp() {
 entry:
  %0 = load float, float* @a, align 4
  %1 = load float, float* @b, align 4
  %sub = fsub float %0, %1
  ret float %sub
 ; CHECK-LABEL: @emit_xssubsp
 ; CHECK: xssubsp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define float @emit_xsdivsp() {
 entry:
  %0 = load float, float* @a, align 4
  %1 = load float, float* @b, align 4
  %div = fdiv float %0, %1
  ret float %div
 ; CHECK-LABEL: @emit_xsdivsp
 ; CHECK: xsdivsp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define float @emit_xsmulsp() {
 entry:
  %0 = load float, float* @a, align 4
  %1 = load float, float* @b, align 4
  %mul = fmul float %0, %1
  ret float %mul
 ; CHECK-LABEL: @emit_xsmulsp
 ; CHECK: xsmulsp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define float @emit_xssqrtsp() {
 entry:
  %0 = load float, float* @b, align 4
  %call = call float @sqrtf(float %0)
  ret float %call
 ; CHECK-LABEL: @emit_xssqrtsp
 ; CHECK: xssqrtsp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 declare float @sqrtf(float)
 ; Function Attrs: nounwind
 define double @emit_xsadddp() {
 entry:
  %0 = load double, double* @c, align 8
  %1 = load double, double* @d, align 8
  %add = fadd double %0, %1
  ret double %add
 ; CHECK-LABEL: @emit_xsadddp
 ; CHECK: xsadddp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define double @emit_xssubdp() {
 entry:
  %0 = load double, double* @c, align 8
  %1 = load double, double* @d, align 8
  %sub = fsub double %0, %1
  ret double %sub
 ; CHECK-LABEL: @emit_xssubdp
 ; CHECK: xssubdp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define double @emit_xsdivdp() {
 entry:
  %0 = load double, double* @c, align 8
  %1 = load double, double* @d, align 8
  %div = fdiv double %0, %1
  ret double %div
 ; CHECK-LABEL: @emit_xsdivdp
 ; CHECK: xsdivdp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define double @emit_xsmuldp() {
 entry:
  %0 = load double, double* @c, align 8
  %1 = load double, double* @d, align 8
  %mul = fmul double %0, %1
  ret double %mul
 ; CHECK-LABEL: @emit_xsmuldp
 ; CHECK: xsmuldp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define double @emit_xssqrtdp() {
 entry:
  %0 = load double, double* @d, align 8
  %call = call double @sqrt(double %0)
  ret double %call
 ; CHECK-LABEL: @emit_xssqrtdp
 ; CHECK: xssqrtdp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 declare double @sqrt(double)
--- a/test/CodeGen/PowerPC/vsx-recip-est.ll
+++ b/test/CodeGen/PowerPC/vsx-recip-est.ll
@ -0,0 +1,62 @@
 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -enable-unsafe-fp-math | FileCheck %s
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-unsafe-fp-math | FileCheck %s
@a = global float 3.000000e+00, align 4
@b = global float 4.000000e+00, align 4
@c = global double 3.000000e+00, align 8
@d = global double 4.000000e+00, align 8
 ; Function Attrs: nounwind
 define float @emit_xsresp() {
 entry:
  %0 = load float, float* @a, align 4
  %1 = load float, float* @b, align 4
  %div = fdiv fast float %0, %1
  ret float %div
 ; CHECK-LABEL: @emit_xsresp
 ; CHECK: xsresp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define float @emit_xsrsqrtesp(float %f) {
 entry:
  %f.addr = alloca float, align 4
  store float %f, float* %f.addr, align 4
  %0 = load float, float* %f.addr, align 4
  %1 = load float, float* @b, align 4
  %2 = call float @llvm.sqrt.f32(float %1)
  %div = fdiv fast float %0, %2
  ret float %div
 ; CHECK-LABEL: @emit_xsrsqrtesp
 ; CHECK: xsrsqrtesp {{[0-9]+}}
 }
 ; Function Attrs: nounwind readnone
 declare float @llvm.sqrt.f32(float)
 ; Function Attrs: nounwind
 define double @emit_xsredp() {
 entry:
  %0 = load double, double* @c, align 8
  %1 = load double, double* @d, align 8
  %div = fdiv fast double %0, %1
  ret double %div
 ; CHECK-LABEL: @emit_xsredp
 ; CHECK: xsredp {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define double @emit_xsrsqrtedp(double %f) {
 entry:
  %f.addr = alloca double, align 8
  store double %f, double* %f.addr, align 8
  %0 = load double, double* %f.addr, align 8
  %1 = load double, double* @d, align 8
  %2 = call double @llvm.sqrt.f64(double %1)
  %div = fdiv fast double %0, %2
  ret double %div
 ; CHECK-LABEL: @emit_xsrsqrtedp
 ; CHECK: xsrsqrtedp {{[0-9]+}}
 }
 ; Function Attrs: nounwind readnone
 ; Double-precision square-root intrinsic used by @emit_xsrsqrtedp. Declared
 ; without an attribute-group suffix, like @llvm.sqrt.f32, because no
 ; `attributes #1` group is visible in this file.
 declare double @llvm.sqrt.f64(double)
--- a/test/MC/Disassembler/PowerPC/vsx.txt
+++ b/test/MC/Disassembler/PowerPC/vsx.txt
@ -39,6 +39,9 @@
 # CHECK: xsabsdp 7, 27
 0xf0 0xe0 0xdd 0x64
 # CHECK: xsaddsp 7, 63, 27
 0xf0 0xff 0xd8 0x04
 # CHECK: xsadddp 7, 63, 27
 0xf0 0xff 0xd9 0x04
@ -75,6 +78,9 @@
 # CHECK: xscvuxddp 7, 27
 0xf0 0xe0 0xdd 0xa0
 # CHECK: xsdivsp 7, 63, 27
 0xf0 0xff 0xd8 0xc4
 # CHECK: xsdivdp 7, 63, 27
 0xf0 0xff 0xd9 0xc4
@ -96,6 +102,9 @@
 # CHECK: xsmsubmdp 7, 63, 27
 0xf0 0xff 0xd9 0xcc
 # CHECK: xsmulsp 7, 63, 27
 0xf0 0xff 0xd8 0x84
 # CHECK: xsmuldp 7, 63, 27
 0xf0 0xff 0xd9 0x84
@ -132,15 +141,27 @@
 # CHECK: xsrdpiz 7, 27
 0xf0 0xe0 0xd9 0x64
 # CHECK: xsresp 7, 27
 0xf0 0xe0 0xd8 0x68
 # CHECK: xsredp 7, 27
 0xf0 0xe0 0xd9 0x68
 # CHECK: xsrsqrtesp 7, 27
 0xf0 0xe0 0xd8 0x28
 # CHECK: xsrsqrtedp 7, 27
 0xf0 0xe0 0xd9 0x28
 # CHECK: xssqrtsp 7, 27
 0xf0 0xe0 0xd8 0x2c
 # CHECK: xssqrtdp 7, 27
 0xf0 0xe0 0xd9 0x2c
 # CHECK: xssubsp 7, 63, 27
 0xf0 0xff 0xd8 0x44
 # CHECK: xssubdp 7, 63, 27
 0xf0 0xff 0xd9 0x44
--- a/test/MC/PowerPC/vsx.s
+++ b/test/MC/PowerPC/vsx.s
@ -44,6 +44,9 @@
 # CHECK-BE: xsabsdp 7, 27                      # encoding: [0xf0,0xe0,0xdd,0x64]
 # CHECK-LE: xsabsdp 7, 27                      # encoding: [0x64,0xdd,0xe0,0xf0]
            xsabsdp 7, 27
 # CHECK-BE: xsaddsp 7, 63, 27                  # encoding: [0xf0,0xff,0xd8,0x04]
 # CHECK-LE: xsaddsp 7, 63, 27                  # encoding: [0x04,0xd8,0xff,0xf0]
            xsaddsp 7, 63, 27
 # CHECK-BE: xsadddp 7, 63, 27                  # encoding: [0xf0,0xff,0xd9,0x04]
 # CHECK-LE: xsadddp 7, 63, 27                  # encoding: [0x04,0xd9,0xff,0xf0]
            xsadddp 7, 63, 27
@ -80,6 +83,9 @@
 # CHECK-BE: xscvuxddp 7, 27                    # encoding: [0xf0,0xe0,0xdd,0xa0]
 # CHECK-LE: xscvuxddp 7, 27                    # encoding: [0xa0,0xdd,0xe0,0xf0]
            xscvuxddp 7, 27
 # CHECK-BE: xsdivsp 7, 63, 27                  # encoding: [0xf0,0xff,0xd8,0xc4]
 # CHECK-LE: xsdivsp 7, 63, 27                  # encoding: [0xc4,0xd8,0xff,0xf0]
            xsdivsp 7, 63, 27
 # CHECK-BE: xsdivdp 7, 63, 27                  # encoding: [0xf0,0xff,0xd9,0xc4]
 # CHECK-LE: xsdivdp 7, 63, 27                  # encoding: [0xc4,0xd9,0xff,0xf0]
            xsdivdp 7, 63, 27
@ -101,6 +107,9 @@
 # CHECK-BE: xsmsubmdp 7, 63, 27                # encoding: [0xf0,0xff,0xd9,0xcc]
 # CHECK-LE: xsmsubmdp 7, 63, 27                # encoding: [0xcc,0xd9,0xff,0xf0]
            xsmsubmdp 7, 63, 27
 # CHECK-BE: xsmulsp 7, 63, 27                  # encoding: [0xf0,0xff,0xd8,0x84]
 # CHECK-LE: xsmulsp 7, 63, 27                  # encoding: [0x84,0xd8,0xff,0xf0]
            xsmulsp 7, 63, 27
 # CHECK-BE: xsmuldp 7, 63, 27                  # encoding: [0xf0,0xff,0xd9,0x84]
 # CHECK-LE: xsmuldp 7, 63, 27                  # encoding: [0x84,0xd9,0xff,0xf0]
            xsmuldp 7, 63, 27
@ -137,15 +146,27 @@
 # CHECK-BE: xsrdpiz 7, 27                      # encoding: [0xf0,0xe0,0xd9,0x64]
 # CHECK-LE: xsrdpiz 7, 27                      # encoding: [0x64,0xd9,0xe0,0xf0]
            xsrdpiz 7, 27
 # CHECK-BE: xsresp 7, 27                       # encoding: [0xf0,0xe0,0xd8,0x68]
 # CHECK-LE: xsresp 7, 27                       # encoding: [0x68,0xd8,0xe0,0xf0]
            xsresp 7, 27
 # CHECK-BE: xsredp 7, 27                       # encoding: [0xf0,0xe0,0xd9,0x68]
 # CHECK-LE: xsredp 7, 27                       # encoding: [0x68,0xd9,0xe0,0xf0]
            xsredp 7, 27
 # CHECK-BE: xsrsqrtesp 7, 27                   # encoding: [0xf0,0xe0,0xd8,0x28]
 # CHECK-LE: xsrsqrtesp 7, 27                   # encoding: [0x28,0xd8,0xe0,0xf0]
            xsrsqrtesp 7, 27
 # CHECK-BE: xsrsqrtedp 7, 27                   # encoding: [0xf0,0xe0,0xd9,0x28]
 # CHECK-LE: xsrsqrtedp 7, 27                   # encoding: [0x28,0xd9,0xe0,0xf0]
            xsrsqrtedp 7, 27
 # CHECK-BE: xssqrtsp 7, 27                     # encoding: [0xf0,0xe0,0xd8,0x2c]
 # CHECK-LE: xssqrtsp 7, 27                     # encoding: [0x2c,0xd8,0xe0,0xf0]
            xssqrtsp 7, 27
 # CHECK-BE: xssqrtdp 7, 27                     # encoding: [0xf0,0xe0,0xd9,0x2c]
 # CHECK-LE: xssqrtdp 7, 27                     # encoding: [0x2c,0xd9,0xe0,0xf0]
            xssqrtdp 7, 27
 # CHECK-BE: xssubsp 7, 63, 27                  # encoding: [0xf0,0xff,0xd8,0x44]
 # CHECK-LE: xssubsp 7, 63, 27                  # encoding: [0x44,0xd8,0xff,0xf0]
            xssubsp 7, 63, 27
 # CHECK-BE: xssubdp 7, 63, 27                  # encoding: [0xf0,0xff,0xd9,0x44]
 # CHECK-LE: xssubdp 7, 63, 27                  # encoding: [0x44,0xd9,0xff,0xf0]
            xssubdp 7, 63, 27