1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

Add support for VSX scalar single-precision arithmetic in the PPC target

http://reviews.llvm.org/D9891
Following up on the VSX single precision loads and stores added earlier, this
adds support for elementary arithmetic operations on single precision values
in VSX registers. These instructions utilize the new VSSRC register class.
Instructions added:
xsaddsp
xsdivsp
xsmulsp
xsresp
xsrsqrtesp
xssqrtsp
xssubsp

llvm-svn: 237937
This commit is contained in:
Nemanja Ivanovic 2015-05-21 19:32:49 +00:00
parent dda3f1317e
commit 78592ebe3a
5 changed files with 306 additions and 46 deletions

View File

@ -1000,10 +1000,12 @@ let isCommutable = 1 in {
[(set v4i32:$XT, (vnot_ppc (and v4i32:$XA, [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
v4i32:$XB)))]>; v4i32:$XB)))]>;
} // isCommutable } // isCommutable
def XXLORC : XX3Form<60, 170, def XXLORC : XX3Form<60, 170,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlorc $XT, $XA, $XB", IIC_VecGeneral, "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
// VSX scalar loads introduced in ISA 2.07 // VSX scalar loads introduced in ISA 2.07
let mayLoad = 1 in { let mayLoad = 1 in {
def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
@ -1026,6 +1028,7 @@ def XXLORC : XX3Form<60, 170,
"stxsiwx $XT, $dst", IIC_LdStSTFD, "stxsiwx $XT, $dst", IIC_LdStSTFD,
[(PPCstfiwx f64:$XT, xoaddr:$dst)]>; [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
} // mayStore } // mayStore
def : Pat<(f64 (extloadf32 xoaddr:$src)), def : Pat<(f64 (extloadf32 xoaddr:$src)),
(COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
def : Pat<(f64 (fextend f32:$src)), def : Pat<(f64 (fextend f32:$src)),
@ -1042,6 +1045,39 @@ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
(SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
(SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
// VSX Elementary Scalar FP arithmetic (SP)
// The two definitions inside this let-block (add and multiply) are tagged
// isCommutable = 1; the other SP arithmetic defs that follow it are not.
let isCommutable = 1 in {
// xsaddsp: XX3Form<60, 0> with inputs $XA/$XB and result $XT, all in the
// vssrc register class. The selection pattern maps an f32 fadd onto it.
// NOTE(review): XX3Form is declared outside this hunk; 60 / 0 are presumably
// the primary and extended opcode fields -- confirm against the class.
def XSADDSP : XX3Form<60, 0,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsaddsp $XT, $XA, $XB", IIC_VecFP,
[(set f32:$XT, (fadd f32:$XA, f32:$XB))]>;
// xsmulsp: same operand layout as xsaddsp, XX3Form<60, 16>; the selection
// pattern maps an f32 fmul onto it.
def XSMULSP : XX3Form<60, 16,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsmulsp $XT, $XA, $XB", IIC_VecFP,
[(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
} // isCommutable
// xsdivsp: XX3Form<60, 24> with inputs $XA/$XB and result $XT in vssrc.
// Selected for an f32 fdiv of $XA by $XB. Uses the IIC_FPDivS itinerary
// class instead of the IIC_VecFP used by the add/mul/sub definitions.
def XSDIVSP : XX3Form<60, 24,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsdivsp $XT, $XA, $XB", IIC_FPDivS,
[(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>;
// xsresp: XX2Form<60, 26> with a single vssrc input $XB and result $XT.
// Selected for the PPCfre node on f32.
// NOTE(review): PPCfre is defined outside this hunk; presumably it is the
// reciprocal-estimate node -- confirm.
def XSRESP : XX2Form<60, 26,
(outs vssrc:$XT), (ins vssrc:$XB),
"xsresp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfre f32:$XB))]>;
// xssqrtsp: XX2Form<60, 11> with a single vssrc input $XB and result $XT.
// Selected for an f32 fsqrt; scheduled with the IIC_FPSqrtS itinerary class.
def XSSQRTSP : XX2Form<60, 11,
(outs vssrc:$XT), (ins vssrc:$XB),
"xssqrtsp $XT, $XB", IIC_FPSqrtS,
[(set f32:$XT, (fsqrt f32:$XB))]>;
// xsrsqrtesp: XX2Form<60, 10> with a single vssrc input $XB and result $XT.
// Selected for the PPCfrsqrte node on f32.
// NOTE(review): PPCfrsqrte is defined outside this hunk; presumably it is the
// reciprocal-square-root-estimate node -- confirm.
def XSRSQRTESP : XX2Form<60, 10,
(outs vssrc:$XT), (ins vssrc:$XB),
"xsrsqrtesp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
// xssubsp: XX3Form<60, 8> with inputs $XA/$XB and result $XT in vssrc.
// Selected for an f32 fsub ($XA minus $XB). It sits outside the
// isCommutable let-block, so operand order is significant here.
def XSSUBSP : XX3Form<60, 8,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xssubsp $XT, $XA, $XB", IIC_VecFP,
[(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
} // AddedComplexity = 400 } // AddedComplexity = 400
} // HasP8Vector } // HasP8Vector

View File

@ -0,0 +1,120 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
; Shared test inputs: @a and @b are loaded by the single-precision functions,
; @c and @d by the double-precision ones.
@a = global float 3.000000e+00, align 4
@b = global float 4.000000e+00, align 4
@c = global double 3.000000e+00, align 8
@d = global double 4.000000e+00, align 8
; Adds the float globals @a and @b; the generated code is expected to use
; the VSX scalar single-precision add, xsaddsp.
define float @emit_xsaddsp() {
; CHECK-LABEL: @emit_xsaddsp
entry:
  %lhs = load float, float* @a, align 4
  %rhs = load float, float* @b, align 4
  %sum = fadd float %lhs, %rhs
; CHECK: xsaddsp {{[0-9]+}}
  ret float %sum
}
; Subtracts the float global @b from @a; the generated code is expected to
; use the VSX scalar single-precision subtract, xssubsp.
define float @emit_xssubsp() {
; CHECK-LABEL: @emit_xssubsp
entry:
  %lhs = load float, float* @a, align 4
  %rhs = load float, float* @b, align 4
  %diff = fsub float %lhs, %rhs
; CHECK: xssubsp {{[0-9]+}}
  ret float %diff
}
; Divides the float global @a by @b (no fast-math flags); the generated code
; is expected to use the VSX scalar single-precision divide, xsdivsp.
define float @emit_xsdivsp() {
; CHECK-LABEL: @emit_xsdivsp
entry:
  %num = load float, float* @a, align 4
  %den = load float, float* @b, align 4
  %quot = fdiv float %num, %den
; CHECK: xsdivsp {{[0-9]+}}
  ret float %quot
}
; Multiplies the float globals @a and @b; the generated code is expected to
; use the VSX scalar single-precision multiply, xsmulsp.
define float @emit_xsmulsp() {
; CHECK-LABEL: @emit_xsmulsp
entry:
  %lhs = load float, float* @a, align 4
  %rhs = load float, float* @b, align 4
  %prod = fmul float %lhs, %rhs
; CHECK: xsmulsp {{[0-9]+}}
  ret float %prod
}
; Calls sqrtf on the float global @b; the generated code is expected to use
; the VSX scalar single-precision square root, xssqrtsp.
define float @emit_xssqrtsp() {
; CHECK-LABEL: @emit_xssqrtsp
entry:
  %val = load float, float* @b, align 4
  %root = call float @sqrtf(float %val)
; CHECK: xssqrtsp {{[0-9]+}}
  ret float %root
}
; Function Attrs: nounwind
; External declaration of sqrtf, the callee used by @emit_xssqrtsp.
declare float @sqrtf(float)
; Double-precision counterpart of the add test: sums the double globals @c
; and @d and expects xsadddp in the generated code.
define double @emit_xsadddp() {
; CHECK-LABEL: @emit_xsadddp
entry:
  %lhs = load double, double* @c, align 8
  %rhs = load double, double* @d, align 8
  %sum = fadd double %lhs, %rhs
; CHECK: xsadddp {{[0-9]+}}
  ret double %sum
}
; Double-precision counterpart of the subtract test: computes @c minus @d and
; expects xssubdp in the generated code.
define double @emit_xssubdp() {
; CHECK-LABEL: @emit_xssubdp
entry:
  %lhs = load double, double* @c, align 8
  %rhs = load double, double* @d, align 8
  %diff = fsub double %lhs, %rhs
; CHECK: xssubdp {{[0-9]+}}
  ret double %diff
}
; Double-precision counterpart of the divide test: computes @c divided by @d
; (no fast-math flags) and expects xsdivdp in the generated code.
define double @emit_xsdivdp() {
; CHECK-LABEL: @emit_xsdivdp
entry:
  %num = load double, double* @c, align 8
  %den = load double, double* @d, align 8
  %quot = fdiv double %num, %den
; CHECK: xsdivdp {{[0-9]+}}
  ret double %quot
}
; Double-precision counterpart of the multiply test: multiplies @c by @d and
; expects xsmuldp in the generated code.
define double @emit_xsmuldp() {
; CHECK-LABEL: @emit_xsmuldp
entry:
  %lhs = load double, double* @c, align 8
  %rhs = load double, double* @d, align 8
  %prod = fmul double %lhs, %rhs
; CHECK: xsmuldp {{[0-9]+}}
  ret double %prod
}
; Double-precision counterpart of the square-root test: calls sqrt on the
; double global @d and expects xssqrtdp in the generated code.
define double @emit_xssqrtdp() {
; CHECK-LABEL: @emit_xssqrtdp
entry:
  %val = load double, double* @d, align 8
  %root = call double @sqrt(double %val)
; CHECK: xssqrtdp {{[0-9]+}}
  ret double %root
}
; Function Attrs: nounwind
; External declaration of sqrt, the callee used by @emit_xssqrtdp.
declare double @sqrt(double)

View File

@ -0,0 +1,62 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -enable-unsafe-fp-math | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-unsafe-fp-math | FileCheck %s
; Shared test inputs: @a and @b are loaded by the single-precision functions,
; @c and @d by the double-precision ones.
@a = global float 3.000000e+00, align 4
@b = global float 4.000000e+00, align 4
@c = global double 3.000000e+00, align 8
@d = global double 4.000000e+00, align 8
; Fast-math division of the float global @a by @b. This file is run with
; -enable-unsafe-fp-math, and the generated code is expected to contain the
; single-precision reciprocal-estimate instruction xsresp.
define float @emit_xsresp() {
; CHECK-LABEL: @emit_xsresp
entry:
  %num = load float, float* @a, align 4
  %den = load float, float* @b, align 4
  %quot = fdiv fast float %num, %den
; CHECK: xsresp {{[0-9]+}}
  ret float %quot
}
; Divides the argument %f (round-tripped through a stack slot) by
; llvm.sqrt.f32 of the float global @b using a fast-math fdiv. The generated
; code is expected to contain the single-precision reciprocal-square-root
; estimate instruction xsrsqrtesp.
define float @emit_xsrsqrtesp(float %f) {
; CHECK-LABEL: @emit_xsrsqrtesp
entry:
  %f.slot = alloca float, align 4
  store float %f, float* %f.slot, align 4
  %num = load float, float* %f.slot, align 4
  %val = load float, float* @b, align 4
  %root = call float @llvm.sqrt.f32(float %val)
  %quot = fdiv fast float %num, %root
; CHECK: xsrsqrtesp {{[0-9]+}}
  ret float %quot
}
; Function Attrs: nounwind readnone
; Declaration of the llvm.sqrt.f32 intrinsic called by @emit_xsrsqrtesp.
declare float @llvm.sqrt.f32(float)
; Double-precision counterpart of the reciprocal-estimate test: fast-math
; division of @c by @d, expected to produce xsredp in the generated code.
define double @emit_xsredp() {
; CHECK-LABEL: @emit_xsredp
entry:
  %num = load double, double* @c, align 8
  %den = load double, double* @d, align 8
  %quot = fdiv fast double %num, %den
; CHECK: xsredp {{[0-9]+}}
  ret double %quot
}
; Double-precision counterpart of the reciprocal-square-root-estimate test:
; divides the argument %f (round-tripped through a stack slot) by
; llvm.sqrt.f64 of @d using a fast-math fdiv, expecting xsrsqrtedp in the
; generated code.
define double @emit_xsrsqrtedp(double %f) {
; CHECK-LABEL: @emit_xsrsqrtedp
entry:
  %f.slot = alloca double, align 8
  store double %f, double* %f.slot, align 8
  %num = load double, double* %f.slot, align 8
  %val = load double, double* @d, align 8
  %root = call double @llvm.sqrt.f64(double %val)
  %quot = fdiv fast double %num, %root
; CHECK: xsrsqrtedp {{[0-9]+}}
  ret double %quot
}
; Function Attrs: nounwind readnone
; Declaration of the llvm.sqrt.f64 intrinsic called by @emit_xsrsqrtedp.
; The trailing attribute-group reference was dropped: this file defines no
; `attributes #1` group, and the llvm.sqrt.f32 declaration carries none.
declare double @llvm.sqrt.f64(double)

View File

@ -39,6 +39,9 @@
# CHECK: xsabsdp 7, 27 # CHECK: xsabsdp 7, 27
0xf0 0xe0 0xdd 0x64 0xf0 0xe0 0xdd 0x64
# CHECK: xsaddsp 7, 63, 27
0xf0 0xff 0xd8 0x04
# CHECK: xsadddp 7, 63, 27 # CHECK: xsadddp 7, 63, 27
0xf0 0xff 0xd9 0x04 0xf0 0xff 0xd9 0x04
@ -75,6 +78,9 @@
# CHECK: xscvuxddp 7, 27 # CHECK: xscvuxddp 7, 27
0xf0 0xe0 0xdd 0xa0 0xf0 0xe0 0xdd 0xa0
# CHECK: xsdivsp 7, 63, 27
0xf0 0xff 0xd8 0xc4
# CHECK: xsdivdp 7, 63, 27 # CHECK: xsdivdp 7, 63, 27
0xf0 0xff 0xd9 0xc4 0xf0 0xff 0xd9 0xc4
@ -96,6 +102,9 @@
# CHECK: xsmsubmdp 7, 63, 27 # CHECK: xsmsubmdp 7, 63, 27
0xf0 0xff 0xd9 0xcc 0xf0 0xff 0xd9 0xcc
# CHECK: xsmulsp 7, 63, 27
0xf0 0xff 0xd8 0x84
# CHECK: xsmuldp 7, 63, 27 # CHECK: xsmuldp 7, 63, 27
0xf0 0xff 0xd9 0x84 0xf0 0xff 0xd9 0x84
@ -132,15 +141,27 @@
# CHECK: xsrdpiz 7, 27 # CHECK: xsrdpiz 7, 27
0xf0 0xe0 0xd9 0x64 0xf0 0xe0 0xd9 0x64
# CHECK: xsresp 7, 27
0xf0 0xe0 0xd8 0x68
# CHECK: xsredp 7, 27 # CHECK: xsredp 7, 27
0xf0 0xe0 0xd9 0x68 0xf0 0xe0 0xd9 0x68
# CHECK: xsrsqrtesp 7, 27
0xf0 0xe0 0xd8 0x28
# CHECK: xsrsqrtedp 7, 27 # CHECK: xsrsqrtedp 7, 27
0xf0 0xe0 0xd9 0x28 0xf0 0xe0 0xd9 0x28
# CHECK: xssqrtsp 7, 27
0xf0 0xe0 0xd8 0x2c
# CHECK: xssqrtdp 7, 27 # CHECK: xssqrtdp 7, 27
0xf0 0xe0 0xd9 0x2c 0xf0 0xe0 0xd9 0x2c
# CHECK: xssubsp 7, 63, 27
0xf0 0xff 0xd8 0x44
# CHECK: xssubdp 7, 63, 27 # CHECK: xssubdp 7, 63, 27
0xf0 0xff 0xd9 0x44 0xf0 0xff 0xd9 0x44

View File

@ -44,6 +44,9 @@
# CHECK-BE: xsabsdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x64] # CHECK-BE: xsabsdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x64]
# CHECK-LE: xsabsdp 7, 27 # encoding: [0x64,0xdd,0xe0,0xf0] # CHECK-LE: xsabsdp 7, 27 # encoding: [0x64,0xdd,0xe0,0xf0]
xsabsdp 7, 27 xsabsdp 7, 27
# CHECK-BE: xsaddsp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x04]
# CHECK-LE: xsaddsp 7, 63, 27 # encoding: [0x04,0xd8,0xff,0xf0]
xsaddsp 7, 63, 27
# CHECK-BE: xsadddp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x04] # CHECK-BE: xsadddp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x04]
# CHECK-LE: xsadddp 7, 63, 27 # encoding: [0x04,0xd9,0xff,0xf0] # CHECK-LE: xsadddp 7, 63, 27 # encoding: [0x04,0xd9,0xff,0xf0]
xsadddp 7, 63, 27 xsadddp 7, 63, 27
@ -80,6 +83,9 @@
# CHECK-BE: xscvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa0] # CHECK-BE: xscvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa0]
# CHECK-LE: xscvuxddp 7, 27 # encoding: [0xa0,0xdd,0xe0,0xf0] # CHECK-LE: xscvuxddp 7, 27 # encoding: [0xa0,0xdd,0xe0,0xf0]
xscvuxddp 7, 27 xscvuxddp 7, 27
# CHECK-BE: xsdivsp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0xc4]
# CHECK-LE: xsdivsp 7, 63, 27 # encoding: [0xc4,0xd8,0xff,0xf0]
xsdivsp 7, 63, 27
# CHECK-BE: xsdivdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xc4] # CHECK-BE: xsdivdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xc4]
# CHECK-LE: xsdivdp 7, 63, 27 # encoding: [0xc4,0xd9,0xff,0xf0] # CHECK-LE: xsdivdp 7, 63, 27 # encoding: [0xc4,0xd9,0xff,0xf0]
xsdivdp 7, 63, 27 xsdivdp 7, 63, 27
@ -101,6 +107,9 @@
# CHECK-BE: xsmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xcc] # CHECK-BE: xsmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xcc]
# CHECK-LE: xsmsubmdp 7, 63, 27 # encoding: [0xcc,0xd9,0xff,0xf0] # CHECK-LE: xsmsubmdp 7, 63, 27 # encoding: [0xcc,0xd9,0xff,0xf0]
xsmsubmdp 7, 63, 27 xsmsubmdp 7, 63, 27
# CHECK-BE: xsmulsp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x84]
# CHECK-LE: xsmulsp 7, 63, 27 # encoding: [0x84,0xd8,0xff,0xf0]
xsmulsp 7, 63, 27
# CHECK-BE: xsmuldp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x84] # CHECK-BE: xsmuldp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x84]
# CHECK-LE: xsmuldp 7, 63, 27 # encoding: [0x84,0xd9,0xff,0xf0] # CHECK-LE: xsmuldp 7, 63, 27 # encoding: [0x84,0xd9,0xff,0xf0]
xsmuldp 7, 63, 27 xsmuldp 7, 63, 27
@ -137,15 +146,27 @@
# CHECK-BE: xsrdpiz 7, 27 # encoding: [0xf0,0xe0,0xd9,0x64] # CHECK-BE: xsrdpiz 7, 27 # encoding: [0xf0,0xe0,0xd9,0x64]
# CHECK-LE: xsrdpiz 7, 27 # encoding: [0x64,0xd9,0xe0,0xf0] # CHECK-LE: xsrdpiz 7, 27 # encoding: [0x64,0xd9,0xe0,0xf0]
xsrdpiz 7, 27 xsrdpiz 7, 27
# CHECK-BE: xsresp 7, 27 # encoding: [0xf0,0xe0,0xd8,0x68]
# CHECK-LE: xsresp 7, 27 # encoding: [0x68,0xd8,0xe0,0xf0]
xsresp 7, 27
# CHECK-BE: xsredp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x68] # CHECK-BE: xsredp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x68]
# CHECK-LE: xsredp 7, 27 # encoding: [0x68,0xd9,0xe0,0xf0] # CHECK-LE: xsredp 7, 27 # encoding: [0x68,0xd9,0xe0,0xf0]
xsredp 7, 27 xsredp 7, 27
# CHECK-BE: xsrsqrtesp 7, 27 # encoding: [0xf0,0xe0,0xd8,0x28]
# CHECK-LE: xsrsqrtesp 7, 27 # encoding: [0x28,0xd8,0xe0,0xf0]
xsrsqrtesp 7, 27
# CHECK-BE: xsrsqrtedp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x28] # CHECK-BE: xsrsqrtedp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x28]
# CHECK-LE: xsrsqrtedp 7, 27 # encoding: [0x28,0xd9,0xe0,0xf0] # CHECK-LE: xsrsqrtedp 7, 27 # encoding: [0x28,0xd9,0xe0,0xf0]
xsrsqrtedp 7, 27 xsrsqrtedp 7, 27
# CHECK-BE: xssqrtsp 7, 27 # encoding: [0xf0,0xe0,0xd8,0x2c]
# CHECK-LE: xssqrtsp 7, 27 # encoding: [0x2c,0xd8,0xe0,0xf0]
xssqrtsp 7, 27
# CHECK-BE: xssqrtdp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x2c] # CHECK-BE: xssqrtdp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x2c]
# CHECK-LE: xssqrtdp 7, 27 # encoding: [0x2c,0xd9,0xe0,0xf0] # CHECK-LE: xssqrtdp 7, 27 # encoding: [0x2c,0xd9,0xe0,0xf0]
xssqrtdp 7, 27 xssqrtdp 7, 27
# CHECK-BE: xssubsp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x44]
# CHECK-LE: xssubsp 7, 63, 27 # encoding: [0x44,0xd8,0xff,0xf0]
xssubsp 7, 63, 27
# CHECK-BE: xssubdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x44] # CHECK-BE: xssubdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x44]
# CHECK-LE: xssubdp 7, 63, 27 # encoding: [0x44,0xd9,0xff,0xf0] # CHECK-LE: xssubdp 7, 63, 27 # encoding: [0x44,0xd9,0xff,0xf0]
xssubdp 7, 63, 27 xssubdp 7, 63, 27