mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[Power9] Implement new vsx instructions: quad-precision move, fp-arithmetic
This change implements the following vsx instructions: - quad-precision move xscpsgnqp, xsabsqp, xsnegqp, xsnabsqp - quad-precision fp-arithmetic xsaddqp(o) xsdivqp(o) xsmulqp(o) xssqrtqp(o) xssubqp(o) xsmaddqp(o) xsmsubqp(o) xsnmaddqp(o) xsnmsubqp(o) 22 instructions Thanks Nemanja and Kit for careful review and invaluable discussion! Reviewers: hal, nemanja, kbarton, tjablin, amehsan http://reviews.llvm.org/D16110 llvm-svn: 264565
This commit is contained in:
parent
fc74c1202b
commit
f63e6b8f62
@ -1819,6 +1819,55 @@ let Predicates = [HasP9Vector] in {
|
||||
: XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
|
||||
!strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
|
||||
|
||||
// [PO VRT VRA VRB XO /]
|
||||
class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
|
||||
list<dag> pattern>
|
||||
: XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
|
||||
!strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;
|
||||
|
||||
// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
|
||||
class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
|
||||
list<dag> pattern>
|
||||
: X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isDOT;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Quad-Precision Scalar Move Instructions:
|
||||
|
||||
// Copy Sign
|
||||
def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp", []>;
|
||||
|
||||
// Absolute/Negative-Absolute/Negate
|
||||
def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp" , []>;
|
||||
def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp", []>;
|
||||
def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp" , []>;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Quad-Precision Scalar Floating-Point Arithmetic Instructions:
|
||||
|
||||
// Add/Divide/Multiply/Subtract
|
||||
def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp" , []>;
|
||||
def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo", []>;
|
||||
def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp" , []>;
|
||||
def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo", []>;
|
||||
def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp" , []>;
|
||||
def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo", []>;
|
||||
def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" , []>;
|
||||
def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo", []>;
|
||||
|
||||
// Square-Root
|
||||
def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp" , []>;
|
||||
def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo", []>;
|
||||
|
||||
// (Negative) Multiply-{Add/Subtract}
|
||||
def XSMADDQP : X_VT5_VA5_VB5 <63, 388, "xsmaddqp" , []>;
|
||||
def XSMADDQPO : X_VT5_VA5_VB5_Ro<63, 388, "xsmaddqpo" , []>;
|
||||
def XSMSUBQP : X_VT5_VA5_VB5 <63, 420, "xsmsubqp" , []>;
|
||||
def XSMSUBQPO : X_VT5_VA5_VB5_Ro<63, 420, "xsmsubqpo" , []>;
|
||||
def XSNMADDQP : X_VT5_VA5_VB5 <63, 452, "xsnmaddqp" , []>;
|
||||
def XSNMADDQPO: X_VT5_VA5_VB5_Ro<63, 452, "xsnmaddqpo", []>;
|
||||
def XSNMSUBQP : X_VT5_VA5_VB5 <63, 484, "xsnmsubqp" , []>;
|
||||
def XSNMSUBQPO: X_VT5_VA5_VB5_Ro<63, 484, "xsnmsubqpo", []>;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Quad/Double-Precision Compare Instructions:
|
||||
|
||||
|
@ -141,6 +141,128 @@ Altivec:
|
||||
VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>;
|
||||
|
||||
VSX:
|
||||
- QP Copy Sign: xscpsgnqp
|
||||
. Similar to xscpsgndp
|
||||
. (set f128:$vT, (fcopysign f128:$vB, f128:$vA))
|
||||
|
||||
- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp
|
||||
. Similar to xsabsdp/xsnabsdp/xsnegdp
|
||||
. (set f128:$vT, (fabs f128:$vB)) // xsabsqp
|
||||
(set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp
|
||||
(set f128:$vT, (fneg f128:$vB)) // xsnegqp
|
||||
|
||||
- QP Add/Divide/Multiply/Subtract/Square-Root:
|
||||
xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp
|
||||
. Similar to xsadddp
|
||||
. isCommutable = 1
|
||||
(set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp
|
||||
(set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp
|
||||
|
||||
. isCommutable = 0
|
||||
(set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp
|
||||
(set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp
|
||||
(set f128:$vT, (fsqrt f128:$vB)) // xssqrtqp
|
||||
|
||||
- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root:
|
||||
xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo
|
||||
. Similar to xsrsqrtedp??
|
||||
def XSRSQRTEDP : XX2Form<60, 74,
|
||||
(outs vsfrc:$XT), (ins vsfrc:$XB),
|
||||
"xsrsqrtedp $XT, $XB", IIC_VecFP,
|
||||
[(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
|
||||
|
||||
. Define DAG Node in PPCInstrInfo.td:
|
||||
def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>;
|
||||
def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>;
|
||||
def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>;
|
||||
def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>;
|
||||
def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>;
|
||||
|
||||
DAG patterns of each instruction (PPCInstrVSX.td):
|
||||
. isCommutable = 1
|
||||
(set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo
|
||||
(set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo
|
||||
|
||||
. isCommutable = 0
|
||||
(set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo
|
||||
(set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo
|
||||
(set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo
|
||||
|
||||
- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp
|
||||
. Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp
|
||||
|
||||
. isCommutable = 1
|
||||
// xsmaddqp
|
||||
[(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>,
|
||||
RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
|
||||
AltVSXFMARel;
|
||||
|
||||
// xsmsubqp
|
||||
[(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
|
||||
RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
|
||||
AltVSXFMARel;
|
||||
|
||||
// xsnmaddqp
|
||||
[(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>,
|
||||
RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
|
||||
AltVSXFMARel;
|
||||
|
||||
// xsnmsubqp
|
||||
[(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
|
||||
RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
|
||||
AltVSXFMARel;
|
||||
|
||||
- Round to Odd of QP (Negative) Multiply-{Add/Subtract}:
|
||||
xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo
|
||||
. Similar to xsrsqrtedp??
|
||||
|
||||
. Define DAG Node in PPCInstrInfo.td:
|
||||
def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>;
|
||||
|
||||
It looks like we only need to define "PPCfmarto" for these instructions,
|
||||
because according to PowerISA_V3.0, these instructions perform RTO on
|
||||
fma's result:
|
||||
xsmaddqp(o)
|
||||
v ← bfp_MULTIPLY_ADD(src1, src3, src2)
|
||||
rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
|
||||
result ← bfp_CONVERT_TO_BFP128(rnd)
|
||||
|
||||
xsmsubqp(o)
|
||||
v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
|
||||
rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
|
||||
result ← bfp_CONVERT_TO_BFP128(rnd)
|
||||
|
||||
xsnmaddqp(o)
|
||||
v ← bfp_MULTIPLY_ADD(src1,src3,src2)
|
||||
rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
|
||||
result ← bfp_CONVERT_TO_BFP128(rnd)
|
||||
|
||||
xsnmsubqp(o)
|
||||
v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
|
||||
rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
|
||||
result ← bfp_CONVERT_TO_BFP128(rnd)
|
||||
|
||||
DAG patterns of each instruction (PPCInstrVSX.td):
|
||||
. isCommutable = 1
|
||||
// xsmaddqpo
|
||||
[(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>,
|
||||
RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
|
||||
AltVSXFMARel;
|
||||
|
||||
// xsmsubqpo
|
||||
[(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
|
||||
RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
|
||||
AltVSXFMARel;
|
||||
|
||||
// xsnmaddqpo
|
||||
[(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>,
|
||||
RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
|
||||
AltVSXFMARel;
|
||||
|
||||
// xsnmsubqpo
|
||||
[(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
|
||||
RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
|
||||
AltVSXFMARel;
|
||||
|
||||
- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp
|
||||
. ref: XSCMPUDP
|
||||
|
@ -542,6 +542,72 @@
|
||||
|
||||
# Power9 Instructions:
|
||||
|
||||
# CHECK: xscpsgnqp 7, 31, 27
|
||||
0xfc 0xff 0xd8 0xc8
|
||||
|
||||
# CHECK: xsabsqp 7, 27
|
||||
0xfc 0xe0 0xde 0x48
|
||||
|
||||
# CHECK: xsnegqp 7, 27
|
||||
0xfc 0xf0 0xde 0x48
|
||||
|
||||
# CHECK: xsnabsqp 7, 27
|
||||
0xfc 0xe8 0xde 0x48
|
||||
|
||||
# CHECK: xsaddqp 7, 31, 27
|
||||
0xfc 0xff 0xd8 0x08
|
||||
|
||||
# CHECK: xsaddqpo 7, 31, 27
|
||||
0xfc 0xff 0xd8 0x09
|
||||
|
||||
# CHECK: xsdivqp 7, 31, 27
|
||||
0xfc 0xff 0xdc 0x48
|
||||
|
||||
# CHECK: xsdivqpo 7, 31, 27
|
||||
0xfc 0xff 0xdc 0x49
|
||||
|
||||
# CHECK: xsmulqp 7, 31, 27
|
||||
0xfc 0xff 0xd8 0x48
|
||||
|
||||
# CHECK: xsmulqpo 7, 31, 27
|
||||
0xfc 0xff 0xd8 0x49
|
||||
|
||||
# CHECK: xssqrtqp 7, 31
|
||||
0xfc 0xfb 0xfe 0x48
|
||||
|
||||
# CHECK: xssqrtqpo 7, 31
|
||||
0xfc 0xfb 0xfe 0x49
|
||||
|
||||
# CHECK: xssubqp 7, 31, 27
|
||||
0xfc 0xff 0xdc 0x08
|
||||
|
||||
# CHECK: xssubqpo 7, 31, 27
|
||||
0xfc 0xff 0xdc 0x09
|
||||
|
||||
# CHECK: xsmaddqp 7, 31, 27
|
||||
0xfc 0xff 0xdb 0x08
|
||||
|
||||
# CHECK: xsmaddqpo 7, 31, 27
|
||||
0xfc 0xff 0xdb 0x09
|
||||
|
||||
# CHECK: xsmsubqp 7, 31, 27
|
||||
0xfc 0xff 0xdb 0x48
|
||||
|
||||
# CHECK: xsmsubqpo 7, 31, 27
|
||||
0xfc 0xff 0xdb 0x49
|
||||
|
||||
# CHECK: xsnmaddqp 7, 31, 27
|
||||
0xfc 0xff 0xdb 0x88
|
||||
|
||||
# CHECK: xsnmaddqpo 7, 31, 27
|
||||
0xfc 0xff 0xdb 0x89
|
||||
|
||||
# CHECK: xsnmsubqp 7, 31, 27
|
||||
0xfc 0xff 0xdb 0xc8
|
||||
|
||||
# CHECK: xsnmsubqpo 7, 31, 27
|
||||
0xfc 0xff 0xdb 0xc9
|
||||
|
||||
# CHECK: xscmpoqp 6, 31, 27
|
||||
0xff 0x1f 0xd9 0x08
|
||||
|
||||
|
@ -550,6 +550,80 @@
|
||||
|
||||
# Power9 Instructions:
|
||||
|
||||
# Copy Sign
|
||||
# CHECK-BE: xscpsgnqp 7, 31, 27 # encoding: [0xfc,0xff,0xd8,0xc8]
|
||||
# CHECK-LE: xscpsgnqp 7, 31, 27 # encoding: [0xc8,0xd8,0xff,0xfc]
|
||||
xscpsgnqp 7, 31, 27
|
||||
|
||||
# Absolute/Negative Absolute/Negate
|
||||
# CHECK-BE: xsabsqp 7, 27 # encoding: [0xfc,0xe0,0xde,0x48]
|
||||
# CHECK-LE: xsabsqp 7, 27 # encoding: [0x48,0xde,0xe0,0xfc]
|
||||
xsabsqp 7, 27
|
||||
# CHECK-BE: xsnegqp 7, 27 # encoding: [0xfc,0xf0,0xde,0x48]
|
||||
# CHECK-LE: xsnegqp 7, 27 # encoding: [0x48,0xde,0xf0,0xfc]
|
||||
xsnegqp 7, 27
|
||||
# CHECK-BE: xsnabsqp 7, 27 # encoding: [0xfc,0xe8,0xde,0x48]
|
||||
# CHECK-LE: xsnabsqp 7, 27 # encoding: [0x48,0xde,0xe8,0xfc]
|
||||
xsnabsqp 7, 27
|
||||
|
||||
# Add/Divide/Multiply/Square-Root/Subtract
|
||||
# CHECK-BE: xsaddqp 7, 31, 27 # encoding: [0xfc,0xff,0xd8,0x08]
|
||||
# CHECK-LE: xsaddqp 7, 31, 27 # encoding: [0x08,0xd8,0xff,0xfc]
|
||||
xsaddqp 7, 31, 27
|
||||
# CHECK-BE: xsaddqpo 7, 31, 27 # encoding: [0xfc,0xff,0xd8,0x09]
|
||||
# CHECK-LE: xsaddqpo 7, 31, 27 # encoding: [0x09,0xd8,0xff,0xfc]
|
||||
xsaddqpo 7, 31, 27
|
||||
# CHECK-BE: xsdivqp 7, 31, 27 # encoding: [0xfc,0xff,0xdc,0x48]
|
||||
# CHECK-LE: xsdivqp 7, 31, 27 # encoding: [0x48,0xdc,0xff,0xfc]
|
||||
xsdivqp 7, 31, 27
|
||||
# CHECK-BE: xsdivqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdc,0x49]
|
||||
# CHECK-LE: xsdivqpo 7, 31, 27 # encoding: [0x49,0xdc,0xff,0xfc]
|
||||
xsdivqpo 7, 31, 27
|
||||
# CHECK-BE: xsmulqp 7, 31, 27 # encoding: [0xfc,0xff,0xd8,0x48]
|
||||
# CHECK-LE: xsmulqp 7, 31, 27 # encoding: [0x48,0xd8,0xff,0xfc]
|
||||
xsmulqp 7, 31, 27
|
||||
# CHECK-BE: xsmulqpo 7, 31, 27 # encoding: [0xfc,0xff,0xd8,0x49]
|
||||
# CHECK-LE: xsmulqpo 7, 31, 27 # encoding: [0x49,0xd8,0xff,0xfc]
|
||||
xsmulqpo 7, 31, 27
|
||||
# CHECK-BE: xssqrtqp 7, 31 # encoding: [0xfc,0xfb,0xfe,0x48]
|
||||
# CHECK-LE: xssqrtqp 7, 31 # encoding: [0x48,0xfe,0xfb,0xfc]
|
||||
xssqrtqp 7, 31
|
||||
# CHECK-BE: xssqrtqpo 7, 31 # encoding: [0xfc,0xfb,0xfe,0x49]
|
||||
# CHECK-LE: xssqrtqpo 7, 31 # encoding: [0x49,0xfe,0xfb,0xfc]
|
||||
xssqrtqpo 7, 31
|
||||
# CHECK-BE: xssubqp 7, 31, 27 # encoding: [0xfc,0xff,0xdc,0x08]
|
||||
# CHECK-LE: xssubqp 7, 31, 27 # encoding: [0x08,0xdc,0xff,0xfc]
|
||||
xssubqp 7, 31, 27
|
||||
# CHECK-BE: xssubqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdc,0x09]
|
||||
# CHECK-LE: xssubqpo 7, 31, 27 # encoding: [0x09,0xdc,0xff,0xfc]
|
||||
xssubqpo 7, 31, 27
|
||||
|
||||
# (Negative) Multiply-Add/Subtract
|
||||
# CHECK-BE: xsmaddqp 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x08]
|
||||
# CHECK-LE: xsmaddqp 7, 31, 27 # encoding: [0x08,0xdb,0xff,0xfc]
|
||||
xsmaddqp 7, 31, 27
|
||||
# CHECK-BE: xsmaddqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x09]
|
||||
# CHECK-LE: xsmaddqpo 7, 31, 27 # encoding: [0x09,0xdb,0xff,0xfc]
|
||||
xsmaddqpo 7, 31, 27
|
||||
# CHECK-BE: xsmsubqp 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x48]
|
||||
# CHECK-LE: xsmsubqp 7, 31, 27 # encoding: [0x48,0xdb,0xff,0xfc]
|
||||
xsmsubqp 7, 31, 27
|
||||
# CHECK-BE: xsmsubqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x49]
|
||||
# CHECK-LE: xsmsubqpo 7, 31, 27 # encoding: [0x49,0xdb,0xff,0xfc]
|
||||
xsmsubqpo 7, 31, 27
|
||||
# CHECK-BE: xsnmaddqp 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x88]
|
||||
# CHECK-LE: xsnmaddqp 7, 31, 27 # encoding: [0x88,0xdb,0xff,0xfc]
|
||||
xsnmaddqp 7, 31, 27
|
||||
# CHECK-BE: xsnmaddqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x89]
|
||||
# CHECK-LE: xsnmaddqpo 7, 31, 27 # encoding: [0x89,0xdb,0xff,0xfc]
|
||||
xsnmaddqpo 7, 31, 27
|
||||
# CHECK-BE: xsnmsubqp 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0xc8]
|
||||
# CHECK-LE: xsnmsubqp 7, 31, 27 # encoding: [0xc8,0xdb,0xff,0xfc]
|
||||
xsnmsubqp 7, 31, 27
|
||||
# CHECK-BE: xsnmsubqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0xc9]
|
||||
# CHECK-LE: xsnmsubqpo 7, 31, 27 # encoding: [0xc9,0xdb,0xff,0xfc]
|
||||
xsnmsubqpo 7, 31, 27
|
||||
|
||||
# Compare Ordered/Unordered
|
||||
# CHECK-BE: xscmpoqp 6, 31, 27 # encoding: [0xff,0x1f,0xd9,0x08]
|
||||
# CHECK-LE: xscmpoqp 6, 31, 27 # encoding: [0x08,0xd9,0x1f,0xff]
|
||||
|
Loading…
x
Reference in New Issue
Block a user