1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[Power9] Legalize and emit code for round & convert quad-precision values

Legalize and emit code for rounding and converting float128 values to double
precision and single precision.

Differential Revision: https://reviews.llvm.org/D46997

llvm-svn: 336299
This commit is contained in:
Lei Huang 2018-07-04 21:59:16 +00:00
parent 1415773fba
commit 46c288db0b
4 changed files with 180 additions and 3 deletions

View File

@ -515,7 +515,8 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
XSRSQRTESP,
XSSUBDP,
XSSUBSP,
XSCVDPSPN
XSCVDPSPN,
XSRSP
)>;
// Three Cycle PM operation. Only one PM unit per superslice so we use the whole

View File

@ -808,6 +808,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f128, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
}
}

View File

@ -1326,6 +1326,9 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT), (ins vssrc:$XB),
"xsresp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfre f32:$XB))]>;
// XSRSP: XX2Form record with assembly string "xsrsp $XT, $XB".
// The source operand $XB is in the vsfrc register class and the result $XT is
// in vssrc. The pattern list is empty, so this record carries no selection
// pattern of its own; it is referenced explicitly by the f128 -> f32 fpround
// Pat<> elsewhere in this file.
def XSRSP : XX2Form<60, 281,
(outs vssrc:$XT), (ins vsfrc:$XB),
"xsrsp $XT, $XB", IIC_VecFP, []>;
def XSSQRTSP : XX2Form<60, 11,
(outs vssrc:$XT), (ins vssrc:$XB),
"xssqrtsp $XT, $XB", IIC_FPSqrtS,
@ -2370,6 +2373,17 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
: X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
!strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
// [PO VRT XO VRB XO /]
// Instruction form built on X_RD5_XO5_RS5 whose result $vT is placed in the
// vfrc register class while the source $vB is taken from vrrc. The assembly
// string is the mnemonic `opc` followed by " $vT, $vB"; `pattern` is forwarded
// unchanged as the selection pattern list.
class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
list<dag> pattern>
: X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
!strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
// Takes the same parameters as X_VT5_XO5_VB5_VSFR and forwards them unchanged;
// the only difference is that isDOT is mixed in as an additional superclass.
class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
list<dag> pattern>
: X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isDOT;
let UseVSXReg = 1 in {
// [PO T XO B XO BX /]
class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
@ -2521,8 +2535,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(f128 (fpextend f64:$src)), (f128 (XSCVDPQP $src))>;
// Round & Convert QP -> DP (dword[1] is set to zero)
def XSCVQPDP : X_VT5_XO5_VB5 <63, 20, 836, "xscvqpdp" , []>;
def XSCVQPDPO : X_VT5_XO5_VB5_Ro<63, 20, 836, "xscvqpdpo", []>;
def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>;
def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo", []>;
// Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
@ -3363,7 +3377,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1),
(STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
// Round & Convert QP -> DP/SP
// f128 -> f64 is selected as a single XSCVQPDP.
// f128 -> f32 is selected as two instructions: XSCVQPDPO first, then XSRSP on
// its result.
// NOTE(review): presumably the round-to-odd variant is used for the first step
// to avoid double-rounding error when XSRSP rounds again -- confirm against the
// Power ISA description of xscvqpdpo.
def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>;
def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;
} // end HasP9Vector, AddedComplexity
let Predicates = [HasP9Vector] in {
let isPseudo = 1 in {
let mayStore = 1 in {

View File

@ -398,3 +398,157 @@ entry:
; CHECK-NEXT: stxv [[CONV]], 0(3)
; CHECK-NEXT: blr
}
; Convert QP to DP
; Test data shared by the conversion tests below:
;   @f128Array  - a 16-byte-aligned global array of four fp128 constants.
;   @f128global - a single 16-byte-aligned global fp128 constant.
@f128Array = global [4 x fp128]
[fp128 0xL00000000000000004004C00000000000,
fp128 0xLF000000000000000400808AB851EB851,
fp128 0xL5000000000000000400E0C26324C8366,
fp128 0xL8000000000000000400A24E2E147AE14], align 16
@f128global = global fp128 0xL300000000000000040089CA8F5C28F5C, align 16
; Loads an fp128 through the pointer argument, truncates it to double and
; returns the result. The expected assembly is a vector load, one xscvqpdp,
; and an xxlor that copies the result into register 1 before returning.
; Function Attrs: norecurse nounwind readonly
define double @qpConv2dp(fp128* nocapture readonly %a) {
; CHECK-LABEL: qpConv2dp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv 2, 0(3)
; CHECK-NEXT: xscvqpdp 2, 2
; CHECK-NEXT: xxlor 1, 2, 2
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
%conv = fptrunc fp128 %0 to double
ret double %conv
}
; Loads the global @f128global, truncates it to double and stores the result
; through %res. The expected assembly fetches the global's address via a TOC
; entry (addis/ld), loads the value with lxvx, converts with xscvqpdp and
; stores the double with stxsd.
; Function Attrs: norecurse nounwind
define void @qpConv2dp_02(double* nocapture %res) {
; CHECK-LABEL: qpConv2dp_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 4, 2, .LC6@toc@ha
; CHECK-NEXT: ld 4, .LC6@toc@l(4)
; CHECK-NEXT: lxvx 2, 0, 4
; CHECK-NEXT: xscvqpdp 2, 2
; CHECK-NEXT: stxsd 2, 0(3)
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* @f128global, align 16
%conv = fptrunc fp128 %0 to double
store double %conv, double* %res, align 8
ret void
}
; Loads element 0 of @f128Array, truncates it to double and stores the result
; at %res[%idx], where %idx is a sign-extended i32. The expected assembly
; shifts the index left by 3 (sldi) to form the byte offset and uses the
; indexed store stxsdx for the converted value.
; Function Attrs: norecurse nounwind
define void @qpConv2dp_03(double* nocapture %res, i32 signext %idx) {
; CHECK-LABEL: qpConv2dp_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 5, 2, .LC7@toc@ha
; CHECK-NEXT: sldi 4, 4, 3
; CHECK-NEXT: ld 5, .LC7@toc@l(5)
; CHECK-NEXT: lxvx 2, 0, 5
; CHECK-NEXT: xscvqpdp 2, 2
; CHECK-NEXT: stxsdx 2, 3, 4
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* getelementptr inbounds ([4 x fp128], [4 x fp128]* @f128Array, i64 0, i64 0), align 16
%conv = fptrunc fp128 %0 to double
%idxprom = sext i32 %idx to i64
%arrayidx = getelementptr inbounds double, double* %res, i64 %idxprom
store double %conv, double* %arrayidx, align 8
ret void
}
; Loads two fp128 values through %a and %b, adds them, truncates the sum to
; double and stores it through %res. This covers the case where the conversion
; input is the result of a quad-precision arithmetic instruction (xsaddqp)
; rather than a load.
; Function Attrs: norecurse nounwind
define void @qpConv2dp_04(fp128* nocapture readonly %a, fp128* nocapture readonly %b, double* nocapture %res) {
; CHECK-LABEL: qpConv2dp_04:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv 2, 0(3)
; CHECK-NEXT: lxv 3, 0(4)
; CHECK-NEXT: xsaddqp 2, 2, 3
; CHECK-NEXT: xscvqpdp 2, 2
; CHECK-NEXT: stxsd 2, 0(5)
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
%add = fadd fp128 %0, %1
%conv = fptrunc fp128 %add to double
store double %conv, double* %res, align 8
ret void
}
; Convert QP to SP
; Loads an fp128 through the pointer argument, truncates it to float and
; returns the result. The expected assembly is a two-step conversion:
; xscvqpdpo followed by xsrsp, with the final result written to register 1.
; Function Attrs: norecurse nounwind readonly
define float @qpConv2sp(fp128* nocapture readonly %a) {
; CHECK-LABEL: qpConv2sp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv 2, 0(3)
; CHECK-NEXT: xscvqpdpo 2, 2
; CHECK-NEXT: xsrsp 1, 2
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
%conv = fptrunc fp128 %0 to float
ret float %conv
}
; Loads the global @f128global, truncates it to float and stores the result
; through %res. The expected assembly fetches the global's address via a TOC
; entry (addis/ld), converts with xscvqpdpo followed by xsrsp, and stores the
; float with stfs.
; Function Attrs: norecurse nounwind
define void @qpConv2sp_02(float* nocapture %res) {
; CHECK-LABEL: qpConv2sp_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 4, 2, .LC6@toc@ha
; CHECK-NEXT: ld 4, .LC6@toc@l(4)
; CHECK-NEXT: lxvx 2, 0, 4
; CHECK-NEXT: xscvqpdpo 2, 2
; CHECK-NEXT: xsrsp 0, 2
; CHECK-NEXT: stfs 0, 0(3)
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* @f128global, align 16
%conv = fptrunc fp128 %0 to float
store float %conv, float* %res, align 4
ret void
}
; Loads element 3 of @f128Array, truncates it to float and stores the result
; at %res[%idx], where %idx is a sign-extended i32. The expected assembly
; reads the element at byte offset 48 from the array base (lxv 2, 48(5)),
; shifts the index left by 2 (sldi) to form the byte offset, and uses the
; indexed store stfsx for the converted value.
; Function Attrs: norecurse nounwind
define void @qpConv2sp_03(float* nocapture %res, i32 signext %idx) {
; CHECK-LABEL: qpConv2sp_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 5, 2, .LC7@toc@ha
; CHECK-NEXT: sldi 4, 4, 2
; CHECK-NEXT: ld 5, .LC7@toc@l(5)
; CHECK-NEXT: lxv 2, 48(5)
; CHECK-NEXT: xscvqpdpo 2, 2
; CHECK-NEXT: xsrsp 0, 2
; CHECK-NEXT: stfsx 0, 3, 4
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* getelementptr inbounds ([4 x fp128], [4 x fp128]* @f128Array, i64 0, i64 3), align 16
%conv = fptrunc fp128 %0 to float
%idxprom = sext i32 %idx to i64
%arrayidx = getelementptr inbounds float, float* %res, i64 %idxprom
store float %conv, float* %arrayidx, align 4
ret void
}
; Loads two fp128 values through %a and %b, adds them, truncates the sum to
; float and stores it through %res. This covers the case where the conversion
; input is the result of a quad-precision arithmetic instruction (xsaddqp)
; rather than a load; the conversion itself is still xscvqpdpo then xsrsp.
; Function Attrs: norecurse nounwind
define void @qpConv2sp_04(fp128* nocapture readonly %a, fp128* nocapture readonly %b, float* nocapture %res) {
; CHECK-LABEL: qpConv2sp_04:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv 2, 0(3)
; CHECK-NEXT: lxv 3, 0(4)
; CHECK-NEXT: xsaddqp 2, 2, 3
; CHECK-NEXT: xscvqpdpo 2, 2
; CHECK-NEXT: xsrsp 0, 2
; CHECK-NEXT: stfs 0, 0(5)
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
%add = fadd fp128 %0, %1
%conv = fptrunc fp128 %add to float
store float %conv, float* %res, align 4
ret void
}