1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[PowerPC] Improve handling of some BUILD_VECTOR nodes

An analysis of real world code turned up a number of patterns with BUILD_VECTOR
of nodes resulting from operations on extracted vector elements for which we
produce poor code. This addresses those cases. No attempt is made for
completeness as that would entail a large amount of work for something that
there is no evidence of in real code.

Differential revision: https://reviews.llvm.org/D72660
This commit is contained in:
Nemanja Ivanovic 2020-03-23 17:34:05 -05:00
parent 5f9c6088ec
commit 86f2aa5d7c
5 changed files with 824 additions and 568 deletions

View File

@ -1341,6 +1341,21 @@ def DWToSPExtractConv {
dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
}
// Named dag fragments used by build_vector patterns that take elements out of
// a v4i32 ($A), convert each one to f64 and pair two of them into a v2f64.
//   El<N>S - element N, moved with PPCmtvsra and converted with PPCfcfid.
//   El<N>U - element N, moved with PPCmtvsrz and converted with PPCfcfidu.
//   BV<x><y>S / BV<x><y>U - v2f64 build_vector of converted elements x and y
//                           (only the 0/2 and 1/3 pairings are provided).
// NOTE(review): presumably the S/U suffixes stand for signed/unsigned
// conversion - confirm against the PPCfcfid/PPCfcfidu node definitions.
def WToDPExtractConv {
dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0))));
dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1))));
dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2))));
dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3))));
dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0))));
dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1))));
dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2))));
dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3))));
// Even-element pair and odd-element pair, PPCfcfid flavour.
dag BV02S = (v2f64 (build_vector El0S, El2S));
dag BV13S = (v2f64 (build_vector El1S, El3S));
// Even-element pair and odd-element pair, PPCfcfidu flavour.
dag BV02U = (v2f64 (build_vector El0U, El2U));
dag BV13U = (v2f64 (build_vector El1U, El3U));
}
// The following VSX instructions were introduced in Power ISA 2.07
/* FIXME: if the operands are v2i64, these patterns will not match.
we should define new patterns or otherwise match the same patterns
@ -4171,6 +4186,41 @@ let AddedComplexity = 400 in {
// NOTE(review): the enclosing `let ... in {` header is not visible from here;
// the sequences below are written for big-endian element numbering, where
// vector element i lives in word i of the register.
//
// Background for every pattern that ends in XVCVSPDP / XVCVSXWDP / XVCVUXWDP:
// those instructions convert word elements 0 and 2 of their source into
// doubleword elements 0 and 1 of the result (see the Power ISA). Each pattern
// therefore only has to move the two requested words into positions 0 and 2.

def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
                               ExtDbl.B0U, ExtDbl.B1U)),
          (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;

// fpextend of elements {0, 1}: XXMRGHW $A, $A = {A0, A0, A1, A1}.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
                               (f64 (fpextend (extractelt v4f32:$A, 1))))),
          (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;

// fpextend of elements {1, 0}: same as {0, 1} followed by a doubleword swap.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
                               (f64 (fpextend (extractelt v4f32:$A, 0))))),
          (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
                           (XVCVSPDP (XXMRGHW $A, $A)), 2))>;

// fpextend of elements {0, 2}: the words are already in positions 0 and 2.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
                               (f64 (fpextend (extractelt v4f32:$A, 2))))),
          (v2f64 (XVCVSPDP $A))>;

// fpextend of elements {1, 3}: rotate left by ONE word so that
// XXSLDWI $A, $A, 1 = {A1, A2, A3, A0} puts A1/A3 in words 0/2.
// (A shift of 3 would give {A3, A0, A1, A2} and swap the two results.)
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
                               (f64 (fpextend (extractelt v4f32:$A, 3))))),
          (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;

// fpextend of elements {2, 3}: XXMRGLW $A, $A = {A2, A2, A3, A3}.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
                               (f64 (fpextend (extractelt v4f32:$A, 3))))),
          (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;

// fpextend of elements {3, 2}: same as {2, 3} followed by a doubleword swap.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
                               (f64 (fpextend (extractelt v4f32:$A, 2))))),
          (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
                           (XVCVSPDP (XXMRGLW $A, $A)), 2))>;

// fpextend of element 0 of two vectors:
// XXPERMDI $A, $B, 0 = {A0, A1, B0, B1}.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
                               (f64 (fpextend (extractelt v4f32:$B, 0))))),
          (v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>;

// fpextend of element 3 of two vectors:
// XXPERMDI $A, $B, 3 = {A2, A3, B2, B3}; rotating that left by one word
// gives {A3, B2, B3, A2}.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
                               (f64 (fpextend (extractelt v4f32:$B, 3))))),
          (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
                                    (XXPERMDI $A, $B, 3), 1)))>;

// Word -> double conversions of elements {0, 2} and {1, 3}. The {1, 3}
// forms need the same rotate-left-by-one as the fpextend {1, 3} pattern.
def : Pat<WToDPExtractConv.BV02S,
          (v2f64 (XVCVSXWDP $A))>;
def : Pat<WToDPExtractConv.BV13S,
          (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV02U,
          (v2f64 (XVCVUXWDP $A))>;
def : Pat<WToDPExtractConv.BV13U,
          (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
}
let Predicates = [IsLittleEndian, HasP8Vector] in {
@ -4249,6 +4299,41 @@ let AddedComplexity = 400 in {
// NOTE(review): the enclosing `let ... in {` header is not visible from here;
// these sequences mirror the ones used for the other endianness (high/low
// merges exchanged, operand order of XXPERMDI reversed), so this is presumably
// the little-endian section - confirm against the enclosing Predicates.
def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
ExtDbl.B0U, ExtDbl.B1U)),
(v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
// fpextend of elements {0, 1} of $A: low-word merge, then vector convert.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$A, 1))))),
(v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
// fpextend of elements {1, 0} of $A: the {0, 1} sequence followed by an
// XXPERMDI of the result with itself (selector 2).
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 0))))),
(v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
(XVCVSPDP (XXMRGLW $A, $A)), 2))>;
// fpextend of elements {0, 2} of $A: rotate by one word, then convert.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$A, 2))))),
(v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
// fpextend of elements {1, 3} of $A: a single convert, no shuffle required.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 3))))),
(v2f64 (XVCVSPDP $A))>;
// fpextend of elements {2, 3} of $A: high-word merge, then vector convert.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
(f64 (fpextend (extractelt v4f32:$A, 3))))),
(v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
// fpextend of elements {3, 2} of $A: the {2, 3} sequence followed by an
// XXPERMDI of the result with itself (selector 2).
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
(f64 (fpextend (extractelt v4f32:$A, 2))))),
(v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
(XVCVSPDP (XXMRGHW $A, $A)), 2))>;
// fpextend of element 0 of $A and element 0 of $B: combine the two inputs
// with XXPERMDI (note the $B, $A operand order), rotate by one word, convert.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$B, 0))))),
(v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3),
(XXPERMDI $B, $A, 3), 1)))>;
// fpextend of element 3 of $A and element 3 of $B: XXPERMDI (again $B, $A)
// followed directly by the convert.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
(f64 (fpextend (extractelt v4f32:$B, 3))))),
(v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
// Word -> double conversions described by WToDPExtractConv: the {0, 2}
// pairings need a one-word rotate first, the {1, 3} pairings convert $A
// directly.
def : Pat<WToDPExtractConv.BV02S,
(v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV13S,
(v2f64 (XVCVSXWDP $A))>;
def : Pat<WToDPExtractConv.BV02U,
(v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV13U,
(v2f64 (XVCVUXWDP $A))>;
}
let Predicates = [HasDirectMove] in {

View File

@ -6123,3 +6123,412 @@ entry:
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %splat.splat
}
; Some additional patterns that come up in real code.
; sitofp of elements 0 and 2 of a <4 x i32>, packed into a <2 x double>.
define dso_local <2 x double> @sint_to_fp_widen02(<4 x i32> %a) {
; P9BE-LABEL: sint_to_fp_widen02:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xvcvsxwdp v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: sint_to_fp_widen02:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
; P9LE-NEXT: xvcvsxwdp v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: sint_to_fp_widen02:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xvcvsxwdp v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: sint_to_fp_widen02:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
; P8LE-NEXT: xvcvsxwdp v2, vs0
; P8LE-NEXT: blr
entry:
  ; Pull out the two source lanes.
  %elt0 = extractelement <4 x i32> %a, i32 0
  %elt2 = extractelement <4 x i32> %a, i32 2
  ; Convert each lane to double.
  %dbl0 = sitofp i32 %elt0 to double
  %dbl2 = sitofp i32 %elt2 to double
  ; Assemble the result vector.
  %partial = insertelement <2 x double> undef, double %dbl0, i32 0
  %result = insertelement <2 x double> %partial, double %dbl2, i32 1
  ret <2 x double> %result
}
; sitofp of elements 1 and 3 of a <4 x i32>, packed into a <2 x double>.
;
; xvcvsxwdp converts word elements 0 and 2 of its source. On big-endian
; targets elements 1 and 3 therefore have to be rotated left by one word
; (xxsldwi ..., 1 -> {a1, a2, a3, a0}); a shift of 3 would give
; {a3, a0, a1, a2} and return the two results in swapped order. On
; little-endian targets the lanes are already in place.
define dso_local <2 x double> @sint_to_fp_widen13(<4 x i32> %a) {
; P9BE-LABEL: sint_to_fp_widen13:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
; P9BE-NEXT: xvcvsxwdp v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: sint_to_fp_widen13:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xvcvsxwdp v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: sint_to_fp_widen13:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
; P8BE-NEXT: xvcvsxwdp v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: sint_to_fp_widen13:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xvcvsxwdp v2, v2
; P8LE-NEXT: blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 1
  %conv = sitofp i32 %vecext to double
  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
  %vecext1 = extractelement <4 x i32> %a, i32 3
  %conv2 = sitofp i32 %vecext1 to double
  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
  ret <2 x double> %vecinit3
}
; uitofp of elements 0 and 2 of a <4 x i32>, packed into a <2 x double>.
define dso_local <2 x double> @uint_to_fp_widen02(<4 x i32> %a) {
; P9BE-LABEL: uint_to_fp_widen02:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xvcvuxwdp v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: uint_to_fp_widen02:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
; P9LE-NEXT: xvcvuxwdp v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: uint_to_fp_widen02:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xvcvuxwdp v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: uint_to_fp_widen02:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
; P8LE-NEXT: xvcvuxwdp v2, vs0
; P8LE-NEXT: blr
entry:
  ; Pull out the two source lanes.
  %elt0 = extractelement <4 x i32> %a, i32 0
  %elt2 = extractelement <4 x i32> %a, i32 2
  ; Convert each lane to double.
  %dbl0 = uitofp i32 %elt0 to double
  %dbl2 = uitofp i32 %elt2 to double
  ; Assemble the result vector.
  %partial = insertelement <2 x double> undef, double %dbl0, i32 0
  %result = insertelement <2 x double> %partial, double %dbl2, i32 1
  ret <2 x double> %result
}
; uitofp of elements 1 and 3 of a <4 x i32>, packed into a <2 x double>.
;
; xvcvuxwdp converts word elements 0 and 2 of its source. On big-endian
; targets elements 1 and 3 therefore have to be rotated left by one word
; (xxsldwi ..., 1 -> {a1, a2, a3, a0}); a shift of 3 would give
; {a3, a0, a1, a2} and return the two results in swapped order. On
; little-endian targets the lanes are already in place.
define dso_local <2 x double> @uint_to_fp_widen13(<4 x i32> %a) {
; P9BE-LABEL: uint_to_fp_widen13:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
; P9BE-NEXT: xvcvuxwdp v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: uint_to_fp_widen13:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xvcvuxwdp v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: uint_to_fp_widen13:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
; P8BE-NEXT: xvcvuxwdp v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: uint_to_fp_widen13:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xvcvuxwdp v2, v2
; P8LE-NEXT: blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 1
  %conv = uitofp i32 %vecext to double
  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
  %vecext1 = extractelement <4 x i32> %a, i32 3
  %conv2 = uitofp i32 %vecext1 to double
  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
  ret <2 x double> %vecinit3
}
; fpext of elements 0 and 1 of a <4 x float>, packed into a <2 x double>.
define dso_local <2 x double> @fp_extend01(<4 x float> %a) {
; P9BE-LABEL: fp_extend01:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxmrghw vs0, v2, v2
; P9BE-NEXT: xvcvspdp v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: fp_extend01:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxmrglw vs0, v2, v2
; P9LE-NEXT: xvcvspdp v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: fp_extend01:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxmrghw vs0, v2, v2
; P8BE-NEXT: xvcvspdp v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: fp_extend01:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxmrglw vs0, v2, v2
; P8LE-NEXT: xvcvspdp v2, vs0
; P8LE-NEXT: blr
entry:
  ; Pull out the two source lanes.
  %flt0 = extractelement <4 x float> %a, i32 0
  %flt1 = extractelement <4 x float> %a, i32 1
  ; Extend each lane to double.
  %dbl0 = fpext float %flt0 to double
  %dbl1 = fpext float %flt1 to double
  ; Assemble the result vector.
  %partial = insertelement <2 x double> undef, double %dbl0, i32 0
  %result = insertelement <2 x double> %partial, double %dbl1, i32 1
  ret <2 x double> %result
}
; fpext of elements 1 and 0 (in that order) of a <4 x float>, packed into a
; <2 x double>.
define dso_local <2 x double> @fp_extend10(<4 x float> %a) {
; P9BE-LABEL: fp_extend10:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxmrghw vs0, v2, v2
; P9BE-NEXT: xvcvspdp vs0, vs0
; P9BE-NEXT: xxswapd v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: fp_extend10:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxmrglw vs0, v2, v2
; P9LE-NEXT: xvcvspdp vs0, vs0
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: fp_extend10:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxmrghw vs0, v2, v2
; P8BE-NEXT: xvcvspdp vs0, vs0
; P8BE-NEXT: xxswapd v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: fp_extend10:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxmrglw vs0, v2, v2
; P8LE-NEXT: xvcvspdp vs0, vs0
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: blr
entry:
  ; Pull out the two source lanes.
  %flt1 = extractelement <4 x float> %a, i32 1
  %flt0 = extractelement <4 x float> %a, i32 0
  ; Extend each lane to double.
  %dbl1 = fpext float %flt1 to double
  %dbl0 = fpext float %flt0 to double
  ; Assemble the result vector: lane 1 first, then lane 0.
  %partial = insertelement <2 x double> undef, double %dbl1, i32 0
  %result = insertelement <2 x double> %partial, double %dbl0, i32 1
  ret <2 x double> %result
}
; fpext of elements 0 and 2 of a <4 x float>, packed into a <2 x double>.
define dso_local <2 x double> @fp_extend02(<4 x float> %a) {
; P9BE-LABEL: fp_extend02:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xvcvspdp v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: fp_extend02:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
; P9LE-NEXT: xvcvspdp v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: fp_extend02:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xvcvspdp v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: fp_extend02:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
; P8LE-NEXT: xvcvspdp v2, vs0
; P8LE-NEXT: blr
entry:
  ; Pull out the two source lanes.
  %flt0 = extractelement <4 x float> %a, i32 0
  %flt2 = extractelement <4 x float> %a, i32 2
  ; Extend each lane to double.
  %dbl0 = fpext float %flt0 to double
  %dbl2 = fpext float %flt2 to double
  ; Assemble the result vector.
  %partial = insertelement <2 x double> undef, double %dbl0, i32 0
  %result = insertelement <2 x double> %partial, double %dbl2, i32 1
  ret <2 x double> %result
}
; fpext of elements 1 and 3 of a <4 x float>, packed into a <2 x double>.
;
; xvcvspdp converts word elements 0 and 2 of its source. On big-endian
; targets elements 1 and 3 therefore have to be rotated left by one word
; (xxsldwi ..., 1 -> {a1, a2, a3, a0}); a shift of 3 would give
; {a3, a0, a1, a2} and return the two results in swapped order. On
; little-endian targets the lanes are already in place.
define dso_local <2 x double> @fp_extend13(<4 x float> %a) {
; P9BE-LABEL: fp_extend13:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
; P9BE-NEXT: xvcvspdp v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: fp_extend13:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xvcvspdp v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: fp_extend13:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
; P8BE-NEXT: xvcvspdp v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: fp_extend13:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xvcvspdp v2, v2
; P8LE-NEXT: blr
entry:
  %vecext = extractelement <4 x float> %a, i32 1
  %conv = fpext float %vecext to double
  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
  %vecext1 = extractelement <4 x float> %a, i32 3
  %conv2 = fpext float %vecext1 to double
  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
  ret <2 x double> %vecinit3
}
; fpext of elements 2 and 3 of a <4 x float>, packed into a <2 x double>.
define dso_local <2 x double> @fp_extend23(<4 x float> %a) {
; P9BE-LABEL: fp_extend23:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxmrglw vs0, v2, v2
; P9BE-NEXT: xvcvspdp v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: fp_extend23:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxmrghw vs0, v2, v2
; P9LE-NEXT: xvcvspdp v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: fp_extend23:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxmrglw vs0, v2, v2
; P8BE-NEXT: xvcvspdp v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: fp_extend23:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxmrghw vs0, v2, v2
; P8LE-NEXT: xvcvspdp v2, vs0
; P8LE-NEXT: blr
entry:
  ; Pull out the two source lanes.
  %flt2 = extractelement <4 x float> %a, i32 2
  %flt3 = extractelement <4 x float> %a, i32 3
  ; Extend each lane to double.
  %dbl2 = fpext float %flt2 to double
  %dbl3 = fpext float %flt3 to double
  ; Assemble the result vector.
  %partial = insertelement <2 x double> undef, double %dbl2, i32 0
  %result = insertelement <2 x double> %partial, double %dbl3, i32 1
  ret <2 x double> %result
}
; fpext of elements 3 and 2 (in that order) of a <4 x float>, packed into a
; <2 x double>.
define dso_local <2 x double> @fp_extend32(<4 x float> %a) {
; P9BE-LABEL: fp_extend32:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxmrglw vs0, v2, v2
; P9BE-NEXT: xvcvspdp vs0, vs0
; P9BE-NEXT: xxswapd v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: fp_extend32:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxmrghw vs0, v2, v2
; P9LE-NEXT: xvcvspdp vs0, vs0
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: fp_extend32:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxmrglw vs0, v2, v2
; P8BE-NEXT: xvcvspdp vs0, vs0
; P8BE-NEXT: xxswapd v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: fp_extend32:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxmrghw vs0, v2, v2
; P8LE-NEXT: xvcvspdp vs0, vs0
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: blr
entry:
  ; Pull out the two source lanes.
  %flt3 = extractelement <4 x float> %a, i32 3
  %flt2 = extractelement <4 x float> %a, i32 2
  ; Extend each lane to double.
  %dbl3 = fpext float %flt3 to double
  %dbl2 = fpext float %flt2 to double
  ; Assemble the result vector: lane 3 first, then lane 2.
  %partial = insertelement <2 x double> undef, double %dbl3, i32 0
  %result = insertelement <2 x double> %partial, double %dbl2, i32 1
  ret <2 x double> %result
}
; fpext of element 0 of %a and element 0 of %b, packed into a <2 x double>.
define dso_local <2 x double> @fp_extend_two00(<4 x float> %a, <4 x float> %b) {
; P9BE-LABEL: fp_extend_two00:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxmrghd vs0, v2, v3
; P9BE-NEXT: xvcvspdp v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: fp_extend_two00:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxmrgld vs0, v3, v2
; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 1
; P9LE-NEXT: xvcvspdp v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: fp_extend_two00:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxmrghd vs0, v2, v3
; P8BE-NEXT: xvcvspdp v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: fp_extend_two00:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxmrgld vs0, v3, v2
; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 1
; P8LE-NEXT: xvcvspdp v2, vs0
; P8LE-NEXT: blr
entry:
  ; Pull lane 0 out of each input vector.
  %a.flt0 = extractelement <4 x float> %a, i32 0
  %b.flt0 = extractelement <4 x float> %b, i32 0
  ; Extend each lane to double.
  %a.dbl0 = fpext float %a.flt0 to double
  %b.dbl0 = fpext float %b.flt0 to double
  ; Assemble the result vector.
  %partial = insertelement <2 x double> undef, double %a.dbl0, i32 0
  %result = insertelement <2 x double> %partial, double %b.dbl0, i32 1
  ret <2 x double> %result
}
; fpext of element 3 of %a and element 3 of %b, packed into a <2 x double>.
define dso_local <2 x double> @fp_extend_two33(<4 x float> %a, <4 x float> %b) {
; P9BE-LABEL: fp_extend_two33:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxmrgld vs0, v2, v3
; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 1
; P9BE-NEXT: xvcvspdp v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: fp_extend_two33:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxmrghd vs0, v3, v2
; P9LE-NEXT: xvcvspdp v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: fp_extend_two33:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxmrgld vs0, v2, v3
; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 1
; P8BE-NEXT: xvcvspdp v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: fp_extend_two33:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxmrghd vs0, v3, v2
; P8LE-NEXT: xvcvspdp v2, vs0
; P8LE-NEXT: blr
entry:
  ; Pull lane 3 out of each input vector.
  %a.flt3 = extractelement <4 x float> %a, i32 3
  %b.flt3 = extractelement <4 x float> %b, i32 3
  ; Extend each lane to double.
  %a.dbl3 = fpext float %a.flt3 to double
  %b.dbl3 = fpext float %b.flt3 to double
  ; Assemble the result vector.
  %partial = insertelement <2 x double> undef, double %a.dbl3, i32 0
  %result = insertelement <2 x double> %partial, double %b.dbl3, i32 1
  ret <2 x double> %result
}

View File

@ -47,33 +47,23 @@ define dso_local void @test2(<16 x float>* nocapture readonly %a, <2 x double>*
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv vs0, 0(r3)
; CHECK-NEXT: xxsldwi vs1, vs0, vs0, 1
; CHECK-NEXT: xscvspdpn f2, vs0
; CHECK-NEXT: xxsldwi vs3, vs0, vs0, 3
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: xscvspdpn f1, vs1
; CHECK-NEXT: xscvspdpn f3, vs3
; CHECK-NEXT: xscvspdpn f0, vs0
; CHECK-NEXT: xxmrghd vs0, vs0, vs3
; CHECK-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-NEXT: stxv vs0, 0(r4)
; CHECK-NEXT: stxv vs1, 0(r5)
; CHECK-NEXT: xxmrglw vs1, vs0, vs0
; CHECK-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-NEXT: xvcvspdp vs1, vs1
; CHECK-NEXT: xvcvspdp vs0, vs0
; CHECK-NEXT: stxv vs1, 0(r4)
; CHECK-NEXT: stxv vs0, 0(r5)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: xxswapd vs1, vs0
; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3
; CHECK-BE-NEXT: xscvspdpn f3, vs0
; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xxmrghd vs0, vs3, vs0
; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
; CHECK-BE-NEXT: stxv vs0, 0(r4)
; CHECK-BE-NEXT: stxv vs1, 0(r5)
; CHECK-BE-NEXT: xxmrghw vs1, vs0, vs0
; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0
; CHECK-BE-NEXT: xvcvspdp vs1, vs1
; CHECK-BE-NEXT: xvcvspdp vs0, vs0
; CHECK-BE-NEXT: stxv vs1, 0(r4)
; CHECK-BE-NEXT: stxv vs0, 0(r5)
; CHECK-BE-NEXT: blr
entry:
%0 = load <16 x float>, <16 x float>* %a, align 16

File diff suppressed because it is too large Load Diff

View File

@ -1554,11 +1554,8 @@ define <2 x i64> @test46(<2 x float> %a) {
;
; CHECK-LE-LABEL: test46:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-LE-NEXT: xxswapd vs1, v2
; CHECK-LE-NEXT: xscvspdpn f0, vs0
; CHECK-LE-NEXT: xscvspdpn f1, vs1
; CHECK-LE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-LE-NEXT: xxmrglw vs0, v2, v2
; CHECK-LE-NEXT: xvcvspdp vs0, vs0
; CHECK-LE-NEXT: xvcvdpuxds v2, vs0
; CHECK-LE-NEXT: blr
%v = fptoui <2 x float> %a to <2 x i64>
@ -1625,11 +1622,8 @@ define <2 x i64> @test47(<2 x float> %a) {
;
; CHECK-LE-LABEL: test47:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-LE-NEXT: xxswapd vs1, v2
; CHECK-LE-NEXT: xscvspdpn f0, vs0
; CHECK-LE-NEXT: xscvspdpn f1, vs1
; CHECK-LE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-LE-NEXT: xxmrglw vs0, v2, v2
; CHECK-LE-NEXT: xvcvspdp vs0, vs0
; CHECK-LE-NEXT: xvcvdpsxds v2, vs0
; CHECK-LE-NEXT: blr
%v = fptosi <2 x float> %a to <2 x i64>