mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[PowerPC] Improve handling of some BUILD_VECTOR nodes
An analysis of real world code turned up a number of patterns with BUILD_VECTOR of nodes resulting from operations on extracted vector elements for which we produce poor code. This addresses those cases. No attempt is made for completeness as that would entail a large amount of work for something that there is no evidence of in real code. Differential revision: https://reviews.llvm.org/D72660
This commit is contained in:
parent
5f9c6088ec
commit
86f2aa5d7c
@ -1341,6 +1341,21 @@ def DWToSPExtractConv {
|
||||
dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
|
||||
}
|
||||
|
||||
// Reusable DAG fragments for "extract a word from v4i32 $A and convert it to
// f64", plus the two-element build_vector shapes built from them.
// Naming: El<N>S / El<N>U is element N of $A converted through the
// PPCmtvsra + PPCfcfid chain (S) or the PPCmtvsrz + PPCfcfidu chain (U);
// presumably the signed and unsigned int-to-fp forms respectively.
def WToDPExtractConv {
|
||||
// S variants: one fragment per source element 0..3.
dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0))));
|
||||
dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1))));
|
||||
dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2))));
|
||||
dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3))));
|
||||
// U variants: one fragment per source element 0..3.
dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0))));
|
||||
dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1))));
|
||||
dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2))));
|
||||
dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3))));
|
||||
// v2f64 build_vectors of the even (0,2) and odd (1,3) converted elements;
// these are the shapes the Pat<> definitions match on.
dag BV02S = (v2f64 (build_vector El0S, El2S));
|
||||
dag BV13S = (v2f64 (build_vector El1S, El3S));
|
||||
dag BV02U = (v2f64 (build_vector El0U, El2U));
|
||||
dag BV13U = (v2f64 (build_vector El1U, El3U));
|
||||
}
|
||||
|
||||
// The following VSX instructions were introduced in Power ISA 2.07
|
||||
/* FIXME: if the operands are v2i64, these patterns will not match.
|
||||
we should define new patterns or otherwise match the same patterns
|
||||
@ -4171,6 +4186,41 @@ let AddedComplexity = 400 in {
|
||||
def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
|
||||
ExtDbl.B0U, ExtDbl.B1U)),
|
||||
(v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 1))))),
|
||||
(v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 0))))),
|
||||
(v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
|
||||
(XVCVSPDP (XXMRGHW $A, $A)), 2))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 2))))),
|
||||
(v2f64 (XVCVSPDP $A))>;
|
||||
// Big-endian: <fpext A[1], fpext A[3]>.
// XVCVSPDP converts words 0 and 2 of its input, so A[1] and A[3] must be
// rotated into those slots. XXSLDWI $A, $A, N selects words N..N+3 of the
// concatenation A||A, so N = 1 yields A1,A2,A3,A0 (word 0 = A1, word 2 = A3).
// A shift of 3 would yield A3,A0,A1,A2 and produce the two lanes swapped.
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 3))))),
|
||||
(v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 3))))),
|
||||
(v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 2))))),
|
||||
(v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
|
||||
(XVCVSPDP (XXMRGLW $A, $A)), 2))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
|
||||
(f64 (fpextend (extractelt v4f32:$B, 0))))),
|
||||
(v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
|
||||
(f64 (fpextend (extractelt v4f32:$B, 3))))),
|
||||
(v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
|
||||
(XXPERMDI $A, $B, 3), 1)))>;
|
||||
def : Pat<WToDPExtractConv.BV02S,
|
||||
(v2f64 (XVCVSXWDP $A))>;
|
||||
// Big-endian: signed convert of elements 1 and 3 of a v4i32.
// XVCVSXWDP reads words 0 and 2, so rotate left by ONE word
// (A1,A2,A3,A0) to place A1/A3 there in the right order. A shift of 3
// gives A3,A0,A1,A2 and would swap the two result lanes.
def : Pat<WToDPExtractConv.BV13S,
|
||||
(v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
|
||||
def : Pat<WToDPExtractConv.BV02U,
|
||||
(v2f64 (XVCVUXWDP $A))>;
|
||||
// Big-endian: unsigned convert of elements 1 and 3 of a v4i32.
// XVCVUXWDP reads words 0 and 2, so rotate left by ONE word
// (A1,A2,A3,A0) to place A1/A3 there in the right order. A shift of 3
// gives A3,A0,A1,A2 and would swap the two result lanes.
def : Pat<WToDPExtractConv.BV13U,
|
||||
(v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
|
||||
}
|
||||
|
||||
let Predicates = [IsLittleEndian, HasP8Vector] in {
|
||||
@ -4249,6 +4299,41 @@ let AddedComplexity = 400 in {
|
||||
def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
|
||||
ExtDbl.B0U, ExtDbl.B1U)),
|
||||
(v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 1))))),
|
||||
(v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 0))))),
|
||||
(v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
|
||||
(XVCVSPDP (XXMRGLW $A, $A)), 2))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 2))))),
|
||||
(v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 3))))),
|
||||
(v2f64 (XVCVSPDP $A))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 3))))),
|
||||
(v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
|
||||
(f64 (fpextend (extractelt v4f32:$A, 2))))),
|
||||
(v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
|
||||
(XVCVSPDP (XXMRGHW $A, $A)), 2))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
|
||||
(f64 (fpextend (extractelt v4f32:$B, 0))))),
|
||||
(v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3),
|
||||
(XXPERMDI $B, $A, 3), 1)))>;
|
||||
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
|
||||
(f64 (fpextend (extractelt v4f32:$B, 3))))),
|
||||
(v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
|
||||
def : Pat<WToDPExtractConv.BV02S,
|
||||
(v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
|
||||
def : Pat<WToDPExtractConv.BV13S,
|
||||
(v2f64 (XVCVSXWDP $A))>;
|
||||
def : Pat<WToDPExtractConv.BV02U,
|
||||
(v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
|
||||
def : Pat<WToDPExtractConv.BV13U,
|
||||
(v2f64 (XVCVUXWDP $A))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasDirectMove] in {
|
||||
|
@ -6123,3 +6123,412 @@ entry:
|
||||
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x i64> %splat.splat
|
||||
}
|
||||
|
||||
; Some additional patterns that come up in real code.
|
||||
define dso_local <2 x double> @sint_to_fp_widen02(<4 x i32> %a) {
|
||||
; P9BE-LABEL: sint_to_fp_widen02:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xvcvsxwdp v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: sint_to_fp_widen02:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P9LE-NEXT: xvcvsxwdp v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: sint_to_fp_widen02:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xvcvsxwdp v2, v2
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: sint_to_fp_widen02:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P8LE-NEXT: xvcvsxwdp v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %a, i32 0
|
||||
%conv = sitofp i32 %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x i32> %a, i32 2
|
||||
%conv2 = sitofp i32 %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
; Builds <sitofp a[1], sitofp a[3]>. On big-endian targets xvcvsxwdp reads
; words 0 and 2, so the input has to be rotated left by one word
; (xxsldwi ..., 1 gives a1,a2,a3,a0); a rotate by 3 would swap the lanes.
define dso_local <2 x double> @sint_to_fp_widen13(<4 x i32> %a) {
|
||||
; P9BE-LABEL: sint_to_fp_widen13:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P9BE-NEXT: xvcvsxwdp v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: sint_to_fp_widen13:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xvcvsxwdp v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: sint_to_fp_widen13:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P8BE-NEXT: xvcvsxwdp v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: sint_to_fp_widen13:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xvcvsxwdp v2, v2
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %a, i32 1
|
||||
%conv = sitofp i32 %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x i32> %a, i32 3
|
||||
%conv2 = sitofp i32 %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @uint_to_fp_widen02(<4 x i32> %a) {
|
||||
; P9BE-LABEL: uint_to_fp_widen02:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xvcvuxwdp v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: uint_to_fp_widen02:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P9LE-NEXT: xvcvuxwdp v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: uint_to_fp_widen02:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xvcvuxwdp v2, v2
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: uint_to_fp_widen02:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P8LE-NEXT: xvcvuxwdp v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %a, i32 0
|
||||
%conv = uitofp i32 %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x i32> %a, i32 2
|
||||
%conv2 = uitofp i32 %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
; Builds <uitofp a[1], uitofp a[3]>. On big-endian targets xvcvuxwdp reads
; words 0 and 2, so the input has to be rotated left by one word
; (xxsldwi ..., 1 gives a1,a2,a3,a0); a rotate by 3 would swap the lanes.
define dso_local <2 x double> @uint_to_fp_widen13(<4 x i32> %a) {
|
||||
; P9BE-LABEL: uint_to_fp_widen13:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P9BE-NEXT: xvcvuxwdp v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: uint_to_fp_widen13:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xvcvuxwdp v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: uint_to_fp_widen13:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P8BE-NEXT: xvcvuxwdp v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: uint_to_fp_widen13:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xvcvuxwdp v2, v2
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %a, i32 1
|
||||
%conv = uitofp i32 %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x i32> %a, i32 3
|
||||
%conv2 = uitofp i32 %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @fp_extend01(<4 x float> %a) {
|
||||
; P9BE-LABEL: fp_extend01:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xxmrghw vs0, v2, v2
|
||||
; P9BE-NEXT: xvcvspdp v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: fp_extend01:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xxmrglw vs0, v2, v2
|
||||
; P9LE-NEXT: xvcvspdp v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: fp_extend01:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xxmrghw vs0, v2, v2
|
||||
; P8BE-NEXT: xvcvspdp v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: fp_extend01:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xxmrglw vs0, v2, v2
|
||||
; P8LE-NEXT: xvcvspdp v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x float> %a, i32 0
|
||||
%conv = fpext float %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x float> %a, i32 1
|
||||
%conv2 = fpext float %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @fp_extend10(<4 x float> %a) {
|
||||
; P9BE-LABEL: fp_extend10:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xxmrghw vs0, v2, v2
|
||||
; P9BE-NEXT: xvcvspdp vs0, vs0
|
||||
; P9BE-NEXT: xxswapd v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: fp_extend10:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xxmrglw vs0, v2, v2
|
||||
; P9LE-NEXT: xvcvspdp vs0, vs0
|
||||
; P9LE-NEXT: xxswapd v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: fp_extend10:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xxmrghw vs0, v2, v2
|
||||
; P8BE-NEXT: xvcvspdp vs0, vs0
|
||||
; P8BE-NEXT: xxswapd v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: fp_extend10:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xxmrglw vs0, v2, v2
|
||||
; P8LE-NEXT: xvcvspdp vs0, vs0
|
||||
; P8LE-NEXT: xxswapd v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x float> %a, i32 1
|
||||
%conv = fpext float %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x float> %a, i32 0
|
||||
%conv2 = fpext float %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @fp_extend02(<4 x float> %a) {
|
||||
; P9BE-LABEL: fp_extend02:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xvcvspdp v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: fp_extend02:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P9LE-NEXT: xvcvspdp v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: fp_extend02:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xvcvspdp v2, v2
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: fp_extend02:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P8LE-NEXT: xvcvspdp v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x float> %a, i32 0
|
||||
%conv = fpext float %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x float> %a, i32 2
|
||||
%conv2 = fpext float %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
; Builds <fpext a[1], fpext a[3]>. On big-endian targets xvcvspdp reads
; words 0 and 2, so the input has to be rotated left by one word
; (xxsldwi ..., 1 gives a1,a2,a3,a0); a rotate by 3 would swap the lanes.
define dso_local <2 x double> @fp_extend13(<4 x float> %a) {
|
||||
; P9BE-LABEL: fp_extend13:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P9BE-NEXT: xvcvspdp v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: fp_extend13:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xvcvspdp v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: fp_extend13:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
|
||||
; P8BE-NEXT: xvcvspdp v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: fp_extend13:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xvcvspdp v2, v2
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x float> %a, i32 1
|
||||
%conv = fpext float %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x float> %a, i32 3
|
||||
%conv2 = fpext float %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @fp_extend23(<4 x float> %a) {
|
||||
; P9BE-LABEL: fp_extend23:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xxmrglw vs0, v2, v2
|
||||
; P9BE-NEXT: xvcvspdp v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: fp_extend23:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xxmrghw vs0, v2, v2
|
||||
; P9LE-NEXT: xvcvspdp v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: fp_extend23:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xxmrglw vs0, v2, v2
|
||||
; P8BE-NEXT: xvcvspdp v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: fp_extend23:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xxmrghw vs0, v2, v2
|
||||
; P8LE-NEXT: xvcvspdp v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x float> %a, i32 2
|
||||
%conv = fpext float %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x float> %a, i32 3
|
||||
%conv2 = fpext float %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @fp_extend32(<4 x float> %a) {
|
||||
; P9BE-LABEL: fp_extend32:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xxmrglw vs0, v2, v2
|
||||
; P9BE-NEXT: xvcvspdp vs0, vs0
|
||||
; P9BE-NEXT: xxswapd v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: fp_extend32:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xxmrghw vs0, v2, v2
|
||||
; P9LE-NEXT: xvcvspdp vs0, vs0
|
||||
; P9LE-NEXT: xxswapd v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: fp_extend32:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xxmrglw vs0, v2, v2
|
||||
; P8BE-NEXT: xvcvspdp vs0, vs0
|
||||
; P8BE-NEXT: xxswapd v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: fp_extend32:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xxmrghw vs0, v2, v2
|
||||
; P8LE-NEXT: xvcvspdp vs0, vs0
|
||||
; P8LE-NEXT: xxswapd v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x float> %a, i32 3
|
||||
%conv = fpext float %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x float> %a, i32 2
|
||||
%conv2 = fpext float %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @fp_extend_two00(<4 x float> %a, <4 x float> %b) {
|
||||
; P9BE-LABEL: fp_extend_two00:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xxmrghd vs0, v2, v3
|
||||
; P9BE-NEXT: xvcvspdp v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: fp_extend_two00:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xxmrgld vs0, v3, v2
|
||||
; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 1
|
||||
; P9LE-NEXT: xvcvspdp v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: fp_extend_two00:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xxmrghd vs0, v2, v3
|
||||
; P8BE-NEXT: xvcvspdp v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: fp_extend_two00:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xxmrgld vs0, v3, v2
|
||||
; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 1
|
||||
; P8LE-NEXT: xvcvspdp v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x float> %a, i32 0
|
||||
%conv = fpext float %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x float> %b, i32 0
|
||||
%conv2 = fpext float %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
||||
define dso_local <2 x double> @fp_extend_two33(<4 x float> %a, <4 x float> %b) {
|
||||
; P9BE-LABEL: fp_extend_two33:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: xxmrgld vs0, v2, v3
|
||||
; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 1
|
||||
; P9BE-NEXT: xvcvspdp v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: fp_extend_two33:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: xxmrghd vs0, v3, v2
|
||||
; P9LE-NEXT: xvcvspdp v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: fp_extend_two33:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: xxmrgld vs0, v2, v3
|
||||
; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 1
|
||||
; P8BE-NEXT: xvcvspdp v2, vs0
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: fp_extend_two33:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: xxmrghd vs0, v3, v2
|
||||
; P8LE-NEXT: xvcvspdp v2, vs0
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x float> %a, i32 3
|
||||
%conv = fpext float %vecext to double
|
||||
%vecinit = insertelement <2 x double> undef, double %conv, i32 0
|
||||
%vecext1 = extractelement <4 x float> %b, i32 3
|
||||
%conv2 = fpext float %vecext1 to double
|
||||
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
|
||||
ret <2 x double> %vecinit3
|
||||
}
|
||||
|
@ -47,33 +47,23 @@ define dso_local void @test2(<16 x float>* nocapture readonly %a, <2 x double>*
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-NEXT: xxsldwi vs1, vs0, vs0, 1
|
||||
; CHECK-NEXT: xscvspdpn f2, vs0
|
||||
; CHECK-NEXT: xxsldwi vs3, vs0, vs0, 3
|
||||
; CHECK-NEXT: xxswapd vs0, vs0
|
||||
; CHECK-NEXT: xscvspdpn f1, vs1
|
||||
; CHECK-NEXT: xscvspdpn f3, vs3
|
||||
; CHECK-NEXT: xscvspdpn f0, vs0
|
||||
; CHECK-NEXT: xxmrghd vs0, vs0, vs3
|
||||
; CHECK-NEXT: xxmrghd vs1, vs2, vs1
|
||||
; CHECK-NEXT: stxv vs0, 0(r4)
|
||||
; CHECK-NEXT: stxv vs1, 0(r5)
|
||||
; CHECK-NEXT: xxmrglw vs1, vs0, vs0
|
||||
; CHECK-NEXT: xxmrghw vs0, vs0, vs0
|
||||
; CHECK-NEXT: xvcvspdp vs1, vs1
|
||||
; CHECK-NEXT: xvcvspdp vs0, vs0
|
||||
; CHECK-NEXT: stxv vs1, 0(r4)
|
||||
; CHECK-NEXT: stxv vs0, 0(r5)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test2:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: xxswapd vs1, vs0
|
||||
; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3
|
||||
; CHECK-BE-NEXT: xscvspdpn f3, vs0
|
||||
; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
|
||||
; CHECK-BE-NEXT: xscvspdpn f1, vs1
|
||||
; CHECK-BE-NEXT: xscvspdpn f2, vs2
|
||||
; CHECK-BE-NEXT: xscvspdpn f0, vs0
|
||||
; CHECK-BE-NEXT: xxmrghd vs0, vs3, vs0
|
||||
; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r4)
|
||||
; CHECK-BE-NEXT: stxv vs1, 0(r5)
|
||||
; CHECK-BE-NEXT: xxmrghw vs1, vs0, vs0
|
||||
; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0
|
||||
; CHECK-BE-NEXT: xvcvspdp vs1, vs1
|
||||
; CHECK-BE-NEXT: xvcvspdp vs0, vs0
|
||||
; CHECK-BE-NEXT: stxv vs1, 0(r4)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r5)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = load <16 x float>, <16 x float>* %a, align 16
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1554,11 +1554,8 @@ define <2 x i64> @test46(<2 x float> %a) {
|
||||
;
|
||||
; CHECK-LE-LABEL: test46:
|
||||
; CHECK-LE: # %bb.0:
|
||||
; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3
|
||||
; CHECK-LE-NEXT: xxswapd vs1, v2
|
||||
; CHECK-LE-NEXT: xscvspdpn f0, vs0
|
||||
; CHECK-LE-NEXT: xscvspdpn f1, vs1
|
||||
; CHECK-LE-NEXT: xxmrghd vs0, vs1, vs0
|
||||
; CHECK-LE-NEXT: xxmrglw vs0, v2, v2
|
||||
; CHECK-LE-NEXT: xvcvspdp vs0, vs0
|
||||
; CHECK-LE-NEXT: xvcvdpuxds v2, vs0
|
||||
; CHECK-LE-NEXT: blr
|
||||
%v = fptoui <2 x float> %a to <2 x i64>
|
||||
@ -1625,11 +1622,8 @@ define <2 x i64> @test47(<2 x float> %a) {
|
||||
;
|
||||
; CHECK-LE-LABEL: test47:
|
||||
; CHECK-LE: # %bb.0:
|
||||
; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3
|
||||
; CHECK-LE-NEXT: xxswapd vs1, v2
|
||||
; CHECK-LE-NEXT: xscvspdpn f0, vs0
|
||||
; CHECK-LE-NEXT: xscvspdpn f1, vs1
|
||||
; CHECK-LE-NEXT: xxmrghd vs0, vs1, vs0
|
||||
; CHECK-LE-NEXT: xxmrglw vs0, v2, v2
|
||||
; CHECK-LE-NEXT: xvcvspdp vs0, vs0
|
||||
; CHECK-LE-NEXT: xvcvdpsxds v2, vs0
|
||||
; CHECK-LE-NEXT: blr
|
||||
%v = fptosi <2 x float> %a to <2 x i64>
|
||||
|
Loading…
x
Reference in New Issue
Block a user