1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[NFC][PowerPC] Fix register class for patterns using XXPERMDIs

There are a few patterns where we use a superclass for inputs to this
instruction rather than the correct class. This can sometimes lead to
unnecessary copies.
This commit is contained in:
Nemanja Ivanovic 2020-04-07 14:04:19 -05:00
parent 080e1d3570
commit 9461d82e65
16 changed files with 95 additions and 95 deletions

View File

@ -3311,19 +3311,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>;
(COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2))>;
def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (XXPERMDIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
(v4f32 (XXPERMDIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
}
let Predicates = [IsBigEndian] in {
@ -3514,17 +3514,17 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let Predicates = [IsLittleEndian, HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
(COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>;
def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
(COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>;
def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
(v2f64 (XXPERMDIs
(COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
(COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>;
def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
(v2f64 (XXPERMDIs
(COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
(COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>;
def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
(XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
sub_64), xaddrX4:$src)>;

View File

@ -13,7 +13,7 @@ define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) {
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8: lfiwzx f0, 0, r3
; CHECK-P8: ld r4, .LC0@toc@l(r4)
; CHECK-P8: xxpermdi vs0, f0, f0, 2
; CHECK-P8: xxswapd vs0, f0
; CHECK-P8: xxspltw v2, vs0, 3
; CHECK-P8: stvx v2, 0, r4
; CHECK-P8: lis r4, 1024

View File

@ -1282,7 +1282,7 @@ define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {
; P8LE-LABEL: spltMemVali:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
; P8LE-NEXT: xxswapd vs0, f0
; P8LE-NEXT: xxspltw v2, vs0, 3
; P8LE-NEXT: blr
entry:
@ -2801,7 +2801,7 @@ define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {
; P8LE-LABEL: spltMemValui:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
; P8LE-NEXT: xxswapd vs0, f0
; P8LE-NEXT: xxspltw v2, vs0, 3
; P8LE-NEXT: blr
entry:

View File

@ -230,7 +230,7 @@ define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_un
; CHECK-P9-NEXT: addis r3, r2, .LCPI12_0@toc@ha
; CHECK-P9-NEXT: addi r3, r3, .LCPI12_0@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: xxpermdi v2, f0, f0, 2
; CHECK-P9-NEXT: xxswapd v2, f0
; CHECK-P9-NEXT: xxlxor v4, v4, v4
; CHECK-P9-NEXT: vperm v2, v2, v4, v3
; CHECK-P9-NEXT: blr

View File

@ -40,7 +40,7 @@ define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonl
; P8: # %bb.0: # %entry
; P8-NEXT: addi r4, r4, 12
; P8-NEXT: lfiwzx f0, 0, r4
; P8-NEXT: xxpermdi vs0, f0, f0, 2
; P8-NEXT: xxswapd vs0, f0
; P8-NEXT: xxspltw v2, vs0, 3
; P8-NEXT: stvx v2, 0, r3
; P8-NEXT: blr
@ -65,7 +65,7 @@ define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a
; P8: # %bb.0: # %entry
; P8-NEXT: addi r4, r4, 12
; P8-NEXT: lfiwzx f0, 0, r4
; P8-NEXT: xxpermdi vs0, f0, f0, 2
; P8-NEXT: xxswapd vs0, f0
; P8-NEXT: xxspltw v2, vs0, 3
; P8-NEXT: stvx v2, 0, r3
; P8-NEXT: blr
@ -110,7 +110,7 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
; P8-LABEL: unadjusted_lxvwsx:
; P8: # %bb.0: # %entry
; P8-NEXT: lfiwzx f0, 0, r3
; P8-NEXT: xxpermdi vs0, f0, f0, 2
; P8-NEXT: xxswapd vs0, f0
; P8-NEXT: xxspltw v2, vs0, 3
; P8-NEXT: blr
entry:

View File

@ -9,7 +9,7 @@ define <16 x i8> @test(i32* %s, i32* %t) {
; CHECK-LE-LABEL: test:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: lfiwzx f0, 0, r3
; CHECK-LE-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-LE-NEXT: xxswapd vs0, f0
; CHECK-LE-NEXT: xxspltw v2, vs0, 3
; CHECK-LE-NEXT: blr

View File

@ -12,7 +12,7 @@ define void @draw_llvm_vs_variant0(<4 x float> %x) {
; CHECK-LABEL: draw_llvm_vs_variant0:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: xxpermdi v3, f0, f0, 2
; CHECK-NEXT: xxswapd v3, f0
; CHECK-NEXT: vmrglh v3, v3, v3
; CHECK-NEXT: vextsh2w v3, v3
; CHECK-NEXT: xvcvsxwsp vs0, v3

View File

@ -18,7 +18,7 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig
; CHECK-NEXT: addis r5, r2, .LCPI0_1@toc@ha
; CHECK-NEXT: addi r5, r5, .LCPI0_1@toc@l
; CHECK-NEXT: lxvx v4, 0, r5
; CHECK-NEXT: xxpermdi v5, f0, f0, 2
; CHECK-NEXT: xxswapd v5, f0
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: li r5, 4
; CHECK-NEXT: vperm v0, v3, v5, v2
@ -32,7 +32,7 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig
; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader
; CHECK-NEXT: #
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: xxpermdi v1, f0, f0, 2
; CHECK-NEXT: xxswapd v1, f0
; CHECK-NEXT: lfdx f0, r3, r4
; CHECK-NEXT: vperm v6, v1, v3, v4
; CHECK-NEXT: vperm v1, v3, v1, v2
@ -46,7 +46,7 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig
; CHECK-NEXT: vadduwm v1, v1, v6
; CHECK-NEXT: xxspltw v6, v1, 2
; CHECK-NEXT: vadduwm v1, v1, v6
; CHECK-NEXT: xxpermdi v6, f0, f0, 2
; CHECK-NEXT: xxswapd v6, f0
; CHECK-NEXT: vextuwrx r3, r5, v1
; CHECK-NEXT: vperm v7, v6, v3, v4
; CHECK-NEXT: vperm v6, v3, v6, v2
@ -186,12 +186,12 @@ define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* noc
; CHECK-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-NEXT: lxvx v4, 0, r3
; CHECK-NEXT: addis r3, r2, .LCPI1_1@toc@ha
; CHECK-NEXT: xxpermdi v2, f0, f0, 2
; CHECK-NEXT: xxswapd v2, f0
; CHECK-NEXT: lfd f0, 0(r4)
; CHECK-NEXT: addi r3, r3, .LCPI1_1@toc@l
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: lxvx v0, 0, r3
; CHECK-NEXT: xxpermdi v1, f0, f0, 2
; CHECK-NEXT: xxswapd v1, f0
; CHECK-NEXT: vperm v5, v2, v3, v4
; CHECK-NEXT: vperm v2, v3, v2, v0
; CHECK-NEXT: vperm v0, v3, v1, v0
@ -291,11 +291,11 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
; CHECK-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-NEXT: lxvx v4, 0, r3
; CHECK-NEXT: li r3, 4
; CHECK-NEXT: xxpermdi v2, f0, f0, 2
; CHECK-NEXT: xxswapd v2, f0
; CHECK-NEXT: lfiwzx f0, r5, r3
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vperm v2, v2, v3, v4
; CHECK-NEXT: xxpermdi v5, f0, f0, 2
; CHECK-NEXT: xxswapd v5, f0
; CHECK-NEXT: vperm v3, v5, v3, v4
; CHECK-NEXT: vspltisw v4, 8
; CHECK-NEXT: vnegw v3, v3

View File

@ -53,7 +53,7 @@ define <4 x float> @foof(float* nocapture readonly %a) #0 {
; CHECK-LABEL: foof:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfiwzx f0, 0, r3
; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-NEXT: xxswapd vs0, f0
; CHECK-NEXT: xxspltw v2, vs0, 3
; CHECK-NEXT: blr
entry:
@ -68,7 +68,7 @@ define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 2
; CHECK-NEXT: lfiwzx f0, r3, r4
; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-NEXT: xxswapd vs0, f0
; CHECK-NEXT: xxspltw v2, vs0, 3
; CHECK-NEXT: blr
entry:

View File

@ -13,7 +13,7 @@ define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r3)
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
@ -33,7 +33,7 @@ define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
@ -55,7 +55,7 @@ define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, <2 x i64> %vec, i32
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 3
; P9LE-NEXT: lfdx f0, r3, r4
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
@ -78,7 +78,7 @@ define <2 x i64> @s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
@ -99,7 +99,7 @@ define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r5)
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
@ -119,7 +119,7 @@ define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %
; P9LE-LABEL: s2v_test_f1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r3)
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: blr
@ -152,7 +152,7 @@ define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %
; P9LE-LABEL: s2v_test_f2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: blr
@ -187,7 +187,7 @@ define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 3
; P9LE-NEXT: lfdx f0, r3, r4
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: blr
@ -225,7 +225,7 @@ define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %
; P9LE-LABEL: s2v_test_f4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: blr
@ -259,7 +259,7 @@ define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %
; P9LE-LABEL: s2v_test_f5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r5)
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: blr

View File

@ -13,8 +13,8 @@ define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
; P9LE: # %bb.0:
; P9LE-NEXT: lfiwzx f0, 0, r3
; P9LE-NEXT: lfiwzx f1, 0, r4
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxswapd vs1, f1
; P9LE-NEXT: xvaddsp vs0, vs0, vs1
; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2
; P9LE-NEXT: stfiwx f0, 0, r5
@ -35,8 +35,8 @@ define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
; P8LE: # %bb.0:
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: lfiwzx f1, 0, r4
; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
; P8LE-NEXT: xxswapd vs0, f0
; P8LE-NEXT: xxswapd vs1, f1
; P8LE-NEXT: xvaddsp vs0, vs0, vs1
; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2
; P8LE-NEXT: stfiwx f0, 0, r5
@ -67,8 +67,8 @@ define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>*
; P9LE: # %bb.0:
; P9LE-NEXT: lfiwzx f0, 0, r3
; P9LE-NEXT: lfiwzx f1, 0, r4
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
; P9LE-NEXT: xxswapd vs0, f0
; P9LE-NEXT: xxswapd vs1, f1
; P9LE-NEXT: xvsubsp vs0, vs0, vs1
; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2
; P9LE-NEXT: mr r3, r5
@ -92,8 +92,8 @@ define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>*
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: lfiwzx f1, 0, r4
; P8LE-NEXT: mr r3, r5
; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
; P8LE-NEXT: xxswapd vs0, f0
; P8LE-NEXT: xxswapd vs1, f1
; P8LE-NEXT: xvsubsp vs0, vs0, vs1
; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2
; P8LE-NEXT: stfiwx f0, 0, r5

View File

@ -12,7 +12,7 @@ define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
@ -25,7 +25,7 @@ define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P8LE-LABEL: s2v_test1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: blr
@ -47,7 +47,7 @@ define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
@ -62,7 +62,7 @@ define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: blr
@ -86,7 +86,7 @@ define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 2
; P9LE-NEXT: lfiwax f0, r3, r4
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
@ -101,7 +101,7 @@ define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: sldi r4, r7, 2
; P8LE-NEXT: lfiwax f0, r3, r4
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: blr
@ -126,7 +126,7 @@ define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
@ -141,7 +141,7 @@ define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: blr
@ -164,7 +164,7 @@ define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r5
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
@ -177,7 +177,7 @@ define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) {
; P8LE-LABEL: s2v_test5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r5
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: blr
@ -198,7 +198,7 @@ define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) {
; P9LE-LABEL: s2v_test6:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxpermdi v2, f0, f0, 2
; P9LE-NEXT: xxswapd v2, f0
; P9LE-NEXT: xxspltd v2, v2, 1
; P9LE-NEXT: blr
@ -211,7 +211,7 @@ define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) {
; P8LE-LABEL: s2v_test6:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxpermdi v2, f0, f0, 2
; P8LE-NEXT: xxswapd v2, f0
; P8LE-NEXT: xxspltd v2, v2, 1
; P8LE-NEXT: blr
@ -233,7 +233,7 @@ define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) {
; P9LE-LABEL: s2v_test7:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxpermdi v2, f0, f0, 2
; P9LE-NEXT: xxswapd v2, f0
; P9LE-NEXT: xxspltd v2, v2, 1
; P9LE-NEXT: blr
@ -246,7 +246,7 @@ define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) {
; P8LE-LABEL: s2v_test7:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxpermdi v2, f0, f0, 2
; P8LE-NEXT: xxswapd v2, f0
; P8LE-NEXT: xxspltd v2, v2, 1
; P8LE-NEXT: blr

View File

@ -14,9 +14,9 @@ define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; P8LE-NEXT: addi r3, r4, .LCPI0_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: xxswapd v4, f0
; P8LE-NEXT: vperm v2, v4, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test1:
@ -40,9 +40,9 @@ define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addi r3, r4, .LCPI1_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: xxswapd v4, f0
; P8LE-NEXT: vperm v2, v4, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test2:
@ -69,7 +69,7 @@ define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32
; P8LE-NEXT: lfiwzx f0, r3, r5
; P8LE-NEXT: addi r3, r4, .LCPI2_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: blr
@ -97,9 +97,9 @@ define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addi r3, r4, .LCPI3_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: xxswapd v4, f0
; P8LE-NEXT: vperm v2, v4, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test4:
@ -124,9 +124,9 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) {
; P8LE-NEXT: lfiwzx f0, 0, r5
; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: xxswapd v4, f0
; P8LE-NEXT: vperm v2, v4, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test5:
@ -149,9 +149,9 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
; P8LE-NEXT: addi r3, r4, .LCPI5_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: xxswapd v4, f0
; P8LE-NEXT: vperm v2, v4, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f1:
@ -174,7 +174,7 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
; P9LE-NEXT: addi r3, r3, 4
; P9LE-DAG: xxspltw v2, v2, 2
; P9LE-DAG: lfiwzx f0, 0, r3
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
@ -192,7 +192,7 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
@ -218,7 +218,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
; P9LE-NEXT: sldi r4, r7, 2
; P9LE-NEXT: lfiwzx f0, r3, r4
; P9LE-DAG: xxspltw v2, v2, 2
; P9LE-DAG: xxpermdi v3, f0, f0, 2
; P9LE-DAG: xxswapd v3, f0
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
@ -236,7 +236,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
; P8LE-NEXT: sldi r4, r7, 2
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: lfiwzx f0, r3, r4
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
@ -262,8 +262,8 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: lfiwzx f0, 0, r3
; P9LE-DAG: xxspltw v2, v2, 2
; P9LE-DAG: xxpermdi v3, f0, f0, 2
; P9LE-DAG: xxspltw v2, v2, 2
; P9LE-DAG: xxswapd v3, f0
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
@ -281,7 +281,7 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
@ -306,7 +306,7 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwzx f0, 0, r5
; P9LE-NEXT: xxspltw v2, v2, 2
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxswapd v3, f0
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
@ -322,7 +322,7 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r5
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxswapd v3, f0
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr

View File

@ -52,7 +52,7 @@ define void @bar0() {
; CHECK-P9: lfd f1, 0(r3)
; CHECK-P9: addis r3, r2, .LC2@toc@ha
; CHECK-P9: ld r3, .LC2@toc@l(r3)
; CHECK-P9: xxpermdi vs1, f1, f1, 2
; CHECK-P9: xxswapd vs1, f1
; CHECK-P9: xxpermdi vs0, vs0, vs1, 1
; CHECK-P9: stxvx vs0, 0, r3
; CHECK-P9: blr
@ -97,7 +97,7 @@ define void @bar1() {
; CHECK-P9: lfd f1, 0(r3)
; CHECK-P9: addis r3, r2, .LC2@toc@ha
; CHECK-P9: ld r3, .LC2@toc@l(r3)
; CHECK-P9: xxpermdi vs1, f1, f1, 2
; CHECK-P9: xxswapd vs1, f1
; CHECK-P9: xxmrgld vs0, vs1, vs0
; CHECK-P9: stxvx vs0, 0, r3
; CHECK-P9: blr

View File

@ -6375,7 +6375,7 @@ define <1 x float> @constrained_vector_ceil_v1f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI103_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI103_0@toc@l
; PC64LE-NEXT: lfiwzx 0, 0, 3
; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v1f32:
@ -6383,7 +6383,7 @@ define <1 x float> @constrained_vector_ceil_v1f32() #0 {
; PC64LE9-NEXT: addis 3, 2, .LCPI103_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI103_0@toc@l
; PC64LE9-NEXT: lfiwzx 0, 0, 3
; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: xxswapd 34, 0
; PC64LE9-NEXT: blr
entry:
%ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
@ -6464,7 +6464,7 @@ define <1 x float> @constrained_vector_floor_v1f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI107_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI107_0@toc@l
; PC64LE-NEXT: lfiwzx 0, 0, 3
; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v1f32:
@ -6472,7 +6472,7 @@ define <1 x float> @constrained_vector_floor_v1f32() #0 {
; PC64LE9-NEXT: addis 3, 2, .LCPI107_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI107_0@toc@l
; PC64LE9-NEXT: lfiwzx 0, 0, 3
; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: xxswapd 34, 0
; PC64LE9-NEXT: blr
entry:
%floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
@ -6554,7 +6554,7 @@ define <1 x float> @constrained_vector_round_v1f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI111_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI111_0@toc@l
; PC64LE-NEXT: lfiwzx 0, 0, 3
; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v1f32:
@ -6562,7 +6562,7 @@ define <1 x float> @constrained_vector_round_v1f32() #0 {
; PC64LE9-NEXT: addis 3, 2, .LCPI111_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI111_0@toc@l
; PC64LE9-NEXT: lfiwzx 0, 0, 3
; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: xxswapd 34, 0
; PC64LE9-NEXT: blr
entry:
%round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
@ -6646,7 +6646,7 @@ define <1 x float> @constrained_vector_trunc_v1f32() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI115_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI115_0@toc@l
; PC64LE-NEXT: lfiwzx 0, 0, 3
; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v1f32:
@ -6654,7 +6654,7 @@ define <1 x float> @constrained_vector_trunc_v1f32() #0 {
; PC64LE9-NEXT: addis 3, 2, .LCPI115_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI115_0@toc@l
; PC64LE9-NEXT: lfiwzx 0, 0, 3
; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: xxswapd 34, 0
; PC64LE9-NEXT: blr
entry:
%trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(

View File

@ -34,7 +34,7 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lfd f1, 0(r4)
; CHECK-P9-NEXT: lxv vs0, 0(r3)
; CHECK-P9-NEXT: xxpermdi vs1, f1, f1, 2
; CHECK-P9-NEXT: xxswapd vs1, f1
; CHECK-P9-NEXT: xxpermdi v2, vs0, vs1, 1
; CHECK-P9-NEXT: blr
%v = load <2 x double>, <2 x double>* %p1
@ -68,7 +68,7 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lfd f1, 0(r4)
; CHECK-P9-NEXT: lxv vs0, 0(r3)
; CHECK-P9-NEXT: xxpermdi vs1, f1, f1, 2
; CHECK-P9-NEXT: xxswapd vs1, f1
; CHECK-P9-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P9-NEXT: blr
%v = load <2 x double>, <2 x double>* %p1