diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index caebb953439..df88a879db7 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -238,6 +238,7 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -412,6 +413,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MLI = &getAnalysis(); PassConfig = &getAnalysis(); + AA = &getAnalysis().getAAResults(); if (VerifyScheduling) MF->verify(this, "Before post machine scheduling."); diff --git a/test/CodeGen/AArch64/merge-store-dependency.ll b/test/CodeGen/AArch64/merge-store-dependency.ll index 4c561f337dc..5613db1e521 100644 --- a/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/test/CodeGen/AArch64/merge-store-dependency.ll @@ -17,15 +17,15 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg ; A53-NEXT: movi v0.2d, #0000000000000000 ; A53-NEXT: mov x8, x0 ; A53-NEXT: mov x19, x8 -; A53-NEXT: mov w9, #256 ; A53-NEXT: mov w0, w1 -; A53-NEXT: str q0, [x8] +; A53-NEXT: mov w9, #256 ; A53-NEXT: str q0, [x19, #16]! -; A53-NEXT: strh w9, [x8, #24] ; A53-NEXT: str w1, [x19] ; A53-NEXT: mov w1, #4 ; A53-NEXT: stp x2, x3, [x8, #32] ; A53-NEXT: mov x2, x8 +; A53-NEXT: str q0, [x8] +; A53-NEXT: strh w9, [x8, #24] ; A53-NEXT: str wzr, [x8, #20] ; A53-NEXT: bl fcntl ; A53-NEXT: adrp x9, gv0 diff --git a/test/CodeGen/PowerPC/extract-and-store.ll b/test/CodeGen/PowerPC/extract-and-store.ll index 9a5bacda86a..2731ffd0712 100644 --- a/test/CodeGen/PowerPC/extract-and-store.ll +++ b/test/CodeGen/PowerPC/extract-and-store.ll @@ -484,8 +484,8 @@ define dso_local void @test_consecutive_i32(<4 x i32> %a, i32* nocapture %b) loc ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 ; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stfiwx f0, 0, r5 ; CHECK-NEXT: stxsiwx vs34, r5, r3 +; CHECK-NEXT: stfiwx f0, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_i32: @@ -501,8 +501,8 @@ define dso_local void @test_consecutive_i32(<4 x i32> %a, i32* nocapture %b) loc ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 ; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stfiwx f0, 0, r5 ; CHECK-P9-NEXT: stxsiwx vs34, r5, r3 +; CHECK-P9-NEXT: stfiwx f0, 0, r5 ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_consecutive_i32: @@ -590,8 +590,8 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture ; CHECK-BE-NEXT: li r4, 20 ; CHECK-BE-NEXT: stxsiwx vs34, r5, r3 ; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs0, 2 -; CHECK-BE-NEXT: stxvw4x vs0, 0, r5 ; CHECK-BE-NEXT: stfiwx f1, r5, r4 +; CHECK-BE-NEXT: stxvw4x vs0, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_stores_exceed_vec_size: @@ -599,13 +599,13 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture ; CHECK-P9-NEXT: addis r3, r2, .LCPI16_0@toc@ha ; CHECK-P9-NEXT: addi r3, r3, .LCPI16_0@toc@l ; CHECK-P9-NEXT: lxvx vs35, 0, r3 -; CHECK-P9-NEXT: li r3, 16 -; CHECK-P9-NEXT: vperm v3, v2, v2, v3 ; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-NEXT: stxv vs35, 0(r5) +; CHECK-P9-NEXT: li r3, 16 ; CHECK-P9-NEXT: stfiwx f0, r5, r3 ; CHECK-P9-NEXT: li r3, 20 ; CHECK-P9-NEXT: stxsiwx vs34, r5, r3 +; CHECK-P9-NEXT: vperm v3, v2, v2, v3 +; CHECK-P9-NEXT: stxv vs35, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_stores_exceed_vec_size: @@ -613,10 +613,10 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture ; CHECK-P9-BE-NEXT: xxspltw vs0, vs34, 0 ; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs0, 2 ; CHECK-P9-BE-NEXT: li r3, 16 -; CHECK-P9-BE-NEXT: stxv vs0, 0(r5) -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 ; CHECK-P9-BE-NEXT: stxsiwx vs34, r5, r3 ; CHECK-P9-BE-NEXT: li r3, 20 +; CHECK-P9-BE-NEXT: stxv vs0, 0(r5) +; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 ; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 ; CHECK-P9-BE-NEXT: blr entry: @@ -930,8 +930,8 @@ define void @test_elements_from_two_vec(<4 x i32> %a, <4 x i32> %b, i32* nocaptu ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 ; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stfiwx f0, r7, r3 ; CHECK-BE-NEXT: stxsiwx vs35, 0, r7 +; CHECK-BE-NEXT: stfiwx f0, r7, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_elements_from_two_vec: @@ -977,19 +977,19 @@ define dso_local void @test_elements_from_three_vec(<4 x float> %a, <4 x float> ; CHECK-BE-NEXT: xxsldwi vs1, vs35, vs35, 1 ; CHECK-BE-NEXT: li r3, 4 ; CHECK-BE-NEXT: li r4, 8 +; CHECK-BE-NEXT: stxsiwx vs36, r9, r4 ; CHECK-BE-NEXT: stfiwx f1, r9, r3 ; CHECK-BE-NEXT: stfiwx f0, 0, r9 -; CHECK-BE-NEXT: stxsiwx vs36, r9, r4 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_elements_from_three_vec: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 ; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stfiwx f0, 0, r9 -; CHECK-P9-NEXT: xxsldwi vs0, vs36, vs36, 1 ; CHECK-P9-NEXT: stxsiwx vs35, r9, r3 ; CHECK-P9-NEXT: li r3, 8 +; CHECK-P9-NEXT: stfiwx f0, 0, r9 +; CHECK-P9-NEXT: xxsldwi vs0, vs36, vs36, 1 ; CHECK-P9-NEXT: stfiwx f0, r9, r3 ; CHECK-P9-NEXT: blr ; diff --git a/test/CodeGen/PowerPC/f128-aggregates.ll b/test/CodeGen/PowerPC/f128-aggregates.ll index 8c21b85d0ce..6e782c2b02a 100644 --- a/test/CodeGen/PowerPC/f128-aggregates.ll +++ b/test/CodeGen/PowerPC/f128-aggregates.ll @@ -353,10 +353,10 @@ define fp128 @sum_float128(i32 signext %count, ...) { ; CHECK-NEXT: addi r3, r1, 40 ; CHECK-NEXT: lxvx v3, 0, r3 ; CHECK-NEXT: xsaddqp v2, v3, v2 -; CHECK-NEXT: addi [[REG2:r[0-9]+]], r1, 72 -; CHECK-NEXT: std [[REG2]], -8(r1) ; CHECK-NEXT: lxv v3, 16(r3) ; CHECK-NEXT: xsaddqp v2, v2, v3 +; CHECK-NEXT: addi [[REG2:r[0-9]+]], r1, 72 +; CHECK-NEXT: std [[REG2]], -8(r1) ; CHECK-NEXT: blr entry: %ap = alloca i8*, align 8 diff --git a/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll b/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll index 6b945d46880..cf4a6d63620 100644 --- a/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll +++ b/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll @@ -328,27 +328,27 @@ define void @test16elt(<16 x i64>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P9-NEXT: lxv vs2, 48(r4) ; CHECK-P9-NEXT: xxswapd vs8, vs2 ; CHECK-P9-NEXT: xscvspdpn f8, vs8 -; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-P9-NEXT: stxv vs5, 32(r3) -; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6 +; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xxmrghd vs3, vs7, vs3 ; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3 +; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 +; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6 +; CHECK-P9-NEXT: stxv vs6, 64(r3) ; CHECK-P9-NEXT: xscvspdpn f7, vs7 ; CHECK-P9-NEXT: xxmrghd vs7, vs8, vs7 ; CHECK-P9-NEXT: xscvspdpn f8, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: stxv vs6, 64(r3) -; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xxmrghd vs2, vs8, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3 ; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: stxv vs3, 80(r3) -; CHECK-P9-NEXT: xxmrghd vs2, vs8, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2 -; CHECK-P9-NEXT: stxv vs2, 112(r3) ; CHECK-P9-NEXT: stxv vs7, 96(r3) +; CHECK-P9-NEXT: stxv vs2, 112(r3) ; CHECK-P9-NEXT: stxv vs4, 48(r3) +; CHECK-P9-NEXT: stxv vs5, 32(r3) ; CHECK-P9-NEXT: stxv vs0, 16(r3) ; CHECK-P9-NEXT: stxv vs1, 0(r3) ; CHECK-P9-NEXT: blr @@ -738,27 +738,27 @@ define void @test16elt_signed(<16 x i64>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: lxv vs2, 48(r4) ; CHECK-P9-NEXT: xxswapd vs8, vs2 ; CHECK-P9-NEXT: xscvspdpn f8, vs8 -; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-P9-NEXT: stxv vs5, 32(r3) -; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6 +; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xxmrghd vs3, vs7, vs3 ; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3 +; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 +; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6 +; CHECK-P9-NEXT: stxv vs6, 64(r3) ; CHECK-P9-NEXT: xscvspdpn f7, vs7 ; CHECK-P9-NEXT: xxmrghd vs7, vs8, vs7 ; CHECK-P9-NEXT: xscvspdpn f8, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: stxv vs6, 64(r3) -; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xxmrghd vs2, vs8, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3 ; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: stxv vs3, 80(r3) -; CHECK-P9-NEXT: xxmrghd vs2, vs8, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2 -; CHECK-P9-NEXT: stxv vs2, 112(r3) ; CHECK-P9-NEXT: stxv vs7, 96(r3) +; CHECK-P9-NEXT: stxv vs2, 112(r3) ; CHECK-P9-NEXT: stxv vs4, 48(r3) +; CHECK-P9-NEXT: stxv vs5, 32(r3) ; CHECK-P9-NEXT: stxv vs0, 16(r3) ; CHECK-P9-NEXT: stxv vs1, 0(r3) ; CHECK-P9-NEXT: blr diff --git a/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll index f03d0696f21..883cf7e5170 100644 --- a/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll +++ b/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll @@ -712,24 +712,24 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, ; CHECK-P9-NEXT: vextsh2d v2, v2 ; CHECK-P9-NEXT: xvcvsxddp vs3, v2 ; CHECK-P9-NEXT: vperm v2, v4, v4, v3 +; CHECK-P9-NEXT: stxv vs2, 32(r3) ; CHECK-P9-NEXT: vextsh2d v2, v2 ; CHECK-P9-NEXT: stxv vs3, 48(r3) +; CHECK-P9-NEXT: stxv vs1, 16(r3) ; CHECK-P9-NEXT: xvcvsxddp vs4, v2 ; CHECK-P9-NEXT: vperm v2, v4, v4, v5 ; CHECK-P9-NEXT: vextsh2d v2, v2 ; CHECK-P9-NEXT: xvcvsxddp vs5, v2 ; CHECK-P9-NEXT: vperm v2, v4, v4, v0 ; CHECK-P9-NEXT: stxv vs4, 64(r3) -; CHECK-P9-NEXT: stxv vs5, 80(r3) ; CHECK-P9-NEXT: vextsh2d v2, v2 ; CHECK-P9-NEXT: xvcvsxddp vs6, v2 ; CHECK-P9-NEXT: vperm v2, v4, v4, v1 -; CHECK-P9-NEXT: vextsh2d v2, v2 +; CHECK-P9-NEXT: stxv vs5, 80(r3) ; CHECK-P9-NEXT: stxv vs6, 96(r3) +; CHECK-P9-NEXT: vextsh2d v2, v2 ; CHECK-P9-NEXT: xvcvsxddp vs7, v2 ; CHECK-P9-NEXT: stxv vs7, 112(r3) -; CHECK-P9-NEXT: stxv vs2, 32(r3) -; CHECK-P9-NEXT: stxv vs1, 16(r3) ; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: blr ; diff --git a/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll index 08e6f70bbe6..66e85e9f81b 100644 --- a/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll +++ b/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll @@ -239,18 +239,18 @@ define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i ; CHECK-P9-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v0 ; CHECK-P9-NEXT: lxv v5, 64(r4) +; CHECK-P9-NEXT: stxv v1, 0(r3) ; CHECK-P9-NEXT: xxsldwi v0, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v5 ; CHECK-P9-NEXT: lxv v4, 80(r4) ; CHECK-P9-NEXT: vpkudum v0, v0, v6 +; CHECK-P9-NEXT: stxv v0, 16(r3) ; CHECK-P9-NEXT: xxsldwi v5, vs0, vs0, 3 -; CHECK-P9-NEXT: lxv v3, 96(r4) ; CHECK-P9-NEXT: xvcvuxdsp vs0, v4 +; CHECK-P9-NEXT: lxv v3, 96(r4) ; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v3 ; CHECK-P9-NEXT: lxv v2, 112(r4) -; CHECK-P9-NEXT: stxv v0, 16(r3) -; CHECK-P9-NEXT: stxv v1, 0(r3) ; CHECK-P9-NEXT: vpkudum v4, v4, v5 ; CHECK-P9-NEXT: stxv v4, 32(r3) ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 @@ -275,18 +275,18 @@ define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i ; CHECK-BE-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v0 ; CHECK-BE-NEXT: lxv v5, 80(r4) +; CHECK-BE-NEXT: stxv v1, 0(r3) ; CHECK-BE-NEXT: xxsldwi v0, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v5 ; CHECK-BE-NEXT: lxv v4, 64(r4) ; CHECK-BE-NEXT: vpkudum v0, v0, v6 +; CHECK-BE-NEXT: stxv v0, 16(r3) ; CHECK-BE-NEXT: xxsldwi v5, vs0, vs0, 3 -; CHECK-BE-NEXT: lxv v3, 112(r4) ; CHECK-BE-NEXT: xvcvuxdsp vs0, v4 +; CHECK-BE-NEXT: lxv v3, 112(r4) ; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v3 ; CHECK-BE-NEXT: lxv v2, 96(r4) -; CHECK-BE-NEXT: stxv v0, 16(r3) -; CHECK-BE-NEXT: stxv v1, 0(r3) ; CHECK-BE-NEXT: vpkudum v4, v4, v5 ; CHECK-BE-NEXT: stxv v4, 32(r3) ; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 3 @@ -532,18 +532,18 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, ; CHECK-P9-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v0 ; CHECK-P9-NEXT: lxv v5, 64(r4) +; CHECK-P9-NEXT: stxv v1, 0(r3) ; CHECK-P9-NEXT: xxsldwi v0, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v5 ; CHECK-P9-NEXT: lxv v4, 80(r4) ; CHECK-P9-NEXT: vpkudum v0, v0, v6 +; CHECK-P9-NEXT: stxv v0, 16(r3) ; CHECK-P9-NEXT: xxsldwi v5, vs0, vs0, 3 -; CHECK-P9-NEXT: lxv v3, 96(r4) ; CHECK-P9-NEXT: xvcvsxdsp vs0, v4 +; CHECK-P9-NEXT: lxv v3, 96(r4) ; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v3 ; CHECK-P9-NEXT: lxv v2, 112(r4) -; CHECK-P9-NEXT: stxv v0, 16(r3) -; CHECK-P9-NEXT: stxv v1, 0(r3) ; CHECK-P9-NEXT: vpkudum v4, v4, v5 ; CHECK-P9-NEXT: stxv v4, 32(r3) ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 @@ -568,18 +568,18 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, ; CHECK-BE-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v0 ; CHECK-BE-NEXT: lxv v5, 80(r4) +; CHECK-BE-NEXT: stxv v1, 0(r3) ; CHECK-BE-NEXT: xxsldwi v0, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v5 ; CHECK-BE-NEXT: lxv v4, 64(r4) ; CHECK-BE-NEXT: vpkudum v0, v0, v6 +; CHECK-BE-NEXT: stxv v0, 16(r3) ; CHECK-BE-NEXT: xxsldwi v5, vs0, vs0, 3 -; CHECK-BE-NEXT: lxv v3, 112(r4) ; CHECK-BE-NEXT: xvcvsxdsp vs0, v4 +; CHECK-BE-NEXT: lxv v3, 112(r4) ; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v3 ; CHECK-BE-NEXT: lxv v2, 96(r4) -; CHECK-BE-NEXT: stxv v0, 16(r3) -; CHECK-BE-NEXT: stxv v1, 0(r3) ; CHECK-BE-NEXT: vpkudum v4, v4, v5 ; CHECK-BE-NEXT: stxv v4, 32(r3) ; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 3