mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
71632b974c
On PowerPC, VSRpRC represents pairs of even and odd VSX registers, and VRRC corresponds to the upper 32 VSX registers. In some cases, extra copies are produced when incoming VRRC arguments are used with VSRpRC. This patch changes the allocation order of VSRpRC to eliminate this kind of copy. Stack frame sizes may increase when non-volatile registers are allocated, and some other vector copies still occur; these need to be fixed in future changes. Reviewed By: nemanjai Differential Revision: https://reviews.llvm.org/D104855
758 lines
28 KiB
LLVM
758 lines
28 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE

; assemble_acc
; Builds an accumulator from four copies of %vc with the assemble.acc
; intrinsic and stores the 512-bit result through %ptr. The expected output
; copies v2 into vs0-vs3 and emits four 16-byte stores; the little-endian and
; big-endian expectations differ in the register-to-offset mapping and in the
; order of the stores.
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)

define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: ass_acc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: xxlor vs2, v2, v2
; CHECK-NEXT: xxlor vs3, v3, v3
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
; CHECK-NEXT: stxv vs3, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ass_acc:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vmr v3, v2
; CHECK-BE-NEXT: xxlor vs0, v2, v2
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: xxlor vs2, v2, v2
; CHECK-BE-NEXT: xxlor vs3, v3, v3
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  store <512 x i1> %0, <512 x i1>* %ptr, align 64
  ret void
}

; xxmtacc
; Assembles an accumulator from four copies of %vc, passes it through the
; xxmtacc intrinsic and stores the result through %ptr.
declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>)

define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: int_xxmtacc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: xxlor vs2, v2, v2
; CHECK-NEXT: xxlor vs3, v3, v3
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
; CHECK-NEXT: stxv vs3, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: int_xxmtacc:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vmr v3, v2
; CHECK-BE-NEXT: xxlor vs0, v2, v2
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: xxlor vs2, v2, v2
; CHECK-BE-NEXT: xxlor vs3, v3, v3
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
entry:
; The assemble.acc call, the xxmtacc call and the store could each introduce a
; prime/unprime instruction. The expected output above keeps a single xxmtacc
; and contains no xxmfacc.
; NOTE(review): an earlier version of this comment described two xxmtacc and
; one xxmfacc; confirm which pass removes the redundant ones.
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %0)
  store <512 x i1> %1, <512 x i1>* %ptr, align 64
  ret void
}

; xxmfacc
; Assembles an accumulator from four copies of %vc, passes it through the
; xxmfacc intrinsic and stores the result through %ptr.
declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>)

define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: int_xxmfacc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: xxlor vs2, v2, v2
; CHECK-NEXT: xxlor vs3, v3, v3
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
; CHECK-NEXT: stxv vs3, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: int_xxmfacc:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vmr v3, v2
; CHECK-BE-NEXT: xxlor vs0, v2, v2
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: xxlor vs2, v2, v2
; CHECK-BE-NEXT: xxlor vs3, v3, v3
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
entry:
; The assemble.acc call, the xxmfacc call and the store could each introduce a
; prime/unprime instruction. The expected output above contains neither an
; xxmtacc nor an xxmfacc: the vector copies are stored directly.
; NOTE(review): an earlier version of this comment described one xxmtacc and
; two xxmfacc; confirm which pass removes them.
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %0)
  store <512 x i1> %1, <512 x i1>* %ptr, align 64
  ret void
}

; xxsetaccz
; Zeroes an accumulator with the xxsetaccz intrinsic and stores it through
; %ptr. The expected output zeroes acc0, moves it out with xxmfacc and writes
; vs0-vs3 with four 16-byte stores.
declare <512 x i1> @llvm.ppc.mma.xxsetaccz()

define void @int_xxsetaccz(<512 x i1>* %ptr) {
; CHECK-LABEL: int_xxsetaccz:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxsetaccz acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
; CHECK-NEXT: stxv vs3, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: int_xxsetaccz:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsetaccz acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  store <512 x i1> %0, <512 x i1>* %ptr, align 64
  ret void
}

; disassemble_acc
; Zeroes an accumulator, splits it into four <16 x i8> values with the
; disassemble.acc intrinsic and stores element i of the result through
; %ptr(i+1). In the little-endian expectations vs3 is written to %ptr1 and vs0
; to %ptr4; in the big-endian expectations the order is vs0 through vs3.
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)

define void @disass_acc(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <16 x i8>* %ptr4) {
; CHECK-LABEL: disass_acc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxsetaccz acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs3, 0(r3)
; CHECK-NEXT: stxv vs2, 0(r4)
; CHECK-NEXT: stxv vs1, 0(r5)
; CHECK-NEXT: stxv vs0, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: disass_acc:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsetaccz acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r4)
; CHECK-BE-NEXT: stxv vs2, 0(r5)
; CHECK-BE-NEXT: stxv vs3, 0(r6)
; CHECK-BE-NEXT: blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
  store <16 x i8> %2, <16 x i8>* %ptr1, align 16
  store <16 x i8> %3, <16 x i8>* %ptr2, align 16
  store <16 x i8> %4, <16 x i8>* %ptr3, align 16
  store <16 x i8> %5, <16 x i8>* %ptr4, align 16
  ret void
}

declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>)

; Accumulator value that reaches a store through a phi.
; When %val is non-zero the accumulator is zeroed with xxsetaccz; otherwise it
; is loaded from %ptr and updated with xvi4ger8pp. The merged value is stored
; back through %ptr. The expected output keeps both paths in acc0 and emits a
; single xxmfacc in the join block before the stores.
define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-LABEL: testBranch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmplwi r7, 0
; CHECK-NEXT: beq cr0, .LBB5_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: xxsetaccz acc0
; CHECK-NEXT: b .LBB5_3
; CHECK-NEXT: .LBB5_2: # %if.else
; CHECK-NEXT: lxv vs1, 32(r3)
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xvi4ger8pp acc0, v2, v2
; CHECK-NEXT: .LBB5_3: # %if.end
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
; CHECK-NEXT: stxv vs3, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testBranch:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: cmplwi r7, 0
; CHECK-BE-NEXT: beq cr0, .LBB5_2
; CHECK-BE-NEXT: # %bb.1: # %if.then
; CHECK-BE-NEXT: xxsetaccz acc0
; CHECK-BE-NEXT: b .LBB5_3
; CHECK-BE-NEXT: .LBB5_2: # %if.else
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2
; CHECK-BE-NEXT: .LBB5_3: # %if.end
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
entry:
  %tobool = icmp eq i32 %val, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  br label %if.end

if.else:
  %1 = load <512 x i1>, <512 x i1>* %ptr, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  br label %if.end

if.end:
  %vq1.0 = phi <512 x i1> [ %0, %if.then ], [ %2, %if.else ]
  store <512 x i1> %vq1.0, <512 x i1>* %ptr, align 64
  ret void
}

; The following test cases check that the xxsetaccz instruction is correctly rematerialized
declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>)

; Two identical xxsetaccz + xvf32gerpp sequences whose results are stored to
; %res[0] and %res[1]. The expected output contains one xxsetaccz and one
; xvf32gerpp only; the same vs0-vs3 registers are stored twice, 64 bytes
; apart.
define void @testcse(<512 x i1>* %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxsetaccz acc0
; CHECK-NEXT: xvf32gerpp acc0, v2, v2
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
; CHECK-NEXT: stxv vs3, 0(r3)
; CHECK-NEXT: stxv vs0, 112(r3)
; CHECK-NEXT: stxv vs1, 96(r3)
; CHECK-NEXT: stxv vs2, 80(r3)
; CHECK-NEXT: stxv vs3, 64(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testcse:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsetaccz acc0
; CHECK-BE-NEXT: xvf32gerpp acc0, v2, v2
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 80(r3)
; CHECK-BE-NEXT: stxv vs0, 64(r3)
; CHECK-BE-NEXT: stxv vs3, 112(r3)
; CHECK-BE-NEXT: stxv vs2, 96(r3)
; CHECK-BE-NEXT: blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0
  %5 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1
  store <512 x i1> %2, <512 x i1>* %4, align 64
  store <512 x i1> %3, <512 x i1>* %5, align 64
  ret void
}

; Two separately zeroed accumulators feed two different operations
; (xvf32gerpp and xvf32gerpn); the results go to %res[0] and %res[1]. The
; expected output zeroes acc0 and acc1 individually and stores vs4-vs7 (acc1)
; to the first slot and vs0-vs3 (acc0) to the second.
define void @testcse2(<512 x i1>* %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxsetaccz acc0
; CHECK-NEXT: xxsetaccz acc1
; CHECK-NEXT: xvf32gerpp acc1, v2, v2
; CHECK-NEXT: xvf32gerpn acc0, v2, v2
; CHECK-NEXT: xxmfacc acc1
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs4, 48(r3)
; CHECK-NEXT: stxv vs5, 32(r3)
; CHECK-NEXT: stxv vs6, 16(r3)
; CHECK-NEXT: stxv vs7, 0(r3)
; CHECK-NEXT: stxv vs0, 112(r3)
; CHECK-NEXT: stxv vs1, 96(r3)
; CHECK-NEXT: stxv vs2, 80(r3)
; CHECK-NEXT: stxv vs3, 64(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testcse2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsetaccz acc0
; CHECK-BE-NEXT: xxsetaccz acc1
; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT: xvf32gerpn acc0, v2, v2
; CHECK-BE-NEXT: xxmfacc acc1
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs5, 16(r3)
; CHECK-BE-NEXT: stxv vs4, 0(r3)
; CHECK-BE-NEXT: stxv vs7, 48(r3)
; CHECK-BE-NEXT: stxv vs6, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 80(r3)
; CHECK-BE-NEXT: stxv vs0, 64(r3)
; CHECK-BE-NEXT: stxv vs3, 112(r3)
; CHECK-BE-NEXT: stxv vs2, 96(r3)
; CHECK-BE-NEXT: blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0
  %5 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1
  store <512 x i1> %2, <512 x i1>* %4, align 64
  store <512 x i1> %3, <512 x i1>* %5, align 64
  ret void
}

; A single xxsetaccz result (%0) is consumed by two different operations
; (xvf32gerpp and xvf32gerpn). The expected output still contains two
; xxsetaccz instructions, one per accumulator (acc0 and acc1), so each
; consumer gets its own zeroed accumulator.
define void @testcse3(<512 x i1>* %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxsetaccz acc0
; CHECK-NEXT: xxsetaccz acc1
; CHECK-NEXT: xvf32gerpp acc1, v2, v2
; CHECK-NEXT: xvf32gerpn acc0, v2, v2
; CHECK-NEXT: xxmfacc acc1
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs4, 48(r3)
; CHECK-NEXT: stxv vs5, 32(r3)
; CHECK-NEXT: stxv vs6, 16(r3)
; CHECK-NEXT: stxv vs7, 0(r3)
; CHECK-NEXT: stxv vs0, 112(r3)
; CHECK-NEXT: stxv vs1, 96(r3)
; CHECK-NEXT: stxv vs2, 80(r3)
; CHECK-NEXT: stxv vs3, 64(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testcse3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsetaccz acc0
; CHECK-BE-NEXT: xxsetaccz acc1
; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT: xvf32gerpn acc0, v2, v2
; CHECK-BE-NEXT: xxmfacc acc1
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs5, 16(r3)
; CHECK-BE-NEXT: stxv vs4, 0(r3)
; CHECK-BE-NEXT: stxv vs7, 48(r3)
; CHECK-BE-NEXT: stxv vs6, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 80(r3)
; CHECK-BE-NEXT: stxv vs0, 64(r3)
; CHECK-BE-NEXT: stxv vs3, 112(r3)
; CHECK-BE-NEXT: stxv vs2, 96(r3)
; CHECK-BE-NEXT: blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0
  %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1
  store <512 x i1> %1, <512 x i1>* %3, align 64
  store <512 x i1> %2, <512 x i1>* %4, align 64
  ret void
}

; Loop variant: each of the %lim iterations zeroes three accumulators, loads
; six consecutive vectors from %vc (index 6*i .. 6*i+5), applies xvf32gerpp,
; xvf32gerpn and xvf32gernp to the three pairs and stores the three results to
; %res[3*i .. 3*i+2]. The expected loop body contains three xxsetaccz
; instructions (acc2, acc1, acc0) inside the loop and is driven by the count
; register (mtctr / bdnz).
define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
; CHECK-LABEL: testcse4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpwi r4, 1
; CHECK-NEXT: bltlr cr0
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: clrldi r4, r4, 32
; CHECK-NEXT: li r6, 0
; CHECK-NEXT: mtctr r4
; CHECK-NEXT: li r4, 0
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB9_2: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: rldic r7, r6, 4, 28
; CHECK-NEXT: xxsetaccz acc2
; CHECK-NEXT: xxsetaccz acc1
; CHECK-NEXT: addi r6, r6, 6
; CHECK-NEXT: lxvx vs0, r5, r7
; CHECK-NEXT: add r7, r5, r7
; CHECK-NEXT: lxv vs1, 16(r7)
; CHECK-NEXT: xvf32gerpp acc2, vs0, vs1
; CHECK-NEXT: lxv vs0, 32(r7)
; CHECK-NEXT: lxv vs1, 48(r7)
; CHECK-NEXT: xxmfacc acc2
; CHECK-NEXT: xvf32gerpn acc1, vs0, vs1
; CHECK-NEXT: lxv vs12, 64(r7)
; CHECK-NEXT: lxv vs13, 80(r7)
; CHECK-NEXT: rldic r7, r4, 6, 26
; CHECK-NEXT: xxsetaccz acc0
; CHECK-NEXT: addi r4, r4, 3
; CHECK-NEXT: xxmfacc acc1
; CHECK-NEXT: xvf32gernp acc0, vs12, vs13
; CHECK-NEXT: stxvx vs11, r3, r7
; CHECK-NEXT: add r7, r3, r7
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs8, 48(r7)
; CHECK-NEXT: stxv vs9, 32(r7)
; CHECK-NEXT: stxv vs10, 16(r7)
; CHECK-NEXT: stxv vs4, 112(r7)
; CHECK-NEXT: stxv vs5, 96(r7)
; CHECK-NEXT: stxv vs6, 80(r7)
; CHECK-NEXT: stxv vs7, 64(r7)
; CHECK-NEXT: stxv vs0, 176(r7)
; CHECK-NEXT: stxv vs1, 160(r7)
; CHECK-NEXT: stxv vs2, 144(r7)
; CHECK-NEXT: stxv vs3, 128(r7)
; CHECK-NEXT: bdnz .LBB9_2
; CHECK-NEXT: # %bb.3: # %for.cond.cleanup
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testcse4:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: cmpwi r4, 1
; CHECK-BE-NEXT: bltlr cr0
; CHECK-BE-NEXT: # %bb.1: # %for.body.preheader
; CHECK-BE-NEXT: clrldi r4, r4, 32
; CHECK-BE-NEXT: li r6, 0
; CHECK-BE-NEXT: mtctr r4
; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: .p2align 4
; CHECK-BE-NEXT: .LBB9_2: # %for.body
; CHECK-BE-NEXT: #
; CHECK-BE-NEXT: rldic r7, r6, 4, 28
; CHECK-BE-NEXT: xxsetaccz acc2
; CHECK-BE-NEXT: xxsetaccz acc1
; CHECK-BE-NEXT: addi r6, r6, 6
; CHECK-BE-NEXT: lxvx vs0, r5, r7
; CHECK-BE-NEXT: add r7, r5, r7
; CHECK-BE-NEXT: lxv vs1, 16(r7)
; CHECK-BE-NEXT: xvf32gerpp acc2, vs0, vs1
; CHECK-BE-NEXT: lxv vs0, 32(r7)
; CHECK-BE-NEXT: lxv vs1, 48(r7)
; CHECK-BE-NEXT: xxmfacc acc2
; CHECK-BE-NEXT: xvf32gerpn acc1, vs0, vs1
; CHECK-BE-NEXT: lxv vs12, 64(r7)
; CHECK-BE-NEXT: lxv vs13, 80(r7)
; CHECK-BE-NEXT: rldic r7, r4, 6, 26
; CHECK-BE-NEXT: xxsetaccz acc0
; CHECK-BE-NEXT: addi r4, r4, 3
; CHECK-BE-NEXT: xxmfacc acc1
; CHECK-BE-NEXT: xvf32gernp acc0, vs12, vs13
; CHECK-BE-NEXT: stxvx vs8, r3, r7
; CHECK-BE-NEXT: add r7, r3, r7
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs9, 16(r7)
; CHECK-BE-NEXT: stxv vs11, 48(r7)
; CHECK-BE-NEXT: stxv vs10, 32(r7)
; CHECK-BE-NEXT: stxv vs5, 80(r7)
; CHECK-BE-NEXT: stxv vs4, 64(r7)
; CHECK-BE-NEXT: stxv vs7, 112(r7)
; CHECK-BE-NEXT: stxv vs6, 96(r7)
; CHECK-BE-NEXT: stxv vs1, 144(r7)
; CHECK-BE-NEXT: stxv vs0, 128(r7)
; CHECK-BE-NEXT: stxv vs3, 176(r7)
; CHECK-BE-NEXT: stxv vs2, 160(r7)
; CHECK-BE-NEXT: bdnz .LBB9_2
; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup
; CHECK-BE-NEXT: blr
entry:
  %cmp55 = icmp sgt i32 %lim, 0
  br i1 %cmp55, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
  %wide.trip.count = zext i32 %lim to i64
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %for.body, %for.body.preheader
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %3 = trunc i64 %indvars.iv to i32
  %mul = mul nsw i32 %3, 6
  %idxprom = zext i32 %mul to i64
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom
  %4 = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %add2 = or i32 %mul, 1
  %idxprom3 = zext i32 %add2 to i64
  %arrayidx4 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom3
  %5 = load <16 x i8>, <16 x i8>* %arrayidx4, align 16
  %6 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %4, <16 x i8> %5)
  %add6 = add nuw nsw i32 %mul, 2
  %idxprom7 = zext i32 %add6 to i64
  %arrayidx8 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom7
  %7 = load <16 x i8>, <16 x i8>* %arrayidx8, align 16
  %add10 = add nuw nsw i32 %mul, 3
  %idxprom11 = zext i32 %add10 to i64
  %arrayidx12 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom11
  %8 = load <16 x i8>, <16 x i8>* %arrayidx12, align 16
  %9 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %7, <16 x i8> %8)
  %add14 = add nuw nsw i32 %mul, 4
  %idxprom15 = zext i32 %add14 to i64
  %arrayidx16 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom15
  %10 = load <16 x i8>, <16 x i8>* %arrayidx16, align 16
  %add18 = add nuw nsw i32 %mul, 5
  %idxprom19 = zext i32 %add18 to i64
  %arrayidx20 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom19
  %11 = load <16 x i8>, <16 x i8>* %arrayidx20, align 16
  %12 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %2, <16 x i8> %10, <16 x i8> %11)
  %mul21 = mul i64 %indvars.iv, 3
  %idx.ext = and i64 %mul21, 4294967295
  %add.ptr = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 %idx.ext
  store <512 x i1> %6, <512 x i1>* %add.ptr, align 64
  %add.ptr26 = getelementptr inbounds <512 x i1>, <512 x i1>* %add.ptr, i64 1
  store <512 x i1> %9, <512 x i1>* %add.ptr26, align 64
  %add.ptr30 = getelementptr inbounds <512 x i1>, <512 x i1>* %add.ptr, i64 2
  store <512 x i1> %12, <512 x i1>* %add.ptr30, align 64
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

declare i32 @testRedundantPrimeUnprimeF()

; A zeroed accumulator is stored to %dst[0] and also fed to xvf32gerpp; the
; xvf32gerpp result stays live across a call to an external function and is
; then stored to %dst[1].
; In the expected output the xvf32gerpp result is moved out with xxmfacc,
; spilled with two stxvp stores before the call, reloaded with two lxvp loads
; into vs0-vs3 afterwards and stored directly. No xxmtacc or xxmfacc follows
; the reload.
define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind {
; CHECK-LABEL: testRedundantPrimeUnprime:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -112(r1)
; CHECK-NEXT: xxsetaccz acc0
; CHECK-NEXT: xxsetaccz acc1
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
; CHECK-NEXT: stxv vs3, 0(r3)
; CHECK-NEXT: xvf32gerpp acc1, v2, v2
; CHECK-NEXT: xxmfacc acc1
; CHECK-NEXT: stxvp vsp4, 64(r1)
; CHECK-NEXT: stxvp vsp6, 32(r1)
; CHECK-NEXT: bl testRedundantPrimeUnprimeF@notoc
; CHECK-NEXT: lxvp vsp0, 64(r1)
; CHECK-NEXT: lxvp vsp2, 32(r1)
; CHECK-NEXT: stxv vs0, 112(r30)
; CHECK-NEXT: stxv vs1, 96(r30)
; CHECK-NEXT: stxv vs2, 80(r30)
; CHECK-NEXT: stxv vs3, 64(r30)
; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testRedundantPrimeUnprime:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mflr r0
; CHECK-BE-NEXT: std r0, 16(r1)
; CHECK-BE-NEXT: stdu r1, -192(r1)
; CHECK-BE-NEXT: xxsetaccz acc0
; CHECK-BE-NEXT: xxsetaccz acc1
; CHECK-BE-NEXT: std r30, 176(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: mr r30, r3
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT: xxmfacc acc1
; CHECK-BE-NEXT: stxvp vsp4, 112(r1)
; CHECK-BE-NEXT: stxvp vsp6, 144(r1)
; CHECK-BE-NEXT: bl testRedundantPrimeUnprimeF
; CHECK-BE-NEXT: nop
; CHECK-BE-NEXT: lxvp vsp0, 112(r1)
; CHECK-BE-NEXT: lxvp vsp2, 144(r1)
; CHECK-BE-NEXT: stxv vs3, 112(r30)
; CHECK-BE-NEXT: stxv vs2, 96(r30)
; CHECK-BE-NEXT: stxv vs1, 80(r30)
; CHECK-BE-NEXT: stxv vs0, 64(r30)
; CHECK-BE-NEXT: ld r30, 176(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT: addi r1, r1, 192
; CHECK-BE-NEXT: ld r0, 16(r1)
; CHECK-BE-NEXT: mtlr r0
; CHECK-BE-NEXT: blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  store <512 x i1> %0, <512 x i1>* %dst, align 64
  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %call = tail call signext i32 bitcast (i32 ()* @testRedundantPrimeUnprimeF to i32 ()*)()
  %add.ptr1 = getelementptr inbounds <512 x i1>, <512 x i1>* %dst, i64 1
  store <512 x i1> %1, <512 x i1>* %add.ptr1, align 64
  ret void
}

declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*)

; Loads an accumulator from %vqp and a vector pair from %vpp + 8 bytes (via
; the lxvp intrinsic), combines them with pmxvf64gernn (both immediates 0) and
; stores the result through %resp. The expected output materialises the
; 8-byte offset in r3 and uses the indexed lxvpx form for the pair load.
; Function Attrs: nofree nounwind
define void @test_ldst_1(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test_ldst_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv vs1, 32(r3)
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
; CHECK-NEXT: li r3, 8
; CHECK-NEXT: lxvpx vsp36, r4, r3
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r7)
; CHECK-NEXT: stxv vs1, 32(r7)
; CHECK-NEXT: stxv vs2, 16(r7)
; CHECK-NEXT: stxv vs3, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: li r3, 8
; CHECK-BE-NEXT: lxvpx vsp36, r4, r3
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r7)
; CHECK-BE-NEXT: stxv vs0, 0(r7)
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = bitcast <256 x i1>* %vpp to i8*
  %3 = getelementptr i8, i8* %2, i64 8
  %4 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %3)
  %5 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %1, <256 x i1> %4, <16 x i8> %vc, i32 0, i32 0)
  %6 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %5, <512 x i1>* %6, align 64
  ret void
}

; Function Attrs: nofree nounwind
; Loads an accumulator from %vqp and a vector pair from %vpp (offset 0, via
; the lxvp intrinsic), combines them with xvf64gernp and stores the result
; through %resp. The expected output uses the displacement form
; "lxvp vsp36, 0(r4)" for the pair load.
define void @test_ldst_2(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test_ldst_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv vs1, 32(r3)
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
; CHECK-NEXT: lxvp vsp36, 0(r4)
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xvf64gernp acc0, vsp36, v2
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r7)
; CHECK-NEXT: stxv vs1, 32(r7)
; CHECK-NEXT: stxv vs2, 16(r7)
; CHECK-NEXT: stxv vs3, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r7)
; CHECK-BE-NEXT: stxv vs0, 0(r7)
; CHECK-BE-NEXT: stxv vs3, 48(r7)
; CHECK-BE-NEXT: stxv vs2, 32(r7)
; CHECK-BE-NEXT: blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = bitcast <256 x i1>* %vpp to i8*
  %3 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %2)
  %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
  %5 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %4, <512 x i1>* %5, align 64
  ret void
}

; Function Attrs: nofree nounwind
; Same computation as test_ldst_2 (accumulator from %vqp, vector pair from
; %vpp at offset 0, xvf64gernp, store through %resp) but with an extra i64
; parameter, which moves %vpp to r5 and %resp to r9 in the expected output.
; NOTE(review): %offs is never used in the body. It looks like the pair load
; was meant to be offset by it; confirm against upstream before changing the
; IR, since the assertions below would need to be regenerated.
define void @test_ldst_3(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test_ldst_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv vs1, 32(r3)
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
; CHECK-NEXT: lxvp vsp36, 0(r5)
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xvf64gernp acc0, vsp36, v2
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r9)
; CHECK-NEXT: stxv vs1, 32(r9)
; CHECK-NEXT: stxv vs2, 16(r9)
; CHECK-NEXT: stxv vs3, 0(r9)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: lxvp vsp36, 0(r5)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r9)
; CHECK-BE-NEXT: stxv vs0, 0(r9)
; CHECK-BE-NEXT: stxv vs3, 48(r9)
; CHECK-BE-NEXT: stxv vs2, 32(r9)
; CHECK-BE-NEXT: blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = bitcast <256 x i1>* %vpp to i8*
  %3 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %2)
  %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
  %5 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %4, <512 x i1>* %5, align 64
  ret void
}

; MMA intrinsics taking a vector pair, used by the test_ldst_* functions.
declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)