[PowerPC] Rename the vector pair intrinsics and builtins to replace the _mma_ prefix by _vsx_
On PPC, the vector pair instructions are independent of MMA. This patch renames the vector pair LLVM intrinsics and Clang builtins, replacing the _mma_ prefix with _vsx_ in their names. We also move the vector pair type/intrinsic/builtin tests into their own files.

Differential Revision: https://reviews.llvm.org/D91974
commit 5cbcf5677b
parent 13f5c4b392
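For a quick sense of the renamed surface, here is a minimal LLVM IR sketch assembled from the intrinsic declarations in this patch; the function @pair_roundtrip and its structure are illustrative only, modeled on the updated tests below.

declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*)
declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)

; Load a 256-bit vector pair, split it into its two 128-bit subvectors,
; reassemble it, and store it back through the renamed vsx intrinsics.
define void @pair_roundtrip(<256 x i1>* %src, <256 x i1>* %dst) {
entry:
  %p0 = bitcast <256 x i1>* %src to i8*
  %pair = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %p0)
  %halves = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %pair)
  %v0 = extractvalue { <16 x i8>, <16 x i8> } %halves, 0
  %v1 = extractvalue { <16 x i8>, <16 x i8> } %halves, 1
  %rebuilt = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %v0, <16 x i8> %v1)
  %p1 = bitcast <256 x i1>* %dst to i8*
  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %rebuilt, i8* %p1)
  ret void
}

The accumulator intrinsics keep the mma prefix (llvm.ppc.mma.xxmtacc, llvm.ppc.mma.disassemble.acc, and so on); only the vector pair operations move to the vsx namespace, as the diff below shows.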
@@ -1132,12 +1132,8 @@ def int_ppc_vsx_lxvl :
def int_ppc_vsx_lxvll :
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem,
IntrArgMemOnly]>;
def int_ppc_vsx_stxvl :
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_vsx_stxvll :
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_vsx_lxvp :
Intrinsic<[llvm_v256i1_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;

// Vector store.
def int_ppc_vsx_stxvw4x : Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
@@ -1148,6 +1144,15 @@ def int_ppc_vsx_stxvw4x_be : Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_vsx_stxvd2x_be : Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_vsx_stxvl :
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_vsx_stxvll :
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_vsx_stxvp :
Intrinsic<[], [llvm_v256i1_ty, llvm_ptr_ty], [IntrWriteMem,
IntrArgMemOnly]>;
// Vector and scalar maximum.
def int_ppc_vsx_xvmaxdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmaxdp">;
def int_ppc_vsx_xvmaxsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvmaxsp">;
@@ -1406,6 +1411,14 @@ def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
}

let TargetPrefix = "ppc" in {
def int_ppc_vsx_assemble_pair :
Intrinsic<[llvm_v256i1_ty],
[llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;

def int_ppc_vsx_disassemble_pair :
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty],
[llvm_v256i1_ty], [IntrNoMem]>;

def int_ppc_mma_assemble_acc :
Intrinsic<[llvm_v512i1_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -1415,14 +1428,6 @@ let TargetPrefix = "ppc" in {
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[llvm_v512i1_ty], [IntrNoMem]>;

def int_ppc_mma_assemble_pair :
Intrinsic<[llvm_v256i1_ty],
[llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;

def int_ppc_mma_disassemble_pair :
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty],
[llvm_v256i1_ty], [IntrNoMem]>;

def int_ppc_mma_xxmtacc :
Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>;

@@ -1432,14 +1437,6 @@ let TargetPrefix = "ppc" in {
def int_ppc_mma_xxsetaccz :
Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;

def int_ppc_mma_lxvp :
Intrinsic<[llvm_v256i1_ty], [llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;

def int_ppc_mma_stxvp :
Intrinsic<[], [llvm_v256i1_ty, llvm_ptr_ty],
[IntrWriteMem, IntrArgMemOnly]>;

// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
defm int_ppc_mma_xvi4ger8 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
@@ -10614,7 +10614,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(PPC::R2, MVT::i32);

case Intrinsic::ppc_mma_disassemble_acc:
case Intrinsic::ppc_mma_disassemble_pair: {
case Intrinsic::ppc_vsx_disassemble_pair: {
int NumVecs = 2;
SDValue WideVec = Op.getOperand(1);
if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
@@ -1617,7 +1617,7 @@ let Predicates = [MMA] in {
let Predicates = [PairedVectorMemops] in {
def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)),
Concats.VecsToVecPair0>;
def : Pat<(v256i1 (int_ppc_mma_assemble_pair v16i8:$vs1, v16i8:$vs0)),
def : Pat<(v256i1 (int_ppc_vsx_assemble_pair v16i8:$vs1, v16i8:$vs0)),
Concats.VecsToVecPair0>;
def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 0))),
(v4i32 (EXTRACT_SUBREG $v, sub_vsx0))>;
@@ -1659,18 +1659,18 @@ let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] i

let Predicates = [PairedVectorMemops] in {
// Intrinsics for Paired Vector Loads.
def : Pat<(v256i1 (int_ppc_mma_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>;
def : Pat<(v256i1 (int_ppc_mma_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>;
def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>;
def : Pat<(v256i1 (int_ppc_vsx_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>;
let Predicates = [PairedVectorMemops, PrefixInstrs] in {
def : Pat<(v256i1 (int_ppc_mma_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>;
def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>;
}
// Intrinsics for Paired Vector Stores.
def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, iaddrX16:$dst),
def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX16:$dst),
(STXVP $XSp, memrix16:$dst)>;
def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, xaddrX16:$dst),
def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, xaddrX16:$dst),
(STXVPX $XSp, xaddrX16:$dst)>;
let Predicates = [PairedVectorMemops, PrefixInstrs] in {
def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, iaddrX34:$dst),
def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX34:$dst),
(PSTXVP $XSp, memri34:$dst)>;
}
}
@@ -276,9 +276,9 @@ static Value *GetPointerOperand(Value *MemI) {
return SMemI->getPointerOperand();
} else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
IMemI->getIntrinsicID() == Intrinsic::ppc_mma_lxvp)
IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp)
return IMemI->getArgOperand(0);
if (IMemI->getIntrinsicID() == Intrinsic::ppc_mma_stxvp)
if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp)
return IMemI->getArgOperand(1);
}

@@ -347,10 +347,10 @@ SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
PtrValue = SMemI->getPointerOperand();
} else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) {
if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
IMemI->getIntrinsicID() == Intrinsic::ppc_mma_lxvp) {
IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
MemI = IMemI;
PtrValue = IMemI->getArgOperand(0);
} else if (IMemI->getIntrinsicID() == Intrinsic::ppc_mma_stxvp) {
} else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
MemI = IMemI;
PtrValue = IMemI->getArgOperand(1);
} else continue;
@@ -834,8 +834,8 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
return false;
// There are no update forms for P10 lxvp/stxvp intrinsic.
auto *II = dyn_cast<IntrinsicInst>(I);
if (II && ((II->getIntrinsicID() == Intrinsic::ppc_mma_lxvp) ||
II->getIntrinsicID() == Intrinsic::ppc_mma_stxvp))
if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) ||
II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp))
return false;
// See getPreIndexedAddressParts, the displacement for LDU/STDU has to
// be 4's multiple (DS-form). For i64 loads/stores when the displacement
@@ -877,8 +877,8 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
// Check if it is a P10 lxvp/stxvp intrinsic.
auto *II = dyn_cast<IntrinsicInst>(I);
if (II)
return II->getIntrinsicID() == Intrinsic::ppc_mma_lxvp ||
II->getIntrinsicID() == Intrinsic::ppc_mma_stxvp;
return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp ||
II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp;
// Check if it is a P9 vector load/store.
return ST && ST->hasP9Vector() &&
(PtrValue->getType()->getPointerElementType()->isVectorTy());
@@ -1224,7 +1224,7 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::ppc_vsx_lxvw4x_be:
case Intrinsic::ppc_vsx_lxvl:
case Intrinsic::ppc_vsx_lxvll:
case Intrinsic::ppc_mma_lxvp: {
case Intrinsic::ppc_vsx_lxvp: {
Info.PtrVal = Inst->getArgOperand(0);
Info.ReadMem = true;
Info.WriteMem = false;
@@ -1241,7 +1241,7 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::ppc_vsx_stxvw4x_be:
case Intrinsic::ppc_vsx_stxvl:
case Intrinsic::ppc_vsx_stxvll:
case Intrinsic::ppc_mma_stxvp: {
case Intrinsic::ppc_vsx_stxvp: {
Info.PtrVal = Inst->getArgOperand(1);
Info.ReadMem = false;
Info.WriteMem = true;
@@ -8,8 +8,8 @@
; This test checks that LSR properly recognizes lxvp/stxvp as load/store
; intrinsics to avoid generating x-form instructions instead of d-forms.

declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*)
define void @foo(i32 zeroext %n, <256 x i1>* %ptr, <256 x i1>* %ptr2) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
@@ -78,24 +78,24 @@ for.cond.cleanup:
for.body:
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
%2 = getelementptr i8, i8* %0, i64 %indvars.iv
%3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
%3 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %2)
%add2 = add nuw nsw i64 %indvars.iv, 32
%4 = getelementptr i8, i8* %0, i64 %add2
%5 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %4)
%5 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %4)
%add4 = add nuw nsw i64 %indvars.iv, 64
%6 = getelementptr i8, i8* %0, i64 %add4
%7 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %6)
%7 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %6)
%add6 = add nuw nsw i64 %indvars.iv, 96
%8 = getelementptr i8, i8* %0, i64 %add6
%9 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %8)
%9 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %8)
%10 = getelementptr i8, i8* %1, i64 %indvars.iv
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %3, i8* %10)
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %3, i8* %10)
%11 = getelementptr i8, i8* %1, i64 %add2
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %5, i8* %11)
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %5, i8* %11)
%12 = getelementptr i8, i8* %1, i64 %add4
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %7, i8* %12)
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %7, i8* %12)
%13 = getelementptr i8, i8* %1, i64 %add6
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %9, i8* %13)
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %9, i8* %13)
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -81,13 +81,13 @@ _loop_1_do_: ; preds = %_loop_1_do_.lr.ph,
%x_ix_dim_0_6 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_5, i64 %i.08
%x_ix_dim_0_ = bitcast %_elem_type_of_x* %x_ix_dim_0_6 to i8*
%0 = getelementptr i8, i8* %x_ix_dim_0_, i64 1
%1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
%2 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %1)
%1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %0)
%2 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %1)
%.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %2, 0
%.fca.1.extract2 = extractvalue { <16 x i8>, <16 x i8> } %2, 1
%3 = getelementptr i8, i8* %x_ix_dim_0_, i64 33
%4 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %3)
%5 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %4)
%4 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %3)
%5 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %4)
%.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %5, 0
%.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %5, 1
%6 = bitcast <16 x i8> %.fca.0.extract1 to <2 x double>
@@ -110,5 +110,5 @@ _return_bb: ; preds = %_loop_1_loopHeader_
ret void
}

declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)
@@ -40,28 +40,6 @@ entry:
ret void
}

; assemble_pair
declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: ass_pair:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: stxv v2, 16(r3)
; CHECK-NEXT: stxv v3, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ass_pair:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vmr v3, v2
; CHECK-BE-NEXT: stxv v2, 16(r3)
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc, <16 x i8> %vc)
store <256 x i1> %0, <256 x i1>* %ptr, align 32
ret void
}

; xxmtacc
declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>)
define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
@@ -202,51 +180,23 @@ entry:
ret void
}

; disassemble_pair
declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) {
; CHECK-LABEL: disass_pair:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv vs1, 0(r3)
; CHECK-NEXT: lxv vs0, 16(r3)
; CHECK-NEXT: stxv vs1, 0(r4)
; CHECK-NEXT: stxv vs0, 0(r5)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: disass_pair:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r4)
; CHECK-BE-NEXT: stxv vs1, 0(r5)
; CHECK-BE-NEXT: blr
entry:
%0 = load <256 x i1>, <256 x i1>* %ptr1, align 32
%1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %0)
%2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
%3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
store <16 x i8> %2, <16 x i8>* %ptr2, align 16
store <16 x i8> %3, <16 x i8>* %ptr3, align 16
ret void
}

declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>)
define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-LABEL: testBranch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmplwi r7, 0
; CHECK-NEXT: beq cr0, .LBB7_2
; CHECK-NEXT: beq cr0, .LBB5_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: xxsetaccz acc0
; CHECK-NEXT: b .LBB7_3
; CHECK-NEXT: .LBB7_2: # %if.else
; CHECK-NEXT: b .LBB5_3
; CHECK-NEXT: .LBB5_2: # %if.else
; CHECK-NEXT: lxv vs1, 32(r3)
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xvi4ger8pp acc0, v2, v2
; CHECK-NEXT: .LBB7_3: # %if.end
; CHECK-NEXT: .LBB5_3: # %if.end
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
@@ -257,18 +207,18 @@ define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-BE-LABEL: testBranch:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: cmplwi r7, 0
; CHECK-BE-NEXT: beq cr0, .LBB7_2
; CHECK-BE-NEXT: beq cr0, .LBB5_2
; CHECK-BE-NEXT: # %bb.1: # %if.then
; CHECK-BE-NEXT: xxsetaccz acc0
; CHECK-BE-NEXT: b .LBB7_3
; CHECK-BE-NEXT: .LBB7_2: # %if.else
; CHECK-BE-NEXT: b .LBB5_3
; CHECK-BE-NEXT: .LBB5_2: # %if.else
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2
; CHECK-BE-NEXT: .LBB7_3: # %if.end
; CHECK-BE-NEXT: .LBB5_3: # %if.end
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
@@ -447,7 +397,7 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
; CHECK-NEXT: mtctr r4
; CHECK-NEXT: li r4, 0
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB11_2: # %for.body
; CHECK-NEXT: .LBB9_2: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: rldic r7, r6, 4, 28
; CHECK-NEXT: addi r6, r6, 6
@@ -482,7 +432,7 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
; CHECK-NEXT: stxv vs1, 160(r7)
; CHECK-NEXT: stxv vs2, 144(r7)
; CHECK-NEXT: stxv vs3, 128(r7)
; CHECK-NEXT: bdnz .LBB11_2
; CHECK-NEXT: bdnz .LBB9_2
; CHECK-NEXT: # %bb.3: # %for.cond.cleanup
; CHECK-NEXT: blr
;
@@ -496,7 +446,7 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
; CHECK-BE-NEXT: mtctr r4
; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: .p2align 4
; CHECK-BE-NEXT: .LBB11_2: # %for.body
; CHECK-BE-NEXT: .LBB9_2: # %for.body
; CHECK-BE-NEXT: #
; CHECK-BE-NEXT: rldic r7, r6, 4, 28
; CHECK-BE-NEXT: addi r6, r6, 6
@@ -531,7 +481,7 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
; CHECK-BE-NEXT: stxv vs0, 128(r7)
; CHECK-BE-NEXT: stxv vs3, 176(r7)
; CHECK-BE-NEXT: stxv vs2, 160(r7)
; CHECK-BE-NEXT: bdnz .LBB11_2
; CHECK-BE-NEXT: bdnz .LBB9_2
; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup
; CHECK-BE-NEXT: blr
entry:
@@ -674,189 +624,12 @@ entry:
ret void
}

declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)

; Function Attrs: nounwind
define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvp vsp0, 0(r3)
; CHECK-NEXT: stxvp vsp0, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvp vsp0, 0(r3)
; CHECK-BE-NEXT: stxvp vsp0, 0(r4)
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
%2 = bitcast <256 x i1>* %vp2 to i8*
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
ret void
}

; Function Attrs: argmemonly nounwind readonly
declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)

; Function Attrs: argmemonly nounwind writeonly
declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)

; Function Attrs: nounwind
define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvpx vsp0, r3, r4
; CHECK-NEXT: stxvpx vsp0, r5, r4
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvpx vsp0, r3, r4
; CHECK-BE-NEXT: stxvpx vsp0, r5, r4
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = getelementptr i8, i8* %0, i64 %offset
%2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
%3 = bitcast <256 x i1>* %vp2 to i8*
%4 = getelementptr i8, i8* %3, i64 %offset
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
ret void
}

; Function Attrs: nounwind
define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r5, 18
; CHECK-NEXT: lxvpx vsp0, r3, r5
; CHECK-NEXT: stxvpx vsp0, r4, r5
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li r5, 18
; CHECK-BE-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = getelementptr i8, i8* %0, i64 18
%2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
%3 = bitcast <256 x i1>* %vp2 to i8*
%4 = getelementptr i8, i8* %3, i64 18
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
ret void
}

; Function Attrs: nounwind
define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r5, 1
; CHECK-NEXT: lxvpx vsp0, r3, r5
; CHECK-NEXT: stxvpx vsp0, r4, r5
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_4:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li r5, 1
; CHECK-BE-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = getelementptr i8, i8* %0, i64 1
%2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
%3 = bitcast <256 x i1>* %vp2 to i8*
%4 = getelementptr i8, i8* %3, i64 1
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
ret void
}

; Function Attrs: nounwind
define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r5, 42
; CHECK-NEXT: lxvpx vsp0, r3, r5
; CHECK-NEXT: stxvpx vsp0, r4, r5
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_5:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li r5, 42
; CHECK-BE-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = getelementptr i8, i8* %0, i64 42
%2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
%3 = bitcast <256 x i1>* %vp2 to i8*
%4 = getelementptr i8, i8* %3, i64 42
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
ret void
}

; Function Attrs: nounwind
define void @test_ldst_6(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvp vsp0, 4096(r3)
; CHECK-NEXT: stxvp vsp0, 4096(r4)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_6:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvp vsp0, 4096(r3)
; CHECK-BE-NEXT: stxvp vsp0, 4096(r4)
; CHECK-BE-NEXT: blr
entry:
%0 = getelementptr <256 x i1>, <256 x i1>* %vpp, i64 128
%1 = bitcast <256 x i1>* %0 to i8*
%2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
%3 = getelementptr <256 x i1>, <256 x i1>* %vp2, i64 128
%4 = bitcast <256 x i1>* %3 to i8*
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
ret void
}

; Function Attrs: nounwind
define void @test_ldst_7(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; FIXME: A prefixed load (plxvp) is expected here as the offset in this
; test case is a constant that fits within 34-bits.
; CHECK-LABEL: test_ldst_7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r5, 0
; CHECK-NEXT: ori r5, r5, 32799
; CHECK-NEXT: lxvpx vsp0, r3, r5
; CHECK-NEXT: stxvpx vsp0, r4, r5
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_7:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li r5, 0
; CHECK-BE-NEXT: ori r5, r5, 32799
; CHECK-BE-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = getelementptr i8, i8* %0, i64 32799
%2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
%3 = bitcast <256 x i1>* %vp2 to i8*
%4 = getelementptr i8, i8* %3, i64 32799
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
ret void
}
declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*)

; Function Attrs: nofree nounwind
define void @test_ldst_8(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test_ldst_8:
define void @test_ldst_1(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test_ldst_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv vs1, 32(r3)
; CHECK-NEXT: lxv vs0, 48(r3)
@@ -873,7 +646,7 @@ define void @test_ldst_8(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8
; CHECK-NEXT: stxv vs3, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_8:
; CHECK-BE-LABEL: test_ldst_1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
@@ -894,7 +667,7 @@ entry:
%1 = load <512 x i1>, <512 x i1>* %0, align 64
%2 = bitcast <256 x i1>* %vpp to i8*
%3 = getelementptr i8, i8* %2, i64 8
%4 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %3)
%4 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %3)
%5 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %1, <256 x i1> %4, <16 x i8> %vc, i32 0, i32 0)
%6 = bitcast i8* %resp to <512 x i1>*
store <512 x i1> %5, <512 x i1>* %6, align 64
@@ -902,8 +675,8 @@ entry:
}

; Function Attrs: nofree nounwind
define void @test_ldst_9(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test_ldst_9:
define void @test_ldst_2(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test_ldst_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv vs1, 32(r3)
; CHECK-NEXT: lxv vs0, 48(r3)
@@ -919,7 +692,7 @@ define void @test_ldst_9(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8
; CHECK-NEXT: stxv vs3, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_9:
; CHECK-BE-LABEL: test_ldst_2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
@@ -938,7 +711,7 @@ entry:
%0 = bitcast i8* %vqp to <512 x i1>*
%1 = load <512 x i1>, <512 x i1>* %0, align 64
%2 = bitcast <256 x i1>* %vpp to i8*
%3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
%3 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %2)
%4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
%5 = bitcast i8* %resp to <512 x i1>*
store <512 x i1> %4, <512 x i1>* %5, align 64
@@ -946,8 +719,8 @@ entry:
}

; Function Attrs: nofree nounwind
define void @test_ldst_10(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test_ldst_10:
define void @test_ldst_3(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test_ldst_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv vs1, 32(r3)
; CHECK-NEXT: lxv vs0, 48(r3)
@@ -963,7 +736,7 @@ define void @test_ldst_10(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %v
; CHECK-NEXT: stxv vs3, 0(r9)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_10:
; CHECK-BE-LABEL: test_ldst_3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
@@ -982,9 +755,12 @@ entry:
%0 = bitcast i8* %vqp to <512 x i1>*
%1 = load <512 x i1>, <512 x i1>* %0, align 64
%2 = bitcast <256 x i1>* %vpp to i8*
%3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
%3 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %2)
%4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
%5 = bitcast i8* %resp to <512 x i1>*
store <512 x i1> %4, <512 x i1>* %5, align 64
ret void
}

declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
@@ -7,7 +7,7 @@
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE

declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, i8* %ptr) {
; CHECK-LABEL: intrinsics1:
; CHECK: # %bb.0:
@@ -62,7 +62,7 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
%2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2)
%3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
%4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %3, <16 x i8> %vc2, <16 x i8> %vc4, i32 0, i32 0)
%5 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
%5 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
%6 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %4, <256 x i1> %5, <16 x i8> %vc1, i32 0, i32 0)
%7 = bitcast i8* %ptr to <512 x i1>*
store <512 x i1> %6, <512 x i1>* %7, align 64
@@ -126,7 +126,7 @@ define void @intrinsics2(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <
%2 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2)
%3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
%4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %3, <16 x i8> %vc2, <16 x i8> %vc4, i32 0, i32 0)
%5 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
%5 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
%6 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %4, <256 x i1> %5, <16 x i8> %vc1, i32 0, i32 0)
%7 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %6)
%8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 0
@@ -6,7 +6,7 @@
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE

declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>)
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
@@ -68,7 +68,7 @@ entry:
%0 = load <16 x i8>, <16 x i8>* %Src, align 16
%arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 1
%1 = load <16 x i8>, <16 x i8>* %arrayidx1, align 16
%2 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %0, <16 x i8> %1)
%2 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %0, <16 x i8> %1)
%3 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
%cmp11 = icmp sgt i32 %Len, 2
br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
@@ -165,7 +165,7 @@ entry:
%0 = load <16 x i8>, <16 x i8>* %Src, align 16
%arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 1
%1 = load <16 x i8>, <16 x i8>* %arrayidx1, align 16
%2 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %0, <16 x i8> %1)
%2 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %0, <16 x i8> %1)
%arrayidx2 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 2
%3 = load <16 x i8>, <16 x i8>* %arrayidx2, align 16
%4 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %2, <16 x i8> %3)
@@ -469,38 +469,38 @@ _loop_2_do_: ; preds = %_loop_2_do_.lr.ph,
%_ix_x_len = shl nuw nsw i64 %indvars.iv, 3
%x_ix_dim_0_113 = getelementptr inbounds %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_112, i64 %indvars.iv
%x_ix_dim_0_ = bitcast %_elem_type_of_x* %x_ix_dim_0_113 to i8*
%55 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %x_ix_dim_0_)
%55 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %x_ix_dim_0_)
%a_ix_dim_1_ = getelementptr inbounds i8, i8* %a_ix_dim_0_, i64 %_ix_x_len
%56 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_)
%56 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_)
%a_ix_dim_1_29 = getelementptr inbounds i8, i8* %a_ix_dim_0_25, i64 %_ix_x_len
%57 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_29)
%57 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_29)
%a_ix_dim_1_45 = getelementptr inbounds i8, i8* %a_ix_dim_0_41, i64 %_ix_x_len
%58 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_45)
%58 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_45)
%a_ix_dim_1_61 = getelementptr inbounds i8, i8* %a_ix_dim_0_57, i64 %_ix_x_len
%59 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_61)
%59 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_61)
%a_ix_dim_1_77 = getelementptr inbounds i8, i8* %a_ix_dim_0_73, i64 %_ix_x_len
%60 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_77)
%60 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_77)
%a_ix_dim_1_93 = getelementptr inbounds i8, i8* %a_ix_dim_0_89, i64 %_ix_x_len
%61 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_93)
%62 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %55)
%61 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_93)
%62 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %55)
%.fca.0.extract35 = extractvalue { <16 x i8>, <16 x i8> } %62, 0
%.fca.1.extract36 = extractvalue { <16 x i8>, <16 x i8> } %62, 1
%63 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %56)
%63 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %56)
%.fca.0.extract29 = extractvalue { <16 x i8>, <16 x i8> } %63, 0
%.fca.1.extract30 = extractvalue { <16 x i8>, <16 x i8> } %63, 1
%64 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %57)
%64 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %57)
%.fca.0.extract23 = extractvalue { <16 x i8>, <16 x i8> } %64, 0
%.fca.1.extract24 = extractvalue { <16 x i8>, <16 x i8> } %64, 1
%65 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %58)
%65 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %58)
%.fca.0.extract17 = extractvalue { <16 x i8>, <16 x i8> } %65, 0
%.fca.1.extract18 = extractvalue { <16 x i8>, <16 x i8> } %65, 1
%66 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %59)
%66 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %59)
%.fca.0.extract11 = extractvalue { <16 x i8>, <16 x i8> } %66, 0
%.fca.1.extract12 = extractvalue { <16 x i8>, <16 x i8> } %66, 1
%67 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %60)
%67 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %60)
%.fca.0.extract5 = extractvalue { <16 x i8>, <16 x i8> } %67, 0
%.fca.1.extract6 = extractvalue { <16 x i8>, <16 x i8> } %67, 1
%68 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %61)
%68 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %61)
%.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %68, 0
%.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %68, 1
%69 = bitcast <16 x i8> %.fca.0.extract29 to <2 x double>
@@ -518,38 +518,38 @@ _loop_2_do_: ; preds = %_loop_2_do_.lr.ph,
%81 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %80, <2 x double> %70, <2 x double> %49)
%82 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_ix_dim_0_113, i64 4
%83 = bitcast %_elem_type_of_x* %82 to i8*
%84 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %83)
%84 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %83)
%85 = getelementptr i8, i8* %a_ix_dim_1_, i64 32
%86 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %85)
%86 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %85)
%87 = getelementptr i8, i8* %a_ix_dim_1_29, i64 32
%88 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %87)
%88 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %87)
%89 = getelementptr i8, i8* %a_ix_dim_1_45, i64 32
%90 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %89)
%90 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %89)
%91 = getelementptr i8, i8* %a_ix_dim_1_61, i64 32
%92 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %91)
%92 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %91)
%93 = getelementptr i8, i8* %a_ix_dim_1_77, i64 32
%94 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %93)
%94 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %93)
%95 = getelementptr i8, i8* %a_ix_dim_1_93, i64 32
%96 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %95)
%97 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %84)
%96 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %95)
%97 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %84)
%.fca.0.extract37 = extractvalue { <16 x i8>, <16 x i8> } %97, 0
%.fca.1.extract39 = extractvalue { <16 x i8>, <16 x i8> } %97, 1
%98 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %86)
%98 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %86)
%.fca.0.extract31 = extractvalue { <16 x i8>, <16 x i8> } %98, 0
%.fca.1.extract33 = extractvalue { <16 x i8>, <16 x i8> } %98, 1
%99 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %88)
%99 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %88)
%.fca.0.extract25 = extractvalue { <16 x i8>, <16 x i8> } %99, 0
%.fca.1.extract27 = extractvalue { <16 x i8>, <16 x i8> } %99, 1
%100 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %90)
%100 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %90)
%.fca.0.extract19 = extractvalue { <16 x i8>, <16 x i8> } %100, 0
%.fca.1.extract21 = extractvalue { <16 x i8>, <16 x i8> } %100, 1
%101 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %92)
%101 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %92)
%.fca.0.extract13 = extractvalue { <16 x i8>, <16 x i8> } %101, 0
%.fca.1.extract15 = extractvalue { <16 x i8>, <16 x i8> } %101, 1
%102 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %94)
%102 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %94)
%.fca.0.extract7 = extractvalue { <16 x i8>, <16 x i8> } %102, 0
%.fca.1.extract9 = extractvalue { <16 x i8>, <16 x i8> } %102, 1
%103 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %96)
%103 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %96)
%.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %103, 0
%.fca.1.extract3 = extractvalue { <16 x i8>, <16 x i8> } %103, 1
%104 = bitcast <16 x i8> %.fca.1.extract30 to <2 x double>
@@ -631,7 +631,7 @@ _return_bb: ; preds = %_loop_1_do_.lr.ph,
ret void
}

declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
@@ -1,59 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O3 \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mattr=-mma \
; RUN: < %s | FileCheck %s

; This test is to check that the paired vector intrinsics are available even
; when MMA is disabled.

define <16 x i8> @test1(<256 x i1>* %ptr) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v3, 0(r3)
; CHECK-NEXT: lxv v2, 16(r3)
; CHECK-NEXT: vaddubm v2, v3, v2
; CHECK-NEXT: blr
entry:
%0 = load <256 x i1>, <256 x i1>* %ptr, align 32
%1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %0)
%2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
%3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
%add = add <16 x i8> %2, %3
ret <16 x i8> %add
}

declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)

define void @test2(<16 x i8> %v1, <16 x i8> %v2, <256 x i1>* %ptr) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmr v4, v3
; CHECK-NEXT: vmr v5, v2
; CHECK-NEXT: stxv v4, 16(r7)
; CHECK-NEXT: stxv v5, 0(r7)
; CHECK-NEXT: blr
entry:
%0 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %v2, <16 x i8> %v1)
store <256 x i1> %0, <256 x i1>* %ptr, align 32
ret void
}

declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)

define void @test3(<256 x i1>* %ptr) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvp vsp0, 0(r3)
; CHECK-NEXT: stxvp vsp0, 32(r3)
; CHECK-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %ptr to i8*
%1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
%add.ptr1 = getelementptr inbounds <256 x i1>, <256 x i1>* %ptr, i64 1
%2 = bitcast <256 x i1>* %add.ptr1 to i8*
tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
ret void
}

declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
test/CodeGen/PowerPC/paired-vector-intrinsics.ll (new file, 357 lines)
@@ -0,0 +1,357 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O3 \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O3 \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mattr=-mma \
; RUN: < %s | FileCheck %s --check-prefix=CHECK-NOMMA
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O3 \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: < %s | FileCheck %s --check-prefix=CHECK-BE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O3 \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mattr=-mma \
; RUN: < %s | FileCheck %s --check-prefix=CHECK-BE-NOMMA

; This test also checks that the paired vector intrinsics are available even
; when MMA is disabled.

; assemble_pair
declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: ass_pair:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: stxv v2, 16(r3)
; CHECK-NEXT: stxv v3, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-NOMMA-LABEL: ass_pair:
; CHECK-NOMMA: # %bb.0: # %entry
; CHECK-NOMMA-NEXT: vmr v3, v2
; CHECK-NOMMA-NEXT: stxv v2, 16(r3)
; CHECK-NOMMA-NEXT: stxv v3, 0(r3)
; CHECK-NOMMA-NEXT: blr
;
; CHECK-BE-LABEL: ass_pair:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vmr v3, v2
; CHECK-BE-NEXT: stxv v2, 16(r3)
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: blr
;
; CHECK-BE-NOMMA-LABEL: ass_pair:
; CHECK-BE-NOMMA: # %bb.0: # %entry
; CHECK-BE-NOMMA-NEXT: vmr v3, v2
; CHECK-BE-NOMMA-NEXT: stxv v2, 16(r3)
; CHECK-BE-NOMMA-NEXT: stxv v2, 0(r3)
; CHECK-BE-NOMMA-NEXT: blr
entry:
%0 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc, <16 x i8> %vc)
store <256 x i1> %0, <256 x i1>* %ptr, align 32
ret void
}

; disassemble_pair
declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)
define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) {
; CHECK-LABEL: disass_pair:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv vs1, 0(r3)
; CHECK-NEXT: lxv vs0, 16(r3)
; CHECK-NEXT: stxv vs1, 0(r4)
; CHECK-NEXT: stxv vs0, 0(r5)
; CHECK-NEXT: blr
;
; CHECK-NOMMA-LABEL: disass_pair:
; CHECK-NOMMA: # %bb.0: # %entry
; CHECK-NOMMA-NEXT: lxv vs1, 0(r3)
; CHECK-NOMMA-NEXT: lxv vs0, 16(r3)
; CHECK-NOMMA-NEXT: stxv vs1, 0(r4)
; CHECK-NOMMA-NEXT: stxv vs0, 0(r5)
; CHECK-NOMMA-NEXT: blr
;
; CHECK-BE-LABEL: disass_pair:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r4)
; CHECK-BE-NEXT: stxv vs1, 0(r5)
; CHECK-BE-NEXT: blr
;
; CHECK-BE-NOMMA-LABEL: disass_pair:
; CHECK-BE-NOMMA: # %bb.0: # %entry
; CHECK-BE-NOMMA-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NOMMA-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NOMMA-NEXT: stxv vs0, 0(r4)
; CHECK-BE-NOMMA-NEXT: stxv vs1, 0(r5)
; CHECK-BE-NOMMA-NEXT: blr
entry:
%0 = load <256 x i1>, <256 x i1>* %ptr1, align 32
%1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %0)
%2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
%3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
store <16 x i8> %2, <16 x i8>* %ptr2, align 16
store <16 x i8> %3, <16 x i8>* %ptr3, align 16
ret void
}

define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvp vsp0, 0(r3)
; CHECK-NEXT: stxvp vsp0, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-NOMMA-LABEL: test_ldst_1:
; CHECK-NOMMA: # %bb.0: # %entry
; CHECK-NOMMA-NEXT: lxvp vsp0, 0(r3)
; CHECK-NOMMA-NEXT: stxvp vsp0, 0(r4)
; CHECK-NOMMA-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvp vsp0, 0(r3)
; CHECK-BE-NEXT: stxvp vsp0, 0(r4)
; CHECK-BE-NEXT: blr
;
; CHECK-BE-NOMMA-LABEL: test_ldst_1:
; CHECK-BE-NOMMA: # %bb.0: # %entry
; CHECK-BE-NOMMA-NEXT: lxvp vsp0, 0(r3)
; CHECK-BE-NOMMA-NEXT: stxvp vsp0, 0(r4)
; CHECK-BE-NOMMA-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %0)
%2 = bitcast <256 x i1>* %vp2 to i8*
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %1, i8* %2)
ret void
}

declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*)

define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvpx vsp0, r3, r4
; CHECK-NEXT: stxvpx vsp0, r5, r4
; CHECK-NEXT: blr
;
; CHECK-NOMMA-LABEL: test_ldst_2:
; CHECK-NOMMA: # %bb.0: # %entry
; CHECK-NOMMA-NEXT: lxvpx vsp0, r3, r4
; CHECK-NOMMA-NEXT: stxvpx vsp0, r5, r4
; CHECK-NOMMA-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvpx vsp0, r3, r4
; CHECK-BE-NEXT: stxvpx vsp0, r5, r4
; CHECK-BE-NEXT: blr
;
; CHECK-BE-NOMMA-LABEL: test_ldst_2:
; CHECK-BE-NOMMA: # %bb.0: # %entry
; CHECK-BE-NOMMA-NEXT: lxvpx vsp0, r3, r4
; CHECK-BE-NOMMA-NEXT: stxvpx vsp0, r5, r4
; CHECK-BE-NOMMA-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = getelementptr i8, i8* %0, i64 %offset
%2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
%3 = bitcast <256 x i1>* %vp2 to i8*
%4 = getelementptr i8, i8* %3, i64 %offset
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
ret void
}

define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r5, 18
; CHECK-NEXT: lxvpx vsp0, r3, r5
; CHECK-NEXT: stxvpx vsp0, r4, r5
; CHECK-NEXT: blr
;
; CHECK-NOMMA-LABEL: test_ldst_3:
; CHECK-NOMMA: # %bb.0: # %entry
; CHECK-NOMMA-NEXT: li r5, 18
; CHECK-NOMMA-NEXT: lxvpx vsp0, r3, r5
; CHECK-NOMMA-NEXT: stxvpx vsp0, r4, r5
; CHECK-NOMMA-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li r5, 18
; CHECK-BE-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NEXT: blr
;
; CHECK-BE-NOMMA-LABEL: test_ldst_3:
; CHECK-BE-NOMMA: # %bb.0: # %entry
; CHECK-BE-NOMMA-NEXT: li r5, 18
; CHECK-BE-NOMMA-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NOMMA-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NOMMA-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = getelementptr i8, i8* %0, i64 18
%2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
%3 = bitcast <256 x i1>* %vp2 to i8*
%4 = getelementptr i8, i8* %3, i64 18
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
ret void
}

define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r5, 1
; CHECK-NEXT: lxvpx vsp0, r3, r5
; CHECK-NEXT: stxvpx vsp0, r4, r5
; CHECK-NEXT: blr
;
; CHECK-NOMMA-LABEL: test_ldst_4:
; CHECK-NOMMA: # %bb.0: # %entry
; CHECK-NOMMA-NEXT: li r5, 1
; CHECK-NOMMA-NEXT: lxvpx vsp0, r3, r5
; CHECK-NOMMA-NEXT: stxvpx vsp0, r4, r5
; CHECK-NOMMA-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_4:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li r5, 1
; CHECK-BE-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NEXT: blr
;
; CHECK-BE-NOMMA-LABEL: test_ldst_4:
; CHECK-BE-NOMMA: # %bb.0: # %entry
; CHECK-BE-NOMMA-NEXT: li r5, 1
; CHECK-BE-NOMMA-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NOMMA-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NOMMA-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = getelementptr i8, i8* %0, i64 1
%2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
%3 = bitcast <256 x i1>* %vp2 to i8*
%4 = getelementptr i8, i8* %3, i64 1
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
ret void
}

define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r5, 42
; CHECK-NEXT: lxvpx vsp0, r3, r5
; CHECK-NEXT: stxvpx vsp0, r4, r5
; CHECK-NEXT: blr
;
; CHECK-NOMMA-LABEL: test_ldst_5:
; CHECK-NOMMA: # %bb.0: # %entry
; CHECK-NOMMA-NEXT: li r5, 42
; CHECK-NOMMA-NEXT: lxvpx vsp0, r3, r5
; CHECK-NOMMA-NEXT: stxvpx vsp0, r4, r5
; CHECK-NOMMA-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_5:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li r5, 42
; CHECK-BE-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NEXT: blr
;
; CHECK-BE-NOMMA-LABEL: test_ldst_5:
; CHECK-BE-NOMMA: # %bb.0: # %entry
; CHECK-BE-NOMMA-NEXT: li r5, 42
; CHECK-BE-NOMMA-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NOMMA-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NOMMA-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = getelementptr i8, i8* %0, i64 42
%2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
%3 = bitcast <256 x i1>* %vp2 to i8*
%4 = getelementptr i8, i8* %3, i64 42
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
ret void
}

define void @test_ldst_6(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvp vsp0, 4096(r3)
; CHECK-NEXT: stxvp vsp0, 4096(r4)
; CHECK-NEXT: blr
;
; CHECK-NOMMA-LABEL: test_ldst_6:
; CHECK-NOMMA: # %bb.0: # %entry
; CHECK-NOMMA-NEXT: lxvp vsp0, 4096(r3)
; CHECK-NOMMA-NEXT: stxvp vsp0, 4096(r4)
; CHECK-NOMMA-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_6:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvp vsp0, 4096(r3)
; CHECK-BE-NEXT: stxvp vsp0, 4096(r4)
; CHECK-BE-NEXT: blr
;
; CHECK-BE-NOMMA-LABEL: test_ldst_6:
; CHECK-BE-NOMMA: # %bb.0: # %entry
; CHECK-BE-NOMMA-NEXT: lxvp vsp0, 4096(r3)
; CHECK-BE-NOMMA-NEXT: stxvp vsp0, 4096(r4)
; CHECK-BE-NOMMA-NEXT: blr
entry:
%0 = getelementptr <256 x i1>, <256 x i1>* %vpp, i64 128
%1 = bitcast <256 x i1>* %0 to i8*
%2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
%3 = getelementptr <256 x i1>, <256 x i1>* %vp2, i64 128
%4 = bitcast <256 x i1>* %3 to i8*
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
ret void
}

define void @test_ldst_7(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; FIXME: A prefixed load (plxvp) is expected here as the offset in this
; test case is a constant that fits within 34-bits.
; CHECK-LABEL: test_ldst_7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r5, 0
; CHECK-NEXT: ori r5, r5, 32799
; CHECK-NEXT: lxvpx vsp0, r3, r5
; CHECK-NEXT: stxvpx vsp0, r4, r5
; CHECK-NEXT: blr
;
; CHECK-NOMMA-LABEL: test_ldst_7:
; CHECK-NOMMA: # %bb.0: # %entry
; CHECK-NOMMA-NEXT: li r5, 0
; CHECK-NOMMA-NEXT: ori r5, r5, 32799
; CHECK-NOMMA-NEXT: lxvpx vsp0, r3, r5
; CHECK-NOMMA-NEXT: stxvpx vsp0, r4, r5
; CHECK-NOMMA-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_7:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li r5, 0
; CHECK-BE-NEXT: ori r5, r5, 32799
; CHECK-BE-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NEXT: blr
;
; CHECK-BE-NOMMA-LABEL: test_ldst_7:
; CHECK-BE-NOMMA: # %bb.0: # %entry
; CHECK-BE-NOMMA-NEXT: li r5, 0
; CHECK-BE-NOMMA-NEXT: ori r5, r5, 32799
; CHECK-BE-NOMMA-NEXT: lxvpx vsp0, r3, r5
; CHECK-BE-NOMMA-NEXT: stxvpx vsp0, r4, r5
; CHECK-BE-NOMMA-NEXT: blr
entry:
%0 = bitcast <256 x i1>* %vpp to i8*
%1 = getelementptr i8, i8* %0, i64 32799
%2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
%3 = bitcast <256 x i1>* %vp2 to i8*
%4 = getelementptr i8, i8* %3, i64 32799
tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
ret void
}