mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[PowerPC] Add intrinsics for MMA
This patch adds support for MMA intrinsics.

Authored by: Baptiste Saleil
Reviewed By: #powerpc, bsaleil, amyk
Differential Revision: https://reviews.llvm.org/D89345
This commit is contained in:
parent
bac894562a
commit
e248116f2c
@ -141,6 +141,28 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
|
||||
Intrinsic<ret_types, param_types, properties>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC MMA Intrinsic Multi Class Definitions.
|
||||
//
|
||||
|
||||
// Declares five intrinsics for one MMA operation:
//   NAME            - takes `args` and returns a v512i1 value.
//   pp, pn, np, nn  - take a v512i1 value followed by `args` and return a
//                     v512i1 value.
// All five are marked IntrNoMem.
// NOTE(review): the leading v512i1 operand is presumably the accumulator
// input (going by the ACC naming) - confirm.
multiclass PowerPC_MMA_ACC_Intrinsic<list<LLVMType> args> {
  def NAME : Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>;
  def pp : Intrinsic<[llvm_v512i1_ty],
                     !listconcat([llvm_v512i1_ty], args),
                     [IntrNoMem]>;
  def pn : Intrinsic<[llvm_v512i1_ty],
                     !listconcat([llvm_v512i1_ty], args),
                     [IntrNoMem]>;
  def np : Intrinsic<[llvm_v512i1_ty],
                     !listconcat([llvm_v512i1_ty], args),
                     [IntrNoMem]>;
  def nn : Intrinsic<[llvm_v512i1_ty],
                     !listconcat([llvm_v512i1_ty], args),
                     [IntrNoMem]>;
}
|
||||
|
||||
// Declares two intrinsics for one MMA operation:
//   NAME - takes `args` and returns a v512i1 value.
//   pp   - takes a v512i1 value followed by `args` and returns a v512i1 value.
// Both are marked IntrNoMem.
multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> {
  def NAME : Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>;
  def pp : Intrinsic<[llvm_v512i1_ty],
                     !listconcat([llvm_v512i1_ty], args),
                     [IntrNoMem]>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC Altivec Intrinsic Class Definitions.
|
||||
//
|
||||
@ -1371,7 +1393,6 @@ def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>;
|
||||
// PowerPC set FPSCR Intrinsic Definitions.
|
||||
def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;
|
||||
|
||||
}
|
||||
|
||||
let TargetPrefix = "ppc" in {
|
||||
@ -1400,5 +1421,60 @@ let TargetPrefix = "ppc" in {
|
||||
|
||||
def int_ppc_mma_xxsetaccz :
|
||||
Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
|
||||
// Integer forms: each defm expands to the base intrinsic plus its "pp"
// variant. The pm* forms take three extra i32 operands after the two v16i8
// inputs.

// xvi4ger8
defm int_ppc_mma_xvi4ger8
    : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvi4ger8
    : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
                                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;

// xvi8ger4
defm int_ppc_mma_xvi8ger4
    : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvi8ger4
    : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
                                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;

// xvi16ger2s
defm int_ppc_mma_xvi16ger2s
    : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvi16ger2s
    : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
                                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
// Floating-point forms: each defm expands to the base intrinsic plus its
// pp/pn/np/nn variants.

// xvf16ger2: the pm* form takes three extra i32 operands.
defm int_ppc_mma_xvf16ger2
    : PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvf16ger2
    : PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
                                 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;

// xvf32ger: the pm* form takes two extra i32 operands.
defm int_ppc_mma_xvf32ger
    : PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvf32ger
    : PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
                                 llvm_i32_ty, llvm_i32_ty]>;

// xvf64ger: the first input is a v256i1 value instead of a v16i8; the pm*
// form takes two extra i32 operands.
defm int_ppc_mma_xvf64ger
    : PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvf64ger
    : PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty,
                                 llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
// MMA Reduced-Precision: bfloat16 Outer Product Intrinsic Definitions.
|
||||
// xvbf16ger2: base intrinsic plus pp/pn/np/nn variants; the pm* form takes
// three extra i32 operands after the two v16i8 inputs.
defm int_ppc_mma_xvbf16ger2
    : PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvbf16ger2
    : PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
                                 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
// MMA Reduced-Precision: Missing Integer-based Outer Product Operations.
|
||||
// xvi16ger2: base intrinsic plus "pp" variant; the pm* form takes three extra
// i32 operands.
defm int_ppc_mma_xvi16ger2
    : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvi16ger2
    : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
                                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;

// xvi8ger4spp: declared directly rather than through a multiclass. Both
// intrinsics take a v512i1 value first and return a v512i1 value.
def int_ppc_mma_xvi8ger4spp
    : Intrinsic<[llvm_v512i1_ty],
                [llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty],
                [IntrNoMem]>;
def int_ppc_mma_pmxvi8ger4spp
    : Intrinsic<[llvm_v512i1_ty],
                [llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty,
                 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
|
||||
}
|
||||
|
@ -1,3 +1,8 @@
|
||||
// Mask immediates for MMA instructions (2, 4 and 8 bits).
// Each leaf matches an i32 immediate whose value fits in the stated number of
// unsigned bits.
def Msk2Imm : ImmLeaf<i32, [{
  return isUInt<2>(Imm);
}]>;
def Msk4Imm : ImmLeaf<i32, [{
  return isUInt<4>(Imm);
}]>;
def Msk8Imm : ImmLeaf<i32, [{
  return isUInt<8>(Imm);
}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC ISA 3.1 specific type constraints.
|
||||
//
|
||||
@ -1341,6 +1346,220 @@ defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
|
||||
"xvf64ger", "$AT, $XA, $XB">;
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// MMA Intrinsics
|
||||
let Predicates = [MMA] in {
|
||||
// Integer outer-product intrinsics. Each base intrinsic selects the
// instruction of the same name; each "pp" intrinsic additionally forwards the
// incoming v512i1 value $ATi as the first instruction operand.
// NOTE(review): RCCp.AToVSRC / RCCp.BToVSRC are defined outside this hunk;
// presumably they place $XA / $XB in the VSRC register class - confirm.

// xvi4ger8
def : Pat<
  (v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
  (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

// xvi8ger4
def : Pat<
  (v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
  (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

// xvi16ger2s
def : Pat<
  (v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
  (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
|
||||
|
||||
// Floating-point outer-product intrinsics. Each family has a base pattern and
// pp/pn/np/nn patterns that also forward the incoming v512i1 value $ATi.
// NOTE(review): RCCp.AToVSRC / RCCp.BToVSRC are defined outside this hunk;
// presumably they place $XA / $XB in the VSRC register class - confirm.

// xvf16ger2
def : Pat<
  (v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
  (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

// xvf32ger
def : Pat<
  (v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
  (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

// xvf64ger: $XA is a v256i1 value and is passed to the instruction directly.
def : Pat<
  (v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
  (XVF64GER $XA, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
  (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
  (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
  (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
  (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>;
|
||||
|
||||
// NOTE(review): RCCp.AToVSRC / RCCp.BToVSRC are defined outside this hunk;
// presumably they place $XA / $XB in the VSRC register class - confirm.

// xvbf16ger2: base pattern plus pp/pn/np/nn patterns that forward $ATi.
def : Pat<
  (v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
  (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

// xvi16ger2 and its "pp" form.
def : Pat<
  (v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
  (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<
  (v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

// xvi8ger4spp
def : Pat<
  (v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
  (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
|
||||
}
|
||||
|
||||
// MMA Prefixed Intrinsics (pm* forms with mask immediates; also require PrefixInstrs)
|
||||
let Predicates = [MMA, PrefixInstrs] in {
|
||||
// Prefixed integer outer-product intrinsics. The three trailing operands are
// immediates constrained by the Msk*Imm leaves and are passed through to the
// instruction unchanged. XMSK and YMSK use Msk4Imm throughout; the PMSK leaf
// differs per operation (Msk8Imm, Msk4Imm, Msk2Imm below).
// NOTE(review): RCCp.AToVSRC / RCCp.BToVSRC are defined outside this hunk;
// presumably they place $XA / $XB in the VSRC register class - confirm.

// pmxvi4ger8: PMSK is Msk8Imm.
def : Pat<
  (v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB,
                                  Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                  Msk8Imm:$PMSK)),
  (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC,
              Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                    Msk8Imm:$PMSK)),
  (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;

// pmxvi8ger4: PMSK is Msk4Imm.
def : Pat<
  (v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB,
                                  Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                  Msk4Imm:$PMSK)),
  (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC,
              Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                    Msk4Imm:$PMSK)),
  (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;

// pmxvi16ger2s: PMSK is Msk2Imm.
def : Pat<
  (v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                    Msk2Imm:$PMSK)),
  (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                      Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                      Msk2Imm:$PMSK)),
  (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                  Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
|
||||
// Prefixed pmxvf16ger2 intrinsics: exactly one pattern per variant
// (base, pp, pn, np, nn). XMSK and YMSK are matched with Msk4Imm and PMSK
// with Msk2Imm; all three immediates are passed through to the instruction.
// NOTE(review): RCCp.AToVSRC / RCCp.BToVSRC are defined outside this hunk;
// presumably they place $XA / $XB in the VSRC register class - confirm.
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB,
                                   Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                   Msk2Imm:$PMSK)),
  (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC,
               Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                     Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                     Msk2Imm:$PMSK)),
  (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                 Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                     Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                     Msk2Imm:$PMSK)),
  (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                 Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                     Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                     Msk2Imm:$PMSK)),
  (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                 Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                     Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                     Msk2Imm:$PMSK)),
  (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                 Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
|
||||
|
||||
// Prefixed pmxvf32ger intrinsics: base, pp, pn, np, nn. Only two mask
// immediates are taken here (XMSK and YMSK, both Msk4Imm); there is no PMSK.
// NOTE(review): RCCp.AToVSRC / RCCp.BToVSRC are defined outside this hunk;
// presumably they place $XA / $XB in the VSRC register class - confirm.
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB,
                                  Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
  (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC,
              Msk4Imm:$XMSK, Msk4Imm:$YMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
  (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk4Imm:$YMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
  (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk4Imm:$YMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
  (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk4Imm:$YMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
  (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk4Imm:$YMSK)>;
|
||||
|
||||
// Prefixed pmxvf64ger intrinsics: base, pp, pn, np, nn. $XA is a v256i1 value
// passed to the instruction directly; XMSK is matched with Msk4Imm and YMSK
// with Msk2Imm. There is no PMSK operand.
// NOTE(review): RCCp.BToVSRC is defined outside this hunk; presumably it
// places $XB in the VSRC register class - confirm.
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB,
                                  Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
  (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
  (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
  (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
  (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
  (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
|
||||
|
||||
// Prefixed pmxvbf16ger2 intrinsics: base, pp, pn, np, nn. XMSK and YMSK are
// matched with Msk4Imm and PMSK with Msk2Imm; all three immediates are passed
// through to the instruction.
// NOTE(review): RCCp.AToVSRC / RCCp.BToVSRC are defined outside this hunk;
// presumably they place $XA / $XB in the VSRC register class - confirm.
def : Pat<
  (v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB,
                                    Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                    Msk2Imm:$PMSK)),
  (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC,
                Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                      Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                      Msk2Imm:$PMSK)),
  (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                  Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                      Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                      Msk2Imm:$PMSK)),
  (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                  Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                      Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                      Msk2Imm:$PMSK)),
  (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                  Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<
  (v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                      Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                      Msk2Imm:$PMSK)),
  (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                  Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
|
||||
// Remaining prefixed integer intrinsics. XMSK and YMSK are matched with
// Msk4Imm in every pattern; the PMSK leaf is noted per operation.
// NOTE(review): RCCp.AToVSRC / RCCp.BToVSRC are defined outside this hunk;
// presumably they place $XA / $XB in the VSRC register class - confirm.

// pmxvi16ger2: PMSK is Msk2Imm.
def : Pat<
  (v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB,
                                   Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                   Msk2Imm:$PMSK)),
  (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC,
               Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;

// pmxvi8ger4spp: PMSK is Msk4Imm, the same leaf the pmxvi8ger4 and
// pmxvi8ger4pp patterns in this file use. With a 2-bit leaf, PMSK values
// 4..15 would not match any pattern.
// NOTE(review): assumes PMXVI8GER4SPP declares a 4-bit PMSK operand like
// PMXVI8GER4PP - confirm against the instruction definition.
def : Pat<
  (v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                     Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                     Msk4Imm:$PMSK)),
  (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                 Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;

// pmxvi16ger2pp: PMSK is Msk2Imm.
def : Pat<
  (v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                     Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                     Msk2Imm:$PMSK)),
  (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC,
                 Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
|
||||
}
|
||||
|
||||
def Concats {
|
||||
dag VecsToVecPair0 =
|
||||
(v256i1 (INSERT_SUBREG
|
||||
|
@ -6,6 +6,412 @@
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
|
||||
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
|
||||
|
||||
; Function Attrs: nofree nounwind writeonly
|
||||
define dso_local void @test50(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test50:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xvbf16ger2 acc0, v2, v2
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test50:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: xvbf16ger2 acc0, v2, v2
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> %vc, <16 x i8> %vc)
|
||||
%1 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %0, <512 x i1>* %1, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8>, <16 x i8>)
|
||||
|
||||
; Function Attrs: nofree nounwind writeonly
|
||||
define dso_local void @test51(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test51:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: pmxvbf16ger2 acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test51:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: pmxvbf16ger2 acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
|
||||
%1 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %0, <512 x i1>* %1, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8>, <16 x i8>, i32, i32, i32)
|
||||
|
||||
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test52(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test52:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xvbf16ger2pp acc0, v2, v2
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test52:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xvbf16ger2pp acc0, v2, v2
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
|
||||
|
||||
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test53(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test53:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xvbf16ger2pn acc0, v2, v2
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test53:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xvbf16ger2pn acc0, v2, v2
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>)
|
||||
|
||||
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test54(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test54:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xvbf16ger2np acc0, v2, v2
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test54:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xvbf16ger2np acc0, v2, v2
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>)
|
||||
|
||||
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test55(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test55:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xvbf16ger2nn acc0, v2, v2
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test55:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xvbf16ger2nn acc0, v2, v2
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>)
|
||||
|
||||
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test56(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test56:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: pmxvbf16ger2pp acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test56:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: pmxvbf16ger2pp acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
|
||||
|
||||
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test57(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test57:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: pmxvbf16ger2pn acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test57:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: pmxvbf16ger2pn acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
|
||||
|
||||
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test58(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test58:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: pmxvbf16ger2np acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test58:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: pmxvbf16ger2np acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
|
||||
|
||||
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test59(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test59:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: pmxvbf16ger2nn acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test59:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: pmxvbf16ger2nn acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
|
||||
|
||||
; Function Attrs: nofree nounwind writeonly
|
||||
define dso_local void @test60(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test60:
|
||||
@ -25,6 +431,7 @@ entry:
|
||||
store <16 x i8> %0, <16 x i8>* %1, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of the llvm.ppc.vsx.xvcvspbf16 intrinsic: takes one <16 x i8>
; vector and returns a <16 x i8> vector.
; Function Attrs: nounwind readnone
|
||||
declare <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8>)
|
||||
|
||||
|
241
test/CodeGen/PowerPC/mma-integer-based-outer-product.ll
Normal file
241
test/CodeGen/PowerPC/mma-integer-based-outer-product.ll
Normal file
@ -0,0 +1,241 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
|
||||
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
|
||||
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
|
||||
|
||||
; test1: calls llvm.ppc.mma.xvi16ger2 with %vc as both vector operands and
; stores the resulting <512 x i1> value through %resp (bitcast to <512 x i1>*).
; %vqp and %vpp are not referenced. The assertions below pin the expected
; assembly for the powerpc64le run (default prefix) and the powerpc64 run
; (-BE prefix) from the RUN lines at the top of this file.
; Function Attrs: nofree nounwind writeonly
|
||||
define dso_local void @test1(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xvi16ger2 acc0, v2, v2
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test1:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: xvi16ger2 acc0, v2, v2
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %vc, <16 x i8> %vc)
|
||||
%1 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %0, <512 x i1>* %1, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of the llvm.ppc.mma.xvi16ger2 intrinsic: takes two <16 x i8>
; vectors and returns a <512 x i1> value.
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8>, <16 x i8>)
|
||||
|
||||
; test2: calls llvm.ppc.mma.pmxvi16ger2 with %vc as both vector operands and
; three zero i32 arguments, then stores the resulting <512 x i1> value through
; %resp. %vqp and %vpp are not referenced. The assertions below pin the
; expected assembly for the powerpc64le run (default prefix) and the powerpc64
; run (-BE prefix) from the RUN lines at the top of this file.
; Function Attrs: nofree nounwind writeonly
|
||||
define dso_local void @test2(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: pmxvi16ger2 acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test2:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: pmxvi16ger2 acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
|
||||
%1 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %0, <512 x i1>* %1, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of the llvm.ppc.mma.pmxvi16ger2 intrinsic: takes two <16 x i8>
; vectors and three i32 arguments; returns a <512 x i1> value.
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8>, <16 x i8>, i32, i32, i32)
|
||||
|
||||
; test3: loads a <512 x i1> value from %vqp, passes it as the first operand of
; llvm.ppc.mma.xvi8ger4spp together with %vc (used for both vector operands),
; then stores the <512 x i1> result through %resp. %vpp is not referenced.
; The assertions below pin the expected assembly for the powerpc64le run
; (default prefix) and the powerpc64 run (-BE prefix) from the RUN lines at the
; top of this file.
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test3(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xvi8ger4spp acc0, v2, v2
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test3:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xvi8ger4spp acc0, v2, v2
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of the llvm.ppc.mma.xvi8ger4spp intrinsic: takes a <512 x i1>
; value and two <16 x i8> vectors; returns a <512 x i1> value.
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1>, <16 x i8>, <16 x i8>)
|
||||
|
||||
; test4: loads a <512 x i1> value from %vqp, passes it as the first operand of
; llvm.ppc.mma.xvi16ger2pp together with %vc (used for both vector operands),
; then stores the <512 x i1> result through %resp. %vpp is not referenced.
; The assertions below pin the expected assembly for the powerpc64le run
; (default prefix) and the powerpc64 run (-BE prefix) from the RUN lines at the
; top of this file.
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test4(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xvi16ger2pp acc0, v2, v2
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test4:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xvi16ger2pp acc0, v2, v2
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of the llvm.ppc.mma.xvi16ger2pp intrinsic: takes a <512 x i1>
; value and two <16 x i8> vectors; returns a <512 x i1> value.
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
|
||||
|
||||
; test5: loads a <512 x i1> value from %vqp, passes it as the first operand of
; llvm.ppc.mma.pmxvi8ger4spp together with %vc (used for both vector operands)
; and three zero i32 arguments, then stores the <512 x i1> result through
; %resp. %vpp is not referenced. The assertions below pin the expected assembly
; for the powerpc64le run (default prefix) and the powerpc64 run (-BE prefix)
; from the RUN lines at the top of this file.
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test5(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: pmxvi8ger4spp acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test5:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: pmxvi8ger4spp acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of the llvm.ppc.mma.pmxvi8ger4spp intrinsic: takes a <512 x i1>
; value, two <16 x i8> vectors and three i32 arguments; returns <512 x i1>.
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
|
||||
|
||||
; test6: loads a <512 x i1> value from %vqp, passes it as the first operand of
; llvm.ppc.mma.pmxvi16ger2pp together with %vc (used for both vector operands)
; and three zero i32 arguments, then stores the <512 x i1> result through
; %resp. %vpp is not referenced. The assertions below pin the expected assembly
; for the powerpc64le run (default prefix) and the powerpc64 run (-BE prefix)
; from the RUN lines at the top of this file.
; Function Attrs: nofree nounwind
|
||||
define dso_local void @test6(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 32(r3)
|
||||
; CHECK-NEXT: lxv vs0, 48(r3)
|
||||
; CHECK-NEXT: lxv vs3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs2, 16(r3)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: pmxvi16ger2pp acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r7)
|
||||
; CHECK-NEXT: stxv vs1, 32(r7)
|
||||
; CHECK-NEXT: stxv vs2, 16(r7)
|
||||
; CHECK-NEXT: stxv vs3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test6:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: lxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: pmxvi16ger2pp acc0, v2, v2, 0, 0, 0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r7)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r7)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %vqp to <512 x i1>*
|
||||
%1 = load <512 x i1>, <512 x i1>* %0, align 64
|
||||
%2 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
|
||||
%3 = bitcast i8* %resp to <512 x i1>*
|
||||
store <512 x i1> %2, <512 x i1>* %3, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of the llvm.ppc.mma.pmxvi16ger2pp intrinsic: takes a <512 x i1>
; value, two <16 x i8> vectors and three i32 arguments; returns <512 x i1>.
; Function Attrs: nounwind readnone
|
||||
declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
|
1822
test/CodeGen/PowerPC/mma-outer-product.ll
Normal file
1822
test/CodeGen/PowerPC/mma-outer-product.ll
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user