mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
[AArch64][SVE] Move instcombine like transforms out of SVEIntrinsicOpts
Instead move them to the instcombine that happens in AArch64TargetTransformInfo. Differential Revision: https://reviews.llvm.org/D106144
This commit is contained in:
parent
9c42154b7e
commit
b354b5b95c
@ -686,6 +686,115 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
|
||||
: None;
|
||||
}
|
||||
|
||||
/// Simplify an SVE ptest whose two operands are both results of
/// aarch64_sve_convert_to_svbool applied to values of the same type. In that
/// case the same ptest intrinsic is re-issued directly on the unconverted
/// values and replaces the original call.
///
/// \param IC the combiner used to replace the uses of \p II.
/// \param II the ptest intrinsic call being visited.
/// \returns the result of IC.replaceInstUsesWith() when the rewrite applies,
/// None otherwise.
static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
                                                   IntrinsicInst &II) {
  auto *LHSConv = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
  auto *RHSConv = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
  if (!LHSConv || !RHSConv)
    return None;

  // Both operands have to come from a convert_to_svbool call.
  if (LHSConv->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool ||
      RHSConv->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return None;

  // The values feeding the conversions must share a type, otherwise a single
  // overloaded ptest cannot take both of them.
  Value *LHSSrc = LHSConv->getArgOperand(0);
  Value *RHSSrc = RHSConv->getArgOperand(0);
  Type *SrcTy = LHSSrc->getType();
  if (SrcTy != RHSSrc->getType())
    return None;

  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);

  Value *Ops[] = {LHSSrc, RHSSrc};
  Type *Tys[] = {SrcTy};
  auto *NewPTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops);

  NewPTest->takeName(&II);
  return IC.replaceInstUsesWith(II, NewPTest);
}
|
||||
|
||||
/// Fold an SVE [f]mul whose multiplicand or multiplier is a splat of one
/// into the remaining operand:
///
///   [f]mul pg, %n, (dup_x 1)      => %n
///   [f]mul pg, %n, (dup _, pg, 1) => %n   (same predicate value only)
///
/// \param IC the combiner used to replace the uses of \p II.
/// \param II the aarch64_sve_mul / aarch64_sve_fmul call being visited.
/// \returns the result of IC.replaceInstUsesWith() when the multiply is
/// folded away, None otherwise.
static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
                                                       IntrinsicInst &II) {
  auto *OpPredicate = II.getOperand(0);
  auto *OpMultiplicand = II.getOperand(1);
  auto *OpMultiplier = II.getOperand(2);

  // Return true if V is a call to the intrinsic SplatID whose operand number
  // SplatOpIdx is the integer or floating-point constant one.
  auto IsUnitSplat = [](Value *V, Intrinsic::ID SplatID, unsigned SplatOpIdx) {
    auto *IntrI = dyn_cast<IntrinsicInst>(V);
    if (!IntrI || IntrI->getIntrinsicID() != SplatID)
      return false;

    auto *SplatValue = IntrI->getOperand(SplatOpIdx);
    return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
  };

  // aarch64_sve_dup_x carries the splat value as operand 0.
  auto IsUnitDupX = [&](Value *V) {
    return IsUnitSplat(V, Intrinsic::aarch64_sve_dup_x, 0);
  };

  // aarch64_sve_dup carries the splat value as operand 2 (operand 1 is its
  // governing predicate, checked below).
  auto IsUnitDup = [&](Value *V) {
    return IsUnitSplat(V, Intrinsic::aarch64_sve_dup, 2);
  };

  // The OpMultiplier variable should always point to the dup (if any), so
  // swap if necessary.
  if (IsUnitDup(OpMultiplicand) || IsUnitDupX(OpMultiplicand))
    std::swap(OpMultiplier, OpMultiplicand);

  if (IsUnitDupX(OpMultiplier)) {
    // [f]mul pg (dupx 1) %n => %n
    OpMultiplicand->takeName(&II);
    return IC.replaceInstUsesWith(II, OpMultiplicand);
  }

  if (IsUnitDup(OpMultiplier)) {
    // [f]mul pg (dup pg 1) %n => %n
    auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
    auto *DupPg = DupInst->getOperand(1);
    // TODO: this is naive. The optimization is still valid if DupPg
    // 'encompasses' OpPredicate, not only if they're the same predicate.
    if (OpPredicate == DupPg) {
      OpMultiplicand->takeName(&II);
      return IC.replaceInstUsesWith(II, OpMultiplicand);
    }
  }

  return None;
}
|
||||
|
||||
/// Rewrite sve_tbl(OpVal, sve_dup_x(Idx)) as
/// splat_vector(extractelement(OpVal, Idx)) when Idx is a constant smaller
/// than the known minimum element count of the result type, so that later
/// combines can work on generic IR.
///
/// \param IC the combiner used to replace the uses of \p II.
/// \param II the aarch64_sve_tbl call being visited.
/// \returns the result of IC.replaceInstUsesWith() on success, None when the
/// index operand is not a suitable dup_x.
static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
                                                 IntrinsicInst &II) {
  Value *Table = II.getOperand(0);
  Value *Indices = II.getOperand(1);
  auto *ResultTy = cast<VectorType>(II.getType());

  // The index vector has to be an aarch64_sve_dup_x call ...
  auto *IndexSplat = dyn_cast<IntrinsicInst>(Indices);
  const bool IsDupX =
      IndexSplat &&
      IndexSplat->getIntrinsicID() == Intrinsic::aarch64_sve_dup_x;
  if (!IsDupX)
    return None;

  // ... of a constant that is below the minimal element count of the result.
  auto *Lane = dyn_cast<ConstantInt>(IndexSplat->getOperand(0));
  if (!Lane)
    return None;
  if (Lane->getValue().uge(ResultTy->getElementCount().getKnownMinValue()))
    return None;

  // Emit the extractelement + splat pair in front of the tbl call.
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
  auto *Element = Builder.CreateExtractElement(Table, Lane);
  auto *Broadcast =
      Builder.CreateVectorSplat(ResultTy->getElementCount(), Element);

  Broadcast->takeName(&II);
  return IC.replaceInstUsesWith(II, Broadcast);
}
|
||||
|
||||
Optional<Instruction *>
|
||||
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II) const {
|
||||
@ -713,6 +822,15 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
||||
return instCombineSVECntElts(IC, II, 8);
|
||||
case Intrinsic::aarch64_sve_cntb:
|
||||
return instCombineSVECntElts(IC, II, 16);
|
||||
case Intrinsic::aarch64_sve_ptest_any:
|
||||
case Intrinsic::aarch64_sve_ptest_first:
|
||||
case Intrinsic::aarch64_sve_ptest_last:
|
||||
return instCombineSVEPTest(IC, II);
|
||||
case Intrinsic::aarch64_sve_mul:
|
||||
case Intrinsic::aarch64_sve_fmul:
|
||||
return instCombineSVEVectorMul(IC, II);
|
||||
case Intrinsic::aarch64_sve_tbl:
|
||||
return instCombineSVETBL(IC, II);
|
||||
}
|
||||
|
||||
return None;
|
||||
|
@ -60,18 +60,9 @@ private:
|
||||
SmallSetVector<IntrinsicInst *, 4> &PTrues);
|
||||
bool optimizePTrueIntrinsicCalls(SmallSetVector<Function *, 4> &Functions);
|
||||
|
||||
/// Operates at the instruction-scope. I.e., optimizations are applied local
|
||||
/// to individual instructions.
|
||||
static bool optimizeIntrinsic(Instruction *I);
|
||||
bool optimizeIntrinsicCalls(SmallSetVector<Function *, 4> &Functions);
|
||||
|
||||
/// Operates at the function-scope. I.e., optimizations are applied local to
|
||||
/// the functions themselves.
|
||||
bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions);
|
||||
|
||||
static bool optimizePTest(IntrinsicInst *I);
|
||||
static bool optimizeVectorMul(IntrinsicInst *I);
|
||||
static bool optimizeTBL(IntrinsicInst *I);
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
@ -285,185 +276,11 @@ bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// Replace a ptest whose two operands are both aarch64_sve_convert_to_svbool
/// calls on values of the same type with the same ptest intrinsic applied to
/// the unconverted values. The old ptest is erased, and so is each conversion
/// that is left without uses.
///
/// \returns true if \p I was replaced.
bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
  auto *LHSConv = dyn_cast<IntrinsicInst>(I->getArgOperand(0));
  auto *RHSConv = dyn_cast<IntrinsicInst>(I->getArgOperand(1));
  if (!LHSConv || !RHSConv)
    return false;

  if (LHSConv->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool ||
      RHSConv->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return false;

  Value *LHSSrc = LHSConv->getArgOperand(0);
  Value *RHSSrc = RHSConv->getArgOperand(0);
  if (LHSSrc->getType() != RHSSrc->getType())
    return false;

  Value *Ops[] = {LHSSrc, RHSSrc};
  Type *Tys[] = {LHSSrc->getType()};
  Module *M = I->getModule();

  // Build the narrower ptest right before the one being replaced.
  auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys);
  auto CI = CallInst::Create(Fn, Ops, I->getName(), I);

  I->replaceAllUsesWith(CI);
  I->eraseFromParent();

  // Drop the conversions once nothing refers to them any more; when both
  // operands were the same call it must only be erased once.
  if (LHSConv->use_empty())
    LHSConv->eraseFromParent();
  if (LHSConv != RHSConv && RHSConv->use_empty())
    RHSConv->eraseFromParent();

  return true;
}
|
||||
|
||||
/// Remove an SVE [f]mul whose multiplicand or multiplier is a splat of one:
///
///   [f]mul pg, %n, (dup_x 1)      => %n
///   [f]mul pg, %n, (dup _, pg, 1) => %n   (same predicate value only)
///
/// When the multiply is removed, the splat and the predicate are erased too
/// if they are instructions that ended up without uses.
///
/// \param I an aarch64_sve_mul or aarch64_sve_fmul call (asserted).
/// \returns true only if \p I was actually replaced and erased.
bool SVEIntrinsicOpts::optimizeVectorMul(IntrinsicInst *I) {
  assert((I->getIntrinsicID() == Intrinsic::aarch64_sve_mul ||
          I->getIntrinsicID() == Intrinsic::aarch64_sve_fmul) &&
         "Unexpected opcode");

  auto *OpPredicate = I->getOperand(0);
  auto *OpMultiplicand = I->getOperand(1);
  auto *OpMultiplier = I->getOperand(2);

  // Return true if a given instruction is an aarch64_sve_dup_x intrinsic call
  // with a unit splat value, false otherwise.
  auto IsUnitDupX = [](auto *I) {
    auto *IntrI = dyn_cast<IntrinsicInst>(I);
    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup_x)
      return false;

    auto *SplatValue = IntrI->getOperand(0);
    return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
  };

  // Return true if a given instruction is an aarch64_sve_dup intrinsic call
  // with a unit splat value, false otherwise.
  auto IsUnitDup = [](auto *I) {
    auto *IntrI = dyn_cast<IntrinsicInst>(I);
    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)
      return false;

    auto *SplatValue = IntrI->getOperand(2);
    return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
  };

  // Nothing has been rewritten yet. Starting from 'true' would make the
  // function claim a change (and run the cleanup below) for every multiply,
  // including the ones that do not match either pattern.
  bool Changed = false;

  // The OpMultiplier variable should always point to the dup (if any), so
  // swap if necessary.
  if (IsUnitDup(OpMultiplicand) || IsUnitDupX(OpMultiplicand))
    std::swap(OpMultiplier, OpMultiplicand);

  if (IsUnitDupX(OpMultiplier)) {
    // [f]mul pg (dupx 1) %n => %n
    I->replaceAllUsesWith(OpMultiplicand);
    I->eraseFromParent();
    Changed = true;
  } else if (IsUnitDup(OpMultiplier)) {
    // [f]mul pg (dup pg 1) %n => %n
    auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
    auto *DupPg = DupInst->getOperand(1);
    // TODO: this is naive. The optimization is still valid if DupPg
    // 'encompasses' OpPredicate, not only if they're the same predicate.
    if (OpPredicate == DupPg) {
      I->replaceAllUsesWith(OpMultiplicand);
      I->eraseFromParent();
      Changed = true;
    }
  }

  // If an instruction was optimized out then it is possible that some dangling
  // instructions are left.
  if (Changed) {
    auto *OpPredicateInst = dyn_cast<Instruction>(OpPredicate);
    auto *OpMultiplierInst = dyn_cast<Instruction>(OpMultiplier);
    // The splat goes first: a predicated dup uses the predicate, so the
    // predicate can only become unused after the dup is gone.
    if (OpMultiplierInst && OpMultiplierInst->use_empty())
      OpMultiplierInst->eraseFromParent();
    if (OpPredicateInst && OpPredicateInst->use_empty())
      OpPredicateInst->eraseFromParent();
  }

  return Changed;
}
|
||||
|
||||
/// Rewrite sve_tbl(OpVal, sve_dup_x(Idx)) as
/// splat_vector(extractelement(OpVal, Idx)) when Idx is a constant smaller
/// than the known minimum element count of the result type. The tbl call is
/// erased, and so is the dup_x if it has no remaining uses.
///
/// \param I an aarch64_sve_tbl call (asserted).
/// \returns true if \p I was replaced.
bool SVEIntrinsicOpts::optimizeTBL(IntrinsicInst *I) {
  assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_tbl &&
         "Unexpected opcode");

  Value *Table = I->getOperand(0);
  Value *Indices = I->getOperand(1);
  auto *ResultTy = cast<VectorType>(I->getType());

  // The index vector has to be an aarch64_sve_dup_x call ...
  auto *IndexSplat = dyn_cast<IntrinsicInst>(Indices);
  const bool IsDupX =
      IndexSplat &&
      IndexSplat->getIntrinsicID() == Intrinsic::aarch64_sve_dup_x;
  if (!IsDupX)
    return false;

  // ... of a constant that is below the minimal element count of the result.
  auto *Lane = dyn_cast<ConstantInt>(IndexSplat->getOperand(0));
  if (!Lane)
    return false;
  if (Lane->getValue().uge(ResultTy->getElementCount().getKnownMinValue()))
    return false;

  // Emit the extractelement + splat pair in front of the tbl call.
  LLVMContext &Context = I->getContext();
  IRBuilder<> Builder(Context);
  Builder.SetInsertPoint(I);
  auto *Element = Builder.CreateExtractElement(Table, Lane);
  auto *Broadcast =
      Builder.CreateVectorSplat(ResultTy->getElementCount(), Element);

  I->replaceAllUsesWith(Broadcast);
  I->eraseFromParent();

  // The tbl may have been the only user of the index splat.
  if (IndexSplat->use_empty())
    IndexSplat->eraseFromParent();

  return true;
}
|
||||
|
||||
/// Dispatch a single instruction to the matching SVE intrinsic optimization.
///
/// \param I the instruction to inspect; anything that is not an intrinsic
/// call, or is an intrinsic without a handler here, is left untouched.
/// \returns the handler's result, i.e. whether the instruction was changed;
/// false when no handler applies.
bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) {
  IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I);
  if (!IntrI)
    return false;

  // Every path through the switch returns, so nothing follows it.
  switch (IntrI->getIntrinsicID()) {
  case Intrinsic::aarch64_sve_fmul:
  case Intrinsic::aarch64_sve_mul:
    return optimizeVectorMul(IntrI);
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
    return optimizePTest(IntrI);
  case Intrinsic::aarch64_sve_tbl:
    return optimizeTBL(IntrI);
  default:
    return false;
  }
}
|
||||
|
||||
/// Run optimizeIntrinsic() over every instruction of every function in
/// \p Functions.
///
/// \returns true if any instruction was changed.
bool SVEIntrinsicOpts::optimizeIntrinsicCalls(
    SmallSetVector<Function *, 4> &Functions) {
  bool AnyChange = false;

  for (Function *Fn : Functions) {
    DominatorTree &DomTree =
        getAnalysis<DominatorTreeWrapperPass>(*Fn).getDomTree();

    // Walk the blocks in reverse post-order starting at the dominator tree
    // root so that defs are seen before uses, which lets simplification
    // happen incrementally.
    ReversePostOrderTraversal<BasicBlock *> Order(DomTree.getRoot());
    for (BasicBlock *Block : Order) {
      // Handlers may erase the visited instruction, hence the early-inc
      // range.
      for (Instruction &Inst : make_early_inc_range(*Block)) {
        if (optimizeIntrinsic(&Inst))
          AnyChange = true;
      }
    }
  }

  return AnyChange;
}
|
||||
|
||||
/// Apply the function-scope optimizations to \p Functions: first the ptrue
/// intrinsic call optimization, then the per-instruction intrinsic
/// optimizations. Both always run.
///
/// \returns true if either step changed anything.
bool SVEIntrinsicOpts::optimizeFunctions(
    SmallSetVector<Function *, 4> &Functions) {
  const bool PTruesChanged = optimizePTrueIntrinsicCalls(Functions);
  const bool IntrinsicsChanged = optimizeIntrinsicCalls(Functions);
  return PTruesChanged || IntrinsicsChanged;
}
|
||||
@ -480,13 +297,7 @@ bool SVEIntrinsicOpts::runOnModule(Module &M) {
|
||||
continue;
|
||||
|
||||
switch (F.getIntrinsicID()) {
|
||||
case Intrinsic::aarch64_sve_ptest_any:
|
||||
case Intrinsic::aarch64_sve_ptest_first:
|
||||
case Intrinsic::aarch64_sve_ptest_last:
|
||||
case Intrinsic::aarch64_sve_ptrue:
|
||||
case Intrinsic::aarch64_sve_mul:
|
||||
case Intrinsic::aarch64_sve_fmul:
|
||||
case Intrinsic::aarch64_sve_tbl:
|
||||
for (User *U : F.users())
|
||||
Functions.insert(cast<Instruction>(U)->getFunction());
|
||||
break;
|
||||
|
@ -1,8 +1,9 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s
|
||||
; RUN: opt -S -instcombine < %s | FileCheck %s
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
; Idempotent fmuls -- should compile to just a ret.
|
||||
define <vscale x 8 x half> @idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
|
||||
define <vscale x 8 x half> @idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_fmul_f16(
|
||||
; CHECK-NEXT: ret <vscale x 8 x half> [[A:%.*]]
|
||||
;
|
||||
@ -11,7 +12,7 @@ define <vscale x 8 x half> @idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x
|
||||
ret <vscale x 8 x half> %2
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
|
||||
define <vscale x 4 x float> @idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_fmul_f32(
|
||||
; CHECK-NEXT: ret <vscale x 4 x float> [[A:%.*]]
|
||||
;
|
||||
@ -20,7 +21,7 @@ define <vscale x 4 x float> @idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale
|
||||
ret <vscale x 4 x float> %2
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
|
||||
define <vscale x 2 x double> @idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_fmul_f64(
|
||||
; CHECK-NEXT: ret <vscale x 2 x double> [[A:%.*]]
|
||||
;
|
||||
@ -29,7 +30,7 @@ define <vscale x 2 x double> @idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale
|
||||
ret <vscale x 2 x double> %2
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @idempotent_fmul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
|
||||
define <vscale x 2 x double> @idempotent_fmul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_fmul_different_argument_order(
|
||||
; CHECK-NEXT: ret <vscale x 2 x double> [[A:%.*]]
|
||||
;
|
||||
@ -39,7 +40,7 @@ define <vscale x 2 x double> @idempotent_fmul_different_argument_order(<vscale x
|
||||
ret <vscale x 2 x double> %2
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @idempotent_fmul_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
|
||||
define <vscale x 8 x half> @idempotent_fmul_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_fmul_with_predicated_dup(
|
||||
; CHECK-NEXT: ret <vscale x 8 x half> [[A:%.*]]
|
||||
;
|
||||
@ -48,7 +49,7 @@ define <vscale x 8 x half> @idempotent_fmul_with_predicated_dup(<vscale x 8 x i1
|
||||
ret <vscale x 8 x half> %2
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @idempotent_fmul_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
|
||||
define <vscale x 8 x half> @idempotent_fmul_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
|
||||
; Edge case -- make sure that the case where we're fmultiplying two dups
|
||||
; together is sane.
|
||||
; CHECK-LABEL: @idempotent_fmul_two_dups(
|
||||
@ -62,7 +63,7 @@ define <vscale x 8 x half> @idempotent_fmul_two_dups(<vscale x 8 x i1> %pg, <vsc
|
||||
}
|
||||
|
||||
; Non-idempotent fmuls -- we don't expect these to be optimised out.
|
||||
define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
|
||||
define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
|
||||
; CHECK-LABEL: @non_idempotent_fmul_f16(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 0xH4000)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[TMP1]])
|
||||
@ -73,7 +74,7 @@ define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vsca
|
||||
ret <vscale x 8 x half> %2
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
|
||||
define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
|
||||
; CHECK-LABEL: @non_idempotent_fmul_f32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.000000e+00)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[TMP1]])
|
||||
@ -84,7 +85,7 @@ define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vsc
|
||||
ret <vscale x 4 x float> %2
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @non_idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
|
||||
define <vscale x 2 x double> @non_idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
|
||||
; CHECK-LABEL: @non_idempotent_fmul_f64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.000000e+00)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[TMP1]])
|
||||
@ -95,7 +96,7 @@ define <vscale x 2 x double> @non_idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vs
|
||||
ret <vscale x 2 x double> %2
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @non_idempotent_fmul_with_predicated_dup(<vscale x 2 x i1> %pg1, <vscale x 2 x i1> %pg2, <vscale x 2 x double> %a) {
|
||||
define <vscale x 2 x double> @non_idempotent_fmul_with_predicated_dup(<vscale x 2 x i1> %pg1, <vscale x 2 x i1> %pg2, <vscale x 2 x double> %a) #0 {
|
||||
; Different predicates
|
||||
; CHECK-LABEL: @non_idempotent_fmul_with_predicated_dup(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG1:%.*]], double 1.000000e+00)
|
||||
@ -117,3 +118,5 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
@ -1,8 +1,9 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s
|
||||
; RUN: opt -S -instcombine < %s | FileCheck %s
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
; Idempotent muls -- should compile to just a ret.
|
||||
define <vscale x 8 x i16> @idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
|
||||
define <vscale x 8 x i16> @idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_mul_i16(
|
||||
; CHECK-NEXT: ret <vscale x 8 x i16> [[A:%.*]]
|
||||
;
|
||||
@ -11,7 +12,7 @@ define <vscale x 8 x i16> @idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale x 8
|
||||
ret <vscale x 8 x i16> %2
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
|
||||
define <vscale x 4 x i32> @idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_mul_i32(
|
||||
; CHECK-NEXT: ret <vscale x 4 x i32> [[A:%.*]]
|
||||
;
|
||||
@ -20,7 +21,7 @@ define <vscale x 4 x i32> @idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale x 4
|
||||
ret <vscale x 4 x i32> %2
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
|
||||
define <vscale x 2 x i64> @idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_mul_i64(
|
||||
; CHECK-NEXT: ret <vscale x 2 x i64> [[A:%.*]]
|
||||
;
|
||||
@ -29,7 +30,7 @@ define <vscale x 2 x i64> @idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2
|
||||
ret <vscale x 2 x i64> %2
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @idempotent_mul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
|
||||
define <vscale x 2 x i64> @idempotent_mul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_mul_different_argument_order(
|
||||
; CHECK-NEXT: ret <vscale x 2 x i64> [[A:%.*]]
|
||||
;
|
||||
@ -39,7 +40,7 @@ define <vscale x 2 x i64> @idempotent_mul_different_argument_order(<vscale x 2 x
|
||||
ret <vscale x 2 x i64> %2
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @idempotent_mul_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
|
||||
define <vscale x 8 x i16> @idempotent_mul_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_mul_with_predicated_dup(
|
||||
; CHECK-NEXT: ret <vscale x 8 x i16> [[A:%.*]]
|
||||
;
|
||||
@ -48,7 +49,7 @@ define <vscale x 8 x i16> @idempotent_mul_with_predicated_dup(<vscale x 8 x i1>
|
||||
ret <vscale x 8 x i16> %2
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @idempotent_mul_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
|
||||
define <vscale x 8 x i16> @idempotent_mul_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
|
||||
; Edge case -- make sure that the case where we're multiplying two dups
|
||||
; together is sane.
|
||||
; CHECK-LABEL: @idempotent_mul_two_dups(
|
||||
@ -62,7 +63,7 @@ define <vscale x 8 x i16> @idempotent_mul_two_dups(<vscale x 8 x i1> %pg, <vscal
|
||||
}
|
||||
|
||||
; Non-idempotent muls -- we don't expect these to be optimised out.
|
||||
define <vscale x 8 x i16> @non_idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
|
||||
define <vscale x 8 x i16> @non_idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: @non_idempotent_mul_i16(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[TMP1]])
|
||||
@ -73,7 +74,7 @@ define <vscale x 8 x i16> @non_idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale
|
||||
ret <vscale x 8 x i16> %2
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @non_idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
|
||||
define <vscale x 4 x i32> @non_idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: @non_idempotent_mul_i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 2)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[TMP1]])
|
||||
@ -84,7 +85,7 @@ define <vscale x 4 x i32> @non_idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale
|
||||
ret <vscale x 4 x i32> %2
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @non_idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
|
||||
define <vscale x 2 x i64> @non_idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: @non_idempotent_mul_i64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 2)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[TMP1]])
|
||||
@ -95,7 +96,7 @@ define <vscale x 2 x i64> @non_idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale
|
||||
ret <vscale x 2 x i64> %2
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @non_idempotent_mul_with_predicated_dup(<vscale x 2 x i1> %pg1, <vscale x 2 x i1> %pg2, <vscale x 2 x i64> %a) {
|
||||
define <vscale x 2 x i64> @non_idempotent_mul_with_predicated_dup(<vscale x 2 x i1> %pg1, <vscale x 2 x i1> %pg2, <vscale x 2 x i64> %a) #0 {
|
||||
; Different predicates
|
||||
; CHECK-LABEL: @non_idempotent_mul_with_predicated_dup(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG1:%.*]], i64 1)
|
||||
@ -117,3 +118,5 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vs
|
||||
declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
@ -1,9 +1,10 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -aarch64-sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
|
||||
; RUN: opt -S -instcombine < %s | FileCheck %s
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
; op2 = tbl(op1 dup_x(idx)) -> op2 = vector_splat(extractelement(op1, idx))
|
||||
|
||||
define <vscale x 16 x i8> @dup_ext_i8(<vscale x 16 x i8> %data) {
|
||||
define <vscale x 16 x i8> @dup_ext_i8(<vscale x 16 x i8> %data) #0 {
|
||||
; CHECK-LABEL: @dup_ext_i8(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <vscale x 16 x i8> [[DATA:%.*]], i8 1
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP1]], i32 0
|
||||
@ -15,7 +16,7 @@ define <vscale x 16 x i8> @dup_ext_i8(<vscale x 16 x i8> %data) {
|
||||
ret <vscale x 16 x i8> %out
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @dup_ext_i16(<vscale x 8 x i16> %data) {
|
||||
define <vscale x 8 x i16> @dup_ext_i16(<vscale x 8 x i16> %data) #0 {
|
||||
; CHECK-LABEL: @dup_ext_i16(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <vscale x 8 x i16> [[DATA:%.*]], i16 1
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[TMP1]], i32 0
|
||||
@ -27,7 +28,7 @@ define <vscale x 8 x i16> @dup_ext_i16(<vscale x 8 x i16> %data) {
|
||||
ret <vscale x 8 x i16> %out
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @dup_ext_i32(<vscale x 4 x i32> %data) {
|
||||
define <vscale x 4 x i32> @dup_ext_i32(<vscale x 4 x i32> %data) #0 {
|
||||
; CHECK-LABEL: @dup_ext_i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <vscale x 4 x i32> [[DATA:%.*]], i32 1
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
|
||||
@ -39,7 +40,7 @@ define <vscale x 4 x i32> @dup_ext_i32(<vscale x 4 x i32> %data) {
|
||||
ret <vscale x 4 x i32> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @dup_ext_i64(<vscale x 2 x i64> %data) {
|
||||
define <vscale x 2 x i64> @dup_ext_i64(<vscale x 2 x i64> %data) #0 {
|
||||
; CHECK-LABEL: @dup_ext_i64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <vscale x 2 x i64> [[DATA:%.*]], i64 1
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP1]], i32 0
|
||||
@ -51,7 +52,7 @@ define <vscale x 2 x i64> @dup_ext_i64(<vscale x 2 x i64> %data) {
|
||||
ret <vscale x 2 x i64> %out
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @dup_ext_f16(<vscale x 8 x half> %data) {
|
||||
define <vscale x 8 x half> @dup_ext_f16(<vscale x 8 x half> %data) #0 {
|
||||
; CHECK-LABEL: @dup_ext_f16(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <vscale x 8 x half> [[DATA:%.*]], i16 1
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[TMP1]], i32 0
|
||||
@ -63,7 +64,7 @@ define <vscale x 8 x half> @dup_ext_f16(<vscale x 8 x half> %data) {
|
||||
ret <vscale x 8 x half> %out
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @dup_ext_f32(<vscale x 4 x float> %data) {
|
||||
define <vscale x 4 x float> @dup_ext_f32(<vscale x 4 x float> %data) #0 {
|
||||
; CHECK-LABEL: @dup_ext_f32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <vscale x 4 x float> [[DATA:%.*]], i32 1
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[TMP1]], i32 0
|
||||
@ -75,7 +76,7 @@ define <vscale x 4 x float> @dup_ext_f32(<vscale x 4 x float> %data) {
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @dup_ext_f64(<vscale x 2 x double> %data) {
|
||||
define <vscale x 2 x double> @dup_ext_f64(<vscale x 2 x double> %data) #0 {
|
||||
; CHECK-LABEL: @dup_ext_f64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <vscale x 2 x double> [[DATA:%.*]], i64 1
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[TMP1]], i32 0
|
||||
@ -98,3 +99,5 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64( <vscale x 2 x i64>, <v
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16( <vscale x 8 x half>, <vscale x 8 x i16>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32( <vscale x 4 x float>, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64( <vscale x 2 x double>, <vscale x 2 x i64>)
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
@ -1,11 +1,13 @@
|
||||
; RUN: opt -S -aarch64-sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s
|
||||
; RUN: opt -S -instcombine < %s | FileCheck %s
|
||||
|
||||
define i1 @ptest_any1(<vscale x 2 x i1> %a) {
|
||||
; OPT-LABEL: ptest_any1
|
||||
; OPT: %mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 0)
|
||||
; OPT-NOT: convert
|
||||
; OPT-NEXT: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv2i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %a)
|
||||
; OPT-NEXT: ret i1 %[[OUT]]
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
define i1 @ptest_any1(<vscale x 2 x i1> %a) #0 {
|
||||
; CHECK-LABEL: ptest_any1
|
||||
; CHECK: %mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 0)
|
||||
; CHECK-NOT: convert
|
||||
; CHECK-NEXT: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv2i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %a)
|
||||
; CHECK-NEXT: ret i1 %[[OUT]]
|
||||
%mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 0)
|
||||
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %mask)
|
||||
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
|
||||
@ -14,12 +16,12 @@ define i1 @ptest_any1(<vscale x 2 x i1> %a) {
|
||||
}
|
||||
|
||||
; No transform because the ptest is using differently sized operands.
|
||||
define i1 @ptest_any2(<vscale x 4 x i1> %a) {
|
||||
; OPT-LABEL: ptest_any2
|
||||
; OPT: %mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
|
||||
; OPT-NEXT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %mask)
|
||||
; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
|
||||
; OPT-NEXT: %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
|
||||
define i1 @ptest_any2(<vscale x 4 x i1> %a) #0 {
|
||||
; CHECK-LABEL: ptest_any2
|
||||
; CHECK: %mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
|
||||
; CHECK-NEXT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %mask)
|
||||
; CHECK-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
|
||||
; CHECK-NEXT: %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
|
||||
%mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
|
||||
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %mask)
|
||||
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
|
||||
@ -27,12 +29,12 @@ define i1 @ptest_any2(<vscale x 4 x i1> %a) {
|
||||
ret i1 %out
|
||||
}
|
||||
|
||||
define i1 @ptest_first(<vscale x 4 x i1> %a) {
|
||||
; OPT-LABEL: ptest_first
|
||||
; OPT: %mask = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 0)
|
||||
; OPT-NOT: convert
|
||||
; OPT-NEXT: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %mask, <vscale x 4 x i1> %a)
|
||||
; OPT-NEXT: ret i1 %[[OUT]]
|
||||
define i1 @ptest_first(<vscale x 4 x i1> %a) #0 {
|
||||
; CHECK-LABEL: ptest_first
|
||||
; CHECK: %mask = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 0)
|
||||
; CHECK-NOT: convert
|
||||
; CHECK-NEXT: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %mask, <vscale x 4 x i1> %a)
|
||||
; CHECK-NEXT: ret i1 %[[OUT]]
|
||||
%mask = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 0)
|
||||
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %mask)
|
||||
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
|
||||
@ -40,22 +42,22 @@ define i1 @ptest_first(<vscale x 4 x i1> %a) {
|
||||
ret i1 %out
|
||||
}
|
||||
|
||||
define i1 @ptest_first_same_ops(<vscale x 2 x i1> %a) {
|
||||
; OPT-LABEL: ptest_first_same_ops
|
||||
; OPT: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.first.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %a)
|
||||
; OPT-NOT: convert
|
||||
; OPT-NEXT: ret i1 %[[OUT]]
|
||||
define i1 @ptest_first_same_ops(<vscale x 2 x i1> %a) #0 {
|
||||
; CHECK-LABEL: ptest_first_same_ops
|
||||
; CHECK: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.first.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %a)
|
||||
; CHECK-NOT: convert
|
||||
; CHECK-NEXT: ret i1 %[[OUT]]
|
||||
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
|
||||
%2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
|
||||
ret i1 %2
|
||||
}
|
||||
|
||||
define i1 @ptest_last(<vscale x 8 x i1> %a) {
|
||||
; OPT-LABEL: ptest_last
|
||||
; OPT: %mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 0)
|
||||
; OPT-NOT: convert
|
||||
; OPT-NEXT: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.last.nxv8i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %a)
|
||||
; OPT-NEXT: ret i1 %[[OUT]]
|
||||
define i1 @ptest_last(<vscale x 8 x i1> %a) #0 {
|
||||
; CHECK-LABEL: ptest_last
|
||||
; CHECK: %mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 0)
|
||||
; CHECK-NOT: convert
|
||||
; CHECK-NEXT: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.last.nxv8i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %a)
|
||||
; CHECK-NEXT: ret i1 %[[OUT]]
|
||||
%mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 0)
|
||||
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %mask)
|
||||
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
|
||||
@ -75,3 +77,5 @@ declare i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1>, <vscale x 16
|
||||
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
|
||||
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
|
||||
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
Loading…
Reference in New Issue
Block a user