mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[AArch64][SVE] Remove chains of unnecessary SVE reinterpret intrinsics
This commit extends SVEIntrinsicOpts::optimizeConvertFromSVBool to identify and remove longer chains of redundant SVE reinterpret intrinsics. For example, the following chain of SVE reinterprets is now recognised as redundant:

  %a = <vscale x 2 x i1>
  %1 = <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(<vscale x 2 x i1> %a)
  %2 = <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool(<vscale x 16 x i1> %1)
  %3 = <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(<vscale x 4 x i1> %2)
  %4 = <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool(<vscale x 16 x i1> %3)
  %5 = <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(<vscale x 4 x i1> %4)
  %6 = <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool(<vscale x 16 x i1> %5)
  ret <vscale x 2 x i1> %6

and will be replaced with:

  ret <vscale x 2 x i1> %a

Eliminating these can sometimes mean emitting fewer unnecessary loads/stores when lowering to assembly.

Differential Revision: https://reviews.llvm.org/D94074
This commit is contained in:
parent
1d15e5fce3
commit
7f6a519103
@ -177,22 +177,50 @@ bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
|
||||
if (isa<PHINode>(I->getArgOperand(0)))
|
||||
return processPhiNode(I);
|
||||
|
||||
// If we have a reinterpret intrinsic I of type A which is converting from
|
||||
// another reinterpret Y of type B, and the source type of Y is A, then we can
|
||||
// elide away both reinterprets if there are no other users of Y.
|
||||
auto *Y = isReinterpretToSVBool(I->getArgOperand(0));
|
||||
if (!Y)
|
||||
SmallVector<Instruction *, 32> CandidatesForRemoval;
|
||||
Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr;
|
||||
|
||||
const auto *IVTy = cast<VectorType>(I->getType());
|
||||
|
||||
// Walk the chain of conversions.
|
||||
while (Cursor) {
|
||||
// If the type of the cursor has fewer lanes than the final result, zeroing
|
||||
// must take place, which breaks the equivalence chain.
|
||||
const auto *CursorVTy = cast<VectorType>(Cursor->getType());
|
||||
if (CursorVTy->getElementCount().getKnownMinValue() <
|
||||
IVTy->getElementCount().getKnownMinValue())
|
||||
break;
|
||||
|
||||
// If the cursor has the same type as I, it is a viable replacement.
|
||||
if (Cursor->getType() == IVTy)
|
||||
EarliestReplacement = Cursor;
|
||||
|
||||
auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
|
||||
|
||||
// If this is not an SVE conversion intrinsic, this is the end of the chain.
|
||||
if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
|
||||
Intrinsic::aarch64_sve_convert_to_svbool ||
|
||||
IntrinsicCursor->getIntrinsicID() ==
|
||||
Intrinsic::aarch64_sve_convert_from_svbool))
|
||||
break;
|
||||
|
||||
CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
|
||||
Cursor = IntrinsicCursor->getOperand(0);
|
||||
}
|
||||
|
||||
// If no viable replacement in the conversion chain was found, there is
|
||||
// nothing to do.
|
||||
if (!EarliestReplacement)
|
||||
return false;
|
||||
|
||||
Value *SourceVal = Y->getArgOperand(0);
|
||||
if (I->getType() != SourceVal->getType())
|
||||
return false;
|
||||
|
||||
I->replaceAllUsesWith(SourceVal);
|
||||
I->replaceAllUsesWith(EarliestReplacement);
|
||||
I->eraseFromParent();
|
||||
if (Y->use_empty())
|
||||
Y->eraseFromParent();
|
||||
|
||||
while (!CandidatesForRemoval.empty()) {
|
||||
Instruction *Candidate = CandidatesForRemoval.pop_back_val();
|
||||
if (Candidate->use_empty())
|
||||
Candidate->eraseFromParent();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -67,6 +67,62 @@ define <vscale x 16 x i1> @reinterpret_test_d_rev(<vscale x 16 x i1> %a) {
|
||||
ret <vscale x 16 x i1> %2
|
||||
}
|
||||
|
||||
define <vscale x 2 x i1> @reinterpret_test_full_chain(<vscale x 2 x i1> %a) {
|
||||
; OPT-LABEL: @reinterpret_test_full_chain(
|
||||
; OPT: ret <vscale x 2 x i1> %a
|
||||
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
|
||||
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
|
||||
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
|
||||
%4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
|
||||
%5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
|
||||
%6 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %5)
|
||||
ret <vscale x 2 x i1> %6
|
||||
}
|
||||
|
||||
; The last two reinterprets are not necessary, since they are doing the same
|
||||
; work as the first two.
|
||||
define <vscale x 4 x i1> @reinterpret_test_partial_chain(<vscale x 2 x i1> %a) {
|
||||
; OPT-LABEL: @reinterpret_test_partial_chain(
|
||||
; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
|
||||
; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
|
||||
; OPT-NEXT: ret <vscale x 4 x i1> %2
|
||||
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
|
||||
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
|
||||
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
|
||||
%4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
|
||||
ret <vscale x 4 x i1> %4
|
||||
}
|
||||
|
||||
; The chain cannot be reduced because of the second reinterpret, which causes
|
||||
; zeroing.
|
||||
define <vscale x 8 x i1> @reinterpret_test_irreducible_chain(<vscale x 8 x i1> %a) {
|
||||
; OPT-LABEL: @reinterpret_test_irreducible_chain(
|
||||
; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
|
||||
; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
|
||||
; OPT-NEXT: %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
|
||||
; OPT-NEXT: %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
|
||||
; OPT-NEXT: ret <vscale x 8 x i1> %4
|
||||
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
|
||||
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
|
||||
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
|
||||
%4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
|
||||
ret <vscale x 8 x i1> %4
|
||||
}
|
||||
|
||||
; Here, the candidate list is larger than the number of instructions that we
|
||||
; end up removing.
|
||||
define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates(<vscale x 8 x i1> %a) {
|
||||
; OPT-LABEL: @reinterpret_test_keep_some_candidates(
|
||||
; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
|
||||
; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
|
||||
; OPT-NEXT: ret <vscale x 4 x i1> %2
|
||||
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
|
||||
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
|
||||
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
|
||||
%4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
|
||||
ret <vscale x 4 x i1> %4
|
||||
}
|
||||
|
||||
define <vscale x 2 x i1> @reinterpret_reductions(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
|
||||
; OPT-LABEL: reinterpret_reductions
|
||||
; OPT-NOT: convert
|
||||
|
Loading…
x
Reference in New Issue
Block a user