mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[CSSPGO] Unblock optimizations with pseudo probe instrumentation part 2.
As a follow-up to D95982, this patch continues unblocking optimizations that are blocked by pseudo probe instrumentation. The optimizations unblocked are: - In-block load propagation. - In-block dead store elimination. - Memory copy optimization that turns stores to consecutive memories into a memset. These optimizations are local to a block, so they shouldn't affect the profile quality. Reviewed By: wmi Differential Revision: https://reviews.llvm.org/D100075
This commit is contained in:
parent
d0dc7f2344
commit
7416a011e2
@ -532,7 +532,7 @@ Value *llvm::findAvailablePtrLoadStore(
|
||||
// We must ignore debug info directives when counting (otherwise they
|
||||
// would affect codegen).
|
||||
Instruction *Inst = &*--ScanFrom;
|
||||
if (isa<DbgInfoIntrinsic>(Inst))
|
||||
if (Inst->isDebugOrPseudoInst())
|
||||
continue;
|
||||
|
||||
// Restore ScanFrom to expected value in case next test succeeds
|
||||
@ -620,7 +620,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
|
||||
SmallVector<Instruction *> MustNotAliasInsts;
|
||||
for (Instruction &Inst : make_range(++Load->getReverseIterator(),
|
||||
ScanBB->rend())) {
|
||||
if (isa<DbgInfoIntrinsic>(&Inst))
|
||||
if (Inst.isDebugOrPseudoInst())
|
||||
continue;
|
||||
|
||||
if (MaxInstsToScan-- == 0)
|
||||
|
@ -1396,7 +1396,7 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
|
||||
--BBI;
|
||||
// Don't count debug info directives, lest they affect codegen,
|
||||
// and we skip pointer-to-pointer bitcasts, which are NOPs.
|
||||
if (isa<DbgInfoIntrinsic>(BBI) ||
|
||||
if (BBI->isDebugOrPseudoInst() ||
|
||||
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
|
||||
ScanInsts++;
|
||||
continue;
|
||||
|
@ -399,6 +399,13 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
|
||||
}
|
||||
}
|
||||
|
||||
// Calls that only access inaccessible memory do not block merging
|
||||
// accessible stores.
|
||||
if (auto *CB = dyn_cast<CallBase>(BI)) {
|
||||
if (CB->onlyAccessesInaccessibleMemory())
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
|
||||
// If the instruction is readnone, ignore it, otherwise bail out. We
|
||||
// don't even allow readonly here because we don't want something like:
|
||||
|
@ -202,7 +202,7 @@ static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI,
|
||||
if (!ProcessedBegin)
|
||||
--I;
|
||||
|
||||
if (isa<DbgInfoIntrinsic>(Inst))
|
||||
if (Inst->isDebugOrPseudoInst())
|
||||
continue;
|
||||
|
||||
if (SinkInstruction(Inst, Stores, DT, LI, AA)) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
|
||||
; RUN: opt -passes=instcombine -available-load-scan-limit=2 -S < %s | FileCheck %s
|
||||
|
||||
%struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 }
|
||||
%struct.CompAtomExt = type { i32 }
|
||||
@ -13,11 +13,11 @@
|
||||
%class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }>
|
||||
%class.Pairlists = type { i16*, i32, i32 }
|
||||
|
||||
define dso_local void @merge(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
|
||||
;; Check the minPart4 and minPart assignments are merged.
|
||||
; CHECK-LABEL: @merge(
|
||||
; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
|
||||
; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
|
||||
|
||||
define dso_local void @_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
|
||||
entry:
|
||||
%savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11
|
||||
%0 = load i32, i32* %savePairlists3, align 8
|
||||
@ -58,7 +58,36 @@ if.else147: ; preds = %if.then138
|
||||
ret void
|
||||
}
|
||||
|
||||
declare dso_local void @_ZN9Pairlists8addIndexEv() align 2
|
||||
define i32 @load(i32* nocapture %a, i32* nocapture %b) {
|
||||
;; Check the last load is deleted (its value is forwarded from the preceding store).
|
||||
; CHECK-LABEL: @load(
|
||||
; CHECK-NEXT: %1 = getelementptr inbounds i32, i32* %a, i64 1
|
||||
; CHECK-NEXT: %2 = load i32, i32* %1, align 8
|
||||
; CHECK-NEXT: %3 = getelementptr inbounds i32, i32* %b, i64 1
|
||||
; CHECK-NEXT: store i32 %2, i32* %3, align 8
|
||||
; CHECK-NEXT: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
|
||||
; CHECK-NEXT: ret i32 %[[#]]
|
||||
%1 = getelementptr inbounds i32, i32* %a, i32 1
|
||||
%2 = load i32, i32* %1, align 8
|
||||
%3 = getelementptr inbounds i32, i32* %b, i32 1
|
||||
store i32 %2, i32* %3, align 8
|
||||
%4 = getelementptr inbounds i32, i32* %b, i32 1
|
||||
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
|
||||
%5 = load i32, i32* %4, align 8
|
||||
ret i32 %5
|
||||
}
|
||||
|
||||
define void @dse(i32* %p) {
|
||||
;; Check the first store is deleted.
|
||||
; CHECK-LABEL: @dse(
|
||||
; CHECK-NEXT: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
|
||||
; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
store i32 0, i32* %p
|
||||
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
|
||||
store i32 0, i32* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: inaccessiblememonly nounwind willreturn
|
||||
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
|
||||
|
25
test/Transforms/SampleProfile/pseudo-probe-memset.ll
Normal file
25
test/Transforms/SampleProfile/pseudo-probe-memset.ll
Normal file
@ -0,0 +1,25 @@
|
||||
; RUN: opt < %s -memcpyopt -S | FileCheck %s
|
||||
|
||||
%struct.MV = type { i16, i16 }
|
||||
|
||||
define void @test(i32* nocapture %c) nounwind optsize {
|
||||
; All the stores in this example should be merged into a single memset.
|
||||
; CHECK-NOT: store i32 -1
|
||||
; CHECK: call void @llvm.memset.p0i8.i64
|
||||
store i32 -1, i32* %c, align 4
|
||||
%1 = getelementptr inbounds i32, i32* %c, i32 1
|
||||
store i32 -1, i32* %1, align 4
|
||||
%2 = getelementptr inbounds i32, i32* %c, i32 2
|
||||
store i32 -1, i32* %2, align 4
|
||||
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
|
||||
%3 = getelementptr inbounds i32, i32* %c, i32 3
|
||||
store i32 -1, i32* %3, align 4
|
||||
%4 = getelementptr inbounds i32, i32* %c, i32 4
|
||||
store i32 -1, i32* %4, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: inaccessiblememonly nounwind willreturn
|
||||
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
|
||||
|
||||
attributes #0 = { inaccessiblememonly nounwind willreturn }
|
Loading…
Reference in New Issue
Block a user