mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[Attributor] Use abstract call sites to determine associated arguments
This is the second step after D67871 to make use of abstract call sites. In this patch the argument we associate with an abstract call site argument can be the one in the callback callee instead of the one in the callback broker. Caveat: We cannot allow no-alias arguments for problematic callbacks: As described in [1], adding no-alias (or restrict) to arguments could break synchronization as the synchronization effect, e.g., a barrier, does not "alias" with the pointer anymore. This disables no-alias annotation for potentially problematic arguments until we implement the fix described in [1]. Reviewed By: uenoku Differential Revision: https://reviews.llvm.org/D68008 [1] Compiler Optimizations for OpenMP, J. Doerfert and H. Finkel, International Workshop on OpenMP 2018, http://compilers.cs.uni-saarland.de/people/doerfert/par_opt18.pdf
This commit is contained in:
parent
6fdd67f760
commit
4ce6535212
@ -693,6 +693,18 @@ private:
|
||||
User::op_iterator getCallee() const;
|
||||
};
|
||||
|
||||
/// Establish a view to a call site for examination.
|
||||
/// A call-site view built on the default (const) instantiation of
/// CallSiteBase. Every constructor simply forwards its argument to the base.
class ImmutableCallSite : public CallSiteBase<> {
public:
  /// Default-construct the view.
  ImmutableCallSite() = default;

  // The concrete call-like instruction kinds convert implicitly.
  ImmutableCallSite(const CallInst *Call) : CallSiteBase(Call) {}
  ImmutableCallSite(const InvokeInst *Invoke) : CallSiteBase(Invoke) {}
  ImmutableCallSite(const CallBrInst *CallBr) : CallSiteBase(CallBr) {}

  // Arbitrary instructions and values have to be wrapped explicitly.
  explicit ImmutableCallSite(const Instruction *Inst) : CallSiteBase(Inst) {}
  explicit ImmutableCallSite(const Value *Val) : CallSiteBase(Val) {}

  /// Build the view from the instruction underlying the CallSite \p Site.
  ImmutableCallSite(CallSite Site) : CallSiteBase(Site.getInstruction()) {}
};
|
||||
|
||||
/// AbstractCallSite
|
||||
///
|
||||
/// An abstract call site is a wrapper that allows to treat direct,
|
||||
@ -765,6 +777,13 @@ public:
|
||||
/// as well as the callee of the abstract call site.
|
||||
AbstractCallSite(const Use *U);
|
||||
|
||||
/// Add operand uses of \p ICS that represent callback uses into \p CBUses.
|
||||
///
|
||||
/// All uses added to \p CBUses can be used to create abstract call sites for
|
||||
/// which AbstractCallSite::isCallbackCall() will return true.
|
||||
static void getCallbackUses(ImmutableCallSite ICS,
|
||||
SmallVectorImpl<const Use *> &CBUses);
|
||||
|
||||
/// Conversion operator to conveniently check for a valid/initialized ACS.
|
||||
explicit operator bool() const { return (bool)CS; }
|
||||
|
||||
@ -902,18 +921,6 @@ template <> struct DenseMapInfo<CallSite> {
|
||||
}
|
||||
};
|
||||
|
||||
/// Establish a view to a call site for examination.
|
||||
/// A call-site view built on the default (const) instantiation of
/// CallSiteBase. Every constructor simply forwards its argument to the base.
class ImmutableCallSite : public CallSiteBase<> {
public:
  /// Default-construct the view.
  ImmutableCallSite() = default;

  // The concrete call-like instruction kinds convert implicitly.
  ImmutableCallSite(const CallInst *Call) : CallSiteBase(Call) {}
  ImmutableCallSite(const InvokeInst *Invoke) : CallSiteBase(Invoke) {}
  ImmutableCallSite(const CallBrInst *CallBr) : CallSiteBase(CallBr) {}

  // Arbitrary instructions and values have to be wrapped explicitly.
  explicit ImmutableCallSite(const Instruction *Inst) : CallSiteBase(Inst) {}
  explicit ImmutableCallSite(const Value *Val) : CallSiteBase(Val) {}

  /// Build the view from the instruction underlying the CallSite \p Site.
  ImmutableCallSite(CallSite Site) : CallSiteBase(Site.getInstruction()) {}
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_IR_CALLSITE_H
|
||||
|
@ -280,17 +280,7 @@ struct IRPosition {
|
||||
}
|
||||
|
||||
/// Return the associated argument, if any.
|
||||
Argument *getAssociatedArgument() const {
  // An anchor value that is itself an argument is returned directly.
  if (Argument *AnchorArg = dyn_cast<Argument>(&getAnchorValue()))
    return AnchorArg;

  // Otherwise map the argument number onto the associated function. A
  // negative number, a missing function, or an index beyond the function's
  // declared arguments all yield nullptr.
  const int Idx = getArgNo();
  if (Idx >= 0)
    if (Function *Fn = getAssociatedFunction())
      if (unsigned(Idx) < Fn->arg_size())
        return Fn->arg_begin() + Idx;

  return nullptr;
}
|
||||
Argument *getAssociatedArgument() const;
|
||||
|
||||
/// Return true if the position refers to a function interface, that is the
|
||||
/// function scope, the function return, or an argument.
|
||||
|
@ -33,6 +33,25 @@ STATISTIC(NumInvalidAbstractCallSitesUnknownCallee,
|
||||
STATISTIC(NumInvalidAbstractCallSitesNoCallback,
|
||||
"Number of invalid abstract call sites created (no callback)");
|
||||
|
||||
/// Append to \p CBUses the operand uses of \p ICS that the callee's
/// `!callback` metadata names as callback callees.
///
/// Nothing is appended when \p ICS has no directly called function or when
/// that function carries no `!callback` metadata. For each metadata operand,
/// the first entry is read as the index of the call-site argument holding the
/// callback callee.
///
/// \param ICS    The call site whose argument operands are inspected.
/// \param CBUses Output vector; existing entries are left untouched.
void AbstractCallSite::getCallbackUses(ImmutableCallSite ICS,
                                       SmallVectorImpl<const Use *> &CBUses) {
  const Function *Callee = ICS.getCalledFunction();
  if (!Callee)
    return;

  MDNode *CallbackMD = Callee->getMetadata(LLVMContext::MD_callback);
  if (!CallbackMD)
    return;

  for (const MDOperand &Op : CallbackMD->operands()) {
    MDNode *OpMD = cast<MDNode>(Op.get());
    auto *CBCalleeIdxAsCM = cast<ConstantAsMetadata>(OpMD->getOperand(0));
    uint64_t CBCalleeIdx =
        cast<ConstantInt>(CBCalleeIdxAsCM->getValue())->getZExtValue();

    // The metadata describes the callee's signature, but this particular call
    // may pass fewer operands than the encoded index (e.g. a mismatched or
    // var-arg call). Only form a use that actually exists at the call site;
    // offsetting arg_begin() past the operand list would hand out an invalid
    // Use pointer.
    if (CBCalleeIdx < ICS.arg_size())
      CBUses.push_back(ICS.arg_begin() + CBCalleeIdx);
  }
}
|
||||
|
||||
/// Create an abstract call site from a use.
|
||||
AbstractCallSite::AbstractCallSite(const Use *U) : CS(U->getUser()) {
|
||||
|
||||
|
@ -183,6 +183,60 @@ ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) {
|
||||
}
|
||||
///}
|
||||
|
||||
/// Return the argument associated with this position, or nullptr if there is
/// none.
///
/// For an argument position this is the anchor itself. For a position with a
/// non-negative argument number, a callback callee argument is preferred if
/// exactly one callback encoding maps the call-site operand to it; otherwise
/// the directly called function's argument with that number is used when it
/// exists.
Argument *IRPosition::getAssociatedArgument() const {
  if (getPositionKind() == IRP_ARGUMENT)
    return cast<Argument>(&getAnchorValue());

  // Without an argument number this is not a call site argument, so there is
  // no callee or callback argument to map to.
  const int ArgNo = getArgNo();
  if (ArgNo < 0)
    return nullptr;

  // Walk the abstract call sites of all callback uses at this call site and
  // look for callback callee arguments that are fed by operand number ArgNo.
  // `SeenCandidate` records that at least one match was found; a second match
  // makes the result ambiguous and resets `UniqueCBArg` to nullptr for good.
  bool SeenCandidate = false;
  Argument *UniqueCBArg = nullptr;

  SmallVector<const Use *, 4> CallbackUses;
  ImmutableCallSite ICS(&getAnchorValue());
  AbstractCallSite::getCallbackUses(ICS, CallbackUses);

  for (const Use *CBUse : CallbackUses) {
    AbstractCallSite ACS(CBUse);
    assert(ACS && ACS.isCallbackCall());

    auto *CBCallee = ACS.getCalledFunction();
    if (!CBCallee)
      continue;

    const unsigned NumCBArgs = ACS.getNumArgOperands();
    for (unsigned ParamIdx = 0; ParamIdx < NumCBArgs; ++ParamIdx) {
      // Only callback parameters fed by the underlying call site operand are
      // of interest.
      if (ACS.getCallArgOperandNo(ParamIdx) == ArgNo) {
        assert(ACS.getCalledFunction()->arg_size() > ParamIdx &&
               "ACS mapped into var-args arguments!");

        if (SeenCandidate) {
          // More than one callback argument uses the operand: not unique.
          UniqueCBArg = nullptr;
          break;
        }

        SeenCandidate = true;
        UniqueCBArg = CBCallee->getArg(ParamIdx);
      }
    }
  }

  // A unique callback candidate takes precedence over the direct callee.
  if (UniqueCBArg)
    return UniqueCBArg;

  // No callback found, or the operand was not used exclusively by one of
  // them: fall back to the direct callee's argument, if it has one.
  const Function *DirectCallee = ICS.getCalledFunction();
  if (!DirectCallee || unsigned(ArgNo) >= DirectCallee->arg_size())
    return nullptr;

  return DirectCallee->getArg(ArgNo);
}
|
||||
|
||||
/// For calls (and invokes) we will only replace instruction uses to not disturb
|
||||
/// the old style call graph.
|
||||
/// TODO: Remove this once we get rid of the old PM.
|
||||
@ -2339,8 +2393,43 @@ struct AANoAliasFloating final : AANoAliasImpl {
|
||||
/// NoAlias attribute for an argument.
|
||||
struct AANoAliasArgument final
|
||||
: AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> {
|
||||
AANoAliasArgument(const IRPosition &IRP)
|
||||
: AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>(IRP) {}
|
||||
using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>;
|
||||
AANoAliasArgument(const IRPosition &IRP) : Base(IRP) {}
|
||||
|
||||
/// See AbstractAttribute::update(...).
|
||||
ChangeStatus updateImpl(Attributor &A) override {
  // no-alias on a callback argument may break synchronization, see [1] below.
  // The checks are evaluated lazily and in this order; as soon as one of them
  // shows that synchronization cannot be affected, the generic update of the
  // base class is used. If none applies we give up for now.
  auto CannotBreakSynchronization = [&]() -> bool {
    // A no-sync function has no synchronization that could be broken.
    const auto &FnNoSync = A.getAAFor<AANoSync>(
        *this, IRPosition::function_scope(getIRPosition()));
    if (FnNoSync.isAssumedNoSync())
      return true;

    // Neither does a read-only argument pose a problem.
    const auto &ArgMemBehavior =
        A.getAAFor<AAMemoryBehavior>(*this, getIRPosition());
    if (ArgMemBehavior.isAssumedReadOnly())
      return true;

    // Lastly, the argument is fine if it is never passed through a callback.
    auto IsNotCallbackCall = [](AbstractCallSite ACS) {
      return !ACS.isCallbackCall();
    };
    return A.checkForAllCallSites(IsNotCallbackCall, *this, true);
  };

  if (CannotBreakSynchronization())
    return Base::updateImpl(A);

  // TODO: add no-alias but make sure it doesn't break synchronization by
  // introducing fake uses. See:
  // [1] Compiler Optimizations for OpenMP, J. Doerfert and H. Finkel,
  //     International Workshop on OpenMP 2018,
  //     http://compilers.cs.uni-saarland.de/people/doerfert/par_opt18.pdf
  return indicatePessimisticFixpoint();
}
|
||||
|
||||
/// See AbstractAttribute::trackStatistics()
|
||||
// Statistics hook: the body is solely the STATS_DECLTRACK_ARG_ATTR macro
// instantiated for `noalias`.
// NOTE(review): presumably this declares and bumps the per-argument `noalias`
// counter; confirm against the macro definition, which is not visible here.
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) }
|
||||
@ -2395,6 +2484,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
|
||||
|
||||
// (iii) Check there is no other pointer argument which could alias with the
|
||||
// value.
|
||||
// TODO: AbstractCallSite
|
||||
ImmutableCallSite ICS(&getAnchorValue());
|
||||
for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) {
|
||||
if (getArgNo() == (int)i)
|
||||
|
@ -14,7 +14,7 @@ define void @fn2(i32* %P) {
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* undef, align 4
|
||||
; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn1(i32 [[TMP0]])
|
||||
; CHECK-NEXT: store i32 [[CALL]], i32* [[P]]
|
||||
; CHECK-NEXT: br label %for.cond1
|
||||
; CHECK-NEXT: br label [[FOR_COND1:%.*]]
|
||||
;
|
||||
entry:
|
||||
br label %if.end
|
||||
@ -55,7 +55,7 @@ define void @fn_no_null_opt(i32* %P) #0 {
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* undef, align 4
|
||||
; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]])
|
||||
; CHECK-NEXT: store i32 [[CALL]], i32* [[P]]
|
||||
; CHECK-NEXT: br label %for.cond1
|
||||
; CHECK-NEXT: br label [[FOR_COND1:%.*]]
|
||||
;
|
||||
entry:
|
||||
br label %if.end
|
||||
|
@ -33,7 +33,7 @@ define dso_local void @foo(i32 %N) {
|
||||
; CHECK-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
|
||||
; CHECK-NEXT: store float 3.000000e+00, float* [[P]], align 4
|
||||
; CHECK-NEXT: store i32 7, i32* [[N_ADDR]], align 4
|
||||
; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @1, i32 3, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nonnull align 4 dereferenceable(4) [[N_ADDR]], float* nonnull align 4 dereferenceable(4) [[P]], i64 4617315517961601024)
|
||||
; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @1, i32 3, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 4617315517961601024)
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -33,10 +33,10 @@ define dso_local i32 @main() {
|
||||
; CHECK-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8
|
||||
; CHECK-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8
|
||||
; CHECK-NEXT: [[THREAD:%.*]] = alloca i64, align 8
|
||||
; CHECK-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @foo, i8* noalias null)
|
||||
; CHECK-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @bar, i8* nonnull align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*))
|
||||
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nonnull align 8 dereferenceable(1) [[ALLOC1]])
|
||||
; CHECK-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @buz, i8* nonnull align 8 dereferenceable(1) [[ALLOC2]])
|
||||
; CHECK-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @foo, i8* noalias nofree readnone null)
|
||||
; CHECK-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @bar, i8* nofree nonnull readnone align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*))
|
||||
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC1]])
|
||||
; CHECK-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @buz, i8* nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC2]])
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
entry:
|
||||
@ -76,7 +76,7 @@ define internal i8* @baz(i8* %arg) {
|
||||
; CHECK-LABEL: define {{[^@]+}}@baz
|
||||
; CHECK-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) [[ARG:%.*]])
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: ret i8* [[ARG:%.*]]
|
||||
; CHECK-NEXT: ret i8* [[ARG]]
|
||||
;
|
||||
entry:
|
||||
ret i8* %arg
|
||||
@ -86,7 +86,7 @@ define internal i8* @buz(i8* %arg) {
|
||||
; CHECK-LABEL: define {{[^@]+}}@buz
|
||||
; CHECK-SAME: (i8* nofree nonnull readnone returned align 8 dereferenceable(1) [[ARG:%.*]])
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: ret i8* [[ARG:%.*]]
|
||||
; CHECK-NEXT: ret i8* [[ARG]]
|
||||
;
|
||||
entry:
|
||||
ret i8* %arg
|
||||
|
@ -39,7 +39,7 @@ entry:
|
||||
define dso_local void @caller() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@caller()
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: call void @broker(i32* nonnull align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nonnull align 4 dereferenceable(4) @gsh)
|
||||
; CHECK-NEXT: call void @broker(i32* nofree nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nofree nonnull readonly align 4 dereferenceable(4) @gsh)
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -13,7 +13,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
|
||||
; each other but argument 3-5 of the transitive call site in the caller match
|
||||
; arguments 2-4 of the callback callee. Here we should see information and value
|
||||
; transfer in both directions.
|
||||
; FIXME: The callee -> call site direction is not working yet.
|
||||
; FIXME: %a should be align 256 at the call site
|
||||
|
||||
define void @t0_caller(i32* %a) {
|
||||
; CHECK-LABEL: @t0_caller(
|
||||
@ -24,7 +24,8 @@ define void @t0_caller(i32* %a) {
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
|
||||
; CHECK-NEXT: store i32 42, i32* [[B]], align 32
|
||||
; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64
|
||||
; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
|
||||
; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
|
||||
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
@ -41,7 +42,8 @@ entry:
|
||||
; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below!
|
||||
; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
|
||||
define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
|
||||
; CHECK-LABEL: @t0_callback_callee(
|
||||
; CHECK-LABEL: define {{[^@]+}}@t0_callback_callee
|
||||
; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR:%.*]], align 8
|
||||
; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL:%.*]]
|
||||
|
Loading…
x
Reference in New Issue
Block a user