From c5b0326e2f911e3acf0f0626f28d929393166558 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Mon, 1 Mar 2021 19:31:42 -0600
Subject: [PATCH] [Attributor] Make DepClass a required argument

We often used a sub-optimal dependence class in the past because we
didn't see the argument. Let's make it explicit so we remember to think
about it.
---
 include/llvm/Transforms/IPO/Attributor.h    |  18 +-
 lib/Transforms/IPO/Attributor.cpp           |   6 +-
 lib/Transforms/IPO/AttributorAttributes.cpp | 219 ++++++++++--------
 lib/Transforms/IPO/OpenMPOpt.cpp            |  15 +-
 .../IPConstantProp/openmp_parallel_for.ll   | 181 ++++++++++-----
 5 files changed, 269 insertions(+), 170 deletions(-)

diff --git a/include/llvm/Transforms/IPO/Attributor.h b/include/llvm/Transforms/IPO/Attributor.h
index 330a8f4ee50..fb831a6cb92 100644
--- a/include/llvm/Transforms/IPO/Attributor.h
+++ b/include/llvm/Transforms/IPO/Attributor.h
@@ -1056,8 +1056,7 @@ struct Attributor {
   /// the `Attributor::recordDependence` method.
   template <typename AAType>
   const AAType &getAAFor(const AbstractAttribute &QueryingAA,
-                         const IRPosition &IRP,
-                         DepClassTy DepClass = DepClassTy::REQUIRED) {
+                         const IRPosition &IRP, DepClassTy DepClass) {
     return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
                                     /* ForceUpdate */ false);
   }
@@ -1069,8 +1068,7 @@ struct Attributor {
   /// was assumed dead.
   template <typename AAType>
   const AAType &getAndUpdateAAFor(const AbstractAttribute &QueryingAA,
-                                  const IRPosition &IRP,
-                                  DepClassTy DepClass = DepClassTy::REQUIRED) {
+                                  const IRPosition &IRP, DepClassTy DepClass) {
     return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
                                     /* ForceUpdate */ true);
   }
@@ -1081,10 +1079,9 @@ struct Attributor {
   /// function.
   /// NOTE: ForceUpdate is ignored in any stage other than the update stage.
   template <typename AAType>
-  const AAType &getOrCreateAAFor(const IRPosition &IRP,
-                                 const AbstractAttribute *QueryingAA = nullptr,
-                                 DepClassTy DepClass = DepClassTy::REQUIRED,
-                                 bool ForceUpdate = false) {
+  const AAType &
+  getOrCreateAAFor(const IRPosition &IRP, const AbstractAttribute *QueryingAA,
+                   DepClassTy DepClass, bool ForceUpdate = false) {
     if (AAType *AAPtr = lookupAAFor<AAType>(IRP, QueryingAA, DepClass)) {
       if (ForceUpdate && Phase == AttributorPhase::UPDATE)
         updateAA(*AAPtr);
@@ -1160,6 +1157,11 @@ struct Attributor {
                                     DepClass);
     return AA;
   }
+  template <typename AAType>
+  const AAType &getOrCreateAAFor(const IRPosition &IRP) {
+    return getOrCreateAAFor<AAType>(IRP, /* QueryingAA */ nullptr,
+                                    DepClassTy::NONE);
+  }
 
   /// Return the attribute of \p AAType for \p IRP if existing. This also allows
   /// non-AA users lookup.
diff --git a/lib/Transforms/IPO/Attributor.cpp b/lib/Transforms/IPO/Attributor.cpp
index b7485a2238d..d2b4c7bc353 100644
--- a/lib/Transforms/IPO/Attributor.cpp
+++ b/lib/Transforms/IPO/Attributor.cpp
@@ -864,7 +864,8 @@ bool Attributor::checkForAllReturnedValuesAndReturnInsts(
   // and liveness information.
   // TODO: use the function scope once we have call site AAReturnedValues.
   const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
-  const auto &AARetVal = getAAFor<AAReturnedValues>(QueryingAA, QueryIRP);
+  const auto &AARetVal =
+      getAAFor<AAReturnedValues>(QueryingAA, QueryIRP, DepClassTy::REQUIRED);
   if (!AARetVal.getState().isValidState())
     return false;
 
@@ -881,7 +882,8 @@ bool Attributor::checkForAllReturnedValues(
   // TODO: use the function scope once we have call site AAReturnedValues.
   const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
-  const auto &AARetVal = getAAFor<AAReturnedValues>(QueryingAA, QueryIRP);
+  const auto &AARetVal =
+      getAAFor<AAReturnedValues>(QueryingAA, QueryIRP, DepClassTy::REQUIRED);
   if (!AARetVal.getState().isValidState())
     return false;
 
diff --git a/lib/Transforms/IPO/AttributorAttributes.cpp b/lib/Transforms/IPO/AttributorAttributes.cpp
index 2d3c9d8123f..4157be82ffd 100644
--- a/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -450,7 +450,8 @@ static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA,
   // Callback for each possibly returned value.
   auto CheckReturnValue = [&](Value &RV) -> bool {
     const IRPosition &RVPos = IRPosition::value(RV);
-    const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos);
+    const AAType &AA =
+        A.getAAFor<AAType>(QueryingAA, RVPos, DepClassTy::REQUIRED);
     LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
                       << " @ " << RVPos << "\n");
     const StateType &AAS = AA.getState();
@@ -512,7 +513,8 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
     if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
       return false;
 
-    const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos);
+    const AAType &AA =
+        A.getAAFor<AAType>(QueryingAA, ACSArgPos, DepClassTy::REQUIRED);
     LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
                       << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
     const StateType &AAS = AA.getState();
@@ -571,7 +573,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
       return S.indicatePessimisticFixpoint();
 
     IRPosition FnPos = IRPosition::returned(*AssociatedFunction);
-    const AAType &AA = A.getAAFor<AAType>(*this, FnPos);
+    const AAType &AA = A.getAAFor<AAType>(*this, FnPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(S, AA.getState());
   }
 };
@@ -708,8 +710,8 @@ struct AANoUnwindImpl : AANoUnwind {
         return true;
 
      if (const auto *CB = dyn_cast<CallBase>(&I)) {
-        const auto &NoUnwindAA =
-            A.getAAFor<AANoUnwind>(*this, IRPosition::callsite_function(*CB));
+        const auto &NoUnwindAA = A.getAAFor<AANoUnwind>(
+            *this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
         return NoUnwindAA.isAssumedNoUnwind();
       }
       return false;
@@ -751,7 +753,7 @@ struct AANoUnwindCallSite final : AANoUnwindImpl {
    //       redirecting requests to the callee argument.
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
-    auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos);
+    auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), FnAA.getState());
   }
 
@@ -1084,7 +1086,8 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
     // TODO: use the function scope once we have call site AAReturnedValues.
     const auto &RetValAA = A.getAAFor<AAReturnedValues>(
-        *this, IRPosition::function(*CB->getCalledFunction()));
+        *this, IRPosition::function(*CB->getCalledFunction()),
+        DepClassTy::REQUIRED);
     LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: "
                       << RetValAA << "\n");
@@ -1344,11 +1347,9 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
       if (CB->hasFnAttr(Attribute::NoSync))
         return true;
 
-      const auto &NoSyncAA =
-          A.getAAFor<AANoSync>(*this, IRPosition::callsite_function(*CB));
-      if (NoSyncAA.isAssumedNoSync())
-        return true;
-      return false;
+      const auto &NoSyncAA = A.getAAFor<AANoSync>(
+          *this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
+      return NoSyncAA.isAssumedNoSync();
     }
 
     if (!isVolatile(&I) && !isNonRelaxedAtomic(&I))
@@ -1403,7 +1404,7 @@ struct AANoSyncCallSite final : AANoSyncImpl {
     //       redirecting requests to the callee argument.
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
-    auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos);
+    auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), FnAA.getState());
   }
 
@@ -1423,8 +1424,8 @@ struct AANoFreeImpl : public AANoFree {
       if (CB.hasFnAttr(Attribute::NoFree))
         return true;
 
-      const auto &NoFreeAA =
-          A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(CB));
+      const auto &NoFreeAA = A.getAAFor<AANoFree>(
+          *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
       return NoFreeAA.isAssumedNoFree();
     };
 
@@ -1468,7 +1469,7 @@ struct AANoFreeCallSite final : AANoFreeImpl {
     //       redirecting requests to the callee argument.
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
-    auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos);
+    auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), FnAA.getState());
   }
 
@@ -1504,7 +1505,8 @@ struct AANoFreeFloating : AANoFreeImpl {
         unsigned ArgNo = CB->getArgOperandNo(&U);
 
         const auto &NoFreeArg = A.getAAFor<AANoFree>(
-            *this, IRPosition::callsite_argument(*CB, ArgNo));
+            *this, IRPosition::callsite_argument(*CB, ArgNo),
+            DepClassTy::REQUIRED);
         return NoFreeArg.isAssumedNoFree();
       }
 
@@ -1550,7 +1552,7 @@ struct AANoFreeCallSiteArgument final : AANoFreeFloating {
     if (!Arg)
       return indicatePessimisticFixpoint();
     const IRPosition &ArgPos = IRPosition::argument(*Arg);
-    auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos);
+    auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), ArgAA.getState());
   }
 
@@ -1756,7 +1758,8 @@ struct AANonNullFloating : public AANonNullImpl {
     auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
                             AANonNull::StateType &T, bool Stripped) -> bool {
-      const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V));
+      const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V),
+                                             DepClassTy::REQUIRED);
       if (!Stripped && this == &AA) {
         if (!isKnownNonZero(&V, DL, 0, AC, CtxI, DT))
           T.indicatePessimisticFixpoint();
@@ -1874,8 +1877,8 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
       if (CB.hasFnAttr(Attribute::NoRecurse))
         return true;
 
-      const auto &NoRecurseAA =
-          A.getAAFor<AANoRecurse>(*this, IRPosition::callsite_function(CB));
+      const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+          *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
       if (!NoRecurseAA.isAssumedNoRecurse())
         return false;
 
@@ -1915,7 +1918,7 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
     //       redirecting requests to the callee argument.
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
-    auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos);
+    auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), FnAA.getState());
   }
 
@@ -2205,8 +2208,8 @@ private:
   // use for specific processing.
   Optional<Value *> stopOnUndefOrAssumed(Attributor &A, const Value *V,
                                          Instruction *I) {
-    const auto &ValueSimplifyAA =
-        A.getAAFor<AAValueSimplify>(*this, IRPosition::value(*V));
+    const auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>(
+        *this, IRPosition::value(*V), DepClassTy::REQUIRED);
     Optional<Value *> SimplifiedV =
         ValueSimplifyAA.getAssumedSimplifiedValue(A);
     if (!ValueSimplifyAA.isKnown()) {
@@ -2290,12 +2293,14 @@ struct AAWillReturnImpl : public AAWillReturn {
   ChangeStatus updateImpl(Attributor &A) override {
     auto CheckForWillReturn = [&](Instruction &I) {
       IRPosition IPos = IRPosition::callsite_function(cast<CallBase>(I));
-      const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, IPos);
+      const auto &WillReturnAA =
+          A.getAAFor<AAWillReturn>(*this, IPos, DepClassTy::REQUIRED);
       if (WillReturnAA.isKnownWillReturn())
         return true;
       if (!WillReturnAA.isAssumedWillReturn())
         return false;
-      const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(*this, IPos);
+      const auto &NoRecurseAA =
+          A.getAAFor<AANoRecurse>(*this, IPos, DepClassTy::REQUIRED);
       return NoRecurseAA.isAssumedNoRecurse();
     };
 
@@ -2340,7 +2345,7 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
     //       redirecting requests to the callee argument.
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
-    auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos);
+    auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), FnAA.getState());
   }
 
@@ -2420,8 +2425,8 @@ struct AANoAliasFloating final : AANoAliasImpl {
                  Val->getType()->getPointerAddressSpace()))
       indicateOptimisticFixpoint();
     else if (Val != &getAssociatedValue()) {
-      const auto &ValNoAliasAA =
-          A.getAAFor<AANoAlias>(*this, IRPosition::value(*Val));
+      const auto &ValNoAliasAA = A.getAAFor<AANoAlias>(
+          *this, IRPosition::value(*Val), DepClassTy::OPTIONAL);
       if (ValNoAliasAA.isKnownNoAlias())
         indicateOptimisticFixpoint();
     }
@@ -2461,14 +2466,15 @@ struct AANoAliasArgument final
     //    function, otherwise we give up for now.
 
     // If the function is no-sync, no-alias cannot break synchronization.
-    const auto &NoSyncAA = A.getAAFor<AANoSync>(
-        *this, IRPosition::function_scope(getIRPosition()));
+    const auto &NoSyncAA =
+        A.getAAFor<AANoSync>(*this, IRPosition::function_scope(getIRPosition()),
+                             DepClassTy::OPTIONAL);
     if (NoSyncAA.isAssumedNoSync())
       return Base::updateImpl(A);
 
     // If the argument is read-only, no-alias cannot break synchronization.
-    const auto &MemBehaviorAA =
-        A.getAAFor<AAMemoryBehavior>(*this, getIRPosition());
+    const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
+        *this, getIRPosition(), DepClassTy::OPTIONAL);
     if (MemBehaviorAA.isAssumedReadOnly())
       return Base::updateImpl(A);
 
@@ -2593,8 +2599,8 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
         return true;
 
       if (ScopeFn) {
-        const auto &ReachabilityAA =
-            A.getAAFor<AAReachability>(*this, IRPosition::function(*ScopeFn));
+        const auto &ReachabilityAA = A.getAAFor<AAReachability>(
+            *this, IRPosition::function(*ScopeFn), DepClassTy::OPTIONAL);
 
         if (!ReachabilityAA.isAssumedReachable(A, *UserI, *getCtxI()))
           return true;
@@ -2605,7 +2611,8 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
           unsigned ArgNo = CB->getArgOperandNo(&U);
 
           const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
-              *this, IRPosition::callsite_argument(*CB, ArgNo));
+              *this, IRPosition::callsite_argument(*CB, ArgNo),
+              DepClassTy::OPTIONAL);
 
           if (NoCaptureAA.isAssumedNoCapture())
             return true;
@@ -2703,11 +2710,13 @@ struct AANoAliasReturned final : AANoAliasImpl {
         return false;
 
       const IRPosition &RVPos = IRPosition::value(RV);
-      const auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, RVPos);
+      const auto &NoAliasAA =
+          A.getAAFor<AANoAlias>(*this, RVPos, DepClassTy::REQUIRED);
       if (!NoAliasAA.isAssumedNoAlias())
         return false;
 
-      const auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, RVPos);
+      const auto &NoCaptureAA =
+          A.getAAFor<AANoCapture>(*this, RVPos, DepClassTy::REQUIRED);
       return NoCaptureAA.isAssumedNoCaptureMaybeReturned();
     };
 
@@ -2742,7 +2751,7 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
     //       redirecting requests to the callee argument.
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::returned(*F);
-    auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos);
+    auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), FnAA.getState());
   }
 
@@ -2932,7 +2941,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
     if (!Arg)
       return indicatePessimisticFixpoint();
     const IRPosition &ArgPos = IRPosition::argument(*Arg);
-    auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos);
+    auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), ArgAA.getState());
   }
 
@@ -3561,8 +3570,8 @@ struct AADereferenceableFloating : AADereferenceableImpl {
       const Value *Base =
          stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false);
 
-      const auto &AA =
-          A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base));
+      const auto &AA = A.getAAFor<AADereferenceable>(
+          *this, IRPosition::value(*Base), DepClassTy::REQUIRED);
       int64_t DerefBytes = 0;
       if (!Stripped && this == &AA) {
         // Use IR information if we did not strip anything.
@@ -3842,7 +3851,8 @@ struct AAAlignFloating : AAAlignImpl {
     auto VisitValueCB = [&](Value &V, const Instruction *,
                             AAAlign::StateType &T, bool Stripped) -> bool {
-      const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V));
+      const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V),
+                                           DepClassTy::REQUIRED);
       if (!Stripped && this == &AA) {
         int64_t Offset;
         unsigned Alignment = 1;
@@ -4023,7 +4033,7 @@ struct AANoReturnCallSite final : AANoReturnImpl {
     AANoReturnImpl::initialize(A);
     if (Function *F = getAssociatedFunction()) {
       const IRPosition &FnPos = IRPosition::function(*F);
-      auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
+      auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos, DepClassTy::REQUIRED);
       if (!FnAA.isAssumedNoReturn())
         indicatePessimisticFixpoint();
     }
@@ -4037,7 +4047,7 @@ struct AANoReturnCallSite final : AANoReturnImpl {
     //       redirecting requests to the callee argument.
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
-    auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
+    auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), FnAA.getState());
   }
 
@@ -4239,7 +4249,8 @@ struct AACaptureUseTracker final : public CaptureTracker {
     const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo);
     // If we have a abstract no-capture attribute for the argument we can use
     // it to justify a non-capture attribute here. This allows recursion!
-    auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos);
+    auto &ArgNoCaptureAA =
+        A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos, DepClassTy::REQUIRED);
     if (ArgNoCaptureAA.isAssumedNoCapture())
       return isCapturedIn(/* Memory */ false, /* Integer */ false,
                           /* Return */ false);
@@ -4423,7 +4434,7 @@ struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
     if (!Arg)
       return indicatePessimisticFixpoint();
     const IRPosition &ArgPos = IRPosition::argument(*Arg);
-    auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos);
+    auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), ArgAA.getState());
  }
 
@@ -4517,7 +4528,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
     // FIXME: Add a typecast support.
 
     auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>(
-        QueryingAA, IRPosition::value(QueryingValue));
+        QueryingAA, IRPosition::value(QueryingValue), DepClassTy::REQUIRED);
 
     Optional<Value *> QueryingValueSimplified =
         ValueSimplifyAA.getAssumedSimplifiedValue(A);
@@ -4651,7 +4662,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
     if (Arg->hasByValAttr()) {
       // TODO: We probably need to verify synchronization is not an issue, e.g.,
       //       there is no race by not copying a constant byval.
-      const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition());
+      const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(),
+                                                       DepClassTy::REQUIRED);
       if (!MemAA.isAssumedReadOnly())
         return indicatePessimisticFixpoint();
     }
@@ -4818,7 +4830,8 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
       // The index is the operand that we assume is not null.
       unsigned PtrIdx = Op0IsNull;
       auto &PtrNonNullAA = A.getAAFor<AANonNull>(
-          *this, IRPosition::value(*ICmp->getOperand(PtrIdx)));
+          *this, IRPosition::value(*ICmp->getOperand(PtrIdx)),
+          DepClassTy::REQUIRED);
       if (!PtrNonNullAA.isAssumedNonNull())
         return false;
 
@@ -4851,7 +4864,8 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
     auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &,
                             bool Stripped) -> bool {
-      auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V));
+      auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V),
+                                             DepClassTy::REQUIRED);
       if (!Stripped && this == &AA) {
 
         // TODO: Look the instruction and check recursively.
@@ -5107,11 +5121,13 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
        unsigned ArgNo = CB->getArgOperandNo(&U);
 
        const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
-            *this, IRPosition::callsite_argument(*CB, ArgNo));
+            *this, IRPosition::callsite_argument(*CB, ArgNo),
+            DepClassTy::REQUIRED);
 
        // If a callsite argument use is nofree, we are fine.
        const auto &ArgNoFreeAA = A.getAAFor<AANoFree>(
-            *this, IRPosition::callsite_argument(*CB, ArgNo));
+            *this, IRPosition::callsite_argument(*CB, ArgNo),
+            DepClassTy::REQUIRED);
 
        if (!NoCaptureAA.isAssumedNoCapture() ||
            !ArgNoFreeAA.isAssumedNoFree()) {
@@ -5277,7 +5293,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
         return false;
 
       // Check that all call sites agree on a type.
-      auto &PrivCSArgAA = A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos);
+      auto &PrivCSArgAA =
+          A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos, DepClassTy::REQUIRED);
       Optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType();
 
       LLVM_DEBUG({
@@ -5394,8 +5411,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
       if (CBArgNo != int(ArgNo))
         continue;
 
-      const auto &CBArgPrivAA =
-          A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(CBArg));
+      const auto &CBArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
+          *this, IRPosition::argument(CBArg), DepClassTy::REQUIRED);
       if (CBArgPrivAA.isValidState()) {
         auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType();
         if (!CBArgPrivTy.hasValue())
@@ -5441,7 +5458,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
       Function *DCCallee = DC->getCalledFunction();
       if (unsigned(DCArgNo) < DCCallee->arg_size()) {
         const auto &DCArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
-            *this, IRPosition::argument(*DCCallee->getArg(DCArgNo)));
+            *this, IRPosition::argument(*DCCallee->getArg(DCArgNo)),
+            DepClassTy::REQUIRED);
         if (DCArgPrivAA.isValidState()) {
           auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType();
           if (!DCArgPrivTy.hasValue())
@@ -5606,7 +5624,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
     Argument *Arg = getAssociatedArgument();
     // Query AAAlign attribute for alignment of associated argument to
     // determine the best alignment of loads.
-    const auto &AlignAA = A.getAAFor<AAAlign>(*this, IRPosition::value(*Arg));
+    const auto &AlignAA =
+        A.getAAFor<AAAlign>(*this, IRPosition::value(*Arg), DepClassTy::NONE);
 
     // Callback to repair the associated function. A new alloca is placed at the
     // beginning and initialized with the values passed through arguments. The
@@ -5693,8 +5712,8 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
       if (CI->isOne())
         return Obj->getType()->getPointerElementType();
       if (auto *Arg = dyn_cast<Argument>(Obj)) {
-        auto &PrivArgAA =
-            A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(*Arg));
+        auto &PrivArgAA = A.getAAFor<AAPrivatizablePtr>(
+            *this, IRPosition::argument(*Arg), DepClassTy::REQUIRED);
        if (PrivArgAA.isAssumedPrivatizablePtr())
          return Obj->getType()->getPointerElementType();
       }
@@ -5731,19 +5750,21 @@ struct AAPrivatizablePtrCallSiteArgument final
       return indicatePessimisticFixpoint();
 
     const IRPosition &IRP = getIRPosition();
-    auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+    auto &NoCaptureAA =
+        A.getAAFor<AANoCapture>(*this, IRP, DepClassTy::REQUIRED);
     if (!NoCaptureAA.isAssumedNoCapture()) {
       LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n");
       return indicatePessimisticFixpoint();
     }
 
-    auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP);
+    auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP, DepClassTy::REQUIRED);
     if (!NoAliasAA.isAssumedNoAlias()) {
       LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n");
       return indicatePessimisticFixpoint();
     }
 
-    const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, IRP);
+    const auto &MemBehaviorAA =
+        A.getAAFor<AAMemoryBehavior>(*this, IRP, DepClassTy::REQUIRED);
     if (!MemBehaviorAA.isAssumedReadOnly()) {
       LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
       return indicatePessimisticFixpoint();
     }
@@ -6015,7 +6036,8 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
     //       redirecting requests to the callee argument.
     Argument *Arg = getAssociatedArgument();
     const IRPosition &ArgPos = IRPosition::argument(*Arg);
-    auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
+    auto &ArgAA =
+        A.getAAFor<AAMemoryBehavior>(*this, ArgPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), ArgAA.getState());
   }
 
@@ -6104,7 +6126,8 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
     //       redirecting requests to the callee argument.
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
-    auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+    auto &FnAA =
+        A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::REQUIRED);
     return clampStateAndIndicateChange(getState(), FnAA.getState());
   }
 
@@ -6130,7 +6153,7 @@ ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) {
     //       state is as optimistic as it gets.
     if (const auto *CB = dyn_cast<CallBase>(&I)) {
       const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
-          *this, IRPosition::callsite_function(*CB));
+          *this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
       intersectAssumedBits(MemBehaviorAA.getAssumed());
       return !isAtFixpoint();
     }
@@ -6653,8 +6676,8 @@ void AAMemoryLocationImpl::categorizePtrValue(
     } else if (isa<AllocaInst>(V)) {
       MLK = NO_LOCAL_MEM;
     } else if (const auto *CB = dyn_cast<CallBase>(&V)) {
-      const auto &NoAliasAA =
-          A.getAAFor<AANoAlias>(*this, IRPosition::callsite_returned(*CB));
+      const auto &NoAliasAA = A.getAAFor<AANoAlias>(
+          *this, IRPosition::callsite_returned(*CB), DepClassTy::OPTIONAL);
       if (NoAliasAA.isAssumedNoAlias())
         MLK = NO_MALLOCED_MEM;
       else
@@ -6724,8 +6747,8 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
   if (auto *CB = dyn_cast<CallBase>(&I)) {
 
     // First check if we assume any memory is access is visible.
-    const auto &CBMemLocationAA =
-        A.getAAFor<AAMemoryLocation>(*this, IRPosition::callsite_function(*CB));
+    const auto &CBMemLocationAA = A.getAAFor<AAMemoryLocation>(
+        *this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
     LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize call site: " << I
                       << " [" << CBMemLocationAA << "]\n");
 
@@ -6872,7 +6895,8 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
     //       redirecting requests to the callee argument.
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
-    auto &FnAA = A.getAAFor<AAMemoryLocation>(*this, FnPos);
+    auto &FnAA =
+        A.getAAFor<AAMemoryLocation>(*this, FnPos, DepClassTy::REQUIRED);
     bool Changed = false;
     auto AccessPred = [&](const Instruction *I, const Value *Ptr,
                           AccessKind Kind, MemoryLocationsKind MLK) {
@@ -7179,13 +7203,13 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
     if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
       return false;
 
-    auto &LHSAA =
-        A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS));
+    auto &LHSAA = A.getAAFor<AAValueConstantRange>(
+        *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
     QuerriedAAs.push_back(&LHSAA);
     auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
 
-    auto &RHSAA =
-        A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS));
+    auto &RHSAA = A.getAAFor<AAValueConstantRange>(
+        *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
     QuerriedAAs.push_back(&RHSAA);
     auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
 
@@ -7208,8 +7232,8 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
     if (!OpV.getType()->isIntegerTy())
       return false;
 
-    auto &OpAA =
-        A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(OpV));
+    auto &OpAA = A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(OpV),
+                                                  DepClassTy::REQUIRED);
     QuerriedAAs.push_back(&OpAA);
     T.unionAssumed(
         OpAA.getAssumed().castOp(CastI->getOpcode(), getState().getBitWidth()));
@@ -7226,11 +7250,11 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
     if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
       return false;
 
-    auto &LHSAA =
-        A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS));
+    auto &LHSAA = A.getAAFor<AAValueConstantRange>(
+        *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
     QuerriedAAs.push_back(&LHSAA);
-    auto &RHSAA =
-        A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS));
+    auto &RHSAA = A.getAAFor<AAValueConstantRange>(
+        *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
     QuerriedAAs.push_back(&RHSAA);
 
     auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
@@ -7279,8 +7303,8 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
       if (!I || isa<CallBase>(I)) {
 
         // If the value is not instruction, we query AA to Attributor.
-        const auto &AA =
-            A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(V));
+        const auto &AA = A.getAAFor<AAValueConstantRange>(
+            *this, IRPosition::value(V), DepClassTy::REQUIRED);
 
        // Clamp operator is not used to utilize a program point CtxI.
         T.unionAssumed(AA.getAssumedConstantRange(A, CtxI));
@@ -7612,11 +7636,13 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
     if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
       return indicatePessimisticFixpoint();
 
-    auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
+    auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
+                                                DepClassTy::REQUIRED);
     if (!LHSAA.isValidState())
       return indicatePessimisticFixpoint();
 
-    auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
+    auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
+                                                DepClassTy::REQUIRED);
     if (!RHSAA.isValidState())
       return indicatePessimisticFixpoint();
 
@@ -7674,11 +7700,13 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
       return indicatePessimisticFixpoint();
 
     // TODO: Use assumed simplified condition value
-    auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
+    auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
+                                                DepClassTy::REQUIRED);
     if (!LHSAA.isValidState())
       return indicatePessimisticFixpoint();
 
-    auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
+    auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
+                                                DepClassTy::REQUIRED);
     if (!RHSAA.isValidState())
       return indicatePessimisticFixpoint();
 
@@ -7700,7 +7728,8 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
     assert(CI->getNumOperands() == 1 && "Expected cast to be unary!");
     uint32_t ResultBitWidth = CI->getDestTy()->getIntegerBitWidth();
     Value *Src = CI->getOperand(0);
-    auto &SrcAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*Src));
+    auto &SrcAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*Src),
+                                                DepClassTy::REQUIRED);
     if (!SrcAA.isValidState())
       return indicatePessimisticFixpoint();
     const DenseSet<APInt> &SrcAAPVS = SrcAA.getAssumedSet();
@@ -7723,11 +7752,13 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
     if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
       return indicatePessimisticFixpoint();
 
-    auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
+    auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
+                                                DepClassTy::REQUIRED);
     if (!LHSAA.isValidState())
       return indicatePessimisticFixpoint();
 
-    auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
+    auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
+                                                DepClassTy::REQUIRED);
     if (!RHSAA.isValidState())
       return indicatePessimisticFixpoint();
 
@@ -7766,7 +7797,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
     for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
       Value *IncomingValue = PHI->getIncomingValue(u);
       auto &PotentialValuesAA = A.getAAFor<AAPotentialValues>(
-          *this, IRPosition::value(*IncomingValue));
+          *this, IRPosition::value(*IncomingValue), DepClassTy::REQUIRED);
       if (!PotentialValuesAA.isValidState())
         return indicatePessimisticFixpoint();
       if (PotentialValuesAA.undefIsContained())
@@ -7870,7 +7901,8 @@ struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
   ChangeStatus updateImpl(Attributor &A) override {
     Value &V = getAssociatedValue();
     auto AssumedBefore = getAssumed();
-    auto &AA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(V));
+    auto &AA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(V),
+                                             DepClassTy::REQUIRED);
     const auto &S = AA.getAssumed();
     unionAssumed(S);
     return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
@@ -7963,7 +7995,8 @@ struct AANoUndefFloating : public AANoUndefImpl {
   ChangeStatus updateImpl(Attributor &A) override {
     auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
                             AANoUndef::StateType &T, bool Stripped) -> bool {
-      const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V));
+      const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V),
+                                             DepClassTy::REQUIRED);
       if (!Stripped && this == &AA) {
         T.indicatePessimisticFixpoint();
       } else {
diff --git a/lib/Transforms/IPO/OpenMPOpt.cpp b/lib/Transforms/IPO/OpenMPOpt.cpp
index a5ba6edb9a0..8950517c792 100644
--- a/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -703,8 +703,8 @@ private:
       // Emit a load instruction and replace the use of the output value
       // with it.
       for (Instruction *UsrI : OutsideUsers) {
-        LoadInst *LoadI = new LoadInst(I.getType(), AllocaI,
-                                       I.getName() + ".seq.output.load", UsrI);
+        LoadInst *LoadI = new LoadInst(
+            I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
         UsrI->replaceUsesOfWith(&I, LoadI);
       }
     }
@@ -1955,8 +1955,8 @@ struct AAICVTrackerFunction : public AAICVTracker {
     if (CalledFunction->isDeclaration())
       return nullptr;
 
-    const auto &ICVTrackingAA =
-        A.getAAFor<AAICVTracker>(*this, IRPosition::callsite_returned(*CB));
+    const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+        *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
 
     if (ICVTrackingAA.isAssumedTracked())
       return ICVTrackingAA.getUniqueReplacementValue(ICV);
@@ -2072,7 +2072,7 @@ struct AAICVTrackerFunctionReturned : AAICVTracker {
   ChangeStatus updateImpl(Attributor &A) override {
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
-        *this, IRPosition::function(*getAnchorScope()));
+        *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
 
     if (!ICVTrackingAA.isAssumedTracked())
       return indicatePessimisticFixpoint();
@@ -2155,7 +2155,7 @@ struct AAICVTrackerCallSite : AAICVTracker {
   ChangeStatus updateImpl(Attributor &A) override {
     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
-        *this, IRPosition::function(*getAnchorScope()));
+        *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
 
     // We don't have any information, so we assume it changes the ICV.
     if (!ICVTrackingAA.isAssumedTracked())
@@ -2211,7 +2211,8 @@ struct AAICVTrackerCallSiteReturned : AAICVTracker {
   ChangeStatus updateImpl(Attributor &A) override {
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
-        *this, IRPosition::returned(*getAssociatedFunction()));
+        *this, IRPosition::returned(*getAssociatedFunction()),
+        DepClassTy::REQUIRED);
 
     // We don't have any information, so we assume it changes the ICV.
     if (!ICVTrackingAA.isAssumedTracked())
diff --git a/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
index 005a0f899d4..728b3d462da 100644
--- a/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
+++ b/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
@@ -83,66 +83,127 @@ entry:
 }
 
 define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %N, float* dereferenceable(4) %p, i64 %q) {
-; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[Q_ADDR:%.*]] = alloca i64, align 8
-; CHECK-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i64 4617315517961601024, i64* [[Q_ADDR]], align 8
-; CHECK-NEXT:    [[CONV:%.*]] = bitcast i64* [[Q_ADDR]] to double*
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[N]], align 4
-; CHECK-NEXT:    [[SUB3:%.*]] = add nsw i32 [[TMP]], -3
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2
-; CHECK-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-; CHECK:       omp.precond.then:
-; CHECK-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
-; CHECK-NEXT:    store i32 [[SUB3]], i32* [[DOTOMP_UB]], align 4
-; CHECK-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 noundef 34, i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 noundef 1, i32 noundef 1)
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]]
-; CHECK-NEXT:    br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-; CHECK:       cond.true:
-; CHECK-NEXT:    br label [[COND_END:%.*]]
-; CHECK:       cond.false:
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-; CHECK-NEXT:    br label [[COND_END]]
-; CHECK:       cond.end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[SUB3]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
-; CHECK-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
-; CHECK-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
-; CHECK:       omp.inner.for.cond:
-; CHECK-NEXT:    [[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]]
-; CHECK-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]]
-; CHECK:       omp.inner.for.cond.cleanup:
-; CHECK-NEXT:    br label [[OMP_INNER_FOR_END:%.*]]
-; CHECK:       omp.inner.for.body:
-; CHECK-NEXT:    [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2
-; CHECK-NEXT:    [[TMP10:%.*]] = load float, float* [[P]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = load double, double* [[CONV]], align 8
-; CHECK-NEXT:    call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]])
-; CHECK-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
-; CHECK:       omp.body.continue:
-; CHECK-NEXT:    br label [[OMP_INNER_FOR_INC]]
-; CHECK:       omp.inner.for.inc:
-; CHECK-NEXT:    [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1
-; CHECK-NEXT:    br label [[OMP_INNER_FOR_COND]]
-; CHECK:       omp.inner.for.end:
-; CHECK-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
-; CHECK:       omp.loop.exit:
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]])
-; CHECK-NEXT:    br label [[OMP_PRECOND_END]]
-; CHECK:       omp.precond.end:
-; CHECK-NEXT:    ret void
+; IS________OPM-LABEL: define {{[^@]+}}@.omp_outlined.
+; IS________OPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) {
+; IS________OPM-NEXT:  entry:
+; IS________OPM-NEXT:    [[Q_ADDR:%.*]] = alloca i64, align 8
+; IS________OPM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+; IS________OPM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+; IS________OPM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+; IS________OPM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+; IS________OPM-NEXT:    store i64 4617315517961601024, i64* [[Q_ADDR]], align 8
+; IS________OPM-NEXT:    [[CONV:%.*]] = bitcast i64* [[Q_ADDR]] to double*
+; IS________OPM-NEXT:    [[TMP:%.*]] = load i32, i32* [[N]], align 4
+; IS________OPM-NEXT:    [[SUB3:%.*]] = add nsw i32 [[TMP]], -3
+; IS________OPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2
+; IS________OPM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+; IS________OPM:       omp.precond.then:
+; IS________OPM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+; IS________OPM-NEXT:    store i32 [[SUB3]], i32* [[DOTOMP_UB]], align 4
+; IS________OPM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+; IS________OPM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+; IS________OPM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; IS________OPM-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 noundef 34, i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 noundef 1, i32 noundef 1)
+; IS________OPM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+; IS________OPM-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]]
+; IS________OPM-NEXT:    br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; IS________OPM:       cond.true:
+; IS________OPM-NEXT:    br label [[COND_END:%.*]]
+; IS________OPM:       cond.false:
+; IS________OPM-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+; IS________OPM-NEXT:    br label [[COND_END]]
+; IS________OPM:       cond.end:
+; IS________OPM-NEXT:    [[COND:%.*]] = phi i32 [ [[SUB3]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
+; IS________OPM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+; IS________OPM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+; IS________OPM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+; IS________OPM:       omp.inner.for.cond:
+; IS________OPM-NEXT:    [[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ]
+; IS________OPM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+; IS________OPM-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]]
+; IS________OPM-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]]
+; IS________OPM:       omp.inner.for.cond.cleanup:
+; IS________OPM-NEXT:    br label [[OMP_INNER_FOR_END:%.*]]
+; IS________OPM:       omp.inner.for.body:
+; IS________OPM-NEXT:    [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2
+; IS________OPM-NEXT:    [[TMP10:%.*]] = load float, float* [[P]], align 4
+; IS________OPM-NEXT:    [[TMP11:%.*]] = load double, double* [[CONV]], align 8
+; IS________OPM-NEXT:    call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]])
+; IS________OPM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+; IS________OPM:       omp.body.continue:
+; IS________OPM-NEXT:    br label [[OMP_INNER_FOR_INC]]
+; IS________OPM:       omp.inner.for.inc:
+; IS________OPM-NEXT:    [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1
+; IS________OPM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+; IS________OPM:       omp.inner.for.end:
+; IS________OPM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+; IS________OPM:       omp.loop.exit:
+; IS________OPM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; IS________OPM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]])
+; IS________OPM-NEXT:    br label [[OMP_PRECOND_END]]
+; IS________OPM:       omp.precond.end:
+; IS________OPM-NEXT:    ret void
+;
+; IS________NPM-LABEL: define {{[^@]+}}@.omp_outlined.
+; IS________NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) {
+; IS________NPM-NEXT:  entry:
+; IS________NPM-NEXT:    [[Q_ADDR:%.*]] = alloca i64, align 8
+; IS________NPM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+; IS________NPM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+; IS________NPM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+; IS________NPM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+; IS________NPM-NEXT:    store i64 4617315517961601024, i64* [[Q_ADDR]], align 8
+; IS________NPM-NEXT:    [[CONV:%.*]] = bitcast i64* [[Q_ADDR]] to double*
+; IS________NPM-NEXT:    [[TMP:%.*]] = load i32, i32* [[N]], align 4
+; IS________NPM-NEXT:    [[SUB3:%.*]] = add nsw i32 [[TMP]], -3
+; IS________NPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2
+; IS________NPM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+; IS________NPM:       omp.precond.then:
+; IS________NPM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+; IS________NPM-NEXT:    store i32 [[SUB3]], i32* [[DOTOMP_UB]], align 4
+; IS________NPM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+; IS________NPM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+; IS________NPM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; IS________NPM-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 noundef 34, i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 noundef 1, i32 noundef 1)
+; IS________NPM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+; IS________NPM-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]]
+; IS________NPM-NEXT:    br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; IS________NPM:       cond.true:
+; IS________NPM-NEXT:    br label [[COND_END:%.*]]
+; IS________NPM:       cond.false:
+; IS________NPM-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+; IS________NPM-NEXT:    br label [[COND_END]]
+; IS________NPM:       cond.end:
+; IS________NPM-NEXT:    [[COND:%.*]] = phi i32 [ [[SUB3]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
+; IS________NPM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+; IS________NPM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+; IS________NPM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+; IS________NPM:       omp.inner.for.cond:
+; IS________NPM-NEXT:    [[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ]
+; IS________NPM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+; IS________NPM-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]]
+; IS________NPM-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]]
+; IS________NPM:       omp.inner.for.cond.cleanup:
+; IS________NPM-NEXT:    br label [[OMP_INNER_FOR_END:%.*]]
+; IS________NPM:       omp.inner.for.body:
+; IS________NPM-NEXT:    [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2
+; IS________NPM-NEXT:    [[TMP10:%.*]] = load float, float* [[P]], align 4
+; IS________NPM-NEXT:    [[TMP11:%.*]] = load double, double* [[CONV]], align 8
+; IS________NPM-NEXT:    call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]])
+; IS________NPM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+; IS________NPM:       omp.body.continue:
+; IS________NPM-NEXT:    br label [[OMP_INNER_FOR_INC]]
+; IS________NPM:       omp.inner.for.inc:
+; IS________NPM-NEXT:    [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1
+; IS________NPM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+; IS________NPM:       omp.inner.for.end:
+; IS________NPM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+; IS________NPM:       omp.loop.exit:
+; IS________NPM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; IS________NPM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]])
+; IS________NPM-NEXT:    br label [[OMP_PRECOND_END]]
+; IS________NPM:       omp.precond.end:
+; IS________NPM-NEXT:    ret void
 ;
 entry:
   %q.addr = alloca i64, align 8