mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[IROutliner] Adding a cost model, and debug option to turn the model off.
This adds a cost model that takes into account the total number of machine instructions to be removed from each region, the number of instructions added by adding a new function with a set of instructions, and the instructions added by handling arguments. Tests not adding flags: llvm/test/Transforms/IROutliner/outlining-cost-model.ll Reviewers: jroelofs, paquette Differential Revision: https://reviews.llvm.org/D87299
This commit is contained in:
parent
6d7c1c4298
commit
2f69ee6563
@ -145,6 +145,12 @@ struct OutlinableRegion {
|
||||
/// function has been extracted, the start and end of the BasicBlock
|
||||
/// containing the called function.
|
||||
void reattachCandidate();
|
||||
|
||||
/// Get the size of the code removed from the region.
|
||||
///
|
||||
/// \param [in] TTI - The TargetTransformInfo for the parent function.
|
||||
/// \returns the code size of the region
|
||||
unsigned getBenefit(TargetTransformInfo &TTI);
|
||||
};
|
||||
|
||||
/// This class is a pass that identifies similarity in a Module, extracts
|
||||
@ -201,6 +207,28 @@ private:
|
||||
void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
|
||||
DenseSet<unsigned> &NotSame);
|
||||
|
||||
/// Find the number of instructions that will be removed by extracting the
|
||||
/// OutlinableRegions in \p CurrentGroup.
|
||||
///
|
||||
/// \param [in] CurrentGroup - The collection of OutlinableRegions to be
|
||||
/// analyzed.
|
||||
/// \returns the number of outlined instructions across all regions.
|
||||
unsigned findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
|
||||
|
||||
/// Find the number of instructions that will be added by reloading outputs.
|
||||
///
|
||||
/// \param [in] CurrentGroup - The collection of OutlinableRegions to be
|
||||
/// analyzed.
|
||||
/// \returns the number of added reload instructions across all regions.
|
||||
unsigned findCostOutputReloads(OutlinableGroup &CurrentGroup);
|
||||
|
||||
/// Find the cost and the benefit of \p CurrentGroup and save it back to
|
||||
/// \p CurrentGroup.
|
||||
///
|
||||
/// \param [in] M - The module being analyzed
|
||||
/// \param [in,out] CurrentGroup - The overall outlined section
|
||||
void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
|
||||
|
||||
/// Update the output mapping based on the load instruction, and the outputs
|
||||
/// of the extracted function.
|
||||
///
|
||||
@ -229,6 +257,11 @@ private:
|
||||
std::vector<Function *> &FuncsToRemove,
|
||||
unsigned &OutlinedFunctionNum);
|
||||
|
||||
/// If false, we do not worry if the cost is greater than the benefit. This
|
||||
/// is for debugging and testing, so that we can test small cases to ensure
|
||||
/// that the outlining is being done correctly.
|
||||
bool CostModel = true;
|
||||
|
||||
/// The set of outlined Instructions, identified by their location in the
|
||||
/// sequential ordering of instructions in a Module.
|
||||
DenseSet<unsigned> Outlined;
|
||||
|
@ -29,6 +29,13 @@
|
||||
using namespace llvm;
|
||||
using namespace IRSimilarity;
|
||||
|
||||
// This is a debug option to test small pieces of code to ensure that outlining
|
||||
// works correctly.
|
||||
static cl::opt<bool> NoCostModel(
|
||||
"ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
|
||||
cl::desc("Debug option to outline greedily, without restriction that "
|
||||
"calculated benefit outweighs cost"));
|
||||
|
||||
/// The OutlinableGroup holds all the overarching information for outlining
|
||||
/// a set of regions that are structurally similar to one another, such as the
|
||||
/// types of the overall function, the output blocks, the sets of stores needed
|
||||
@ -66,6 +73,13 @@ struct OutlinableGroup {
|
||||
/// index in ArgumentTypes is an output argument.
|
||||
unsigned NumAggregateInputs = 0;
|
||||
|
||||
/// The number of instructions that will be outlined by extracting \ref
|
||||
/// Regions.
|
||||
unsigned Benefit = 0;
|
||||
/// The number of added instructions needed for the outlining of the \ref
|
||||
/// Regions.
|
||||
unsigned Cost = 0;
|
||||
|
||||
/// For the \ref Regions, we look at every Value. If it is a constant,
|
||||
/// we check whether it is the same in Region.
|
||||
///
|
||||
@ -213,6 +227,40 @@ constantMatches(Value *V, unsigned GVN,
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Estimate how much code is removed from the parent function by outlining
/// this region.
///
/// Every instruction in StartBB contributes its TCK_CodeSize cost as reported
/// by \p TTI, except for division and remainder instructions, which are always
/// counted as a single instruction.
///
/// \param [in] TTI - The TargetTransformInfo for the parent function.
/// \returns the code size of the region.
unsigned OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
  InstructionCost Benefit(0);

  // Estimate the benefit of outlining a specific section of the program. We
  // mostly delegate this task to the TargetTransformInfo so that if the target
  // has specific changes, we can have a more accurate estimate.

  // However, getInstructionCost delegates the code size calculation for
  // arithmetic instructions to getArithmeticInstrCost in
  // include/Analysis/TargetTransformImpl.h, where it always estimates that the
  // code size for a division and remainder instruction to be equal to 4, and
  // everything else to 1. This is not an accurate representation of the
  // division instruction for targets that have a native division instruction.
  // To be overly conservative, we only add 1 to the number of instructions for
  // each division instruction.
  for (Instruction &I : *StartBB) {
    switch (I.getOpcode()) {
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      Benefit += 1;
      break;
    default:
      Benefit += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
      break;
    }
  }

  // The Optional returned by getValue() is dereferenced below, so make the
  // assumption that the accumulated cost is valid explicit rather than
  // reading an empty Optional.
  assert(Benefit.isValid() && "Code size benefit of region is invalid?");
  return *Benefit.getValue();
}
|
||||
|
||||
/// Find whether \p Region matches the global value numbering to Constant
|
||||
/// mapping found so far.
|
||||
///
|
||||
@ -1189,6 +1237,152 @@ void IROutliner::pruneIncompatibleRegions(
|
||||
}
|
||||
}
|
||||
|
||||
/// Sum the estimated code-size benefit of every region in \p CurrentGroup.
///
/// The total is only computed and returned here. CurrentGroup.Benefit is not
/// modified: findCostBenefit adds the returned total to it, so accumulating
/// into the group inside this loop as well would count the same instructions
/// more than once.
///
/// \param [in] CurrentGroup - The collection of OutlinableRegions to be
/// analyzed.
/// \returns the number of outlined instructions across all regions.
unsigned IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
  unsigned RegionBenefit = 0;
  for (OutlinableRegion *Region : CurrentGroup.Regions) {
    TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
    // We add the number of instructions in the region to the benefit as an
    // estimate as to how much will be removed. Note that the value printed
    // below is the running total so far, not the single region's share.
    RegionBenefit += Region->getBenefit(TTI);
    LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
                      << " saved instructions to overfall benefit.\n");
  }

  return RegionBenefit;
}
|
||||
|
||||
/// Total up the code-size cost of the loads that are needed after the call to
/// read back the output values of every region in \p CurrentGroup.
///
/// \param [in] CurrentGroup - The collection of OutlinableRegions to be
/// analyzed.
/// \returns the number of added reload instructions across all regions.
unsigned IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
  unsigned ReloadCostSum = 0;

  for (OutlinableRegion *Region : CurrentGroup.Regions) {
    TargetTransformInfo &RegionTTI = getTTI(*Region->StartBB->getParent());

    // One load is charged for each global value number recorded in the
    // region's GVNStores, since each output is reloaded after the call.
    for (unsigned StoredGVN : Region->GVNStores) {
      Optional<Value *> MaybeOutput = Region->Candidate->fromGVN(StoredGVN);
      assert(MaybeOutput.hasValue() && "Could not find value for GVN?");
      Type *OutputTy = MaybeOutput.getValue()->getType();

      unsigned ReloadCost = RegionTTI.getMemoryOpCost(
          Instruction::Load, OutputTy, Align(1), 0,
          TargetTransformInfo::TCK_CodeSize);

      LLVM_DEBUG(dbgs() << "Adding: " << ReloadCost
                        << " instructions to cost for output of type "
                        << *OutputTy << "\n");
      ReloadCostSum += ReloadCost;
    }
  }

  return ReloadCostSum;
}
|
||||
|
||||
/// Find the extra instructions needed to handle any output values for the
|
||||
/// region.
|
||||
///
|
||||
/// \param [in] M - The Module to outline from.
|
||||
/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
|
||||
/// \param [in] TTI - The TargetTransformInfo used to collect information for
|
||||
/// new instruction costs.
|
||||
/// \returns the additional cost to handle the outputs.
|
||||
static unsigned findCostForOutputBlocks(Module &M,
|
||||
OutlinableGroup &CurrentGroup,
|
||||
TargetTransformInfo &TTI) {
|
||||
unsigned OutputCost = 0;
|
||||
|
||||
for (const ArrayRef<unsigned> &OutputUse :
|
||||
CurrentGroup.OutputGVNCombinations) {
|
||||
IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
|
||||
for (unsigned GVN : OutputUse) {
|
||||
Optional<Value *> OV = Candidate.fromGVN(GVN);
|
||||
assert(OV.hasValue() && "Could not find value for GVN?");
|
||||
Value *V = OV.getValue();
|
||||
unsigned StoreCost =
|
||||
TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
|
||||
TargetTransformInfo::TCK_CodeSize);
|
||||
|
||||
// An instruction cost is added for each store set that needs to occur for
|
||||
// various output combinations inside the function, plus a branch to
|
||||
// return to the exit block.
|
||||
LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
|
||||
<< " instructions to cost for output of type "
|
||||
<< *V->getType() << "\n");
|
||||
OutputCost += StoreCost;
|
||||
}
|
||||
|
||||
unsigned BranchCost =
|
||||
TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
|
||||
LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
|
||||
<< " a branch instruction\n");
|
||||
OutputCost += BranchCost;
|
||||
}
|
||||
|
||||
// If there is more than one output scheme, we must have a comparison and
|
||||
// branch for each different item in the switch statement.
|
||||
if (CurrentGroup.OutputGVNCombinations.size() > 1) {
|
||||
unsigned ComparisonCost = TTI.getCmpSelInstrCost(
|
||||
Instruction::ICmp, Type::getInt32Ty(M.getContext()),
|
||||
Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE,
|
||||
TargetTransformInfo::TCK_CodeSize);
|
||||
unsigned BranchCost =
|
||||
TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
|
||||
|
||||
unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
|
||||
unsigned TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
|
||||
<< " instructions for each switch case for each different"
|
||||
<< " output path in a function\n");
|
||||
OutputCost += TotalCost;
|
||||
}
|
||||
|
||||
return OutputCost;
|
||||
}
|
||||
|
||||
/// Compute the benefit and the cost of outlining \p CurrentGroup and add them
/// to CurrentGroup.Benefit and CurrentGroup.Cost.
///
/// The benefit is the value returned by findBenefitFromAllRegions. The cost is
/// built up from: the output reloads, one average-sized region for the body of
/// the new function, the per-argument handling inside the new function, the
/// per-argument handling at each call site, and the output blocks.
///
/// \param [in] M - The module being analyzed
/// \param [in,out] CurrentGroup - The overall outlined section
void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
  const unsigned TotalRegionBenefit = findBenefitFromAllRegions(CurrentGroup);
  CurrentGroup.Benefit += TotalRegionBenefit;
  LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");

  const unsigned ReloadCost = findCostOutputReloads(CurrentGroup);
  CurrentGroup.Cost += ReloadCost;
  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");

  const unsigned BodyCost = TotalRegionBenefit / CurrentGroup.Regions.size();
  const unsigned ArgCount = CurrentGroup.ArgumentTypes.size();
  const unsigned RegionCount = CurrentGroup.Regions.size();
  TargetTransformInfo &TTI =
      getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());

  // The body of the newly created function is charged once, using the average
  // benefit of a region as its size.
  LLVM_DEBUG(dbgs() << "Adding: " << BodyCost
                    << " instructions to cost for body of new function.\n");
  CurrentGroup.Cost += BodyCost;
  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");

  // Shared per-argument charge: two basic-cost instructions per argument.
  const unsigned PerSiteArgCost =
      2 * ArgCount * TargetTransformInfo::TCC_Basic;

  // Inside the new function, each argument has to be moved out of its register
  // and into a value.
  LLVM_DEBUG(dbgs() << "Adding: " << ArgCount
                    << " instructions to cost for each argument in the new"
                    << " function.\n");
  CurrentGroup.Cost += PerSiteArgCost;
  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");

  // At every call site, each argument has to be placed either in a register or
  // on the stack; the stack is used once the argument registers are full.
  LLVM_DEBUG(dbgs() << "Adding: " << ArgCount
                    << " instructions to cost for each argument in the new"
                    << " function " << RegionCount << " times for the "
                    << "needed argument handling at the call site.\n");
  CurrentGroup.Cost += PerSiteArgCost * RegionCount;
  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");

  const unsigned OutputBlockCost =
      findCostForOutputBlocks(M, CurrentGroup, TTI);
  CurrentGroup.Cost += OutputBlockCost;
  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
}
|
||||
|
||||
void IROutliner::updateOutputMapping(OutlinableRegion &Region,
|
||||
ArrayRef<Value *> Outputs,
|
||||
LoadInst *LI) {
|
||||
@ -1348,6 +1542,19 @@ unsigned IROutliner::doOutline(Module &M) {
|
||||
|
||||
CurrentGroup.collectGVNStoreSets(M);
|
||||
|
||||
if (CostModel)
|
||||
findCostBenefit(M, CurrentGroup);
|
||||
|
||||
// If we are adhering to the cost model, reattach all the candidates
|
||||
if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
|
||||
for (OutlinableRegion *OS : CurrentGroup.Regions)
|
||||
OS->reattachCandidate();
|
||||
continue;
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
|
||||
<< " and benefit " << CurrentGroup.Benefit << "\n");
|
||||
|
||||
// Create functions out of all the sections, and mark them as outlined.
|
||||
OutlinedRegions.clear();
|
||||
for (OutlinableRegion *OS : CurrentGroup.Regions) {
|
||||
@ -1377,7 +1584,11 @@ unsigned IROutliner::doOutline(Module &M) {
|
||||
return OutlinedFunctionNum;
|
||||
}
|
||||
|
||||
// Runs doOutline on \p M and returns true when it reports a non-zero count.
// NOTE(review): the hunk header above (-1377,7 +1584,11) suggests this
// one-line form is the pre-change side of the diff and that the multi-line
// definition following it in this listing replaces it - confirm before
// treating both as live code.
bool IROutliner::run(Module &M) { return doOutline(M) > 0; }
|
||||
/// Run the outliner over \p M.
///
/// The cost model is enabled unless the ir-outlining-no-cost option is set.
///
/// \returns true if doOutline reports a non-zero count.
bool IROutliner::run(Module &M) {
  // Pick up the command line override before any outlining decision is made.
  CostModel = !NoCostModel;

  const unsigned NumOutlined = doOutline(M);
  return NumOutlined != 0;
}
|
||||
|
||||
// Pass Manager Boilerplate
|
||||
class IROutlinerLegacyPass : public ModulePass {
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test makes sure we are extracting the found similarity sections
|
||||
; correctly at the call site.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; Show that we do not extract allocas, as outlining allocas may cause
|
||||
; inconsistencies with the CodeExtractor's algorithm.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test ensures that we do not include llvm.assumes. There are exceptions
|
||||
; in the CodeExtractor's algorithm for llvm.assumes, so we ignore it for now.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; Show that we do not extract sections with branches as it would require extra
|
||||
; label and control flow checking.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that we do not outline callbr instructions since we do not
|
||||
; outline any control flow change instructions.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that we do not outline calls. Special calls, such as
|
||||
; indirect or nameless calls require extra handling to ensure that there
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that catchpad instructions are not outlined even if they
|
||||
; are in a similar section. Dealing with exception handling inside of an outlined
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that cleanuppad instructions are not outlined even if they
|
||||
; are in a similar section. Dealing with exception handling inside of an outlined
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; Show that we do not extract freeze instructions, since extra handling is
|
||||
; required to mark any outputs used with freeze.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks to make sure that we do not outline getelementptr
|
||||
; instructions since we must make extra checks on the final operands.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that invoke instructions are not outlined even if they
|
||||
; are in a similar section. Outlining does not currently handle control flow
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that landingpad instructions are not outlined even if they
|
||||
; are in a similar section. Dealing with exception handling inside of an outlined
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that we do not outline memcpy intrinsics since it may require
|
||||
; extra address space checks.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that we do not outline memcpy intrinsics since it may require
|
||||
; extra address space checks.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that we do not outline memset intrinsics since it requires
|
||||
; extra address space checks.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; Show that we do not extract phi nodes as it would require extra label and
|
||||
; control flow checking.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test ensures that we do not outline vararg instructions or intrinsics, as
|
||||
; they may cause inconsistencies when outlining.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that debug info is extracted along with the other
|
||||
; instructions.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test shows that we do not outline from basic blocks with their address
|
||||
; taken.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that floating point commutative instructions are not treated
|
||||
; as commutative. Even though an fadd is technically commutative, the order
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test checks that commutative instructions where the operands are
|
||||
; swapped are outlined as the same function.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test looks at instances of constants in the different regions. If there
|
||||
; is a register in the same place as a constant in a similar region of code, we
|
||||
|
183
test/Transforms/IROutliner/outlining-cost-model.ll
Normal file
183
test/Transforms/IROutliner/outlining-cost-model.ll
Normal file
@ -0,0 +1,183 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s -check-prefix=NOCOST
|
||||
|
||||
; This test checks that we have different results from when the cost model
|
||||
; is on versus when it is off. That is, if the number of instructions needed to
|
||||
; handle the arguments is greater than the number of instructions being removed,
|
||||
; we do not outline.
|
||||
|
||||
define void @function1() #0 {
|
||||
; CHECK-LABEL: @function1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; NOCOST-LABEL: @function1(
|
||||
; NOCOST-NEXT: entry:
|
||||
; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]])
|
||||
; NOCOST-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
%b = alloca i32, align 4
|
||||
%0 = load i32, i32* %a, align 4
|
||||
%1 = load i32, i32* %b, align 4
|
||||
%add = add i32 %0, %1
|
||||
%mul = mul i32 %0, %1
|
||||
%sub = sub i32 %0, %1
|
||||
%div = sdiv i32 %0, %1
|
||||
%add2 = add i32 %0, %1
|
||||
%mul2 = mul i32 %0, %1
|
||||
%sub2 = sub i32 %0, %1
|
||||
%div2 = sdiv i32 %0, %1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @function2() #0 {
|
||||
; CHECK-LABEL: @function2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; NOCOST-LABEL: @function2(
|
||||
; NOCOST-NEXT: entry:
|
||||
; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]])
|
||||
; NOCOST-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
%b = alloca i32, align 4
|
||||
%0 = load i32, i32* %a, align 4
|
||||
%1 = load i32, i32* %b, align 4
|
||||
%add = add i32 %0, %1
|
||||
%mul = mul i32 %0, %1
|
||||
%sub = sub i32 %0, %1
|
||||
%div = sdiv i32 %0, %1
|
||||
%add2 = add i32 %0, %1
|
||||
%mul2 = mul i32 %0, %1
|
||||
%sub2 = sub i32 %0, %1
|
||||
%div2 = sdiv i32 %0, %1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @function3() #0 {
|
||||
; CHECK-LABEL: @function3(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
|
||||
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]]
|
||||
; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[OUTPUT]], align 4
|
||||
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP2]], [[ADD]]
|
||||
; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; NOCOST-LABEL: @function3(
|
||||
; NOCOST-NEXT: entry:
|
||||
; NOCOST-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
|
||||
; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
|
||||
; NOCOST-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
|
||||
; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
|
||||
; NOCOST-NEXT: call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
|
||||
; NOCOST-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
|
||||
; NOCOST-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
|
||||
; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
|
||||
; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
|
||||
; NOCOST-NEXT: [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4
|
||||
; NOCOST-NEXT: call void @outlined_ir_func_2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
|
||||
; NOCOST-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
%b = alloca i32, align 4
|
||||
%output = alloca i32, align 4
|
||||
%result = alloca i32, align 4
|
||||
store i32 2, i32* %a, align 4
|
||||
store i32 3, i32* %b, align 4
|
||||
%0 = load i32, i32* %a, align 4
|
||||
%1 = load i32, i32* %b, align 4
|
||||
%add = add i32 %0, %1
|
||||
store i32 %add, i32* %output, align 4
|
||||
%2 = load i32, i32* %output, align 4
|
||||
%3 = load i32, i32* %output, align 4
|
||||
%mul = mul i32 %2, %add
|
||||
store i32 %mul, i32* %result, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @function4() #0 {
|
||||
; CHECK-LABEL: @function4(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
|
||||
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]]
|
||||
; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4
|
||||
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP2]], [[ADD]]
|
||||
; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; NOCOST-LABEL: @function4(
|
||||
; NOCOST-NEXT: entry:
|
||||
; NOCOST-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4
|
||||
; NOCOST-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
|
||||
; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
|
||||
; NOCOST-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
|
||||
; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
|
||||
; NOCOST-NEXT: call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
|
||||
; NOCOST-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
|
||||
; NOCOST-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
|
||||
; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
|
||||
; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
|
||||
; NOCOST-NEXT: call void @outlined_ir_func_2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
|
||||
; NOCOST-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
%b = alloca i32, align 4
|
||||
%output = alloca i32, align 4
|
||||
%result = alloca i32, align 4
|
||||
store i32 2, i32* %a, align 4
|
||||
store i32 3, i32* %b, align 4
|
||||
%0 = load i32, i32* %a, align 4
|
||||
%1 = load i32, i32* %b, align 4
|
||||
%add = add i32 %0, %1
|
||||
store i32 %add, i32* %output, align 4
|
||||
%2 = load i32, i32* %output, align 4
|
||||
%mul = mul i32 %2, %add
|
||||
store i32 %mul, i32* %result, align 4
|
||||
ret void
|
||||
}
|
71
test/Transforms/IROutliner/outlining-debug-statements.ll
Normal file
71
test/Transforms/IROutliner/outlining-debug-statements.ll
Normal file
@ -0,0 +1,71 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test makes sure that we do not include debug statements in outlined
|
||||
; functions.
|
||||
|
||||
; @outline_dbg1: three stores followed by three loads on local allocas, with an
; llvm.dbg.value call sitting between the second and third store. The FileCheck
; assertions below expect everything after the allocas to be replaced by a
; single call to @outlined_ir_func_0.
define void @outline_dbg1() {
|
||||
; CHECK-LABEL: @outline_dbg1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
%b = alloca i32, align 4
|
||||
%c = alloca i32, align 4
|
||||
store i32 2, i32* %a, align 4
|
||||
store i32 3, i32* %b, align 4
|
||||
; Debug intrinsic placed in the middle of the candidate region; the expected
; body of @outlined_ir_func_0 at the end of this file lists the three stores
; back-to-back, i.e. without this call.
call void @llvm.dbg.value(metadata i64 0, metadata !14, metadata !DIExpression()), !dbg !14
|
||||
store i32 4, i32* %c, align 4
|
||||
%al = load i32, i32* %a
|
||||
%bl = load i32, i32* %b
|
||||
%cl = load i32, i32* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.dbg.value(metadata, metadata, metadata)
|
||||
|
||||
; @outline_dbg2: the same store/load sequence as @outline_dbg1 but with no
; llvm.dbg.value call in it. The FileCheck assertions below expect it to be
; rewritten into a call to the same @outlined_ir_func_0 that @outline_dbg1
; uses.
define void @outline_dbg2() {
|
||||
; CHECK-LABEL: @outline_dbg2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
%b = alloca i32, align 4
|
||||
%c = alloca i32, align 4
|
||||
store i32 2, i32* %a, align 4
|
||||
store i32 3, i32* %b, align 4
|
||||
store i32 4, i32* %c, align 4
|
||||
%al = load i32, i32* %a
|
||||
%bl = load i32, i32* %b
|
||||
%cl = load i32, i32* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) #1 {
|
||||
; CHECK: entry_to_outline:
|
||||
; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
|
||||
; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
|
||||
; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4
|
||||
; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
|
||||
; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
|
||||
; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
|
||||
|
||||
!0 = !DIFile(filename: "foo.c", directory: "/tmp")
|
||||
!1 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
||||
!5 = distinct !DICompileUnit(language: DW_LANG_C, file: !0, producer: "My Compiler", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !6, retainedTypes: !6, globals: !7)
|
||||
!6 = !{}
|
||||
!7 = !{}
|
||||
!11 = distinct !DISubprogram(name: "func_5", scope: !0, file: !0, line: 117, type: !12, isLocal: true, isDefinition: true, scopeLine: 118, isOptimized: false, unit: !5, retainedNodes: !6)
|
||||
!12 = !DISubroutineType(types: !13)
|
||||
!13 = !{}
|
||||
!14 = !DILocalVariable(name: "p_6", arg: 1, scope: !11, line: 117, type: !1)
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost< %s | FileCheck %s
|
||||
|
||||
; This test looks at the constants in the regions, and if they are
|
||||
; different, it elevates the constants to arguments.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test looks at the globals in the regions, and makes sure they are not
|
||||
; outlined if they are different values.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; These functions are constructed slightly differently so that they require
|
||||
; different output blocks for the values used outside of the region. We are
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner --ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This is a negative case to show that when we have the same set of
|
||||
; instructions, but in a different order, they are not outlined in the same way.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test tests that inputs that are replaced with the output of an outlined
|
||||
; function are still recognized as the same value.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test looks at the constants in the regions, and if they are the
|
||||
; same it outlines them as constants rather than elevating them to arguments.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
@global1 = global i32 1, align 4
|
||||
@global2 = global i32 2, align 4
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -iroutliner < %s | FileCheck %s
|
||||
; RUN: opt -S -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; These functions are constructed slightly differently so that they require
|
||||
; the same output blocks for the values used outside of the region. We are
|
||||
|
Loading…
Reference in New Issue
Block a user