1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00

[IROutliner] Adding a cost model, and debug option to turn the model off.

This adds a cost model that takes into account the total number of
machine instructions to be removed from each region, the number of
instructions added by adding a new function with a set of instructions,
and the instructions added by handling arguments.

Tests not adding flags:

llvm/test/Transforms/IROutliner/outlining-cost-model.ll

Reviewers: jroelofs, paquette

Differential Revision: https://reviews.llvm.org/D87299
This commit is contained in:
Andrew Litteken 2020-09-14 16:58:15 -05:00
parent 6d7c1c4298
commit 2f69ee6563
34 changed files with 533 additions and 35 deletions

View File

@ -145,6 +145,12 @@ struct OutlinableRegion {
/// function has been extracted, the start and end of the BasicBlock
/// containing the called function.
void reattachCandidate();
/// Get the size of the code removed from the region.
///
/// \param [in] TTI - The TargetTransformInfo for the parent function.
/// \returns the code size of the region
unsigned getBenefit(TargetTransformInfo &TTI);
};
/// This class is a pass that identifies similarity in a Module, extracts
@ -201,6 +207,28 @@ private:
void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
DenseSet<unsigned> &NotSame);
/// Find the number of instructions that will be removed by extracting the
/// OutlinableRegions in \p CurrentGroup.
///
/// \param [in] CurrentGroup - The collection of OutlinableRegions to be
/// analyzed.
/// \returns the number of outlined instructions across all regions.
unsigned findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
/// Find the number of instructions that will be added by reloading arguments.
///
/// \param [in] CurrentGroup - The collection of OutlinableRegions to be
/// analyzed.
/// \returns the number of added reload instructions across all regions.
unsigned findCostOutputReloads(OutlinableGroup &CurrentGroup);
/// Find the cost and the benefit of \p CurrentGroup and save it back to
/// \p CurrentGroup.
///
/// \param [in] M - The module being analyzed
/// \param [in,out] CurrentGroup - The overall outlined section
void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
/// Update the output mapping based on the load instruction, and the outputs
/// of the extracted function.
///
@ -229,6 +257,11 @@ private:
std::vector<Function *> &FuncsToRemove,
unsigned &OutlinedFunctionNum);
/// If false, we do not worry if the cost is greater than the benefit. This
/// is for debugging and testing, so that we can test small cases to ensure
/// that the outlining is being done correctly.
bool CostModel = true;
/// The set of outlined Instructions, identified by their location in the
/// sequential ordering of instructions in a Module.
DenseSet<unsigned> Outlined;

View File

@ -29,6 +29,13 @@
using namespace llvm;
using namespace IRSimilarity;
// Debug-only command line switch (-ir-outlining-no-cost) for exercising the
// outliner on small inputs. It defaults to false and is hidden from the normal
// option listing (cl::ReallyHidden). IROutliner::run() copies its negation
// into IROutliner::CostModel, so passing the flag makes the outliner skip the
// cost/benefit comparison and outline greedily.
static cl::opt<bool> NoCostModel(
"ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
cl::desc("Debug option to outline greedily, without restriction that "
"calculated benefit outweighs cost"));
/// The OutlinableGroup holds all the overarching information for outlining
/// a set of regions that are structurally similar to one another, such as the
/// types of the overall function, the output blocks, the sets of stores needed
@ -66,6 +73,13 @@ struct OutlinableGroup {
/// index in ArgumentTypes is an output argument.
unsigned NumAggregateInputs = 0;
/// The number of instructions that will be outlined by extracting \ref
/// Regions.
unsigned Benefit = 0;
/// The number of added instructions needed for the outlining of the \ref
/// Regions.
unsigned Cost = 0;
/// For the \ref Regions, we look at every Value. If it is a constant,
/// we check whether it is the same in Region.
///
@ -213,6 +227,40 @@ constantMatches(Value *V, unsigned GVN,
return false;
}
unsigned OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
  // Running total of the estimated code size of the instructions in StartBB.
  InstructionCost RemovedSize(0);

  // The per-instruction estimate is normally taken from TargetTransformInfo so
  // that target-specific costs are honoured.  Division and remainder are the
  // exception: the generic arithmetic cost hook reports a code size of 4 for
  // them (and 1 for everything else), which overstates the size on targets
  // with a native divide.  To stay conservative about the benefit we count
  // each such instruction as a single unit instead of asking the target.
  for (Instruction &Inst : *StartBB) {
    const unsigned Opcode = Inst.getOpcode();
    const bool IsDivOrRem =
        Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
        Opcode == Instruction::SDiv || Opcode == Instruction::SRem ||
        Opcode == Instruction::UDiv || Opcode == Instruction::URem;

    if (IsDivOrRem) {
      RemovedSize += 1;
      continue;
    }

    RemovedSize +=
        TTI.getInstructionCost(&Inst, TargetTransformInfo::TCK_CodeSize);
  }

  return *RemovedSize.getValue();
}
/// Find whether \p Region matches the global value numbering to Constant
/// mapping found so far.
///
@ -279,7 +327,7 @@ void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
}
void OutlinableGroup::collectGVNStoreSets(Module &M) {
for (OutlinableRegion *OS : Regions)
for (OutlinableRegion *OS : Regions)
OutputGVNCombinations.insert(OS->GVNStores);
// We are adding an extracted argument to decide between which output path
@ -874,7 +922,7 @@ findDuplicateOutputBlock(BasicBlock *OutputBB,
MatchingNum++;
continue;
}
WrongSize = false;
BasicBlock::iterator NIt = OutputBB->begin();
for (Instruction &I : *CompBB) {
@ -888,7 +936,7 @@ findDuplicateOutputBlock(BasicBlock *OutputBB,
NIt++;
}
if (!WrongInst && !WrongSize)
if (!WrongInst && !WrongSize)
return MatchingNum;
MatchingNum++;
@ -963,7 +1011,7 @@ alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
Region.OutputBlockNum = -1;
OutputBB->eraseFromParent();
return;
}
}
// Determine if there is a duplicate block.
Optional<unsigned> MatchingBB =
@ -1189,6 +1237,152 @@ void IROutliner::pruneIncompatibleRegions(
}
}
/// Sum the estimated code-size benefit of every region in \p CurrentGroup.
///
/// This function only computes the total; it deliberately does not write to
/// CurrentGroup.Benefit.  findCostBenefit() adds the returned value to the
/// group, so accumulating here as well would count every region more than
/// once and inflate the benefit.
///
/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
/// \returns the combined benefit of all regions in the group.
unsigned IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
  unsigned TotalBenefit = 0;
  for (OutlinableRegion *Region : CurrentGroup.Regions) {
    // Each region may live in a different function, so look up the
    // TargetTransformInfo of the region's own parent function.
    TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());

    // The size of the region is our estimate of how much code is removed from
    // the call site once the region has been outlined.
    const unsigned Saved = Region->getBenefit(TTI);
    LLVM_DEBUG(dbgs() << "Adding: " << Saved
                      << " saved instructions to overfall benefit.\n");
    TotalBenefit += Saved;
  }

  return TotalBenefit;
}
unsigned IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
  unsigned ReloadCost = 0;

  for (OutlinableRegion *OS : CurrentGroup.Regions) {
    // Costs are queried against the function that contains this region.
    TargetTransformInfo &RegionTTI = getTTI(*OS->StartBB->getParent());

    // Every value stored as an output of the region has to be loaded back
    // after the call, so charge one load per output global value number.
    for (unsigned StoredGVN : OS->GVNStores) {
      Optional<Value *> MaybeOutput = OS->Candidate->fromGVN(StoredGVN);
      assert(MaybeOutput.hasValue() && "Could not find value for GVN?");
      Value *Output = MaybeOutput.getValue();

      unsigned CostOfLoad = RegionTTI.getMemoryOpCost(
          Instruction::Load, Output->getType(), Align(1), 0,
          TargetTransformInfo::TCK_CodeSize);

      LLVM_DEBUG(dbgs() << "Adding: " << CostOfLoad
                        << " instructions to cost for output of type "
                        << *Output->getType() << "\n");
      ReloadCost += CostOfLoad;
    }
  }

  return ReloadCost;
}
/// Find the extra instructions needed to handle any output values for the
/// region.
///
/// The cost consists of, for every distinct output combination, one store per
/// output value plus one branch back to the exit block.  When more than one
/// output combination exists, a comparison and a branch are additionally
/// charged for each combination to model the dispatch between them.
///
/// \param [in] M - The Module to outline from.
/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
/// \param [in] TTI - The TargetTransformInfo used to collect information for
/// new instruction costs.
/// \returns the additional cost to handle the outputs.
static unsigned findCostForOutputBlocks(Module &M,
                                        OutlinableGroup &CurrentGroup,
                                        TargetTransformInfo &TTI) {
  unsigned OutputCost = 0;

  // The code-size cost of a branch does not depend on the output combination,
  // so query it once and reuse it below.
  const unsigned BranchCost =
      TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);

  for (const ArrayRef<unsigned> &OutputUse :
       CurrentGroup.OutputGVNCombinations) {
    // Global value numbers are resolved through the first region's candidate.
    IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;

    // An instruction cost is added for each store that needs to occur for
    // this output combination inside the function.
    for (unsigned GVN : OutputUse) {
      Optional<Value *> OV = Candidate.fromGVN(GVN);
      assert(OV.hasValue() && "Could not find value for GVN?");
      Value *V = OV.getValue();
      // The outlined function writes the output, so cost it as a store.
      unsigned StoreCost =
          TTI.getMemoryOpCost(Instruction::Store, V->getType(), Align(1), 0,
                              TargetTransformInfo::TCK_CodeSize);

      LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
                        << " instructions to cost for output of type "
                        << *V->getType() << "\n");
      OutputCost += StoreCost;
    }

    // Each output block ends with a branch to return to the exit block.
    LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
                      << " a branch instruction\n");
    OutputCost += BranchCost;
  }

  // If there is more than one output scheme, we must have a comparison and
  // branch for each different item in the switch statement.
  const unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
  if (DifferentBlocks > 1) {
    unsigned ComparisonCost = TTI.getCmpSelInstrCost(
        Instruction::ICmp, Type::getInt32Ty(M.getContext()),
        Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE,
        TargetTransformInfo::TCK_CodeSize);

    // One comparison plus one branch per case: the costs are summed, not
    // multiplied, so a zero-cost comparison or branch cannot cancel the
    // other term.
    unsigned TotalCost = (ComparisonCost + BranchCost) * DifferentBlocks;

    LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
                      << " instructions for each switch case for each different"
                      << " output path in a function\n");
    OutputCost += TotalCost;
  }

  return OutputCost;
}
void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
  // Benefit: the estimated size of all the regions that will be replaced by
  // calls to the outlined function.
  const unsigned TotalRegionBenefit = findBenefitFromAllRegions(CurrentGroup);
  CurrentGroup.Benefit += TotalRegionBenefit;
  LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");

  // Cost: reloading every output value after each call.
  const unsigned ReloadCost = findCostOutputReloads(CurrentGroup);
  CurrentGroup.Cost += ReloadCost;
  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");

  unsigned NumRegions = CurrentGroup.Regions.size();
  unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();

  // Cost: the body of the new function.  It contains one copy of the region,
  // approximated by the average benefit of a single region.
  unsigned AverageRegionBenefit =
      TotalRegionBenefit / CurrentGroup.Regions.size();
  LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
                    << " instructions to cost for body of new function.\n");
  CurrentGroup.Cost += AverageRegionBenefit;
  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");

  // Cost: inside the new function, each argument has to be moved out of its
  // register and into a value.
  const unsigned ArgumentHandlingCost =
      2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic;
  LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
                    << " instructions to cost for each argument in the new"
                    << " function.\n");
  CurrentGroup.Cost += ArgumentHandlingCost;
  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");

  // Cost: at every call site, each argument has to be placed in a register or
  // on the stack (the latter once the argument registers are exhausted).
  LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
                    << " instructions to cost for each argument in the new"
                    << " function " << NumRegions << " times for the "
                    << "needed argument handling at the call site.\n");
  CurrentGroup.Cost += ArgumentHandlingCost * NumRegions;
  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");

  // Cost: the stores, branches and dispatch needed for the output blocks,
  // evaluated with the TargetTransformInfo of the first region's function.
  TargetTransformInfo &TTI =
      getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
  const unsigned OutputBlockCost =
      findCostForOutputBlocks(M, CurrentGroup, TTI);
  CurrentGroup.Cost += OutputBlockCost;
  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
}
void IROutliner::updateOutputMapping(OutlinableRegion &Region,
ArrayRef<Value *> Outputs,
LoadInst *LI) {
@ -1348,6 +1542,19 @@ unsigned IROutliner::doOutline(Module &M) {
CurrentGroup.collectGVNStoreSets(M);
if (CostModel)
findCostBenefit(M, CurrentGroup);
// If we are adhering to the cost model, reattach all the candidates
if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
for (OutlinableRegion *OS : CurrentGroup.Regions)
OS->reattachCandidate();
continue;
}
LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
<< " and benefit " << CurrentGroup.Benefit << "\n");
// Create functions out of all the sections, and mark them as outlined.
OutlinedRegions.clear();
for (OutlinableRegion *OS : CurrentGroup.Regions) {
@ -1377,7 +1584,11 @@ unsigned IROutliner::doOutline(Module &M) {
return OutlinedFunctionNum;
}
bool IROutliner::run(Module &M) { return doOutline(M) > 0; }
bool IROutliner::run(Module &M) {
  // The hidden -ir-outlining-no-cost debug flag switches the cost model off.
  CostModel = !NoCostModel;

  // Report a change to the module only if at least one function was outlined.
  const unsigned NumOutlined = doOutline(M);
  return NumOutlined > 0;
}
// Pass Manager Boilerplate
class IROutlinerLegacyPass : public ModulePass {

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test makes sure we are extracting the found similarity sections
; correctly at the call site.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; Show that we do not extract allocas, as outlining allocas may cause
; inconsistencies with the CodeExtractor's algorithm.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test ensures that we do not include llvm.assumes. There are exceptions
; in the CodeExtractor's algorithm for llvm.assumes, so we ignore it for now.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; Show that we do not extract sections with branches as it would require extra
; label and control flow checking.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that we do not outline callbr instruction since as we do not
; outline any control flow change instructions.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that we do not outline calls. Special calls, such as
; indirect or nameless calls require extra handling to ensure that there

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that catchpad instructions are not outlined even if they
; in a similar section. Dealing with exception handling inside of an outlined

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that cleanuppad instructions are not outlined even if they
; in a similar section. Dealing with exception handling inside of an outlined

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; Show that we do not extract freeze instructions, since extra handling is
; required to mark any outputs used with freeze.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks to make sure that we do not outline getelementptr
; instructions since we must make extra checks on the final operands.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that invoke instructions are not outlined even if they
; in a similar section. Outlining does not currently handle control flow

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that landingpad instructions are not outlined even if they
; in a similar section. Dealing with exception handling inside of an outlined

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that we do not outline memcpy intrinsics since it may require
; extra address space checks.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that we do not outline memcpy intrinsics since it may require
; extra address space checks.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that we do not outline memset intrinsics since it requires
; extra address space checks.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; Show that we do not extract phi nodes as it would require extra label and
; control flow checking.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test ensures that we do not outline vararg instructions or intrinsics, as
; they may cause inconsistencies when outlining.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that debug info is extracted along with the other
; instructions.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test shows that we do not outline from basic blocks with their address
; taken.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that floating point commutative instructions are not treated
; as commutative. Even though an ffadd is technically commutative, the order

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that commutative instructions where the operands are
; swapped are outlined as the same function.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test looks at instances of constants in the different regions. If there
; is a register in the same place as a constant in a similar region of code, we

View File

@ -0,0 +1,183 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s -check-prefix=NOCOST
; This test checks that we have different results from when the cost model
; is on versus when it is off. That is, if the number of instructions needed to
; handle the arguments is greater than the number of instructions being removed,
; we do not outline.
define void @function1() #0 {
; CHECK-LABEL: @function1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]])
; CHECK-NEXT: ret void
;
; NOCOST-LABEL: @function1(
; NOCOST-NEXT: entry:
; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4
; NOCOST-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]])
; NOCOST-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%add = add i32 %0, %1
%mul = mul i32 %0, %1
%sub = sub i32 %0, %1
%div = sdiv i32 %0, %1
%add2 = add i32 %0, %1
%mul2 = mul i32 %0, %1
%sub2 = sub i32 %0, %1
%div2 = sdiv i32 %0, %1
ret void
}
define void @function2() #0 {
; CHECK-LABEL: @function2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]])
; CHECK-NEXT: ret void
;
; NOCOST-LABEL: @function2(
; NOCOST-NEXT: entry:
; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4
; NOCOST-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]])
; NOCOST-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%add = add i32 %0, %1
%mul = mul i32 %0, %1
%sub = sub i32 %0, %1
%div = sdiv i32 %0, %1
%add2 = add i32 %0, %1
%mul2 = mul i32 %0, %1
%sub2 = sub i32 %0, %1
%div2 = sdiv i32 %0, %1
ret void
}
define void @function3() #0 {
; CHECK-LABEL: @function3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[OUTPUT]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP2]], [[ADD]]
; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4
; CHECK-NEXT: ret void
;
; NOCOST-LABEL: @function3(
; NOCOST-NEXT: entry:
; NOCOST-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; NOCOST-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
; NOCOST-NEXT: call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; NOCOST-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; NOCOST-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
; NOCOST-NEXT: [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4
; NOCOST-NEXT: call void @outlined_ir_func_2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
; NOCOST-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%output = alloca i32, align 4
%result = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%add = add i32 %0, %1
store i32 %add, i32* %output, align 4
%2 = load i32, i32* %output, align 4
%3 = load i32, i32* %output, align 4
%mul = mul i32 %2, %add
store i32 %mul, i32* %result, align 4
ret void
}
define void @function4() #0 {
; CHECK-LABEL: @function4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP2]], [[ADD]]
; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4
; CHECK-NEXT: ret void
;
; NOCOST-LABEL: @function4(
; NOCOST-NEXT: entry:
; NOCOST-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4
; NOCOST-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; NOCOST-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
; NOCOST-NEXT: call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; NOCOST-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; NOCOST-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
; NOCOST-NEXT: call void @outlined_ir_func_2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
; NOCOST-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%output = alloca i32, align 4
%result = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%add = add i32 %0, %1
store i32 %add, i32* %output, align 4
%2 = load i32, i32* %output, align 4
%mul = mul i32 %2, %add
store i32 %mul, i32* %result, align 4
ret void
}

View File

@ -0,0 +1,71 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test makes sure that we do not include debug statements in outlined
; functions.
define void @outline_dbg1() {
; CHECK-LABEL: @outline_dbg1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
call void @llvm.dbg.value(metadata i64 0, metadata !14, metadata !DIExpression()), !dbg !14
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%bl = load i32, i32* %b
%cl = load i32, i32* %c
ret void
}
declare void @llvm.dbg.value(metadata, metadata, metadata)
define void @outline_dbg2() {
; CHECK-LABEL: @outline_dbg2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%bl = load i32, i32* %b
%cl = load i32, i32* %c
ret void
}
; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) #1 {
; CHECK: entry_to_outline:
; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4
; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
!0 = !DIFile(filename: "foo.c", directory: "/tmp")
!1 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!5 = distinct !DICompileUnit(language: DW_LANG_C, file: !0, producer: "My Compiler", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !6, retainedTypes: !6, globals: !7)
!6 = !{}
!7 = !{}
!11 = distinct !DISubprogram(name: "func_5", scope: !0, file: !0, line: 117, type: !12, isLocal: true, isDefinition: true, scopeLine: 118, isOptimized: false, unit: !5, retainedNodes: !6)
!12 = !DISubroutineType(types: !13)
!13 = !{}
!14 = !DILocalVariable(name: "p_6", arg: 1, scope: !11, line: 117, type: !1)

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost< %s | FileCheck %s
; This test looks at the constants in the regions, and if they are
; different it elevates the constants to arguments.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test looks at the globals in the regions, and makes sure they are not
; outlined if they are different values.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -iroutliner < %s | FileCheck %s
; RUN: opt -S -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; These functions are constructed slightly differently so that they require
; different output blocks for the values used outside of the region. We are

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner --ir-outlining-no-cost < %s | FileCheck %s
; This is a negative case to show that when we have the same set of
; instructions, but in a different order, they are not outlined in the same way.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -iroutliner < %s | FileCheck %s
; RUN: opt -S -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test tests that inputs that are replaced with the output of an outlined
; function is still recognized as the same value.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test looks at the constants in the regions, and if they are the
; same it outlines them as constants rather than elevating them to arguments.

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
@global1 = global i32 1, align 4
@global2 = global i32 2, align 4

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -iroutliner < %s | FileCheck %s
; RUN: opt -S -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; These functions are constructed slightly differently so that they require
; the same output blocks for the values used outside of the region. We are