1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-22 04:22:57 +02:00
llvm-mirror/include/llvm/Analysis/TargetTransformInfo.h

1864 lines
85 KiB
C
Raw Normal View History

//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
/// information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#include "llvm/ADT/Optional.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Analysis/AssumptionCache.h"
#include <functional>
namespace llvm {
namespace Intrinsic {
enum ID : unsigned;
}
class AssumptionCache;
class BranchInst;
class Function;
class GlobalValue;
class IntrinsicInst;
class LoadInst;
class Loop;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;
/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
/// This is the pointer that the intrinsic is loading from or storing to.
/// If this is non-null, then analysis/optimization passes can assume that
/// this intrinsic is functionally equivalent to a load/store from this
/// pointer.
Value *PtrVal = nullptr;
// Ordering for atomic operations.
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
// Same Id is set by the target for corresponding load/store intrinsics.
unsigned short MatchingId = 0;
bool ReadMem = false;
bool WriteMem = false;
bool IsVolatile = false;
bool isUnordered() const {
return (Ordering == AtomicOrdering::NotAtomic ||
Ordering == AtomicOrdering::Unordered) && !IsVolatile;
}
};
/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
HardwareLoopInfo() = delete;
HardwareLoopInfo(Loop *L) : L(L) {}
Loop *L = nullptr;
BasicBlock *ExitBlock = nullptr;
BranchInst *ExitBranch = nullptr;
const SCEV *ExitCount = nullptr;
IntegerType *CountType = nullptr;
Value *LoopDecrement = nullptr; // Decrement the loop counter by this
// value in every iteration.
bool IsNestingLegal = false; // Can a hardware loop be a parent to
// another hardware loop?
bool CounterInReg = false; // Should loop counter be updated in
// the loop via a phi?
bool PerformEntryTest = false; // Generate the intrinsic which also performs
// icmp ne zero on the loop counter value and
// produces an i1 to guard the loop entry.
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
DominatorTree &DT, bool ForceNestedLoop = false,
bool ForceHardwareLoopPHI = false);
bool canAnalyze(LoopInfo &LI);
};
/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
public:
/// Construct a TTI object using a type implementing the \c Concept
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
/// API below.
Switch TargetTransformInfo from an immutable analysis pass that requires a TargetMachine to construct (and thus isn't always available), to an analysis group that supports layered implementations much like AliasAnalysis does. This is a pretty massive change, with a few parts that I was unable to easily separate (sorry), so I'll walk through it. The first step of this conversion was to make TargetTransformInfo an analysis group, and to sink the nonce implementations in ScalarTargetTransformInfo and VectorTargetTranformInfo into a NoTargetTransformInfo pass. This allows other passes to add a hard requirement on TTI, and assume they will always get at least on implementation. The TargetTransformInfo analysis group leverages the delegation chaining trick that AliasAnalysis uses, where the base class for the analysis group delegates to the previous analysis *pass*, allowing all but tho NoFoo analysis passes to only implement the parts of the interfaces they support. It also introduces a new trick where each pass in the group retains a pointer to the top-most pass that has been initialized. This allows passes to implement one API in terms of another API and benefit when some other pass above them in the stack has more precise results for the second API. The second step of this conversion is to create a pass that implements the TargetTransformInfo analysis using the target-independent abstractions in the code generator. This replaces the ScalarTargetTransformImpl and VectorTargetTransformImpl classes in lib/Target with a single pass in lib/CodeGen called BasicTargetTransformInfo. This class actually provides most of the TTI functionality, basing it upon the TargetLowering abstraction and other information in the target independent code generator. The third step of the conversion adds support to all TargetMachines to register custom analysis passes. This allows building those passes with access to TargetLowering or other target-specific classes, and it also allows each target to customize the set of analysis passes desired in the pass manager. The baseline LLVMTargetMachine implements this interface to add the BasicTTI pass to the pass manager, and all of the tools that want to support target-aware TTI passes call this routine on whatever target machine they end up with to add the appropriate passes. The fourth step of the conversion created target-specific TTI analysis passes for the X86 and ARM backends. These passes contain the custom logic that was previously in their extensions of the ScalarTargetTransformInfo and VectorTargetTransformInfo interfaces. I separated them into their own file, as now all of the interface bits are private and they just expose a function to create the pass itself. Then I extended these target machines to set up a custom set of analysis passes, first adding BasicTTI as a fallback, and then adding their customized TTI implementations. The fourth step required logic that was shared between the target independent layer and the specific targets to move to a different interface, as they no longer derive from each other. As a consequence, a helper functions were added to TargetLowering representing the common logic needed both in the target implementation and the codegen implementation of the TTI pass. While technically this is the only change that could have been committed separately, it would have been a nightmare to extract. The final step of the conversion was just to delete all the old boilerplate. This got rid of the ScalarTargetTransformInfo and VectorTargetTransformInfo classes, all of the support in all of the targets for producing instances of them, and all of the support in the tools for manually constructing a pass based around them. Now that TTI is a relatively normal analysis group, two things become straightforward. First, we can sink it into lib/Analysis which is a more natural layer for it to live. Second, clients of this interface can depend on it *always* being available which will simplify their code and behavior. These (and other) simplifications will follow in subsequent commits, this one is clearly big enough. Finally, I'm very aware that much of the comments and documentation needs to be updated. As soon as I had this working, and plausibly well commented, I wanted to get it committed and in front of the build bots. I'll be doing a few passes over documentation later if it sticks. Commits to update DragonEgg and Clang will be made presently. llvm-svn: 171681
2013-01-07 02:37:14 +01:00
///
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
/// This is used by targets to construct a TTI wrapping their target-specific
/// implementation that encodes appropriate costs for their target.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
template <typename T> TargetTransformInfo(T Impl);
Switch TargetTransformInfo from an immutable analysis pass that requires a TargetMachine to construct (and thus isn't always available), to an analysis group that supports layered implementations much like AliasAnalysis does. This is a pretty massive change, with a few parts that I was unable to easily separate (sorry), so I'll walk through it. The first step of this conversion was to make TargetTransformInfo an analysis group, and to sink the nonce implementations in ScalarTargetTransformInfo and VectorTargetTranformInfo into a NoTargetTransformInfo pass. This allows other passes to add a hard requirement on TTI, and assume they will always get at least on implementation. The TargetTransformInfo analysis group leverages the delegation chaining trick that AliasAnalysis uses, where the base class for the analysis group delegates to the previous analysis *pass*, allowing all but tho NoFoo analysis passes to only implement the parts of the interfaces they support. It also introduces a new trick where each pass in the group retains a pointer to the top-most pass that has been initialized. This allows passes to implement one API in terms of another API and benefit when some other pass above them in the stack has more precise results for the second API. The second step of this conversion is to create a pass that implements the TargetTransformInfo analysis using the target-independent abstractions in the code generator. This replaces the ScalarTargetTransformImpl and VectorTargetTransformImpl classes in lib/Target with a single pass in lib/CodeGen called BasicTargetTransformInfo. This class actually provides most of the TTI functionality, basing it upon the TargetLowering abstraction and other information in the target independent code generator. The third step of the conversion adds support to all TargetMachines to register custom analysis passes. This allows building those passes with access to TargetLowering or other target-specific classes, and it also allows each target to customize the set of analysis passes desired in the pass manager. The baseline LLVMTargetMachine implements this interface to add the BasicTTI pass to the pass manager, and all of the tools that want to support target-aware TTI passes call this routine on whatever target machine they end up with to add the appropriate passes. The fourth step of the conversion created target-specific TTI analysis passes for the X86 and ARM backends. These passes contain the custom logic that was previously in their extensions of the ScalarTargetTransformInfo and VectorTargetTransformInfo interfaces. I separated them into their own file, as now all of the interface bits are private and they just expose a function to create the pass itself. Then I extended these target machines to set up a custom set of analysis passes, first adding BasicTTI as a fallback, and then adding their customized TTI implementations. The fourth step required logic that was shared between the target independent layer and the specific targets to move to a different interface, as they no longer derive from each other. As a consequence, a helper functions were added to TargetLowering representing the common logic needed both in the target implementation and the codegen implementation of the TTI pass. While technically this is the only change that could have been committed separately, it would have been a nightmare to extract. The final step of the conversion was just to delete all the old boilerplate. This got rid of the ScalarTargetTransformInfo and VectorTargetTransformInfo classes, all of the support in all of the targets for producing instances of them, and all of the support in the tools for manually constructing a pass based around them. Now that TTI is a relatively normal analysis group, two things become straightforward. First, we can sink it into lib/Analysis which is a more natural layer for it to live. Second, clients of this interface can depend on it *always* being available which will simplify their code and behavior. These (and other) simplifications will follow in subsequent commits, this one is clearly big enough. Finally, I'm very aware that much of the comments and documentation needs to be updated. As soon as I had this working, and plausibly well commented, I wanted to get it committed and in front of the build bots. I'll be doing a few passes over documentation later if it sticks. Commits to update DragonEgg and Clang will be made presently. llvm-svn: 171681
2013-01-07 02:37:14 +01:00
/// Construct a baseline TTI object using a minimal implementation of
/// the \c Concept API below.
///
/// The TTI implementation will reflect the information in the DataLayout
/// provided if non-null.
explicit TargetTransformInfo(const DataLayout &DL);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
// Provide move semantics.
TargetTransformInfo(TargetTransformInfo &&Arg);
TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
Switch TargetTransformInfo from an immutable analysis pass that requires a TargetMachine to construct (and thus isn't always available), to an analysis group that supports layered implementations much like AliasAnalysis does. This is a pretty massive change, with a few parts that I was unable to easily separate (sorry), so I'll walk through it. The first step of this conversion was to make TargetTransformInfo an analysis group, and to sink the nonce implementations in ScalarTargetTransformInfo and VectorTargetTranformInfo into a NoTargetTransformInfo pass. This allows other passes to add a hard requirement on TTI, and assume they will always get at least on implementation. The TargetTransformInfo analysis group leverages the delegation chaining trick that AliasAnalysis uses, where the base class for the analysis group delegates to the previous analysis *pass*, allowing all but tho NoFoo analysis passes to only implement the parts of the interfaces they support. It also introduces a new trick where each pass in the group retains a pointer to the top-most pass that has been initialized. This allows passes to implement one API in terms of another API and benefit when some other pass above them in the stack has more precise results for the second API. The second step of this conversion is to create a pass that implements the TargetTransformInfo analysis using the target-independent abstractions in the code generator. This replaces the ScalarTargetTransformImpl and VectorTargetTransformImpl classes in lib/Target with a single pass in lib/CodeGen called BasicTargetTransformInfo. This class actually provides most of the TTI functionality, basing it upon the TargetLowering abstraction and other information in the target independent code generator. The third step of the conversion adds support to all TargetMachines to register custom analysis passes. This allows building those passes with access to TargetLowering or other target-specific classes, and it also allows each target to customize the set of analysis passes desired in the pass manager. The baseline LLVMTargetMachine implements this interface to add the BasicTTI pass to the pass manager, and all of the tools that want to support target-aware TTI passes call this routine on whatever target machine they end up with to add the appropriate passes. The fourth step of the conversion created target-specific TTI analysis passes for the X86 and ARM backends. These passes contain the custom logic that was previously in their extensions of the ScalarTargetTransformInfo and VectorTargetTransformInfo interfaces. I separated them into their own file, as now all of the interface bits are private and they just expose a function to create the pass itself. Then I extended these target machines to set up a custom set of analysis passes, first adding BasicTTI as a fallback, and then adding their customized TTI implementations. The fourth step required logic that was shared between the target independent layer and the specific targets to move to a different interface, as they no longer derive from each other. As a consequence, a helper functions were added to TargetLowering representing the common logic needed both in the target implementation and the codegen implementation of the TTI pass. While technically this is the only change that could have been committed separately, it would have been a nightmare to extract. The final step of the conversion was just to delete all the old boilerplate. This got rid of the ScalarTargetTransformInfo and VectorTargetTransformInfo classes, all of the support in all of the targets for producing instances of them, and all of the support in the tools for manually constructing a pass based around them. Now that TTI is a relatively normal analysis group, two things become straightforward. First, we can sink it into lib/Analysis which is a more natural layer for it to live. Second, clients of this interface can depend on it *always* being available which will simplify their code and behavior. These (and other) simplifications will follow in subsequent commits, this one is clearly big enough. Finally, I'm very aware that much of the comments and documentation needs to be updated. As soon as I had this working, and plausibly well commented, I wanted to get it committed and in front of the build bots. I'll be doing a few passes over documentation later if it sticks. Commits to update DragonEgg and Clang will be made presently. llvm-svn: 171681
2013-01-07 02:37:14 +01:00
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
// We need to define the destructor out-of-line to define our sub-classes
// out-of-line.
~TargetTransformInfo();
/// Handle the invalidation of this information.
///
/// When used as a result of \c TargetIRAnalysis this method will be called
/// when the function this was computed for changes. When it returns false,
/// the information is preserved across those changes.
[PM] Extend the explicit 'invalidate' method API on analysis results to accept an Invalidator that allows them to invalidate themselves if their dependencies are in turn invalidated. Rather than recording the dependency graph ahead of time when analysis get results from other analyses, this simply lets each result trigger the immediate invalidation of any analyses they actually depend on. They do this in a way that has three nice properties: 1) They don't have to handle transitive dependencies because the infrastructure will recurse for them. 2) The invalidate methods are still called only once. We just dynamically discover the necessary topological ordering, everything is memoized nicely. 3) The infrastructure still provides a default implementation and can access it so that only analyses which have dependencies need to do anything custom. To make this work at all, the invalidation logic also has to defer the deletion of the result objects themselves so that they can remain alive until we have collected the complete set of results to invalidate. A unittest is added here that has exactly the dependency pattern we are concerned with. It hit the use-after-free described by Sean in much detail in the long thread about analysis invalidation before this change, and even in an intermediate form of this change where we failed to defer the deletion of the result objects. There is an important problem with doing dependency invalidation that *isn't* solved here: we don't *enforce* that results correctly invalidate all the analyses whose results they depend on. I actually looked at what it would take to do that, and it isn't as hard as I had thought but the complexity it introduces seems very likely to outweigh the benefit. The technique would be to provide a base class for an analysis result that would be populated with other results, and automatically provide the invalidate method which immediately does the correct thing. This approach has some nice pros IMO: - Handles the case we care about and nothing else: only *results* that depend on other analyses trigger extra invalidation. - Localized to the result rather than centralized in the analysis manager. - Ties the storage of the reference to another result to the triggering of the invalidation of that analysis. - Still supports extending invalidation in customized ways. But the down sides here are: - Very heavy-weight meta-programming is needed to provide this base class. - Requires a pretty awful API for accessing the dependencies. Ultimately, I fear it will not pull its weight. But we can re-evaluate this at any point if we start discovering consistent problems where the invalidation and dependencies get out of sync. It will fit as a clean layer on top of the facilities in this patch that we can add if and when we need it. Note that I'm not really thrilled with the names for these APIs... The name "Invalidator" seems ok but not great. The method name "invalidate" also. In review some improvements were suggested, but they really need *other* uses of these terms to be updated as well so I'm going to do that in a follow-up commit. I'm working on the actual fixes to various analyses that need to use these, but I want to try to get tests for each of them so we don't regress. And those changes are seperable and obvious so once this goes in I should be able to roll them out throughout LLVM. Many thanks to Sean, Justin, and others for help reviewing here. Differential Revision: https://reviews.llvm.org/D23738 llvm-svn: 288077
2016-11-28 23:04:31 +01:00
bool invalidate(Function &, const PreservedAnalyses &,
FunctionAnalysisManager::Invalidator &) {
// FIXME: We should probably in some way ensure that the subtarget
// information for a function hasn't changed.
return false;
}
/// \name Generic Target Information
/// @{
/// The kind of cost model.
///
/// There are several different cost models that can be customized by the
/// target. The normalization of each cost model may be target specific.
enum TargetCostKind {
TCK_RecipThroughput, ///< Reciprocal throughput.
TCK_Latency, ///< The latency of instruction.
TCK_CodeSize ///< Instruction code size.
};
/// Query the cost of a specified instruction.
///
/// Clients should use this interface to query the cost of an existing
/// instruction. The instruction must have a valid parent (basic block).
///
/// Note, this method does not cache the cost calculation and it
/// can be expensive in some cases.
int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
switch (kind){
case TCK_RecipThroughput:
return getInstructionThroughput(I);
case TCK_Latency:
return getInstructionLatency(I);
case TCK_CodeSize:
return getUserCost(I);
}
llvm_unreachable("Unknown instruction cost kind");
}
/// Underlying constants for 'cost' values in this interface.
///
/// Many APIs in this interface return a cost. This enum defines the
/// fundamental values that should be used to interpret (and produce) those
/// costs. The costs are returned as an int rather than a member of this
/// enumeration because it is expected that the cost of one IR instruction
/// may have a multiplicative factor to it or otherwise won't fit directly
/// into the enum. Moreover, it is common to sum or average costs which works
/// better as simple integral values. Thus this enum only provides constants.
/// Also note that the returned costs are signed integers to make it natural
/// to add, subtract, and test with zero (a common boundary condition). It is
/// not expected that 2^32 is a realistic cost to be modeling at any point.
///
/// Note that these costs should usually reflect the intersection of code-size
/// cost and execution cost. A free instruction is typically one that folds
/// into another instruction. For example, reg-to-reg moves can often be
/// skipped by renaming the registers in the CPU, but they still are encoded
/// and thus wouldn't be considered 'free' here.
enum TargetCostConstants {
TCC_Free = 0, ///< Expected to fold away in lowering.
TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
};
/// Estimate the cost of a specific operation when lowered.
///
/// Note that this is designed to work on an arbitrary synthetic opcode, and
/// thus work for hypothetical queries before an instruction has even been
/// formed. However, this does *not* work for GEPs, and must not be called
/// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
/// analyzing a GEP's cost required more information.
///
/// Typically only the result type is required, and the operand type can be
/// omitted. However, if the opcode is one of the cast instructions, the
/// operand type is required.
///
/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.
int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
/// Estimate the cost of a GEP operation when lowered.
///
/// The contract for this function is the same as \c getOperationCost except
/// that it supports an interface that provides extra information specific to
/// the GEP operation.
int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) const;
/// Estimate the cost of a EXT operation when lowered.
///
/// The contract for this function is the same as \c getOperationCost except
/// that it supports an interface that provides extra information specific to
/// the EXT operation.
int getExtCost(const Instruction *I, const Value *Src) const;
/// Estimate the cost of a function call when lowered.
///
/// The contract for this is the same as \c getOperationCost except that it
/// supports an interface that provides extra information specific to call
/// instructions.
///
/// This is the most basic query for estimating call cost: it only knows the
/// function type and (potentially) the number of arguments at the call site.
/// The latter is only interesting for varargs function types.
int getCallCost(FunctionType *FTy, int NumArgs = -1,
const User *U = nullptr) const;
/// Estimate the cost of calling a specific function when lowered.
///
/// This overload adds the ability to reason about the particular function
/// being called in the event it is a library call with special lowering.
int getCallCost(const Function *F, int NumArgs = -1,
const User *U = nullptr) const;
/// Estimate the cost of calling a specific function when lowered.
///
/// This overload allows specifying a set of candidate argument values.
int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
const User *U = nullptr) const;
/// \returns A value by which our inlining threshold should be multiplied.
/// This is primarily used to bump up the inlining threshold wholesale on
/// targets where calls are unusually expensive.
///
/// TODO: This is a rather blunt instrument. Perhaps altering the costs of
/// individual classes of instructions would be better.
unsigned getInliningThresholdMultiplier() const;
/// Estimate the cost of an intrinsic when lowered.
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys,
const User *U = nullptr) const;
/// Estimate the cost of an intrinsic when lowered.
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments,
const User *U = nullptr) const;
/// \return the expected cost of a memcpy, which could e.g. depend on the
/// source/destination type and alignment and the number of bytes copied.
int getMemcpyCost(const Instruction *I) const;
[InlineCost] Improve the cost heuristic for Switch Summary: The motivation example is like below which has 13 cases but only 2 distinct targets ``` lor.lhs.false2: ; preds = %if.then switch i32 %Status, label %if.then27 [ i32 -7012, label %if.end35 i32 -10008, label %if.end35 i32 -10016, label %if.end35 i32 15000, label %if.end35 i32 14013, label %if.end35 i32 10114, label %if.end35 i32 10107, label %if.end35 i32 10105, label %if.end35 i32 10013, label %if.end35 i32 10011, label %if.end35 i32 7008, label %if.end35 i32 7007, label %if.end35 i32 5002, label %if.end35 ] ``` which is compiled into a balanced binary tree like this on AArch64 (similar on X86) ``` .LBB853_9: // %lor.lhs.false2 mov w8, #10012 cmp w19, w8 b.gt .LBB853_14 // BB#10: // %lor.lhs.false2 mov w8, #5001 cmp w19, w8 b.gt .LBB853_18 // BB#11: // %lor.lhs.false2 mov w8, #-10016 cmp w19, w8 b.eq .LBB853_23 // BB#12: // %lor.lhs.false2 mov w8, #-10008 cmp w19, w8 b.eq .LBB853_23 // BB#13: // %lor.lhs.false2 mov w8, #-7012 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_14: // %lor.lhs.false2 mov w8, #14012 cmp w19, w8 b.gt .LBB853_21 // BB#15: // %lor.lhs.false2 mov w8, #-10105 add w8, w19, w8 cmp w8, #9 // =9 b.hi .LBB853_17 // BB#16: // %lor.lhs.false2 orr w9, wzr, #0x1 lsl w8, w9, w8 mov w9, #517 and w8, w8, w9 cbnz w8, .LBB853_23 .LBB853_17: // %lor.lhs.false2 mov w8, #10013 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_18: // %lor.lhs.false2 mov w8, #-7007 add w8, w19, w8 cmp w8, #2 // =2 b.lo .LBB853_23 // BB#19: // %lor.lhs.false2 mov w8, #5002 cmp w19, w8 b.eq .LBB853_23 // BB#20: // %lor.lhs.false2 mov w8, #10011 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_21: // %lor.lhs.false2 mov w8, #14013 cmp w19, w8 b.eq .LBB853_23 // BB#22: // %lor.lhs.false2 mov w8, #15000 cmp w19, w8 b.ne .LBB853_3 ``` However, the inline cost model estimates the cost to be linear with the number of distinct targets and the cost of the above switch is just 2 InstrCosts. The function containing this switch is then inlined about 900 times. This change use the general way of switch lowering for the inline heuristic. It etimate the number of case clusters with the suitability check for a jump table or bit test. Considering the binary search tree built for the clusters, this change modifies the model to be linear with the size of the balanced binary tree. The model is off by default for now : -inline-generic-switch-cost=false This change was originally proposed by Haicheng in D29870. Reviewers: hans, bmakam, chandlerc, eraman, haicheng, mcrosier Reviewed By: hans Subscribers: joerg, aemerson, llvm-commits, rengolin Differential Revision: https://reviews.llvm.org/D31085 llvm-svn: 301649
2017-04-28 18:04:03 +02:00
/// \return The estimated number of case clusters when lowering \p 'SI'.
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump
/// table.
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize) const;
/// Estimate the cost of a given IR user when lowered.
///
/// This can estimate the cost of either a ConstantExpr or Instruction when
/// lowered. It has two primary advantages over the \c getOperationCost and
/// \c getGEPCost above, and one significant disadvantage: it can only be
/// used when the IR construct has already been formed.
///
/// The advantages are that it can inspect the SSA use graph to reason more
/// accurately about the cost. For example, all-constant-GEPs can often be
/// folded into a load or other instruction, but if they are used in some
/// other context they may not be folded. This routine can distinguish such
/// cases.
///
/// \p Operands is a list of operands which can be a result of transformations
/// of the current operands. The number of the operands on the list must equal
/// to the number of the current operands the IR user has. Their order on the
/// list must be the same as the order of the current operands the IR user
/// has.
///
/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.
int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
/// This is a helper function which calls the two-argument getUserCost
/// with \p Operands which are the current operands U has.
int getUserCost(const User *U) const {
SmallVector<const Value *, 4> Operands(U->value_op_begin(),
U->value_op_end());
return getUserCost(U, Operands);
}
/// Return true if branch divergence exists.
///
/// Branch divergence has a significantly negative impact on GPU performance
/// when threads in the same wavefront take different paths due to conditional
/// branches.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool hasBranchDivergence() const;
/// Returns whether V is a source of divergence.
///
/// This function provides the target-dependent information for
/// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first
/// builds the dependency graph, and then runs the reachability algorithm
/// starting with the sources of divergence.
bool isSourceOfDivergence(const Value *V) const;
// Returns true for the target specific
// set of operations which produce uniform result
// even taking non-uniform arguments
bool isAlwaysUniform(const Value *V) const;
/// Returns the address space ID for a target's 'flat' address space. Note
/// this is not necessarily the same as addrspace(0), which LLVM sometimes
/// refers to as the generic address space. The flat address space is a
/// generic address space that can be used access multiple segments of memory
/// with different address spaces. Access of a memory location through a
/// pointer with this address space is expected to be legal but slower
/// compared to the same memory location accessed through a pointer with a
/// different address space.
//
/// This is for targets with different pointer representations which can
/// be converted with the addrspacecast instruction. If a pointer is converted
/// to this address space, optimizations should attempt to replace the access
/// with the source address space.
///
/// \returns ~0u if the target does not have such a flat address space to
/// optimize away.
unsigned getFlatAddressSpace() const;
/// Test whether calls to a function lower to actual program function
/// calls.
///
/// The idea is to test whether the program is likely to require a 'call'
/// instruction or equivalent in order to call the given function.
///
/// FIXME: It's not clear that this is a good or useful query API. Client's
/// should probably move to simpler cost metrics using the above.
/// Alternatively, we could split the cost interface into distinct code-size
/// and execution-speed costs. This would allow modelling the core of this
/// query more accurately as a call is a single small instruction, but
/// incurs significant execution cost.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool isLoweredToCall(const Function *F) const;
struct LSRCost {
/// TODO: Some of these could be merged. Also, a lexical ordering
/// isn't always optimal.
unsigned Insns;
unsigned NumRegs;
unsigned AddRecCost;
unsigned NumIVMuls;
unsigned NumBaseAdds;
unsigned ImmCost;
unsigned SetupCost;
unsigned ScaleCost;
};
/// Parameters that control the generic loop unrolling transformation.
struct UnrollingPreferences {
[Unroll] Rework the naming and structure of the new unroll heuristics. The new naming is (to me) much easier to understand. Here is a summary of the new state of the world: - '*Threshold' is the threshold for full unrolling. It is measured against the estimated unrolled cost as computed by getUserCost in TTI (or CodeMetrics, etc). We will exceed this threshold when unrolling loops where unrolling exposes a significant degree of simplification of the logic within the loop. - '*PercentDynamicCostSavedThreshold' is the percentage of the loop's estimated dynamic execution cost which needs to be saved by unrolling to apply a discount to the estimated unrolled cost. - '*DynamicCostSavingsDiscount' is the discount applied to the estimated unrolling cost when the dynamic savings are expected to be high. When actually analyzing the loop, we now produce both an estimated unrolled cost, and an estimated rolled cost. The rolled cost is notably a dynamic estimate based on our analysis of the expected execution of each iteration. While we're still working to build up the infrastructure for making these estimates, to me it is much more clear *how* to make them better when they have reasonably descriptive names. For example, we may want to apply estimated (from heuristics or profiles) dynamic execution weights to the *dynamic* cost estimates. If we start doing that, we would also need to track the static unrolled cost and the dynamic unrolled cost, as only the latter could reasonably be weighted by profile information. This patch is sadly not without functionality change for the new unroll analysis logic. Buried in the heuristic management were several things that surprised me. For example, we never subtracted the optimized instruction count off when comparing against the unroll heursistics! I don't know if this just got lost somewhere along the way or what, but with the new accounting of things, this is much easier to keep track of and we use the post-simplification cost estimate to compare to the thresholds, and use the dynamic cost reduction ratio to select whether we can exceed the baseline threshold. The old values of these flags also don't necessarily make sense. My impression is that none of these thresholds or discounts have been tuned yet, and so they're just arbitrary placehold numbers. As such, I've not bothered to adjust for the fact that this is now a discount and not a tow-tier threshold model. We need to tune all these values once the logic is ready to be enabled. Differential Revision: http://reviews.llvm.org/D9966 llvm-svn: 239164
2015-06-05 19:01:43 +02:00
/// The cost threshold for the unrolled loop. Should be relative to the
/// getUserCost values returned by this API, and the expectation is that
/// the unrolled loop's instructions when run through that interface should
/// not exceed this cost. However, this is only an estimate. Also, specific
/// loops may be unrolled even with a cost above this threshold if deemed
/// profitable. Set this to UINT_MAX to disable the loop body cost
/// restriction.
unsigned Threshold;
/// If complete unrolling will reduce the cost of the loop, we will boost
/// the Threshold by a certain percent to allow more aggressive complete
/// unrolling. This value provides the maximum boost percentage that we
/// can apply to Threshold (The value should be no less than 100).
/// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
/// MaxPercentThresholdBoost / 100)
/// E.g. if complete unrolling reduces the loop execution time by 50%
/// then we boost the threshold by the factor of 2x. If unrolling is not
/// expected to reduce the running time, then we do not increase the
/// threshold.
unsigned MaxPercentThresholdBoost;
/// The cost threshold for the unrolled loop when optimizing for size (set
/// to UINT_MAX to disable).
unsigned OptSizeThreshold;
/// The cost threshold for the unrolled loop, like Threshold, but used
/// for partial/runtime unrolling (set to UINT_MAX to disable).
unsigned PartialThreshold;
/// The cost threshold for the unrolled loop when optimizing for size, like
/// OptSizeThreshold, but used for partial/runtime unrolling (set to
/// UINT_MAX to disable).
unsigned PartialOptSizeThreshold;
/// A forced unrolling factor (the number of concatenated bodies of the
/// original loop in the unrolled loop body). When set to 0, the unrolling
/// transformation will select an unrolling factor based on the current cost
/// threshold and other factors.
unsigned Count;
/// A forced peeling factor (the number of bodied of the original loop
/// that should be peeled off before the loop body). When set to 0, the
/// unrolling transformation will select a peeling factor based on profile
/// information and other factors.
unsigned PeelCount;
/// Default unroll count for loops with run-time trip count.
unsigned DefaultUnrollRuntimeCount;
// Set the maximum unrolling factor. The unrolling factor may be selected
// using the appropriate cost threshold, but may not exceed this number
// (set to UINT_MAX to disable). This does not apply in cases where the
// loop is being fully unrolled.
unsigned MaxCount;
/// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
/// applies even if full unrolling is selected. This allows a target to fall
/// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
unsigned FullUnrollMaxCount;
// Represents number of instructions optimized when "back edge"
// becomes "fall through" in unrolled loop.
// For now we count a conditional branch on a backedge and a comparison
// feeding it.
unsigned BEInsns;
/// Allow partial unrolling (unrolling of loops to expand the size of the
/// loop body, not only to eliminate small constant-trip-count loops).
bool Partial;
/// Allow runtime unrolling (unrolling of loops to expand the size of the
/// loop body even when the number of loop iterations is not known at
/// compile time).
bool Runtime;
/// Allow generation of a loop remainder (extra iterations after unroll).
bool AllowRemainder;
/// Allow emitting expensive instructions (such as divisions) when computing
/// the trip count of a loop for runtime unrolling.
bool AllowExpensiveTripCount;
/// Apply loop unroll on any kind of loop
/// (mainly to loops that fail runtime unrolling).
bool Force;
/// Allow using trip count upper bound to unroll loops.
bool UpperBound;
/// Allow peeling off loop iterations for loops with low dynamic tripcount.
bool AllowPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollAndJam;
/// Threshold for unroll and jam, for inner loop size. The 'Threshold'
/// value above is used during unroll and jam for the outer loop size.
/// This value is used in the same manner to limit the size of the inner
/// loop.
unsigned UnrollAndJamInnerLoopThreshold;
};
/// Get target-customized preferences for the generic loop unrolling
/// transformation. The caller will initialize UP with the current
/// target-independent defaults.
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) const;
/// Query the target whether it would be profitable to convert the given loop
/// into a hardware loop.
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) const;
/// @}
/// \name Scalar Target Information
/// @{
/// Flags indicating the kind of support for population count.
///
/// Compared to the SW implementation, HW support is supposed to
/// significantly boost the performance when the population is dense, and it
/// may or may not degrade performance if the population is sparse. A HW
/// support is considered as "Fast" if it can outperform, or is on a par
/// with, SW implementation when the population is sparse; otherwise, it is
/// considered as "Slow".
enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
/// Return true if the specified immediate is legal add immediate, that
2014-01-24 19:22:55 +01:00
/// is the target has add instructions which can add a register with the
/// immediate without having to materialize the immediate into a register.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool isLegalAddImmediate(int64_t Imm) const;
/// Return true if the specified immediate is legal icmp immediate,
2014-01-24 19:22:55 +01:00
/// that is the target has icmp instructions which can compare a register
/// against the immediate without having to materialize the immediate into a
/// register.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool isLegalICmpImmediate(int64_t Imm) const;
/// Return true if the addressing mode represented by AM is legal for
2014-01-24 19:22:55 +01:00
/// this target, for a load/store of the specified type.
/// The type may be VoidTy, in which case only return true if the addressing
/// mode is legal for a load/store of any legal type.
/// If target returns true in LSRWithInstrQueries(), I may be valid.
/// TODO: Handle pre/postinc as well.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace = 0,
Instruction *I = nullptr) const;
/// Return true if LSR cost of C1 is lower than C1.
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) const;
/// Return true if the target can fuse a compare and branch.
/// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
/// calculation for the instructions in a loop.
bool canMacroFuseCmp() const;
/// Return true if the target can save a compare for loop count, for example
/// hardware loop saves a compare.
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) const;
/// \return True is LSR should make efforts to create/preserve post-inc
/// addressing mode expressions.
bool shouldFavorPostInc() const;
/// Return true if LSR should make efforts to generate indexed addressing
/// modes that operate across loop iterations.
bool shouldFavorBackedgeIndex(const Loop *L) const;
/// Return true if the target supports masked load.
bool isLegalMaskedStore(Type *DataType) const;
/// Return true if the target supports masked store.
bool isLegalMaskedLoad(Type *DataType) const;
/// Return true if the target supports nontemporal store.
bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
/// Return true if the target supports nontemporal load.
bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
/// Return true if the target supports masked scatter.
bool isLegalMaskedScatter(Type *DataType) const;
/// Return true if the target supports masked gather.
bool isLegalMaskedGather(Type *DataType) const;
/// Return true if the target supports masked compress store.
bool isLegalMaskedCompressStore(Type *DataType) const;
/// Return true if the target supports masked expand load.
bool isLegalMaskedExpandLoad(Type *DataType) const;
/// Return true if the target has a unified operation to calculate division
/// and remainder. If so, the additional implicit multiplication and
/// subtraction required to calculate a remainder from division are free. This
/// can enable more aggressive transformations for division and remainder than
/// would typically be allowed using throughput or size cost models.
bool hasDivRemOp(Type *DataType, bool IsSigned) const;
/// Return true if the given instruction (assumed to be a memory access
/// instruction) has a volatile variant. If that's the case then we can avoid
/// addrspacecast to generic AS for volatile loads/stores. Default
/// implementation returns false, which prevents address space inference for
/// volatile loads/stores.
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
/// Return true if target doesn't mind addresses in vectors.
bool prefersVectorizedAddressing() const;
/// Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
/// TODO: Handle pre/postinc as well.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace = 0) const;
/// Return true if the loop strength reduce pass should make
/// Instruction* based TTI queries to isLegalAddressingMode(). This is
/// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
/// immediate offset and no index register.
bool LSRWithInstrQueries() const;
/// Return true if it's free to truncate a value of type Ty1 to type
2014-01-24 19:22:55 +01:00
/// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
/// by referencing its sub-register AX.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool isTruncateFree(Type *Ty1, Type *Ty2) const;
/// Return true if it is profitable to hoist instruction in the
/// then/else to before if.
bool isProfitableToHoist(Instruction *I) const;
bool useAA() const;
/// Return true if this type is legal.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool isTypeLegal(Type *Ty) const;
/// Returns the target's jmp_buf alignment in bytes.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
unsigned getJumpBufAlignment() const;
/// Returns the target's jmp_buf size in bytes.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
unsigned getJumpBufSize() const;
/// Return true if switches should be turned into lookup tables for the
2014-01-24 19:22:55 +01:00
/// target.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool shouldBuildLookupTables() const;
/// Return true if switches should be turned into lookup tables
/// containing this constant value for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const;
/// Return true if the input function which is cold at all call sites,
/// should use coldcc calling convention.
bool useColdCCForColdCall(Function &F) const;
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
unsigned VF) const;
/// If target has efficient vector element load/store instructions, it can
/// return true here so that insertion/extraction costs are not added to
/// the scalarization cost of a load/store.
bool supportsEfficientVectorElementLoadStore() const;
/// Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
/// Returns options for expansion of memcmp. IsZeroCmp is
// true if this is the expansion of memcmp(p1, p2, s) == 0.
struct MemCmpExpansionOptions {
// Return true if memcmp expansion is enabled.
operator bool() const { return MaxNumLoads > 0; }
// Maximum number of load operations.
unsigned MaxNumLoads = 0;
// The list of available load sizes (in bytes), sorted in decreasing order.
SmallVector<unsigned, 8> LoadSizes;
// For memcmp expansion when the memcmp result is only compared equal or
// not-equal to 0, allow up to this number of load pairs per block. As an
// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
// a0 = load2bytes &a[0]
// b0 = load2bytes &b[0]
// a2 = load1byte &a[2]
// b2 = load1byte &b[2]
// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
unsigned NumLoadsPerBlock = 1;
// Set to true to allow overlapping loads. For example, 7-byte compares can
// be done with two 4-byte compares instead of 4+2+1-byte compares. This
// requires all loads in LoadSizes to be doable in an unaligned way.
bool AllowOverlappingLoads = false;
};
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
/// Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
/// Enable matching of interleaved access groups that contain predicated
/// accesses or gaps and therefore vectorized using masked
/// vector loads/stores.
bool enableMaskedInterleavedAccessVectorization() const;
/// Indicate that it is potentially unsafe to automatically vectorize
/// floating-point operations because the semantics of vector and scalar
/// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
/// does not support IEEE-754 denormal numbers, while depending on the
/// platform, scalar floating-point math does.
/// This applies to floating-point math operations and calls, not memory
/// operations, shuffles, or casts.
bool isFPVectorizationPotentiallyUnsafe() const;
/// Determine if the target supports unaligned memory accesses.
bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth, unsigned AddressSpace = 0,
unsigned Alignment = 1,
bool *Fast = nullptr) const;
/// Return hardware support for population count.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
/// Return true if the hardware has a fast square-root instruction.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool haveFastSqrt(Type *Ty) const;
/// Return true if it is faster to check if a floating-point value is NaN
/// (or not-NaN) versus a comparison against a constant FP zero value.
/// Targets should override this if materializing a 0.0 for comparison is
/// generally as cheap as checking for ordered/unordered.
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
/// Return the expected cost of supporting the floating point operation
/// of the specified type.
int getFPOpCost(Type *Ty) const;
/// Return the expected cost of materializing for the given integer
2014-01-24 19:22:55 +01:00
/// immediate of the specified type.
int getIntImmCost(const APInt &Imm, Type *Ty) const;
/// Return the expected cost of materialization for the given integer
/// immediate of the specified type for a given instruction. The cost can be
/// zero if the immediate can be folded into the specified instruction.
int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) const;
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const;
/// Return the expected cost for the given integer when optimising
/// for size. This is different than the other integer immediate cost
/// functions in that it is subtarget agnostic. This is useful when you e.g.
/// target one ISA such as Aarch32 but smaller encodings could be possible
/// with another such as Thumb. This return value is used as a penalty when
/// the total costs for a constant is calculated (the bigger the cost, the
/// more beneficial constant hoisting is).
int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) const;
/// @}
/// \name Vector Target Information
/// @{
/// The various kinds of shuffle patterns for vector queries.
enum ShuffleKind {
SK_Broadcast, ///< Broadcast element 0 to all other elements.
SK_Reverse, ///< Reverse the order of the vector.
SK_Select, ///< Selects elements from the corresponding lane of
///< either source operand. This is equivalent to a
///< vector select with a constant condition operand.
SK_Transpose, ///< Transpose two vectors.
SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
SK_ExtractSubvector,///< ExtractSubvector Index indicates start offset.
SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
///< with any shuffle mask.
SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
///< shuffle mask.
};
/// Additional information about an operand's possible values.
enum OperandValueKind {
OK_AnyValue, // Operand can have any value.
OK_UniformValue, // Operand is uniform (splat of a value).
OK_UniformConstantValue, // Operand is uniform constant.
OK_NonUniformConstantValue // Operand is a non uniform constant value.
};
/// Additional properties of an operand's values.
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
/// \return The number of scalar or vector registers that the target has.
/// If 'Vectors' is true, it returns the number of vector registers. If it is
/// set to false, it returns the number of scalar registers.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
unsigned getNumberOfRegisters(bool Vector) const;
/// \return The width of the largest scalar or vector register type.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
unsigned getRegisterBitWidth(bool Vector) const;
[SLP] Enable 64-bit wide vectorization on AArch64 ARM Neon has native support for half-sized vector registers (64 bits). This is beneficial for example for 2D and 3D graphics. This patch adds the option to lower MinVecRegSize from 128 via a TTI in the SLP Vectorizer. *** Performance Analysis This change was motivated by some internal benchmarks but it is also beneficial on SPEC and the LLVM testsuite. The results are with -O3 and PGO. A negative percentage is an improvement. The testsuite was run with a sample size of 4. ** SPEC * CFP2006/482.sphinx3 -3.34% A pretty hot loop is SLP vectorized resulting in nice instruction reduction. This used to be a +22% regression before rL299482. * CFP2000/177.mesa -3.34% * CINT2000/256.bzip2 +6.97% My current plan is to extend the fix in rL299482 to i16 which brings the regression down to +2.5%. There are also other problems with the codegen in this loop so there is further room for improvement. ** LLVM testsuite * SingleSource/Benchmarks/Misc/ReedSolomon -10.75% There are multiple small SLP vectorizations outside the hot code. It's a bit surprising that it adds up to 10%. Some of this may be code-layout noise. * MultiSource/Benchmarks/VersaBench/beamformer/beamformer -8.40% The opt-viewer screenshot can be seen at F3218284. We start at a colder store but the tree leads us into the hottest loop. * MultiSource/Applications/lambda-0.1.3/lambda -2.68% * MultiSource/Benchmarks/Bullet/bullet -2.18% This is using 3D vectors. * SingleSource/Benchmarks/Shootout-C++/Shootout-C++-lists +6.67% Noise, binary is unchanged. * MultiSource/Benchmarks/Ptrdist/anagram/anagram +4.90% There is an additional SLP in the cold code. The test runs for ~1sec and prints out over 2000 lines. This is most likely noise. * MultiSource/Applications/aha/aha +1.63% * MultiSource/Applications/JM/lencod/lencod +1.41% * SingleSource/Benchmarks/Misc/richards_benchmark +1.15% Differential Revision: https://reviews.llvm.org/D31965 llvm-svn: 303116
2017-05-15 23:15:01 +02:00
/// \return The width of the smallest vector register type.
unsigned getMinVectorRegisterBitWidth() const;
/// \return True if the vectorization factor should be chosen to
/// make the vector of the smallest element type match the size of a
/// vector register. For wider element types, this could result in
/// creating vectors that span multiple vector registers.
/// If false, the vectorization factor will be chosen based on the
/// size of the widest element type.
bool shouldMaximizeVectorBandwidth(bool OptSize) const;
/// \return The minimum vectorization factor for types of given element
/// bit width, or 0 if there is no minimum VF. The returned value only
/// applies when shouldMaximizeVectorBandwidth returns true.
unsigned getMinimumVF(unsigned ElemWidth) const;
/// \return True if it should be considered for address type promotion.
/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
/// profitable without finding other extensions fed by the same input.
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
/// \return The size of a cache line in bytes.
unsigned getCacheLineSize() const;
2017-08-24 11:46:25 +02:00
/// The possible cache levels
enum class CacheLevel {
L1D, // The L1 data cache
L2D, // The L2 data cache
// We currently do not model L3 caches, as their sizes differ widely between
// microarchitectures. Also, we currently do not have a use for L3 cache
// size modeling yet.
};
/// \return The size of the cache level in bytes, if available.
llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
/// \return The associativity of the cache level, if available.
llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
/// \return How much before a load we should place the prefetch instruction.
/// This is currently measured in number of instructions.
unsigned getPrefetchDistance() const;
/// \return Some HW prefetchers can handle accesses up to a certain constant
/// stride. This is the minimum stride in bytes where it makes sense to start
/// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
unsigned getMinPrefetchStride() const;
/// \return The maximum number of iterations to prefetch ahead. If the
/// required number of iterations is more than this number, no prefetching is
/// performed.
unsigned getMaxPrefetchIterationsAhead() const;
/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
unsigned getMaxInterleaveFactor(unsigned VF) const;
/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
static OperandValueKind getOperandInfo(Value *V,
OperandValueProperties &OpProps);
/// This is an approximation of reciprocal throughput of a math/logic op.
/// A higher cost indicates less expected throughput.
/// From Agner Fog's guides, reciprocal throughput is "the average number of
/// clock cycles per instruction when the instructions are not part of a
/// limiting dependency chain."
/// Therefore, costs should be scaled to account for multiple execution units
/// on the target that can process this type of instruction. For example, if
/// there are 5 scalar integer units and 2 vector integer units that can
/// calculate an 'add' in a single cycle, this model should indicate that the
/// cost of the vector add instruction is 2.5 times the cost of the scalar
/// add instruction.
/// \p Args is an optional argument which holds the instruction operands
/// values so the TTI can analyze those values searching for special
/// cases or optimizations based on those values.
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
OperandValueKind Opd2Info = OK_AnyValue,
OperandValueProperties Opd1PropInfo = OP_None,
OperandValueProperties Opd2PropInfo = OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The index and subtype parameters are used by the subvector insertion and
/// extraction shuffle kinds to show the insert/extract point and the type of
/// the subvector being inserted/extracted.
/// NOTE: For subvector extractions Tp represents the source type.
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
Type *SubTp = nullptr) const;
/// \return The expected cost of cast instructions, such as bitcast, trunc,
/// zext, etc. If there is an existing instruction that holds Opcode, it
/// may be passed in the 'I' parameter.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I = nullptr) const;
/// \return The expected cost of a sign- or zero-extended vector extract. Use
/// -1 to indicate that there is no information about the index value.
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index = -1) const;
/// \return The expected cost of control-flow related instructions such as
/// Phi, Ret, Br.
int getCFInstrCost(unsigned Opcode) const;
/// \returns The expected cost of compare and select instructions. If there
/// is an existing instruction that holds Opcode, it may be passed in the
/// 'I' parameter.
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy = nullptr, const Instruction *I = nullptr) const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value.
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
/// \return The cost of Load and Store instructions.
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions.
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const;
/// \return The cost of Gather or Scatter operation
/// \p Opcode - is a type of memory access Load or Store
/// \p DataTy - a vector type of the data to be loaded or stored
/// \p Ptr - pointer [or vector of pointers] - address[es] in memory
/// \p VariableMask - true when the memory access is predicated with a mask
/// that is not a compile-time constant
/// \p Alignment - alignment of single element
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment) const;
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
/// \p VecTy is the vector type of the interleaved access.
/// \p Factor is the interleave factor
/// \p Indices is the indices for interleaved load members (as interleaved
/// load allows gaps)
/// \p Alignment is the alignment of the memory operation
/// \p AddressSpace is address space of the pointer.
/// \p UseMaskForCond indicates if the memory access is predicated.
/// \p UseMaskForGaps indicates if gaps should be masked.
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace,
bool UseMaskForCond = false,
bool UseMaskForGaps = false) const;
/// Calculate the cost of performing a vector reduction.
Costmodel: Add support for horizontal vector reductions Upcoming SLP vectorization improvements will want to be able to estimate costs of horizontal reductions. Add infrastructure to support this. We model reductions as a series of (shufflevector,add) tuples ultimately followed by an extractelement. For example, for an add-reduction of <4 x float> we could generate the following sequence: (v0, v1, v2, v3) \ \ / / \ \ / + + (v0+v2, v1+v3, undef, undef) \ / ((v0+v2) + (v1+v3), undef, undef) %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 %r = extractelement <4 x float> %bin.rdx8, i32 0 This commit adds a cost model interface "getReductionCost(Opcode, Ty, Pairwise)" that will allow clients to ask for the cost of such a reduction (as backends might generate more efficient code than the cost of the individual instructions summed up). This interface is excercised by the CostModel analysis pass which looks for reduction patterns like the one above - starting at extractelements - and if it sees a matching sequence will call the cost model interface. We will also support a second form of pairwise reduction that is well supported on common architectures (haddps, vpadd, faddp). (v0, v1, v2, v3) \ / \ / (v0+v1, v2+v3, undef, undef) \ / ((v0+v1)+(v2+v3), undef, undef, undef) %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 %r = extractelement <4 x float> %bin.rdx.1, i32 0 llvm-svn: 190876
2013-09-17 20:06:50 +02:00
///
/// This is the cost of reducing the vector value of type \p Ty to a scalar
/// value using the operation denoted by \p Opcode. The form of the reduction
/// can either be a pairwise reduction or a reduction that splits the vector
/// at every reduction level.
///
/// Pairwise:
/// (v0, v1, v2, v3)
/// ((v0+v1), (v2+v3), undef, undef)
Costmodel: Add support for horizontal vector reductions Upcoming SLP vectorization improvements will want to be able to estimate costs of horizontal reductions. Add infrastructure to support this. We model reductions as a series of (shufflevector,add) tuples ultimately followed by an extractelement. For example, for an add-reduction of <4 x float> we could generate the following sequence: (v0, v1, v2, v3) \ \ / / \ \ / + + (v0+v2, v1+v3, undef, undef) \ / ((v0+v2) + (v1+v3), undef, undef) %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 %r = extractelement <4 x float> %bin.rdx8, i32 0 This commit adds a cost model interface "getReductionCost(Opcode, Ty, Pairwise)" that will allow clients to ask for the cost of such a reduction (as backends might generate more efficient code than the cost of the individual instructions summed up). This interface is excercised by the CostModel analysis pass which looks for reduction patterns like the one above - starting at extractelements - and if it sees a matching sequence will call the cost model interface. We will also support a second form of pairwise reduction that is well supported on common architectures (haddps, vpadd, faddp). (v0, v1, v2, v3) \ / \ / (v0+v1, v2+v3, undef, undef) \ / ((v0+v1)+(v2+v3), undef, undef, undef) %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 %r = extractelement <4 x float> %bin.rdx.1, i32 0 llvm-svn: 190876
2013-09-17 20:06:50 +02:00
/// Split:
/// (v0, v1, v2, v3)
/// ((v0+v2), (v1+v3), undef, undef)
int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) const;
int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
bool IsUnsigned) const;
Costmodel: Add support for horizontal vector reductions Upcoming SLP vectorization improvements will want to be able to estimate costs of horizontal reductions. Add infrastructure to support this. We model reductions as a series of (shufflevector,add) tuples ultimately followed by an extractelement. For example, for an add-reduction of <4 x float> we could generate the following sequence: (v0, v1, v2, v3) \ \ / / \ \ / + + (v0+v2, v1+v3, undef, undef) \ / ((v0+v2) + (v1+v3), undef, undef) %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 %r = extractelement <4 x float> %bin.rdx8, i32 0 This commit adds a cost model interface "getReductionCost(Opcode, Ty, Pairwise)" that will allow clients to ask for the cost of such a reduction (as backends might generate more efficient code than the cost of the individual instructions summed up). This interface is excercised by the CostModel analysis pass which looks for reduction patterns like the one above - starting at extractelements - and if it sees a matching sequence will call the cost model interface. We will also support a second form of pairwise reduction that is well supported on common architectures (haddps, vpadd, faddp). (v0, v1, v2, v3) \ / \ / (v0+v1, v2+v3, undef, undef) \ / ((v0+v1)+(v2+v3), undef, undef, undef) %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 %r = extractelement <4 x float> %bin.rdx.1, i32 0 llvm-svn: 190876
2013-09-17 20:06:50 +02:00
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
/// Three cases are handled: 1. scalar instruction 2. vector instruction
/// 3. scalar instruction which is to be vectorized with VF.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1) const;
/// \returns The cost of Intrinsic instructions. Types analysis only.
/// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
/// arguments and the return value will be computed based on types.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX) const;
/// \returns The cost of Call instructions.
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
/// \returns The number of pieces into which the provided type must be
/// split during legalization. Zero is returned when the answer is unknown.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
unsigned getNumberOfParts(Type *Tp) const;
/// \returns The cost of the address computation. For most targets this can be
/// merged into the instruction indexing mode. Some targets might want to
/// distinguish between address computation for memory operations on vector
/// types and scalar types. Such targets should override this function.
/// The 'SE' parameter holds pointer for the scalar evolution object which
/// is used in order to get the Ptr step value in case of constant stride.
/// The 'Ptr' parameter holds SCEV of the access pointer.
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
const SCEV *Ptr = nullptr) const;
/// \returns The cost, if any, of keeping values of the given types alive
/// over a callsite.
///
/// Some types may require the use of register classes that do not have
/// any callee-saved registers, so would require a spill and fill.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
/// \returns True if the intrinsic is a supported memory intrinsic. Info
/// will contain additional information - whether the intrinsic may write
/// or read to memory, volatility and the pointer. Info is undefined
/// if false is returned.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
/// \returns The maximum element size, in bytes, for an element
/// unordered-atomic memory intrinsic.
unsigned getAtomicMemIntrinsicMaxElementSize() const;
/// \returns A value which is the result of the given memory intrinsic. New
/// instructions may be created to extract the result from the given intrinsic
/// memory operation. Returns nullptr if the target cannot create a result
/// from the given intrinsic.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) const;
/// \returns The type to use in a loop expansion of a memcpy call.
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAlign, unsigned DestAlign) const;
/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
/// \param RemainingBytes The number of bytes to copy.
///
/// Calculates the operand types to use when copying \p RemainingBytes of
/// memory, where source and destination alignments are \p SrcAlign and
/// \p DestAlign respectively.
void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
LLVMContext &Context,
unsigned RemainingBytes,
unsigned SrcAlign,
unsigned DestAlign) const;
/// \returns True if the two functions have compatible attributes for inlining
/// purposes.
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
/// \returns True if the caller and callee agree on how \p Args will be passed
/// to the callee.
/// \param[out] Args The list of compatible arguments. The implementation may
/// filter out any incompatible args from this list.
bool areFunctionArgsABICompatible(const Function *Caller,
const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const;
/// The type of load/store indexing.
enum MemIndexedMode {
MIM_Unindexed, ///< No indexing.
MIM_PreInc, ///< Pre-incrementing.
MIM_PreDec, ///< Pre-decrementing.
MIM_PostInc, ///< Post-incrementing.
MIM_PostDec ///< Post-decrementing.
};
/// \returns True if the specified indexed load for the given type is legal.
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
/// \returns True if the specified indexed store for the given type is legal.
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
/// \returns The bitwidth of the largest vector type that should be used to
/// load/store in the given address space.
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
/// \returns True if the load instruction is legal to vectorize.
bool isLegalToVectorizeLoad(LoadInst *LI) const;
/// \returns True if the store instruction is legal to vectorize.
bool isLegalToVectorizeStore(StoreInst *SI) const;
/// \returns True if it is legal to vectorize the given load chain.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
unsigned Alignment,
unsigned AddrSpace) const;
/// \returns True if it is legal to vectorize the given store chain.
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
unsigned Alignment,
unsigned AddrSpace) const;
/// \returns The new vector factor value if the target doesn't support \p
/// SizeInBytes loads or has a better vector factor.
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const;
/// \returns The new vector factor value if the target doesn't support \p
/// SizeInBytes stores or has a better vector factor.
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const;
/// Flags describing the kind of vector reduction.
struct ReductionFlags {
ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
bool IsMaxOp; ///< If the op a min/max kind, true if it's a max operation.
bool IsSigned; ///< Whether the operation is a signed int reduction.
bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
};
/// \returns True if the target wants to handle the given reduction idiom in
/// the intrinsics form instead of the shuffle form.
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const;
/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
bool shouldExpandReduction(const IntrinsicInst *II) const;
[GlobalISel][Localizer] Rewrite localizer to run in 2 phases, inter & intra block. Inter-block localization is the same as what currently happens, except now it only runs on the entry block because that's where the problematic constants with long live ranges come from. The second phase is a new intra-block localization phase which attempts to re-sink the already localized instructions further right before one of the multiple uses. One additional change is to also localize G_GLOBAL_VALUE as they're constants too. However, on some targets like arm64 it takes multiple instructions to materialize the value, so some additional heuristics with a TTI hook have been introduced attempt to prevent code size regressions when localizing these. Overall, these changes improve CTMark code size on arm64 by 1.2%. Full code size results: Program baseline new diff ------------------------------------------------------------------------------ test-suite...-typeset/consumer-typeset.test 1249984 1217216 -2.6% test-suite...:: CTMark/ClamAV/clamscan.test 1264928 1232152 -2.6% test-suite :: CTMark/SPASS/SPASS.test 1394092 1361316 -2.4% test-suite...Mark/mafft/pairlocalalign.test 731320 714928 -2.2% test-suite :: CTMark/lencod/lencod.test 1340592 1324200 -1.2% test-suite :: CTMark/kimwitu++/kc.test 3853512 3820420 -0.9% test-suite :: CTMark/Bullet/bullet.test 3406036 3389652 -0.5% test-suite...ark/tramp3d-v4/tramp3d-v4.test 8017000 8016992 -0.0% test-suite...TMark/7zip/7zip-benchmark.test 2856588 2856588 0.0% test-suite...:: CTMark/sqlite3/sqlite3.test 765704 765704 0.0% Geomean difference -1.2% Differential Revision: https://reviews.llvm.org/D63303 llvm-svn: 363632
2019-06-18 01:20:29 +02:00
/// \returns the size cost of rematerializing a GlobalValue address relative
/// to a stack reload.
unsigned getGISelRematGlobalCost() const;
/// @}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
private:
/// Estimate the latency of specified instruction.
/// Returns 1 as the default value.
int getInstructionLatency(const Instruction *I) const;
/// Returns the expected throughput cost of the instruction.
/// Returns -1 if the cost is unknown.
int getInstructionThroughput(const Instruction *I) const;
/// The abstract base class used to type erase specific TTI
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
/// implementations.
class Concept;
/// The template model for the base class which wraps a concrete
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
/// implementation in a type erased interface.
template <typename T> class Model;
std::unique_ptr<Concept> TTIImpl;
};
class TargetTransformInfo::Concept {
public:
virtual ~Concept() = 0;
virtual const DataLayout &getDataLayout() const = 0;
virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) = 0;
virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
virtual int getCallCost(const Function *F,
ArrayRef<const Value *> Arguments, const User *U) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys, const User *U) = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments,
const User *U) = 0;
virtual int getMemcpyCost(const Instruction *I) = 0;
[InlineCost] Improve the cost heuristic for Switch Summary: The motivation example is like below which has 13 cases but only 2 distinct targets ``` lor.lhs.false2: ; preds = %if.then switch i32 %Status, label %if.then27 [ i32 -7012, label %if.end35 i32 -10008, label %if.end35 i32 -10016, label %if.end35 i32 15000, label %if.end35 i32 14013, label %if.end35 i32 10114, label %if.end35 i32 10107, label %if.end35 i32 10105, label %if.end35 i32 10013, label %if.end35 i32 10011, label %if.end35 i32 7008, label %if.end35 i32 7007, label %if.end35 i32 5002, label %if.end35 ] ``` which is compiled into a balanced binary tree like this on AArch64 (similar on X86) ``` .LBB853_9: // %lor.lhs.false2 mov w8, #10012 cmp w19, w8 b.gt .LBB853_14 // BB#10: // %lor.lhs.false2 mov w8, #5001 cmp w19, w8 b.gt .LBB853_18 // BB#11: // %lor.lhs.false2 mov w8, #-10016 cmp w19, w8 b.eq .LBB853_23 // BB#12: // %lor.lhs.false2 mov w8, #-10008 cmp w19, w8 b.eq .LBB853_23 // BB#13: // %lor.lhs.false2 mov w8, #-7012 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_14: // %lor.lhs.false2 mov w8, #14012 cmp w19, w8 b.gt .LBB853_21 // BB#15: // %lor.lhs.false2 mov w8, #-10105 add w8, w19, w8 cmp w8, #9 // =9 b.hi .LBB853_17 // BB#16: // %lor.lhs.false2 orr w9, wzr, #0x1 lsl w8, w9, w8 mov w9, #517 and w8, w8, w9 cbnz w8, .LBB853_23 .LBB853_17: // %lor.lhs.false2 mov w8, #10013 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_18: // %lor.lhs.false2 mov w8, #-7007 add w8, w19, w8 cmp w8, #2 // =2 b.lo .LBB853_23 // BB#19: // %lor.lhs.false2 mov w8, #5002 cmp w19, w8 b.eq .LBB853_23 // BB#20: // %lor.lhs.false2 mov w8, #10011 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_21: // %lor.lhs.false2 mov w8, #14013 cmp w19, w8 b.eq .LBB853_23 // BB#22: // %lor.lhs.false2 mov w8, #15000 cmp w19, w8 b.ne .LBB853_3 ``` However, the inline cost model estimates the cost to be linear with the number of distinct targets and the cost of the above switch is just 2 InstrCosts. The function containing this switch is then inlined about 900 times. This change use the general way of switch lowering for the inline heuristic. It etimate the number of case clusters with the suitability check for a jump table or bit test. Considering the binary search tree built for the clusters, this change modifies the model to be linear with the size of the balanced binary tree. The model is off by default for now : -inline-generic-switch-cost=false This change was originally proposed by Haicheng in D29870. Reviewers: hans, bmakam, chandlerc, eraman, haicheng, mcrosier Reviewed By: hans Subscribers: joerg, aemerson, llvm-commits, rengolin Differential Revision: https://reviews.llvm.org/D31085 llvm-svn: 301649
2017-04-28 18:04:03 +02:00
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize) = 0;
virtual int
getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual bool hasBranchDivergence() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
virtual unsigned getFlatAddressSpace() = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace,
Instruction *I) = 0;
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;
virtual bool canMacroFuseCmp() = 0;
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) = 0;
virtual bool shouldFavorPostInc() const = 0;
virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
virtual bool isLegalMaskedStore(Type *DataType) = 0;
virtual bool isLegalMaskedLoad(Type *DataType) = 0;
virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
virtual bool isLegalMaskedScatter(Type *DataType) = 0;
virtual bool isLegalMaskedGather(Type *DataType) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace) = 0;
virtual bool LSRWithInstrQueries() = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool useAA() = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual bool isTypeLegal(Type *Ty) = 0;
virtual unsigned getJumpBufAlignment() = 0;
virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
virtual unsigned
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
unsigned VF) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual MemCmpExpansionOptions
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool enableMaskedInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
unsigned AddressSpace,
unsigned Alignment,
bool *Fast) = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
virtual int getFPOpCost(Type *Ty) = 0;
virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
[SLP] Enable 64-bit wide vectorization on AArch64 ARM Neon has native support for half-sized vector registers (64 bits). This is beneficial for example for 2D and 3D graphics. This patch adds the option to lower MinVecRegSize from 128 via a TTI in the SLP Vectorizer. *** Performance Analysis This change was motivated by some internal benchmarks but it is also beneficial on SPEC and the LLVM testsuite. The results are with -O3 and PGO. A negative percentage is an improvement. The testsuite was run with a sample size of 4. ** SPEC * CFP2006/482.sphinx3 -3.34% A pretty hot loop is SLP vectorized resulting in nice instruction reduction. This used to be a +22% regression before rL299482. * CFP2000/177.mesa -3.34% * CINT2000/256.bzip2 +6.97% My current plan is to extend the fix in rL299482 to i16 which brings the regression down to +2.5%. There are also other problems with the codegen in this loop so there is further room for improvement. ** LLVM testsuite * SingleSource/Benchmarks/Misc/ReedSolomon -10.75% There are multiple small SLP vectorizations outside the hot code. It's a bit surprising that it adds up to 10%. Some of this may be code-layout noise. * MultiSource/Benchmarks/VersaBench/beamformer/beamformer -8.40% The opt-viewer screenshot can be seen at F3218284. We start at a colder store but the tree leads us into the hottest loop. * MultiSource/Applications/lambda-0.1.3/lambda -2.68% * MultiSource/Benchmarks/Bullet/bullet -2.18% This is using 3D vectors. * SingleSource/Benchmarks/Shootout-C++/Shootout-C++-lists +6.67% Noise, binary is unchanged. * MultiSource/Benchmarks/Ptrdist/anagram/anagram +4.90% There is an additional SLP in the cold code. The test runs for ~1sec and prints out over 2000 lines. This is most likely noise. * MultiSource/Applications/aha/aha +1.63% * MultiSource/Applications/JM/lencod/lencod +1.41% * SingleSource/Benchmarks/Misc/richards_benchmark +1.15% Differential Revision: https://reviews.llvm.org/D31965 llvm-svn: 303116
2017-05-15 23:15:01 +02:00
virtual unsigned getMinVectorRegisterBitWidth() = 0;
virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
virtual unsigned getCacheLineSize() = 0;
virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
virtual unsigned getPrefetchDistance() = 0;
virtual unsigned getMinPrefetchStride() = 0;
virtual unsigned getMaxPrefetchIterationsAhead() = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args) = 0;
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) = 0;
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I) = 0;
virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index) = 0;
virtual int getCFInstrCost(unsigned Opcode) = 0;
virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy, const Instruction *I) = 0;
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I) = 0;
virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) = 0;
virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
Value *Ptr, bool VariableMask,
unsigned Alignment) = 0;
virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
unsigned Alignment,
unsigned AddressSpace,
bool UseMaskForCond = false,
bool UseMaskForGaps = false) = 0;
virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) = 0;
virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
bool IsPairwiseForm, bool IsUnsigned) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
virtual int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual unsigned getNumberOfParts(Type *Tp) = 0;
virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
const SCEV *Ptr) = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) = 0;
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) = 0;
virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAlign,
unsigned DestAlign) const = 0;
virtual void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
virtual bool
areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const = 0;
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0;
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
unsigned Alignment,
unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
unsigned Alignment,
unsigned AddrSpace) const = 0;
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
[GlobalISel][Localizer] Rewrite localizer to run in 2 phases, inter & intra block. Inter-block localization is the same as what currently happens, except now it only runs on the entry block because that's where the problematic constants with long live ranges come from. The second phase is a new intra-block localization phase which attempts to re-sink the already localized instructions further right before one of the multiple uses. One additional change is to also localize G_GLOBAL_VALUE as they're constants too. However, on some targets like arm64 it takes multiple instructions to materialize the value, so some additional heuristics with a TTI hook have been introduced attempt to prevent code size regressions when localizing these. Overall, these changes improve CTMark code size on arm64 by 1.2%. Full code size results: Program baseline new diff ------------------------------------------------------------------------------ test-suite...-typeset/consumer-typeset.test 1249984 1217216 -2.6% test-suite...:: CTMark/ClamAV/clamscan.test 1264928 1232152 -2.6% test-suite :: CTMark/SPASS/SPASS.test 1394092 1361316 -2.4% test-suite...Mark/mafft/pairlocalalign.test 731320 714928 -2.2% test-suite :: CTMark/lencod/lencod.test 1340592 1324200 -1.2% test-suite :: CTMark/kimwitu++/kc.test 3853512 3820420 -0.9% test-suite :: CTMark/Bullet/bullet.test 3406036 3389652 -0.5% test-suite...ark/tramp3d-v4/tramp3d-v4.test 8017000 8016992 -0.0% test-suite...TMark/7zip/7zip-benchmark.test 2856588 2856588 0.0% test-suite...:: CTMark/sqlite3/sqlite3.test 765704 765704 0.0% Geomean difference -1.2% Differential Revision: https://reviews.llvm.org/D63303 llvm-svn: 363632
2019-06-18 01:20:29 +02:00
virtual unsigned getGISelRematGlobalCost() const = 0;
virtual int getInstructionLatency(const Instruction *I) = 0;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
};
template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
T Impl;
public:
Model(T Impl) : Impl(std::move(Impl)) {}
~Model() override {}
const DataLayout &getDataLayout() const override {
return Impl.getDataLayout();
}
int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.getOperationCost(Opcode, Ty, OpTy);
}
int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) override {
return Impl.getGEPCost(PointeeType, Ptr, Operands);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getExtCost(const Instruction *I, const Value *Src) override {
return Impl.getExtCost(I, Src);
}
int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
return Impl.getCallCost(FTy, NumArgs, U);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getCallCost(const Function *F, int NumArgs, const User *U) override {
return Impl.getCallCost(F, NumArgs, U);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getCallCost(const Function *F,
ArrayRef<const Value *> Arguments, const User *U) override {
return Impl.getCallCost(F, Arguments, U);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
}
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments,
const User *U = nullptr) override {
return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getMemcpyCost(const Instruction *I) override {
return Impl.getMemcpyCost(I);
}
int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
return Impl.getUserCost(U, Operands);
}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);
}
bool isAlwaysUniform(const Value *V) override {
return Impl.isAlwaysUniform(V);
}
unsigned getFlatAddressSpace() override {
return Impl.getFlatAddressSpace();
}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);
}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UnrollingPreferences &UP) override {
return Impl.getUnrollingPreferences(L, SE, UP);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) override {
return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
bool isLegalICmpImmediate(int64_t Imm) override {
return Impl.isLegalICmpImmediate(Imm);
}
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
Instruction *I) override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale, AddrSpace, I);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) override {
return Impl.isLSRCostLess(C1, C2);
}
bool canMacroFuseCmp() override {
return Impl.canMacroFuseCmp();
}
bool canSaveCmp(Loop *L, BranchInst **BI,
ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) override {
return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}
bool shouldFavorPostInc() const override {
return Impl.shouldFavorPostInc();
}
bool shouldFavorBackedgeIndex(const Loop *L) const override {
return Impl.shouldFavorBackedgeIndex(L);
}
bool isLegalMaskedStore(Type *DataType) override {
return Impl.isLegalMaskedStore(DataType);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
bool isLegalMaskedLoad(Type *DataType) override {
return Impl.isLegalMaskedLoad(DataType);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
return Impl.isLegalNTStore(DataType, Alignment);
}
bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
return Impl.isLegalNTLoad(DataType, Alignment);
}
bool isLegalMaskedScatter(Type *DataType) override {
return Impl.isLegalMaskedScatter(DataType);
}
bool isLegalMaskedGather(Type *DataType) override {
return Impl.isLegalMaskedGather(DataType);
}
bool isLegalMaskedCompressStore(Type *DataType) override {
return Impl.isLegalMaskedCompressStore(DataType);
}
bool isLegalMaskedExpandLoad(Type *DataType) override {
return Impl.isLegalMaskedExpandLoad(DataType);
}
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
return Impl.hasDivRemOp(DataType, IsSigned);
}
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
return Impl.hasVolatileVariant(I, AddrSpace);
}
bool prefersVectorizedAddressing() override {
return Impl.prefersVectorizedAddressing();
}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) override {
return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale, AddrSpace);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
bool LSRWithInstrQueries() override {
return Impl.LSRWithInstrQueries();
}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool isTruncateFree(Type *Ty1, Type *Ty2) override {
return Impl.isTruncateFree(Ty1, Ty2);
}
bool isProfitableToHoist(Instruction *I) override {
return Impl.isProfitableToHoist(I);
}
bool useAA() override { return Impl.useAA(); }
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
bool shouldBuildLookupTables() override {
return Impl.shouldBuildLookupTables();
}
bool shouldBuildLookupTablesForConstant(Constant *C) override {
return Impl.shouldBuildLookupTablesForConstant(C);
}
bool useColdCCForColdCall(Function &F) override {
return Impl.useColdCCForColdCall(F);
}
unsigned getScalarizationOverhead(Type *Ty, bool Insert,
bool Extract) override {
return Impl.getScalarizationOverhead(Ty, Insert, Extract);
}
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
unsigned VF) override {
return Impl.getOperandsScalarizationOverhead(Args, VF);
}
bool supportsEfficientVectorElementLoadStore() override {
return Impl.supportsEfficientVectorElementLoadStore();
}
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const override {
return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
}
bool enableMaskedInterleavedAccessVectorization() override {
return Impl.enableMaskedInterleavedAccessVectorization();
}
bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();
}
bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth, unsigned AddressSpace,
unsigned Alignment, bool *Fast) override {
return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
return Impl.getPopcntSupport(IntTyWidthInBit);
}
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
}
int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) override {
return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
}
int getIntImmCost(const APInt &Imm, Type *Ty) override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.getIntImmCost(Imm, Ty);
}
int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
}
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.getIntImmCost(IID, Idx, Imm, Ty);
}
unsigned getNumberOfRegisters(bool Vector) override {
return Impl.getNumberOfRegisters(Vector);
}
unsigned getRegisterBitWidth(bool Vector) const override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.getRegisterBitWidth(Vector);
}
[SLP] Enable 64-bit wide vectorization on AArch64 ARM Neon has native support for half-sized vector registers (64 bits). This is beneficial for example for 2D and 3D graphics. This patch adds the option to lower MinVecRegSize from 128 via a TTI in the SLP Vectorizer. *** Performance Analysis This change was motivated by some internal benchmarks but it is also beneficial on SPEC and the LLVM testsuite. The results are with -O3 and PGO. A negative percentage is an improvement. The testsuite was run with a sample size of 4. ** SPEC * CFP2006/482.sphinx3 -3.34% A pretty hot loop is SLP vectorized resulting in nice instruction reduction. This used to be a +22% regression before rL299482. * CFP2000/177.mesa -3.34% * CINT2000/256.bzip2 +6.97% My current plan is to extend the fix in rL299482 to i16 which brings the regression down to +2.5%. There are also other problems with the codegen in this loop so there is further room for improvement. ** LLVM testsuite * SingleSource/Benchmarks/Misc/ReedSolomon -10.75% There are multiple small SLP vectorizations outside the hot code. It's a bit surprising that it adds up to 10%. Some of this may be code-layout noise. * MultiSource/Benchmarks/VersaBench/beamformer/beamformer -8.40% The opt-viewer screenshot can be seen at F3218284. We start at a colder store but the tree leads us into the hottest loop. * MultiSource/Applications/lambda-0.1.3/lambda -2.68% * MultiSource/Benchmarks/Bullet/bullet -2.18% This is using 3D vectors. * SingleSource/Benchmarks/Shootout-C++/Shootout-C++-lists +6.67% Noise, binary is unchanged. * MultiSource/Benchmarks/Ptrdist/anagram/anagram +4.90% There is an additional SLP in the cold code. The test runs for ~1sec and prints out over 2000 lines. This is most likely noise. * MultiSource/Applications/aha/aha +1.63% * MultiSource/Applications/JM/lencod/lencod +1.41% * SingleSource/Benchmarks/Misc/richards_benchmark +1.15% Differential Revision: https://reviews.llvm.org/D31965 llvm-svn: 303116
2017-05-15 23:15:01 +02:00
unsigned getMinVectorRegisterBitWidth() override {
return Impl.getMinVectorRegisterBitWidth();
}
bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
return Impl.shouldMaximizeVectorBandwidth(OptSize);
}
unsigned getMinimumVF(unsigned ElemWidth) const override {
return Impl.getMinimumVF(ElemWidth);
}
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
return Impl.shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
unsigned getCacheLineSize() override {
return Impl.getCacheLineSize();
}
llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
2017-08-24 11:46:25 +02:00
return Impl.getCacheSize(Level);
}
llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
2017-08-24 11:46:25 +02:00
return Impl.getCacheAssociativity(Level);
}
unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
unsigned getMinPrefetchStride() override {
return Impl.getMinPrefetchStride();
}
unsigned getMaxPrefetchIterationsAhead() override {
return Impl.getMaxPrefetchIterationsAhead();
}
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
[InlineCost] Improve the cost heuristic for Switch Summary: The motivation example is like below which has 13 cases but only 2 distinct targets ``` lor.lhs.false2: ; preds = %if.then switch i32 %Status, label %if.then27 [ i32 -7012, label %if.end35 i32 -10008, label %if.end35 i32 -10016, label %if.end35 i32 15000, label %if.end35 i32 14013, label %if.end35 i32 10114, label %if.end35 i32 10107, label %if.end35 i32 10105, label %if.end35 i32 10013, label %if.end35 i32 10011, label %if.end35 i32 7008, label %if.end35 i32 7007, label %if.end35 i32 5002, label %if.end35 ] ``` which is compiled into a balanced binary tree like this on AArch64 (similar on X86) ``` .LBB853_9: // %lor.lhs.false2 mov w8, #10012 cmp w19, w8 b.gt .LBB853_14 // BB#10: // %lor.lhs.false2 mov w8, #5001 cmp w19, w8 b.gt .LBB853_18 // BB#11: // %lor.lhs.false2 mov w8, #-10016 cmp w19, w8 b.eq .LBB853_23 // BB#12: // %lor.lhs.false2 mov w8, #-10008 cmp w19, w8 b.eq .LBB853_23 // BB#13: // %lor.lhs.false2 mov w8, #-7012 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_14: // %lor.lhs.false2 mov w8, #14012 cmp w19, w8 b.gt .LBB853_21 // BB#15: // %lor.lhs.false2 mov w8, #-10105 add w8, w19, w8 cmp w8, #9 // =9 b.hi .LBB853_17 // BB#16: // %lor.lhs.false2 orr w9, wzr, #0x1 lsl w8, w9, w8 mov w9, #517 and w8, w8, w9 cbnz w8, .LBB853_23 .LBB853_17: // %lor.lhs.false2 mov w8, #10013 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_18: // %lor.lhs.false2 mov w8, #-7007 add w8, w19, w8 cmp w8, #2 // =2 b.lo .LBB853_23 // BB#19: // %lor.lhs.false2 mov w8, #5002 cmp w19, w8 b.eq .LBB853_23 // BB#20: // %lor.lhs.false2 mov w8, #10011 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_21: // %lor.lhs.false2 mov w8, #14013 cmp w19, w8 b.eq .LBB853_23 // BB#22: // %lor.lhs.false2 mov w8, #15000 cmp w19, w8 b.ne .LBB853_3 ``` However, the inline cost model estimates the cost to be linear with the number of distinct targets and the cost of the above switch is just 2 InstrCosts. The function containing this switch is then inlined about 900 times. This change use the general way of switch lowering for the inline heuristic. It etimate the number of case clusters with the suitability check for a jump table or bit test. Considering the binary search tree built for the clusters, this change modifies the model to be linear with the size of the balanced binary tree. The model is off by default for now : -inline-generic-switch-cost=false This change was originally proposed by Haicheng in D29870. Reviewers: hans, bmakam, chandlerc, eraman, haicheng, mcrosier Reviewed By: hans Subscribers: joerg, aemerson, llvm-commits, rengolin Differential Revision: https://reviews.llvm.org/D31085 llvm-svn: 301649
2017-04-28 18:04:03 +02:00
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize) override {
return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args) override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo, Args);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
}
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I) override {
return Impl.getCastInstrCost(Opcode, Dst, Src, I);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index) override {
return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
}
int getCFInstrCost(unsigned Opcode) override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.getCFInstrCost(Opcode);
}
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I) override {
return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I) override {
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) override {
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
}
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
Value *Ptr, bool VariableMask,
unsigned Alignment) override {
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment);
}
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace, bool UseMaskForCond,
bool UseMaskForGaps) override {
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace,
UseMaskForCond, UseMaskForGaps);
}
int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) override {
return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getMinMaxReductionCost(Type *Ty, Type *CondTy,
bool IsPairwiseForm, bool IsUnsigned) override {
return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
ScalarizationCostPassed);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
}
int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) override {
return Impl.getCallInstrCost(F, RetTy, Tys);
}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
unsigned getNumberOfParts(Type *Tp) override {
return Impl.getNumberOfParts(Tp);
}
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
const SCEV *Ptr) override {
return Impl.getAddressComputationCost(Ty, SE, Ptr);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
}
unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
return Impl.getCostOfKeepingLiveOverCall(Tys);
}
bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) override {
return Impl.getTgtMemIntrinsic(Inst, Info);
}
unsigned getAtomicMemIntrinsicMaxElementSize() const override {
return Impl.getAtomicMemIntrinsicMaxElementSize();
}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) override {
return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAlign,
unsigned DestAlign) const override {
return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
}
void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
LLVMContext &Context,
unsigned RemainingBytes,
unsigned SrcAlign,
unsigned DestAlign) const override {
Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
SrcAlign, DestAlign);
}
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const override {
return Impl.areInlineCompatible(Caller, Callee);
}
bool areFunctionArgsABICompatible(
const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const override {
return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
}
bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
}
bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
}
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
}
bool isLegalToVectorizeLoad(LoadInst *LI) const override {
return Impl.isLegalToVectorizeLoad(LI);
}
bool isLegalToVectorizeStore(StoreInst *SI) const override {
return Impl.isLegalToVectorizeStore(SI);
}
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
unsigned Alignment,
unsigned AddrSpace) const override {
return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
unsigned Alignment,
unsigned AddrSpace) const override {
return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const override {
return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const override {
return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const override {
return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
}
bool shouldExpandReduction(const IntrinsicInst *II) const override {
return Impl.shouldExpandReduction(II);
}
[GlobalISel][Localizer] Rewrite localizer to run in 2 phases, inter & intra block. Inter-block localization is the same as what currently happens, except now it only runs on the entry block because that's where the problematic constants with long live ranges come from. The second phase is a new intra-block localization phase which attempts to re-sink the already localized instructions further right before one of the multiple uses. One additional change is to also localize G_GLOBAL_VALUE as they're constants too. However, on some targets like arm64 it takes multiple instructions to materialize the value, so some additional heuristics with a TTI hook have been introduced attempt to prevent code size regressions when localizing these. Overall, these changes improve CTMark code size on arm64 by 1.2%. Full code size results: Program baseline new diff ------------------------------------------------------------------------------ test-suite...-typeset/consumer-typeset.test 1249984 1217216 -2.6% test-suite...:: CTMark/ClamAV/clamscan.test 1264928 1232152 -2.6% test-suite :: CTMark/SPASS/SPASS.test 1394092 1361316 -2.4% test-suite...Mark/mafft/pairlocalalign.test 731320 714928 -2.2% test-suite :: CTMark/lencod/lencod.test 1340592 1324200 -1.2% test-suite :: CTMark/kimwitu++/kc.test 3853512 3820420 -0.9% test-suite :: CTMark/Bullet/bullet.test 3406036 3389652 -0.5% test-suite...ark/tramp3d-v4/tramp3d-v4.test 8017000 8016992 -0.0% test-suite...TMark/7zip/7zip-benchmark.test 2856588 2856588 0.0% test-suite...:: CTMark/sqlite3/sqlite3.test 765704 765704 0.0% Geomean difference -1.2% Differential Revision: https://reviews.llvm.org/D63303 llvm-svn: 363632
2019-06-18 01:20:29 +02:00
unsigned getGISelRematGlobalCost() const override {
return Impl.getGISelRematGlobalCost();
}
int getInstructionLatency(const Instruction *I) override {
return Impl.getInstructionLatency(I);
}
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
};
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
: TTIImpl(new Model<T>(Impl)) {}
/// Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
typedef TargetTransformInfo Result;
/// Default construct a target IR analysis.
///
/// This will use the module's datalayout to construct a baseline
/// conservative TTI result.
TargetIRAnalysis();
/// Construct an IR analysis pass around a target-provide callback.
///
/// The callback will be called with a particular function for which the TTI
/// is needed and must return a TTI object for that function.
TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
// Value semantics. We spell out the constructors for MSVC.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
: TTICallback(Arg.TTICallback) {}
TargetIRAnalysis(TargetIRAnalysis &&Arg)
: TTICallback(std::move(Arg.TTICallback)) {}
TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
TTICallback = RHS.TTICallback;
return *this;
}
TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
TTICallback = std::move(RHS.TTICallback);
return *this;
}
Result run(const Function &F, FunctionAnalysisManager &);
private:
friend AnalysisInfoMixin<TargetIRAnalysis>;
[PM] Change the static object whose address is used to uniquely identify analyses to have a common type which is enforced rather than using a char object and a `void *` type when used as an identifier. This has a number of advantages. First, it at least helps some of the confusion raised in Justin Lebar's code review of why `void *` was being used everywhere by having a stronger type that connects to documentation about this. However, perhaps more importantly, it addresses a serious issue where the alignment of these pointer-like identifiers was unknown. This made it hard to use them in pointer-like data structures. We were already dodging this in dangerous ways to create the "all analyses" entry. In a subsequent patch I attempted to use these with TinyPtrVector and things fell apart in a very bad way. And it isn't just a compile time or type system issue. Worse than that, the actual alignment of these pointer-like opaque identifiers wasn't guaranteed to be a useful alignment as they were just characters. This change introduces a type to use as the "key" object whose address forms the opaque identifier. This both forces the objects to have proper alignment, and provides type checking that we get it right everywhere. It also makes the types somewhat less mysterious than `void *`. We could go one step further and introduce a truly opaque pointer-like type to return from the `ID()` static function rather than returning `AnalysisKey *`, but that didn't seem to be a clear win so this is just the initial change to get to a reliably typed and aligned object serving is a key for all the analyses. Thanks to Richard Smith and Justin Lebar for helping pick plausible names and avoid making this refactoring many times. =] And thanks to Sean for the super fast review! While here, I've tried to move away from the "PassID" nomenclature entirely as it wasn't really helping and is overloaded with old pass manager constructs. Now we have IDs for analyses, and key objects whose address can be used as IDs. Where possible and clear I've shortened this to just "ID". In a few places I kept "AnalysisID" to make it clear what was being identified. Differential Revision: https://reviews.llvm.org/D27031 llvm-svn: 287783
2016-11-23 18:53:26 +01:00
static AnalysisKey Key;
/// The callback used to produce a result.
///
/// We use a completely opaque callback so that targets can provide whatever
/// mechanism they desire for constructing the TTI for a given function.
///
/// FIXME: Should we really use std::function? It's relatively inefficient.
/// It might be possible to arrange for even stateful callbacks to outlive
/// the analysis and thus use a function_ref which would be lighter weight.
/// This may also be less error prone as the callback is likely to reference
/// the external TargetMachine, and that reference needs to never dangle.
std::function<Result(const Function &)> TTICallback;
/// Helper function used as the callback in the default constructor.
static Result getDefaultTTI(const Function &F);
};
/// Wrapper pass for TargetTransformInfo.
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
TargetIRAnalysis TIRA;
Optional<TargetTransformInfo> TTI;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
virtual void anchor();
public:
static char ID;
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
/// We must provide a default constructor for the pass but it should
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
/// never be used.
///
/// Use the constructor below or call one of the creation routines.
TargetTransformInfoWrapperPass();
explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
[PM] Change the core design of the TTI analysis to use a polymorphic type erased interface and a single analysis pass rather than an extremely complex analysis group. The end result is that the TTI analysis can contain a type erased implementation that supports the polymorphic TTI interface. We can build one from a target-specific implementation or from a dummy one in the IR. I've also factored all of the code into "mix-in"-able base classes, including CRTP base classes to facilitate calling back up to the most specialized form when delegating horizontally across the surface. These aren't as clean as I would like and I'm planning to work on cleaning some of this up, but I wanted to start by putting into the right form. There are a number of reasons for this change, and this particular design. The first and foremost reason is that an analysis group is complete overkill, and the chaining delegation strategy was so opaque, confusing, and high overhead that TTI was suffering greatly for it. Several of the TTI functions had failed to be implemented in all places because of the chaining-based delegation making there be no checking of this. A few other functions were implemented with incorrect delegation. The message to me was very clear working on this -- the delegation and analysis group structure was too confusing to be useful here. The other reason of course is that this is *much* more natural fit for the new pass manager. This will lay the ground work for a type-erased per-function info object that can look up the correct subtarget and even cache it. Yet another benefit is that this will significantly simplify the interaction of the pass managers and the TargetMachine. See the future work below. The downside of this change is that it is very, very verbose. I'm going to work to improve that, but it is somewhat an implementation necessity in C++ to do type erasure. =/ I discussed this design really extensively with Eric and Hal prior to going down this path, and afterward showed them the result. No one was really thrilled with it, but there doesn't seem to be a substantially better alternative. Using a base class and virtual method dispatch would make the code much shorter, but as discussed in the update to the programmer's manual and elsewhere, a polymorphic interface feels like the more principled approach even if this is perhaps the least compelling example of it. ;] Ultimately, there is still a lot more to be done here, but this was the huge chunk that I couldn't really split things out of because this was the interface change to TTI. I've tried to minimize all the other parts of this. The follow up work should include at least: 1) Improving the TargetMachine interface by having it directly return a TTI object. Because we have a non-pass object with value semantics and an internal type erasure mechanism, we can narrow the interface of the TargetMachine to *just* do what we need: build and return a TTI object that we can then insert into the pass pipeline. 2) Make the TTI object be fully specialized for a particular function. This will include splitting off a minimal form of it which is sufficient for the inliner and the old pass manager. 3) Add a new pass manager analysis which produces TTI objects from the target machine for each function. This may actually be done as part of #2 in order to use the new analysis to implement #2. 4) Work on narrowing the API between TTI and the targets so that it is easier to understand and less verbose to type erase. 5) Work on narrowing the API between TTI and its clients so that it is easier to understand and less verbose to forward. 6) Try to improve the CRTP-based delegation. I feel like this code is just a bit messy and exacerbating the complexity of implementing the TTI in each target. Many thanks to Eric and Hal for their help here. I ended up blocked on this somewhat more abruptly than I expected, and so I appreciate getting it sorted out very quickly. Differential Revision: http://reviews.llvm.org/D7293 llvm-svn: 227669
2015-01-31 04:43:40 +01:00
TargetTransformInfo &getTTI(const Function &F);
};
/// Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
} // End llvm namespace
#endif