mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[Hexagon] Improve balancing of address calculation
Rebalances address calculation trees and applies Hexagon-specific optimizations to the trees to improve instruction selection. Patch by Tobias Edler von Koch. llvm-svn: 277151
This commit is contained in:
parent
c5d7dc7ef7
commit
4ca53a9c57
@ -32,6 +32,24 @@ MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
|
||||
cl::desc("Maximum number of uses of a global address such that we still us a"
|
||||
"constant extended instruction"));
|
||||
|
||||
static
|
||||
cl::opt<bool>
|
||||
EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
|
||||
cl::desc("Rebalance address calculation trees to improve "
|
||||
"instruction selection"));
|
||||
|
||||
// Rebalance only if this allows e.g. combining a GA with an offset or
|
||||
// factoring out a shift.
|
||||
static
|
||||
cl::opt<bool>
|
||||
RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
|
||||
cl::desc("Rebalance address tree only if this allows optimizations"));
|
||||
|
||||
static
|
||||
cl::opt<bool>
|
||||
RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
|
||||
cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction Selector Implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -42,14 +60,13 @@ MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
|
||||
///
|
||||
namespace {
|
||||
class HexagonDAGToDAGISel : public SelectionDAGISel {
|
||||
const HexagonTargetMachine &HTM;
|
||||
const HexagonSubtarget *HST;
|
||||
const HexagonInstrInfo *HII;
|
||||
const HexagonRegisterInfo *HRI;
|
||||
public:
|
||||
explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
|
||||
CodeGenOpt::Level OptLevel)
|
||||
: SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
|
||||
: SelectionDAGISel(tm, OptLevel), HST(nullptr), HII(nullptr),
|
||||
HRI(nullptr) {}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override {
|
||||
@ -92,7 +109,6 @@ public:
|
||||
std::vector<SDValue> &OutOps) override;
|
||||
bool tryLoadOfLoadIntrinsic(LoadSDNode *N);
|
||||
void SelectLoad(SDNode *N);
|
||||
void SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
|
||||
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl);
|
||||
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl);
|
||||
void SelectStore(SDNode *N);
|
||||
@ -179,6 +195,17 @@ private:
|
||||
bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
|
||||
bool orIsAdd(const SDNode *N) const;
|
||||
bool isAlignedMemNode(const MemSDNode *N) const;
|
||||
|
||||
SmallDenseMap<SDNode *,int> RootWeights;
|
||||
SmallDenseMap<SDNode *,int> RootHeights;
|
||||
SmallDenseMap<const Value *,int> GAUsesInFunction;
|
||||
int getWeight(SDNode *N);
|
||||
int getHeight(SDNode *N);
|
||||
SDValue getMultiplierForSHL(SDNode *N);
|
||||
SDValue factorOutPowerOf2(SDValue V, unsigned Power);
|
||||
unsigned getUsesInFunction(const Value *V);
|
||||
SDValue balanceSubTree(SDNode *N, bool Factorize = false);
|
||||
void rebalanceAddressTrees();
|
||||
}; // end HexagonDAGToDAGISel
|
||||
} // end anonymous namespace
|
||||
|
||||
@ -1373,6 +1400,16 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
|
||||
SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
|
||||
ReplaceNode(T0.getNode(), NewShl.getNode());
|
||||
}
|
||||
|
||||
if (EnableAddressRebalancing) {
|
||||
rebalanceAddressTrees();
|
||||
|
||||
DEBUG(
|
||||
dbgs() << "************* SelectionDAG after preprocessing: ***********\n";
|
||||
CurDAG->dump();
|
||||
dbgs() << "************* End SelectionDAG after preprocessing ********\n";
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
|
||||
@ -1540,3 +1577,701 @@ bool HexagonDAGToDAGISel::orIsAdd(const SDNode *N) const {
|
||||
bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
|
||||
return N->getAlignment() >= N->getMemoryVT().getStoreSize();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Rebalancing of address calculation trees
|
||||
|
||||
static bool isOpcodeHandled(const SDNode *N) {
|
||||
switch (N->getOpcode()) {
|
||||
case ISD::ADD:
|
||||
case ISD::MUL:
|
||||
return true;
|
||||
case ISD::SHL:
|
||||
// We only handle constant shifts because these can be easily flattened
|
||||
// into multiplications by 2^Op1.
|
||||
return isa<ConstantSDNode>(N->getOperand(1).getNode());
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Return the weight of an SDNode
|
||||
int HexagonDAGToDAGISel::getWeight(SDNode *N) {
|
||||
if (!isOpcodeHandled(N))
|
||||
return 1;
|
||||
assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
|
||||
assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
|
||||
assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
|
||||
return RootWeights[N];
|
||||
}
|
||||
|
||||
int HexagonDAGToDAGISel::getHeight(SDNode *N) {
|
||||
if (!isOpcodeHandled(N))
|
||||
return 0;
|
||||
assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
|
||||
"Cannot query height of unvisited/RAUW'd node!");
|
||||
return RootHeights[N];
|
||||
}
|
||||
|
||||
struct WeightedLeaf {
|
||||
SDValue Value;
|
||||
int Weight;
|
||||
int InsertionOrder;
|
||||
|
||||
WeightedLeaf() : Value(SDValue()) { }
|
||||
|
||||
WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
|
||||
Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
|
||||
assert(Weight >= 0 && "Weight must be >= 0");
|
||||
}
|
||||
|
||||
static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
|
||||
assert(A.Value.getNode() && B.Value.getNode());
|
||||
return A.Weight == B.Weight ?
|
||||
(A.InsertionOrder > B.InsertionOrder) :
|
||||
(A.Weight > B.Weight);
|
||||
}
|
||||
};
|
||||
|
||||
/// A specialized priority queue for WeigthedLeaves. It automatically folds
|
||||
/// constants and allows removal of non-top elements while maintaining the
|
||||
/// priority order.
|
||||
class LeafPrioQueue {
|
||||
SmallVector<WeightedLeaf, 8> Q;
|
||||
bool HaveConst;
|
||||
WeightedLeaf ConstElt;
|
||||
unsigned Opcode;
|
||||
|
||||
public:
|
||||
bool empty() {
|
||||
return (!HaveConst && Q.empty());
|
||||
}
|
||||
|
||||
size_t size() {
|
||||
return Q.size() + HaveConst;
|
||||
}
|
||||
|
||||
bool hasConst() {
|
||||
return HaveConst;
|
||||
}
|
||||
|
||||
const WeightedLeaf &top() {
|
||||
if (HaveConst)
|
||||
return ConstElt;
|
||||
return Q.front();
|
||||
}
|
||||
|
||||
WeightedLeaf pop() {
|
||||
if (HaveConst) {
|
||||
HaveConst = false;
|
||||
return ConstElt;
|
||||
}
|
||||
std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
|
||||
return Q.pop_back_val();
|
||||
}
|
||||
|
||||
void push(WeightedLeaf L, bool SeparateConst=true) {
|
||||
if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
|
||||
if (Opcode == ISD::MUL &&
|
||||
cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
|
||||
return;
|
||||
if (Opcode == ISD::ADD &&
|
||||
cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
|
||||
return;
|
||||
|
||||
HaveConst = true;
|
||||
ConstElt = L;
|
||||
} else {
|
||||
Q.push_back(L);
|
||||
std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
|
||||
}
|
||||
}
|
||||
|
||||
/// Push L to the bottom of the queue regardless of its weight. If L is
|
||||
/// constant, it will not be folded with other constants in the queue.
|
||||
void pushToBottom(WeightedLeaf L) {
|
||||
L.Weight = 1000;
|
||||
push(L, false);
|
||||
}
|
||||
|
||||
/// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
|
||||
/// lowest weight and remove it from the queue.
|
||||
WeightedLeaf findSHL(uint64_t MaxAmount);
|
||||
|
||||
WeightedLeaf findMULbyConst();
|
||||
|
||||
LeafPrioQueue(unsigned Opcode) :
|
||||
HaveConst(false), Opcode(Opcode) { }
|
||||
};
|
||||
|
||||
WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
|
||||
int ResultPos;
|
||||
WeightedLeaf Result;
|
||||
|
||||
for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
|
||||
const WeightedLeaf &L = Q[Pos];
|
||||
const SDValue &Val = L.Value;
|
||||
if (Val.getOpcode() != ISD::SHL ||
|
||||
!isa<ConstantSDNode>(Val.getOperand(1)) ||
|
||||
Val.getConstantOperandVal(1) > MaxAmount)
|
||||
continue;
|
||||
if (!Result.Value.getNode() || Result.Weight > L.Weight ||
|
||||
(Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
|
||||
{
|
||||
Result = L;
|
||||
ResultPos = Pos;
|
||||
}
|
||||
}
|
||||
|
||||
if (Result.Value.getNode()) {
|
||||
Q.erase(&Q[ResultPos]);
|
||||
std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
|
||||
}
|
||||
|
||||
return Result;
|
||||
}
|
||||
|
||||
WeightedLeaf LeafPrioQueue::findMULbyConst() {
|
||||
int ResultPos;
|
||||
WeightedLeaf Result;
|
||||
|
||||
for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
|
||||
const WeightedLeaf &L = Q[Pos];
|
||||
const SDValue &Val = L.Value;
|
||||
if (Val.getOpcode() != ISD::MUL ||
|
||||
!isa<ConstantSDNode>(Val.getOperand(1)) ||
|
||||
Val.getConstantOperandVal(1) > 127)
|
||||
continue;
|
||||
if (!Result.Value.getNode() || Result.Weight > L.Weight ||
|
||||
(Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
|
||||
{
|
||||
Result = L;
|
||||
ResultPos = Pos;
|
||||
}
|
||||
}
|
||||
|
||||
if (Result.Value.getNode()) {
|
||||
Q.erase(&Q[ResultPos]);
|
||||
std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
|
||||
}
|
||||
|
||||
return Result;
|
||||
}
|
||||
|
||||
SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
|
||||
uint64_t MulFactor = 1 << N->getConstantOperandVal(1);
|
||||
return CurDAG->getConstant(MulFactor, SDLoc(N),
|
||||
N->getOperand(1).getValueType());
|
||||
}
|
||||
|
||||
/// @returns the value x for which 2^x is a factor of Val
|
||||
static unsigned getPowerOf2Factor(SDValue Val) {
|
||||
if (Val.getOpcode() == ISD::MUL) {
|
||||
unsigned MaxFactor = 0;
|
||||
for (int i=0; i < 2; ++i) {
|
||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
|
||||
if (!C)
|
||||
continue;
|
||||
const APInt &CInt = C->getAPIntValue();
|
||||
if (CInt.getBoolValue())
|
||||
MaxFactor = CInt.countTrailingZeros();
|
||||
}
|
||||
return MaxFactor;
|
||||
}
|
||||
if (Val.getOpcode() == ISD::SHL) {
|
||||
if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
|
||||
return 0;
|
||||
return (unsigned) Val.getConstantOperandVal(1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @returns true if V>>Amount will eliminate V's operation on its child
|
||||
static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
|
||||
if (V.getOpcode() == ISD::MUL) {
|
||||
SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
|
||||
for (int i=0; i < 2; ++i)
|
||||
if (isa<ConstantSDNode>(Ops[i].getNode()) &&
|
||||
V.getConstantOperandVal(i) % ((uint64_t)1 << Amount) == 0) {
|
||||
uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
|
||||
return (NewConst == 1);
|
||||
}
|
||||
} else if (V.getOpcode() == ISD::SHL) {
|
||||
return (Amount == V.getConstantOperandVal(1));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
|
||||
SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
|
||||
if (V.getOpcode() == ISD::MUL) {
|
||||
for (int i=0; i < 2; ++i) {
|
||||
if (isa<ConstantSDNode>(Ops[i].getNode()) &&
|
||||
V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
|
||||
uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
|
||||
if (NewConst == 1)
|
||||
return Ops[!i];
|
||||
Ops[i] = CurDAG->getConstant(NewConst,
|
||||
SDLoc(V), V.getValueType());
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (V.getOpcode() == ISD::SHL) {
|
||||
uint64_t ShiftAmount = V.getConstantOperandVal(1);
|
||||
if (ShiftAmount == Power)
|
||||
return Ops[0];
|
||||
Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
|
||||
SDLoc(V), V.getValueType());
|
||||
}
|
||||
|
||||
return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
|
||||
}
|
||||
|
||||
static bool isTargetConstant(const SDValue &V) {
|
||||
return V.getOpcode() == HexagonISD::CONST32 ||
|
||||
V.getOpcode() == HexagonISD::CONST32_GP;
|
||||
}
|
||||
|
||||
unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
|
||||
if (GAUsesInFunction.count(V))
|
||||
return GAUsesInFunction[V];
|
||||
|
||||
unsigned Result = 0;
|
||||
const Function *CurF = CurDAG->getMachineFunction().getFunction();
|
||||
for (const User *U : V->users()) {
|
||||
if (isa<Instruction>(U) &&
|
||||
cast<Instruction>(U)->getParent()->getParent() == CurF)
|
||||
++Result;
|
||||
}
|
||||
|
||||
GAUsesInFunction[V] = Result;
|
||||
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// Note - After calling this, N may be dead. It may have been replaced by a
|
||||
/// new node, so always use the returned value in place of N.
|
||||
///
|
||||
/// @returns The SDValue taking the place of N (which could be N if it is
|
||||
/// unchanged)
|
||||
SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
|
||||
assert(RootWeights.count(N) && "Cannot balance non-root node.");
|
||||
assert(RootWeights[N] != -2 && "This node was RAUW'd!");
|
||||
assert(!TopLevel || N->getOpcode() == ISD::ADD);
|
||||
|
||||
// Return early if this node was already visited
|
||||
if (RootWeights[N] != -1)
|
||||
return SDValue(N, 0);
|
||||
|
||||
assert(isOpcodeHandled(N));
|
||||
|
||||
SDValue Op0 = N->getOperand(0);
|
||||
SDValue Op1 = N->getOperand(1);
|
||||
|
||||
// Return early if the operands will remain unchanged or are all roots
|
||||
if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
|
||||
(!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
|
||||
SDNode *Op0N = Op0.getNode();
|
||||
int Weight;
|
||||
if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
|
||||
Weight = getWeight(balanceSubTree(Op0N).getNode());
|
||||
// Weight = calculateWeight(Op0N);
|
||||
} else
|
||||
Weight = getWeight(Op0N);
|
||||
|
||||
SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
|
||||
if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
|
||||
Weight += getWeight(balanceSubTree(Op1N).getNode());
|
||||
// Weight += calculateWeight(Op1N);
|
||||
} else
|
||||
Weight += getWeight(Op1N);
|
||||
|
||||
RootWeights[N] = Weight;
|
||||
RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
|
||||
getHeight(N->getOperand(1).getNode())) + 1;
|
||||
|
||||
DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
|
||||
<< " Height=" << RootHeights[N] << "): ");
|
||||
DEBUG(N->dump());
|
||||
|
||||
return SDValue(N, 0);
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "** Balancing root node: ");
|
||||
DEBUG(N->dump());
|
||||
|
||||
unsigned NOpcode = N->getOpcode();
|
||||
|
||||
LeafPrioQueue Leaves(NOpcode);
|
||||
SmallVector<SDValue, 4> Worklist;
|
||||
Worklist.push_back(SDValue(N, 0));
|
||||
|
||||
// SHL nodes will be converted to MUL nodes
|
||||
if (NOpcode == ISD::SHL)
|
||||
NOpcode = ISD::MUL;
|
||||
|
||||
bool CanFactorize = false;
|
||||
WeightedLeaf Mul1, Mul2;
|
||||
unsigned MaxPowerOf2 = 0;
|
||||
WeightedLeaf GA;
|
||||
|
||||
// Do not try to factor out a shift if there is already a shift at the tip of
|
||||
// the tree.
|
||||
bool HaveTopLevelShift = false;
|
||||
if (TopLevel &&
|
||||
((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
|
||||
Op0.getConstantOperandVal(1) < 4) ||
|
||||
(isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
|
||||
Op1.getConstantOperandVal(1) < 4)))
|
||||
HaveTopLevelShift = true;
|
||||
|
||||
// Flatten the subtree into an ordered list of leaves; at the same time
|
||||
// determine whether the tree is already balanced.
|
||||
int InsertionOrder = 0;
|
||||
SmallDenseMap<SDValue, int> NodeHeights;
|
||||
bool Imbalanced = false;
|
||||
int CurrentWeight = 0;
|
||||
while (!Worklist.empty()) {
|
||||
SDValue Child = Worklist.pop_back_val();
|
||||
|
||||
if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
|
||||
// CASE 1: Child is a root note
|
||||
|
||||
int Weight = RootWeights[Child.getNode()];
|
||||
if (Weight == -1) {
|
||||
Child = balanceSubTree(Child.getNode());
|
||||
// calculateWeight(Child.getNode());
|
||||
Weight = getWeight(Child.getNode());
|
||||
} else if (Weight == -2) {
|
||||
// Whoops, this node was RAUWd by one of the balanceSubTree calls we
|
||||
// made. Our worklist isn't up to date anymore.
|
||||
// Restart the whole process.
|
||||
DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
|
||||
return balanceSubTree(N, TopLevel);
|
||||
}
|
||||
|
||||
NodeHeights[Child] = 1;
|
||||
CurrentWeight += Weight;
|
||||
|
||||
unsigned PowerOf2;
|
||||
if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
|
||||
(Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
|
||||
Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
|
||||
// Try to identify two factorizable MUL/SHL children greedily. Leave
|
||||
// them out of the priority queue for now so we can deal with them
|
||||
// after.
|
||||
if (!Mul1.Value.getNode()) {
|
||||
Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
|
||||
MaxPowerOf2 = PowerOf2;
|
||||
} else {
|
||||
Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
|
||||
MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
|
||||
|
||||
// Our addressing modes can only shift by a maximum of 3
|
||||
if (MaxPowerOf2 > 3)
|
||||
MaxPowerOf2 = 3;
|
||||
|
||||
CanFactorize = true;
|
||||
}
|
||||
} else
|
||||
Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
|
||||
} else if (!isOpcodeHandled(Child.getNode())) {
|
||||
// CASE 2: Child is an unhandled kind of node (e.g. constant)
|
||||
int Weight = getWeight(Child.getNode());
|
||||
|
||||
NodeHeights[Child] = getHeight(Child.getNode());
|
||||
CurrentWeight += Weight;
|
||||
|
||||
if (isTargetConstant(Child) && !GA.Value.getNode())
|
||||
GA = WeightedLeaf(Child, Weight, InsertionOrder++);
|
||||
else
|
||||
Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
|
||||
} else {
|
||||
// CASE 3: Child is a subtree of same opcode
|
||||
// Visit children first, then flatten.
|
||||
unsigned ChildOpcode = Child.getOpcode();
|
||||
assert(ChildOpcode == NOpcode ||
|
||||
(NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
|
||||
|
||||
// Convert SHL to MUL
|
||||
SDValue Op1;
|
||||
if (ChildOpcode == ISD::SHL)
|
||||
Op1 = getMultiplierForSHL(Child.getNode());
|
||||
else
|
||||
Op1 = Child->getOperand(1);
|
||||
|
||||
if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
|
||||
assert(!NodeHeights.count(Child) && "Parent visited before children?");
|
||||
// Visit children first, then re-visit this node
|
||||
Worklist.push_back(Child);
|
||||
Worklist.push_back(Op1);
|
||||
Worklist.push_back(Child->getOperand(0));
|
||||
} else {
|
||||
// Back at this node after visiting the children
|
||||
if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
|
||||
Imbalanced = true;
|
||||
|
||||
NodeHeights[Child] = std::max(NodeHeights[Op1],
|
||||
NodeHeights[Child->getOperand(0)]) + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
|
||||
<< " weight=" << CurrentWeight << " imbalanced="
|
||||
<< Imbalanced << "\n");
|
||||
|
||||
// Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
|
||||
// This factors out a shift in order to match memw(a<<Y+b).
|
||||
if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
|
||||
willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
|
||||
DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
|
||||
int Weight = Mul1.Weight + Mul2.Weight;
|
||||
int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
|
||||
SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
|
||||
SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
|
||||
SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
|
||||
Mul1Factored, Mul2Factored);
|
||||
SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
|
||||
Mul1.Value.getValueType());
|
||||
SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
|
||||
Sum, Const);
|
||||
NodeHeights[New] = Height;
|
||||
Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
|
||||
} else if (Mul1.Value.getNode()) {
|
||||
// We failed to factorize two MULs, so now the Muls are left outside the
|
||||
// queue... add them back.
|
||||
Leaves.push(Mul1);
|
||||
if (Mul2.Value.getNode())
|
||||
Leaves.push(Mul2);
|
||||
CanFactorize = false;
|
||||
}
|
||||
|
||||
// Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
|
||||
// and the root node itself is not used more than twice. This reduces the
|
||||
// amount of additional constant extenders introduced by this optimization.
|
||||
bool CombinedGA = false;
|
||||
if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
|
||||
GA.Value.hasOneUse() && N->use_size() < 3) {
|
||||
GlobalAddressSDNode *GANode =
|
||||
cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
|
||||
ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
|
||||
|
||||
if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
|
||||
getTargetLowering()->isOffsetFoldingLegal(GANode)) {
|
||||
DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue()
|
||||
<< "): ");
|
||||
DEBUG(GANode->dump());
|
||||
|
||||
SDValue NewTGA =
|
||||
CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
|
||||
GANode->getValueType(0),
|
||||
GANode->getOffset() + (uint64_t)Offset->getSExtValue());
|
||||
GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
|
||||
GA.Value.getValueType(), NewTGA);
|
||||
GA.Weight += Leaves.top().Weight;
|
||||
|
||||
NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
|
||||
CombinedGA = true;
|
||||
|
||||
Leaves.pop(); // Remove the offset constant from the queue
|
||||
}
|
||||
}
|
||||
|
||||
if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
|
||||
(RebalanceOnlyImbalancedTrees && !Imbalanced)) {
|
||||
RootWeights[N] = CurrentWeight;
|
||||
RootHeights[N] = NodeHeights[SDValue(N, 0)];
|
||||
|
||||
return SDValue(N, 0);
|
||||
}
|
||||
|
||||
// Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
|
||||
if (NOpcode == ISD::ADD && GA.Value.getNode()) {
|
||||
WeightedLeaf SHL = Leaves.findSHL(31);
|
||||
if (SHL.Value.getNode()) {
|
||||
int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
|
||||
GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
|
||||
GA.Value.getValueType(),
|
||||
GA.Value, SHL.Value);
|
||||
GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
|
||||
NodeHeights[GA.Value] = Height;
|
||||
}
|
||||
}
|
||||
|
||||
if (GA.Value.getNode())
|
||||
Leaves.push(GA);
|
||||
|
||||
// If this is the top level and we haven't factored out a shift, we should try
|
||||
// to move a constant to the bottom to match addressing modes like memw(rX+C)
|
||||
if (TopLevel && !CanFactorize && Leaves.hasConst()) {
|
||||
DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
|
||||
Leaves.pushToBottom(Leaves.pop());
|
||||
}
|
||||
|
||||
// Rebuild the tree using Huffman's algorithm
|
||||
while (Leaves.size() > 1) {
|
||||
WeightedLeaf L0 = Leaves.pop();
|
||||
|
||||
// See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
|
||||
// otherwise just get the next leaf
|
||||
WeightedLeaf L1 = Leaves.findMULbyConst();
|
||||
if (!L1.Value.getNode())
|
||||
L1 = Leaves.pop();
|
||||
|
||||
assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
|
||||
|
||||
SDValue V0 = L0.Value;
|
||||
int V0Weight = L0.Weight;
|
||||
SDValue V1 = L1.Value;
|
||||
int V1Weight = L1.Weight;
|
||||
|
||||
// Make sure that none of these nodes have been RAUW'd
|
||||
if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
|
||||
(RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
|
||||
DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
|
||||
return balanceSubTree(N, TopLevel);
|
||||
}
|
||||
|
||||
ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
|
||||
ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
|
||||
EVT VT = N->getValueType(0);
|
||||
SDValue NewNode;
|
||||
|
||||
if (V0C && !V1C) {
|
||||
std::swap(V0, V1);
|
||||
std::swap(V0C, V1C);
|
||||
}
|
||||
|
||||
// Calculate height of this node
|
||||
assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
|
||||
"Children must have been visited before re-combining them!");
|
||||
int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
|
||||
|
||||
// Rebuild this node (and restore SHL from MUL if needed)
|
||||
if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
|
||||
NewNode = CurDAG->getNode(
|
||||
ISD::SHL, SDLoc(V0), VT, V0,
|
||||
CurDAG->getConstant(
|
||||
V1C->getAPIntValue().logBase2(), SDLoc(N),
|
||||
getTargetLowering()->getScalarShiftAmountTy(CurDAG->getDataLayout(), V0.getValueType())));
|
||||
else
|
||||
NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
|
||||
|
||||
NodeHeights[NewNode] = Height;
|
||||
|
||||
int Weight = V0Weight + V1Weight;
|
||||
Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
|
||||
|
||||
DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height="
|
||||
<< Height << "):\n");
|
||||
DEBUG(NewNode.dump());
|
||||
}
|
||||
|
||||
assert(Leaves.size() == 1);
|
||||
SDValue NewRoot = Leaves.top().Value;
|
||||
|
||||
assert(NodeHeights.count(NewRoot));
|
||||
int Height = NodeHeights[NewRoot];
|
||||
|
||||
// Restore SHL if we earlier converted it to a MUL
|
||||
if (NewRoot.getOpcode() == ISD::MUL) {
|
||||
ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
|
||||
if (V1C && V1C->getAPIntValue().isPowerOf2()) {
|
||||
EVT VT = NewRoot.getValueType();
|
||||
SDValue V0 = NewRoot.getOperand(0);
|
||||
NewRoot = CurDAG->getNode(
|
||||
ISD::SHL, SDLoc(NewRoot), VT, V0,
|
||||
CurDAG->getConstant(V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
|
||||
getTargetLowering()->getScalarShiftAmountTy(
|
||||
CurDAG->getDataLayout(), V0.getValueType())));
|
||||
}
|
||||
}
|
||||
|
||||
if (N != NewRoot.getNode()) {
|
||||
DEBUG(dbgs() << "--> Root is now: ");
|
||||
DEBUG(NewRoot.dump());
|
||||
|
||||
// Replace all uses of old root by new root
|
||||
CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
|
||||
// Mark that we have RAUW'd N
|
||||
RootWeights[N] = -2;
|
||||
} else {
|
||||
DEBUG(dbgs() << "--> Root unchanged.\n");
|
||||
}
|
||||
|
||||
RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
|
||||
RootHeights[NewRoot.getNode()] = Height;
|
||||
|
||||
return NewRoot;
|
||||
}
|
||||
|
||||
void HexagonDAGToDAGISel::rebalanceAddressTrees() {
|
||||
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
|
||||
E = CurDAG->allnodes_end(); I != E;) {
|
||||
SDNode *N = &*I++;
|
||||
if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
|
||||
continue;
|
||||
|
||||
SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
|
||||
if (BasePtr.getOpcode() != ISD::ADD)
|
||||
continue;
|
||||
|
||||
// We've already processed this node
|
||||
if (RootWeights.count(BasePtr.getNode()))
|
||||
continue;
|
||||
|
||||
DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
|
||||
DEBUG(N->dump());
|
||||
|
||||
// FindRoots
|
||||
SmallVector<SDNode *, 4> Worklist;
|
||||
|
||||
Worklist.push_back(BasePtr.getOperand(0).getNode());
|
||||
Worklist.push_back(BasePtr.getOperand(1).getNode());
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
SDNode *N = Worklist.pop_back_val();
|
||||
unsigned Opcode = N->getOpcode();
|
||||
|
||||
if (!isOpcodeHandled(N))
|
||||
continue;
|
||||
|
||||
Worklist.push_back(N->getOperand(0).getNode());
|
||||
Worklist.push_back(N->getOperand(1).getNode());
|
||||
|
||||
// Not a root if it has only one use and same opcode as its parent
|
||||
if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
|
||||
continue;
|
||||
|
||||
// This root node has already been processed
|
||||
if (RootWeights.count(N))
|
||||
continue;
|
||||
|
||||
RootWeights[N] = -1;
|
||||
}
|
||||
|
||||
// Balance node itself
|
||||
RootWeights[BasePtr.getNode()] = -1;
|
||||
SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
|
||||
|
||||
if (N->getOpcode() == ISD::LOAD)
|
||||
N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
|
||||
NewBasePtr, N->getOperand(2));
|
||||
else
|
||||
N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
|
||||
NewBasePtr, N->getOperand(3));
|
||||
|
||||
DEBUG(dbgs() << "--> Final node: ");
|
||||
DEBUG(N->dump());
|
||||
}
|
||||
|
||||
CurDAG->RemoveDeadNodes();
|
||||
GAUsesInFunction.clear();
|
||||
RootHeights.clear();
|
||||
RootWeights.clear();
|
||||
}
|
||||
|
||||
|
||||
|
54
test/CodeGen/Hexagon/addr-calc-opt.ll
Normal file
54
test/CodeGen/Hexagon/addr-calc-opt.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
|
||||
;
|
||||
; Test whether we can produce minimal code for this complex address
|
||||
; calculation.
|
||||
;
|
||||
|
||||
; CHECK: r0 = memub(r{{[0-9]+}}<<#3{{ *}}+{{ *}}##the_global+516)
|
||||
|
||||
%0 = type { [3 x %1] }
|
||||
%1 = type { %2, i8, i8, i8, i8, i8, [4 x i8], i8, [10 x i8], [10 x i8], [10 x i8], i8, [3 x %4], i16, i16, i16, i16, i32, i8, [4 x i8], i8, i8, i8, i8, %5, i8, i8, i8, i8, i8, i16, i8, i8, i8, i16, i16, i8, i8, [2 x i8], [2 x i8], i8, i8, i8, i8, i8, i16, i16, i8, i8, i8, i8, i8, i8, %9, i8, [6 x [2 x i8]], i16, i32, %10, [28 x i8], [4 x %17] }
|
||||
%2 = type { %3 }
|
||||
%3 = type { i8, i8, i8, i8, i8, i16, i16, i16, i16, i16 }
|
||||
%4 = type { i16, i16 }
|
||||
%5 = type { [10 x %6] }
|
||||
%6 = type { [2 x %7] }
|
||||
%7 = type { i8, [2 x %8] }
|
||||
%8 = type { [4 x i8] }
|
||||
%9 = type { i8 }
|
||||
%10 = type { %11, %13 }
|
||||
%11 = type { [2 x [2 x i8]], [2 x [8 x %12]], [6 x i16], [6 x i16] }
|
||||
%12 = type { i8, i8 }
|
||||
%13 = type { [4 x %12], [4 x %12], [2 x [4 x %14]], [6 x i16] }
|
||||
%14 = type { %15, %16 }
|
||||
%15 = type { i8, i8 }
|
||||
%16 = type { i8, i8 }
|
||||
%17 = type { i8, i8, %1*, i16, i16, i16, i64, i32, i32, %18, i8, %21, i8, [2 x i16], i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i16, i16, [2 x i16], i16, [2 x i32], [2 x i16], [2 x i16], i8, i8, [6 x %23], i8, i8, i8, %24, %25, %26, %28 }
|
||||
%18 = type { %19, [10 x %20] }
|
||||
%19 = type { i32 }
|
||||
%20 = type { [2 x i8], [2 x i8], i8, i8, i8, i8 }
|
||||
%21 = type { i8, i8, i8, [8 x %22] }
|
||||
%22 = type { i8, i8, i8, i32 }
|
||||
%23 = type { i32, i16, i16, [2 x i16], [2 x i16], [2 x i16], i32 }
|
||||
%24 = type { [2 x i32], [2 x i64*], [2 x i64*], [2 x i64*], [2 x i32], [2 x i32], i32 }
|
||||
%25 = type { [2 x i32], [2 x i32], [2 x i32] }
|
||||
%26 = type { i8, i8, i8, i16, i16, %27, i32, i32, i32, i16 }
|
||||
%27 = type { i64 }
|
||||
%28 = type { %29, %31, [24 x i8] }
|
||||
%29 = type { [2 x %30], [16 x i32] }
|
||||
%30 = type { [16 x i32], [8 x i32], [16 x i32], [64 x i32], [2 x i32], i64, i32, i32, i32, i32 }
|
||||
%31 = type { [2 x %32] }
|
||||
%32 = type { [4 x %33], i32 }
|
||||
%33 = type { i32, i32 }
|
||||
|
||||
@the_global = external global %0
|
||||
|
||||
; Function Attrs: nounwind optsize readonly ssp
|
||||
define zeroext i8 @myFun(i8 zeroext, i8 zeroext) {
|
||||
%3 = zext i8 %1 to i32
|
||||
%4 = zext i8 %0 to i32
|
||||
%5 = getelementptr inbounds %0, %0* @the_global, i32 0, i32 0, i32 %4, i32 60, i32 0, i32 9, i32 1, i32 %3, i32 0, i32 0
|
||||
%6 = load i8, i8* %5, align 4
|
||||
ret i8 %6
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user