1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[Hexagon] Improve balancing of address calculation

Rebalances address calculation trees and applies Hexagon-specific
optimizations to the trees to improve instruction selection.

Patch by Tobias Edler von Koch.

llvm-svn: 277151
This commit is contained in:
Krzysztof Parzyszek 2016-07-29 15:15:35 +00:00
parent c5d7dc7ef7
commit 4ca53a9c57
2 changed files with 792 additions and 3 deletions

View File

@ -32,6 +32,24 @@ MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
cl::desc("Maximum number of uses of a global address such that we still us a"
"constant extended instruction"));
static
cl::opt<bool>
EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
cl::desc("Rebalance address calculation trees to improve "
"instruction selection"));
// Rebalance only if this allows e.g. combining a GA with an offset or
// factoring out a shift.
static
cl::opt<bool>
RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
cl::desc("Rebalance address tree only if this allows optimizations"));
static
cl::opt<bool>
RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
@ -42,14 +60,13 @@ MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
///
namespace {
class HexagonDAGToDAGISel : public SelectionDAGISel {
const HexagonTargetMachine &HTM;
const HexagonSubtarget *HST;
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
public:
explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
: SelectionDAGISel(tm, OptLevel), HST(nullptr), HII(nullptr),
HRI(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@ -92,7 +109,6 @@ public:
std::vector<SDValue> &OutOps) override;
bool tryLoadOfLoadIntrinsic(LoadSDNode *N);
void SelectLoad(SDNode *N);
void SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl);
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl);
void SelectStore(SDNode *N);
@ -179,6 +195,17 @@ private:
bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
bool orIsAdd(const SDNode *N) const;
bool isAlignedMemNode(const MemSDNode *N) const;
SmallDenseMap<SDNode *,int> RootWeights;
SmallDenseMap<SDNode *,int> RootHeights;
SmallDenseMap<const Value *,int> GAUsesInFunction;
int getWeight(SDNode *N);
int getHeight(SDNode *N);
SDValue getMultiplierForSHL(SDNode *N);
SDValue factorOutPowerOf2(SDValue V, unsigned Power);
unsigned getUsesInFunction(const Value *V);
SDValue balanceSubTree(SDNode *N, bool Factorize = false);
void rebalanceAddressTrees();
}; // end HexagonDAGToDAGISel
} // end anonymous namespace
@ -1373,6 +1400,16 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
ReplaceNode(T0.getNode(), NewShl.getNode());
}
if (EnableAddressRebalancing) {
rebalanceAddressTrees();
DEBUG(
dbgs() << "************* SelectionDAG after preprocessing: ***********\n";
CurDAG->dump();
dbgs() << "************* End SelectionDAG after preprocessing ********\n";
);
}
}
void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
@ -1540,3 +1577,701 @@ bool HexagonDAGToDAGISel::orIsAdd(const SDNode *N) const {
bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
return N->getAlignment() >= N->getMemoryVT().getStoreSize();
}
////////////////////////////////////////////////////////////////////////////////
// Rebalancing of address calculation trees
static bool isOpcodeHandled(const SDNode *N) {
switch (N->getOpcode()) {
case ISD::ADD:
case ISD::MUL:
return true;
case ISD::SHL:
// We only handle constant shifts because these can be easily flattened
// into multiplications by 2^Op1.
return isa<ConstantSDNode>(N->getOperand(1).getNode());
default:
return false;
}
}
/// \brief Return the weight of an SDNode
int HexagonDAGToDAGISel::getWeight(SDNode *N) {
if (!isOpcodeHandled(N))
return 1;
assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
return RootWeights[N];
}
int HexagonDAGToDAGISel::getHeight(SDNode *N) {
if (!isOpcodeHandled(N))
return 0;
assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
"Cannot query height of unvisited/RAUW'd node!");
return RootHeights[N];
}
struct WeightedLeaf {
SDValue Value;
int Weight;
int InsertionOrder;
WeightedLeaf() : Value(SDValue()) { }
WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
assert(Weight >= 0 && "Weight must be >= 0");
}
static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
assert(A.Value.getNode() && B.Value.getNode());
return A.Weight == B.Weight ?
(A.InsertionOrder > B.InsertionOrder) :
(A.Weight > B.Weight);
}
};
/// A specialized priority queue for WeigthedLeaves. It automatically folds
/// constants and allows removal of non-top elements while maintaining the
/// priority order.
class LeafPrioQueue {
SmallVector<WeightedLeaf, 8> Q;
bool HaveConst;
WeightedLeaf ConstElt;
unsigned Opcode;
public:
bool empty() {
return (!HaveConst && Q.empty());
}
size_t size() {
return Q.size() + HaveConst;
}
bool hasConst() {
return HaveConst;
}
const WeightedLeaf &top() {
if (HaveConst)
return ConstElt;
return Q.front();
}
WeightedLeaf pop() {
if (HaveConst) {
HaveConst = false;
return ConstElt;
}
std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
return Q.pop_back_val();
}
void push(WeightedLeaf L, bool SeparateConst=true) {
if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
if (Opcode == ISD::MUL &&
cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
return;
if (Opcode == ISD::ADD &&
cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
return;
HaveConst = true;
ConstElt = L;
} else {
Q.push_back(L);
std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
}
}
/// Push L to the bottom of the queue regardless of its weight. If L is
/// constant, it will not be folded with other constants in the queue.
void pushToBottom(WeightedLeaf L) {
L.Weight = 1000;
push(L, false);
}
/// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
/// lowest weight and remove it from the queue.
WeightedLeaf findSHL(uint64_t MaxAmount);
WeightedLeaf findMULbyConst();
LeafPrioQueue(unsigned Opcode) :
HaveConst(false), Opcode(Opcode) { }
};
WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
int ResultPos;
WeightedLeaf Result;
for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
const WeightedLeaf &L = Q[Pos];
const SDValue &Val = L.Value;
if (Val.getOpcode() != ISD::SHL ||
!isa<ConstantSDNode>(Val.getOperand(1)) ||
Val.getConstantOperandVal(1) > MaxAmount)
continue;
if (!Result.Value.getNode() || Result.Weight > L.Weight ||
(Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
{
Result = L;
ResultPos = Pos;
}
}
if (Result.Value.getNode()) {
Q.erase(&Q[ResultPos]);
std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
}
return Result;
}
WeightedLeaf LeafPrioQueue::findMULbyConst() {
int ResultPos;
WeightedLeaf Result;
for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
const WeightedLeaf &L = Q[Pos];
const SDValue &Val = L.Value;
if (Val.getOpcode() != ISD::MUL ||
!isa<ConstantSDNode>(Val.getOperand(1)) ||
Val.getConstantOperandVal(1) > 127)
continue;
if (!Result.Value.getNode() || Result.Weight > L.Weight ||
(Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
{
Result = L;
ResultPos = Pos;
}
}
if (Result.Value.getNode()) {
Q.erase(&Q[ResultPos]);
std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
}
return Result;
}
SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
uint64_t MulFactor = 1 << N->getConstantOperandVal(1);
return CurDAG->getConstant(MulFactor, SDLoc(N),
N->getOperand(1).getValueType());
}
/// @returns the value x for which 2^x is a factor of Val
static unsigned getPowerOf2Factor(SDValue Val) {
if (Val.getOpcode() == ISD::MUL) {
unsigned MaxFactor = 0;
for (int i=0; i < 2; ++i) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
if (!C)
continue;
const APInt &CInt = C->getAPIntValue();
if (CInt.getBoolValue())
MaxFactor = CInt.countTrailingZeros();
}
return MaxFactor;
}
if (Val.getOpcode() == ISD::SHL) {
if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
return 0;
return (unsigned) Val.getConstantOperandVal(1);
}
return 0;
}
/// @returns true if V>>Amount will eliminate V's operation on its child
static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
if (V.getOpcode() == ISD::MUL) {
SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
for (int i=0; i < 2; ++i)
if (isa<ConstantSDNode>(Ops[i].getNode()) &&
V.getConstantOperandVal(i) % ((uint64_t)1 << Amount) == 0) {
uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
return (NewConst == 1);
}
} else if (V.getOpcode() == ISD::SHL) {
return (Amount == V.getConstantOperandVal(1));
}
return false;
}
SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
if (V.getOpcode() == ISD::MUL) {
for (int i=0; i < 2; ++i) {
if (isa<ConstantSDNode>(Ops[i].getNode()) &&
V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
if (NewConst == 1)
return Ops[!i];
Ops[i] = CurDAG->getConstant(NewConst,
SDLoc(V), V.getValueType());
break;
}
}
} else if (V.getOpcode() == ISD::SHL) {
uint64_t ShiftAmount = V.getConstantOperandVal(1);
if (ShiftAmount == Power)
return Ops[0];
Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
SDLoc(V), V.getValueType());
}
return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
}
static bool isTargetConstant(const SDValue &V) {
return V.getOpcode() == HexagonISD::CONST32 ||
V.getOpcode() == HexagonISD::CONST32_GP;
}
unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
if (GAUsesInFunction.count(V))
return GAUsesInFunction[V];
unsigned Result = 0;
const Function *CurF = CurDAG->getMachineFunction().getFunction();
for (const User *U : V->users()) {
if (isa<Instruction>(U) &&
cast<Instruction>(U)->getParent()->getParent() == CurF)
++Result;
}
GAUsesInFunction[V] = Result;
return Result;
}
/// Note - After calling this, N may be dead. It may have been replaced by a
/// new node, so always use the returned value in place of N.
///
/// @returns The SDValue taking the place of N (which could be N if it is
/// unchanged)
SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
assert(RootWeights.count(N) && "Cannot balance non-root node.");
assert(RootWeights[N] != -2 && "This node was RAUW'd!");
assert(!TopLevel || N->getOpcode() == ISD::ADD);
// Return early if this node was already visited
if (RootWeights[N] != -1)
return SDValue(N, 0);
assert(isOpcodeHandled(N));
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// Return early if the operands will remain unchanged or are all roots
if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
(!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
SDNode *Op0N = Op0.getNode();
int Weight;
if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
Weight = getWeight(balanceSubTree(Op0N).getNode());
// Weight = calculateWeight(Op0N);
} else
Weight = getWeight(Op0N);
SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
Weight += getWeight(balanceSubTree(Op1N).getNode());
// Weight += calculateWeight(Op1N);
} else
Weight += getWeight(Op1N);
RootWeights[N] = Weight;
RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
getHeight(N->getOperand(1).getNode())) + 1;
DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
<< " Height=" << RootHeights[N] << "): ");
DEBUG(N->dump());
return SDValue(N, 0);
}
DEBUG(dbgs() << "** Balancing root node: ");
DEBUG(N->dump());
unsigned NOpcode = N->getOpcode();
LeafPrioQueue Leaves(NOpcode);
SmallVector<SDValue, 4> Worklist;
Worklist.push_back(SDValue(N, 0));
// SHL nodes will be converted to MUL nodes
if (NOpcode == ISD::SHL)
NOpcode = ISD::MUL;
bool CanFactorize = false;
WeightedLeaf Mul1, Mul2;
unsigned MaxPowerOf2 = 0;
WeightedLeaf GA;
// Do not try to factor out a shift if there is already a shift at the tip of
// the tree.
bool HaveTopLevelShift = false;
if (TopLevel &&
((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
Op0.getConstantOperandVal(1) < 4) ||
(isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
Op1.getConstantOperandVal(1) < 4)))
HaveTopLevelShift = true;
// Flatten the subtree into an ordered list of leaves; at the same time
// determine whether the tree is already balanced.
int InsertionOrder = 0;
SmallDenseMap<SDValue, int> NodeHeights;
bool Imbalanced = false;
int CurrentWeight = 0;
while (!Worklist.empty()) {
SDValue Child = Worklist.pop_back_val();
if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
// CASE 1: Child is a root note
int Weight = RootWeights[Child.getNode()];
if (Weight == -1) {
Child = balanceSubTree(Child.getNode());
// calculateWeight(Child.getNode());
Weight = getWeight(Child.getNode());
} else if (Weight == -2) {
// Whoops, this node was RAUWd by one of the balanceSubTree calls we
// made. Our worklist isn't up to date anymore.
// Restart the whole process.
DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
return balanceSubTree(N, TopLevel);
}
NodeHeights[Child] = 1;
CurrentWeight += Weight;
unsigned PowerOf2;
if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
(Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
// Try to identify two factorizable MUL/SHL children greedily. Leave
// them out of the priority queue for now so we can deal with them
// after.
if (!Mul1.Value.getNode()) {
Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
MaxPowerOf2 = PowerOf2;
} else {
Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
// Our addressing modes can only shift by a maximum of 3
if (MaxPowerOf2 > 3)
MaxPowerOf2 = 3;
CanFactorize = true;
}
} else
Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
} else if (!isOpcodeHandled(Child.getNode())) {
// CASE 2: Child is an unhandled kind of node (e.g. constant)
int Weight = getWeight(Child.getNode());
NodeHeights[Child] = getHeight(Child.getNode());
CurrentWeight += Weight;
if (isTargetConstant(Child) && !GA.Value.getNode())
GA = WeightedLeaf(Child, Weight, InsertionOrder++);
else
Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
} else {
// CASE 3: Child is a subtree of same opcode
// Visit children first, then flatten.
unsigned ChildOpcode = Child.getOpcode();
assert(ChildOpcode == NOpcode ||
(NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
// Convert SHL to MUL
SDValue Op1;
if (ChildOpcode == ISD::SHL)
Op1 = getMultiplierForSHL(Child.getNode());
else
Op1 = Child->getOperand(1);
if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
assert(!NodeHeights.count(Child) && "Parent visited before children?");
// Visit children first, then re-visit this node
Worklist.push_back(Child);
Worklist.push_back(Op1);
Worklist.push_back(Child->getOperand(0));
} else {
// Back at this node after visiting the children
if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
Imbalanced = true;
NodeHeights[Child] = std::max(NodeHeights[Op1],
NodeHeights[Child->getOperand(0)]) + 1;
}
}
}
DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
<< " weight=" << CurrentWeight << " imbalanced="
<< Imbalanced << "\n");
// Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
// This factors out a shift in order to match memw(a<<Y+b).
if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
int Weight = Mul1.Weight + Mul2.Weight;
int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
Mul1Factored, Mul2Factored);
SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
Mul1.Value.getValueType());
SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
Sum, Const);
NodeHeights[New] = Height;
Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
} else if (Mul1.Value.getNode()) {
// We failed to factorize two MULs, so now the Muls are left outside the
// queue... add them back.
Leaves.push(Mul1);
if (Mul2.Value.getNode())
Leaves.push(Mul2);
CanFactorize = false;
}
// Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
// and the root node itself is not used more than twice. This reduces the
// amount of additional constant extenders introduced by this optimization.
bool CombinedGA = false;
if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
GA.Value.hasOneUse() && N->use_size() < 3) {
GlobalAddressSDNode *GANode =
cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
getTargetLowering()->isOffsetFoldingLegal(GANode)) {
DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue()
<< "): ");
DEBUG(GANode->dump());
SDValue NewTGA =
CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
GANode->getValueType(0),
GANode->getOffset() + (uint64_t)Offset->getSExtValue());
GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
GA.Value.getValueType(), NewTGA);
GA.Weight += Leaves.top().Weight;
NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
CombinedGA = true;
Leaves.pop(); // Remove the offset constant from the queue
}
}
if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
(RebalanceOnlyImbalancedTrees && !Imbalanced)) {
RootWeights[N] = CurrentWeight;
RootHeights[N] = NodeHeights[SDValue(N, 0)];
return SDValue(N, 0);
}
// Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
if (NOpcode == ISD::ADD && GA.Value.getNode()) {
WeightedLeaf SHL = Leaves.findSHL(31);
if (SHL.Value.getNode()) {
int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
GA.Value.getValueType(),
GA.Value, SHL.Value);
GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
NodeHeights[GA.Value] = Height;
}
}
if (GA.Value.getNode())
Leaves.push(GA);
// If this is the top level and we haven't factored out a shift, we should try
// to move a constant to the bottom to match addressing modes like memw(rX+C)
if (TopLevel && !CanFactorize && Leaves.hasConst()) {
DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
Leaves.pushToBottom(Leaves.pop());
}
// Rebuild the tree using Huffman's algorithm
while (Leaves.size() > 1) {
WeightedLeaf L0 = Leaves.pop();
// See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
// otherwise just get the next leaf
WeightedLeaf L1 = Leaves.findMULbyConst();
if (!L1.Value.getNode())
L1 = Leaves.pop();
assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
SDValue V0 = L0.Value;
int V0Weight = L0.Weight;
SDValue V1 = L1.Value;
int V1Weight = L1.Weight;
// Make sure that none of these nodes have been RAUW'd
if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
(RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
return balanceSubTree(N, TopLevel);
}
ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
EVT VT = N->getValueType(0);
SDValue NewNode;
if (V0C && !V1C) {
std::swap(V0, V1);
std::swap(V0C, V1C);
}
// Calculate height of this node
assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
"Children must have been visited before re-combining them!");
int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
// Rebuild this node (and restore SHL from MUL if needed)
if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
NewNode = CurDAG->getNode(
ISD::SHL, SDLoc(V0), VT, V0,
CurDAG->getConstant(
V1C->getAPIntValue().logBase2(), SDLoc(N),
getTargetLowering()->getScalarShiftAmountTy(CurDAG->getDataLayout(), V0.getValueType())));
else
NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
NodeHeights[NewNode] = Height;
int Weight = V0Weight + V1Weight;
Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height="
<< Height << "):\n");
DEBUG(NewNode.dump());
}
assert(Leaves.size() == 1);
SDValue NewRoot = Leaves.top().Value;
assert(NodeHeights.count(NewRoot));
int Height = NodeHeights[NewRoot];
// Restore SHL if we earlier converted it to a MUL
if (NewRoot.getOpcode() == ISD::MUL) {
ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
if (V1C && V1C->getAPIntValue().isPowerOf2()) {
EVT VT = NewRoot.getValueType();
SDValue V0 = NewRoot.getOperand(0);
NewRoot = CurDAG->getNode(
ISD::SHL, SDLoc(NewRoot), VT, V0,
CurDAG->getConstant(V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
getTargetLowering()->getScalarShiftAmountTy(
CurDAG->getDataLayout(), V0.getValueType())));
}
}
if (N != NewRoot.getNode()) {
DEBUG(dbgs() << "--> Root is now: ");
DEBUG(NewRoot.dump());
// Replace all uses of old root by new root
CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
// Mark that we have RAUW'd N
RootWeights[N] = -2;
} else {
DEBUG(dbgs() << "--> Root unchanged.\n");
}
RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
RootHeights[NewRoot.getNode()] = Height;
return NewRoot;
}
void HexagonDAGToDAGISel::rebalanceAddressTrees() {
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E;) {
SDNode *N = &*I++;
if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
continue;
SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
if (BasePtr.getOpcode() != ISD::ADD)
continue;
// We've already processed this node
if (RootWeights.count(BasePtr.getNode()))
continue;
DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
DEBUG(N->dump());
// FindRoots
SmallVector<SDNode *, 4> Worklist;
Worklist.push_back(BasePtr.getOperand(0).getNode());
Worklist.push_back(BasePtr.getOperand(1).getNode());
while (!Worklist.empty()) {
SDNode *N = Worklist.pop_back_val();
unsigned Opcode = N->getOpcode();
if (!isOpcodeHandled(N))
continue;
Worklist.push_back(N->getOperand(0).getNode());
Worklist.push_back(N->getOperand(1).getNode());
// Not a root if it has only one use and same opcode as its parent
if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
continue;
// This root node has already been processed
if (RootWeights.count(N))
continue;
RootWeights[N] = -1;
}
// Balance node itself
RootWeights[BasePtr.getNode()] = -1;
SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
if (N->getOpcode() == ISD::LOAD)
N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
NewBasePtr, N->getOperand(2));
else
N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
NewBasePtr, N->getOperand(3));
DEBUG(dbgs() << "--> Final node: ");
DEBUG(N->dump());
}
CurDAG->RemoveDeadNodes();
GAUsesInFunction.clear();
RootHeights.clear();
RootWeights.clear();
}

View File

@ -0,0 +1,54 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
;
; Test whether we can produce minimal code for this complex address
; calculation.
;
; CHECK: r0 = memub(r{{[0-9]+}}<<#3{{ *}}+{{ *}}##the_global+516)
%0 = type { [3 x %1] }
%1 = type { %2, i8, i8, i8, i8, i8, [4 x i8], i8, [10 x i8], [10 x i8], [10 x i8], i8, [3 x %4], i16, i16, i16, i16, i32, i8, [4 x i8], i8, i8, i8, i8, %5, i8, i8, i8, i8, i8, i16, i8, i8, i8, i16, i16, i8, i8, [2 x i8], [2 x i8], i8, i8, i8, i8, i8, i16, i16, i8, i8, i8, i8, i8, i8, %9, i8, [6 x [2 x i8]], i16, i32, %10, [28 x i8], [4 x %17] }
%2 = type { %3 }
%3 = type { i8, i8, i8, i8, i8, i16, i16, i16, i16, i16 }
%4 = type { i16, i16 }
%5 = type { [10 x %6] }
%6 = type { [2 x %7] }
%7 = type { i8, [2 x %8] }
%8 = type { [4 x i8] }
%9 = type { i8 }
%10 = type { %11, %13 }
%11 = type { [2 x [2 x i8]], [2 x [8 x %12]], [6 x i16], [6 x i16] }
%12 = type { i8, i8 }
%13 = type { [4 x %12], [4 x %12], [2 x [4 x %14]], [6 x i16] }
%14 = type { %15, %16 }
%15 = type { i8, i8 }
%16 = type { i8, i8 }
%17 = type { i8, i8, %1*, i16, i16, i16, i64, i32, i32, %18, i8, %21, i8, [2 x i16], i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i16, i16, [2 x i16], i16, [2 x i32], [2 x i16], [2 x i16], i8, i8, [6 x %23], i8, i8, i8, %24, %25, %26, %28 }
%18 = type { %19, [10 x %20] }
%19 = type { i32 }
%20 = type { [2 x i8], [2 x i8], i8, i8, i8, i8 }
%21 = type { i8, i8, i8, [8 x %22] }
%22 = type { i8, i8, i8, i32 }
%23 = type { i32, i16, i16, [2 x i16], [2 x i16], [2 x i16], i32 }
%24 = type { [2 x i32], [2 x i64*], [2 x i64*], [2 x i64*], [2 x i32], [2 x i32], i32 }
%25 = type { [2 x i32], [2 x i32], [2 x i32] }
%26 = type { i8, i8, i8, i16, i16, %27, i32, i32, i32, i16 }
%27 = type { i64 }
%28 = type { %29, %31, [24 x i8] }
%29 = type { [2 x %30], [16 x i32] }
%30 = type { [16 x i32], [8 x i32], [16 x i32], [64 x i32], [2 x i32], i64, i32, i32, i32, i32 }
%31 = type { [2 x %32] }
%32 = type { [4 x %33], i32 }
%33 = type { i32, i32 }
@the_global = external global %0
; Function Attrs: nounwind optsize readonly ssp
define zeroext i8 @myFun(i8 zeroext, i8 zeroext) {
%3 = zext i8 %1 to i32
%4 = zext i8 %0 to i32
%5 = getelementptr inbounds %0, %0* @the_global, i32 0, i32 0, i32 %4, i32 60, i32 0, i32 9, i32 1, i32 %3, i32 0, i32 0
%6 = load i8, i8* %5, align 4
ret i8 %6
}