//===- HexagonStoreWidening.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Replace sequences of "narrow" stores to adjacent memory locations with
// fewer "wide" stores that have the same effect.
// For example, replace:
//   S4_storeirb_io %100, 0, 0 ; store-immediate-byte
//   S4_storeirb_io %100, 1, 0 ; store-immediate-byte
// with
//   S4_storeirh_io %100, 0, 0 ; store-immediate-halfword
// The above is the general idea. The actual cases handled by the code
// may be a bit more complex.
// The purpose of this pass is to reduce the number of outstanding stores,
// or as one could say, "reduce store queue pressure". Also, wide stores
// mean fewer stores, and since there are only two memory instructions allowed
// per packet, it also means fewer packets, and ultimately fewer cycles.
//===---------------------------------------------------------------------===// #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #define DEBUG_TYPE "hexagon-widen-stores" using namespace llvm; namespace llvm { FunctionPass *createHexagonStoreWidening(); void initializeHexagonStoreWideningPass(PassRegistry&); } // end namespace llvm namespace { struct HexagonStoreWidening : public MachineFunctionPass { const HexagonInstrInfo *TII; const HexagonRegisterInfo *TRI; const MachineRegisterInfo *MRI; AliasAnalysis *AA; MachineFunction *MF; public: static char ID; HexagonStoreWidening() : MachineFunctionPass(ID) { initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "Hexagon Store Widening"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } static bool handledStoreType(const MachineInstr *MI); private: static const int MaxWideSize = 4; using InstrGroup = std::vector; using InstrGroupList = std::vector; bool instrAliased(InstrGroup &Stores, const 
MachineMemOperand &MMO); bool instrAliased(InstrGroup &Stores, const MachineInstr *MI); void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group); void createStoreGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups); bool processBasicBlock(MachineBasicBlock &MBB); bool processStoreGroup(InstrGroup &Group); bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); bool replaceStores(InstrGroup &OG, InstrGroup &NG); bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); }; } // end anonymous namespace char HexagonStoreWidening::ID = 0; INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores", "Hexason Store Widening", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores", "Hexagon Store Widening", false, false) // Some local helper functions... 
static unsigned getBaseAddressRegister(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(0); assert(MO.isReg() && "Expecting register operand"); return MO.getReg(); } static int64_t getStoreOffset(const MachineInstr *MI) { unsigned OpC = MI->getOpcode(); assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode"); switch (OpC) { case Hexagon::S4_storeirb_io: case Hexagon::S4_storeirh_io: case Hexagon::S4_storeiri_io: { const MachineOperand &MO = MI->getOperand(1); assert(MO.isImm() && "Expecting immediate offset"); return MO.getImm(); } } dbgs() << *MI; llvm_unreachable("Store offset calculation missing for a handled opcode"); return 0; } static const MachineMemOperand &getStoreTarget(const MachineInstr *MI) { assert(!MI->memoperands_empty() && "Expecting memory operands"); return **MI->memoperands_begin(); } // Filtering function: any stores whose opcodes are not "approved" of by // this function will not be subjected to widening. inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) { // For now, only handle stores of immediate values. // Also, reject stores to stack slots. unsigned Opc = MI->getOpcode(); switch (Opc) { case Hexagon::S4_storeirb_io: case Hexagon::S4_storeirh_io: case Hexagon::S4_storeiri_io: // Base address must be a register. (Implement FI later.) return MI->getOperand(0).isReg(); default: return false; } } // Check if the machine memory operand MMO is aliased with any of the // stores in the store group Stores. 
bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO) { if (!MMO.getValue()) return true; MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo()); for (auto SI : Stores) { const MachineMemOperand &SMO = getStoreTarget(SI); if (!SMO.getValue()) return true; MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo()); if (!AA->isNoAlias(L, SL)) return true; } return false; } // Check if the machine instruction MI accesses any storage aliased with // any store in the group Stores. bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, const MachineInstr *MI) { for (auto &I : MI->memoperands()) if (instrAliased(Stores, *I)) return true; return false; } // Inspect a machine basic block, and generate store groups out of stores // encountered in the block. // // A store group is a group of stores that use the same base register, // and which can be reordered within that group without altering the // semantics of the program. A single store group could be widened as // a whole, if there existed a single store instruction with the same // semantics as the entire group. In many cases, a single store group // may need more than one wide store. void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups) { InstrGroup AllInsns; // Copy all instruction pointers from the basic block to a temporary // list. This will allow operating on the list, and modifying its // elements without affecting the basic block. for (auto &I : MBB) AllInsns.push_back(&I); // Traverse all instructions in the AllInsns list, and if we encounter // a store, then try to create a store group starting at that instruction // i.e. a sequence of independent stores that can be widened. for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { MachineInstr *MI = *I; // Skip null pointers (processed instructions). if (!MI || !handledStoreType(MI)) continue; // Found a store. Try to create a store group. 
InstrGroup G; createStoreGroup(MI, I+1, E, G); if (G.size() > 1) StoreGroups.push_back(G); } } // Create a single store group. The stores need to be independent between // themselves, and also there cannot be other instructions between them // that could read or modify storage being stored into. void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { assert(handledStoreType(BaseStore) && "Unexpected instruction"); unsigned BaseReg = getBaseAddressRegister(BaseStore); InstrGroup Other; Group.push_back(BaseStore); for (auto I = Begin; I != End; ++I) { MachineInstr *MI = *I; if (!MI) continue; if (handledStoreType(MI)) { // If this store instruction is aliased with anything already in the // group, terminate the group now. if (instrAliased(Group, getStoreTarget(MI))) return; // If this store is aliased to any of the memory instructions we have // seen so far (that are not a part of this group), terminate the group. if (instrAliased(Other, getStoreTarget(MI))) return; unsigned BR = getBaseAddressRegister(MI); if (BR == BaseReg) { Group.push_back(MI); *I = nullptr; continue; } } // Assume calls are aliased to everything. if (MI->isCall() || MI->hasUnmodeledSideEffects()) return; if (MI->mayLoadOrStore()) { if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) return; Other.push_back(MI); } } // for } // Check if store instructions S1 and S2 are adjacent. More precisely, // S2 has to access memory immediately following that accessed by S1. bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2) { if (!handledStoreType(S1) || !handledStoreType(S2)) return false; const MachineMemOperand &S1MO = getStoreTarget(S1); // Currently only handling immediate stores. int Off1 = S1->getOperand(1).getImm(); int Off2 = S2->getOperand(1).getImm(); return (Off1 >= 0) ? 
Off1+S1MO.getSize() == unsigned(Off2) : int(Off1+S1MO.getSize()) == Off2; } /// Given a sequence of adjacent stores, and a maximum size of a single wide /// store, pick a group of stores that can be replaced by a single store /// of size not exceeding MaxSize. The selected sequence will be recorded /// in OG ("old group" of instructions). /// OG should be empty on entry, and should be left empty if the function /// fails. bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize) { assert(Begin != End && "No instructions to analyze"); assert(OG.empty() && "Old group not empty on entry"); if (std::distance(Begin, End) <= 1) return false; MachineInstr *FirstMI = *Begin; assert(!FirstMI->memoperands_empty() && "Expecting some memory operands"); const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI); unsigned Alignment = FirstMMO.getAlign().value(); unsigned SizeAccum = FirstMMO.getSize(); unsigned FirstOffset = getStoreOffset(FirstMI); // The initial value of SizeAccum should always be a power of 2. assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2"); // If the size of the first store equals to or exceeds the limit, do nothing. if (SizeAccum >= MaxSize) return false; // If the size of the first store is greater than or equal to the address // stored to, then the store cannot be made any wider. if (SizeAccum >= Alignment) return false; // The offset of a store will put restrictions on how wide the store can be. // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0. // If the first store already exhausts the offset limits, quit. Test this // by checking if the next wider size would exceed the limit. if ((2*SizeAccum-1) & FirstOffset) return false; OG.push_back(FirstMI); MachineInstr *S1 = FirstMI; // Pow2Num will be the largest number of elements in OG such that the sum // of sizes of stores 0...Pow2Num-1 will be a power of 2. 
unsigned Pow2Num = 1; unsigned Pow2Size = SizeAccum; // Be greedy: keep accumulating stores as long as they are to adjacent // memory locations, and as long as the total number of bytes stored // does not exceed the limit (MaxSize). // Keep track of when the total size covered is a power of 2, since // this is a size a single store can cover. for (InstrGroup::iterator I = Begin + 1; I != End; ++I) { MachineInstr *S2 = *I; // Stores are sorted, so if S1 and S2 are not adjacent, there won't be // any other store to fill the "hole". if (!storesAreAdjacent(S1, S2)) break; unsigned S2Size = getStoreTarget(S2).getSize(); if (SizeAccum + S2Size > std::min(MaxSize, Alignment)) break; OG.push_back(S2); SizeAccum += S2Size; if (isPowerOf2_32(SizeAccum)) { Pow2Num = OG.size(); Pow2Size = SizeAccum; } if ((2*Pow2Size-1) & FirstOffset) break; S1 = S2; } // The stores don't add up to anything that can be widened. Clean up. if (Pow2Num <= 1) { OG.clear(); return false; } // Only leave the stored being widened. OG.resize(Pow2Num); TotalSize = Pow2Size; return true; } /// Given an "old group" OG of stores, create a "new group" NG of instructions /// to replace them. Ideally, NG would only have a single instruction in it, /// but that may only be possible for store-immediate. bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize) { // XXX Current limitations: // - only expect stores of immediate values in OG, // - only handle a TotalSize of up to 4. if (TotalSize > 4) return false; unsigned Acc = 0; // Value accumulator. unsigned Shift = 0; for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) { MachineInstr *MI = *I; const MachineMemOperand &MMO = getStoreTarget(MI); MachineOperand &SO = MI->getOperand(2); // Source. 
assert(SO.isImm() && "Expecting an immediate operand"); unsigned NBits = MMO.getSize()*8; unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); unsigned Val = (SO.getImm() & Mask) << Shift; Acc |= Val; Shift += NBits; } MachineInstr *FirstSt = OG.front(); DebugLoc DL = OG.back()->getDebugLoc(); const MachineMemOperand &OldM = getStoreTarget(FirstSt); MachineMemOperand *NewM = MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), TotalSize, OldM.getAlign(), OldM.getAAInfo()); if (Acc < 0x10000) { // Create mem[hw] = #Acc unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; assert(WOpc && "Unexpected size"); int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); const MCInstrDesc &StD = TII->get(WOpc); MachineOperand &MR = FirstSt->getOperand(0); int64_t Off = FirstSt->getOperand(1).getImm(); MachineInstr *StI = BuildMI(*MF, DL, StD) .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) .addImm(Off) .addImm(Val); StI->addMemOperand(*MF, NewM); NG.push_back(StI); } else { // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); Register VReg = MF->getRegInfo().createVirtualRegister(RC); MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) .addImm(int(Acc)); NG.push_back(TfrI); unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io : (TotalSize == 4) ? Hexagon::S2_storeri_io : 0; assert(WOpc && "Unexpected size"); const MCInstrDesc &StD = TII->get(WOpc); MachineOperand &MR = FirstSt->getOperand(0); int64_t Off = FirstSt->getOperand(1).getImm(); MachineInstr *StI = BuildMI(*MF, DL, StD) .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) .addImm(Off) .addReg(VReg, RegState::Kill); StI->addMemOperand(*MF, NewM); NG.push_back(StI); } return true; } // Replace instructions from the old group OG with instructions from the // new group NG. 
Conceptually, remove all instructions in OG, and then // insert all instructions in NG, starting at where the first instruction // from OG was (in the order in which they appeared in the basic block). // (The ordering in OG does not have to match the order in the basic block.) bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { LLVM_DEBUG({ dbgs() << "Replacing:\n"; for (auto I : OG) dbgs() << " " << *I; dbgs() << "with\n"; for (auto I : NG) dbgs() << " " << *I; }); MachineBasicBlock *MBB = OG.back()->getParent(); MachineBasicBlock::iterator InsertAt = MBB->end(); // Need to establish the insertion point. The best one is right before // the first store in the OG, but in the order in which the stores occur // in the program list. Since the ordering in OG does not correspond // to the order in the program list, we need to do some work to find // the insertion point. // Create a set of all instructions in OG (for quick lookup). SmallPtrSet InstrSet; for (auto I : OG) InstrSet.insert(I); // Traverse the block, until we hit an instruction from OG. for (auto &I : *MBB) { if (InstrSet.count(&I)) { InsertAt = I; break; } } assert((InsertAt != MBB->end()) && "Cannot locate any store from the group"); bool AtBBStart = false; // InsertAt points at the first instruction that will be removed. We need // to move it out of the way, so it remains valid after removing all the // old stores, and so we are able to recover it back to the proper insertion // position. if (InsertAt != MBB->begin()) --InsertAt; else AtBBStart = true; for (auto I : OG) I->eraseFromParent(); if (!AtBBStart) ++InsertAt; else InsertAt = MBB->begin(); for (auto I : NG) MBB->insert(InsertAt, I); return true; } // Break up the group into smaller groups, each of which can be replaced by // a single wide store. Widen each such smaller group and replace the old // instructions with the widened ones. 
bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { bool Changed = false; InstrGroup::iterator I = Group.begin(), E = Group.end(); InstrGroup OG, NG; // Old and new groups. unsigned CollectedSize; while (I != E) { OG.clear(); NG.clear(); bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && createWideStores(OG, NG, CollectedSize) && replaceStores(OG, NG); if (!Succ) continue; assert(OG.size() > 1 && "Created invalid group"); assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); I += OG.size()-1; Changed = true; } return Changed; } // Process a single basic block: create the store groups, and replace them // with the widened stores, if possible. Processing of each basic block // is independent from processing of any other basic block. This transfor- // mation could be stopped after having processed any basic block without // any ill effects (other than not having performed widening in the unpro- // cessed blocks). Also, the basic blocks can be processed in any order. bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { InstrGroupList SGs; bool Changed = false; createStoreGroups(MBB, SGs); auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool { return getStoreOffset(A) < getStoreOffset(B); }; for (auto &G : SGs) { assert(G.size() > 1 && "Store group with fewer than 2 elements"); llvm::sort(G, Less); Changed |= processStoreGroup(G); } return Changed; } bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { if (skipFunction(MFn.getFunction())) return false; MF = &MFn; auto &ST = MFn.getSubtarget(); TII = ST.getInstrInfo(); TRI = ST.getRegisterInfo(); MRI = &MFn.getRegInfo(); AA = &getAnalysis().getAAResults(); bool Changed = false; for (auto &B : MFn) Changed |= processBasicBlock(B); return Changed; } FunctionPass *llvm::createHexagonStoreWidening() { return new HexagonStoreWidening(); }