
[Hexagon] Optimize stack slot spills

Replace spills to memory with spills to registers, if possible. This
applies mostly to predicate registers (both scalar and vector), since
they are very limited in number. A spill of a predicate register may
happen even if there is a general-purpose register available. In cases
like this the stack spill/reload may be eliminated completely.

This optimization considers all stack objects, regardless of where they
came from, and tries to match the live range of the stack slot with a
dead range of a register from an appropriate register class.
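
For example (an illustrative sketch; register numbers are hypothetical),
a predicate spill that was expanded into a transfer-plus-store, such as

    r0 = p0
    memw(r29+#4) = r0        // spill p0
    ...
    r0 = memw(r29+#4)        // reload
    p0 = r0

can instead keep the value in a general-purpose register that is dead
over the same range, eliminating the memory traffic:

    r7 = p0
    ...
    p0 = r7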

llvm-svn: 260758
Krzysztof Parzyszek 2016-02-12 22:53:35 +00:00
parent 85466f02fc
commit 702277f07f
10 changed files with 1442 additions and 6 deletions


@@ -17,6 +17,7 @@ add_llvm_target(HexagonCodeGen
HexagonAsmPrinter.cpp
HexagonBitSimplify.cpp
HexagonBitTracker.cpp
HexagonBlockRanges.cpp
HexagonCFGOptimizer.cpp
HexagonCommonGEP.cpp
HexagonCopyToCombine.cpp


@@ -0,0 +1,484 @@
//===--- HexagonBlockRanges.cpp -------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hbr"
#include "HexagonBlockRanges.h"
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <vector>
using namespace llvm;
bool HexagonBlockRanges::IndexRange::overlaps(const IndexRange &A) const {
// If A contains start(), or "this" contains A.start(), then overlap.
IndexType S = start(), E = end(), AS = A.start(), AE = A.end();
if (AS == S)
return true;
bool SbAE = (S < AE) || (S == AE && A.TiedEnd); // S-before-AE.
bool ASbE = (AS < E) || (AS == E && TiedEnd); // AS-before-E.
if ((AS < S && SbAE) || (S < AS && ASbE))
return true;
// Otherwise no overlap.
return false;
}
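// Examples, in the notation of the printers at the end of this file:
// [1:4] and [3:6] overlap; [1:4] and [5:8] do not. [1:4] and [4:6] only
// touch at index 4, which counts as an overlap only when the end of
// [1:4] is a TiedEnd.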
bool HexagonBlockRanges::IndexRange::contains(const IndexRange &A) const {
if (start() <= A.start()) {
// Treat "None" in the range end as equal to the range start.
IndexType E = (end() != IndexType::None) ? end() : start();
IndexType AE = (A.end() != IndexType::None) ? A.end() : A.start();
if (AE <= E)
return true;
}
return false;
}
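// For example, [1:8] contains [3:5] as well as itself; a range with no
// end, such as [3:-], is treated as the single index 3 for this test.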
void HexagonBlockRanges::IndexRange::merge(const IndexRange &A) {
// Allow merging adjacent ranges.
assert(end() == A.start() || overlaps(A));
IndexType AS = A.start(), AE = A.end();
if (AS < start() || start() == IndexType::None)
setStart(AS);
if (end() < AE || end() == IndexType::None) {
setEnd(AE);
TiedEnd = A.TiedEnd;
} else {
if (end() == AE)
TiedEnd |= A.TiedEnd;
}
if (A.Fixed)
Fixed = true;
}
void HexagonBlockRanges::RangeList::include(const RangeList &RL) {
for (auto &R : RL)
if (std::find(begin(), end(), R) == end())
push_back(R);
}
// Merge all overlapping ranges in the list, so that all that remains
// is a list of disjoint ranges.
void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) {
if (empty())
return;
std::sort(begin(), end());
iterator Iter = begin();
while (Iter != end()-1) {
iterator Next = std::next(Iter);
// If MergeAdjacent is true, merge ranges A and B, where A.end == B.start.
// This allows merging dead ranges, but is not valid for live ranges.
bool Merge = MergeAdjacent && (Iter->end() == Next->start());
if (Merge || Iter->overlaps(*Next)) {
Iter->merge(*Next);
erase(Next);
continue;
}
++Iter;
}
}
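// For example, unionize() turns { [1:4] [3:6] [8:9] } into { [1:6] [8:9] };
// with MergeAdjacent set, { [1:4] [4:6] } also collapses to { [1:6] }.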
// Compute a range A-B and add it to the list.
void HexagonBlockRanges::RangeList::addsub(const IndexRange &A,
const IndexRange &B) {
// Exclusion of non-overlapping ranges makes some checks simpler
// later in this function.
if (!A.overlaps(B)) {
// A - B = A.
add(A);
return;
}
IndexType AS = A.start(), AE = A.end();
IndexType BS = B.start(), BE = B.end();
// If AE is None, then A is included in B, since A and B overlap.
// The result of the subtraction is empty, so just return.
if (AE == IndexType::None)
return;
if (AS < BS) {
// A starts before B.
// AE cannot be None since A and B overlap.
assert(AE != IndexType::None);
// Add the part of A that extends on the "less" side of B.
add(AS, BS, A.Fixed, false);
}
if (BE < AE) {
// BE cannot be Exit here.
if (BE == IndexType::None)
add(BS, AE, A.Fixed, false);
else
add(BE, AE, A.Fixed, false);
}
}
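// For example, addsub([1:8], [3:5]) adds [1:3] and [5:8] to the list,
// while addsub([1:8], [6:x]) adds only [1:6].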
// Subtract a given range from each element in the list.
void HexagonBlockRanges::RangeList::subtract(const IndexRange &Range) {
// Cannot assume that the list is unionized (i.e. that it contains only
// non-overlapping ranges).
RangeList T;
for (iterator Next, I = begin(); I != end(); I = Next) {
IndexRange &Rg = *I;
if (Rg.overlaps(Range)) {
T.addsub(Rg, Range);
Next = this->erase(I);
} else {
Next = std::next(I);
}
}
include(T);
}
HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B)
: Block(B) {
IndexType Idx = IndexType::First;
First = Idx;
for (auto &In : B) {
if (In.isDebugValue())
continue;
assert(getIndex(&In) == IndexType::None && "Instruction already in map");
Map.insert(std::make_pair(Idx, &In));
++Idx;
}
Last = B.empty() ? IndexType::None : unsigned(Idx)-1;
}
MachineInstr *HexagonBlockRanges::InstrIndexMap::getInstr(IndexType Idx) const {
auto F = Map.find(Idx);
return (F != Map.end()) ? F->second : nullptr;
}
HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getIndex(
MachineInstr *MI) const {
for (auto &I : Map)
if (I.second == MI)
return I.first;
return IndexType::None;
}
HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getPrevIndex(
IndexType Idx) const {
assert (Idx != IndexType::None);
if (Idx == IndexType::Entry)
return IndexType::None;
if (Idx == IndexType::Exit)
return Last;
if (Idx == First)
return IndexType::Entry;
return unsigned(Idx)-1;
}
HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getNextIndex(
IndexType Idx) const {
assert (Idx != IndexType::None);
if (Idx == IndexType::Entry)
return IndexType::First;
if (Idx == IndexType::Exit || Idx == Last)
return IndexType::None;
return unsigned(Idx)+1;
}
void HexagonBlockRanges::InstrIndexMap::replaceInstr(MachineInstr *OldMI,
MachineInstr *NewMI) {
for (auto &I : Map) {
if (I.second != OldMI)
continue;
if (NewMI != nullptr)
I.second = NewMI;
else
Map.erase(I.first);
break;
}
}
HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf)
: MF(mf), HST(mf.getSubtarget<HexagonSubtarget>()),
TII(*HST.getInstrInfo()), TRI(*HST.getRegisterInfo()),
Reserved(TRI.getReservedRegs(mf)) {
// Consider all non-allocatable registers as reserved.
for (auto I = TRI.regclass_begin(), E = TRI.regclass_end(); I != E; ++I) {
auto *RC = *I;
if (RC->isAllocatable())
continue;
for (unsigned R : *RC)
Reserved[R] = true;
}
}
HexagonBlockRanges::RegisterSet HexagonBlockRanges::getLiveIns(
const MachineBasicBlock &B) {
RegisterSet LiveIns;
for (auto I : B.liveins())
if (!Reserved[I.PhysReg])
LiveIns.insert({I.PhysReg, 0});
return LiveIns;
}
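// Expand a register reference into the set of its subregister leaves.
// For example, on Hexagon the double register D0 expands to its
// subregisters R0 and R1, while a register with no subregisters expands
// to itself.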
HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs(
RegisterRef R, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) {
RegisterSet SRs;
if (R.Sub != 0) {
SRs.insert(R);
return SRs;
}
if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) {
MCSubRegIterator I(R.Reg, &TRI);
if (!I.isValid())
SRs.insert({R.Reg, 0});
for (; I.isValid(); ++I)
SRs.insert({*I, 0});
} else {
assert(TargetRegisterInfo::isVirtualRegister(R.Reg));
auto &RC = *MRI.getRegClass(R.Reg);
unsigned PReg = *RC.begin();
MCSubRegIndexIterator I(PReg, &TRI);
if (!I.isValid())
SRs.insert({R.Reg, 0});
for (; I.isValid(); ++I)
SRs.insert({R.Reg, I.getSubRegIndex()});
}
return SRs;
}
void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
RegToRangeMap &LiveMap) {
std::map<RegisterRef,IndexType> LastDef, LastUse;
RegisterSet LiveOnEntry;
MachineBasicBlock &B = IndexMap.getBlock();
MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
for (auto R : getLiveIns(B))
for (auto S : expandToSubRegs(R, MRI, TRI))
LiveOnEntry.insert(S);
for (auto R : LiveOnEntry)
LastDef[R] = IndexType::Entry;
auto closeRange = [&LastUse,&LastDef,&LiveMap] (RegisterRef R) -> void {
auto LD = LastDef[R], LU = LastUse[R];
if (LD == IndexType::None)
LD = IndexType::Entry;
if (LU == IndexType::None)
LU = IndexType::Exit;
LiveMap[R].add(LD, LU, false, false);
LastUse[R] = LastDef[R] = IndexType::None;
};
for (auto &In : B) {
if (In.isDebugValue())
continue;
IndexType Index = IndexMap.getIndex(&In);
// Process uses first.
for (auto &Op : In.operands()) {
if (!Op.isReg() || !Op.isUse() || Op.isUndef())
continue;
RegisterRef R = { Op.getReg(), Op.getSubReg() };
if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg])
continue;
bool IsKill = Op.isKill();
for (auto S : expandToSubRegs(R, MRI, TRI)) {
LastUse[S] = Index;
if (IsKill)
closeRange(S);
}
}
// Process defs.
for (auto &Op : In.operands()) {
if (!Op.isReg() || !Op.isDef() || Op.isUndef())
continue;
RegisterRef R = { Op.getReg(), Op.getSubReg() };
if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg])
continue;
for (auto S : expandToSubRegs(R, MRI, TRI)) {
if (LastDef[S] != IndexType::None)
closeRange(S);
LastDef[S] = Index;
}
}
}
// Collect live-on-exit.
RegisterSet LiveOnExit;
for (auto *SB : B.successors())
for (auto R : getLiveIns(*SB))
for (auto S : expandToSubRegs(R, MRI, TRI))
LiveOnExit.insert(S);
for (auto R : LiveOnExit)
LastUse[R] = IndexType::Exit;
// Process remaining registers.
RegisterSet Left;
for (auto &I : LastUse)
if (I.second != IndexType::None)
Left.insert(I.first);
for (auto &I : LastDef)
if (I.second != IndexType::None)
Left.insert(I.first);
for (auto R : Left)
closeRange(R);
// Finalize the live ranges.
for (auto &P : LiveMap)
P.second.unionize();
}
HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeLiveMap(
InstrIndexMap &IndexMap) {
RegToRangeMap LiveMap;
DEBUG(dbgs() << __func__ << ": index map\n" << IndexMap << '\n');
computeInitialLiveRanges(IndexMap, LiveMap);
DEBUG(dbgs() << __func__ << ": live map\n"
<< PrintRangeMap(LiveMap, TRI) << '\n');
return LiveMap;
}
HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap(
InstrIndexMap &IndexMap, RegToRangeMap &LiveMap) {
RegToRangeMap DeadMap;
auto addDeadRanges = [&IndexMap,&LiveMap,&DeadMap] (RegisterRef R) -> void {
auto F = LiveMap.find(R);
if (F == LiveMap.end() || F->second.empty()) {
DeadMap[R].add(IndexType::Entry, IndexType::Exit, false, false);
return;
}
RangeList &RL = F->second;
RangeList::iterator A = RL.begin(), Z = RL.end()-1;
// Try to create the initial range.
if (A->start() != IndexType::Entry) {
IndexType DE = IndexMap.getPrevIndex(A->start());
if (DE != IndexType::Entry)
DeadMap[R].add(IndexType::Entry, DE, false, false);
}
while (A != Z) {
// Creating a dead range that follows A. Pay attention to empty
// ranges (i.e. those ending with "None").
IndexType AE = (A->end() == IndexType::None) ? A->start() : A->end();
IndexType DS = IndexMap.getNextIndex(AE);
++A;
IndexType DE = IndexMap.getPrevIndex(A->start());
if (DS < DE)
DeadMap[R].add(DS, DE, false, false);
}
// Try to create the final range.
if (Z->end() != IndexType::Exit) {
IndexType ZE = (Z->end() == IndexType::None) ? Z->start() : Z->end();
IndexType DS = IndexMap.getNextIndex(ZE);
if (DS < IndexType::Exit)
DeadMap[R].add(DS, IndexType::Exit, false, false);
}
};
MachineFunction &MF = *IndexMap.getBlock().getParent();
auto &MRI = MF.getRegInfo();
unsigned NumRegs = TRI.getNumRegs();
BitVector Visited(NumRegs);
for (unsigned R = 1; R < NumRegs; ++R) {
for (auto S : expandToSubRegs({R,0}, MRI, TRI)) {
if (Reserved[S.Reg] || Visited[S.Reg])
continue;
addDeadRanges(S);
Visited[S.Reg] = true;
}
}
for (auto &P : LiveMap)
if (TargetRegisterInfo::isVirtualRegister(P.first.Reg))
addDeadRanges(P.first);
DEBUG(dbgs() << __func__ << ": dead map\n"
<< PrintRangeMap(DeadMap, TRI) << '\n');
return DeadMap;
}
raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx) {
if (Idx == HexagonBlockRanges::IndexType::None)
return OS << '-';
if (Idx == HexagonBlockRanges::IndexType::Entry)
return OS << 'n';
if (Idx == HexagonBlockRanges::IndexType::Exit)
return OS << 'x';
return OS << unsigned(Idx)-HexagonBlockRanges::IndexType::First+1;
}
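// Notation used by these printers: '-' is None, 'n' is Entry, 'x' is
// Exit, and instructions are numbered starting from 1. A range prints as
// [start:end], with '}' in place of ']' when TiedEnd is set, and with a
// trailing '!' when the range is Fixed.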
// A mapping to translate between instructions and their indices.
raw_ostream &operator<< (raw_ostream &OS,
const HexagonBlockRanges::IndexRange &IR) {
OS << '[' << IR.start() << ':' << IR.end() << (IR.TiedEnd ? '}' : ']');
if (IR.Fixed)
OS << '!';
return OS;
}
raw_ostream &operator<< (raw_ostream &OS,
const HexagonBlockRanges::RangeList &RL) {
for (auto &R : RL)
OS << R << " ";
return OS;
}
raw_ostream &operator<< (raw_ostream &OS,
const HexagonBlockRanges::InstrIndexMap &M) {
for (auto &In : M.Block) {
HexagonBlockRanges::IndexType Idx = M.getIndex(&In);
OS << Idx << (Idx == M.Last ? ". " : " ") << In;
}
return OS;
}
raw_ostream &operator<< (raw_ostream &OS,
const HexagonBlockRanges::PrintRangeMap &P) {
for (auto &I : P.Map) {
const HexagonBlockRanges::RangeList &RL = I.second;
OS << PrintReg(I.first.Reg, &P.TRI, I.first.Sub) << " -> " << RL << "\n";
}
return OS;
}


@@ -0,0 +1,240 @@
//===--- HexagonBlockRanges.h ---------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef HEXAGON_BLOCK_RANGES_H
#define HEXAGON_BLOCK_RANGES_H
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/MC/MCRegisterInfo.h" // For MCPhysReg.
#include <map>
#include <set>
#include <vector>
namespace llvm {
class Function;
class HexagonSubtarget;
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
class MCInstrDesc;
class raw_ostream;
class TargetInstrInfo;
class TargetRegisterClass;
class TargetRegisterInfo;
class Type;
}
using namespace llvm;
struct HexagonBlockRanges {
HexagonBlockRanges(MachineFunction &MF);
struct RegisterRef {
unsigned Reg, Sub;
bool operator<(RegisterRef R) const {
return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub);
}
};
typedef std::set<RegisterRef> RegisterSet;
// This is to represent an "index", which is an abstraction of a position
// of an instruction within a basic block.
class IndexType {
public:
enum : unsigned {
None = 0,
Entry = 1,
Exit = 2,
First = 11 // 10th + 1st
};
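// Values below First are reserved for the special markers above;
// isInstr() treats anything at or above First as a real instruction index.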
static bool isInstr(IndexType X) { return X.Index >= First; }
IndexType() : Index(None) {}
IndexType(unsigned Idx) : Index(Idx) {}
operator unsigned() const;
bool operator== (unsigned x) const;
bool operator== (IndexType Idx) const;
bool operator!= (unsigned x) const;
bool operator!= (IndexType Idx) const;
IndexType operator++ ();
bool operator< (unsigned Idx) const;
bool operator< (IndexType Idx) const;
bool operator<= (IndexType Idx) const;
private:
bool operator> (IndexType Idx) const;
bool operator>= (IndexType Idx) const;
unsigned Index;
};
// A range of indices, essentially a representation of a live range.
// This is also used to represent "dead ranges", i.e. ranges where a
// register is dead.
class IndexRange : public std::pair<IndexType,IndexType> {
public:
IndexRange() : Fixed(false), TiedEnd(false) {}
IndexRange(IndexType Start, IndexType End, bool F = false, bool T = false)
: std::pair<IndexType,IndexType>(Start, End), Fixed(F), TiedEnd(T) {}
IndexType start() const { return first; }
IndexType end() const { return second; }
bool operator< (const IndexRange &A) const {
return start() < A.start();
}
bool overlaps(const IndexRange &A) const;
bool contains(const IndexRange &A) const;
void merge(const IndexRange &A);
bool Fixed; // Can be renamed? "Fixed" means "no".
bool TiedEnd; // The end is not a use, but a dead def tied to a use.
private:
void setStart(const IndexType &S) { first = S; }
void setEnd(const IndexType &E) { second = E; }
};
// A list of index ranges. This represents liveness of a register
// in a basic block.
class RangeList : public std::vector<IndexRange> {
public:
void add(IndexType Start, IndexType End, bool Fixed, bool TiedEnd) {
push_back(IndexRange(Start, End, Fixed, TiedEnd));
}
void add(const IndexRange &Range) {
push_back(Range);
}
void include(const RangeList &RL);
void unionize(bool MergeAdjacent = false);
void subtract(const IndexRange &Range);
private:
void addsub(const IndexRange &A, const IndexRange &B);
};
class InstrIndexMap {
public:
InstrIndexMap(MachineBasicBlock &B);
MachineInstr *getInstr(IndexType Idx) const;
IndexType getIndex(MachineInstr *MI) const;
MachineBasicBlock &getBlock() const { return Block; }
IndexType getPrevIndex(IndexType Idx) const;
IndexType getNextIndex(IndexType Idx) const;
void replaceInstr(MachineInstr *OldMI, MachineInstr *NewMI);
friend raw_ostream &operator<< (raw_ostream &OS, const InstrIndexMap &Map);
IndexType First, Last;
private:
MachineBasicBlock &Block;
std::map<IndexType,MachineInstr*> Map;
};
typedef std::map<RegisterRef,RangeList> RegToRangeMap;
RegToRangeMap computeLiveMap(InstrIndexMap &IndexMap);
RegToRangeMap computeDeadMap(InstrIndexMap &IndexMap, RegToRangeMap &LiveMap);
static RegisterSet expandToSubRegs(RegisterRef R,
const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI);
struct PrintRangeMap {
PrintRangeMap(const RegToRangeMap &M, const TargetRegisterInfo &I)
: Map(M), TRI(I) {}
friend raw_ostream &operator<< (raw_ostream &OS, const PrintRangeMap &P);
private:
const RegToRangeMap &Map;
const TargetRegisterInfo &TRI;
};
private:
RegisterSet getLiveIns(const MachineBasicBlock &B);
void computeInitialLiveRanges(InstrIndexMap &IndexMap,
RegToRangeMap &LiveMap);
MachineFunction &MF;
const HexagonSubtarget &HST;
const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
BitVector Reserved;
};
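// A minimal usage sketch (illustrative): compute the live and dead range
// maps for a basic block B of a machine function MF, the way the
// spill-slot optimization in HexagonFrameLowering uses this class.
//
//   HexagonBlockRanges HBR(MF);
//   HexagonBlockRanges::InstrIndexMap IM(B);
//   HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
//   HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);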
inline HexagonBlockRanges::IndexType::operator unsigned() const {
assert(Index >= First);
return Index;
}
inline bool HexagonBlockRanges::IndexType::operator== (unsigned x) const {
return Index == x;
}
inline bool HexagonBlockRanges::IndexType::operator== (IndexType Idx) const {
return Index == Idx.Index;
}
inline bool HexagonBlockRanges::IndexType::operator!= (unsigned x) const {
return Index != x;
}
inline bool HexagonBlockRanges::IndexType::operator!= (IndexType Idx) const {
return Index != Idx.Index;
}
inline
HexagonBlockRanges::IndexType HexagonBlockRanges::IndexType::operator++ () {
assert(Index != None);
assert(Index != Exit);
if (Index == Entry)
Index = First;
else
++Index;
return *this;
}
inline bool HexagonBlockRanges::IndexType::operator< (unsigned Idx) const {
return operator< (IndexType(Idx));
}
inline bool HexagonBlockRanges::IndexType::operator< (IndexType Idx) const {
// !(x < x).
if (Index == Idx.Index)
return false;
// !(None < x) for all x.
// !(x < None) for all x.
if (Index == None || Idx.Index == None)
return false;
// !(Exit < x) for all x.
// !(x < Entry) for all x.
if (Index == Exit || Idx.Index == Entry)
return false;
// Entry < x for all x != Entry.
// x < Exit for all x != Exit.
if (Index == Entry || Idx.Index == Exit)
return true;
return Index < Idx.Index;
}
inline bool HexagonBlockRanges::IndexType::operator<= (IndexType Idx) const {
return operator==(Idx) || operator<(Idx);
}
raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx);
raw_ostream &operator<< (raw_ostream &OS,
const HexagonBlockRanges::IndexRange &IR);
raw_ostream &operator<< (raw_ostream &OS,
const HexagonBlockRanges::RangeList &RL);
raw_ostream &operator<< (raw_ostream &OS,
const HexagonBlockRanges::InstrIndexMap &M);
raw_ostream &operator<< (raw_ostream &OS,
const HexagonBlockRanges::PrintRangeMap &P);
#endif


@@ -10,6 +10,7 @@
#define DEBUG_TYPE "hexagon-pei"
#include "HexagonBlockRanges.h"
#include "HexagonFrameLowering.h"
#include "HexagonInstrInfo.h"
#include "HexagonMachineFunctionInfo.h"
@@ -147,6 +148,9 @@ static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
cl::Hidden, cl::desc("Use allocframe more conservatively"));
static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
cl::init(true), cl::desc("Optimize spill slots"));
namespace llvm {
void initializeHexagonCallFrameInformationPass(PassRegistry&);
@@ -1046,13 +1050,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
// Check for an unused caller-saved register.
for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
MCPhysReg FreeReg = *CallerSavedRegs;
- if (!MRI.reg_nodbg_empty(FreeReg))
+ if (MRI.isPhysRegUsed(FreeReg))
continue;
// Check aliased register usage.
bool IsCurrentRegUsed = false;
for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
- if (!MRI.reg_nodbg_empty(*AI)) {
+ if (MRI.isPhysRegUsed(*AI)) {
IsCurrentRegUsed = true;
break;
}
@@ -1634,7 +1638,8 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Replace predicate register pseudo spill code.
SmallVector<unsigned,8> NewRegs;
expandSpillMacros(MF, NewRegs);
if (OptimizeSpillSlots)
optimizeSpillSlots(MF, NewRegs);
// We need to reserve a spill slot if scavenging could potentially require
// spilling a scavenged register.
@@ -1665,6 +1670,354 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF,
HexagonBlockRanges::IndexRange &FIR,
HexagonBlockRanges::InstrIndexMap &IndexMap,
HexagonBlockRanges::RegToRangeMap &DeadMap,
const TargetRegisterClass *RC) const {
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
auto &MRI = MF.getRegInfo();
auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool {
auto F = DeadMap.find({Reg,0});
if (F == DeadMap.end())
return false;
for (auto &DR : F->second)
if (DR.contains(FIR))
return true;
return false;
};
for (unsigned Reg : RC->getRawAllocationOrder(MF)) {
bool Dead = true;
for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {
if (isDead(R.Reg))
continue;
Dead = false;
break;
}
if (Dead)
return Reg;
}
return 0;
}
void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
SmallVectorImpl<unsigned> &VRegs) const {
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
auto &MRI = MF.getRegInfo();
HexagonBlockRanges HBR(MF);
typedef std::map<MachineBasicBlock*,HexagonBlockRanges::InstrIndexMap>
BlockIndexMap;
typedef std::map<MachineBasicBlock*,HexagonBlockRanges::RangeList>
BlockRangeMap;
typedef HexagonBlockRanges::IndexType IndexType;
struct SlotInfo {
BlockRangeMap Map;
unsigned Size = 0;
const TargetRegisterClass *RC = nullptr;
};
BlockIndexMap BlockIndexes;
SmallSet<int,4> BadFIs;
std::map<int,SlotInfo> FIRangeMap;
auto getRegClass = [&MRI,&HRI] (HexagonBlockRanges::RegisterRef R)
-> const TargetRegisterClass* {
if (TargetRegisterInfo::isPhysicalRegister(R.Reg))
assert(R.Sub == 0);
if (TargetRegisterInfo::isVirtualRegister(R.Reg)) {
auto *RCR = MRI.getRegClass(R.Reg);
if (R.Sub == 0)
return RCR;
unsigned PR = *RCR->begin();
R.Reg = HRI.getSubReg(PR, R.Sub);
}
return HRI.getMinimalPhysRegClass(R.Reg);
};
// Accumulate register classes: get a common class for a pre-existing
// class HaveRC and a new class NewRC. Return nullptr if a common class
// cannot be found, otherwise return the resulting class. If HaveRC is
// nullptr, assume that it is still unset.
auto getCommonRC = [&HRI] (const TargetRegisterClass *HaveRC,
const TargetRegisterClass *NewRC)
-> const TargetRegisterClass* {
if (HaveRC == nullptr || HaveRC == NewRC)
return NewRC;
// Different classes, both non-null. Pick the more general one.
if (HaveRC->hasSubClassEq(NewRC))
return HaveRC;
if (NewRC->hasSubClassEq(HaveRC))
return NewRC;
return nullptr;
};
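// For example, combining a class with itself or with one of its
// subclasses yields the more general of the two, while two unrelated
// classes (e.g. scalar vs. vector registers) yield nullptr, causing the
// caller to mark the slot as bad.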
// Scan all blocks in the function. Check all occurrences of frame indexes,
// and collect relevant information.
for (auto &B : MF) {
std::map<int,IndexType> LastStore, LastLoad;
auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));
auto &IndexMap = P.first->second;
DEBUG(dbgs() << "Index map for BB#" << B.getNumber() << "\n"
<< IndexMap << '\n');
for (auto &In : B) {
int LFI, SFI;
bool Load = HII.isLoadFromStackSlot(&In, LFI) && !HII.isPredicated(&In);
bool Store = HII.isStoreToStackSlot(&In, SFI) && !HII.isPredicated(&In);
if (Load && Store) {
// If it's both a load and a store, then we won't handle it.
BadFIs.insert(LFI);
BadFIs.insert(SFI);
continue;
}
// Check the register classes of the register used as the source for
// the store, and of the register used as the destination of the load.
// Also, only accept base+imm_offset addressing modes. Other addressing
// modes can have side-effects (post-increments, etc.). For stack
// slots they are very unlikely, so there is not much loss due to
// this restriction.
if (Load || Store) {
int TFI = Load ? LFI : SFI;
unsigned AM = HII.getAddrMode(&In);
SlotInfo &SI = FIRangeMap[TFI];
bool Bad = (AM != HexagonII::BaseImmOffset);
if (!Bad) {
// If the addressing mode is ok, check the register class.
const TargetRegisterClass *RC = nullptr;
if (Load) {
MachineOperand &DataOp = In.getOperand(0);
RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()});
} else {
MachineOperand &DataOp = In.getOperand(2);
RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()});
}
RC = getCommonRC(SI.RC, RC);
if (RC == nullptr)
Bad = true;
else
SI.RC = RC;
}
if (!Bad) {
// Check sizes.
unsigned S = (1U << (HII.getMemAccessSize(&In) - 1));
if (SI.Size != 0 && SI.Size != S)
Bad = true;
else
SI.Size = S;
}
if (Bad)
BadFIs.insert(TFI);
}
// Locate uses of frame indices.
for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) {
const MachineOperand &Op = In.getOperand(i);
if (!Op.isFI())
continue;
int FI = Op.getIndex();
// Make sure that the following operand is an immediate and that
// it is 0. This is the offset in the stack object.
if (i+1 >= n || !In.getOperand(i+1).isImm() ||
In.getOperand(i+1).getImm() != 0)
BadFIs.insert(FI);
if (BadFIs.count(FI))
continue;
IndexType Index = IndexMap.getIndex(&In);
if (Load) {
if (LastStore[FI] == IndexType::None)
LastStore[FI] = IndexType::Entry;
LastLoad[FI] = Index;
} else if (Store) {
HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
if (LastStore[FI] != IndexType::None)
RL.add(LastStore[FI], LastLoad[FI], false, false);
else if (LastLoad[FI] != IndexType::None)
RL.add(IndexType::Entry, LastLoad[FI], false, false);
LastLoad[FI] = IndexType::None;
LastStore[FI] = Index;
} else {
BadFIs.insert(FI);
}
}
}
for (auto &I : LastLoad) {
IndexType LL = I.second;
if (LL == IndexType::None)
continue;
auto &RL = FIRangeMap[I.first].Map[&B];
IndexType &LS = LastStore[I.first];
if (LS != IndexType::None)
RL.add(LS, LL, false, false);
else
RL.add(IndexType::Entry, LL, false, false);
LS = IndexType::None;
}
for (auto &I : LastStore) {
IndexType LS = I.second;
if (LS == IndexType::None)
continue;
auto &RL = FIRangeMap[I.first].Map[&B];
RL.add(LS, IndexType::None, false, false);
}
}
DEBUG({
for (auto &P : FIRangeMap) {
dbgs() << "fi#" << P.first;
if (BadFIs.count(P.first))
dbgs() << " (bad)";
dbgs() << " RC: ";
if (P.second.RC != nullptr)
dbgs() << HRI.getRegClassName(P.second.RC) << '\n';
else
dbgs() << "<null>\n";
for (auto &R : P.second.Map)
dbgs() << " BB#" << R.first->getNumber() << " { " << R.second << "}\n";
}
});
// When a slot is loaded from in a block without being stored to in the
// same block, it is live-on-entry to this block. To avoid CFG analysis,
// consider this slot to be live-on-exit from all blocks.
SmallSet<int,4> LoxFIs;
std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;
for (auto &P : FIRangeMap) {
// P = pair(FI, map: BB->RangeList)
if (BadFIs.count(P.first))
continue;
for (auto &B : MF) {
auto F = P.second.Map.find(&B);
// F = pair(BB, RangeList)
if (F == P.second.Map.end() || F->second.empty())
continue;
HexagonBlockRanges::IndexRange &IR = F->second.front();
if (IR.start() == IndexType::Entry)
LoxFIs.insert(P.first);
BlockFIMap[&B].push_back(P.first);
}
}
DEBUG({
dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
for (auto &P : BlockFIMap) {
auto &FIs = P.second;
if (FIs.empty())
continue;
dbgs() << " BB#" << P.first->getNumber() << ": {";
for (auto I : FIs) {
dbgs() << " fi#" << I;
if (LoxFIs.count(I))
dbgs() << '*';
}
dbgs() << " }\n";
}
});
// Eliminate loads; when all loads from a slot have been eliminated,
// eliminate the stores to it as well.
for (auto &B : MF) {
auto F = BlockIndexes.find(&B);
assert(F != BlockIndexes.end());
HexagonBlockRanges::InstrIndexMap &IM = F->second;
HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
DEBUG(dbgs() << "BB#" << B.getNumber() << " dead map\n"
<< HexagonBlockRanges::PrintRangeMap(DM, HRI));
for (auto FI : BlockFIMap[&B]) {
if (BadFIs.count(FI))
continue;
DEBUG(dbgs() << "Working on fi#" << FI << '\n');
HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
for (auto &Range : RL) {
DEBUG(dbgs() << "--Examining range:" << RL << '\n');
if (!IndexType::isInstr(Range.start()) ||
!IndexType::isInstr(Range.end()))
continue;
MachineInstr *SI = IM.getInstr(Range.start());
MachineInstr *EI = IM.getInstr(Range.end());
assert(SI->mayStore() && "Unexpected start instruction");
assert(EI->mayLoad() && "Unexpected end instruction");
MachineOperand &SrcOp = SI->getOperand(2);
HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
SrcOp.getSubReg() };
auto *RC = getRegClass({SrcOp.getReg(), SrcOp.getSubReg()});
// The this-> is needed to unconfuse MSVC.
unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
DEBUG(dbgs() << "Replacement reg:" << PrintReg(FoundR, &HRI) << '\n');
if (FoundR == 0)
continue;
// Generate the copy-in: "FoundR = COPY SrcR" at the store location.
MachineBasicBlock::iterator StartIt = SI, NextIt;
MachineInstr *CopyIn = nullptr;
if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
DebugLoc DL = SI->getDebugLoc();
CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
.addOperand(SrcOp);
}
++StartIt;
// Check if this is a last store and the FI is live-on-exit.
if (LoxFIs.count(FI) && (&Range == &RL.back())) {
// Update store's source register.
if (unsigned SR = SrcOp.getSubReg())
SrcOp.setReg(HRI.getSubReg(FoundR, SR));
else
SrcOp.setReg(FoundR);
SrcOp.setSubReg(0);
// We are keeping this register live.
SrcOp.setIsKill(false);
} else {
B.erase(SI);
IM.replaceInstr(SI, CopyIn);
}
auto EndIt = std::next(MachineBasicBlock::iterator(EI));
for (auto It = StartIt; It != EndIt; It = NextIt) {
MachineInstr *MI = &*It;
NextIt = std::next(It);
int TFI;
if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI)
continue;
unsigned DstR = MI->getOperand(0).getReg();
assert(MI->getOperand(0).getSubReg() == 0);
MachineInstr *CopyOut = nullptr;
if (DstR != FoundR) {
DebugLoc DL = MI->getDebugLoc();
unsigned MemSize = (1U << (HII.getMemAccessSize(MI) - 1));
assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
unsigned CopyOpc = TargetOpcode::COPY;
if (HII.isSignExtendingLoad(MI))
CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;
else if (HII.isZeroExtendingLoad(MI))
CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;
CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
.addReg(FoundR, getKillRegState(MI == EI));
}
IM.replaceInstr(MI, CopyOut);
B.erase(It);
}
// Update the dead map.
HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };
for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))
DM[RR].subtract(Range);
} // for Range in range list
}
}
}
void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
MachineBasicBlock &MB = *AI->getParent();


@@ -11,6 +11,7 @@
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H
#include "Hexagon.h"
#include "HexagonBlockRanges.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -124,6 +125,13 @@ private:
bool expandSpillMacros(MachineFunction &MF,
SmallVectorImpl<unsigned> &NewRegs) const;
unsigned findPhysReg(MachineFunction &MF, HexagonBlockRanges::IndexRange &FIR,
HexagonBlockRanges::InstrIndexMap &IndexMap,
HexagonBlockRanges::RegToRangeMap &DeadMap,
const TargetRegisterClass *RC) const;
void optimizeSpillSlots(MachineFunction &MF,
SmallVectorImpl<unsigned> &VRegs) const;
void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB,
MachineBasicBlock *&EpilogB) const;


@@ -0,0 +1,49 @@
; Check that a callee-saved register will be saved correctly if
; the predicate-to-GPR spilling code uses it.
;
; RUN: llc -march=hexagon < %s | FileCheck %s
;
; We expect to spill p0 into a general-purpose register and keep it there,
; without adding an extra spill of that register.
;
; CHECK: PredSpill:
; CHECK: memd(r29{{.*}}) = r17:16
; CHECK-DAG: r{{[0-9]+}} = p0
; CHECK-DAG: p0 = r{{[0-9]+}}
; CHECK-NOT: = memw(r29
;
define void @PredSpill() {
entry:
br i1 undef, label %if.then, label %if.else.14
if.then: ; preds = %entry
br i1 undef, label %if.end.57, label %if.else
if.else: ; preds = %if.then
unreachable
if.else.14: ; preds = %entry
br i1 undef, label %if.then.17, label %if.end.57
if.then.17: ; preds = %if.else.14
br i1 undef, label %if.end.57, label %if.then.20
if.then.20: ; preds = %if.then.17
%call21 = tail call i32 @myfun()
%tobool22 = icmp eq i32 %call21, 0
%0 = tail call i32 @myfun()
br i1 %tobool22, label %if.else.42, label %if.then.23
if.then.23: ; preds = %if.then.20
unreachable
if.else.42: ; preds = %if.then.20
ret void
if.end.57: ; preds = %if.then.17, %if.else.14, %if.then
ret void
}
declare i32 @myfun()


@@ -1,6 +1,3 @@
-; This functionality will be restored shortly.
-; XFAIL: *
; RUN: llc -march=hexagon -O2 < %s | FileCheck %s
;
; This checks that predicate registers are moved to GPRs instead of spilling


@@ -0,0 +1,144 @@
; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-hexagon-hvx-double \
; RUN: -hexagon-bit=0 < %s | FileCheck %s
; This spill should be eliminated.
; CHECK-NOT: vmem(r29+#6)
define void @test(i8* noalias nocapture %key, i8* noalias nocapture %data1) #0 {
entry:
%0 = bitcast i8* %key to <32 x i32>*
%1 = bitcast i8* %data1 to <32 x i32>*
br label %for.body
for.body:
%pkey.0542 = phi <32 x i32>* [ %0, %entry ], [ null, %for.body ]
%pdata0.0541 = phi <32 x i32>* [ null, %entry ], [ %add.ptr48, %for.body ]
%pdata1.0540 = phi <32 x i32>* [ %1, %entry ], [ %add.ptr49, %for.body ]
%dAccum0.0539 = phi <64 x i32> [ undef, %entry ], [ %86, %for.body ]
%2 = load <32 x i32>, <32 x i32>* %pkey.0542, align 128
%3 = load <32 x i32>, <32 x i32>* %pdata0.0541, align 128
%4 = load <32 x i32>, <32 x i32>* undef, align 128
%arrayidx4 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 2
%5 = load <32 x i32>, <32 x i32>* %arrayidx4, align 128
%arrayidx5 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 2
%6 = load <32 x i32>, <32 x i32>* %arrayidx5, align 128
%7 = load <32 x i32>, <32 x i32>* null, align 128
%8 = load <32 x i32>, <32 x i32>* undef, align 128
%9 = load <32 x i32>, <32 x i32>* null, align 128
%arrayidx9 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 3
%arrayidx10 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 6
%10 = load <32 x i32>, <32 x i32>* %arrayidx10, align 128
%arrayidx12 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 4
%11 = load <32 x i32>, <32 x i32>* %arrayidx12, align 128
%arrayidx13 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 8
%arrayidx14 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 8
%12 = load <32 x i32>, <32 x i32>* %arrayidx14, align 128
%arrayidx15 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 5
%13 = load <32 x i32>, <32 x i32>* %arrayidx15, align 128
%arrayidx16 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 10
%arrayidx17 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 10
%14 = load <32 x i32>, <32 x i32>* %arrayidx17, align 128
%arrayidx18 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 6
%15 = load <32 x i32>, <32 x i32>* %arrayidx18, align 128
%arrayidx19 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 12
%16 = load <32 x i32>, <32 x i32>* %arrayidx19, align 128
%arrayidx20 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 12
%17 = load <32 x i32>, <32 x i32>* %arrayidx20, align 128
%arrayidx22 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 14
%18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128
%arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14
%19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128
%20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
%21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2)
%22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11)
%23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3)
%24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef)
%25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
%26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7)
%27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15)
%28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8)
%29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16)
%30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9)
%31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17)
%32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
%33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4)
%34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13)
%35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5)
%36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef)
%37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6)
%38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14)
%39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
%40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
%41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
%42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
%43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
%44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
%45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
%46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21)
%47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26)
%48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23)
%49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28)
%50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24)
%51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30)
%52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
%53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22)
%54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27)
%55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef)
%56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31)
%57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
%58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33)
%59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39)
%60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %41, <32 x i32> %35)
%61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37)
%62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
%63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36)
%64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44)
%65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
%66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46)
%67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48)
%68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50)
%69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
%70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
%71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
%72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55)
%73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
%74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64)
%75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67)
%76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef)
%77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72)
%78 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %74, <32 x i32> zeroinitializer)
%79 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %dAccum0.0539, <32 x i32> %75, i32 65537)
%80 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %79, <32 x i32> zeroinitializer, i32 65537)
%81 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %80, <32 x i32> zeroinitializer, i32 65537)
%82 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %81, <32 x i32> %76, i32 65537)
%83 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %82, <32 x i32> %77, i32 65537)
%84 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %83, <32 x i32> zeroinitializer, i32 65537)
%85 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %84, <32 x i32> undef, i32 65537)
%86 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %85, <32 x i32> %78, i32 65537)
store <32 x i32> %66, <32 x i32>* %pkey.0542, align 128
store <32 x i32> %75, <32 x i32>* %pdata0.0541, align 128
store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx4, align 128
store <32 x i32> zeroinitializer, <32 x i32>* undef, align 128
store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx20, align 128
store <32 x i32> zeroinitializer, <32 x i32>* null, align 128
%add.ptr48 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 16
%add.ptr49 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 16
br i1 false, label %for.end, label %for.body
for.end:
%87 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %86)
ret void
}
declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1
declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) #1
declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }


@@ -0,0 +1,80 @@
; RUN: llc -O0 -march=hexagon -mcpu=hexagonv60 < %s | FileCheck %s
; CHECK: vmem
target triple = "hexagon"
@vecpreds = external global [15 x <16 x i32>], align 64
@vectors = external global [15 x <16 x i32>], align 64
@vector_pairs = external global [15 x <32 x i32>], align 128
@.str1 = external hidden unnamed_addr constant [20 x i8], align 1
@.str2 = external hidden unnamed_addr constant [43 x i8], align 1
@Q6VecPredResult = external global <16 x i32>, align 64
@.str52 = external hidden unnamed_addr constant [57 x i8], align 1
@.str54 = external hidden unnamed_addr constant [59 x i8], align 1
@VectorResult = external global <16 x i32>, align 64
@.str243 = external hidden unnamed_addr constant [60 x i8], align 1
@.str251 = external hidden unnamed_addr constant [77 x i8], align 1
@.str290 = external hidden unnamed_addr constant [65 x i8], align 1
@VectorPairResult = external global <32 x i32>, align 128
; Function Attrs: nounwind
declare void @print_vector(i32, i8*) #0
; Function Attrs: nounwind
declare i32 @printf(i8*, ...) #0
; Function Attrs: nounwind
declare void @print_vecpred(i32, i8*) #0
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
; Function Attrs: nounwind
declare void @init_vectors() #0
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
; Function Attrs: nounwind
declare void @init_addresses() #0
; Function Attrs: nounwind
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind
define i32 @main() #0 {
entry:
%0 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%1 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
call void @print_vecpred(i32 64, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*))
%2 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%call50 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([57 x i8], [57 x i8]* @.str52, i32 0, i32 0)) #3
%3 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%call52 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @.str54, i32 0, i32 0)) #3
%4 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%call300 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str290, i32 0, i32 0)) #3
%5 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%6 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%call1373 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @.str243, i32 0, i32 0)) #3
%7 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%call1381 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([77 x i8], [77 x i8]* @.str251, i32 0, i32 0)) #3
%8 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%9 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
call void @print_vector(i32 64, i8* bitcast (<16 x i32>* @VectorResult to i8*))
%10 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%11 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
%12 = bitcast <512 x i1> %11 to <16 x i32>
%13 = bitcast <16 x i32> %12 to <512 x i1>
%14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %13, <16 x i32> undef, <16 x i32> undef)
store <16 x i32> %14, <16 x i32>* @VectorResult, align 64
ret i32 0
}
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind }


@@ -0,0 +1,80 @@
; RUN: llc -march=hexagon -mcpu=hexagonv60 -O2 -enable-hexagon-hvx < %s | FileCheck %s
; CHECK: vmem(r{{[0-9]+}}+#3) = v{{[0-9]+}}
; CHECK: call puts
; CHECK: call print_vecpred
; CHECK: v{{[0-9]+}}{{ *}}={{ *}}vmem(r{{[0-9]+}}+#3)
target triple = "hexagon"
@K = global i64 0, align 8
@src = global i32 -1, align 4
@Q6VecPredResult = common global <16 x i32> zeroinitializer, align 64
@dst_addresses = common global [15 x i64] zeroinitializer, align 8
@ptr_addresses = common global [15 x i8*] zeroinitializer, align 8
@src_addresses = common global [15 x i8*] zeroinitializer, align 8
@ptr = common global [32768 x i32] zeroinitializer, align 8
@vecpreds = common global [15 x <16 x i32>] zeroinitializer, align 64
@VectorResult = common global <16 x i32> zeroinitializer, align 64
@vectors = common global [15 x <16 x i32>] zeroinitializer, align 64
@VectorPairResult = common global <32 x i32> zeroinitializer, align 128
@vector_pairs = common global [15 x <32 x i32>] zeroinitializer, align 128
@str = private unnamed_addr constant [106 x i8] c"Q6VecPred4 : Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),INT32_MIN)\00"
@str3 = private unnamed_addr constant [99 x i8] c"Q6VecPred4 : Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),-1)\00"
@str4 = private unnamed_addr constant [98 x i8] c"Q6VecPred4 : Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),0)\00"
; Function Attrs: nounwind
define i32 @main() #0 {
entry:
%call = tail call i32 bitcast (i32 (...)* @init_addresses to i32 ()*)() #3
%call1 = tail call i32 @acquire_vector_unit(i8 zeroext 0) #3
tail call void @init_vectors() #3
%0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 2)
%1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 16843009)
%2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%3 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -2147483648)
%4 = bitcast <512 x i1> %3 to <16 x i32>
store <16 x i32> %4, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
%puts = tail call i32 @puts(i8* getelementptr inbounds ([106 x i8], [106 x i8]* @str, i32 0, i32 0))
tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
%5 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -1)
%6 = bitcast <512 x i1> %5 to <16 x i32>
store <16 x i32> %6, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
%puts5 = tail call i32 @puts(i8* getelementptr inbounds ([99 x i8], [99 x i8]* @str3, i32 0, i32 0))
tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
%7 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 0)
%8 = bitcast <512 x i1> %7 to <16 x i32>
store <16 x i32> %8, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
%puts6 = tail call i32 @puts(i8* getelementptr inbounds ([98 x i8], [98 x i8]* @str4, i32 0, i32 0))
tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
ret i32 0
}
declare i32 @init_addresses(...) #1
declare i32 @acquire_vector_unit(i8 zeroext) #1
declare void @init_vectors() #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #2
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #2
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #2
declare void @print_vecpred(i32, i8*) #1
; Function Attrs: nounwind
declare i32 @puts(i8* nocapture readonly) #3
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
!1 = !{!2, !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}