1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

[ARM][NFC] ARMCodeGenPrepare: some refactoring and algorithm description

Differential Revision: https://reviews.llvm.org/D50846

llvm-svn: 339997
This commit is contained in:
Sjoerd Meijer 2018-08-17 07:34:01 +00:00
parent 1be380d8da
commit dc83ae3e1b

View File

@ -54,8 +54,59 @@ EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false),
cl::desc("Use DSP instructions for scalar operations\ cl::desc("Use DSP instructions for scalar operations\
with immediate operands")); with immediate operands"));
namespace { // The goal of this pass is to enable more efficient code generation for
// operations on narrow types (i.e. types narrower than 32 bits) and this is a
// motivating IR code example:
//
// define hidden i32 @cmp(i8 zeroext) {
// %2 = add i8 %0, -49
// %3 = icmp ult i8 %2, 3
// ..
// }
//
// The issue here is that i8 is type-legalized to i32 because i8 is not a
// legal type. Thus, arithmetic is done at i32 precision, and the byte value
// is then masked out as follows:
//
// t19: i32 = add t4, Constant:i32<-49>
// t24: i32 = and t19, Constant:i32<255>
//
// Consequently, we generate code like this:
//
// subs r0, #49
// uxtb r1, r0
// cmp r1, #3
//
// This shows that masking out the byte value results in generation of
// the UXTB instruction. This is not optimal as r0 already contains the byte
// value we need, and so instead we can just generate:
//
// sub.w r1, r0, #49
// cmp r1, #3
//
// We achieve this by type promoting the IR to i32 like so for this example:
//
// define i32 @cmp(i8 zeroext %c) {
// %0 = zext i8 %c to i32
// %c.off = add i32 %0, -49
// %1 = icmp ult i32 %c.off, 3
// ..
// }
//
// For this to be valid and legal, we need to prove that the i32 add is
// producing the same value as the i8 addition, and that e.g. no overflow
// happens.
//
// A brief sketch of the algorithm and some terminology.
// We pattern match interesting IR patterns that have:
// - "sources": instructions producing narrow values (i8, i16), and
// - "sinks": instructions consuming these narrow values.
//
// We collect all instructions connecting sources and sinks in a worklist, so
// that we can mutate these instructions and perform type promotion when it is
// legal to do so.
namespace {
class IRPromoter { class IRPromoter {
SmallPtrSet<Value*, 8> NewInsts; SmallPtrSet<Value*, 8> NewInsts;
SmallVector<Instruction*, 4> InstsToRemove; SmallVector<Instruction*, 4> InstsToRemove;
@ -77,8 +128,8 @@ public:
void Mutate(Type *OrigTy, void Mutate(Type *OrigTy,
SmallPtrSetImpl<Value*> &Visited, SmallPtrSetImpl<Value*> &Visited,
SmallPtrSetImpl<Value*> &Leaves, SmallPtrSetImpl<Value*> &Sources,
SmallPtrSetImpl<Instruction*> &Roots); SmallPtrSetImpl<Instruction*> &Sinks);
}; };
class ARMCodeGenPrepare : public FunctionPass { class ARMCodeGenPrepare : public FunctionPass {
@ -110,8 +161,7 @@ public:
} }
/// Can the given value generate sign bits. static bool generateSignBits(Value *V) {
static bool isSigned(Value *V) {
if (!isa<Instruction>(V)) if (!isa<Instruction>(V))
return false; return false;
@ -144,7 +194,7 @@ static bool isSupportedType(Value *V) {
return IntTy->getBitWidth() == ARMCodeGenPrepare::TypeSize; return IntTy->getBitWidth() == ARMCodeGenPrepare::TypeSize;
} }
/// Return true if the given value is a leaf in the use-def chain, producing /// Return true if the given value is a source in the use-def chain, producing
/// a narrow (i8, i16) value. These values will be zext to start the promotion /// a narrow (i8, i16) value. These values will be zext to start the promotion
/// of the tree to i32. We guarantee that these won't populate the upper bits /// of the tree to i32. We guarantee that these won't populate the upper bits
/// of the register. ZExt on the loads will be free, and the same for call /// of the register. ZExt on the loads will be free, and the same for call
@ -254,7 +304,7 @@ static bool isPromotedResultSafe(Value *V) {
if (!isa<Instruction>(V)) if (!isa<Instruction>(V))
return true; return true;
if (isSigned(V)) if (generateSignBits(V))
return false; return false;
// If I is only being used by something that will require its value to be // If I is only being used by something that will require its value to be
@ -290,8 +340,8 @@ static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
void IRPromoter::Mutate(Type *OrigTy, void IRPromoter::Mutate(Type *OrigTy,
SmallPtrSetImpl<Value*> &Visited, SmallPtrSetImpl<Value*> &Visited,
SmallPtrSetImpl<Value*> &Leaves, SmallPtrSetImpl<Value*> &Sources,
SmallPtrSetImpl<Instruction*> &Roots) { SmallPtrSetImpl<Instruction*> &Sinks) {
IRBuilder<> Builder{Ctx}; IRBuilder<> Builder{Ctx};
Type *ExtTy = Type::getInt32Ty(M->getContext()); Type *ExtTy = Type::getInt32Ty(M->getContext());
SmallPtrSet<Value*, 8> Promoted; SmallPtrSet<Value*, 8> Promoted;
@ -364,9 +414,9 @@ void IRPromoter::Mutate(Type *OrigTy,
TruncTysMap[ZExt] = TruncTysMap[V]; TruncTysMap[ZExt] = TruncTysMap[V];
}; };
// First, insert extending instructions between the leaves and their users. // First, insert extending instructions between the sources and their users.
LLVM_DEBUG(dbgs() << "ARM CGP: Promoting leaves:\n"); LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n");
for (auto V : Leaves) { for (auto V : Sources) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n"); LLVM_DEBUG(dbgs() << " - " << *V << "\n");
if (auto *I = dyn_cast<Instruction>(V)) if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I); InsertZExt(I, I);
@ -374,7 +424,7 @@ void IRPromoter::Mutate(Type *OrigTy,
BasicBlock &BB = Arg->getParent()->front(); BasicBlock &BB = Arg->getParent()->front();
InsertZExt(Arg, &*BB.getFirstInsertionPt()); InsertZExt(Arg, &*BB.getFirstInsertionPt());
} else { } else {
llvm_unreachable("unhandled leaf that needs extending"); llvm_unreachable("unhandled source that needs extending");
} }
Promoted.insert(V); Promoted.insert(V);
} }
@ -383,11 +433,11 @@ void IRPromoter::Mutate(Type *OrigTy,
// Then mutate the types of the instructions within the tree. Here we handle // Then mutate the types of the instructions within the tree. Here we handle
// constant operands. // constant operands.
for (auto *V : Visited) { for (auto *V : Visited) {
if (Leaves.count(V)) if (Sources.count(V))
continue; continue;
auto *I = cast<Instruction>(V); auto *I = cast<Instruction>(V);
if (Roots.count(I)) if (Sinks.count(I))
continue; continue;
for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) { for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
@ -410,7 +460,7 @@ void IRPromoter::Mutate(Type *OrigTy,
// Now we need to remove any zexts that have become unnecessary, as well // Now we need to remove any zexts that have become unnecessary, as well
// as insert any intrinsics. // as insert any intrinsics.
for (auto *V : Visited) { for (auto *V : Visited) {
if (Leaves.count(V)) if (Sources.count(V))
continue; continue;
if (!shouldPromote(V) || isPromotedResultSafe(V)) if (!shouldPromote(V) || isPromotedResultSafe(V))
@ -425,7 +475,7 @@ void IRPromoter::Mutate(Type *OrigTy,
return nullptr; return nullptr;
if ((!Promoted.count(V) && !NewInsts.count(V)) || !TruncTysMap.count(V) || if ((!Promoted.count(V) && !NewInsts.count(V)) || !TruncTysMap.count(V) ||
Leaves.count(V)) Sources.count(V))
return nullptr; return nullptr;
Type *TruncTy = TruncTysMap[V]; Type *TruncTy = TruncTysMap[V];
@ -440,10 +490,10 @@ void IRPromoter::Mutate(Type *OrigTy,
return Trunc; return Trunc;
}; };
LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the roots:\n"); LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n");
// Fix up any stores or returns that use the results of the promoted // Fix up any stores or returns that use the results of the promoted
// chain. // chain.
for (auto I : Roots) { for (auto I : Sinks) {
LLVM_DEBUG(dbgs() << " - " << *I << "\n"); LLVM_DEBUG(dbgs() << " - " << *I << "\n");
// Handle calls separately as we need to iterate over arg operands. // Handle calls separately as we need to iterate over arg operands.
@ -503,7 +553,7 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
// Special cases for calls as we need to check for zeroext // Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they can // TODO We should accept calls even if they don't have zeroext, as they can
// still be roots. // still be sinks.
if (auto *Call = dyn_cast<CallInst>(V)) if (auto *Call = dyn_cast<CallInst>(V))
return isSupportedType(Call) && return isSupportedType(Call) &&
Call->hasRetAttr(Attribute::AttrKind::ZExt); Call->hasRetAttr(Attribute::AttrKind::ZExt);
@ -515,10 +565,11 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
if (!isSupportedType(V)) if (!isSupportedType(V))
return false; return false;
bool res = !isSigned(V); if (generateSignBits(V)) {
if (!res) LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
LLVM_DEBUG(dbgs() << "ARM CGP: No, it's a signed instruction.\n"); return false;
return res; }
return true;
} }
/// Check that the type of V would be promoted and that the original type is /// Check that the type of V would be promoted and that the original type is
@ -570,14 +621,15 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
<< TypeSize << "\n"); << TypeSize << "\n");
SetVector<Value*> WorkList; SetVector<Value*> WorkList;
SmallPtrSet<Value*, 8> Leaves; SmallPtrSet<Value*, 8> Sources;
SmallPtrSet<Instruction*, 4> Roots; SmallPtrSet<Instruction*, 4> Sinks;
WorkList.insert(V); WorkList.insert(V);
SmallPtrSet<Value*, 16> CurrentVisited; SmallPtrSet<Value*, 16> CurrentVisited;
CurrentVisited.clear(); CurrentVisited.clear();
// Return true if the given value can, or has been, visited. Add V to the // Return true if V was added to the worklist as a supported instruction,
// worklist if needed. // if it was already visited, or if we don't need to explore it (e.g.
// pointer values and GEPs), and false otherwise.
auto AddLegalInst = [&](Value *V) { auto AddLegalInst = [&](Value *V) {
if (CurrentVisited.count(V)) if (CurrentVisited.count(V))
return true; return true;
@ -621,9 +673,9 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
// Calls can be both sources and sinks. // Calls can be both sources and sinks.
if (isSink(V)) if (isSink(V))
Roots.insert(cast<Instruction>(V)); Sinks.insert(cast<Instruction>(V));
if (isSource(V)) if (isSource(V))
Leaves.insert(V); Sources.insert(V);
else if (auto *I = dyn_cast<Instruction>(V)) { else if (auto *I = dyn_cast<Instruction>(V)) {
// Visit operands of any instruction visited. // Visit operands of any instruction visited.
for (auto &U : I->operands()) { for (auto &U : I->operands()) {
@ -648,9 +700,9 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
); );
unsigned ToPromote = 0; unsigned ToPromote = 0;
for (auto *V : CurrentVisited) { for (auto *V : CurrentVisited) {
if (Leaves.count(V)) if (Sources.count(V))
continue; continue;
if (Roots.count(cast<Instruction>(V))) if (Sinks.count(cast<Instruction>(V)))
continue; continue;
++ToPromote; ++ToPromote;
} }
@ -658,7 +710,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (ToPromote < 2) if (ToPromote < 2)
return false; return false;
Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots); Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks);
return true; return true;
} }