mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
3909a36379
Replace with forward declaration and move dependency down to source files that actually need it. Both TargetLowering.h and TargetMachine.h are 2 of the most expensive headers (top 10) in the ClangBuildAnalyzer report when building llc.
397 lines
14 KiB
C++
397 lines
14 KiB
C++
//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This pass looks for instructions that can be replaced by a Test Data Class
|
|
// instruction, and replaces them when profitable.
|
|
//
|
|
// Roughly, the following rules are recognized:
|
|
//
|
|
// 1: fcmp pred X, 0 -> tdc X, mask
|
|
// 2: fcmp pred X, +-inf -> tdc X, mask
|
|
// 3: fcmp pred X, +-minnorm -> tdc X, mask
|
|
// 4: tdc (fabs X), mask -> tdc X, newmask
|
|
// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
|
|
// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
|
|
// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
|
|
// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
|
|
// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
|
|
// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
|
|
//
|
|
// The pass works in 4 steps:
|
|
//
|
|
// 1. All fcmp and icmp instructions in a function are checked for a match
|
|
// with rules 1-3 and 5-7. Their TDC equivalents are stored in
|
|
// the ConvertedInsts mapping. If the operand of a fcmp instruction is
|
|
// a fabs, it's also folded according to rule 4.
|
|
// 2. All and/or/xor i1 instructions whose both operands have been already
|
|
// mapped are mapped according to rules 8-10. LogicOpsWorklist is used
|
|
// as a queue of instructions to check.
|
|
// 3. All mapped instructions that are considered worthy of conversion (ie.
|
|
// replacing them will actually simplify the final code) are replaced
|
|
// with a call to the s390.tdc intrinsic.
|
|
// 4. All intermediate results of replaced instructions are removed if unused.
|
|
//
|
|
// Instructions that match rules 1-3 are considered unworthy of conversion
|
|
// on their own (since a comparison instruction is superior), but are mapped
|
|
// in the hopes of folding the result using rules 4 and 8-10 (likely removing
|
|
// the original comparison in the process).
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "SystemZ.h"
|
|
#include "SystemZSubtarget.h"
|
|
#include "llvm/ADT/MapVector.h"
|
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
|
#include "llvm/IR/Constants.h"
|
|
#include "llvm/IR/IRBuilder.h"
|
|
#include "llvm/IR/InstIterator.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
#include "llvm/IR/IntrinsicsS390.h"
|
|
#include "llvm/IR/LegacyPassManager.h"
|
|
#include "llvm/IR/Module.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
#include <deque>
|
|
#include <set>
|
|
|
|
using namespace llvm;
|
|
|
|
namespace llvm {
|
|
void initializeSystemZTDCPassPass(PassRegistry&);
|
|
}
|
|
|
|
namespace {
|
|
|
|
class SystemZTDCPass : public FunctionPass {
|
|
public:
|
|
static char ID;
|
|
SystemZTDCPass() : FunctionPass(ID) {
|
|
initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
bool runOnFunction(Function &F) override;
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.addRequired<TargetPassConfig>();
|
|
}
|
|
|
|
private:
|
|
// Maps seen instructions that can be mapped to a TDC, values are
|
|
// (TDC operand, TDC mask, worthy flag) triples.
|
|
MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
|
|
// The queue of and/or/xor i1 instructions to be potentially folded.
|
|
std::vector<BinaryOperator *> LogicOpsWorklist;
|
|
// Instructions matched while folding, to be removed at the end if unused.
|
|
std::set<Instruction *> PossibleJunk;
|
|
|
|
// Tries to convert a fcmp instruction.
|
|
void convertFCmp(CmpInst &I);
|
|
|
|
// Tries to convert an icmp instruction.
|
|
void convertICmp(CmpInst &I);
|
|
|
|
// Tries to convert an i1 and/or/xor instruction, whose both operands
|
|
// have been already converted.
|
|
void convertLogicOp(BinaryOperator &I);
|
|
|
|
// Marks an instruction as converted - adds it to ConvertedInsts and adds
|
|
// any and/or/xor i1 users to the queue.
|
|
void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
|
|
ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
|
|
auto &M = *I->getFunction()->getParent();
|
|
auto &Ctx = M.getContext();
|
|
for (auto *U : I->users()) {
|
|
auto *LI = dyn_cast<BinaryOperator>(U);
|
|
if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
|
|
(LI->getOpcode() == Instruction::And ||
|
|
LI->getOpcode() == Instruction::Or ||
|
|
LI->getOpcode() == Instruction::Xor)) {
|
|
LogicOpsWorklist.push_back(LI);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
char SystemZTDCPass::ID = 0;
|
|
INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
|
|
"SystemZ Test Data Class optimization", false, false)
|
|
|
|
FunctionPass *llvm::createSystemZTDCPass() {
|
|
return new SystemZTDCPass();
|
|
}
|
|
|
|
void SystemZTDCPass::convertFCmp(CmpInst &I) {
|
|
Value *Op0 = I.getOperand(0);
|
|
auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
|
|
auto Pred = I.getPredicate();
|
|
// Only comparisons with consts are interesting.
|
|
if (!Const)
|
|
return;
|
|
// Compute the smallest normal number (and its negation).
|
|
auto &Sem = Op0->getType()->getFltSemantics();
|
|
APFloat Smallest = APFloat::getSmallestNormalized(Sem);
|
|
APFloat NegSmallest = Smallest;
|
|
NegSmallest.changeSign();
|
|
// Check if Const is one of our recognized consts.
|
|
int WhichConst;
|
|
if (Const->isZero()) {
|
|
// All comparisons with 0 can be converted.
|
|
WhichConst = 0;
|
|
} else if (Const->isInfinity()) {
|
|
// Likewise for infinities.
|
|
WhichConst = Const->isNegative() ? 2 : 1;
|
|
} else if (Const->isExactlyValue(Smallest)) {
|
|
// For Smallest, we cannot do EQ separately from GT.
|
|
if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
|
|
(Pred & CmpInst::FCMP_OGE) != 0)
|
|
return;
|
|
WhichConst = 3;
|
|
} else if (Const->isExactlyValue(NegSmallest)) {
|
|
// Likewise for NegSmallest, we cannot do EQ separately from LT.
|
|
if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
|
|
(Pred & CmpInst::FCMP_OLE) != 0)
|
|
return;
|
|
WhichConst = 4;
|
|
} else {
|
|
// Not one of our special constants.
|
|
return;
|
|
}
|
|
// Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
|
|
static const int Masks[][4] = {
|
|
{ // 0
|
|
SystemZ::TDCMASK_ZERO, // eq
|
|
SystemZ::TDCMASK_POSITIVE, // gt
|
|
SystemZ::TDCMASK_NEGATIVE, // lt
|
|
SystemZ::TDCMASK_NAN, // un
|
|
},
|
|
{ // inf
|
|
SystemZ::TDCMASK_INFINITY_PLUS, // eq
|
|
0, // gt
|
|
(SystemZ::TDCMASK_ZERO |
|
|
SystemZ::TDCMASK_NEGATIVE |
|
|
SystemZ::TDCMASK_NORMAL_PLUS |
|
|
SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
|
|
SystemZ::TDCMASK_NAN, // un
|
|
},
|
|
{ // -inf
|
|
SystemZ::TDCMASK_INFINITY_MINUS, // eq
|
|
(SystemZ::TDCMASK_ZERO |
|
|
SystemZ::TDCMASK_POSITIVE |
|
|
SystemZ::TDCMASK_NORMAL_MINUS |
|
|
SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
|
|
0, // lt
|
|
SystemZ::TDCMASK_NAN, // un
|
|
},
|
|
{ // minnorm
|
|
0, // eq (unsupported)
|
|
(SystemZ::TDCMASK_NORMAL_PLUS |
|
|
SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge)
|
|
(SystemZ::TDCMASK_ZERO |
|
|
SystemZ::TDCMASK_NEGATIVE |
|
|
SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
|
|
SystemZ::TDCMASK_NAN, // un
|
|
},
|
|
{ // -minnorm
|
|
0, // eq (unsupported)
|
|
(SystemZ::TDCMASK_ZERO |
|
|
SystemZ::TDCMASK_POSITIVE |
|
|
SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
|
|
(SystemZ::TDCMASK_NORMAL_MINUS |
|
|
SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le)
|
|
SystemZ::TDCMASK_NAN, // un
|
|
}
|
|
};
|
|
// Construct the mask as a combination of the partial masks.
|
|
int Mask = 0;
|
|
if (Pred & CmpInst::FCMP_OEQ)
|
|
Mask |= Masks[WhichConst][0];
|
|
if (Pred & CmpInst::FCMP_OGT)
|
|
Mask |= Masks[WhichConst][1];
|
|
if (Pred & CmpInst::FCMP_OLT)
|
|
Mask |= Masks[WhichConst][2];
|
|
if (Pred & CmpInst::FCMP_UNO)
|
|
Mask |= Masks[WhichConst][3];
|
|
// A lone fcmp is unworthy of tdc conversion on its own, but may become
|
|
// worthy if combined with fabs.
|
|
bool Worthy = false;
|
|
if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
|
|
Function *F = CI->getCalledFunction();
|
|
if (F && F->getIntrinsicID() == Intrinsic::fabs) {
|
|
// Fold with fabs - adjust the mask appropriately.
|
|
Mask &= SystemZ::TDCMASK_PLUS;
|
|
Mask |= Mask >> 1;
|
|
Op0 = CI->getArgOperand(0);
|
|
// A combination of fcmp with fabs is a win, unless the constant
|
|
// involved is 0 (which is handled by later passes).
|
|
Worthy = WhichConst != 0;
|
|
PossibleJunk.insert(CI);
|
|
}
|
|
}
|
|
converted(&I, Op0, Mask, Worthy);
|
|
}
|
|
|
|
void SystemZTDCPass::convertICmp(CmpInst &I) {
|
|
Value *Op0 = I.getOperand(0);
|
|
auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
|
|
auto Pred = I.getPredicate();
|
|
// All our icmp rules involve comparisons with consts.
|
|
if (!Const)
|
|
return;
|
|
if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
|
|
// Check for icmp+bitcast used for signbit.
|
|
if (!Cast->getSrcTy()->isFloatTy() &&
|
|
!Cast->getSrcTy()->isDoubleTy() &&
|
|
!Cast->getSrcTy()->isFP128Ty())
|
|
return;
|
|
Value *V = Cast->getOperand(0);
|
|
int Mask;
|
|
if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
|
|
// icmp slt (bitcast X), 0 - set if sign bit true
|
|
Mask = SystemZ::TDCMASK_MINUS;
|
|
} else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
|
|
// icmp sgt (bitcast X), -1 - set if sign bit false
|
|
Mask = SystemZ::TDCMASK_PLUS;
|
|
} else {
|
|
// Not a sign bit check.
|
|
return;
|
|
}
|
|
PossibleJunk.insert(Cast);
|
|
converted(&I, V, Mask, true);
|
|
} else if (auto *CI = dyn_cast<CallInst>(Op0)) {
|
|
// Check if this is a pre-existing call of our tdc intrinsic.
|
|
Function *F = CI->getCalledFunction();
|
|
if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
|
|
return;
|
|
if (!Const->isZero())
|
|
return;
|
|
Value *V = CI->getArgOperand(0);
|
|
auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
|
|
// Bail if the mask is not a constant.
|
|
if (!MaskC)
|
|
return;
|
|
int Mask = MaskC->getZExtValue();
|
|
Mask &= SystemZ::TDCMASK_ALL;
|
|
if (Pred == CmpInst::ICMP_NE) {
|
|
// icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
|
|
} else if (Pred == CmpInst::ICMP_EQ) {
|
|
// icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
|
|
Mask ^= SystemZ::TDCMASK_ALL;
|
|
} else {
|
|
// An unknown comparison - ignore.
|
|
return;
|
|
}
|
|
PossibleJunk.insert(CI);
|
|
converted(&I, V, Mask, false);
|
|
}
|
|
}
|
|
|
|
void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
|
|
Value *Op0, *Op1;
|
|
int Mask0, Mask1;
|
|
bool Worthy0, Worthy1;
|
|
std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
|
|
std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
|
|
if (Op0 != Op1)
|
|
return;
|
|
int Mask;
|
|
switch (I.getOpcode()) {
|
|
case Instruction::And:
|
|
Mask = Mask0 & Mask1;
|
|
break;
|
|
case Instruction::Or:
|
|
Mask = Mask0 | Mask1;
|
|
break;
|
|
case Instruction::Xor:
|
|
Mask = Mask0 ^ Mask1;
|
|
break;
|
|
default:
|
|
llvm_unreachable("Unknown op in convertLogicOp");
|
|
}
|
|
converted(&I, Op0, Mask, true);
|
|
}
|
|
|
|
bool SystemZTDCPass::runOnFunction(Function &F) {
|
|
auto &TPC = getAnalysis<TargetPassConfig>();
|
|
if (TPC.getTM<TargetMachine>()
|
|
.getSubtarget<SystemZSubtarget>(F)
|
|
.hasSoftFloat())
|
|
return false;
|
|
|
|
ConvertedInsts.clear();
|
|
LogicOpsWorklist.clear();
|
|
PossibleJunk.clear();
|
|
|
|
// Look for icmp+fcmp instructions.
|
|
for (auto &I : instructions(F)) {
|
|
if (I.getOpcode() == Instruction::FCmp)
|
|
convertFCmp(cast<CmpInst>(I));
|
|
else if (I.getOpcode() == Instruction::ICmp)
|
|
convertICmp(cast<CmpInst>(I));
|
|
}
|
|
|
|
// If none found, bail already.
|
|
if (ConvertedInsts.empty())
|
|
return false;
|
|
|
|
// Process the queue of logic instructions.
|
|
while (!LogicOpsWorklist.empty()) {
|
|
BinaryOperator *Op = LogicOpsWorklist.back();
|
|
LogicOpsWorklist.pop_back();
|
|
// If both operands mapped, and the instruction itself not yet mapped,
|
|
// convert it.
|
|
if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
|
|
ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
|
|
!ConvertedInsts.count(Op))
|
|
convertLogicOp(*Op);
|
|
}
|
|
|
|
// Time to actually replace the instructions. Do it in the reverse order
|
|
// of finding them, since there's a good chance the earlier ones will be
|
|
// unused (due to being folded into later ones).
|
|
Module &M = *F.getParent();
|
|
auto &Ctx = M.getContext();
|
|
Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
|
|
bool MadeChange = false;
|
|
for (auto &It : reverse(ConvertedInsts)) {
|
|
Instruction *I = It.first;
|
|
Value *V;
|
|
int Mask;
|
|
bool Worthy;
|
|
std::tie(V, Mask, Worthy) = It.second;
|
|
if (!I->user_empty()) {
|
|
// If used and unworthy of conversion, skip it.
|
|
if (!Worthy)
|
|
continue;
|
|
// Call the intrinsic, compare result with 0.
|
|
Function *TDCFunc =
|
|
Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType());
|
|
IRBuilder<> IRB(I);
|
|
Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
|
|
Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
|
|
Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
|
|
I->replaceAllUsesWith(ICmp);
|
|
}
|
|
// If unused, or used and converted, remove it.
|
|
I->eraseFromParent();
|
|
MadeChange = true;
|
|
}
|
|
|
|
if (!MadeChange)
|
|
return false;
|
|
|
|
// We've actually done something - now clear misc accumulated junk (fabs,
|
|
// bitcast).
|
|
for (auto *I : PossibleJunk)
|
|
if (I->user_empty())
|
|
I->eraseFromParent();
|
|
|
|
return true;
|
|
}
|