1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00
llvm-mirror/lib/Target/PowerPC/PPCFastISel.cpp
Kang Zhang 0eb8d477e2 [PowerPC] [PowerPC] Enhance the fast selection of fptoi & fptrunc instruction and clean up related asserts
Summary:
Fast selection of llvm fptoi & fptrunc instructions is not handled well about
VSX instruction support.
We'd use VSX float convert integer instruction instead of non-vsx float convert
integer instruction if the operand register class is VSSRC or VSFRC because i32
and i64 are mapped to VSSRC and VSFRC correspondingly if VSX feature is
openeded.
For float trunc instruction, we do this silimar work like float convert integer
instruction to try to use VSX instruction.

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D58430

llvm-svn: 354762
2019-02-25 02:46:16 +00:00

2465 lines
85 KiB
C++

//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PowerPC-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// PPCGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
//===----------------------------------------------------------------------===//
//
// TBD:
// fastLowerArguments: Handle simple cases.
// PPCMaterializeGV: Handle TLS.
// SelectCall: Handle function pointers.
// SelectCall: Handle multi-register return values.
// SelectCall: Optimize away nops for local calls.
// processCallArgs: Handle bit-converted arguments.
// finishCall: Handle multi-register return values.
// PPCComputeAddress: Handle parameter references as FrameIndex's.
// PPCEmitCmp: Handle immediate as operand 1.
// SelectCall: Handle small byval arguments.
// SelectIntrinsicCall: Implement.
// SelectSelect: Implement.
// Consider factoring isTypeLegal into the base class.
// Implement switches and jump tables.
//
//===----------------------------------------------------------------------===//
using namespace llvm;
#define DEBUG_TYPE "ppcfastisel"
namespace {
typedef struct Address {
enum {
RegBase,
FrameIndexBase
} BaseType;
union {
unsigned Reg;
int FI;
} Base;
long Offset;
// Innocuous defaults for our address.
Address()
: BaseType(RegBase), Offset(0) {
Base.Reg = 0;
}
} Address;
class PPCFastISel final : public FastISel {
const TargetMachine &TM;
const PPCSubtarget *PPCSubTarget;
PPCFunctionInfo *PPCFuncInfo;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
LLVMContext *Context;
public:
explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
TII(*PPCSubTarget->getInstrInfo()),
TLI(*PPCSubTarget->getTargetLowering()),
Context(&FuncInfo.Fn->getContext()) {}
// Backend specific FastISel code.
private:
bool fastSelectInstruction(const Instruction *I) override;
unsigned fastMaterializeConstant(const Constant *C) override;
unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
bool fastLowerArguments() override;
unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm);
unsigned fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill);
unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
bool fastLowerCall(CallLoweringInfo &CLI) override;
// Instruction selection routines.
private:
bool SelectLoad(const Instruction *I);
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
bool SelectIndirectBr(const Instruction *I);
bool SelectFPExt(const Instruction *I);
bool SelectFPTrunc(const Instruction *I);
bool SelectIToFP(const Instruction *I, bool IsSigned);
bool SelectFPToI(const Instruction *I, bool IsSigned);
bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
// Utility routines.
private:
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool isValueAvailable(const Value *V) const;
bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
return RC->getID() == PPC::VSFRCRegClassID;
}
bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
return RC->getID() == PPC::VSSRCRegClassID;
}
unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
unsigned SrcReg, unsigned Flag = 0,
unsigned SubReg = 0) {
unsigned TmpReg = createResultReg(ToRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
return TmpReg;
}
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg,
const PPC::Predicate Pred);
bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
const TargetRegisterClass *RC, bool IsZExt = true,
unsigned FP64LoadOpc = PPC::LFD);
bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
bool PPCComputeAddress(const Value *Obj, Address &Addr);
void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
unsigned &IndexReg);
bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg, bool IsZExt);
unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
bool UseSExt = true);
unsigned PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMaterialize64BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
unsigned SrcReg, bool IsSigned);
unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
// Call handling routines.
private:
bool processCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC,
unsigned &NumBytes,
bool IsVarArg);
bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
private:
#include "PPCGenFastISel.inc"
};
} // end anonymous namespace
static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
switch (Pred) {
// These are not representable with any single compare.
case CmpInst::FCMP_FALSE:
case CmpInst::FCMP_TRUE:
// Major concern about the following 6 cases is NaN result. The comparison
// result consists of 4 bits, indicating lt, eq, gt and un (unordered),
// only one of which will be set. The result is generated by fcmpu
// instruction. However, bc instruction only inspects one of the first 3
// bits, so when un is set, bc instruction may jump to an undesired
// place.
//
// More specifically, if we expect an unordered comparison and un is set, we
// expect to always go to true branch; in such case UEQ, UGT and ULT still
// give false, which are undesired; but UNE, UGE, ULE happen to give true,
// since they are tested by inspecting !eq, !lt, !gt, respectively.
//
// Similarly, for ordered comparison, when un is set, we always expect the
// result to be false. In such case OGT, OLT and OEQ is good, since they are
// actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
// and ONE are tested through !lt, !gt and !eq, and these are true.
case CmpInst::FCMP_UEQ:
case CmpInst::FCMP_UGT:
case CmpInst::FCMP_ULT:
case CmpInst::FCMP_OGE:
case CmpInst::FCMP_OLE:
case CmpInst::FCMP_ONE:
default:
return Optional<PPC::Predicate>();
case CmpInst::FCMP_OEQ:
case CmpInst::ICMP_EQ:
return PPC::PRED_EQ;
case CmpInst::FCMP_OGT:
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_SGT:
return PPC::PRED_GT;
case CmpInst::FCMP_UGE:
case CmpInst::ICMP_UGE:
case CmpInst::ICMP_SGE:
return PPC::PRED_GE;
case CmpInst::FCMP_OLT:
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_SLT:
return PPC::PRED_LT;
case CmpInst::FCMP_ULE:
case CmpInst::ICMP_ULE:
case CmpInst::ICMP_SLE:
return PPC::PRED_LE;
case CmpInst::FCMP_UNE:
case CmpInst::ICMP_NE:
return PPC::PRED_NE;
case CmpInst::FCMP_ORD:
return PPC::PRED_NU;
case CmpInst::FCMP_UNO:
return PPC::PRED_UN;
}
}
// Determine whether the type Ty is simple enough to be handled by
// fast-isel, and return its equivalent machine type in VT.
// FIXME: Copied directly from ARM -- factor into base class?
bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT Evt = TLI.getValueType(DL, Ty, true);
// Only handle simple types.
if (Evt == MVT::Other || !Evt.isSimple()) return false;
VT = Evt.getSimpleVT();
// Handle all legal types, i.e. a register that will directly hold this
// value.
return TLI.isTypeLegal(VT);
}
// Determine whether the type Ty is simple enough to be handled by
// fast-isel as a load target, and return its equivalent machine type in VT.
bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT)) return true;
// If this is a type than can be sign or zero-extended to a basic operation
// go ahead and accept it now.
if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
return true;
}
return false;
}
bool PPCFastISel::isValueAvailable(const Value *V) const {
if (!isa<Instruction>(V))
return true;
const auto *I = cast<Instruction>(V);
return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
// another block, otherwise it may not have a virtual register assigned.
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
Opcode = I->getOpcode();
U = I;
}
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
}
switch (Opcode) {
default:
break;
case Instruction::BitCast:
// Look through bitcasts.
return PPCComputeAddress(U->getOperand(0), Addr);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
TLI.getPointerTy(DL))
return PPCComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return PPCComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
long TmpOffset = Addr.Offset;
// Iterate through the GEP folding the constants into offsets where
// we can.
gep_type_iterator GTI = gep_type_begin(U);
for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
II != IE; ++II, ++GTI) {
const Value *Op = *II;
if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
for (;;) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
break;
}
if (canFoldAddIntoGEP(U, Op)) {
// A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
// Iterate on the other operand.
Op = cast<AddOperator>(Op)->getOperand(0);
continue;
}
// Unsupported
goto unsupported_gep;
}
}
}
// Try to grab the base operand now.
Addr.Offset = TmpOffset;
if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
// We failed, restore everything and try the other options.
Addr = SavedAddr;
unsupported_gep:
break;
}
case Instruction::Alloca: {
const AllocaInst *AI = cast<AllocaInst>(Obj);
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
Addr.BaseType = Address::FrameIndexBase;
Addr.Base.FI = SI->second;
return true;
}
break;
}
}
// FIXME: References to parameters fall through to the behavior
// below. They should be able to reference a frame index since
// they are stored to the stack, so we can get "ld rx, offset(r1)"
// instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
// just contain the parameter. Try to handle this with a FI.
// Try to get this in a register if nothing else has worked.
if (Addr.Base.Reg == 0)
Addr.Base.Reg = getRegForValue(Obj);
// Prevent assignment of base register to X0, which is inappropriate
// for loads and stores alike.
if (Addr.Base.Reg != 0)
MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
return Addr.Base.Reg != 0;
}
// Fix up some addresses that can't be used directly. For example, if
// an offset won't fit in an instruction field, we may need to move it
// into an index register.
void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
unsigned &IndexReg) {
// Check whether the offset fits in the instruction field.
if (!isInt<16>(Addr.Offset))
UseOffset = false;
// If this is a stack pointer and the offset needs to be simplified then
// put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
Addr.Base.Reg = ResultReg;
Addr.BaseType = Address::RegBase;
}
if (!UseOffset) {
IntegerType *OffsetTy = Type::getInt64Ty(*Context);
const ConstantInt *Offset =
ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
IndexReg = PPCMaterializeInt(Offset, MVT::i64);
assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
}
}
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined.
bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
const TargetRegisterClass *RC,
bool IsZExt, unsigned FP64LoadOpc) {
unsigned Opc;
bool UseOffset = true;
bool HasSPE = PPCSubTarget->hasSPE();
// If ResultReg is given, it determines the register class of the load.
// Otherwise, RC is the register class to use. If the result of the
// load isn't anticipated in this block, both may be zero, in which
// case we must make a conservative guess. In particular, don't assign
// R0 or X0 to the result register, as the result may be used in a load,
// store, add-immediate, or isel that won't permit this. (Though
// perhaps the spill and reload of live-exit values would handle this?)
const TargetRegisterClass *UseRC =
(ResultReg ? MRI.getRegClass(ResultReg) :
(RC ? RC :
(VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
(VT == MVT::f32 ? (HasSPE ? &PPC::SPE4RCRegClass : &PPC::F4RCRegClass) :
(VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
&PPC::GPRC_and_GPRC_NOR0RegClass)))));
bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
switch (VT.SimpleTy) {
default: // e.g., vector types not handled
return false;
case MVT::i8:
Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
break;
case MVT::i16:
Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
: (Is32BitInt ? PPC::LHA : PPC::LHA8));
break;
case MVT::i32:
Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
: (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
UseOffset = false;
break;
case MVT::i64:
Opc = PPC::LD;
assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
"64-bit load with 32-bit target??");
UseOffset = ((Addr.Offset & 3) == 0);
break;
case MVT::f32:
Opc = PPCSubTarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
break;
case MVT::f64:
Opc = FP64LoadOpc;
break;
}
// If necessary, materialize the offset into a register and use
// the indexed form. Also handle stack pointers with special needs.
unsigned IndexReg = 0;
PPCSimplifyAddress(Addr, UseOffset, IndexReg);
// If this is a potential VSX load with an offset of 0, a VSX indexed load can
// be used.
bool IsVSSRC = isVSSRCRegClass(UseRC);
bool IsVSFRC = isVSFRCRegClass(UseRC);
bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
if ((Is32VSXLoad || Is64VSXLoad) &&
(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
(Addr.Offset == 0)) {
UseOffset = false;
}
if (ResultReg == 0)
ResultReg = createResultReg(UseRC);
// Note: If we still have a frame index here, we know the offset is
// in range, as otherwise PPCSimplifyAddress would have converted it
// into a RegBase.
if (Addr.BaseType == Address::FrameIndexBase) {
// VSX only provides an indexed load.
if (Is32VSXLoad || Is64VSXLoad) return false;
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
Addr.Offset),
MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
// Base reg with offset in range.
} else if (UseOffset) {
// VSX only provides an indexed load.
if (Is32VSXLoad || Is64VSXLoad) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addReg(Addr.Base.Reg);
// Indexed form.
} else {
// Get the RR opcode corresponding to the RI one. FIXME: It would be
// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
// is hard to get at.
switch (Opc) {
default: llvm_unreachable("Unexpected opcode!");
case PPC::LBZ: Opc = PPC::LBZX; break;
case PPC::LBZ8: Opc = PPC::LBZX8; break;
case PPC::LHZ: Opc = PPC::LHZX; break;
case PPC::LHZ8: Opc = PPC::LHZX8; break;
case PPC::LHA: Opc = PPC::LHAX; break;
case PPC::LHA8: Opc = PPC::LHAX8; break;
case PPC::LWZ: Opc = PPC::LWZX; break;
case PPC::LWZ8: Opc = PPC::LWZX8; break;
case PPC::LWA: Opc = PPC::LWAX; break;
case PPC::LWA_32: Opc = PPC::LWAX_32; break;
case PPC::LD: Opc = PPC::LDX; break;
case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
case PPC::EVLDD: Opc = PPC::EVLDDX; break;
case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
}
auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
ResultReg);
// If we have an index register defined we use it in the store inst,
// otherwise we use X0 as base as it makes the vector instructions to
// use zero in the computation of the effective address regardless the
// content of the register.
if (IndexReg)
MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
else
MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
}
return true;
}
// Attempt to fast-select a load instruction.
bool PPCFastISel::SelectLoad(const Instruction *I) {
// FIXME: No atomic loads are supported.
if (cast<LoadInst>(I)->isAtomic())
return false;
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(I->getType(), VT))
return false;
// See if we can handle this address.
Address Addr;
if (!PPCComputeAddress(I->getOperand(0), Addr))
return false;
// Look at the currently assigned register for this instruction
// to determine the required register class. This is necessary
// to constrain RA from using R0/X0 when this is not legal.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
updateValueMap(I, ResultReg);
return true;
}
// Emit a store instruction to store SrcReg at Addr.
bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
assert(SrcReg && "Nothing to store!");
unsigned Opc;
bool UseOffset = true;
const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
switch (VT.SimpleTy) {
default: // e.g., vector types not handled
return false;
case MVT::i8:
Opc = Is32BitInt ? PPC::STB : PPC::STB8;
break;
case MVT::i16:
Opc = Is32BitInt ? PPC::STH : PPC::STH8;
break;
case MVT::i32:
assert(Is32BitInt && "Not GPRC for i32??");
Opc = PPC::STW;
break;
case MVT::i64:
Opc = PPC::STD;
UseOffset = ((Addr.Offset & 3) == 0);
break;
case MVT::f32:
Opc = PPCSubTarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
break;
case MVT::f64:
Opc = PPCSubTarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
break;
}
// If necessary, materialize the offset into a register and use
// the indexed form. Also handle stack pointers with special needs.
unsigned IndexReg = 0;
PPCSimplifyAddress(Addr, UseOffset, IndexReg);
// If this is a potential VSX store with an offset of 0, a VSX indexed store
// can be used.
bool IsVSSRC = isVSSRCRegClass(RC);
bool IsVSFRC = isVSFRCRegClass(RC);
bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
if ((Is32VSXStore || Is64VSXStore) &&
(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
(Addr.Offset == 0)) {
UseOffset = false;
}
// Note: If we still have a frame index here, we know the offset is
// in range, as otherwise PPCSimplifyAddress would have converted it
// into a RegBase.
if (Addr.BaseType == Address::FrameIndexBase) {
// VSX only provides an indexed store.
if (Is32VSXStore || Is64VSXStore) return false;
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
Addr.Offset),
MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg)
.addImm(Addr.Offset)
.addFrameIndex(Addr.Base.FI)
.addMemOperand(MMO);
// Base reg with offset in range.
} else if (UseOffset) {
// VSX only provides an indexed store.
if (Is32VSXStore || Is64VSXStore)
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
// Indexed form.
} else {
// Get the RR opcode corresponding to the RI one. FIXME: It would be
// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
// is hard to get at.
switch (Opc) {
default: llvm_unreachable("Unexpected opcode!");
case PPC::STB: Opc = PPC::STBX; break;
case PPC::STH : Opc = PPC::STHX; break;
case PPC::STW : Opc = PPC::STWX; break;
case PPC::STB8: Opc = PPC::STBX8; break;
case PPC::STH8: Opc = PPC::STHX8; break;
case PPC::STW8: Opc = PPC::STWX8; break;
case PPC::STD: Opc = PPC::STDX; break;
case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
case PPC::SPESTW: Opc = PPC::SPESTWX; break;
}
auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg);
// If we have an index register defined we use it in the store inst,
// otherwise we use X0 as base as it makes the vector instructions to
// use zero in the computation of the effective address regardless the
// content of the register.
if (IndexReg)
MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
else
MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
}
return true;
}
// Attempt to fast-select a store instruction.
bool PPCFastISel::SelectStore(const Instruction *I) {
Value *Op0 = I->getOperand(0);
unsigned SrcReg = 0;
// FIXME: No atomics loads are supported.
if (cast<StoreInst>(I)->isAtomic())
return false;
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(Op0->getType(), VT))
return false;
// Get the value to be stored into a register.
SrcReg = getRegForValue(Op0);
if (SrcReg == 0)
return false;
// See if we can handle this address.
Address Addr;
if (!PPCComputeAddress(I->getOperand(1), Addr))
return false;
if (!PPCEmitStore(VT, SrcReg, Addr))
return false;
return true;
}
// Attempt to fast-select a branch instruction.
bool PPCFastISel::SelectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
MachineBasicBlock *BrBB = FuncInfo.MBB;
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
// For now, just try the simplest case where it's fed by a compare.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (isValueAvailable(CI)) {
Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
if (!OptPPCPred)
return false;
PPC::Predicate PPCPred = OptPPCPred.getValue();
// Take advantage of fall-through opportunities.
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
PPCPred = PPC::InvertPredicate(PPCPred);
}
unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
CondReg, PPCPred))
return false;
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
.addImm(PPCSubTarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
.addReg(CondReg).addMBB(TBB);
finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (const ConstantInt *CI =
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
fastEmitBranch(Target, DbgLoc);
return true;
}
// FIXME: ARM looks for a case where the block containing the compare
// has been split from the block containing the branch. If this happens,
// there is a vreg available containing the result of the compare. I'm
// not sure we can do much, as we've lost the predicate information with
// the compare instruction -- we have a 4-bit CR but don't know which bit
// to test here.
return false;
}
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
bool IsZExt, unsigned DestReg,
const PPC::Predicate Pred) {
Type *Ty = SrcValue1->getType();
EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
return false;
// See if operand 2 is an immediate encodeable in the compare.
// FIXME: Operands are not in canonical order at -O0, so an immediate
// operand in position 1 is a lost opportunity for now. We are
// similar to ARM in this regard.
long Imm = 0;
bool UseImm = false;
const bool HasSPE = PPCSubTarget->hasSPE();
// Only 16-bit integer constants can be represented in compares for
// PowerPC. Others will be materialized into a register.
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
SrcVT == MVT::i8 || SrcVT == MVT::i1) {
const APInt &CIVal = ConstInt->getValue();
Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
UseImm = true;
}
}
unsigned SrcReg1 = getRegForValue(SrcValue1);
if (SrcReg1 == 0)
return false;
unsigned SrcReg2 = 0;
if (!UseImm) {
SrcReg2 = getRegForValue(SrcValue2);
if (SrcReg2 == 0)
return false;
}
unsigned CmpOpc;
bool NeedsExt = false;
auto RC1 = MRI.getRegClass(SrcReg1);
auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;
switch (SrcVT.SimpleTy) {
default: return false;
case MVT::f32:
if (HasSPE) {
switch (Pred) {
default: return false;
case PPC::PRED_EQ:
CmpOpc = PPC::EFSCMPEQ;
break;
case PPC::PRED_LT:
CmpOpc = PPC::EFSCMPLT;
break;
case PPC::PRED_GT:
CmpOpc = PPC::EFSCMPGT;
break;
}
} else {
CmpOpc = PPC::FCMPUS;
if (isVSSRCRegClass(RC1))
SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
if (RC2 && isVSSRCRegClass(RC2))
SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
}
break;
case MVT::f64:
if (HasSPE) {
switch (Pred) {
default: return false;
case PPC::PRED_EQ:
CmpOpc = PPC::EFDCMPEQ;
break;
case PPC::PRED_LT:
CmpOpc = PPC::EFDCMPLT;
break;
case PPC::PRED_GT:
CmpOpc = PPC::EFDCMPGT;
break;
}
} else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
CmpOpc = PPC::XSCMPUDP;
} else {
CmpOpc = PPC::FCMPUD;
}
break;
case MVT::i1:
case MVT::i8:
case MVT::i16:
NeedsExt = true;
LLVM_FALLTHROUGH;
case MVT::i32:
if (!UseImm)
CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
else
CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
break;
case MVT::i64:
if (!UseImm)
CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
else
CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
break;
}
if (NeedsExt) {
unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
return false;
SrcReg1 = ExtReg;
if (!UseImm) {
unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
return false;
SrcReg2 = ExtReg;
}
}
if (!UseImm)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
.addReg(SrcReg1).addReg(SrcReg2);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
.addReg(SrcReg1).addImm(Imm);
return true;
}
// Attempt to fast-select a floating-point extend instruction.
bool PPCFastISel::SelectFPExt(const Instruction *I) {
Value *Src = I->getOperand(0);
EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::f32 || DestVT != MVT::f64)
return false;
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
// No code is generated for a FP extend.
updateValueMap(I, SrcReg);
return true;
}
// Attempt to fast-select a floating-point truncate instruction.
bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
Value *Src = I->getOperand(0);
EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::f64 || DestVT != MVT::f32)
return false;
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
// Round the result to single precision.
unsigned DestReg;
auto RC = MRI.getRegClass(SrcReg);
if (PPCSubTarget->hasSPE()) {
DestReg = createResultReg(&PPC::SPE4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::EFSCFD), DestReg)
.addReg(SrcReg);
} else if (isVSFRCRegClass(RC)) {
DestReg = createResultReg(&PPC::VSSRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::XSRSP), DestReg)
.addReg(SrcReg);
} else {
DestReg = createResultReg(&PPC::F4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::FRSP), DestReg)
.addReg(SrcReg);
}
updateValueMap(I, DestReg);
return true;
}
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
bool IsSigned) {
// If necessary, extend 32-bit int to 64-bit.
if (SrcVT == MVT::i32) {
unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
return 0;
SrcReg = TmpReg;
}
// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
Address Addr;
Addr.BaseType = Address::FrameIndexBase;
Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
// Store the value from the GPR.
if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
return 0;
// Load the integer value into an FPR. The kind of load used depends
// on a number of conditions.
unsigned LoadOpc = PPC::LFD;
if (SrcVT == MVT::i32) {
if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
} else if (PPCSubTarget->hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
}
}
const TargetRegisterClass *RC = &PPC::F8RCRegClass;
unsigned ResultReg = 0;
if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
return 0;
return ResultReg;
}
// Attempt to fast-select an integer-to-floating-point conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
MVT DstVT;
Type *DstTy = I->getType();
if (!isTypeLegal(DstTy, DstVT))
return false;
if (DstVT != MVT::f32 && DstVT != MVT::f64)
return false;
Value *Src = I->getOperand(0);
EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
SrcVT != MVT::i32 && SrcVT != MVT::i64)
return false;
unsigned SrcReg = getRegForValue(Src);
if (SrcReg == 0)
return false;
// Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
if (PPCSubTarget->hasSPE()) {
unsigned Opc;
if (DstVT == MVT::f32)
Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
else
Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;
unsigned DestReg = createResultReg(&PPC::SPERCRegClass);
// Generate the convert.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(SrcReg);
updateValueMap(I, DestReg);
return true;
}
// We can only lower an unsigned convert if we have the newer
// floating-point conversion operations.
if (!IsSigned && !PPCSubTarget->hasFPCVT())
return false;
// FIXME: For now we require the newer floating-point conversion operations
// (which are present only on P7 and A2 server models) when converting
// to single-precision float. Otherwise we have to generate a lot of
// fiddly code to avoid double rounding. If necessary, the fiddly code
// can be found in PPCTargetLowering::LowerINT_TO_FP().
if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
return false;
// Extend the input if necessary.
if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
return false;
SrcVT = MVT::i64;
SrcReg = TmpReg;
}
// Move the integer value to an FPR.
unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
if (FPReg == 0)
return false;
// Determine the opcode for the conversion.
const TargetRegisterClass *RC = &PPC::F8RCRegClass;
unsigned DestReg = createResultReg(RC);
unsigned Opc;
if (DstVT == MVT::f32)
Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
else
Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
// Generate the convert.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(FPReg);
updateValueMap(I, DestReg);
return true;
}
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
unsigned SrcReg, bool IsSigned) {
// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
// Note that if have STFIWX available, we could use a 4-byte stack
// slot for i32, but this being fast-isel we'll just go with the
// easiest code gen possible.
Address Addr;
Addr.BaseType = Address::FrameIndexBase;
Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
// Store the value from the FPR.
if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
return 0;
// Reload it into a GPR. If we want an i32 on big endian, modify the
// address to have a 4-byte offset so we load from the right place.
if (VT == MVT::i32)
Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
// Look at the currently assigned register for this instruction
// to determine the required register class.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
return 0;
return ResultReg;
}
// Attempt to fast-select a floating-point-to-integer conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
MVT DstVT, SrcVT;
Type *DstTy = I->getType();
if (!isTypeLegal(DstTy, DstVT))
return false;
if (DstVT != MVT::i32 && DstVT != MVT::i64)
return false;
// If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
if (DstVT == MVT::i64 && !IsSigned &&
!PPCSubTarget->hasFPCVT() && !PPCSubTarget->hasSPE())
return false;
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
if (!isTypeLegal(SrcTy, SrcVT))
return false;
if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
return false;
unsigned SrcReg = getRegForValue(Src);
if (SrcReg == 0)
return false;
// Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
// meaningless copy to get the register class right.
const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
if (InRC == &PPC::F4RCRegClass)
SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
else if (InRC == &PPC::VSSRCRegClass)
SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);
// Determine the opcode for the conversion, which takes place
// entirely within FPRs or VSRs.
unsigned DestReg;
unsigned Opc;
auto RC = MRI.getRegClass(SrcReg);
if (PPCSubTarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
if (IsSigned)
Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
else
Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
} else if (isVSFRCRegClass(RC)) {
DestReg = createResultReg(&PPC::VSFRCRegClass);
if (DstVT == MVT::i32)
Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
else
Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
} else {
DestReg = createResultReg(&PPC::F8RCRegClass);
if (DstVT == MVT::i32)
if (IsSigned)
Opc = PPC::FCTIWZ;
else
Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
else
Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
}
// Generate the convert.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(SrcReg);
// Now move the integer value from a float register to an integer register.
unsigned IntReg = PPCSubTarget->hasSPE() ? DestReg :
PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
if (IntReg == 0)
return false;
updateValueMap(I, IntReg);
return true;
}
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
EVT DestVT = TLI.getValueType(DL, I->getType(), true);
// We can get here in the case when we have a binary operation on a non-legal
// type and the target independent selector doesn't know how to handle it.
if (DestVT != MVT::i16 && DestVT != MVT::i8)
return false;
// Look at the currently assigned register for this instruction
// to determine the required register class. If there is no register,
// make a conservative choice (don't assign R0).
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
(AssignedReg ? MRI.getRegClass(AssignedReg) :
&PPC::GPRC_and_GPRC_NOR0RegClass);
bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
unsigned Opc;
switch (ISDOpcode) {
default: return false;
case ISD::ADD:
Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
break;
case ISD::OR:
Opc = IsGPRC ? PPC::OR : PPC::OR8;
break;
case ISD::SUB:
Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
break;
}
unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
unsigned SrcReg1 = getRegForValue(I->getOperand(0));
if (SrcReg1 == 0) return false;
// Handle case of small immediate operand.
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
const APInt &CIVal = ConstInt->getValue();
int Imm = (int)CIVal.getSExtValue();
bool UseImm = true;
if (isInt<16>(Imm)) {
switch (Opc) {
default:
llvm_unreachable("Missing case!");
case PPC::ADD4:
Opc = PPC::ADDI;
MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
break;
case PPC::ADD8:
Opc = PPC::ADDI8;
MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
break;
case PPC::OR:
Opc = PPC::ORI;
break;
case PPC::OR8:
Opc = PPC::ORI8;
break;
case PPC::SUBF:
if (Imm == -32768)
UseImm = false;
else {
Opc = PPC::ADDI;
MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
Imm = -Imm;
}
break;
case PPC::SUBF8:
if (Imm == -32768)
UseImm = false;
else {
Opc = PPC::ADDI8;
MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
Imm = -Imm;
}
break;
}
if (UseImm) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
ResultReg)
.addReg(SrcReg1)
.addImm(Imm);
updateValueMap(I, ResultReg);
return true;
}
}
}
// Reg-reg case.
unsigned SrcReg2 = getRegForValue(I->getOperand(1));
if (SrcReg2 == 0) return false;
// Reverse operands for subtract-from.
if (ISDOpcode == ISD::SUB)
std::swap(SrcReg1, SrcReg2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2);
updateValueMap(I, ResultReg);
return true;
}
// Handle arguments to a call that we're attempting to fast-select.
// Return false if the arguments are too complex for us at the moment.
bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC,
unsigned &NumBytes,
bool IsVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
// Reserve space for the linkage area on the stack.
unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, 8);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
// Bail out if we can't handle any of the arguments.
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
CCValAssign &VA = ArgLocs[I];
MVT ArgVT = ArgVTs[VA.getValNo()];
// Skip vector arguments for now, as well as long double and
// uint128_t, and anything that isn't passed in a register.
if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
!VA.isRegLoc() || VA.needsCustom())
return false;
// Skip bit-converted arguments for now.
if (VA.getLocInfo() == CCValAssign::BCvt)
return false;
}
// Get a count of how many bytes are to be pushed onto the stack.
NumBytes = CCInfo.getNextStackOffset();
// The prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if its varargs.
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
// FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
NumBytes = std::max(NumBytes, LinkageSize + 64);
// Issue CALLSEQ_START.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameSetupOpcode()))
.addImm(NumBytes).addImm(0);
// Prepare to assign register arguments. Every argument uses up a
// GPR protocol register even if it's passed in a floating-point
// register (unless we're using the fast calling convention).
unsigned NextGPR = PPC::X3;
unsigned NextFPR = PPC::F1;
// Process arguments.
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
CCValAssign &VA = ArgLocs[I];
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
// Handle argument promotion and bitcasts.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
llvm_unreachable("Failed to emit a sext!");
ArgVT = DestVT;
Arg = TmpReg;
break;
}
case CCValAssign::AExt:
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
llvm_unreachable("Failed to emit a zext!");
ArgVT = DestVT;
Arg = TmpReg;
break;
}
case CCValAssign::BCvt: {
// FIXME: Not yet handled.
llvm_unreachable("Should have bailed before getting here!");
break;
}
}
// Copy this argument to the appropriate register.
unsigned ArgReg;
if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
ArgReg = NextFPR++;
if (CC != CallingConv::Fast)
++NextGPR;
} else
ArgReg = NextGPR++;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
RegArgs.push_back(ArgReg);
}
return true;
}
// For a call that we've determined we can fast-select, finish the
// call sequence and generate a copy to obtain the return value (if any).
bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
CallingConv::ID CC = CLI.CallConv;
// Issue CallSEQ_END.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameDestroyOpcode()))
.addImm(NumBytes).addImm(0);
// Next, generate a copy to obtain the return value.
// FIXME: No multi-register return values yet, though I don't foresee
// any real difficulties there.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
CCValAssign &VA = RVLocs[0];
assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
assert(VA.isRegLoc() && "Can only return in registers!");
MVT DestVT = VA.getValVT();
MVT CopyVT = DestVT;
// Ints smaller than a register still arrive in a full 64-bit
// register, so make sure we recognize this.
if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
CopyVT = MVT::i64;
unsigned SourcePhysReg = VA.getLocReg();
unsigned ResultReg = 0;
if (RetVT == CopyVT) {
const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
// If necessary, round the floating result to single precision.
} else if (CopyVT == MVT::f64) {
ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
ResultReg).addReg(SourcePhysReg);
// If only the low half of a general register is needed, generate
// a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
// used along the fast-isel path (not lowered), and downstream logic
// also doesn't like a direct subreg copy on a physical reg.)
} else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
// Convert physical register from G8RC to GPRC.
SourcePhysReg -= PPC::X0 - PPC::R0;
ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
}
assert(ResultReg && "ResultReg unset!");
CLI.InRegs.push_back(SourcePhysReg);
CLI.ResultReg = ResultReg;
CLI.NumResultRegs = 1;
}
return true;
}
bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
CallingConv::ID CC = CLI.CallConv;
bool IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
const MCSymbol *Symbol = CLI.Symbol;
if (!Callee && !Symbol)
return false;
// Allow SelectionDAG isel to handle tail calls.
if (IsTailCall)
return false;
// Let SDISel handle vararg functions.
if (IsVarArg)
return false;
// Handle simple calls for now, with legal return types and
// those that can be extended.
Type *RetTy = CLI.RetTy;
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
RetVT != MVT::i8)
return false;
else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
// We can't handle boolean returns when CR bits are in use.
return false;
// FIXME: No multi-register return values yet.
if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
RetVT != MVT::f64) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
if (RVLocs.size() > 1)
return false;
}
// Bail early if more than 8 arguments, as we only currently
// handle arguments passed in registers.
unsigned NumArgs = CLI.OutVals.size();
if (NumArgs > 8)
return false;
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
SmallVector<unsigned, 8> ArgRegs;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
Args.reserve(NumArgs);
ArgRegs.reserve(NumArgs);
ArgVTs.reserve(NumArgs);
ArgFlags.reserve(NumArgs);
for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
// Only handle easy calls for now. It would be reasonably easy
// to handle <= 8-byte structures passed ByVal in registers, but we
// have to ensure they are right-justified in the register.
ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
return false;
Value *ArgValue = CLI.OutVals[i];
Type *ArgTy = ArgValue->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
return false;
if (ArgVT.isVector())
return false;
unsigned Arg = getRegForValue(ArgValue);
if (Arg == 0)
return false;
Args.push_back(ArgValue);
ArgRegs.push_back(Arg);
ArgVTs.push_back(ArgVT);
ArgFlags.push_back(Flags);
}
// Process the arguments.
SmallVector<unsigned, 8> RegArgs;
unsigned NumBytes;
if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
RegArgs, CC, NumBytes, IsVarArg))
return false;
MachineInstrBuilder MIB;
// FIXME: No handling for function pointers yet. This requires
// implementing the function descriptor (OPD) setup.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
if (!GV) {
// patchpoints are a special case; they always dispatch to a pointer value.
// However, we don't actually want to generate the indirect call sequence
// here (that will be generated, as necessary, during asm printing), and
// the call we generate here will be erased by FastISel::selectPatchpoint,
// so don't try very hard...
if (CLI.IsPatchPoint)
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
else
return false;
} else {
// Build direct call with NOP for TOC restore.
// FIXME: We can and should optimize away the NOP for local calls.
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::BL8_NOP));
// Add callee.
MIB.addGlobalAddress(GV);
}
// Add implicit physical register uses to the call.
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
MIB.addReg(RegArgs[II], RegState::Implicit);
// Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
// into the call.
PPCFuncInfo->setUsesTOCBasePtr();
MIB.addReg(PPC::X2, RegState::Implicit);
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
// Finish off the call including any return values.
return finishCall(RetVT, CLI, NumBytes);
}
// Attempt to fast-select a return instruction.
bool PPCFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
if (TLI.supportSplitCSR(FuncInfo.MF))
return false;
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
const Value *RV = Ret->getOperand(0);
// FIXME: Only one output register for now.
if (ValLocs.size() > 1)
return false;
// Special case for returning a constant integer of any size - materialize
// the constant as an i64 and copy it to the return register.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
CCValAssign &VA = ValLocs[0];
unsigned RetReg = VA.getLocReg();
// We still need to worry about properly extending the sign. For example,
// we could have only a single bit or a constant that needs zero
// extension rather than sign extension. Make sure we pass the return
// value extension property to integer materialization.
unsigned SrcReg =
PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
RetRegs.push_back(RetReg);
} else {
unsigned Reg = getRegForValue(RV);
if (Reg == 0)
return false;
// Copy the result values into the output registers.
for (unsigned i = 0; i < ValLocs.size(); ++i) {
CCValAssign &VA = ValLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
RetRegs.push_back(VA.getLocReg());
unsigned SrcReg = Reg + VA.getValNo();
EVT RVEVT = TLI.getValueType(DL, RV->getType());
if (!RVEVT.isSimple())
return false;
MVT RVVT = RVEVT.getSimpleVT();
MVT DestVT = VA.getLocVT();
if (RVVT != DestVT && RVVT != MVT::i8 &&
RVVT != MVT::i16 && RVVT != MVT::i32)
return false;
if (RVVT != DestVT) {
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
llvm_unreachable("Full value assign but types don't match?");
case CCValAssign::AExt:
case CCValAssign::ZExt: {
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
return false;
SrcReg = TmpReg;
break;
}
case CCValAssign::SExt: {
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
return false;
SrcReg = TmpReg;
break;
}
}
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), RetRegs[i])
.addReg(SrcReg);
}
}
}
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::BLR8));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
// Attempt to emit an integer extend of SrcReg into DestReg. Both
// signed and zero extensions are supported. Return false if we
// can't handle it.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg, bool IsZExt) {
if (DestVT != MVT::i32 && DestVT != MVT::i64)
return false;
if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
return false;
// Signed extensions use EXTSB, EXTSH, EXTSW.
if (!IsZExt) {
unsigned Opc;
if (SrcVT == MVT::i8)
Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
else if (SrcVT == MVT::i16)
Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
else {
assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
Opc = PPC::EXTSW_32_64;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(SrcReg);
// Unsigned 32-bit extensions use RLWINM.
} else if (DestVT == MVT::i32) {
unsigned MB;
if (SrcVT == MVT::i8)
MB = 24;
else {
assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
MB = 16;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
DestReg)
.addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
// Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
} else {
unsigned MB;
if (SrcVT == MVT::i8)
MB = 56;
else if (SrcVT == MVT::i16)
MB = 48;
else
MB = 32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::RLDICL_32_64), DestReg)
.addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
}
return true;
}
// Attempt to fast-select an indirect branch instruction.
bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
unsigned AddrReg = getRegForValue(I->getOperand(0));
if (AddrReg == 0)
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
.addReg(AddrReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
const IndirectBrInst *IB = cast<IndirectBrInst>(I);
for (const BasicBlock *SuccBB : IB->successors())
FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
return true;
}
// Attempt to fast-select an integer truncate instruction.
bool PPCFastISel::SelectTrunc(const Instruction *I) {
Value *Src = I->getOperand(0);
EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
return false;
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
return false;
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
// The only interesting case is when we need to switch register classes.
if (SrcVT == MVT::i64)
SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
updateValueMap(I, SrcReg);
return true;
}
// Attempt to fast-select an integer extend instruction.
bool PPCFastISel::SelectIntExt(const Instruction *I) {
Type *DestTy = I->getType();
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
bool IsZExt = isa<ZExtInst>(I);
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg) return false;
EVT SrcEVT, DestEVT;
SrcEVT = TLI.getValueType(DL, SrcTy, true);
DestEVT = TLI.getValueType(DL, DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DestVT = DestEVT.getSimpleVT();
// If we know the register class needed for the result of this
// instruction, use it. Otherwise pick the register class of the
// correct size that does not contain X0/R0, since we don't know
// whether downstream uses permit that assignment.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
(AssignedReg ? MRI.getRegClass(AssignedReg) :
(DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
&PPC::GPRC_and_GPRC_NOR0RegClass));
unsigned ResultReg = createResultReg(RC);
if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
return false;
updateValueMap(I, ResultReg);
return true;
}
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery.
bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Load:
return SelectLoad(I);
case Instruction::Store:
return SelectStore(I);
case Instruction::Br:
return SelectBranch(I);
case Instruction::IndirectBr:
return SelectIndirectBr(I);
case Instruction::FPExt:
return SelectFPExt(I);
case Instruction::FPTrunc:
return SelectFPTrunc(I);
case Instruction::SIToFP:
return SelectIToFP(I, /*IsSigned*/ true);
case Instruction::UIToFP:
return SelectIToFP(I, /*IsSigned*/ false);
case Instruction::FPToSI:
return SelectFPToI(I, /*IsSigned*/ true);
case Instruction::FPToUI:
return SelectFPToI(I, /*IsSigned*/ false);
case Instruction::Add:
return SelectBinaryIntOp(I, ISD::ADD);
case Instruction::Or:
return SelectBinaryIntOp(I, ISD::OR);
case Instruction::Sub:
return SelectBinaryIntOp(I, ISD::SUB);
case Instruction::Call:
return selectCall(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
return SelectTrunc(I);
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntExt(I);
// Here add other flavors of Instruction::XXX that automated
// cases don't catch. For example, switches are terminators
// that aren't yet handled.
default:
break;
}
return false;
}
// Materialize a floating-point constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
// No plans to handle long double here.
if (VT != MVT::f32 && VT != MVT::f64)
return 0;
// All FP constants are loaded from the constant pool.
unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
assert(Align > 0 && "Unexpectedly missing alignment information!");
unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
const bool HasSPE = PPCSubTarget->hasSPE();
const TargetRegisterClass *RC;
if (HasSPE)
RC = ((VT == MVT::f32) ? &PPC::SPE4RCRegClass : &PPC::SPERCRegClass);
else
RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
unsigned DestReg = createResultReg(RC);
CodeModel::Model CModel = TM.getCodeModel();
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getConstantPool(*FuncInfo.MF),
MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
unsigned Opc;
if (HasSPE)
Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
else
Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
PPCFuncInfo->setUsesTOCBasePtr();
// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
if (CModel == CodeModel::Small) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
TmpReg)
.addConstantPoolIndex(Idx).addReg(PPC::X2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addImm(0).addReg(TmpReg).addMemOperand(MMO);
} else {
// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
// But for large code model, we must generate a LDtocL followed
// by the LF[SD].
if (CModel == CodeModel::Large) {
unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addImm(0)
.addReg(TmpReg2);
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
.addReg(TmpReg)
.addMemOperand(MMO);
}
return DestReg;
}
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
assert(VT == MVT::i64 && "Non-address!");
const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
unsigned DestReg = createResultReg(RC);
// Global values may be plain old object addresses, TLS object
// addresses, constant pool entries, or jump tables. How we generate
// code for these may depend on small, medium, or large code model.
CodeModel::Model CModel = TM.getCodeModel();
// FIXME: Jump tables are not yet required because fast-isel doesn't
// handle switches; if that changes, we need them as well. For now,
// what follows assumes everything's a generic (or TLS) global address.
// FIXME: We don't yet handle the complexity of TLS.
if (GV->isThreadLocal())
return 0;
PPCFuncInfo->setUsesTOCBasePtr();
// For small code model, generate a simple TOC load.
if (CModel == CodeModel::Small)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
DestReg)
.addGlobalAddress(GV)
.addReg(PPC::X2);
else {
// If the address is an externally defined symbol, a symbol with common
// or externally available linkage, a non-local function address, or a
// jump table address (not yet needed), or if we are generating code
// for large code model, we generate:
// LDtocL(GV, ADDIStocHA(%x2, GV))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%x2, GV), GV)
// Either way, start with the ADDIStocHA:
unsigned HighPartReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
if (GVFlags & PPCII::MO_NLP_FLAG) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
} else {
// Otherwise generate the ADDItocL.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
DestReg).addReg(HighPartReg).addGlobalAddress(GV);
}
}
return DestReg;
}
// Materialize a 32-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC) {
unsigned Lo = Imm & 0xFFFF;
unsigned Hi = (Imm >> 16) & 0xFFFF;
unsigned ResultReg = createResultReg(RC);
bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
if (isInt<16>(Imm))
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
.addImm(Imm);
else if (Lo) {
// Both Lo and Hi have nonzero bits.
unsigned TmpReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
.addImm(Hi);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
.addReg(TmpReg).addImm(Lo);
} else
// Just Hi bits.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
.addImm(Hi);
return ResultReg;
}
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
const TargetRegisterClass *RC) {
unsigned Remainder = 0;
unsigned Shift = 0;
// If the value doesn't fit in 32 bits, see if we can shift it
// so that it fits in 32 bits.
if (!isInt<32>(Imm)) {
Shift = countTrailingZeros<uint64_t>(Imm);
int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
if (isInt<32>(ImmSh))
Imm = ImmSh;
else {
Remainder = Imm;
Shift = 32;
Imm >>= 32;
}
}
// Handle the high-order 32 bits (if shifted) or the whole 32 bits
// (if not shifted).
unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
if (!Shift)
return TmpReg1;
// If upper 32 bits were not zero, we've built them and need to shift
// them into place.
unsigned TmpReg2;
if (Imm) {
TmpReg2 = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
} else
TmpReg2 = TmpReg1;
unsigned TmpReg3, Hi, Lo;
if ((Hi = (Remainder >> 16) & 0xFFFF)) {
TmpReg3 = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
TmpReg3).addReg(TmpReg2).addImm(Hi);
} else
TmpReg3 = TmpReg2;
if ((Lo = Remainder & 0xFFFF)) {
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
ResultReg).addReg(TmpReg3).addImm(Lo);
return ResultReg;
}
return TmpReg3;
}
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
bool UseSExt) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
return ImmReg;
}
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
VT != MVT::i1)
return 0;
const TargetRegisterClass *RC =
((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
// If the constant is in range, use a load-immediate.
// Since LI will sign extend the constant we need to make sure that for
// our zeroext constants that the sign extended constant fits into 16-bits -
// a range of 0..0x7fff.
if (isInt<16>(Imm)) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
unsigned ImmReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
.addImm(Imm);
return ImmReg;
}
// Construct the constant piecewise.
if (VT == MVT::i64)
return PPCMaterialize64BitInt(Imm, RC);
else if (VT == MVT::i32)
return PPCMaterialize32BitInt(Imm, RC);
return 0;
}
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple()) return 0;
MVT VT = CEVT.getSimpleVT();
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return PPCMaterializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return PPCMaterializeGV(GV, VT);
else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
// Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
// assumes that constant PHI operands will be zero extended, and failure to
// match that assumption will cause problems if we sign extend here but
// some user of a PHI is in a block for which we fall back to full SDAG
// instruction selection.
return PPCMaterializeInt(CI, VT, false);
return 0;
}
// Materialize the address created by an alloca into a register, and
// return the register number (or zero if we failed to handle it).
unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
MVT VT;
if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
ResultReg).addFrameIndex(SI->second).addImm(0);
return ResultReg;
}
return 0;
}
// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load. The folding only picks up one. Extend this
// to check subsequent instructions for the same pattern and remove
// them. Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) {
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(LI->getType(), VT))
return false;
// Combine load followed by zero- or sign-extend.
bool IsZExt = false;
switch(MI->getOpcode()) {
default:
return false;
case PPC::RLDICL:
case PPC::RLDICL_32_64: {
IsZExt = true;
unsigned MB = MI->getOperand(3).getImm();
if ((VT == MVT::i8 && MB <= 56) ||
(VT == MVT::i16 && MB <= 48) ||
(VT == MVT::i32 && MB <= 32))
break;
return false;
}
case PPC::RLWINM:
case PPC::RLWINM8: {
IsZExt = true;
unsigned MB = MI->getOperand(3).getImm();
if ((VT == MVT::i8 && MB <= 24) ||
(VT == MVT::i16 && MB <= 16))
break;
return false;
}
case PPC::EXTSB:
case PPC::EXTSB8:
case PPC::EXTSB8_32_64:
/* There is no sign-extending load-byte instruction. */
return false;
case PPC::EXTSH:
case PPC::EXTSH8:
case PPC::EXTSH8_32_64: {
if (VT != MVT::i16 && VT != MVT::i8)
return false;
break;
}
case PPC::EXTSW:
case PPC::EXTSW_32:
case PPC::EXTSW_32_64: {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
return false;
break;
}
}
// See if we can handle this address.
Address Addr;
if (!PPCComputeAddress(LI->getOperand(0), Addr))
return false;
unsigned ResultReg = MI->getOperand(0).getReg();
if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
MachineBasicBlock::iterator I(MI);
removeDeadCode(I, std::next(I));
return true;
}
// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.
bool PPCFastISel::fastLowerArguments() {
// Defer to normal argument lowering for now. It's reasonably
// efficient. Consider doing something like ARM to handle the
// case where all args fit in registers, no varargs, no float
// or vector args.
return false;
}
// Handle materializing integer constants into a register. This is not
// automatically generated for PowerPC, so must be explicitly created here.
unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
if (Opc != ISD::Constant)
return 0;
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
return ImmReg;
}
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
VT != MVT::i1)
return 0;
const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
&PPC::GPRCRegClass);
if (VT == MVT::i64)
return PPCMaterialize64BitInt(Imm, RC);
else
return PPCMaterialize32BitInt(Imm, RC);
}
// Override for ADDI and ADDI8 to set the correct register class
// on RHS operand 0. The automatic infrastructure naively assumes
// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
// for these cases. At the moment, none of the other automatically
// generated RI instructions require special treatment. However, once
// SelectSelect is implemented, "isel" requires similar handling.
//
// Also be conservative about the output register class. Avoid
// assigning R0 or X0 to the output register for GPRC and G8RC
// register classes, as any such result could be used in ADDI, etc.,
// where those regs have another meaning.
unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
if (MachineInstOpcode == PPC::ADDI)
MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
else if (MachineInstOpcode == PPC::ADDI8)
MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
const TargetRegisterClass *UseRC =
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
Op0, Op0IsKill, Imm);
}
// Override for instructions with one register operand to avoid use of
// R0/X0. The automatic infrastructure isn't aware of the context so
// we must be conservative.
unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass* RC,
unsigned Op0, bool Op0IsKill) {
const TargetRegisterClass *UseRC =
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
}
// Override for instructions with two register operands to avoid use
// of R0/X0. The automatic infrastructure isn't aware of the context
// so we must be conservative.
unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass* RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
const TargetRegisterClass *UseRC =
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
Op1, Op1IsKill);
}
namespace llvm {
// Create the fast instruction selector for PowerPC64 ELF.
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
// Only available on 64-bit ELF for now.
const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
return new PPCFastISel(FuncInfo, LibInfo);
return nullptr;
}
}