1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 19:52:54 +01:00

* Rewrote casts

* Implemented GEP folding
* Dynamically output global address stuff once per function
* Fix casting fp<->short/byte

Patch contributed by Nate Begeman.

llvm-svn: 15237
This commit is contained in:
Misha Brukman 2004-07-26 18:13:24 +00:00
parent bf2ad22ff6
commit 288289f9a7
2 changed files with 1018 additions and 382 deletions

View File

@ -32,8 +32,7 @@
using namespace llvm;
namespace {
Statistic<> GEPConsts("ppc-codegen", "Number of const GEPs");
Statistic<> GEPSplits("ppc-codegen", "Number of partially const GEPs");
Statistic<> GEPFolds("ppc-codegen", "Number of GEPs folded");
/// TypeClass - Used by the PowerPC backend to group LLVM types by their basic
/// PPC Representation.
@ -41,17 +40,6 @@ namespace {
enum TypeClass {
cByte, cShort, cInt, cFP32, cFP64, cLong
};
// This struct is for recording the necessary operations to emit the GEP
// CollapsedGepOp - Records one deferred operation needed to emit a collapsed
// GEP: either a multiply of a non-constant index by an element size
// (isMul == true), or an add of an accumulated constant offset
// (isMul == false, in which case index is null).
//
// NOTE: plain `struct` replaces the redundant C-style `typedef struct X {} X;`
// (the typedef is unnecessary in C++), and the superfluous `public:` is
// dropped -- struct members are public by default.
struct CollapsedGepOp {
  bool isMul;          // true => emit index * size; false => emit add of size
  Value *index;        // index value for the multiply case; null for an add
  ConstantSInt *size;  // element size (mul) or accumulated offset (add)

  CollapsedGepOp(bool mul, Value *i, ConstantSInt *s) :
    isMul(mul), index(i), size(s) {}
};
}
/// getClass - Turn a primitive type into a "class" number which is based on the
@ -90,7 +78,7 @@ namespace {
MachineFunction *F; // The function we are compiling into
MachineBasicBlock *BB; // The current MBB we are compiling
int VarArgsFrameIndex; // FrameIndex for start of varargs area
std::map<Value*, unsigned> RegMap; // Mapping between Values and SSA Regs
// External functions used in the Module
@ -105,6 +93,11 @@ namespace {
// FrameIndex for the alloca.
std::map<AllocaInst*, unsigned> AllocaMap;
// A Reg to hold the base address used for global loads and stores, and a
// flag to set whether or not we need to emit it for this function.
unsigned GlobalBaseReg;
bool GlobalBaseInitialized;
ISel(TargetMachine &tm) : TM(reinterpret_cast<PowerPCTargetMachine&>(tm)),
F(0), BB(0) {}
@ -158,6 +151,9 @@ namespace {
BB = &F->front();
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseInitialized = false;
// Copy incoming arguments off of the stack...
LoadArgumentsToVirtualRegs(Fn);
@ -219,6 +215,16 @@ namespace {
ValueRecord(unsigned R, const Type *T) : Val(0), Reg(R), Ty(T) {}
ValueRecord(Value *V) : Val(V), Reg(0), Ty(V->getType()) {}
};
// CollapsedGepOp - One recorded step of a collapsed GEP computation: a
// multiply of an index by an element size, or an add of a constant offset.
struct CollapsedGepOp {
  CollapsedGepOp(bool mul, Value *idx, ConstantSInt *sz) :
    isMul(mul), index(idx), size(sz) {}

  bool isMul;          // whether this step is a multiply (vs. a constant add)
  Value *index;        // the index operand for the multiply case
  ConstantSInt *size;  // the size/offset constant for this step
};
void doCall(const ValueRecord &Ret, MachineInstr *CallMI,
const std::vector<ValueRecord> &Args, bool isVarArg);
void visitCallInst(CallInst &I);
@ -276,7 +282,8 @@ namespace {
///
void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
Value *Src, User::op_iterator IdxBegin,
User::op_iterator IdxEnd, unsigned TargetReg);
User::op_iterator IdxEnd, unsigned TargetReg,
bool CollapseRemainder, ConstantSInt **Remainder);
/// emitCastOperation - Common code shared between visitCastInst and
/// constant expression cast support.
@ -335,11 +342,19 @@ namespace {
/// emitSelectOperation - Common code shared between visitSelectInst and the
/// constant expression support.
///
void emitSelectOperation(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
Value *Cond, Value *TrueVal, Value *FalseVal,
unsigned DestReg);
/// copyGlobalBaseToRegister - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
void ISel::copyGlobalBaseToRegister(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
unsigned R);
/// copyConstantToRegister - Output the instructions required to put the
/// specified constant into the specified register.
///
@ -417,10 +432,6 @@ unsigned ISel::getReg(Value *V, MachineBasicBlock *MBB,
unsigned Reg = makeAnotherReg(V->getType());
copyConstantToRegister(MBB, IPt, C, Reg);
return Reg;
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
// Do not emit noop casts at all.
if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType()))
return getReg(CI->getOperand(0), MBB, IPt);
} else if (AllocaInst *AI = dyn_castFixedAlloca(V)) {
unsigned Reg = makeAnotherReg(V->getType());
unsigned FI = getFixedSizedAllocaFI(AI);
@ -500,6 +511,26 @@ unsigned ISel::getFixedSizedAllocaFI(AllocaInst *AI) {
}
/// copyGlobalBaseToRegister - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
/// copyGlobalBaseToRegister - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
void ISel::copyGlobalBaseToRegister(MachineBasicBlock *MBB,
                                    MachineBasicBlock::iterator IP,
                                    unsigned R) {
  // Lazily materialize the global base address: the MovePCtoLR that defines
  // GlobalBaseReg is emitted exactly once, at the top of the function's
  // entry block.
  if (!GlobalBaseInitialized) {
    MachineBasicBlock &EntryMBB = F->front();
    GlobalBaseReg = makeAnotherReg(Type::IntTy);
    MachineBasicBlock::iterator InsertPt = EntryMBB.begin();
    BuildMI(EntryMBB, InsertPt, PPC32::MovePCtoLR, 0, GlobalBaseReg);
    GlobalBaseInitialized = true;
  }
  // Copy GlobalBaseReg into the requested register in the current MBB
  // (OR of a register with itself is the PPC register-move idiom).
  BuildMI(*MBB, IP, PPC32::OR, 2, R).addReg(GlobalBaseReg)
    .addReg(GlobalBaseReg);
}
/// copyConstantToRegister - Output the instructions required to put the
/// specified constant into the specified register.
///
@ -567,14 +598,13 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
// Load addr of constant to reg; constant is located at PC + distance
unsigned CurPC = makeAnotherReg(Type::IntTy);
// Load addr of constant to reg; constant is located at base + distance
unsigned GlobalBase = makeAnotherReg(Type::IntTy);
unsigned Reg1 = makeAnotherReg(Type::IntTy);
unsigned Reg2 = makeAnotherReg(Type::IntTy);
// Move PC to destination reg
BuildMI(*MBB, IP, PPC32::MovePCtoLR, 0, CurPC);
// Move value at PC + distance into return reg
BuildMI(*MBB, IP, PPC32::LOADHiAddr, 2, Reg1).addReg(CurPC)
// Move value at base + distance into return reg
copyGlobalBaseToRegister(MBB, IP, GlobalBase);
BuildMI(*MBB, IP, PPC32::LOADHiAddr, 2, Reg1).addReg(GlobalBase)
.addConstantPoolIndex(CPI);
BuildMI(*MBB, IP, PPC32::LOADLoDirect, 2, Reg2).addReg(Reg1)
.addConstantPoolIndex(CPI);
@ -585,16 +615,15 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
// Copy zero (null pointer) to the register.
BuildMI(*MBB, IP, PPC32::LI, 1, R).addSImm(0);
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
// GV is located at PC + distance
unsigned CurPC = makeAnotherReg(Type::IntTy);
// GV is located at base + distance
unsigned GlobalBase = makeAnotherReg(Type::IntTy);
unsigned TmpReg = makeAnotherReg(GV->getType());
unsigned Opcode = (GV->hasWeakLinkage() || GV->isExternal()) ?
PPC32::LOADLoIndirect : PPC32::LOADLoDirect;
// Move PC to destination reg
BuildMI(*MBB, IP, PPC32::MovePCtoLR, 0, CurPC);
// Move value at PC + distance into return reg
BuildMI(*MBB, IP, PPC32::LOADHiAddr, 2, TmpReg).addReg(CurPC)
// Move value at base + distance into return reg
copyGlobalBaseToRegister(MBB, IP, GlobalBase);
BuildMI(*MBB, IP, PPC32::LOADHiAddr, 2, TmpReg).addReg(GlobalBase)
.addGlobalAddress(GV);
BuildMI(*MBB, IP, Opcode, 2, R).addReg(TmpReg).addGlobalAddress(GV);
@ -740,7 +769,7 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
// the start of the first vararg value... this is used to expand
// llvm.va_start.
if (Fn.getFunctionType()->isVarArg())
VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
}
@ -861,6 +890,32 @@ static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
return 0;
}
// canFoldGEPIntoLoadOrStore - Return the GEP instruction if we can fold it into
// the load or store instruction that is the only user of the GEP.
//
// canFoldGEPIntoLoadOrStore - Return the GEP instruction if we can fold it into
// the load or store instruction that is the only user of the GEP.
//
static GetElementPtrInst *canFoldGEPIntoLoadOrStore(Value *V) {
  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V);
  if (GEPI == 0 || !GEPI->hasOneUse())
    return 0;

  // The single user must live in the same basic block as the GEP.
  Instruction *Client = cast<Instruction>(GEPI->use_back());
  if (GEPI->getParent() != Client->getParent())
    return 0;

  // A store is foldable when the GEP is its address operand (operand 1)
  // and not the value being stored; a load when the GEP is its address.
  bool foldableStore = isa<StoreInst>(Client) &&
                       Client->getOperand(0) != GEPI &&
                       Client->getOperand(1) == GEPI;
  bool foldableLoad  = isa<LoadInst>(Client) &&
                       Client->getOperand(0) == GEPI;

  if (foldableStore || foldableLoad) {
    ++GEPFolds;
    return GEPI;
  }
  return 0;
}
// Return a fixed numbering for setcc instructions which does not depend on the
// order of the opcodes.
//
@ -917,6 +972,26 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
unsigned Class = getClassB(CompTy);
unsigned Op0r = getReg(Op0, MBB, IP);
// Before we do a comparison, we have to make sure that we're truncating our
// registers appropriately.
if (Class == cByte) {
unsigned TmpReg = makeAnotherReg(CompTy);
if (CompTy->isSigned())
BuildMI(*MBB, IP, PPC32::EXTSB, 1, TmpReg).addReg(Op0r);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, TmpReg).addReg(Op0r).addImm(0)
.addImm(24).addImm(31);
Op0r = TmpReg;
} else if (Class == cShort) {
unsigned TmpReg = makeAnotherReg(CompTy);
if (CompTy->isSigned())
BuildMI(*MBB, IP, PPC32::EXTSH, 1, TmpReg).addReg(Op0r);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, TmpReg).addReg(Op0r).addImm(0)
.addImm(16).addImm(31);
Op0r = TmpReg;
}
// Use crand for lt, gt and crandc for le, ge
unsigned CROpcode = (OpNum == 2 || OpNum == 4) ? PPC32::CRAND : PPC32::CRANDC;
// ? cr1[lt] : cr1[gt]
@ -1387,7 +1462,8 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
BuildMI(BB, PPC32::OR, 2, GPR[GPR_idx]).addReg(ArgReg)
.addReg(ArgReg);
CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use);
} else {
}
if (GPR_remaining <= 0 || isVarArg) {
BuildMI(BB, PPC32::STW, 3).addReg(ArgReg).addSImm(ArgOffset)
.addReg(PPC32::R1);
}
@ -1400,7 +1476,8 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
BuildMI(BB, PPC32::OR, 2, GPR[GPR_idx]).addReg(ArgReg)
.addReg(ArgReg);
CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use);
} else {
}
if (GPR_remaining <= 0 || isVarArg) {
BuildMI(BB, PPC32::STW, 3).addReg(ArgReg).addSImm(ArgOffset)
.addReg(PPC32::R1);
}
@ -1417,7 +1494,8 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
.addReg(ArgReg+1);
CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use);
CallMI->addRegOperand(GPR[GPR_idx+1], MachineOperand::Use);
} else {
}
if (GPR_remaining <= 1 || isVarArg) {
BuildMI(BB, PPC32::STW, 3).addReg(ArgReg).addSImm(ArgOffset)
.addReg(PPC32::R1);
BuildMI(BB, PPC32::STW, 3).addReg(ArgReg+1).addSImm(ArgOffset+4)
@ -1599,8 +1677,10 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
case Intrinsic::vaend:
case Intrinsic::returnaddress:
case Intrinsic::frameaddress:
// FIXME: should lower this ourselves
// FIXME: should lower these ourselves
// case Intrinsic::isunordered:
// case Intrinsic::memcpy: -> doCall(). system memcpy almost
// guaranteed to be faster than anything we generate ourselves
// We directly implement these intrinsics
break;
case Intrinsic::readio: {
@ -1674,7 +1754,7 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
BuildMI(BB, PPC32::LI, 1, TmpReg1).addSImm(0);
}
return;
#if 0
// This may be useful for supporting isunordered
case Intrinsic::isnan:
@ -2289,7 +2369,7 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB,
if (isSigned) {
// FIXME: Unimplemented
// Page C-3 of the PowerPC 32bit Programming Environments Manual
std::cerr << "ERROR: Unimplemented: signed right shift\n";
std::cerr << "ERROR: Unimplemented: signed right shift of long\n";
abort();
} else {
BuildMI(*MBB, IP, PPC32::SUBFIC, 2, TmpReg1).addReg(ShiftAmountReg)
@ -2344,68 +2424,168 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB,
}
/// visitLoadInst - Implement LLVM load instructions
/// visitLoadInst - Implement LLVM load instructions. Pretty straightforward
/// mapping of LLVM classes to PPC load instructions, with the exception of
/// signed byte loads, which need a sign extension following them.
///
void ISel::visitLoadInst(LoadInst &I) {
static const unsigned Opcodes[] = {
PPC32::LBZ, PPC32::LHZ, PPC32::LWZ, PPC32::LFS
// Immediate opcodes, for reg+imm addressing
static const unsigned ImmOpcodes[] = {
PPC32::LBZ, PPC32::LHZ, PPC32::LWZ,
PPC32::LFS, PPC32::LFD, PPC32::LWZ
};
// Indexed opcodes, for reg+reg addressing
static const unsigned IdxOpcodes[] = {
PPC32::LBZX, PPC32::LHZX, PPC32::LWZX,
PPC32::LFSX, PPC32::LFDX, PPC32::LWZX
};
unsigned Class = getClassB(I.getType());
unsigned Opcode = Opcodes[Class];
if (I.getType() == Type::DoubleTy) Opcode = PPC32::LFD;
if (Class == cShort && I.getType()->isSigned()) Opcode = PPC32::LHA;
unsigned DestReg = getReg(I);
unsigned Class = getClassB(I.getType());
unsigned ImmOpcode = ImmOpcodes[Class];
unsigned IdxOpcode = IdxOpcodes[Class];
unsigned DestReg = getReg(I);
Value *SourceAddr = I.getOperand(0);
if (Class == cShort && I.getType()->isSigned()) ImmOpcode = PPC32::LHA;
if (Class == cShort && I.getType()->isSigned()) IdxOpcode = PPC32::LHAX;
if (AllocaInst *AI = dyn_castFixedAlloca(I.getOperand(0))) {
if (AllocaInst *AI = dyn_castFixedAlloca(SourceAddr)) {
unsigned FI = getFixedSizedAllocaFI(AI);
if (Class == cLong) {
addFrameReference(BuildMI(BB, PPC32::LWZ, 2, DestReg), FI);
addFrameReference(BuildMI(BB, PPC32::LWZ, 2, DestReg+1), FI, 4);
addFrameReference(BuildMI(BB, ImmOpcode, 2, DestReg), FI);
addFrameReference(BuildMI(BB, ImmOpcode, 2, DestReg+1), FI, 4);
} else if (Class == cByte && I.getType()->isSigned()) {
unsigned TmpReg = makeAnotherReg(I.getType());
addFrameReference(BuildMI(BB, Opcode, 2, TmpReg), FI);
addFrameReference(BuildMI(BB, ImmOpcode, 2, TmpReg), FI);
BuildMI(BB, PPC32::EXTSB, 1, DestReg).addReg(TmpReg);
} else {
addFrameReference(BuildMI(BB, Opcode, 2, DestReg), FI);
addFrameReference(BuildMI(BB, ImmOpcode, 2, DestReg), FI);
}
} else {
unsigned SrcAddrReg = getReg(I.getOperand(0));
return;
}
// If this load is the only use of the GEP instruction that is its address,
// then we can fold the GEP directly into the load instruction.
// emitGEPOperation with a second to last arg of 'true' will place the
// base register for the GEP into baseReg, and the constant offset from that
// into offset. If the offset fits in 16 bits, we can emit a reg+imm store
// otherwise, we copy the offset into another reg, and use reg+reg addressing.
if (GetElementPtrInst *GEPI = canFoldGEPIntoLoadOrStore(SourceAddr)) {
unsigned baseReg = getReg(GEPI);
ConstantSInt *offset;
emitGEPOperation(BB, BB->end(), GEPI->getOperand(0), GEPI->op_begin()+1,
GEPI->op_end(), baseReg, true, &offset);
if (Class != cLong && canUseAsImmediateForOpcode(offset, 0)) {
if (Class == cByte && I.getType()->isSigned()) {
unsigned TmpReg = makeAnotherReg(I.getType());
BuildMI(BB, ImmOpcode, 2, TmpReg).addSImm(offset->getValue())
.addReg(baseReg);
BuildMI(BB, PPC32::EXTSB, 1, DestReg).addReg(TmpReg);
} else {
BuildMI(BB, ImmOpcode, 2, DestReg).addSImm(offset->getValue())
.addReg(baseReg);
}
return;
}
unsigned indexReg = getReg(offset);
if (Class == cLong) {
BuildMI(BB, PPC32::LWZ, 2, DestReg).addSImm(0).addReg(SrcAddrReg);
BuildMI(BB, PPC32::LWZ, 2, DestReg+1).addSImm(4).addReg(SrcAddrReg);
unsigned indexPlus4 = makeAnotherReg(Type::IntTy);
BuildMI(BB, PPC32::ADDI, 2, indexPlus4).addReg(indexReg).addSImm(4);
BuildMI(BB, IdxOpcode, 2, DestReg).addReg(indexReg).addReg(baseReg);
BuildMI(BB, IdxOpcode, 2, DestReg+1).addReg(indexPlus4).addReg(baseReg);
} else if (Class == cByte && I.getType()->isSigned()) {
unsigned TmpReg = makeAnotherReg(I.getType());
BuildMI(BB, Opcode, 2, TmpReg).addSImm(0).addReg(SrcAddrReg);
BuildMI(BB, IdxOpcode, 2, DestReg).addReg(indexReg).addReg(baseReg);
BuildMI(BB, PPC32::EXTSB, 1, DestReg).addReg(TmpReg);
} else {
BuildMI(BB, Opcode, 2, DestReg).addSImm(0).addReg(SrcAddrReg);
BuildMI(BB, IdxOpcode, 2, DestReg).addReg(indexReg).addReg(baseReg);
}
return;
}
// The fallback case, where the load was from a source that could not be
// folded into the load instruction.
unsigned SrcAddrReg = getReg(SourceAddr);
if (Class == cLong) {
BuildMI(BB, ImmOpcode, 2, DestReg).addSImm(0).addReg(SrcAddrReg);
BuildMI(BB, ImmOpcode, 2, DestReg+1).addSImm(4).addReg(SrcAddrReg);
} else if (Class == cByte && I.getType()->isSigned()) {
unsigned TmpReg = makeAnotherReg(I.getType());
BuildMI(BB, ImmOpcode, 2, TmpReg).addSImm(0).addReg(SrcAddrReg);
BuildMI(BB, PPC32::EXTSB, 1, DestReg).addReg(TmpReg);
} else {
BuildMI(BB, ImmOpcode, 2, DestReg).addSImm(0).addReg(SrcAddrReg);
}
}
/// visitStoreInst - Implement LLVM store instructions
///
void ISel::visitStoreInst(StoreInst &I) {
unsigned ValReg = getReg(I.getOperand(0));
unsigned AddressReg = getReg(I.getOperand(1));
const Type *ValTy = I.getOperand(0)->getType();
unsigned Class = getClassB(ValTy);
// Immediate opcodes, for reg+imm addressing
static const unsigned ImmOpcodes[] = {
PPC32::STB, PPC32::STH, PPC32::STW,
PPC32::STFS, PPC32::STFD, PPC32::STW
};
// Indexed opcodes, for reg+reg addressing
static const unsigned IdxOpcodes[] = {
PPC32::STBX, PPC32::STHX, PPC32::STWX,
PPC32::STFSX, PPC32::STDX, PPC32::STWX
};
Value *SourceAddr = I.getOperand(1);
const Type *ValTy = I.getOperand(0)->getType();
unsigned Class = getClassB(ValTy);
unsigned ImmOpcode = ImmOpcodes[Class];
unsigned IdxOpcode = IdxOpcodes[Class];
unsigned ValReg = getReg(I.getOperand(0));
if (Class == cLong) {
BuildMI(BB, PPC32::STW, 3).addReg(ValReg).addSImm(0).addReg(AddressReg);
BuildMI(BB, PPC32::STW, 3).addReg(ValReg+1).addSImm(4).addReg(AddressReg);
// If this store is the only use of the GEP instruction that is its address,
// then we can fold the GEP directly into the store instruction.
// emitGEPOperation with a second to last arg of 'true' will place the
// base register for the GEP into baseReg, and the constant offset from that
// into offset. If the offset fits in 16 bits, we can emit a reg+imm store
// otherwise, we copy the offset into another reg, and use reg+reg addressing.
if (GetElementPtrInst *GEPI = canFoldGEPIntoLoadOrStore(SourceAddr)) {
unsigned baseReg = getReg(GEPI);
ConstantSInt *offset;
emitGEPOperation(BB, BB->end(), GEPI->getOperand(0), GEPI->op_begin()+1,
GEPI->op_end(), baseReg, true, &offset);
if (Class != cLong && canUseAsImmediateForOpcode(offset, 0)) {
BuildMI(BB, ImmOpcode, 3).addReg(ValReg).addSImm(offset->getValue())
.addReg(baseReg);
return;
}
unsigned indexReg = getReg(offset);
if (Class == cLong) {
unsigned indexPlus4 = makeAnotherReg(Type::IntTy);
BuildMI(BB, PPC32::ADDI, 2, indexPlus4).addReg(indexReg).addSImm(4);
BuildMI(BB, IdxOpcode, 3).addReg(ValReg).addReg(indexReg).addReg(baseReg);
BuildMI(BB, IdxOpcode, 3).addReg(ValReg+1).addReg(indexPlus4)
.addReg(baseReg);
return;
}
BuildMI(BB, IdxOpcode, 3).addReg(ValReg).addReg(indexReg).addReg(baseReg);
return;
}
static const unsigned Opcodes[] = {
PPC32::STB, PPC32::STH, PPC32::STW, PPC32::STFS
};
unsigned Opcode = Opcodes[Class];
if (ValTy == Type::DoubleTy) Opcode = PPC32::STFD;
BuildMI(BB, Opcode, 3).addReg(ValReg).addSImm(0).addReg(AddressReg);
// If the store address wasn't the only use of a GEP, we fall back to the
// standard path: store the ValReg at the value in AddressReg.
unsigned AddressReg = getReg(I.getOperand(1));
if (Class == cLong) {
BuildMI(BB, ImmOpcode, 3).addReg(ValReg).addSImm(0).addReg(AddressReg);
BuildMI(BB, ImmOpcode, 3).addReg(ValReg+1).addSImm(4).addReg(AddressReg);
return;
}
BuildMI(BB, ImmOpcode, 3).addReg(ValReg).addSImm(0).addReg(AddressReg);
}
@ -2417,10 +2597,6 @@ void ISel::visitCastInst(CastInst &CI) {
unsigned SrcClass = getClassB(Op->getType());
unsigned DestClass = getClassB(CI.getType());
// Noop casts are not emitted: getReg will return the source operand as the
// register to use for any uses of the noop cast.
if (DestClass == SrcClass)
return;
// If this is a cast from a 32-bit integer to a Long type, and the only uses
// of the case are GEP instructions, then the cast does not need to be
@ -2484,24 +2660,6 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
return;
}
// Implement casts between values of the same type class (as determined by
// getClass) by using a register-to-register move.
if (SrcClass == DestClass) {
if (SrcClass <= cInt) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
} else if (SrcClass == cFP32 || SrcClass == cFP64) {
BuildMI(*MBB, IP, PPC32::FMR, 1, DestReg).addReg(SrcReg);
} else if (SrcClass == cLong) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg+1)
.addReg(SrcReg+1);
} else {
assert(0 && "Cannot handle this type of cast instruction!");
abort();
}
return;
}
// Handle cast of Float -> Double
if (SrcClass == cFP32 && DestClass == cFP64) {
BuildMI(*MBB, IP, PPC32::FMR, 1, DestReg).addReg(SrcReg);
@ -2514,52 +2672,6 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
return;
}
// Handle cast of SMALLER int to LARGER int using a move with sign extension
// or zero extension, depending on whether the source type was signed.
if (SrcClass <= cInt && (DestClass <= cInt || DestClass == cLong) &&
SrcClass < DestClass) {
bool isLong = DestClass == cLong;
if (isLong) {
DestClass = cInt;
++DestReg;
}
bool isUnsigned = DestTy->isUnsigned() || DestTy == Type::BoolTy;
BuildMI(*BB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
if (isLong) { // Handle upper 32 bits as appropriate...
--DestReg;
if (isUnsigned) // Zero out top bits...
BuildMI(*BB, IP, PPC32::LI, 1, DestReg).addSImm(0);
else // Sign extend bottom half...
BuildMI(*BB, IP, PPC32::SRAWI, 2, DestReg).addReg(SrcReg).addImm(31);
}
return;
}
// Special case long -> int ...
if (SrcClass == cLong && DestClass == cInt) {
BuildMI(*BB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg+1).addReg(SrcReg+1);
return;
}
// Handle cast of LARGER int to SMALLER int with a clear or sign extend
if ((SrcClass <= cInt || SrcClass == cLong) && DestClass <= cInt &&
SrcClass > DestClass) {
bool isUnsigned = DestTy->isUnsigned() || DestTy == Type::BoolTy;
unsigned source = (SrcClass == cLong) ? SrcReg+1 : SrcReg;
if (isUnsigned) {
unsigned shift = (DestClass == cByte) ? 24 : 16;
BuildMI(*BB, IP, PPC32::RLWINM, 4, DestReg).addReg(source).addZImm(0)
.addImm(shift).addImm(31);
} else {
BuildMI(*BB, IP, (DestClass == cByte) ? PPC32::EXTSB : PPC32::EXTSH, 1,
DestReg).addReg(source);
}
return;
}
// Handle casts from integer to floating point now...
if (DestClass == cFP32 || DestClass == cFP64) {
@ -2624,7 +2736,7 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
addFrameReference(BuildMI(*BB, IP, PPC32::LFD, 2, ConstF),
ConstantFrameIndex);
addFrameReference(BuildMI(*BB, IP, PPC32::LFD, 2, TempF), ValueFrameIdx);
BuildMI(*BB, IP, PPC32::FSUB, 2, DestReg).addReg(TempF ).addReg(ConstF);
BuildMI(*BB, IP, PPC32::FSUB, 2, DestReg).addReg(TempF).addReg(ConstF);
}
return;
}
@ -2647,23 +2759,26 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
if (DestTy->isSigned()) {
unsigned LoadOp = (DestClass == cShort) ? PPC32::LHA : PPC32::LWZ;
unsigned TempReg = makeAnotherReg(Type::DoubleTy);
// Convert to integer in the FP reg and store it to a stack slot
BuildMI(*BB, IP, PPC32::FCTIWZ, 1, TempReg).addReg(SrcReg);
addFrameReference(BuildMI(*BB, IP, PPC32::STFD, 3)
.addReg(TempReg), ValueFrameIdx);
// There is no load signed byte opcode, so we must emit a sign extend
// There is no load signed byte opcode, so we must emit a sign extend for
// that particular size. Make sure to source the new integer from the
// correct offset.
if (DestClass == cByte) {
unsigned TempReg2 = makeAnotherReg(DestTy);
addFrameReference(BuildMI(*BB, IP, LoadOp, 2, TempReg2),
ValueFrameIdx, 4);
addFrameReference(BuildMI(*BB, IP, PPC32::LBZ, 2, TempReg2),
ValueFrameIdx, 7);
BuildMI(*MBB, IP, PPC32::EXTSB, DestReg).addReg(TempReg2);
} else {
int offset = (DestClass == cShort) ? 6 : 4;
unsigned LoadOp = (DestClass == cShort) ? PPC32::LHA : PPC32::LWZ;
addFrameReference(BuildMI(*BB, IP, LoadOp, 2, DestReg),
ValueFrameIdx, 4);
ValueFrameIdx, offset);
}
} else {
unsigned Zero = getReg(ConstantFP::get(Type::DoubleTy, 0.0f));
@ -2710,29 +2825,228 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
BuildMI(*BB, IP, PPC32::FCTIWZ, 1, ConvReg).addReg(TmpReg2);
addFrameReference(BuildMI(*BB, IP, PPC32::STFD, 3).addReg(ConvReg),
FrameIdx);
addFrameReference(BuildMI(*BB, IP, PPC32::LWZ, 2, IntTmp),
FrameIdx, 4);
BuildMI(*BB, IP, PPC32::BLT, 2).addReg(PPC32::CR0).addMBB(PhiMBB);
BuildMI(*BB, IP, PPC32::B, 1).addMBB(XorMBB);
if (DestClass == cByte) {
addFrameReference(BuildMI(*BB, IP, PPC32::LBZ, 2, DestReg),
FrameIdx, 7);
} else if (DestClass == cShort) {
addFrameReference(BuildMI(*BB, IP, PPC32::LHZ, 2, DestReg),
FrameIdx, 6);
} if (DestClass == cInt) {
addFrameReference(BuildMI(*BB, IP, PPC32::LWZ, 2, IntTmp),
FrameIdx, 4);
BuildMI(*BB, IP, PPC32::BLT, 2).addReg(PPC32::CR0).addMBB(PhiMBB);
BuildMI(*BB, IP, PPC32::B, 1).addMBB(XorMBB);
// XorMBB:
// add 2**31 if input was >= 2**31
BB = XorMBB;
BuildMI(BB, PPC32::XORIS, 2, XorReg).addReg(IntTmp).addImm(0x8000);
BuildMI(BB, PPC32::B, 1).addMBB(PhiMBB);
XorMBB->addSuccessor(PhiMBB);
// XorMBB:
// add 2**31 if input was >= 2**31
BB = XorMBB;
BuildMI(BB, PPC32::XORIS, 2, XorReg).addReg(IntTmp).addImm(0x8000);
XorMBB->addSuccessor(PhiMBB);
// PhiMBB:
// DestReg = phi [ IntTmp, OldMBB ], [ XorReg, XorMBB ]
BB = PhiMBB;
BuildMI(BB, PPC32::PHI, 2, DestReg).addReg(IntTmp).addMBB(OldMBB)
.addReg(XorReg).addMBB(XorMBB);
// PhiMBB:
// DestReg = phi [ IntTmp, OldMBB ], [ XorReg, XorMBB ]
BB = PhiMBB;
BuildMI(BB, PPC32::PHI, 2, DestReg).addReg(IntTmp).addMBB(OldMBB)
.addReg(XorReg).addMBB(XorMBB);
}
}
return;
}
// Check our invariants
assert((SrcClass <= cInt || SrcClass == cLong) &&
"Unhandled source class for cast operation!");
assert((DestClass <= cInt || DestClass == cLong) &&
"Unhandled destination class for cast operation!");
bool sourceUnsigned = SrcTy->isUnsigned() || SrcTy == Type::BoolTy;
bool destUnsigned = DestTy->isUnsigned();
// Unsigned -> Unsigned, clear if larger,
if (sourceUnsigned && destUnsigned) {
// handle long dest class now to keep switch clean
if (DestClass == cLong) {
if (SrcClass == cLong) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg+1)
.addReg(SrcReg+1);
} else {
BuildMI(*MBB, IP, PPC32::LI, 1, DestReg).addSImm(0);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg)
.addReg(SrcReg);
}
return;
}
// handle u{ byte, short, int } x u{ byte, short, int }
unsigned clearBits = (SrcClass == cByte || DestClass == cByte) ? 24 : 16;
switch (SrcClass) {
case cByte:
case cShort:
if (SrcClass == DestClass)
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg)
.addImm(0).addImm(clearBits).addImm(31);
break;
case cLong:
++SrcReg;
// Fall through
case cInt:
if (DestClass == cInt)
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg)
.addImm(0).addImm(clearBits).addImm(31);
break;
}
return;
}
// Signed -> Signed
if (!sourceUnsigned && !destUnsigned) {
// handle long dest class now to keep switch clean
if (DestClass == cLong) {
if (SrcClass == cLong) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg+1)
.addReg(SrcReg+1);
} else {
BuildMI(*MBB, IP, PPC32::SRAWI, 2, DestReg).addReg(SrcReg).addImm(31);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg)
.addReg(SrcReg);
}
return;
}
// handle { byte, short, int } x { byte, short, int }
switch (SrcClass) {
case cByte:
if (DestClass == cByte)
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
break;
case cShort:
if (DestClass == cByte)
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
else if (DestClass == cShort)
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::EXTSH, 1, DestReg).addReg(SrcReg);
break;
case cLong:
++SrcReg;
// Fall through
case cInt:
if (DestClass == cByte)
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
else if (DestClass == cShort)
BuildMI(*MBB, IP, PPC32::EXTSH, 1, DestReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
break;
}
return;
}
// Unsigned -> Signed
if (sourceUnsigned && !destUnsigned) {
// handle long dest class now to keep switch clean
if (DestClass == cLong) {
if (SrcClass == cLong) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg+1).
addReg(SrcReg+1);
} else {
BuildMI(*MBB, IP, PPC32::LI, 1, DestReg).addSImm(0);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg)
.addReg(SrcReg);
}
return;
}
// handle u{ byte, short, int } -> { byte, short, int }
switch (SrcClass) {
case cByte:
if (DestClass == cByte)
// uByte 255 -> signed byte == -1
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
else
// uByte 255 -> signed short/int == 255
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg).addImm(0)
.addImm(24).addImm(31);
break;
case cShort:
if (DestClass == cByte)
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
else if (DestClass == cShort)
BuildMI(*MBB, IP, PPC32::EXTSH, 1, DestReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg).addImm(0)
.addImm(16).addImm(31);
break;
case cLong:
++SrcReg;
// Fall through
case cInt:
if (DestClass == cByte)
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
else if (DestClass == cShort)
BuildMI(*MBB, IP, PPC32::EXTSH, 1, DestReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
break;
}
return;
}
// Signed -> Unsigned
if (!sourceUnsigned && destUnsigned) {
// handle long dest class now to keep switch clean
if (DestClass == cLong) {
if (SrcClass == cLong) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg+1)
.addReg(SrcReg+1);
} else {
BuildMI(*MBB, IP, PPC32::SRAWI, 2, DestReg).addReg(SrcReg).addImm(31);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg)
.addReg(SrcReg);
}
return;
}
// handle { byte, short, int } -> u{ byte, short, int }
unsigned clearBits = (DestClass == cByte) ? 24 : 16;
switch (SrcClass) {
case cByte:
case cShort:
if (DestClass == cByte || DestClass == cShort)
// sbyte -1 -> ubyte 0x000000FF
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg)
.addImm(0).addImm(clearBits).addImm(31);
else
// sbyte -1 -> ubyte 0xFFFFFFFF
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
break;
case cLong:
++SrcReg;
// Fall through
case cInt:
if (DestClass == cInt)
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg)
.addImm(0).addImm(clearBits).addImm(31);
break;
}
return;
}
// Anything we haven't handled already, we can't (yet) handle at all.
assert(0 && "Unhandled cast instruction!");
std::cerr << "Unhandled cast from " << SrcTy->getDescription()
<< "to " << DestTy->getDescription() << '\n';
abort();
}
@ -2783,6 +3097,9 @@ void ISel::visitVAArgInst(VAArgInst &I) {
BuildMI(BB, PPC32::LWZ, 2, DestReg).addSImm(0).addReg(VAList);
BuildMI(BB, PPC32::LWZ, 2, DestReg+1).addSImm(4).addReg(VAList);
break;
case Type::FloatTyID:
BuildMI(BB, PPC32::LFS, 2, DestReg).addSImm(0).addReg(VAList);
break;
case Type::DoubleTyID:
BuildMI(BB, PPC32::LFD, 2, DestReg).addSImm(0).addReg(VAList);
break;
@ -2792,9 +3109,12 @@ void ISel::visitVAArgInst(VAArgInst &I) {
/// visitGetElementPtrInst - instruction-select GEP instructions
///
void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
if (canFoldGEPIntoLoadOrStore(&I))
return;
unsigned outputReg = getReg(I);
emitGEPOperation(BB, BB->end(), I.getOperand(0), I.op_begin()+1, I.op_end(),
outputReg);
outputReg, false, 0);
}
/// emitGEPOperation - Common code shared between visitGetElementPtrInst and
@ -2803,16 +3123,16 @@ void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
void ISel::emitGEPOperation(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
Value *Src, User::op_iterator IdxBegin,
User::op_iterator IdxEnd, unsigned TargetReg) {
User::op_iterator IdxEnd, unsigned TargetReg,
bool GEPIsFolded, ConstantSInt **RemainderPtr) {
const TargetData &TD = TM.getTargetData();
const Type *Ty = Src->getType();
unsigned basePtrReg = getReg(Src, MBB, IP);
int64_t constValue = 0;
bool anyCombined = false;
// Record the operations to emit the GEP in a vector so that we can emit them
// after having analyzed the entire instruction.
std::vector<CollapsedGepOp*> ops;
std::vector<CollapsedGepOp> ops;
// GEPs have zero or more indices; we must perform a struct access
// or array access for each one.
@ -2829,7 +3149,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
unsigned fieldIndex = cast<ConstantUInt>(idx)->getValue();
unsigned memberOffset =
TD.getStructLayout(StTy)->MemberOffsets[fieldIndex];
if (constValue != 0) anyCombined = true;
// StructType member offsets are always constant values. Add it to the
// running total.
@ -2854,8 +3173,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
unsigned elementSize = TD.getTypeSize(Ty);
if (ConstantInt *C = dyn_cast<ConstantInt>(idx)) {
if (constValue != 0) anyCombined = true;
if (ConstantSInt *CS = dyn_cast<ConstantSInt>(C))
constValue += CS->getValue() * elementSize;
else if (ConstantUInt *CU = dyn_cast<ConstantUInt>(C))
@ -2864,48 +3181,40 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
assert(0 && "Invalid ConstantInt GEP index type!");
} else {
// Push current gep state to this point as an add
CollapsedGepOp *addition =
new CollapsedGepOp(false, 0, ConstantSInt::get(Type::IntTy,
constValue));
ops.push_back(addition);
ops.push_back(CollapsedGepOp(false, 0,
ConstantSInt::get(Type::IntTy,constValue)));
// Push multiply gep op and reset constant value
CollapsedGepOp *multiply =
new CollapsedGepOp(true, idx, ConstantSInt::get(Type::IntTy,
elementSize));
ops.push_back(multiply);
ops.push_back(CollapsedGepOp(true, idx,
ConstantSInt::get(Type::IntTy, elementSize)));
constValue = 0;
}
}
}
// Do some statistical accounting
if (ops.empty()) ++GEPConsts;
if (anyCombined) ++GEPSplits;
// Emit instructions for all the collapsed ops
for(std::vector<CollapsedGepOp *>::iterator cgo_i = ops.begin(),
for(std::vector<CollapsedGepOp>::iterator cgo_i = ops.begin(),
cgo_e = ops.end(); cgo_i != cgo_e; ++cgo_i) {
CollapsedGepOp *cgo = *cgo_i;
CollapsedGepOp& cgo = *cgo_i;
unsigned nextBasePtrReg = makeAnotherReg (Type::IntTy);
if (cgo->isMul) {
if (cgo.isMul) {
// We know the elementSize is a constant, so we can emit a constant mul
// and then add it to the current base reg
unsigned TmpReg = makeAnotherReg(Type::IntTy);
doMultiplyConst(MBB, IP, TmpReg, cgo->index, cgo->size);
doMultiplyConst(MBB, IP, TmpReg, cgo.index, cgo.size);
BuildMI(*MBB, IP, PPC32::ADD, 2, nextBasePtrReg).addReg(basePtrReg)
.addReg(TmpReg);
} else {
// Try and generate an immediate addition if possible
if (cgo->size->isNullValue()) {
if (cgo.size->isNullValue()) {
BuildMI(*MBB, IP, PPC32::OR, 2, nextBasePtrReg).addReg(basePtrReg)
.addReg(basePtrReg);
} else if (canUseAsImmediateForOpcode(cgo->size, 0)) {
} else if (canUseAsImmediateForOpcode(cgo.size, 0)) {
BuildMI(*MBB, IP, PPC32::ADDI, 2, nextBasePtrReg).addReg(basePtrReg)
.addSImm(cgo->size->getValue());
.addSImm(cgo.size->getValue());
} else {
unsigned Op1r = getReg(cgo->size, MBB, IP);
unsigned Op1r = getReg(cgo.size, MBB, IP);
BuildMI(*MBB, IP, PPC32::ADD, 2, nextBasePtrReg).addReg(basePtrReg)
.addReg(Op1r);
}
@ -2916,6 +3225,15 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
// Add the current base register plus any accumulated constant value
ConstantSInt *remainder = ConstantSInt::get(Type::IntTy, constValue);
// If we are emitting this during a fold, copy the current base register to
// the target, and save the current constant offset so the folding load or
// store can try and use it as an immediate.
if (GEPIsFolded) {
BuildMI (BB, PPC32::OR, 2, TargetReg).addReg(basePtrReg).addReg(basePtrReg);
*RemainderPtr = remainder;
return;
}
// After we have processed all the indices, the result is left in
// basePtrReg. Move it to the register where we were expected to
// put the answer.

View File

@ -32,8 +32,7 @@
using namespace llvm;
namespace {
Statistic<> GEPConsts("ppc-codegen", "Number of const GEPs");
Statistic<> GEPSplits("ppc-codegen", "Number of partially const GEPs");
Statistic<> GEPFolds("ppc-codegen", "Number of GEPs folded");
/// TypeClass - Used by the PowerPC backend to group LLVM types by their basic
/// PPC Representation.
@ -41,17 +40,6 @@ namespace {
enum TypeClass {
cByte, cShort, cInt, cFP32, cFP64, cLong
};
// This struct is for recording the necessary operations to emit the GEP
typedef struct CollapsedGepOp {
public:
CollapsedGepOp(bool mul, Value *i, ConstantSInt *s) :
isMul(mul), index(i), size(s) {}
bool isMul;
Value *index;
ConstantSInt *size;
} CollapsedGepOp;
}
/// getClass - Turn a primitive type into a "class" number which is based on the
@ -90,7 +78,7 @@ namespace {
MachineFunction *F; // The function we are compiling into
MachineBasicBlock *BB; // The current MBB we are compiling
int VarArgsFrameIndex; // FrameIndex for start of varargs area
std::map<Value*, unsigned> RegMap; // Mapping between Values and SSA Regs
// External functions used in the Module
@ -105,6 +93,11 @@ namespace {
// FrameIndex for the alloca.
std::map<AllocaInst*, unsigned> AllocaMap;
// A Reg to hold the base address used for global loads and stores, and a
// flag to set whether or not we need to emit it for this function.
unsigned GlobalBaseReg;
bool GlobalBaseInitialized;
ISel(TargetMachine &tm) : TM(reinterpret_cast<PowerPCTargetMachine&>(tm)),
F(0), BB(0) {}
@ -158,6 +151,9 @@ namespace {
BB = &F->front();
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseInitialized = false;
// Copy incoming arguments off of the stack...
LoadArgumentsToVirtualRegs(Fn);
@ -219,6 +215,16 @@ namespace {
ValueRecord(unsigned R, const Type *T) : Val(0), Reg(R), Ty(T) {}
ValueRecord(Value *V) : Val(V), Reg(0), Ty(V->getType()) {}
};
// This struct is for recording the necessary operations to emit the GEP
// as a flattened sequence of multiplies and adds; emitGEPOperation first
// collects one of these per non-constant index, then emits them all.
struct CollapsedGepOp {
  bool isMul;          // true: scale 'index' by 'size'; false: add 'size' to the base
  Value *index;        // the non-constant index value (only used when isMul is true)
  ConstantSInt *size;  // element size for a multiply, or accumulated constant offset for an add
  CollapsedGepOp(bool mul, Value *i, ConstantSInt *s) :
    isMul(mul), index(i), size(s) {}
};
void doCall(const ValueRecord &Ret, MachineInstr *CallMI,
const std::vector<ValueRecord> &Args, bool isVarArg);
void visitCallInst(CallInst &I);
@ -276,7 +282,8 @@ namespace {
///
void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
Value *Src, User::op_iterator IdxBegin,
User::op_iterator IdxEnd, unsigned TargetReg);
User::op_iterator IdxEnd, unsigned TargetReg,
bool CollapseRemainder, ConstantSInt **Remainder);
/// emitCastOperation - Common code shared between visitCastInst and
/// constant expression cast support.
@ -335,11 +342,19 @@ namespace {
/// emitSelectOperation - Common code shared between visitSelectInst and the
/// constant expression support.
///
void emitSelectOperation(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
Value *Cond, Value *TrueVal, Value *FalseVal,
unsigned DestReg);
/// copyGlobalBaseToRegister - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
void ISel::copyGlobalBaseToRegister(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
unsigned R);
/// copyConstantToRegister - Output the instructions required to put the
/// specified constant into the specified register.
///
@ -417,10 +432,6 @@ unsigned ISel::getReg(Value *V, MachineBasicBlock *MBB,
unsigned Reg = makeAnotherReg(V->getType());
copyConstantToRegister(MBB, IPt, C, Reg);
return Reg;
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
// Do not emit noop casts at all.
if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType()))
return getReg(CI->getOperand(0), MBB, IPt);
} else if (AllocaInst *AI = dyn_castFixedAlloca(V)) {
unsigned Reg = makeAnotherReg(V->getType());
unsigned FI = getFixedSizedAllocaFI(AI);
@ -500,6 +511,26 @@ unsigned ISel::getFixedSizedAllocaFI(AllocaInst *AI) {
}
/// copyGlobalBaseToRegister - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
void ISel::copyGlobalBaseToRegister(MachineBasicBlock *MBB,
                                    MachineBasicBlock::iterator IP,
                                    unsigned R) {
  if (GlobalBaseInitialized == false) {
    // Lazily materialize the base address exactly once per function: insert
    // a single MovePCtoLR at the top of the entry block so that every later
    // global access can share GlobalBaseReg.
    MachineBasicBlock &EntryMBB = F->front();
    MachineBasicBlock::iterator InsertPt = EntryMBB.begin();
    GlobalBaseReg = makeAnotherReg(Type::IntTy);
    BuildMI(EntryMBB, InsertPt, PPC32::MovePCtoLR, 0, GlobalBaseReg);
    GlobalBaseInitialized = true;
  }
  // Copy the shared base register into the requested destination register
  // here in the current block (OR rD, rS, rS is the canonical reg move).
  BuildMI(*MBB, IP, PPC32::OR, 2, R).addReg(GlobalBaseReg)
    .addReg(GlobalBaseReg);
}
/// copyConstantToRegister - Output the instructions required to put the
/// specified constant into the specified register.
///
@ -567,14 +598,13 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
// Load addr of constant to reg; constant is located at PC + distance
unsigned CurPC = makeAnotherReg(Type::IntTy);
// Load addr of constant to reg; constant is located at base + distance
unsigned GlobalBase = makeAnotherReg(Type::IntTy);
unsigned Reg1 = makeAnotherReg(Type::IntTy);
unsigned Reg2 = makeAnotherReg(Type::IntTy);
// Move PC to destination reg
BuildMI(*MBB, IP, PPC32::MovePCtoLR, 0, CurPC);
// Move value at PC + distance into return reg
BuildMI(*MBB, IP, PPC32::LOADHiAddr, 2, Reg1).addReg(CurPC)
// Move value at base + distance into return reg
copyGlobalBaseToRegister(MBB, IP, GlobalBase);
BuildMI(*MBB, IP, PPC32::LOADHiAddr, 2, Reg1).addReg(GlobalBase)
.addConstantPoolIndex(CPI);
BuildMI(*MBB, IP, PPC32::LOADLoDirect, 2, Reg2).addReg(Reg1)
.addConstantPoolIndex(CPI);
@ -585,16 +615,15 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
// Copy zero (null pointer) to the register.
BuildMI(*MBB, IP, PPC32::LI, 1, R).addSImm(0);
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
// GV is located at PC + distance
unsigned CurPC = makeAnotherReg(Type::IntTy);
// GV is located at base + distance
unsigned GlobalBase = makeAnotherReg(Type::IntTy);
unsigned TmpReg = makeAnotherReg(GV->getType());
unsigned Opcode = (GV->hasWeakLinkage() || GV->isExternal()) ?
PPC32::LOADLoIndirect : PPC32::LOADLoDirect;
// Move PC to destination reg
BuildMI(*MBB, IP, PPC32::MovePCtoLR, 0, CurPC);
// Move value at PC + distance into return reg
BuildMI(*MBB, IP, PPC32::LOADHiAddr, 2, TmpReg).addReg(CurPC)
// Move value at base + distance into return reg
copyGlobalBaseToRegister(MBB, IP, GlobalBase);
BuildMI(*MBB, IP, PPC32::LOADHiAddr, 2, TmpReg).addReg(GlobalBase)
.addGlobalAddress(GV);
BuildMI(*MBB, IP, Opcode, 2, R).addReg(TmpReg).addGlobalAddress(GV);
@ -740,7 +769,7 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
// the start of the first vararg value... this is used to expand
// llvm.va_start.
if (Fn.getFunctionType()->isVarArg())
VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
}
@ -861,6 +890,32 @@ static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
return 0;
}
// canFoldGEPIntoLoadOrStore - Return the GEP instruction if we can fold it
// into the load or store instruction that is its sole user; that user must
// live in the same basic block, and for a store the GEP must be the address
// operand (operand 1) rather than the stored value.  Bumps the GEPFolds
// statistic on success.
//
static GetElementPtrInst *canFoldGEPIntoLoadOrStore(Value *V) {
  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V);
  if (GEPI == 0 || !GEPI->hasOneUse())
    return 0;

  Instruction *User = cast<Instruction>(GEPI->use_back());
  if (GEPI->getParent() != User->getParent())
    return 0;

  bool foldable = false;
  if (isa<StoreInst>(User))
    foldable = User->getOperand(1) == GEPI && User->getOperand(0) != GEPI;
  else if (isa<LoadInst>(User))
    foldable = User->getOperand(0) == GEPI;

  if (!foldable)
    return 0;
  ++GEPFolds;
  return GEPI;
}
// Return a fixed numbering for setcc instructions which does not depend on the
// order of the opcodes.
//
@ -917,6 +972,26 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
unsigned Class = getClassB(CompTy);
unsigned Op0r = getReg(Op0, MBB, IP);
// Before we do a comparison, we have to make sure that we're truncating our
// registers appropriately.
if (Class == cByte) {
unsigned TmpReg = makeAnotherReg(CompTy);
if (CompTy->isSigned())
BuildMI(*MBB, IP, PPC32::EXTSB, 1, TmpReg).addReg(Op0r);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, TmpReg).addReg(Op0r).addImm(0)
.addImm(24).addImm(31);
Op0r = TmpReg;
} else if (Class == cShort) {
unsigned TmpReg = makeAnotherReg(CompTy);
if (CompTy->isSigned())
BuildMI(*MBB, IP, PPC32::EXTSH, 1, TmpReg).addReg(Op0r);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, TmpReg).addReg(Op0r).addImm(0)
.addImm(16).addImm(31);
Op0r = TmpReg;
}
// Use crand for lt, gt and crandc for le, ge
unsigned CROpcode = (OpNum == 2 || OpNum == 4) ? PPC32::CRAND : PPC32::CRANDC;
// ? cr1[lt] : cr1[gt]
@ -1387,7 +1462,8 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
BuildMI(BB, PPC32::OR, 2, GPR[GPR_idx]).addReg(ArgReg)
.addReg(ArgReg);
CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use);
} else {
}
if (GPR_remaining <= 0 || isVarArg) {
BuildMI(BB, PPC32::STW, 3).addReg(ArgReg).addSImm(ArgOffset)
.addReg(PPC32::R1);
}
@ -1400,7 +1476,8 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
BuildMI(BB, PPC32::OR, 2, GPR[GPR_idx]).addReg(ArgReg)
.addReg(ArgReg);
CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use);
} else {
}
if (GPR_remaining <= 0 || isVarArg) {
BuildMI(BB, PPC32::STW, 3).addReg(ArgReg).addSImm(ArgOffset)
.addReg(PPC32::R1);
}
@ -1417,7 +1494,8 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
.addReg(ArgReg+1);
CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use);
CallMI->addRegOperand(GPR[GPR_idx+1], MachineOperand::Use);
} else {
}
if (GPR_remaining <= 1 || isVarArg) {
BuildMI(BB, PPC32::STW, 3).addReg(ArgReg).addSImm(ArgOffset)
.addReg(PPC32::R1);
BuildMI(BB, PPC32::STW, 3).addReg(ArgReg+1).addSImm(ArgOffset+4)
@ -1599,8 +1677,10 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
case Intrinsic::vaend:
case Intrinsic::returnaddress:
case Intrinsic::frameaddress:
// FIXME: should lower this ourselves
// FIXME: should lower these ourselves
// case Intrinsic::isunordered:
// case Intrinsic::memcpy: -> doCall(). system memcpy almost
// guaranteed to be faster than anything we generate ourselves
// We directly implement these intrinsics
break;
case Intrinsic::readio: {
@ -1674,7 +1754,7 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
BuildMI(BB, PPC32::LI, 1, TmpReg1).addSImm(0);
}
return;
#if 0
// This may be useful for supporting isunordered
case Intrinsic::isnan:
@ -2289,7 +2369,7 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB,
if (isSigned) {
// FIXME: Unimplemented
// Page C-3 of the PowerPC 32bit Programming Environments Manual
std::cerr << "ERROR: Unimplemented: signed right shift\n";
std::cerr << "ERROR: Unimplemented: signed right shift of long\n";
abort();
} else {
BuildMI(*MBB, IP, PPC32::SUBFIC, 2, TmpReg1).addReg(ShiftAmountReg)
@ -2344,68 +2424,168 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB,
}
/// visitLoadInst - Implement LLVM load instructions
/// visitLoadInst - Implement LLVM load instructions. Pretty straightforward
/// mapping of LLVM classes to PPC load instructions, with the exception of
/// signed byte loads, which need a sign extension following them.
///
void ISel::visitLoadInst(LoadInst &I) {
static const unsigned Opcodes[] = {
PPC32::LBZ, PPC32::LHZ, PPC32::LWZ, PPC32::LFS
// Immediate opcodes, for reg+imm addressing
static const unsigned ImmOpcodes[] = {
PPC32::LBZ, PPC32::LHZ, PPC32::LWZ,
PPC32::LFS, PPC32::LFD, PPC32::LWZ
};
// Indexed opcodes, for reg+reg addressing
static const unsigned IdxOpcodes[] = {
PPC32::LBZX, PPC32::LHZX, PPC32::LWZX,
PPC32::LFSX, PPC32::LFDX, PPC32::LWZX
};
unsigned Class = getClassB(I.getType());
unsigned Opcode = Opcodes[Class];
if (I.getType() == Type::DoubleTy) Opcode = PPC32::LFD;
if (Class == cShort && I.getType()->isSigned()) Opcode = PPC32::LHA;
unsigned DestReg = getReg(I);
unsigned Class = getClassB(I.getType());
unsigned ImmOpcode = ImmOpcodes[Class];
unsigned IdxOpcode = IdxOpcodes[Class];
unsigned DestReg = getReg(I);
Value *SourceAddr = I.getOperand(0);
if (Class == cShort && I.getType()->isSigned()) ImmOpcode = PPC32::LHA;
if (Class == cShort && I.getType()->isSigned()) IdxOpcode = PPC32::LHAX;
if (AllocaInst *AI = dyn_castFixedAlloca(I.getOperand(0))) {
if (AllocaInst *AI = dyn_castFixedAlloca(SourceAddr)) {
unsigned FI = getFixedSizedAllocaFI(AI);
if (Class == cLong) {
addFrameReference(BuildMI(BB, PPC32::LWZ, 2, DestReg), FI);
addFrameReference(BuildMI(BB, PPC32::LWZ, 2, DestReg+1), FI, 4);
addFrameReference(BuildMI(BB, ImmOpcode, 2, DestReg), FI);
addFrameReference(BuildMI(BB, ImmOpcode, 2, DestReg+1), FI, 4);
} else if (Class == cByte && I.getType()->isSigned()) {
unsigned TmpReg = makeAnotherReg(I.getType());
addFrameReference(BuildMI(BB, Opcode, 2, TmpReg), FI);
addFrameReference(BuildMI(BB, ImmOpcode, 2, TmpReg), FI);
BuildMI(BB, PPC32::EXTSB, 1, DestReg).addReg(TmpReg);
} else {
addFrameReference(BuildMI(BB, Opcode, 2, DestReg), FI);
addFrameReference(BuildMI(BB, ImmOpcode, 2, DestReg), FI);
}
} else {
unsigned SrcAddrReg = getReg(I.getOperand(0));
return;
}
// If this load is the only use of the GEP instruction that is its address,
// then we can fold the GEP directly into the load instruction.
// emitGEPOperation with a second to last arg of 'true' will place the
// base register for the GEP into baseReg, and the constant offset from that
// into offset. If the offset fits in 16 bits, we can emit a reg+imm load
// otherwise, we copy the offset into another reg, and use reg+reg addressing.
if (GetElementPtrInst *GEPI = canFoldGEPIntoLoadOrStore(SourceAddr)) {
unsigned baseReg = getReg(GEPI);
ConstantSInt *offset;
emitGEPOperation(BB, BB->end(), GEPI->getOperand(0), GEPI->op_begin()+1,
GEPI->op_end(), baseReg, true, &offset);
if (Class != cLong && canUseAsImmediateForOpcode(offset, 0)) {
if (Class == cByte && I.getType()->isSigned()) {
unsigned TmpReg = makeAnotherReg(I.getType());
BuildMI(BB, ImmOpcode, 2, TmpReg).addSImm(offset->getValue())
.addReg(baseReg);
BuildMI(BB, PPC32::EXTSB, 1, DestReg).addReg(TmpReg);
} else {
BuildMI(BB, ImmOpcode, 2, DestReg).addSImm(offset->getValue())
.addReg(baseReg);
}
return;
}
unsigned indexReg = getReg(offset);
if (Class == cLong) {
BuildMI(BB, PPC32::LWZ, 2, DestReg).addSImm(0).addReg(SrcAddrReg);
BuildMI(BB, PPC32::LWZ, 2, DestReg+1).addSImm(4).addReg(SrcAddrReg);
unsigned indexPlus4 = makeAnotherReg(Type::IntTy);
BuildMI(BB, PPC32::ADDI, 2, indexPlus4).addReg(indexReg).addSImm(4);
BuildMI(BB, IdxOpcode, 2, DestReg).addReg(indexReg).addReg(baseReg);
BuildMI(BB, IdxOpcode, 2, DestReg+1).addReg(indexPlus4).addReg(baseReg);
} else if (Class == cByte && I.getType()->isSigned()) {
unsigned TmpReg = makeAnotherReg(I.getType());
BuildMI(BB, Opcode, 2, TmpReg).addSImm(0).addReg(SrcAddrReg);
BuildMI(BB, IdxOpcode, 2, DestReg).addReg(indexReg).addReg(baseReg);
BuildMI(BB, PPC32::EXTSB, 1, DestReg).addReg(TmpReg);
} else {
BuildMI(BB, Opcode, 2, DestReg).addSImm(0).addReg(SrcAddrReg);
BuildMI(BB, IdxOpcode, 2, DestReg).addReg(indexReg).addReg(baseReg);
}
return;
}
// The fallback case, where the load was from a source that could not be
// folded into the load instruction.
unsigned SrcAddrReg = getReg(SourceAddr);
if (Class == cLong) {
BuildMI(BB, ImmOpcode, 2, DestReg).addSImm(0).addReg(SrcAddrReg);
BuildMI(BB, ImmOpcode, 2, DestReg+1).addSImm(4).addReg(SrcAddrReg);
} else if (Class == cByte && I.getType()->isSigned()) {
unsigned TmpReg = makeAnotherReg(I.getType());
BuildMI(BB, ImmOpcode, 2, TmpReg).addSImm(0).addReg(SrcAddrReg);
BuildMI(BB, PPC32::EXTSB, 1, DestReg).addReg(TmpReg);
} else {
BuildMI(BB, ImmOpcode, 2, DestReg).addSImm(0).addReg(SrcAddrReg);
}
}
/// visitStoreInst - Implement LLVM store instructions
///
void ISel::visitStoreInst(StoreInst &I) {
unsigned ValReg = getReg(I.getOperand(0));
unsigned AddressReg = getReg(I.getOperand(1));
const Type *ValTy = I.getOperand(0)->getType();
unsigned Class = getClassB(ValTy);
// Immediate opcodes, for reg+imm addressing
static const unsigned ImmOpcodes[] = {
PPC32::STB, PPC32::STH, PPC32::STW,
PPC32::STFS, PPC32::STFD, PPC32::STW
};
// Indexed opcodes, for reg+reg addressing
static const unsigned IdxOpcodes[] = {
PPC32::STBX, PPC32::STHX, PPC32::STWX,
PPC32::STFSX, PPC32::STDX, PPC32::STWX
};
Value *SourceAddr = I.getOperand(1);
const Type *ValTy = I.getOperand(0)->getType();
unsigned Class = getClassB(ValTy);
unsigned ImmOpcode = ImmOpcodes[Class];
unsigned IdxOpcode = IdxOpcodes[Class];
unsigned ValReg = getReg(I.getOperand(0));
if (Class == cLong) {
BuildMI(BB, PPC32::STW, 3).addReg(ValReg).addSImm(0).addReg(AddressReg);
BuildMI(BB, PPC32::STW, 3).addReg(ValReg+1).addSImm(4).addReg(AddressReg);
// If this store is the only use of the GEP instruction that is its address,
// then we can fold the GEP directly into the store instruction.
// emitGEPOperation with a second to last arg of 'true' will place the
// base register for the GEP into baseReg, and the constant offset from that
// into offset. If the offset fits in 16 bits, we can emit a reg+imm store
// otherwise, we copy the offset into another reg, and use reg+reg addressing.
if (GetElementPtrInst *GEPI = canFoldGEPIntoLoadOrStore(SourceAddr)) {
unsigned baseReg = getReg(GEPI);
ConstantSInt *offset;
emitGEPOperation(BB, BB->end(), GEPI->getOperand(0), GEPI->op_begin()+1,
GEPI->op_end(), baseReg, true, &offset);
if (Class != cLong && canUseAsImmediateForOpcode(offset, 0)) {
BuildMI(BB, ImmOpcode, 3).addReg(ValReg).addSImm(offset->getValue())
.addReg(baseReg);
return;
}
unsigned indexReg = getReg(offset);
if (Class == cLong) {
unsigned indexPlus4 = makeAnotherReg(Type::IntTy);
BuildMI(BB, PPC32::ADDI, 2, indexPlus4).addReg(indexReg).addSImm(4);
BuildMI(BB, IdxOpcode, 3).addReg(ValReg).addReg(indexReg).addReg(baseReg);
BuildMI(BB, IdxOpcode, 3).addReg(ValReg+1).addReg(indexPlus4)
.addReg(baseReg);
return;
}
BuildMI(BB, IdxOpcode, 3).addReg(ValReg).addReg(indexReg).addReg(baseReg);
return;
}
static const unsigned Opcodes[] = {
PPC32::STB, PPC32::STH, PPC32::STW, PPC32::STFS
};
unsigned Opcode = Opcodes[Class];
if (ValTy == Type::DoubleTy) Opcode = PPC32::STFD;
BuildMI(BB, Opcode, 3).addReg(ValReg).addSImm(0).addReg(AddressReg);
// If the store address wasn't the only use of a GEP, we fall back to the
// standard path: store the ValReg at the value in AddressReg.
unsigned AddressReg = getReg(I.getOperand(1));
if (Class == cLong) {
BuildMI(BB, ImmOpcode, 3).addReg(ValReg).addSImm(0).addReg(AddressReg);
BuildMI(BB, ImmOpcode, 3).addReg(ValReg+1).addSImm(4).addReg(AddressReg);
return;
}
BuildMI(BB, ImmOpcode, 3).addReg(ValReg).addSImm(0).addReg(AddressReg);
}
@ -2417,10 +2597,6 @@ void ISel::visitCastInst(CastInst &CI) {
unsigned SrcClass = getClassB(Op->getType());
unsigned DestClass = getClassB(CI.getType());
// Noop casts are not emitted: getReg will return the source operand as the
// register to use for any uses of the noop cast.
if (DestClass == SrcClass)
return;
// If this is a cast from a 32-bit integer to a Long type, and the only uses
// of the case are GEP instructions, then the cast does not need to be
@ -2484,24 +2660,6 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
return;
}
// Implement casts between values of the same type class (as determined by
// getClass) by using a register-to-register move.
if (SrcClass == DestClass) {
if (SrcClass <= cInt) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
} else if (SrcClass == cFP32 || SrcClass == cFP64) {
BuildMI(*MBB, IP, PPC32::FMR, 1, DestReg).addReg(SrcReg);
} else if (SrcClass == cLong) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg+1)
.addReg(SrcReg+1);
} else {
assert(0 && "Cannot handle this type of cast instruction!");
abort();
}
return;
}
// Handle cast of Float -> Double
if (SrcClass == cFP32 && DestClass == cFP64) {
BuildMI(*MBB, IP, PPC32::FMR, 1, DestReg).addReg(SrcReg);
@ -2514,52 +2672,6 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
return;
}
// Handle cast of SMALLER int to LARGER int using a move with sign extension
// or zero extension, depending on whether the source type was signed.
if (SrcClass <= cInt && (DestClass <= cInt || DestClass == cLong) &&
SrcClass < DestClass) {
bool isLong = DestClass == cLong;
if (isLong) {
DestClass = cInt;
++DestReg;
}
bool isUnsigned = DestTy->isUnsigned() || DestTy == Type::BoolTy;
BuildMI(*BB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
if (isLong) { // Handle upper 32 bits as appropriate...
--DestReg;
if (isUnsigned) // Zero out top bits...
BuildMI(*BB, IP, PPC32::LI, 1, DestReg).addSImm(0);
else // Sign extend bottom half...
BuildMI(*BB, IP, PPC32::SRAWI, 2, DestReg).addReg(SrcReg).addImm(31);
}
return;
}
// Special case long -> int ...
if (SrcClass == cLong && DestClass == cInt) {
BuildMI(*BB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg+1).addReg(SrcReg+1);
return;
}
// Handle cast of LARGER int to SMALLER int with a clear or sign extend
if ((SrcClass <= cInt || SrcClass == cLong) && DestClass <= cInt &&
SrcClass > DestClass) {
bool isUnsigned = DestTy->isUnsigned() || DestTy == Type::BoolTy;
unsigned source = (SrcClass == cLong) ? SrcReg+1 : SrcReg;
if (isUnsigned) {
unsigned shift = (DestClass == cByte) ? 24 : 16;
BuildMI(*BB, IP, PPC32::RLWINM, 4, DestReg).addReg(source).addZImm(0)
.addImm(shift).addImm(31);
} else {
BuildMI(*BB, IP, (DestClass == cByte) ? PPC32::EXTSB : PPC32::EXTSH, 1,
DestReg).addReg(source);
}
return;
}
// Handle casts from integer to floating point now...
if (DestClass == cFP32 || DestClass == cFP64) {
@ -2624,7 +2736,7 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
addFrameReference(BuildMI(*BB, IP, PPC32::LFD, 2, ConstF),
ConstantFrameIndex);
addFrameReference(BuildMI(*BB, IP, PPC32::LFD, 2, TempF), ValueFrameIdx);
BuildMI(*BB, IP, PPC32::FSUB, 2, DestReg).addReg(TempF ).addReg(ConstF);
BuildMI(*BB, IP, PPC32::FSUB, 2, DestReg).addReg(TempF).addReg(ConstF);
}
return;
}
@ -2647,23 +2759,26 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
if (DestTy->isSigned()) {
unsigned LoadOp = (DestClass == cShort) ? PPC32::LHA : PPC32::LWZ;
unsigned TempReg = makeAnotherReg(Type::DoubleTy);
// Convert to integer in the FP reg and store it to a stack slot
BuildMI(*BB, IP, PPC32::FCTIWZ, 1, TempReg).addReg(SrcReg);
addFrameReference(BuildMI(*BB, IP, PPC32::STFD, 3)
.addReg(TempReg), ValueFrameIdx);
// There is no load signed byte opcode, so we must emit a sign extend
// There is no load signed byte opcode, so we must emit a sign extend for
// that particular size. Make sure to source the new integer from the
// correct offset.
if (DestClass == cByte) {
unsigned TempReg2 = makeAnotherReg(DestTy);
addFrameReference(BuildMI(*BB, IP, LoadOp, 2, TempReg2),
ValueFrameIdx, 4);
addFrameReference(BuildMI(*BB, IP, PPC32::LBZ, 2, TempReg2),
ValueFrameIdx, 7);
BuildMI(*MBB, IP, PPC32::EXTSB, DestReg).addReg(TempReg2);
} else {
int offset = (DestClass == cShort) ? 6 : 4;
unsigned LoadOp = (DestClass == cShort) ? PPC32::LHA : PPC32::LWZ;
addFrameReference(BuildMI(*BB, IP, LoadOp, 2, DestReg),
ValueFrameIdx, 4);
ValueFrameIdx, offset);
}
} else {
unsigned Zero = getReg(ConstantFP::get(Type::DoubleTy, 0.0f));
@ -2710,29 +2825,228 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB,
BuildMI(*BB, IP, PPC32::FCTIWZ, 1, ConvReg).addReg(TmpReg2);
addFrameReference(BuildMI(*BB, IP, PPC32::STFD, 3).addReg(ConvReg),
FrameIdx);
addFrameReference(BuildMI(*BB, IP, PPC32::LWZ, 2, IntTmp),
FrameIdx, 4);
BuildMI(*BB, IP, PPC32::BLT, 2).addReg(PPC32::CR0).addMBB(PhiMBB);
BuildMI(*BB, IP, PPC32::B, 1).addMBB(XorMBB);
if (DestClass == cByte) {
addFrameReference(BuildMI(*BB, IP, PPC32::LBZ, 2, DestReg),
FrameIdx, 7);
} else if (DestClass == cShort) {
addFrameReference(BuildMI(*BB, IP, PPC32::LHZ, 2, DestReg),
FrameIdx, 6);
} if (DestClass == cInt) {
addFrameReference(BuildMI(*BB, IP, PPC32::LWZ, 2, IntTmp),
FrameIdx, 4);
BuildMI(*BB, IP, PPC32::BLT, 2).addReg(PPC32::CR0).addMBB(PhiMBB);
BuildMI(*BB, IP, PPC32::B, 1).addMBB(XorMBB);
// XorMBB:
// add 2**31 if input was >= 2**31
BB = XorMBB;
BuildMI(BB, PPC32::XORIS, 2, XorReg).addReg(IntTmp).addImm(0x8000);
BuildMI(BB, PPC32::B, 1).addMBB(PhiMBB);
XorMBB->addSuccessor(PhiMBB);
// XorMBB:
// add 2**31 if input was >= 2**31
BB = XorMBB;
BuildMI(BB, PPC32::XORIS, 2, XorReg).addReg(IntTmp).addImm(0x8000);
XorMBB->addSuccessor(PhiMBB);
// PhiMBB:
// DestReg = phi [ IntTmp, OldMBB ], [ XorReg, XorMBB ]
BB = PhiMBB;
BuildMI(BB, PPC32::PHI, 2, DestReg).addReg(IntTmp).addMBB(OldMBB)
.addReg(XorReg).addMBB(XorMBB);
// PhiMBB:
// DestReg = phi [ IntTmp, OldMBB ], [ XorReg, XorMBB ]
BB = PhiMBB;
BuildMI(BB, PPC32::PHI, 2, DestReg).addReg(IntTmp).addMBB(OldMBB)
.addReg(XorReg).addMBB(XorMBB);
}
}
return;
}
// Check our invariants
assert((SrcClass <= cInt || SrcClass == cLong) &&
"Unhandled source class for cast operation!");
assert((DestClass <= cInt || DestClass == cLong) &&
"Unhandled destination class for cast operation!");
bool sourceUnsigned = SrcTy->isUnsigned() || SrcTy == Type::BoolTy;
bool destUnsigned = DestTy->isUnsigned();
// Unsigned -> Unsigned, clear if larger,
if (sourceUnsigned && destUnsigned) {
// handle long dest class now to keep switch clean
if (DestClass == cLong) {
if (SrcClass == cLong) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg+1)
.addReg(SrcReg+1);
} else {
BuildMI(*MBB, IP, PPC32::LI, 1, DestReg).addSImm(0);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg)
.addReg(SrcReg);
}
return;
}
// handle u{ byte, short, int } x u{ byte, short, int }
unsigned clearBits = (SrcClass == cByte || DestClass == cByte) ? 24 : 16;
switch (SrcClass) {
case cByte:
case cShort:
if (SrcClass == DestClass)
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg)
.addImm(0).addImm(clearBits).addImm(31);
break;
case cLong:
++SrcReg;
// Fall through
case cInt:
if (DestClass == cInt)
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg)
.addImm(0).addImm(clearBits).addImm(31);
break;
}
return;
}
// Signed -> Signed
if (!sourceUnsigned && !destUnsigned) {
// handle long dest class now to keep switch clean
if (DestClass == cLong) {
if (SrcClass == cLong) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg+1)
.addReg(SrcReg+1);
} else {
BuildMI(*MBB, IP, PPC32::SRAWI, 2, DestReg).addReg(SrcReg).addImm(31);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg)
.addReg(SrcReg);
}
return;
}
// handle { byte, short, int } x { byte, short, int }
switch (SrcClass) {
case cByte:
if (DestClass == cByte)
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
break;
case cShort:
if (DestClass == cByte)
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
else if (DestClass == cShort)
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::EXTSH, 1, DestReg).addReg(SrcReg);
break;
case cLong:
++SrcReg;
// Fall through
case cInt:
if (DestClass == cByte)
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
else if (DestClass == cShort)
BuildMI(*MBB, IP, PPC32::EXTSH, 1, DestReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
break;
}
return;
}
// Unsigned -> Signed
if (sourceUnsigned && !destUnsigned) {
// handle long dest class now to keep switch clean
if (DestClass == cLong) {
if (SrcClass == cLong) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg+1).
addReg(SrcReg+1);
} else {
BuildMI(*MBB, IP, PPC32::LI, 1, DestReg).addSImm(0);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg)
.addReg(SrcReg);
}
return;
}
// handle u{ byte, short, int } -> { byte, short, int }
switch (SrcClass) {
case cByte:
if (DestClass == cByte)
// uByte 255 -> signed byte == -1
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
else
// uByte 255 -> signed short/int == 255
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg).addImm(0)
.addImm(24).addImm(31);
break;
case cShort:
if (DestClass == cByte)
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
else if (DestClass == cShort)
BuildMI(*MBB, IP, PPC32::EXTSH, 1, DestReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg).addImm(0)
.addImm(16).addImm(31);
break;
case cLong:
++SrcReg;
// Fall through
case cInt:
if (DestClass == cByte)
BuildMI(*MBB, IP, PPC32::EXTSB, 1, DestReg).addReg(SrcReg);
else if (DestClass == cShort)
BuildMI(*MBB, IP, PPC32::EXTSH, 1, DestReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
break;
}
return;
}
// Signed -> Unsigned
if (!sourceUnsigned && destUnsigned) {
// handle long dest class now to keep switch clean
if (DestClass == cLong) {
if (SrcClass == cLong) {
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg+1)
.addReg(SrcReg+1);
} else {
BuildMI(*MBB, IP, PPC32::SRAWI, 2, DestReg).addReg(SrcReg).addImm(31);
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg+1).addReg(SrcReg)
.addReg(SrcReg);
}
return;
}
// handle { byte, short, int } -> u{ byte, short, int }
unsigned clearBits = (DestClass == cByte) ? 24 : 16;
switch (SrcClass) {
case cByte:
case cShort:
if (DestClass == cByte || DestClass == cShort)
// sbyte -1 -> ubyte 0x000000FF
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg)
.addImm(0).addImm(clearBits).addImm(31);
else
// sbyte -1 -> ubyte 0xFFFFFFFF
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
break;
case cLong:
++SrcReg;
// Fall through
case cInt:
if (DestClass == cInt)
BuildMI(*MBB, IP, PPC32::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg);
else
BuildMI(*MBB, IP, PPC32::RLWINM, 4, DestReg).addReg(SrcReg)
.addImm(0).addImm(clearBits).addImm(31);
break;
}
return;
}
// Anything we haven't handled already, we can't (yet) handle at all.
assert(0 && "Unhandled cast instruction!");
std::cerr << "Unhandled cast from " << SrcTy->getDescription()
<< "to " << DestTy->getDescription() << '\n';
abort();
}
@@ -2783,6 +3097,9 @@ void ISel::visitVAArgInst(VAArgInst &I) {
BuildMI(BB, PPC32::LWZ, 2, DestReg).addSImm(0).addReg(VAList);
BuildMI(BB, PPC32::LWZ, 2, DestReg+1).addSImm(4).addReg(VAList);
break;
case Type::FloatTyID:
BuildMI(BB, PPC32::LFS, 2, DestReg).addSImm(0).addReg(VAList);
break;
case Type::DoubleTyID:
BuildMI(BB, PPC32::LFD, 2, DestReg).addSImm(0).addReg(VAList);
break;
@@ -2792,9 +3109,12 @@ void ISel::visitVAArgInst(VAArgInst &I) {
/// visitGetElementPtrInst - instruction-select GEP instructions
///
void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
if (canFoldGEPIntoLoadOrStore(&I))
return;
unsigned outputReg = getReg(I);
emitGEPOperation(BB, BB->end(), I.getOperand(0), I.op_begin()+1, I.op_end(),
outputReg);
outputReg, false, 0);
}
/// emitGEPOperation - Common code shared between visitGetElementPtrInst and
@@ -2803,16 +3123,16 @@ void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
void ISel::emitGEPOperation(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
Value *Src, User::op_iterator IdxBegin,
User::op_iterator IdxEnd, unsigned TargetReg) {
User::op_iterator IdxEnd, unsigned TargetReg,
bool GEPIsFolded, ConstantSInt **RemainderPtr) {
const TargetData &TD = TM.getTargetData();
const Type *Ty = Src->getType();
unsigned basePtrReg = getReg(Src, MBB, IP);
int64_t constValue = 0;
bool anyCombined = false;
// Record the operations to emit the GEP in a vector so that we can emit them
// after having analyzed the entire instruction.
std::vector<CollapsedGepOp*> ops;
std::vector<CollapsedGepOp> ops;
// GEPs have zero or more indices; we must perform a struct access
// or array access for each one.
@@ -2829,7 +3149,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
unsigned fieldIndex = cast<ConstantUInt>(idx)->getValue();
unsigned memberOffset =
TD.getStructLayout(StTy)->MemberOffsets[fieldIndex];
if (constValue != 0) anyCombined = true;
// StructType member offsets are always constant values. Add it to the
// running total.
@@ -2854,8 +3173,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
unsigned elementSize = TD.getTypeSize(Ty);
if (ConstantInt *C = dyn_cast<ConstantInt>(idx)) {
if (constValue != 0) anyCombined = true;
if (ConstantSInt *CS = dyn_cast<ConstantSInt>(C))
constValue += CS->getValue() * elementSize;
else if (ConstantUInt *CU = dyn_cast<ConstantUInt>(C))
@@ -2864,48 +3181,40 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
assert(0 && "Invalid ConstantInt GEP index type!");
} else {
// Push current gep state to this point as an add
CollapsedGepOp *addition =
new CollapsedGepOp(false, 0, ConstantSInt::get(Type::IntTy,
constValue));
ops.push_back(addition);
ops.push_back(CollapsedGepOp(false, 0,
ConstantSInt::get(Type::IntTy,constValue)));
// Push multiply gep op and reset constant value
CollapsedGepOp *multiply =
new CollapsedGepOp(true, idx, ConstantSInt::get(Type::IntTy,
elementSize));
ops.push_back(multiply);
ops.push_back(CollapsedGepOp(true, idx,
ConstantSInt::get(Type::IntTy, elementSize)));
constValue = 0;
}
}
}
// Do some statistical accounting
if (ops.empty()) ++GEPConsts;
if (anyCombined) ++GEPSplits;
// Emit instructions for all the collapsed ops
for(std::vector<CollapsedGepOp *>::iterator cgo_i = ops.begin(),
for(std::vector<CollapsedGepOp>::iterator cgo_i = ops.begin(),
cgo_e = ops.end(); cgo_i != cgo_e; ++cgo_i) {
CollapsedGepOp *cgo = *cgo_i;
CollapsedGepOp& cgo = *cgo_i;
unsigned nextBasePtrReg = makeAnotherReg (Type::IntTy);
if (cgo->isMul) {
if (cgo.isMul) {
// We know the elementSize is a constant, so we can emit a constant mul
// and then add it to the current base reg
unsigned TmpReg = makeAnotherReg(Type::IntTy);
doMultiplyConst(MBB, IP, TmpReg, cgo->index, cgo->size);
doMultiplyConst(MBB, IP, TmpReg, cgo.index, cgo.size);
BuildMI(*MBB, IP, PPC32::ADD, 2, nextBasePtrReg).addReg(basePtrReg)
.addReg(TmpReg);
} else {
// Try and generate an immediate addition if possible
if (cgo->size->isNullValue()) {
if (cgo.size->isNullValue()) {
BuildMI(*MBB, IP, PPC32::OR, 2, nextBasePtrReg).addReg(basePtrReg)
.addReg(basePtrReg);
} else if (canUseAsImmediateForOpcode(cgo->size, 0)) {
} else if (canUseAsImmediateForOpcode(cgo.size, 0)) {
BuildMI(*MBB, IP, PPC32::ADDI, 2, nextBasePtrReg).addReg(basePtrReg)
.addSImm(cgo->size->getValue());
.addSImm(cgo.size->getValue());
} else {
unsigned Op1r = getReg(cgo->size, MBB, IP);
unsigned Op1r = getReg(cgo.size, MBB, IP);
BuildMI(*MBB, IP, PPC32::ADD, 2, nextBasePtrReg).addReg(basePtrReg)
.addReg(Op1r);
}
@@ -2916,6 +3225,15 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
// Add the current base register plus any accumulated constant value
ConstantSInt *remainder = ConstantSInt::get(Type::IntTy, constValue);
// If we are emitting this during a fold, copy the current base register to
// the target, and save the current constant offset so the folding load or
// store can try and use it as an immediate.
if (GEPIsFolded) {
BuildMI (BB, PPC32::OR, 2, TargetReg).addReg(basePtrReg).addReg(basePtrReg);
*RemainderPtr = remainder;
return;
}
// After we have processed all the indices, the result is left in
// basePtrReg. Move it to the register where we were expected to
// put the answer.