mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-01 16:33:37 +01:00
b33129ebad
Add the predicate operand to the instructions. Update the back end accordingly where the instructions are used. Restrict the SP operands to actually only be SP, as otherwise these break assembly parsing for the normal instruction variants. llvm-svn: 138445
891 lines
31 KiB
C++
891 lines
31 KiB
C++
//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#define DEBUG_TYPE "t2-reduce-size"
|
|
#include "ARM.h"
|
|
#include "ARMBaseRegisterInfo.h"
|
|
#include "ARMBaseInstrInfo.h"
|
|
#include "ARMSubtarget.h"
|
|
#include "Thumb2InstrInfo.h"
|
|
#include "MCTargetDesc/ARMAddressingModes.h"
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
using namespace llvm;
|
|
|
|
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
|
|
STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
|
|
STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
|
|
|
|
static cl::opt<int> ReduceLimit("t2-reduce-limit",
|
|
cl::init(-1), cl::Hidden);
|
|
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
|
|
cl::init(-1), cl::Hidden);
|
|
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
|
|
cl::init(-1), cl::Hidden);
|
|
|
|
namespace {
|
|
/// ReduceTable - A static table with information on mapping from wide
|
|
/// opcodes to narrow
|
|
struct ReduceEntry {
|
|
unsigned WideOpc; // Wide opcode
|
|
unsigned NarrowOpc1; // Narrow opcode to transform to
|
|
unsigned NarrowOpc2; // Narrow opcode when it's two-address
|
|
uint8_t Imm1Limit; // Limit of immediate field (bits)
|
|
uint8_t Imm2Limit; // Limit of immediate field when it's two-address
|
|
unsigned LowRegs1 : 1; // Only possible if low-registers are used
|
|
unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
|
|
unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa.
|
|
// 1 - No cc field.
|
|
// 2 - Always set CPSR.
|
|
unsigned PredCC2 : 2;
|
|
unsigned PartFlag : 1; // 16-bit instruction does partial flag update
|
|
unsigned Special : 1; // Needs to be dealt with specially
|
|
};
|
|
|
|
static const ReduceEntry ReduceTable[] = {
|
|
// Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S
|
|
{ ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 },
|
|
{ ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 },
|
|
{ ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 },
|
|
{ ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 },
|
|
{ ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 },
|
|
{ ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 },
|
|
{ ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
|
|
{ ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 },
|
|
{ ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
|
|
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
|
|
//{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
|
|
{ ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
|
|
{ ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
|
|
{ ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
|
|
// FIXME: adr.n immediate offset must be multiple of 4.
|
|
//{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 },
|
|
{ ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 },
|
|
{ ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 },
|
|
{ ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
|
|
{ ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 },
|
|
// FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
|
|
// likely to cause issue in the loop. As a size / performance workaround,
|
|
// they are not marked as such.
|
|
{ ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 },
|
|
{ ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 },
|
|
// FIXME: Do we need the 16-bit 'S' variant?
|
|
{ ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 },
|
|
{ ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 },
|
|
{ ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 },
|
|
{ ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 },
|
|
{ ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 },
|
|
{ ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 },
|
|
{ ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 },
|
|
{ ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 },
|
|
{ ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 },
|
|
{ ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 },
|
|
{ ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 },
|
|
{ ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
|
|
{ ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
|
|
{ ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
|
|
{ ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
|
|
{ ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
|
|
{ ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
|
|
{ ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
|
|
{ ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
|
|
|
|
// FIXME: Clean this up after splitting each Thumb load / store opcode
|
|
// into multiple ones.
|
|
{ ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
|
|
{ ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
|
|
|
|
{ ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 },
|
|
{ ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 },
|
|
{ ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 },
|
|
// ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
|
|
{ ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 },
|
|
{ ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 },
|
|
};
|
|
|
|
class Thumb2SizeReduce : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
Thumb2SizeReduce();
|
|
|
|
const Thumb2InstrInfo *TII;
|
|
const ARMSubtarget *STI;
|
|
|
|
virtual bool runOnMachineFunction(MachineFunction &MF);
|
|
|
|
virtual const char *getPassName() const {
|
|
return "Thumb2 instruction size reduction pass";
|
|
}
|
|
|
|
private:
|
|
/// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
|
|
DenseMap<unsigned, unsigned> ReduceOpcodeMap;
|
|
|
|
bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
|
|
|
|
bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
|
|
bool is2Addr, ARMCC::CondCodes Pred,
|
|
bool LiveCPSR, bool &HasCC, bool &CCDead);
|
|
|
|
bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
|
|
const ReduceEntry &Entry);
|
|
|
|
bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
|
|
const ReduceEntry &Entry, bool LiveCPSR,
|
|
MachineInstr *CPSRDef);
|
|
|
|
/// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
|
|
/// instruction.
|
|
bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
|
|
const ReduceEntry &Entry,
|
|
bool LiveCPSR, MachineInstr *CPSRDef);
|
|
|
|
/// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
|
|
/// non-two-address instruction.
|
|
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
|
|
const ReduceEntry &Entry,
|
|
bool LiveCPSR, MachineInstr *CPSRDef);
|
|
|
|
/// ReduceMBB - Reduce width of instructions in the specified basic block.
|
|
bool ReduceMBB(MachineBasicBlock &MBB);
|
|
};
|
|
char Thumb2SizeReduce::ID = 0;
|
|
}
|
|
|
|
/// Build the wide-opcode -> table-index map from ReduceTable. Each wide
/// opcode is expected to appear in the table exactly once; a repeated opcode
/// trips the assertion.
Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
  const unsigned NumEntries = array_lengthof(ReduceTable);
  for (unsigned Idx = 0; Idx != NumEntries; ++Idx) {
    const unsigned WideOpcode = ReduceTable[Idx].WideOpc;
    const bool Inserted =
      ReduceOpcodeMap.insert(std::make_pair(WideOpcode, Idx)).second;
    if (!Inserted)
      assert(false && "Duplicated entries?");
  }
}
|
|
|
|
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
|
|
for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
|
|
if (*Regs == ARM::CPSR)
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
|
|
/// the 's' 16-bit instruction partially update CPSR. Abort the
|
|
/// transformation to avoid adding false dependency on last CPSR setting
|
|
/// instruction which hurts the ability for out-of-order execution engine
|
|
/// to do register renaming magic.
|
|
/// This function checks if there is a read-of-write dependency between the
|
|
/// last instruction that defines the CPSR and the current instruction. If there
|
|
/// is, then there is no harm done since the instruction cannot be retired
|
|
/// before the CPSR setting instruction anyway.
|
|
/// Note, we are not doing full dependency analysis here for the sake of compile
|
|
/// time. We're not looking for cases like:
|
|
/// r0 = muls ...
|
|
/// r1 = add.w r0, ...
|
|
/// ...
|
|
/// = mul.w r1
|
|
/// In this case it would have been ok to narrow the mul.w to muls since there
|
|
/// are indirect RAW dependency between the muls and the mul.w
|
|
bool
|
|
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
|
|
if (!Def || !STI->avoidCPSRPartialUpdate())
|
|
return false;
|
|
|
|
SmallSet<unsigned, 2> Defs;
|
|
for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
|
|
const MachineOperand &MO = Def->getOperand(i);
|
|
if (!MO.isReg() || MO.isUndef() || MO.isUse())
|
|
continue;
|
|
unsigned Reg = MO.getReg();
|
|
if (Reg == 0 || Reg == ARM::CPSR)
|
|
continue;
|
|
Defs.insert(Reg);
|
|
}
|
|
|
|
for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
|
|
const MachineOperand &MO = Use->getOperand(i);
|
|
if (!MO.isReg() || MO.isUndef() || MO.isDef())
|
|
continue;
|
|
unsigned Reg = MO.getReg();
|
|
if (Defs.count(Reg))
|
|
return false;
|
|
}
|
|
|
|
// No read-after-write dependency. The narrowing will add false dependency.
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
|
|
bool is2Addr, ARMCC::CondCodes Pred,
|
|
bool LiveCPSR, bool &HasCC, bool &CCDead) {
|
|
if ((is2Addr && Entry.PredCC2 == 0) ||
|
|
(!is2Addr && Entry.PredCC1 == 0)) {
|
|
if (Pred == ARMCC::AL) {
|
|
// Not predicated, must set CPSR.
|
|
if (!HasCC) {
|
|
// Original instruction was not setting CPSR, but CPSR is not
|
|
// currently live anyway. It's ok to set it. The CPSR def is
|
|
// dead though.
|
|
if (!LiveCPSR) {
|
|
HasCC = true;
|
|
CCDead = true;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
} else {
|
|
// Predicated, must not set CPSR.
|
|
if (HasCC)
|
|
return false;
|
|
}
|
|
} else if ((is2Addr && Entry.PredCC2 == 2) ||
|
|
(!is2Addr && Entry.PredCC1 == 2)) {
|
|
/// Old opcode has an optional def of CPSR.
|
|
if (HasCC)
|
|
return true;
|
|
// If old opcode does not implicitly define CPSR, then it's not ok since
|
|
// these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
|
|
if (!HasImplicitCPSRDef(MI->getDesc()))
|
|
return false;
|
|
HasCC = true;
|
|
} else {
|
|
// 16-bit instruction does not set CPSR.
|
|
if (HasCC)
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/// VerifyLowRegs - Return true if every explicit register operand of \p MI
/// is a low register, apart from the exceptions below:
///  - PC (and SP) for the load-multiple opcodes,
///  - LR (and SP) for the store-multiple-with-writeback opcodes,
///  - SP as operand 1 of t2LDRi12 / t2STRi12.
/// Null registers and CPSR are ignored.
static bool VerifyLowRegs(MachineInstr *MI) {
  const unsigned Opc = MI->getOpcode();

  bool AllowPC = false;
  bool AllowLR = false;
  switch (Opc) {
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
    AllowPC = true;
    break;
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    AllowLR = true;
    break;
  default:
    break;
  }
  const bool AllowSP = AllowPC || AllowLR;
  // Special case for these ldr / str with sp as base register.
  const bool AllowSPBase = (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12);

  for (unsigned Idx = 0, End = MI->getNumOperands(); Idx != End; ++Idx) {
    const MachineOperand &Op = MI->getOperand(Idx);
    if (!Op.isReg() || Op.isImplicit())
      continue;

    const unsigned R = Op.getReg();
    if (R == 0 || R == ARM::CPSR)
      continue;
    if (R == ARM::PC && AllowPC)
      continue;
    if (R == ARM::LR && AllowLR)
      continue;
    if (R == ARM::SP && (AllowSP || (Idx == 1 && AllowSPBase)))
      continue;

    if (!isARMLowRegister(R))
      return false;
  }
  return true;
}
|
|
|
|
bool
|
|
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
|
|
const ReduceEntry &Entry) {
|
|
if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
|
|
return false;
|
|
|
|
unsigned Scale = 1;
|
|
bool HasImmOffset = false;
|
|
bool HasShift = false;
|
|
bool HasOffReg = true;
|
|
bool isLdStMul = false;
|
|
unsigned Opc = Entry.NarrowOpc1;
|
|
unsigned OpNum = 3; // First 'rest' of operands.
|
|
uint8_t ImmLimit = Entry.Imm1Limit;
|
|
|
|
switch (Entry.WideOpc) {
|
|
default:
|
|
llvm_unreachable("Unexpected Thumb2 load / store opcode!");
|
|
case ARM::t2LDRi12:
|
|
case ARM::t2STRi12:
|
|
if (MI->getOperand(1).getReg() == ARM::SP) {
|
|
Opc = Entry.NarrowOpc2;
|
|
ImmLimit = Entry.Imm2Limit;
|
|
HasOffReg = false;
|
|
}
|
|
|
|
Scale = 4;
|
|
HasImmOffset = true;
|
|
HasOffReg = false;
|
|
break;
|
|
case ARM::t2LDRBi12:
|
|
case ARM::t2STRBi12:
|
|
HasImmOffset = true;
|
|
HasOffReg = false;
|
|
break;
|
|
case ARM::t2LDRHi12:
|
|
case ARM::t2STRHi12:
|
|
Scale = 2;
|
|
HasImmOffset = true;
|
|
HasOffReg = false;
|
|
break;
|
|
case ARM::t2LDRs:
|
|
case ARM::t2LDRBs:
|
|
case ARM::t2LDRHs:
|
|
case ARM::t2LDRSBs:
|
|
case ARM::t2LDRSHs:
|
|
case ARM::t2STRs:
|
|
case ARM::t2STRBs:
|
|
case ARM::t2STRHs:
|
|
HasShift = true;
|
|
OpNum = 4;
|
|
break;
|
|
case ARM::t2LDMIA:
|
|
case ARM::t2LDMDB: {
|
|
unsigned BaseReg = MI->getOperand(0).getReg();
|
|
if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
|
|
return false;
|
|
|
|
// For the non-writeback version (this one), the base register must be
|
|
// one of the registers being loaded.
|
|
bool isOK = false;
|
|
for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
|
|
if (MI->getOperand(i).getReg() == BaseReg) {
|
|
isOK = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!isOK)
|
|
return false;
|
|
|
|
OpNum = 0;
|
|
isLdStMul = true;
|
|
break;
|
|
}
|
|
case ARM::t2LDMIA_RET: {
|
|
unsigned BaseReg = MI->getOperand(1).getReg();
|
|
if (BaseReg != ARM::SP)
|
|
return false;
|
|
Opc = Entry.NarrowOpc2; // tPOP_RET
|
|
OpNum = 2;
|
|
isLdStMul = true;
|
|
break;
|
|
}
|
|
case ARM::t2LDMIA_UPD:
|
|
case ARM::t2LDMDB_UPD:
|
|
case ARM::t2STMIA_UPD:
|
|
case ARM::t2STMDB_UPD: {
|
|
OpNum = 0;
|
|
|
|
unsigned BaseReg = MI->getOperand(1).getReg();
|
|
if (BaseReg == ARM::SP &&
|
|
(Entry.WideOpc == ARM::t2LDMIA_UPD ||
|
|
Entry.WideOpc == ARM::t2STMDB_UPD)) {
|
|
Opc = Entry.NarrowOpc2; // tPOP or tPUSH
|
|
OpNum = 2;
|
|
} else if (!isARMLowRegister(BaseReg) ||
|
|
(Entry.WideOpc != ARM::t2LDMIA_UPD &&
|
|
Entry.WideOpc != ARM::t2STMIA_UPD)) {
|
|
return false;
|
|
}
|
|
|
|
isLdStMul = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
unsigned OffsetReg = 0;
|
|
bool OffsetKill = false;
|
|
if (HasShift) {
|
|
OffsetReg = MI->getOperand(2).getReg();
|
|
OffsetKill = MI->getOperand(2).isKill();
|
|
|
|
if (MI->getOperand(3).getImm())
|
|
// Thumb1 addressing mode doesn't support shift.
|
|
return false;
|
|
}
|
|
|
|
unsigned OffsetImm = 0;
|
|
if (HasImmOffset) {
|
|
OffsetImm = MI->getOperand(2).getImm();
|
|
unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
|
|
|
|
if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
|
|
// Make sure the immediate field fits.
|
|
return false;
|
|
}
|
|
|
|
// Add the 16-bit load / store instruction.
|
|
DebugLoc dl = MI->getDebugLoc();
|
|
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
|
|
if (!isLdStMul) {
|
|
MIB.addOperand(MI->getOperand(0));
|
|
MIB.addOperand(MI->getOperand(1));
|
|
|
|
if (HasImmOffset)
|
|
MIB.addImm(OffsetImm / Scale);
|
|
|
|
assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
|
|
|
|
if (HasOffReg)
|
|
MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
|
|
}
|
|
|
|
// Transfer the rest of operands.
|
|
for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
|
|
MIB.addOperand(MI->getOperand(OpNum));
|
|
|
|
// Transfer memoperands.
|
|
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
|
|
|
|
// Transfer MI flags.
|
|
MIB.setMIFlags(MI->getFlags());
|
|
|
|
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
|
|
|
|
MBB.erase(MI);
|
|
++NumLdSts;
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
|
|
const ReduceEntry &Entry,
|
|
bool LiveCPSR, MachineInstr *CPSRDef) {
|
|
unsigned Opc = MI->getOpcode();
|
|
if (Opc == ARM::t2ADDri) {
|
|
// If the source register is SP, try to reduce to tADDrSPi, otherwise
|
|
// it's a normal reduce.
|
|
if (MI->getOperand(1).getReg() != ARM::SP) {
|
|
if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
|
|
return true;
|
|
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
|
|
}
|
|
// Try to reduce to tADDrSPi.
|
|
unsigned Imm = MI->getOperand(2).getImm();
|
|
// The immediate must be in range, the destination register must be a low
|
|
// reg, the predicate must be "always" and the condition flags must not
|
|
// be being set.
|
|
if (Imm & 3 || Imm > 1020)
|
|
return false;
|
|
if (!isARMLowRegister(MI->getOperand(0).getReg()))
|
|
return false;
|
|
if (MI->getOperand(3).getImm() != ARMCC::AL)
|
|
return false;
|
|
const MCInstrDesc &MCID = MI->getDesc();
|
|
if (MCID.hasOptionalDef() &&
|
|
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
|
|
return false;
|
|
|
|
MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
|
|
TII->get(ARM::tADDrSPi))
|
|
.addOperand(MI->getOperand(0))
|
|
.addOperand(MI->getOperand(1))
|
|
.addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
|
|
AddDefaultPred(MIB);
|
|
|
|
// Transfer MI flags.
|
|
MIB.setMIFlags(MI->getFlags());
|
|
|
|
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
|
|
|
|
MBB.erase(MI);
|
|
++NumNarrows;
|
|
return true;
|
|
}
|
|
|
|
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
|
|
return false;
|
|
|
|
const MCInstrDesc &MCID = MI->getDesc();
|
|
if (MCID.mayLoad() || MCID.mayStore())
|
|
return ReduceLoadStore(MBB, MI, Entry);
|
|
|
|
switch (Opc) {
|
|
default: break;
|
|
case ARM::t2ADDSri:
|
|
case ARM::t2ADDSrr: {
|
|
unsigned PredReg = 0;
|
|
if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
|
|
switch (Opc) {
|
|
default: break;
|
|
case ARM::t2ADDSri: {
|
|
if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
|
|
return true;
|
|
// fallthrough
|
|
}
|
|
case ARM::t2ADDSrr:
|
|
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case ARM::t2RSBri:
|
|
case ARM::t2RSBSri:
|
|
case ARM::t2SXTB:
|
|
case ARM::t2SXTH:
|
|
case ARM::t2UXTB:
|
|
case ARM::t2UXTH:
|
|
if (MI->getOperand(2).getImm() == 0)
|
|
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
|
|
break;
|
|
case ARM::t2MOVi16:
|
|
// Can convert only 'pure' immediate operands, not immediates obtained as
|
|
// globals' addresses.
|
|
if (MI->getOperand(1).isImm())
|
|
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
|
|
break;
|
|
case ARM::t2CMPrr: {
|
|
// Try to reduce to the lo-reg only version first. Why there are two
|
|
// versions of the instruction is a mystery.
|
|
// It would be nice to just have two entries in the master table that
|
|
// are prioritized, but the table assumes a unique entry for each
|
|
// source insn opcode. So for now, we hack a local entry record to use.
|
|
static const ReduceEntry NarrowEntry =
|
|
{ ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
|
|
if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
|
|
return true;
|
|
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
|
|
const ReduceEntry &Entry,
|
|
bool LiveCPSR, MachineInstr *CPSRDef) {
|
|
|
|
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
|
|
return false;
|
|
|
|
unsigned Reg0 = MI->getOperand(0).getReg();
|
|
unsigned Reg1 = MI->getOperand(1).getReg();
|
|
if (Reg0 != Reg1) {
|
|
// Try to commute the operands to make it a 2-address instruction.
|
|
unsigned CommOpIdx1, CommOpIdx2;
|
|
if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
|
|
CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
|
|
return false;
|
|
MachineInstr *CommutedMI = TII->commuteInstruction(MI);
|
|
if (!CommutedMI)
|
|
return false;
|
|
}
|
|
if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
|
|
return false;
|
|
if (Entry.Imm2Limit) {
|
|
unsigned Imm = MI->getOperand(2).getImm();
|
|
unsigned Limit = (1 << Entry.Imm2Limit) - 1;
|
|
if (Imm > Limit)
|
|
return false;
|
|
} else {
|
|
unsigned Reg2 = MI->getOperand(2).getReg();
|
|
if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
|
|
return false;
|
|
}
|
|
|
|
// Check if it's possible / necessary to transfer the predicate.
|
|
const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
|
|
unsigned PredReg = 0;
|
|
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
|
|
bool SkipPred = false;
|
|
if (Pred != ARMCC::AL) {
|
|
if (!NewMCID.isPredicable())
|
|
// Can't transfer predicate, fail.
|
|
return false;
|
|
} else {
|
|
SkipPred = !NewMCID.isPredicable();
|
|
}
|
|
|
|
bool HasCC = false;
|
|
bool CCDead = false;
|
|
const MCInstrDesc &MCID = MI->getDesc();
|
|
if (MCID.hasOptionalDef()) {
|
|
unsigned NumOps = MCID.getNumOperands();
|
|
HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
|
|
if (HasCC && MI->getOperand(NumOps-1).isDead())
|
|
CCDead = true;
|
|
}
|
|
if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
|
|
return false;
|
|
|
|
// Avoid adding a false dependency on partial flag update by some 16-bit
|
|
// instructions which has the 's' bit set.
|
|
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
|
|
canAddPseudoFlagDep(CPSRDef, MI))
|
|
return false;
|
|
|
|
// Add the 16-bit instruction.
|
|
DebugLoc dl = MI->getDebugLoc();
|
|
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
|
|
MIB.addOperand(MI->getOperand(0));
|
|
if (NewMCID.hasOptionalDef()) {
|
|
if (HasCC)
|
|
AddDefaultT1CC(MIB, CCDead);
|
|
else
|
|
AddNoT1CC(MIB);
|
|
}
|
|
|
|
// Transfer the rest of operands.
|
|
unsigned NumOps = MCID.getNumOperands();
|
|
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
|
|
if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
|
|
continue;
|
|
if (SkipPred && MCID.OpInfo[i].isPredicate())
|
|
continue;
|
|
MIB.addOperand(MI->getOperand(i));
|
|
}
|
|
|
|
// Transfer MI flags.
|
|
MIB.setMIFlags(MI->getFlags());
|
|
|
|
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
|
|
|
|
MBB.erase(MI);
|
|
++Num2Addrs;
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
|
|
const ReduceEntry &Entry,
|
|
bool LiveCPSR, MachineInstr *CPSRDef) {
|
|
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
|
|
return false;
|
|
|
|
unsigned Limit = ~0U;
|
|
if (Entry.Imm1Limit)
|
|
Limit = (1 << Entry.Imm1Limit) - 1;
|
|
|
|
const MCInstrDesc &MCID = MI->getDesc();
|
|
for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
|
|
if (MCID.OpInfo[i].isPredicate())
|
|
continue;
|
|
const MachineOperand &MO = MI->getOperand(i);
|
|
if (MO.isReg()) {
|
|
unsigned Reg = MO.getReg();
|
|
if (!Reg || Reg == ARM::CPSR)
|
|
continue;
|
|
if (Entry.LowRegs1 && !isARMLowRegister(Reg))
|
|
return false;
|
|
} else if (MO.isImm() &&
|
|
!MCID.OpInfo[i].isPredicate()) {
|
|
if (((unsigned)MO.getImm()) > Limit)
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Check if it's possible / necessary to transfer the predicate.
|
|
const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
|
|
unsigned PredReg = 0;
|
|
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
|
|
bool SkipPred = false;
|
|
if (Pred != ARMCC::AL) {
|
|
if (!NewMCID.isPredicable())
|
|
// Can't transfer predicate, fail.
|
|
return false;
|
|
} else {
|
|
SkipPred = !NewMCID.isPredicable();
|
|
}
|
|
|
|
bool HasCC = false;
|
|
bool CCDead = false;
|
|
if (MCID.hasOptionalDef()) {
|
|
unsigned NumOps = MCID.getNumOperands();
|
|
HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
|
|
if (HasCC && MI->getOperand(NumOps-1).isDead())
|
|
CCDead = true;
|
|
}
|
|
if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
|
|
return false;
|
|
|
|
// Avoid adding a false dependency on partial flag update by some 16-bit
|
|
// instructions which has the 's' bit set.
|
|
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
|
|
canAddPseudoFlagDep(CPSRDef, MI))
|
|
return false;
|
|
|
|
// Add the 16-bit instruction.
|
|
DebugLoc dl = MI->getDebugLoc();
|
|
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
|
|
MIB.addOperand(MI->getOperand(0));
|
|
if (NewMCID.hasOptionalDef()) {
|
|
if (HasCC)
|
|
AddDefaultT1CC(MIB, CCDead);
|
|
else
|
|
AddNoT1CC(MIB);
|
|
}
|
|
|
|
// Transfer the rest of operands.
|
|
unsigned NumOps = MCID.getNumOperands();
|
|
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
|
|
if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
|
|
continue;
|
|
if ((MCID.getOpcode() == ARM::t2RSBSri ||
|
|
MCID.getOpcode() == ARM::t2RSBri ||
|
|
MCID.getOpcode() == ARM::t2SXTB ||
|
|
MCID.getOpcode() == ARM::t2SXTH ||
|
|
MCID.getOpcode() == ARM::t2UXTB ||
|
|
MCID.getOpcode() == ARM::t2UXTH) && i == 2)
|
|
// Skip the zero immediate operand, it's now implicit.
|
|
continue;
|
|
bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
|
|
if (SkipPred && isPred)
|
|
continue;
|
|
const MachineOperand &MO = MI->getOperand(i);
|
|
if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
|
|
// Skip implicit def of CPSR. Either it's modeled as an optional
|
|
// def now or it's already an implicit def on the new instruction.
|
|
continue;
|
|
MIB.addOperand(MO);
|
|
}
|
|
if (!MCID.isPredicable() && NewMCID.isPredicable())
|
|
AddDefaultPred(MIB);
|
|
|
|
// Transfer MI flags.
|
|
MIB.setMIFlags(MI->getFlags());
|
|
|
|
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
|
|
|
|
MBB.erase(MI);
|
|
++NumNarrows;
|
|
return true;
|
|
}
|
|
|
|
/// UpdateCPSRDef - Compute CPSR liveness after \p MI. \p DefCPSR is set to
/// true when MI has any (non-undef) def of CPSR, dead or not. The result is
/// true when MI has a CPSR def that is not dead, or when \p LiveCPSR was
/// already true.
static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
  bool SawLiveDef = false;
  for (unsigned Idx = 0, End = MI.getNumOperands(); Idx != End; ++Idx) {
    const MachineOperand &Op = MI.getOperand(Idx);
    const bool IsCPSRDef = Op.isReg() && !Op.isUndef() && !Op.isUse() &&
                           Op.getReg() == ARM::CPSR;
    if (!IsCPSRDef)
      continue;

    DefCPSR = true;
    if (!Op.isDead())
      SawLiveDef = true;
  }

  return SawLiveDef || LiveCPSR;
}
|
|
|
|
/// UpdateCPSRUse - Compute CPSR liveness after the uses in \p MI. Returns
/// false as soon as a (non-undef) use of CPSR marked as kill is found;
/// otherwise \p LiveCPSR is returned unchanged. Any CPSR use seen while
/// \p LiveCPSR is false trips the liveness-tracking assertion.
static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (unsigned Idx = 0, End = MI.getNumOperands(); Idx != End; ++Idx) {
    const MachineOperand &Op = MI.getOperand(Idx);
    const bool IsCPSRUse = Op.isReg() && !Op.isUndef() && !Op.isDef() &&
                           Op.getReg() == ARM::CPSR;
    if (!IsCPSRUse)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
    if (Op.isKill())
      return false;
  }

  return LiveCPSR;
}
|
|
|
|
/// ReduceMBB - Reduce width of instructions in the specified basic block.
/// The block is walked once, front to back, while tracking whether CPSR is
/// live and which instruction defined it last. Returns true if any
/// instruction was replaced.
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *CPSRDef = 0;

  MachineBasicBlock::iterator Cur = MBB.begin(), End = MBB.end();
  while (Cur != End) {
    // Remember the successor up front: a successful reduction erases the
    // current instruction.
    MachineBasicBlock::iterator Next = llvm::next(Cur);

    MachineInstr *MI = &*Cur;
    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    bool Reduced = false;
    DenseMap<unsigned, unsigned>::iterator OPI =
      ReduceOpcodeMap.find(MI->getOpcode());
    if (OPI != ReduceOpcodeMap.end()) {
      const ReduceEntry &Entry = ReduceTable[OPI->second];
      if (Entry.Special) {
        // Special rows are handled exclusively by ReduceSpecial.
        Reduced = ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef);
      } else if (Entry.NarrowOpc2 &&
                 ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
        // Transformed to a 16-bit two-address instruction.
        Reduced = true;
      } else if (Entry.NarrowOpc1) {
        // Try to transform to a 16-bit non-two-address instruction.
        Reduced = ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
      }
    }

    if (Reduced) {
      Modified = true;
      // The replacement was inserted right before the erased instruction,
      // i.e. it now sits just in front of Next.
      MachineBasicBlock::iterator NewI = prior(Next);
      MI = &*NewI;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->getDesc().isCall())
      // Calls don't really set CPSR.
      CPSRDef = 0;
    else if (DefCPSR)
      // This is the last CPSR defining instruction.
      CPSRDef = MI;

    Cur = Next;
  }

  return Modified;
}
|
|
|
|
/// Pass entry point: cache the target's instruction info and subtarget, then
/// run the reduction over every basic block of \p MF. Returns true if any
/// block was modified.
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  const TargetMachine &TM = MF.getTarget();
  TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
  STI = &TM.getSubtarget<ARMSubtarget>();

  bool Changed = false;
  MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end();
  while (BB != BBEnd) {
    if (ReduceMBB(*BB))
      Changed = true;
    ++BB;
  }
  return Changed;
}
|
|
|
|
/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
|
|
/// reduction pass.
|
|
FunctionPass *llvm::createThumb2SizeReductionPass() {
|
|
return new Thumb2SizeReduce();
|
|
}
|