mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
CXX_FAST_TLS calling convention: performance improvement for ARM.
This is the same change on ARM as r255821 on AArch64. rdar://9001553 llvm-svn: 257424
This commit is contained in:
parent
dbfeeafbb3
commit
6cbd4fbe85
@ -88,10 +88,21 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS)
|
if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS)
|
||||||
return CSR_iOS_CXX_TLS_SaveList;
|
return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
|
||||||
|
? CSR_iOS_CXX_TLS_PE_SaveList
|
||||||
|
: CSR_iOS_CXX_TLS_SaveList;
|
||||||
return RegList;
|
return RegList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the null-terminated list of callee-saved registers that are
/// preserved through explicit copies instead of prologue/epilogue spills,
/// or nullptr when the function does not use split CSR handling.
const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");

  // Only CXX_FAST_TLS functions that were flagged as split-CSR qualify; the
  // flag is consulted only after the calling convention has matched.
  const bool IsCXXFastTLS =
      MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS;
  if (!IsCXXFastTLS || !MF->getInfo<ARMFunctionInfo>()->isSplitCSR())
    return nullptr;

  return CSR_iOS_CXX_TLS_ViaCopy_SaveList;
}
|
||||||
|
|
||||||
const uint32_t *
|
const uint32_t *
|
||||||
ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
|
ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
|
||||||
CallingConv::ID CC) const {
|
CallingConv::ID CC) const {
|
||||||
|
@ -98,6 +98,8 @@ protected:
|
|||||||
public:
|
public:
|
||||||
/// Code Generation virtual methods...
|
/// Code Generation virtual methods...
|
||||||
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
|
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
|
||||||
|
const MCPhysReg *
|
||||||
|
getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
|
||||||
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
|
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
|
||||||
CallingConv::ID) const override;
|
CallingConv::ID) const override;
|
||||||
const uint32_t *getNoPreservedMask() const override;
|
const uint32_t *getNoPreservedMask() const override;
|
||||||
|
@ -234,6 +234,12 @@ def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP,
|
|||||||
def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1),
|
def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1),
|
||||||
(sequence "D%u", 31, 0))>;
|
(sequence "D%u", 31, 0))>;
|
||||||
|
|
||||||
|
// CSRs of the CXX_FAST_TLS convention that are still saved and restored by
// the prologue and epilogue. This set contains only LR.
|
||||||
|
def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR)>;
|
||||||
|
|
||||||
|
// CSRs of the CXX_FAST_TLS convention that are handled explicitly via copies:
// everything in CSR_iOS_CXX_TLS except LR.
|
||||||
|
def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, LR)>;
|
||||||
|
|
||||||
// The "interrupt" attribute is used to generate code that is acceptable in
|
// The "interrupt" attribute is used to generate code that is acceptable in
|
||||||
// exception-handlers of various kinds. It makes us use a different return
|
// exception-handlers of various kinds. It makes us use a different return
|
||||||
// instruction (handled elsewhere) and affects which registers we must return to
|
// instruction (handled elsewhere) and affects which registers we must return to
|
||||||
|
@ -2083,6 +2083,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
|
|||||||
if (!FuncInfo.CanLowerReturn)
|
if (!FuncInfo.CanLowerReturn)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (TLI.supportSplitCSR(FuncInfo.MF))
|
||||||
|
return false;
|
||||||
|
|
||||||
// Build a list of return value registers.
|
// Build a list of return value registers.
|
||||||
SmallVector<unsigned, 4> RetRegs;
|
SmallVector<unsigned, 4> RetRegs;
|
||||||
|
|
||||||
|
@ -2348,6 +2348,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
|
|||||||
Flag = Chain.getValue(1);
|
Flag = Chain.getValue(1);
|
||||||
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
|
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
|
||||||
}
|
}
|
||||||
|
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
|
||||||
|
const MCPhysReg *I =
|
||||||
|
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
|
||||||
|
if (I) {
|
||||||
|
for (; *I; ++I) {
|
||||||
|
if (ARM::GPRRegClass.contains(*I))
|
||||||
|
RetOps.push_back(DAG.getRegister(*I, MVT::i32));
|
||||||
|
else if (ARM::DPRRegClass.contains(*I))
|
||||||
|
RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
|
||||||
|
else
|
||||||
|
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Update chain and glue.
|
// Update chain and glue.
|
||||||
RetOps[0] = Chain;
|
RetOps[0] = Chain;
|
||||||
@ -12393,3 +12406,49 @@ unsigned ARMTargetLowering::getExceptionSelectorRegister(
|
|||||||
// via the personality function.
|
// via the personality function.
|
||||||
return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
|
return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Mark the function that owns \p Entry as using split CSR handling by
/// setting IsSplitCSR in its ARMFunctionInfo.
void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  MachineFunction *MF = Entry->getParent();
  MF->getInfo<ARMFunctionInfo>()->setIsSplitCSR(true);
}
|
||||||
|
|
||||||
|
/// Preserve the callee-saved registers that are handled via explicit copies.
///
/// For every register returned by getCalleeSavedRegsViaCopy(), a COPY into a
/// fresh virtual register is inserted at the top of \p Entry, and a COPY back
/// into the physical register is inserted right before the first terminator
/// of each block in \p Exits. Does nothing when the function has no such
/// registers.
void ARMTargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    // Pick the register class matching the physical register; only GPR and
    // DPR registers are expected in the via-copy list.
    const TargetRegisterClass *RC = nullptr;
    if (ARM::GPRRegClass.contains(*I))
      RC = &ARM::GPRRegClass;
    else if (ARM::DPRRegClass.contains(*I))
      RC = &ARM::DPRRegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    unsigned NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction()->hasFnAttribute(
               Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
            NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before the terminator. Placing
    // them at the start of the exit block would restore the register before
    // the block's own code runs, and when Entry is itself an exit block the
    // restore would even precede the save above and read NewVR before it is
    // defined.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}
|
||||||
|
@ -580,6 +580,15 @@ namespace llvm {
|
|||||||
SmallVectorImpl<SDValue> &InVals,
|
SmallVectorImpl<SDValue> &InVals,
|
||||||
bool isThisReturn, SDValue ThisVal) const;
|
bool isThisReturn, SDValue ThisVal) const;
|
||||||
|
|
||||||
|
/// Split CSR handling is supported only for functions that use the
/// CXX_FAST_TLS calling convention and carry the nounwind attribute.
bool supportSplitCSR(MachineFunction *MF) const override {
  const auto *F = MF->getFunction();
  if (F->getCallingConv() != CallingConv::CXX_FAST_TLS)
    return false;
  return F->hasFnAttribute(Attribute::NoUnwind);
}
|
||||||
|
void initializeSplitCSR(MachineBasicBlock *Entry) const override;
|
||||||
|
void insertCopiesSplitCSR(
|
||||||
|
MachineBasicBlock *Entry,
|
||||||
|
const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
|
||||||
|
|
||||||
SDValue
|
SDValue
|
||||||
LowerFormalArguments(SDValue Chain,
|
LowerFormalArguments(SDValue Chain,
|
||||||
CallingConv::ID CallConv, bool isVarArg,
|
CallingConv::ID CallConv, bool isVarArg,
|
||||||
|
@ -20,4 +20,5 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
|
|||||||
RestoreSPFromFP(false), LRSpilledForFarJump(false),
|
RestoreSPFromFP(false), LRSpilledForFarJump(false),
|
||||||
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
|
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
|
||||||
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
|
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
|
||||||
PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {}
|
PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
|
||||||
|
IsSplitCSR(false) {}
|
||||||
|
@ -118,6 +118,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
|
|||||||
/// coalesced weights.
|
/// coalesced weights.
|
||||||
DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights;
|
DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights;
|
||||||
|
|
||||||
|
/// True if this function has a subset of CSRs that is handled explicitly via
|
||||||
|
/// copies.
|
||||||
|
bool IsSplitCSR;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ARMFunctionInfo() :
|
ARMFunctionInfo() :
|
||||||
isThumb(false),
|
isThumb(false),
|
||||||
@ -128,7 +132,7 @@ public:
|
|||||||
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
|
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
|
||||||
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
|
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
|
||||||
NumAlignedDPRCS2Regs(0), PICLabelUId(0),
|
NumAlignedDPRCS2Regs(0), PICLabelUId(0),
|
||||||
VarArgsFrameIndex(0), HasITBlocks(false) {}
|
VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {}
|
||||||
|
|
||||||
explicit ARMFunctionInfo(MachineFunction &MF);
|
explicit ARMFunctionInfo(MachineFunction &MF);
|
||||||
|
|
||||||
@ -199,6 +203,9 @@ public:
|
|||||||
bool hasITBlocks() const { return HasITBlocks; }
|
bool hasITBlocks() const { return HasITBlocks; }
|
||||||
void setHasITBlocks(bool h) { HasITBlocks = h; }
|
void setHasITBlocks(bool h) { HasITBlocks = h; }
|
||||||
|
|
||||||
|
/// Returns true if this function has a subset of CSRs that is handled
/// explicitly via copies.
bool isSplitCSR() const { return IsSplitCSR; }
|
||||||
|
/// Sets the IsSplitCSR flag to \p s.
void setIsSplitCSR(bool s) { IsSplitCSR = s; }
|
||||||
|
|
||||||
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
|
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
|
||||||
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
|
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
|
||||||
llvm_unreachable("Duplicate entries!");
|
llvm_unreachable("Duplicate entries!");
|
||||||
|
@ -28,17 +28,19 @@ __tls_init.exit:
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: _ZTW2sg
|
; CHECK-LABEL: _ZTW2sg
|
||||||
; CHECK: push {r1, r2, r3, r4, r7, lr}
|
; CHECK: push {lr}
|
||||||
; CHECK: push {r9, r12}
|
; CHECK-NOT: push {r1, r2, r3, r4, r7, lr}
|
||||||
; CHECK: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
|
; CHECK-NOT: push {r9, r12}
|
||||||
; CHECK: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
|
; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
|
||||||
|
; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
|
||||||
; CHECK: blx
|
; CHECK: blx
|
||||||
; CHECK: bne [[BB_end:.?LBB0_[0-9]+]]
|
; CHECK: bne [[BB_end:.?LBB0_[0-9]+]]
|
||||||
; CHECK: blx
|
; CHECK: blx
|
||||||
; CHECK: tlv_atexit
|
; CHECK: tlv_atexit
|
||||||
; CHECK: [[BB_end]]:
|
; CHECK: [[BB_end]]:
|
||||||
; CHECK: blx
|
; CHECK: blx
|
||||||
; CHECK: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
|
; CHECK-NOT: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
|
||||||
; CHECK: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
|
; CHECK-NOT: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
|
||||||
; CHECK: pop {r9, r12}
|
; CHECK-NOT: pop {r9, r12}
|
||||||
; CHECK: pop {r1, r2, r3, r4, r7, pc}
|
; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc}
|
||||||
|
; CHECK: pop {lr}
|
||||||
|
Loading…
Reference in New Issue
Block a user