1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

ARM: decide whether to use movw/movt based on "minsize" attribute.

llvm-svn: 196102
This commit is contained in:
Tim Northover 2013-12-02 14:46:26 +00:00
parent 402d68071c
commit c144b1204e
9 changed files with 49 additions and 14 deletions

View File

@ -1859,12 +1859,12 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
}
}
bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
MachineInstr *MI,
bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
MachineFunction &MF, MachineInstr *MI,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations; it is only really of great benefit to code size.
if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
if (!Subtarget.isMinSize())
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR

View File

@ -417,7 +417,8 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
/// NumBytes. This can save a few bytes per function in code-size, but
/// obviously generates more memory traffic. As such, it only takes
/// effect in functions being optimised for size.
bool tryFoldSPUpdateIntoPushPop(MachineFunction &MF, MachineInstr *MI,
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
MachineFunction &MF, MachineInstr *MI,
unsigned NumBytes);
/// rewriteARMFrameIndex / rewriteT2FrameIndex -

View File

@ -256,7 +256,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes))
if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
FramePtrOffsetInPush += NumBytes;
else
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@ -434,7 +434,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
ARM::SP)
.addReg(FramePtr));
}
} else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, FirstPop, NumBytes))
} else if (NumBytes &&
!tryFoldSPUpdateIntoPushPop(STI, MF, FirstPop, NumBytes))
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.

View File

@ -1745,8 +1745,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
bool HasMinSizeAttr = MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
bool HasMinSizeAttr = Subtarget->isMinSize();
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;

View File

@ -102,6 +102,7 @@ void ARMSubtarget::initializeEnvironment() {
HasVFPv4 = false;
HasFPARMv8 = false;
HasNEON = false;
MinSize = false;
UseNEONForSinglePrecisionFP = false;
UseMulOps = UseFusedMulOps;
SlowFPVMLx = false;
@ -151,6 +152,9 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
}
MinSize =
FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
}
void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {

View File

@ -64,6 +64,10 @@ protected:
bool HasFPARMv8;
bool HasNEON;
/// MinSize - True if the function being compiled has the "minsize" attribute
/// and should be optimised for size at the expense of speed.
bool MinSize;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
/// specified. Use the method useNEONForSinglePrecisionFP() to
/// determine if NEON should actually be used.
@ -270,6 +274,7 @@ public:
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
bool hasVirtualization() const { return HasVirtualization; }
/// isMinSize - Returns the current value of the MinSize flag.
bool isMinSize() const { return MinSize; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
@ -327,7 +332,7 @@ public:
bool isR9Reserved() const { return IsR9Reserved; }
bool useMovt() const { return UseMovt && hasV6T2Ops(); }
/// useMovt - True only when UseMovt is set and isMinSize() is false.
/// NOTE(review): presumably minsize functions avoid movw/movt because a
/// literal-pool load is smaller - confirm against the accompanying test.
bool useMovt() const { return UseMovt && !isMinSize(); }
bool supportsTailCall() const { return SupportsTailCall; }
bool allowsUnalignedMem() const { return AllowsUnalignedMem; }

View File

@ -165,7 +165,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
NumBytes = DPRCSOffset;
int FramePtrOffsetInBlock = 0;
if (tryFoldSPUpdateIntoPushPop(MF, prior(MBBI), NumBytes)) {
if (tryFoldSPUpdateIntoPushPop(STI, MF, prior(MBBI), NumBytes)) {
FramePtrOffsetInBlock = NumBytes;
NumBytes = 0;
}
@ -291,9 +291,9 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
&MBB.front() != MBBI &&
prior(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = prior(MBBI);
if (!tryFoldSPUpdateIntoPushPop(MF, PMBBI, NumBytes))
if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
} else if (!tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes))
} else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
}
}

View File

@ -1012,8 +1012,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
AttributeSet FnAttrs = MF.getFunction()->getAttributes();
OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize);
MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::MinSize);
MinimizeSize = STI->isMinSize();
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());

View File

@ -0,0 +1,26 @@
; RUN: llc -mtriple=thumbv7s %s -o - | FileCheck %s
; RUN: llc -mtriple=armv7s %s -o - | FileCheck %s
; CodeGen should be able to set and reset the MinSize subtarget-feature, and
; make use of it in deciding whether to use MOVW/MOVT for global variables or a
; lit-pool load (saving roughly 2 bytes of code).
@var = global i32 0
; Loads @var and returns it from a function carrying the minsize attribute.
; The directives below expect the address of @var to be fetched from a
; constant-pool entry (the LCPI0_0 label) into a register, and the value to be
; loaded through that same register into r0.
define i32 @small_global() minsize {
; CHECK-LABEL: small_global:
; CHECK: ldr r[[GLOBDEST:[0-9]+]], {{.?LCPI0_0}}
; CHECK: ldr r0, [r[[GLOBDEST]]]
%val = load i32* @var
ret i32 %val
}
; Same body as the minsize variant, but with no function attributes.
; The directives below expect the address of @var to be materialised with a
; movw of :lower16:var followed by a movt of :upper16:var into the same
; register.
define i32 @big_global() {
; CHECK-LABEL: big_global:
; CHECK: movw [[GLOBDEST:r[0-9]+]], :lower16:var
; CHECK: movt [[GLOBDEST]], :upper16:var
%val = load i32* @var
ret i32 %val
}