1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00

[MC] Write padding into fragments when -mc-relax-all flag is used

Summary:
When instruction bundling is enabled and the -mc-relax-all flag is
set, we can write bundle padding directly into fragments and avoid
creating large number of fragments significantly reducing LLVM MC
memory usage.

Test Plan: Regression test attached

Reviewers: eliben

Subscribers: jfb, mseaborn

Differential Revision: http://reviews.llvm.org/D8072

llvm-svn: 234714
This commit is contained in:
Petr Hosek 2015-04-12 23:42:25 +00:00
parent 2bf8a1716d
commit bbb668b972
17 changed files with 180 additions and 37 deletions

View File

@ -50,11 +50,6 @@ private:
/// \brief Is the layout for this fragment valid?
bool isFragmentValid(const MCFragment *F) const;
/// \brief Compute the amount of padding required before this fragment to
/// obey bundling restrictions.
uint64_t computeBundlePadding(const MCFragment *F,
uint64_t FOffset, uint64_t FSize);
public:
MCAsmLayout(MCAssembler &Assembler);

View File

@ -1245,11 +1245,23 @@ public:
FileNames.push_back(FileName);
}
/// \brief Write the necessary bundle padding to the given object writer.
/// Expects a fragment \p F containing instructions and its size \p FSize.
void writeFragmentPadding(const MCFragment &F, uint64_t FSize,
MCObjectWriter *OW) const;
/// @}
void dump();
};
/// \brief Compute the amount of padding required before the fragment \p F to
/// obey bundling restrictions, where \p FOffset is the fragment's offset in
/// its section and \p FSize is the fragment's size.
uint64_t computeBundlePadding(const MCAssembler &Assembler,
const MCFragment *F,
uint64_t FOffset, uint64_t FSize);
} // end namespace llvm
#endif

View File

@ -95,6 +95,9 @@ private:
void fixSymbolsInTLSFixups(const MCExpr *expr);
/// \brief Merge the content of the fragment \p EF into the fragment \p DF.
void mergeFragment(MCDataFragment *, MCEncodedFragmentWithFixups *);
bool SeenIdent;
struct LocalCommon {
@ -106,6 +109,10 @@ private:
std::vector<LocalCommon> LocalCommons;
SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
/// BundleGroups - The stack of fragments holding the bundle-locked
/// instructions.
llvm::SmallVector<MCDataFragment *, 4> BundleGroups;
};
MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,

View File

@ -44,10 +44,6 @@ class MCObjectStreamer : public MCStreamer {
void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
// If any labels have been emitted but not assigned fragments, ensure that
// they get assigned, either to F if possible or to a new data fragment.
void flushPendingLabels(MCFragment *F);
protected:
MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
MCCodeEmitter *Emitter);
@ -85,6 +81,12 @@ protected:
bool changeSectionImpl(const MCSection *Section, const MCExpr *Subsection);
/// If any labels have been emitted but not assigned fragments, ensure that
/// they get assigned, either to F if possible or to a new data fragment.
/// Optionally, it is also possible to provide an offset \p FOffset, which
/// will be used as a symbol offset within the fragment.
void flushPendingLabels(MCFragment *F, uint64_t FOffset = 0);
public:
void visitUsedSymbol(const MCSymbol &Sym) override;

View File

@ -229,8 +229,9 @@ uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
return getSectionAddressSize(SD);
}
uint64_t MCAsmLayout::computeBundlePadding(const MCFragment *F,
uint64_t FOffset, uint64_t FSize) {
uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
const MCFragment *F,
uint64_t FOffset, uint64_t FSize) {
uint64_t BundleSize = Assembler.getBundleAlignSize();
assert(BundleSize > 0 &&
"computeBundlePadding should only be called if bundling is enabled");
@ -332,6 +333,7 @@ MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) {
getFragmentList().insert(IP, F);
F->setParent(this);
}
return IP;
}
@ -632,7 +634,12 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
// The fragment's offset will point to after the padding, and its computed
// size won't include the padding.
//
if (Assembler.isBundlingEnabled() && F->hasInstructions()) {
// When the -mc-relax-all flag is used, we optimize bundling by writting the
// bundle padding directly into fragments when the instructions are emitted
// inside the streamer.
//
if (Assembler.isBundlingEnabled() && !Assembler.getRelaxAll() &&
F->hasInstructions()) {
assert(isa<MCEncodedFragment>(F) &&
"Only MCEncodedFragment implementations have instructions");
uint64_t FSize = Assembler.computeFragmentSize(*this, *F);
@ -640,7 +647,8 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
if (FSize > Assembler.getBundleAlignSize())
report_fatal_error("Fragment can't be larger than a bundle size");
uint64_t RequiredBundlePadding = computeBundlePadding(F, F->Offset, FSize);
uint64_t RequiredBundlePadding = computeBundlePadding(Assembler, F,
F->Offset, FSize);
if (RequiredBundlePadding > UINT8_MAX)
report_fatal_error("Padding cannot exceed 255 bytes");
F->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
@ -655,24 +663,18 @@ static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) {
OW->WriteBytes(EF.getContents());
}
/// \brief Write the fragment \p F to the output file.
static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment &F) {
MCObjectWriter *OW = &Asm.getWriter();
// FIXME: Embed in fragments instead?
uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
void MCAssembler::writeFragmentPadding(const MCFragment &F, uint64_t FSize,
MCObjectWriter *OW) const {
// Should NOP padding be written out before this fragment?
unsigned BundlePadding = F.getBundlePadding();
if (BundlePadding > 0) {
assert(Asm.isBundlingEnabled() &&
assert(isBundlingEnabled() &&
"Writing bundle padding with disabled bundling");
assert(F.hasInstructions() &&
"Writing bundle padding for a fragment without instructions");
unsigned TotalLength = BundlePadding + static_cast<unsigned>(FragmentSize);
if (F.alignToBundleEnd() && TotalLength > Asm.getBundleAlignSize()) {
unsigned TotalLength = BundlePadding + static_cast<unsigned>(FSize);
if (F.alignToBundleEnd() && TotalLength > getBundleAlignSize()) {
// If the padding itself crosses a bundle boundary, it must be emitted
// in 2 pieces, since even nop instructions must not cross boundaries.
// v--------------v <- BundleAlignSize
@ -681,16 +683,27 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
// | Prev |####|####| F |
// ----------------------------
// ^-------------------^ <- TotalLength
unsigned DistanceToBoundary = TotalLength - Asm.getBundleAlignSize();
if (!Asm.getBackend().writeNopData(DistanceToBoundary, OW))
unsigned DistanceToBoundary = TotalLength - getBundleAlignSize();
if (!getBackend().writeNopData(DistanceToBoundary, OW))
report_fatal_error("unable to write NOP sequence of " +
Twine(DistanceToBoundary) + " bytes");
BundlePadding -= DistanceToBoundary;
}
if (!Asm.getBackend().writeNopData(BundlePadding, OW))
if (!getBackend().writeNopData(BundlePadding, OW))
report_fatal_error("unable to write NOP sequence of " +
Twine(BundlePadding) + " bytes");
}
}
/// \brief Write the fragment \p F to the output file.
static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment &F) {
MCObjectWriter *OW = &Asm.getWriter();
// FIXME: Embed in fragments instead?
uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
Asm.writeFragmentPadding(F, FragmentSize, OW);
// This variable (and its dummy usage) is to participate in the assert at
// the end of the function.

View File

@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
@ -40,6 +41,48 @@ using namespace llvm;
MCELFStreamer::~MCELFStreamer() {
}
void MCELFStreamer::mergeFragment(MCDataFragment *DF,
MCEncodedFragmentWithFixups *EF) {
MCAssembler &Assembler = getAssembler();
if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) {
uint64_t FSize = EF->getContents().size();
if (FSize > Assembler.getBundleAlignSize())
report_fatal_error("Fragment can't be larger than a bundle size");
uint64_t RequiredBundlePadding = computeBundlePadding(
Assembler, EF, DF->getContents().size(), FSize);
if (RequiredBundlePadding > UINT8_MAX)
report_fatal_error("Padding cannot exceed 255 bytes");
if (RequiredBundlePadding > 0) {
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
MCObjectWriter *OW = Assembler.getBackend().createObjectWriter(VecOS);
EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
Assembler.writeFragmentPadding(*EF, FSize, OW);
VecOS.flush();
delete OW;
DF->getContents().append(Code.begin(), Code.end());
}
}
flushPendingLabels(DF, DF->getContents().size());
for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) {
EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() +
DF->getContents().size());
DF->getFixups().push_back(EF->getFixups()[i]);
}
DF->setHasInstructions(true);
DF->getContents().append(EF->getContents().begin(), EF->getContents().end());
}
void MCELFStreamer::InitSections(bool NoExecStack) {
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
@ -449,7 +492,16 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
if (Assembler.isBundlingEnabled()) {
MCSectionData *SD = getCurrentSectionData();
if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst())
if (Assembler.getRelaxAll() && SD->isBundleLocked())
// If the -mc-relax-all flag is used and we are bundle-locked, we re-use
// the current bundle group.
DF = BundleGroups.back();
else if (Assembler.getRelaxAll() && !SD->isBundleLocked())
// When not in a bundle-locked group and the -mc-relax-all flag is used,
// we create a new temporary fragment which will be later merged into
// the current fragment.
DF = new MCDataFragment();
else if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst())
// If we are bundle-locked, we re-use the current fragment.
// The bundle-locking directive ensures this is a new data fragment.
DF = cast<MCDataFragment>(getCurrentFragment());
@ -487,6 +539,14 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
}
DF->setHasInstructions(true);
DF->getContents().append(Code.begin(), Code.end());
if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) {
MCSectionData *SD = getCurrentSectionData();
if (!SD->isBundleLocked()) {
mergeFragment(getOrCreateDataFragment(), DF);
delete DF;
}
}
}
void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
@ -510,6 +570,12 @@ void MCELFStreamer::EmitBundleLock(bool AlignToEnd) {
if (!SD->isBundleLocked())
SD->setBundleGroupBeforeFirstInst(true);
if (getAssembler().getRelaxAll() && !SD->isBundleLocked()) {
// TODO: drop the lock state and set directly in the fragment
MCDataFragment *DF = new MCDataFragment();
BundleGroups.push_back(DF);
}
SD->setBundleLockState(AlignToEnd ? MCSectionData::BundleLockedAlignToEnd :
MCSectionData::BundleLocked);
}
@ -525,7 +591,27 @@ void MCELFStreamer::EmitBundleUnlock() {
else if (SD->isBundleGroupBeforeFirstInst())
report_fatal_error("Empty bundle-locked group is forbidden");
SD->setBundleLockState(MCSectionData::NotBundleLocked);
// When the -mc-relax-all flag is used, we emit instructions to fragments
// stored on a stack. When the bundle unlock is emited, we pop a fragment
// from the stack a merge it to the one below.
if (getAssembler().getRelaxAll()) {
assert(!BundleGroups.empty() && "There are no bundle groups");
MCDataFragment *DF = BundleGroups.back();
// FIXME: Use BundleGroups to track the lock state instead.
SD->setBundleLockState(MCSectionData::NotBundleLocked);
// FIXME: Use more separate fragments for nested groups.
if (!SD->isBundleLocked()) {
mergeFragment(getOrCreateDataFragment(), DF);
BundleGroups.pop_back();
delete DF;
}
if (SD->getBundleLockState() != MCSectionData::BundleLockedAlignToEnd)
getOrCreateDataFragment()->setAlignToBundleEnd(false);
} else
SD->setBundleLockState(MCSectionData::NotBundleLocked);
}
void MCELFStreamer::Flush() {

View File

@ -37,7 +37,7 @@ MCObjectStreamer::~MCObjectStreamer() {
delete Assembler;
}
void MCObjectStreamer::flushPendingLabels(MCFragment *F) {
void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
if (PendingLabels.size()) {
if (!F) {
F = new MCDataFragment();
@ -46,7 +46,7 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F) {
}
for (MCSymbolData *SD : PendingLabels) {
SD->setFragment(F);
SD->setOffset(0);
SD->setOffset(FOffset);
}
PendingLabels.clear();
}
@ -87,7 +87,8 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() {
MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
// When bundling is enabled, we don't want to add data to a fragment that
// already has instructions (see MCELFStreamer::EmitInstToData for details)
if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) {
if (!F || (Assembler->isBundlingEnabled() && !Assembler->getRelaxAll() &&
F->hasInstructions())) {
F = new MCDataFragment();
insert(F);
}
@ -143,7 +144,9 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
// If there is a current fragment, mark the symbol as pointing into it.
// Otherwise queue the label and set its fragment pointer when we emit the
// next fragment.
if (auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment())) {
auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
if (F && !(getAssembler().isBundlingEnabled() &&
getAssembler().getRelaxAll())) {
SD.setFragment(F);
SD.setOffset(F->getContents().size());
} else {
@ -242,6 +245,9 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst,
const MCSubtargetInfo &STI) {
if (getAssembler().getRelaxAll() && getAssembler().isBundlingEnabled())
llvm_unreachable("All instructions should have already been relaxed");
// Always create a new, separate fragment here, because its size can change
// during relaxation.
MCRelaxableFragment *IF = new MCRelaxableFragment(Inst, STI);

View File

@ -1,4 +1,5 @@
# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - 2>&1 | FileCheck %s
# CHECK: ERROR: Fragment can't be larger than a bundle size

View File

@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Test two different executable sections with bundling.

View File

@ -1,6 +1,8 @@
# RUN: llvm-mc -triple=i686-linux -filetype=obj %s -o - | \
# RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s
# RUN: llvm-mc -triple=i686-nacl -filetype=obj %s -o - | \
# RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s
# RUN: llvm-mc -triple=i686-nacl -filetype=obj -mc-relax-all %s -o - | \
# RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s
.bundle_align_mode 5

View File

@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Test that long nops are generated for padding where possible.

View File

@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Will be bundle-aligning to 16 byte boundaries
.bundle_align_mode 4

View File

@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Test some variations of padding to the end of a bundle.

View File

@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Test some variations of padding for bundle-locked groups.

View File

@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Test that an instruction near a bundle end gets properly padded
# after it is relaxed.

View File

@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble - | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
# RUN: | llvm-objdump -disassemble - | FileCheck %s
# Test that instructions inside bundle-locked groups are relaxed even if their
# fixup is short enough not to warrant relaxation on its own.

View File

@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s
# Test simple NOP insertion for single instructions.
@ -24,14 +26,17 @@ foo:
movl %ebx, %edi
callq bar
cmpl %r14d, %ebp
# CHECK-RELAX: nopl
jle .L_ELSE
# Due to the padding that's inserted before the addl, the jump target
# becomes farther by one byte.
# CHECK: jle 5
# CHECK-OPT: jle 5
# CHECK-RELAX: jle 7
addl %ebp, %eax
# CHECK: nop
# CHECK-NEXT: 20: addl
# CHECK-OPT: nop
# CHECK-OPT-NEXT:20: addl
# CHECK-RELAX: 26: addl
jmp .L_RET
.L_ELSE: