1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

Make X86-64 in the Large model always emit 64-bit calls.

The large code model is documented at
http://www.x86-64.org/documentation/abi.pdf and says that calls should
assume their target doesn't live within the 32-bit pc-relative offset
that fits in the call instruction.

To do this, we turn off the global-address->target-global-address
conversion in X86TargetLowering::LowerCall(). The first attempt at
this broke the lazy JIT because it can separate the movabs(imm->reg)
from the actual call instruction. The lazy JIT receives the address of
the movabs as a relocation and needs to record the return address from
the call; and then when that call happens, it needs to patch the
movabs with the newly-compiled target. We could thread the call
instruction into the relocation and record the movabs<->call mapping
explicitly, but that seems to require at least as much new
complication in the code generator as this change.

To fix this, we make lazy functions _always_ go through a call
stub. You'd think we'd only have to force lazy calls through a stub on
difficult platforms, but that turns out to break indirect calls
through a function pointer. The right fix for that is to distinguish
between calls and address-of operations on uncompiled functions, but
that's complex enough to leave for someone else to do.

Another attempt at this defined a new CALL64i pseudo-instruction,
which expanded to a 2-instruction sequence in the assembly output and
was special-cased in the X86CodeEmitter's emitInstruction()
function. That broke indirect calls in the same way as above.

This patch also removes a hack forcing Darwin to the small code model.
Without far-call-stubs, the small code model requires things of the
JITMemoryManager that the DefaultJITMemoryManager can't provide.

Thanks to echristo for lots of testing!

llvm-svn: 88984
This commit is contained in:
Jeffrey Yasskin 2009-11-16 22:41:33 +00:00
parent f3784a8781
commit 0f846dbb3e
8 changed files with 187 additions and 67 deletions

View File

@ -247,16 +247,6 @@ namespace {
/// specified GV address.
void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
/// AddCallbackAtLocation - If the target is capable of rewriting an
/// instruction without the use of a stub, record the location of the use so
/// we know which function is being used at the location.
///
/// Returns the address of the shared lazy-resolver entry point; the call
/// site recorded here is what lets the resolver later patch this location
/// with the address of the compiled F.
void *AddCallbackAtLocation(Function *F, void *Location) {
  MutexGuard locked(TheJIT->lock); // the call-site map is shared JIT state
  /// Get the target-specific JIT resolver function.
  state.AddCallSite(locked, Location, F);
  return (void*)(intptr_t)LazyResolverFn;
}
void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
SmallVectorImpl<void*> &Ptrs);
@ -756,13 +746,6 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
!MayNeedFarStub)
return TheJIT->getPointerToFunction(F);
// Okay, the function has not been compiled yet, if the target callback
// mechanism is capable of rewriting the instruction directly, prefer to do
// that instead of emitting a stub. This uses the lazy resolver, so is not
// legal if lazy compilation is disabled.
if (!MayNeedFarStub && TheJIT->isCompilingLazily())
return Resolver.AddCallbackAtLocation(F, Reference);
// Otherwise, we have to emit a stub.
void *StubAddr = Resolver.getFunctionStub(F);

View File

@ -82,7 +82,7 @@ namespace {
void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp = 0, intptr_t PCAdj = 0,
bool MayNeedFarStub = false, bool Indirect = false);
bool Indirect = false);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
intptr_t PCAdj = 0);
@ -176,7 +176,6 @@ template<class CodeEmitter>
void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp /* = 0 */,
intptr_t PCAdj /* = 0 */,
bool MayNeedFarStub /* = false */,
bool Indirect /* = false */) {
intptr_t RelocCST = Disp;
if (Reloc == X86::reloc_picrel_word)
@ -185,9 +184,9 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
RelocCST = PCAdj;
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
GV, RelocCST, MayNeedFarStub)
GV, RelocCST, false)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
GV, RelocCST, MayNeedFarStub);
GV, RelocCST, false);
MCE.addRelocation(MR);
// The relocated value will be added to the displacement
if (Reloc == X86::reloc_absolute_dword)
@ -333,10 +332,9 @@ void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
// do it, otherwise fallback to absolute (this is determined by IsPCRel).
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
// 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
bool MayNeedFarStub = isa<Function>(RelocOp->getGlobal());
bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
Adj, MayNeedFarStub, Indirect);
Adj, Indirect);
} else if (RelocOp->isSymbol()) {
emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
} else if (RelocOp->isCPI()) {
@ -633,14 +631,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
}
if (MO.isGlobal()) {
// Assume undefined functions may be outside the Small codespace.
bool MayNeedFarStub =
(Is64BitMode &&
(TM.getCodeModel() == CodeModel::Large ||
TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
Opcode == X86::TAILJMPd;
emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
MO.getOffset(), 0, MayNeedFarStub);
MO.getOffset(), 0);
break;
}
@ -681,10 +673,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64ri)
rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
bool MayNeedFarStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
MayNeedFarStub, Indirect);
Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@ -790,10 +781,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64ri32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
bool MayNeedFarStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
MayNeedFarStub, Indirect);
Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@ -831,10 +821,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64mi32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO.isGlobal()) {
bool MayNeedFarStub = isa<Function>(MO.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO, TM);
emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
MayNeedFarStub, Indirect);
Indirect);
} else if (MO.isSymbol())
emitExternalSymbolAddress(MO.getSymbolName(), rt);
else if (MO.isCPI())

View File

@ -1937,9 +1937,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
FPDiff, dl);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
bool WasGlobalOrExternal = false;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
// through a register, since the call instruction's 32-bit
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
WasGlobalOrExternal = true;
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
GlobalValue *GV = G->getGlobal();
@ -1967,6 +1977,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
G->getOffset(), OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
WasGlobalOrExternal = true;
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@ -1984,7 +1995,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
} else if (isTailCall) {
}
if (isTailCall && !WasGlobalOrExternal) {
unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,

View File

@ -367,8 +367,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
// Rewrite the call target... so that we don't end up here every time we
// execute the call.
#if defined (X86_64_JIT)
if (!isStub)
*(intptr_t *)(RetAddr - 0xa) = NewVal;
assert(isStub &&
"X86-64 doesn't support rewriting non-stub lazy compilation calls:"
" the call instruction varies too much.");
#else
*(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
#endif

View File

@ -185,14 +185,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
// 64-bit JIT places everything in the same buffer except external functions.
// On Darwin, use small code model but hack the call instruction for
// externals. Elsewhere, do not assume globals are in the lower 4G.
if (Subtarget.is64Bit()) {
if (Subtarget.isTargetDarwin())
setCodeModel(CodeModel::Small);
else
if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
}
PM.add(createX86CodeEmitterPass(*this, MCE));
@ -211,14 +205,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
// 64-bit JIT places everything in the same buffer except external functions.
// On Darwin, use small code model but hack the call instruction for
// externals. Elsewhere, do not assume globals are in the lower 4G.
if (Subtarget.is64Bit()) {
if (Subtarget.isTargetDarwin())
setCodeModel(CodeModel::Small);
else
if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
}
PM.add(createX86JITCodeEmitterPass(*this, JCE));

View File

@ -77,12 +77,12 @@ endif
# Both AuroraUX & Solaris do not have the -m flag for ulimit
ifeq ($(HOST_OS),SunOS)
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ;
else # !SunOS
ifeq ($(HOST_OS),AuroraUX)
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ;
else # !AuroraUX
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 512000 ;
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ;
endif # AuroraUX
endif # SunOS

View File

@ -26,10 +26,22 @@
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TypeBuilder.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelect.h"
#include "llvm/Type.h"
#include <vector>
#include <string.h>
#if HAVE_ERRNO_H
#include <errno.h>
#endif
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#if _POSIX_MAPPED_FILES > 0
#include <sys/mman.h>
#endif
using namespace llvm;
@ -177,6 +189,15 @@ public:
}
};
// LoadAssemblyInto - Parse the LLVM assembly string 'assembly' into module
// M, failing the current test (with the parser's diagnostic text) if the
// parse fails.
void LoadAssemblyInto(Module *M, const char *assembly) {
  SMDiagnostic Error;
  bool success = NULL != ParseAssemblyString(assembly, M, Error, M->getContext());
  // Render the diagnostic into a string so it shows up in the gtest
  // failure message.
  std::string errMsg;
  raw_string_ostream os(errMsg);
  Error.Print("", os);
  ASSERT_TRUE(success) << os.str();
}
class JITTest : public testing::Test {
protected:
virtual void SetUp() {
@ -191,12 +212,7 @@ class JITTest : public testing::Test {
}
void LoadAssembly(const char *assembly) {
SMDiagnostic Error;
bool success = NULL != ParseAssemblyString(assembly, M, Error, Context);
std::string errMsg;
raw_string_ostream os(errMsg);
Error.Print("", os);
ASSERT_TRUE(success) << os.str();
LoadAssemblyInto(M, assembly);
}
LLVMContext Context;
@ -498,6 +514,135 @@ TEST_F(JITTest, NoStubs) {
}
#endif
#if _POSIX_MAPPED_FILES > 0 && (defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64))
// FarCallMemMgr - JIT memory manager that lays out stubs and function
// bodies several gigabytes apart inside one 16GB mapping, so that no two
// emitted entities can reach each other with a 32-bit pc-relative call.
class FarCallMemMgr : public RecordingJITMemoryManager {
  void *MmapRegion;      // base of the 16GB reservation
  size_t MmapSize;       // total size of the reservation
  uint8_t *NextStub;     // bump pointer for stub allocations
  uint8_t *NextFunction; // bump pointer for function bodies
public:
  FarCallMemMgr()
    : MmapSize(16ULL << 30) { // 16GB
    MmapRegion = mmap(NULL, MmapSize, PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANON, -1, 0);
    if (MmapRegion == MAP_FAILED) {
      // NOTE(review): ADD_FAILURE is non-fatal, so execution continues and
      // the memsets below would touch a MAP_FAILED pointer — presumably
      // acceptable because the whole test is doomed anyway; confirm.
      ADD_FAILURE() << "mmap failed: " << strerror(errno);
    }

    // Set up the 16GB mapped region in several chunks:
    // Stubs / ~5GB empty space / Function 1 / ~5GB empty space / Function 2
    // This way no two entities can use a 32-bit relative call to reach each other.
    NextStub = static_cast<uint8_t*>(MmapRegion);
    NextFunction = NextStub + (5ULL << 30);

    // Next, poison some of the memory so a wild call will eventually crash,
    // even if memory was initialized by the OS to 0. We can't poison all of
    // the memory because we want to be able to run on systems with less than
    // 16GB of physical ram.
    int TrapInstr = 0xCC; // INT 3
    memset(NextStub, TrapInstr, 1<<10);
    for (size_t Offset = 1<<30; Offset < MmapSize; Offset += 1<<30) {
      // Fill the 2KB around each GB boundary with trap instructions. This
      // should ensure that we can't run into emitted functions without hitting
      // the trap.
      memset(NextStub + Offset - (1<<10), TrapInstr, 2<<10);
    }
  }

  ~FarCallMemMgr() {
    EXPECT_EQ(0, munmap(MmapRegion, MmapSize));
  }

  // The region is mapped read/write/exec up front, so the permission
  // toggles are no-ops.
  virtual void setMemoryWritable() {}
  virtual void setMemoryExecutable() {}

  // Hand out function bodies ~5GB apart; each is told it has 1GB of room.
  virtual uint8_t *startFunctionBody(const Function *F,
                                     uintptr_t &ActualSize) {
    ActualSize = 1 << 30;
    uint8_t *Result = NextFunction;
    NextFunction += 5ULL << 30;
    return Result;
  }
  virtual void endFunctionBody(const Function*, uint8_t*, uint8_t*) {}

  // Stubs are carved sequentially from the bottom of the mapping.
  virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
                                unsigned Alignment) {
    // Round NextStub up to the requested alignment before carving.
    NextStub = reinterpret_cast<uint8_t*>(
        uintptr_t(NextStub + Alignment - 1) &~ uintptr_t(Alignment - 1));
    uint8_t *Result = NextStub;
    NextStub += StubSize;
    return Result;
  }
};
// FarTargetTest - Value-parameterized fixture that runs the far-call test
// at several optimization levels. It saves the global code model on entry
// and restores it on exit, so the test's switch to CodeModel::Large does
// not leak into other tests in this binary.
class FarTargetTest : public ::testing::TestWithParam<CodeGenOpt::Level> {
protected:
  FarTargetTest() : SavedCodeModel(TargetMachine::getCodeModel()) {}
  ~FarTargetTest() {
    TargetMachine::setCodeModel(SavedCodeModel);
  }
  const CodeModel::Model SavedCodeModel; // code model in effect before the test
};
// Run each FarTargetTest case both unoptimized and at the default opt level.
INSTANTIATE_TEST_CASE_P(CodeGenOpt,
                        FarTargetTest,
                        ::testing::Values(CodeGenOpt::None,
                                          CodeGenOpt::Default));
// CallToFarTarget - End-to-end check that JITed code can call a function
// emitted more than 32 bits away from the call site.
TEST_P(FarTargetTest, CallToFarTarget) {
  // x86-64 can only make direct calls to functions within 32 bits of
  // the current PC. To call anything farther away, we have to load
  // the address into a register and call through the register. The
  // old JIT did this by allocating a stub for any far call. However,
  // that stub needed to be within 32 bits of the callsite. Here we
  // test that the JIT correctly deals with stubs and calls more than
  // 32 bits away from the callsite.

  // Make sure the code generator is assuming code might be far away.
  //TargetMachine::setCodeModel(CodeModel::Large);
  LLVMContext Context;
  Module *M = new Module("<main>", Context);
  ExistingModuleProvider *MP = new ExistingModuleProvider(M);
  // FarCallMemMgr spreads stubs and function bodies >4GB apart.
  JITMemoryManager *MemMgr = new FarCallMemMgr();
  std::string Error;
  OwningPtr<ExecutionEngine> JIT(EngineBuilder(MP)
                                 .setEngineKind(EngineKind::JIT)
                                 .setErrorStr(&Error)
                                 .setJITMemoryManager(MemMgr)
                                 .setOptLevel(GetParam())
                                 .create());
  ASSERT_EQ(Error, "");
  // NOTE(review): the Large code model is set only after the engine is
  // created, and the earlier call above is left commented out — presumably
  // so engine construction itself is unaffected; confirm the ordering is
  // intentional. The fixture restores the saved model afterwards.
  TargetMachine::setCodeModel(CodeModel::Large);
  LoadAssemblyInto(M,
                   "define i32 @test() { "
                   " ret i32 7 "
                   "} "
                   " "
                   "define i32 @test_far() { "
                   " %result = call i32 @test() "
                   " ret i32 %result "
                   "} ");

  // First, lay out a function early in memory.
  Function *TestFunction = M->getFunction("test");
  int32_t (*TestFunctionPtr)() = reinterpret_cast<int32_t(*)()>(
      (intptr_t)JIT->getPointerToFunction(TestFunction));
  ASSERT_EQ(7, TestFunctionPtr());

  // We now lay out the far-away function. This should land >4GB away from test().
  Function *FarFunction = M->getFunction("test_far");
  int32_t (*FarFunctionPtr)() = reinterpret_cast<int32_t(*)()>(
      (intptr_t)JIT->getPointerToFunction(FarFunction));
  EXPECT_LT(1LL << 32, llabs(intptr_t(FarFunctionPtr) - intptr_t(TestFunctionPtr)))
      << "Functions must be >32 bits apart or the test is meaningless.";

  // This used to result in a segfault in FarFunction, when its call instruction
  // jumped to the wrong address.
  EXPECT_EQ(7, FarFunctionPtr());
}
#endif // Platform has far-call problem.
// This code is copied from JITEventListenerTest, but it only runs once for all
// the tests in this directory. Everything seems fine, but that's strange
// behavior.

View File

@ -53,8 +53,9 @@ class GoogleTest(object):
def execute(self, test, litConfig):
testPath,testName = os.path.split(test.getSourcePath())
if not os.path.exists(testPath):
# Handle GTest typed tests, whose name includes a '/'.
while not os.path.exists(testPath):
# Handle GTest parametrized and typed tests, whose name includes
# some '/'s.
testPath, namePrefix = os.path.split(testPath)
testName = os.path.join(namePrefix, testName)