1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[WinEH] Optimize WinEH state stores

32-bit x86 Windows targets use a linked list of nodes allocated on the
stack, referenced via thread-local storage.  The personality routine
interprets one of the fields in the node as a 'state number' which
indicates where the personality routine should transfer control.

State transitions are possible only before call-sites which may throw
exceptions.  Our previous scheme had us update the state number before
all call-sites which may throw.

Instead, we can try to minimize the number of times we need to store by
reasoning about the nearest store which dominates the current call-site.
If the last store agrees with the current call-site, then we know that
the state-update is redundant and can be elided.

This is largely straightforward: an RPO walk of the blocks allows us to
correctly forward propagate the information when the function is a DAG.
Currently, loops are not handled optimally and may trigger superfluous
state stores.

Differential Revision: http://reviews.llvm.org/D16763

llvm-svn: 261122
This commit is contained in:
David Majnemer 2016-02-17 18:37:11 +00:00
parent e7d97265e1
commit 81eb2d6bae
3 changed files with 247 additions and 32 deletions

View File

@ -15,14 +15,20 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include <deque>
using namespace llvm;
@ -33,6 +39,8 @@ void initializeWinEHStatePassPass(PassRegistry &);
}
namespace {
const int OverdefinedState = INT_MIN;
class WinEHStatePass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
@ -82,6 +90,8 @@ private:
// Per-function state
EHPersonality Personality = EHPersonality::Unknown;
Function *PersonalityFn = nullptr;
bool UseStackGuard = false;
int ParentBaseState;
/// The stack allocation containing all EH data, including the link in the
/// fs:00 chain and the current state.
@ -170,6 +180,7 @@ bool WinEHStatePass::runOnFunction(Function &F) {
// Reset per-function state.
PersonalityFn = nullptr;
Personality = EHPersonality::Unknown;
UseStackGuard = false;
return true;
}
@ -247,7 +258,6 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
// Struct type of RegNode. Used for GEPing.
Type *RegNodeTy;
StringRef PersonalityName = PersonalityFn->getName();
IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin());
Type *Int8PtrType = Builder.getInt8PtrTy();
if (Personality == EHPersonality::MSVC_CXX) {
@ -259,7 +269,8 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -1
StateFieldIndex = 2;
insertStateNumberStore(&*Builder.GetInsertPoint(), -1);
ParentBaseState = -1;
insertStateNumberStore(&*Builder.GetInsertPoint(), ParentBaseState);
// Handler = __ehhandler$F
Function *Trampoline = generateLSDAInEAXThunk(F);
Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
@ -267,7 +278,6 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
} else if (Personality == EHPersonality::MSVC_X86SEH) {
// If _except_handler4 is in use, some additional guard checks and prologue
// stuff is required.
bool UseStackGuard = (PersonalityName == "_except_handler4");
RegNodeTy = getSEHRegistrationType();
RegNode = Builder.CreateAlloca(RegNodeTy);
// SavedESP = llvm.stacksave()
@ -276,7 +286,10 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -2 / -1
StateFieldIndex = 4;
insertStateNumberStore(&*Builder.GetInsertPoint(), UseStackGuard ? -2 : -1);
StringRef PersonalityName = PersonalityFn->getName();
UseStackGuard = (PersonalityName == "_except_handler4");
ParentBaseState = UseStackGuard ? -2 : -1;
insertStateNumberStore(&*Builder.GetInsertPoint(), ParentBaseState);
// ScopeTable = llvm.x86.seh.lsda(F)
Value *FI8 = Builder.CreateBitCast(F, Int8PtrType);
Value *LSDA = Builder.CreateCall(
@ -388,6 +401,88 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
Builder.CreateStore(Next, FSZero);
}
// Determine the state number that ordinary (non-invoke) call-sites in BB
// should be assigned.
//
// BB is expected to have exactly one funclet color.  If the entry block of
// that funclet begins with a funclet pad that has an entry in
// FuncInfo.FuncletBaseStateMap, the mapped state is returned; in every other
// case the result is -1.
//
// NOTE(review): the fallback is the literal -1 rather than the function's
// ParentBaseState, which the prologue sets to -2 when _except_handler4 is the
// personality -- confirm that this is intended.
static int getBaseStateForBB(DenseMap<BasicBlock *, ColorVector> &BlockColors,
                             WinEHFuncInfo &FuncInfo, BasicBlock *BB) {
  ColorVector &Colors = BlockColors[BB];
  assert(Colors.size() == 1 && "multi-color BB not removed by preparation");

  // The sole color is the entry block of the funclet that owns BB.
  BasicBlock *FuncletEntryBB = Colors.front();
  auto *Pad = dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI());
  if (!Pad)
    return -1;

  auto Found = FuncInfo.FuncletBaseStateMap.find(Pad);
  if (Found == FuncInfo.FuncletBaseStateMap.end())
    return -1;
  return Found->second;
}
// Compute the EH state that must be in effect when the call-site CS executes.
//
// An invoke uses the state recorded for it in FuncInfo.InvokeStateMap.  Any
// other call-site has no actions to take after an unwind, so it uses the base
// state of the block that contains it.
static int getStateForCallSite(DenseMap<BasicBlock *, ColorVector> &BlockColors,
                               WinEHFuncInfo &FuncInfo, CallSite CS) {
  auto *Invoke = dyn_cast<InvokeInst>(CS.getInstruction());

  // Plain call: defer to the base state of the enclosing block.
  if (!Invoke)
    return getBaseStateForBB(BlockColors, FuncInfo, CS.getParent());

  // Invoke: every invoke is expected to have been numbered already.
  assert(FuncInfo.InvokeStateMap.count(Invoke) && "invoke has no state!");
  return FuncInfo.InvokeStateMap[Invoke];
}
// Compute the state that is known to hold on entry to BB by intersecting the
// FinalStates of all of BB's predecessors.
//
// Returns ParentBaseState for the entry block, the single state shared by
// every predecessor when they all agree, and OverdefinedState whenever no
// single state can be established.
static int getPredState(DenseMap<BasicBlock *, int> &FinalStates, Function &F,
                        int ParentBaseState, BasicBlock *BB) {
  // The entry block has no predecessors, but the prologue always leaves it in
  // a fixed, known state.
  if (BB == &F.getEntryBlock())
    return ParentBaseState;

  // EH pads are conservatively reported as overdefined.
  if (BB->isEHPad())
    return OverdefinedState;

  int Agreed = OverdefinedState;
  for (BasicBlock *Pred : predecessors(BB)) {
    auto FinalIt = FinalStates.find(Pred);

    // Give up if this predecessor has no recorded final state, or if it
    // reaches BB through a catchret (i.e. via exceptional control flow).
    if (FinalIt == FinalStates.end() ||
        isa<CatchReturnInst>(Pred->getTerminator()))
      return OverdefinedState;

    const int PredFinal = FinalIt->second;
    assert(PredFinal != OverdefinedState &&
           "overdefined BBs shouldn't be in FinalStates");

    // The first predecessor seeds the candidate; each later one must match
    // it, otherwise the predecessors disagree and BB is overdefined.
    if (Agreed == OverdefinedState)
      Agreed = PredFinal;
    else if (Agreed != PredFinal)
      return OverdefinedState;
  }

  return Agreed;
}
// Decide whether the instruction wrapped by CS needs the EH state to be
// up to date when it executes.
//
// Instructions that are not call-sites never need a store.  Under an
// asynchronous EH personality a call-site needs one unless it does not access
// memory; under any other personality it needs one unless it cannot throw.
static bool isStateStoreNeeded(EHPersonality Personality, CallSite CS) {
  // A null CallSite means the instruction is neither a call nor an invoke.
  if (!CS)
    return false;

  const bool CannotFault = isAsynchronousEHPersonality(Personality)
                               ? CS.doesNotAccessMemory()
                               : CS.doesNotThrow();
  return !CannotFault;
}
void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
// Mark the registration node. The backend needs to know which alloca it is so
// that it can recover the original frame pointer.
@ -405,38 +500,86 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
// Iterate all the instructions and emit state number stores.
DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(F);
for (BasicBlock &BB : F) {
// Figure out what state we should assign calls in this block.
int BaseState = -1;
auto &BBColors = BlockColors[&BB];
ReversePostOrderTraversal<Function *> RPOT(&F);
assert(BBColors.size() == 1 &&
"multi-color BB not removed by preparation");
BasicBlock *FuncletEntryBB = BBColors.front();
if (auto *FuncletPad =
dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI())) {
// We do not support nesting funclets within cleanuppads.
if (isa<CleanupPadInst>(FuncletPad))
// InitialStates yields the state of the first call-site for a BasicBlock.
DenseMap<BasicBlock *, int> InitialStates;
// FinalStates yields the state of the last call-site for a BasicBlock.
DenseMap<BasicBlock *, int> FinalStates;
// Worklist used to revisit BasicBlocks with indeterminate
// Initial/Final-States.
std::deque<BasicBlock *> Worklist;
// Fill in InitialStates and FinalStates for BasicBlocks with call-sites.
for (BasicBlock *BB : RPOT) {
int InitialState = OverdefinedState;
int FinalState;
if (&F.getEntryBlock() == BB)
InitialState = FinalState = ParentBaseState;
for (Instruction &I : *BB) {
CallSite CS(&I);
if (!isStateStoreNeeded(Personality, CS))
continue;
auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
BaseState = BaseStateI->second;
int State = getStateForCallSite(BlockColors, FuncInfo, CS);
if (InitialState == OverdefinedState)
InitialState = State;
FinalState = State;
}
// No call-sites in this basic block? That's OK, we will come back to these
// in a later pass.
if (InitialState == OverdefinedState) {
Worklist.push_back(BB);
continue;
}
DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
<< " InitialState=" << InitialState << '\n');
DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
<< " FinalState=" << FinalState << '\n');
InitialStates.insert({BB, InitialState});
FinalStates.insert({BB, FinalState});
}
for (Instruction &I : BB) {
if (auto *CI = dyn_cast<CallInst>(&I)) {
// Possibly throwing call instructions have no actions to take after
// an unwind. Ensure they are in the -1 state.
if (CI->doesNotThrow())
continue;
insertStateNumberStore(CI, BaseState);
} else if (auto *II = dyn_cast<InvokeInst>(&I)) {
// Look up the state number of the landingpad this unwinds to.
assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!");
int State = FuncInfo.InvokeStateMap[II];
insertStateNumberStore(II, State);
}
// Try to fill-in InitialStates and FinalStates which have no call-sites.
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.front();
Worklist.pop_front();
// This BasicBlock has already been figured out, nothing more we can do.
if (InitialStates.count(BB) != 0)
continue;
int PredState = getPredState(FinalStates, F, ParentBaseState, BB);
if (PredState == OverdefinedState)
continue;
// We successfully inferred this BasicBlock's state via its predecessors;
// enqueue its successors to see if we can infer their states.
InitialStates.insert({BB, PredState});
FinalStates.insert({BB, PredState});
for (BasicBlock *SuccBB : successors(BB))
Worklist.push_back(SuccBB);
}
// Finally, insert state stores before call-sites which transition us to a new
// state.
for (BasicBlock *BB : RPOT) {
auto &BBColors = BlockColors[BB];
BasicBlock *FuncletEntryBB = BBColors.front();
if (isa<CleanupPadInst>(FuncletEntryBB->getFirstNonPHI()))
continue;
int PrevState = getPredState(FinalStates, F, ParentBaseState, BB);
DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
<< " PrevState=" << PrevState << '\n');
for (Instruction &I : *BB) {
CallSite CS(&I);
if (!isStateStoreNeeded(Personality, CS))
continue;
int State = getStateForCallSite(BlockColors, FuncInfo, CS);
if (State != PrevState)
insertStateNumberStore(&I, State);
PrevState = State;
}
}
}

View File

@ -28,7 +28,11 @@ entry:
; CHECK: entry:
; CHECK: store i32 -1
; CHECK: call void @g(i32 3)
; CHECK-NEXT: call void @g(i32 4)
; CHECK-NEXT: call void @g(i32 5)
call void @g(i32 3)
call void @g(i32 4)
call void @g(i32 5)
store i32 0, i32* %tmp, align 4
%0 = bitcast i32* %tmp to i8*
; CHECK: store i32 0
@ -54,14 +58,22 @@ catch.3: ; preds = %catch.dispatch.1
; CHECK: catch.3:
; CHECK: store i32 3
; CHECK: call void @g(i32 1)
; CHECK-NEXT: call void @g(i32 2)
; CHECK-NEXT: call void @g(i32 3)
call void @g(i32 1)
call void @g(i32 2)
call void @g(i32 3)
catchret from %2 to label %try.cont
try.cont: ; preds = %catch.3
; CHECK: try.cont:
; CHECK: store i32 1
; CHECK: call void @g(i32 2)
; CHECK-NEXT: call void @g(i32 3)
; CHECK-NEXT: call void @g(i32 4)
call void @g(i32 2)
call void @g(i32 3)
call void @g(i32 4)
unreachable
unreachable: ; preds = %catch
@ -111,6 +123,10 @@ try.cont: ; preds = %catch2
; CHECK: try.cont:
; CHECK: store i32 1
; CHECK: call void @dtor()
; CHECK-NEXT: call void @dtor()
; CHECK-NEXT: call void @dtor()
call void @dtor() #3 [ "funclet"(token %1) ]
call void @dtor() #3 [ "funclet"(token %1) ]
call void @dtor() #3 [ "funclet"(token %1) ]
catchret from %1 to label %try.cont4
@ -131,6 +147,52 @@ unreachable1: ; preds = %catch
unreachable
}
; Test that a state store is still emitted where it cannot be elided.
; The join block %if.end is reached from %entry, from the normal edge of the
; invoke in %if.then, and from the catchret in %__except.ret.  The expected
; output therefore keeps an explicit store of -1 immediately before the call
; to @dtor in %if.end.
; CHECK-LABEL: define void @required_state_store(
define void @required_state_store(i1 zeroext %cond) personality i32 (...)* @_except_handler3 {
entry:
%__exception_code = alloca i32, align 4
; Escape the exception-code slot so the filter function can recover it.
call void (...) @llvm.localescape(i32* nonnull %__exception_code)
; The prologue is expected to store -1 ahead of the first call.
; CHECK: store i32 -1
; CHECK: call void @g(i32 0)
call void @g(i32 0)
br i1 %cond, label %if.then, label %if.end
if.then: ; preds = %entry
; The invoke is expected to be directly preceded by a store of state 0.
; CHECK: store i32 0
; CHECK-NEXT: invoke void @g(i32 1)
invoke void @g(i32 1)
to label %if.end unwind label %catch.dispatch
catch.dispatch: ; preds = %if.then
%0 = catchswitch within none [label %__except.ret] unwind to caller
__except.ret: ; preds = %catch.dispatch
; The catchpad names the filter function defined after this function.
%1 = catchpad within %0 [i8* bitcast (i32 ()* @"\01?filt$0@0@required_state_store@@" to i8*)]
catchret from %1 to label %if.end
if.end: ; preds = %if.then, %__except.ret, %entry
; This is the store the test is named for: it must directly precede @dtor.
; CHECK: store i32 -1
; CHECK-NEXT: call void @dtor()
call void @dtor()
ret void
}
; Filter function referenced by the catchpad in @required_state_store.
; It recovers the parent's escaped %__exception_code slot, copies an i32 into
; it, and returns 1.
define internal i32 @"\01?filt$0@0@required_state_store@@"() {
entry:
; Recover the parent function's frame pointer from llvm.frameaddress(i32 1).
%0 = tail call i8* @llvm.frameaddress(i32 1)
%1 = tail call i8* @llvm.x86.seh.recoverfp(i8* bitcast (void (i1)* @required_state_store to i8*), i8* %0)
; Index 0 matches the single value passed to llvm.localescape in the parent.
%2 = tail call i8* @llvm.localrecover(i8* bitcast (void (i1)* @required_state_store to i8*), i8* %1, i32 0)
%__exception_code = bitcast i8* %2 to i32*
; Load a { i32*, i8* }* from 20 bytes below the address in %0, then follow its
; first field to an i32 (presumably the SEH exception code -- confirm).
%3 = getelementptr inbounds i8, i8* %0, i32 -20
%4 = bitcast i8* %3 to { i32*, i8* }**
%5 = load { i32*, i8* }*, { i32*, i8* }** %4, align 4
%6 = getelementptr inbounds { i32*, i8* }, { i32*, i8* }* %5, i32 0, i32 0
%7 = load i32*, i32** %6, align 4
%8 = load i32, i32* %7, align 4
; Publish the loaded value into the parent's escaped slot.
store i32 %8, i32* %__exception_code, align 4
ret i32 1
}
declare void @g(i32) #0
declare void @dtor()
@ -139,6 +201,16 @@ declare x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*)
declare i32 @__CxxFrameHandler3(...)
declare i8* @llvm.frameaddress(i32)
declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
declare i8* @llvm.localrecover(i8*, i8*, i32)
declare void @llvm.localescape(...)
declare i32 @_except_handler3(...)
attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noreturn }

View File

@ -51,7 +51,7 @@ ehcleanup: ; preds = %entry
; CHECK: "?dtor$2@?0?passes_two@4HA":
; CHECK: pushl %ebp
; CHECK: subl $8, %esp
; CHECK: addl $12, %ebp
; CHECK: addl $16, %ebp
; CHECK: {{movl|leal}} -{{[0-9]+}}(%ebp), %ecx
; CHECK: calll "??1A@@QAE@XZ"
; CHECK: addl $8, %esp