[esan|wset] EfficiencySanitizer working set tool fastpath
Summary:
Adds fastpath instrumentation for esan's working set tool.  The
instrumentation for an intra-cache-line load or store consists of an
inlined write to shadow memory bits for the corresponding cache line.

Adds a basic test for this instrumentation.

Reviewers: aizatsky

Subscribers: vitalybuka, zhaoqin, kcc, eugenis, llvm-commits

Differential Revision: http://reviews.llvm.org/D20483

llvm-svn: 270640
parent 10bbd18b43 · commit 605cdbc11c
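For orientation before the diff: the inlined fastpath for one intra-cache-line access behaves roughly like the sketch below. This is a minimal C++ restatement of the pseudo-code comment added in instrumentFastpathWorkingSet further down; the function name workingSetFastpath is made up for illustration, and only the 0x81 mask and the check-before-store shape come from the patch.

  #include <cstdint>

  // Mark the cache line containing this access as part of the working set:
  // bit 0 tracks the current sampling period, bit 7 the total working set.
  // The conditional store keeps the common (already-marked) case read-only.
  void workingSetFastpath(uint8_t *ShadowAddr) {
    const uint8_t BitMask = 0x81;             // 10000001B
    if ((*ShadowAddr & BitMask) != BitMask)   // lowered to a TEST + branch
      *ShadowAddr |= BitMask;                 // OR preserves the middle 6 bits
  }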
@@ -123,6 +123,7 @@ struct EfficiencySanitizerOptions {
  enum Type {
    ESAN_None = 0,
    ESAN_CacheFrag,
    ESAN_WorkingSet,
  } ToolType;
};

@@ -42,6 +42,9 @@ using namespace llvm;
static cl::opt<bool>
    ClToolCacheFrag("esan-cache-frag", cl::init(false),
                    cl::desc("Detect data cache fragmentation"), cl::Hidden);
static cl::opt<bool>
    ClToolWorkingSet("esan-working-set", cl::init(false),
                     cl::desc("Measure the working set size"), cl::Hidden);
// Each new tool will get its own opt flag here.
// These are converted to EfficiencySanitizerOptions for use
// in the code.
@@ -65,12 +68,31 @@ static const char *const EsanModuleDtorName = "esan.module_dtor";
static const char *const EsanInitName = "__esan_init";
static const char *const EsanExitName = "__esan_exit";

// We must keep these Shadow* constants consistent with the esan runtime.
// FIXME: Try to place these shadow constants, the names of the __esan_*
// interface functions, and the ToolType enum into a header shared between
// llvm and compiler-rt.
static const uint64_t ShadowMask = 0x00000fffffffffffull;
static const uint64_t ShadowOffs[3] = { // Indexed by scale
  0x0000130000000000ull,
  0x0000220000000000ull,
  0x0000440000000000ull,
};
// This array is indexed by the ToolType enum.
static const int ShadowScale[] = {
  0, // ESAN_None.
  2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
  6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
};

namespace {

static EfficiencySanitizerOptions
OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
  if (ClToolCacheFrag)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
  else if (ClToolWorkingSet)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;

  // Direct opt invocation with no params will have the default ESAN_None.
  // We run the default tool in that case.
@@ -100,11 +122,14 @@ private:
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  bool shouldIgnoreMemoryAccess(Instruction *I);
  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
  Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
                          Value *Addr, unsigned Alignment);
  // Each tool has its own fastpath routine:
  bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
                                   Value *Addr, unsigned Alignment);
  bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
                                    Value *Addr, unsigned Alignment);

  EfficiencySanitizerOptions Options;
  LLVMContext *Ctx;
@@ -226,11 +251,30 @@ bool EfficiencySanitizer::initOnModule(Module &M) {
  return true;
}

Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // Shadow = ((App & Mask) + Offs) >> Scale
  Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowMask));
  uint64_t Offs;
  int Scale = ShadowScale[Options.ToolType];
  if (Scale <= 2)
    Offs = ShadowOffs[Scale];
  else
    Offs = ShadowOffs[0] << Scale;
  Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
  if (Scale > 0)
    Shadow = IRB.CreateLShr(Shadow, Scale);
  return Shadow;
}

bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    // We'd like to know about cache fragmentation in vtable accesses and
    // constant data references, so we do not currently ignore anything.
    return false;
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    // TODO: the instrumentation disturbs the data layout on the stack, so we
    // may want to add an option to ignore stack references (if we can
    // distinguish them) to reduce overhead.
  }
  // TODO(bruening): future tools will be returning true for some cases.
  return false;
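As a worked example of the appToShadow mapping above (a standalone sketch, not code from the patch: the constants are the ShadowMask/ShadowOffs/ShadowScale values added here for the working-set tool, and the decimal values 17592186044415 and 1337006139375616 are the same ones that appear in the new test's CHECK lines; the example address is arbitrary):

  #include <cstdint>
  #include <cstdio>

  // Shadow = ((App & Mask) + Offs) >> Scale; the working-set tool uses
  // Scale == 6 (64B app : 1B shadow), so Offs == ShadowOffs[0] << 6.
  static const uint64_t ShadowMask  = 0x00000fffffffffffull; // 17592186044415
  static const uint64_t ShadowOffs0 = 0x0000130000000000ull;
  static const int      Scale       = 6;

  static uint64_t appToShadow(uint64_t App) {
    const uint64_t Offs = ShadowOffs0 << Scale;              // 1337006139375616
    return ((App & ShadowMask) + Offs) >> Scale;
  }

  int main() {
    const uint64_t App = 0x00007f0012345678ull; // made-up application address
    std::printf("app 0x%llx -> shadow byte 0x%llx\n",
                (unsigned long long)App,
                (unsigned long long)appToShadow(App));
    return 0;
  }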
@@ -309,6 +353,11 @@ bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  Value *OnAccessFunc = nullptr;

  // Convert 0 to the default alignment.
  if (Alignment == 0)
    Alignment = DL.getPrefTypeAlignment(OrigTy);

  if (IsStore)
    NumInstrumentedStores++;
  else
@@ -384,6 +433,8 @@ bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
                                             Value *Addr, unsigned Alignment) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
  }
  return false;
}
@@ -395,3 +446,56 @@ bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
  // TODO(bruening): implement a fastpath for aligned accesses
  return false;
}

bool EfficiencySanitizer::instrumentFastpathWorkingSet(
    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
  assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this
  IRBuilder<> IRB(I);
  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
  // Bail to the slowpath if the access might touch multiple cache lines.
  // An access aligned to its size is guaranteed to be intra-cache-line.
  // getMemoryAccessFuncIndex has already ruled out a size larger than 16
  // and thus larger than a cache line for platforms this tool targets
  // (and our shadow memory setup assumes 64-byte cache lines).
  assert(TypeSize <= 64);
  if (!(TypeSize == 8 ||
        (Alignment % (TypeSize / 8)) == 0))
    return false;

  // We inline instrumentation to set the corresponding shadow bits for
  // each cache line touched by the application.  Here we handle a single
  // load or store where we've already ruled out the possibility that it
  // might touch more than one cache line and thus we simply update the
  // shadow memory for a single cache line.
  // Our shadow memory model is fine with races when manipulating shadow values.
  // We generate the following code:
  //
  //   const char BitMask = 0x81;
  //   char *ShadowAddr = appToShadow(AppAddr);
  //   if ((*ShadowAddr & BitMask) != BitMask)
  //     *ShadowAddr |= BitMask;
  //
  Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *ShadowPtr = appToShadow(AddrPtr, IRB);
  Type *ShadowTy = IntegerType::get(*Ctx, 8U);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  // The bottom bit is used for the current sampling period's working set.
  // The top bit is used for the total working set.  We set both on each
  // memory access, if they are not already set.
  Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B

  Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  // The AND and CMP will be turned into a TEST instruction by the compiler.
  Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
  TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
  // FIXME: do we need to call SetCurrentDebugLocation?
  IRB.SetInsertPoint(CmpTerm);
  // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
  // which are used by the runtime library.
  Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
  IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  IRB.SetInsertPoint(I);

  return true;
}

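The bail-out test at the top of instrumentFastpathWorkingSet reduces to the predicate below (a hedged restatement for clarity only; isIntraCacheLine is not a helper in the patch).

  #include <cstdint>

  // A load/store stays within one 64-byte cache line when it is a single byte
  // or its alignment is a multiple of its size in bytes; accesses larger than
  // 16 bytes never reach the fastpath (getMemoryAccessFuncIndex rejects them).
  static bool isIntraCacheLine(uint32_t TypeSizeBits, unsigned Alignment) {
    return TypeSizeBits == 8 || (Alignment % (TypeSizeBits / 8)) == 0;
  }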
test/Instrumentation/EfficiencySanitizer/working_set_basic.ll (new file, 164 lines)
@@ -0,0 +1,164 @@
; Test basic EfficiencySanitizer working set instrumentation.
;
; RUN: opt < %s -esan -esan-working-set -S | FileCheck %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Intra-cache-line

define i8 @aligned1(i8* %a) {
entry:
  %tmp1 = load i8, i8* %a, align 1
  ret i8 %tmp1
; CHECK: @llvm.global_ctors = {{.*}}@esan.module_ctor
; CHECK: %0 = ptrtoint i8* %a to i64
; CHECK-NEXT: %1 = and i64 %0, 17592186044415
; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
; CHECK-NEXT: %3 = lshr i64 %2, 6
; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
; CHECK-NEXT: %5 = load i8, i8* %4
; CHECK-NEXT: %6 = and i8 %5, -127
; CHECK-NEXT: %7 = icmp ne i8 %6, -127
; CHECK-NEXT: br i1 %7, label %8, label %11
; CHECK: %9 = or i8 %5, -127
; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
; CHECK-NEXT: store i8 %9, i8* %10
; CHECK-NEXT: br label %11
; CHECK: %tmp1 = load i8, i8* %a, align 1
; CHECK-NEXT: ret i8 %tmp1
}

define i16 @aligned2(i16* %a) {
entry:
  %tmp1 = load i16, i16* %a, align 2
  ret i16 %tmp1
; CHECK: %0 = ptrtoint i16* %a to i64
; CHECK-NEXT: %1 = and i64 %0, 17592186044415
; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
; CHECK-NEXT: %3 = lshr i64 %2, 6
; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
; CHECK-NEXT: %5 = load i8, i8* %4
; CHECK-NEXT: %6 = and i8 %5, -127
; CHECK-NEXT: %7 = icmp ne i8 %6, -127
; CHECK-NEXT: br i1 %7, label %8, label %11
; CHECK: %9 = or i8 %5, -127
; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
; CHECK-NEXT: store i8 %9, i8* %10
; CHECK-NEXT: br label %11
; CHECK: %tmp1 = load i16, i16* %a, align 2
; CHECK-NEXT: ret i16 %tmp1
}

define i32 @aligned4(i32* %a) {
entry:
  %tmp1 = load i32, i32* %a, align 4
  ret i32 %tmp1
; CHECK: %0 = ptrtoint i32* %a to i64
; CHECK-NEXT: %1 = and i64 %0, 17592186044415
; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
; CHECK-NEXT: %3 = lshr i64 %2, 6
; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
; CHECK-NEXT: %5 = load i8, i8* %4
; CHECK-NEXT: %6 = and i8 %5, -127
; CHECK-NEXT: %7 = icmp ne i8 %6, -127
; CHECK-NEXT: br i1 %7, label %8, label %11
; CHECK: %9 = or i8 %5, -127
; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
; CHECK-NEXT: store i8 %9, i8* %10
; CHECK-NEXT: br label %11
; CHECK: %tmp1 = load i32, i32* %a, align 4
; CHECK-NEXT: ret i32 %tmp1
}

define i64 @aligned8(i64* %a) {
entry:
  %tmp1 = load i64, i64* %a, align 8
  ret i64 %tmp1
; CHECK: %0 = ptrtoint i64* %a to i64
; CHECK-NEXT: %1 = and i64 %0, 17592186044415
; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
; CHECK-NEXT: %3 = lshr i64 %2, 6
; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
; CHECK-NEXT: %5 = load i8, i8* %4
; CHECK-NEXT: %6 = and i8 %5, -127
; CHECK-NEXT: %7 = icmp ne i8 %6, -127
; CHECK-NEXT: br i1 %7, label %8, label %11
; CHECK: %9 = or i8 %5, -127
; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
; CHECK-NEXT: store i8 %9, i8* %10
; CHECK-NEXT: br label %11
; CHECK: %tmp1 = load i64, i64* %a, align 8
; CHECK-NEXT: ret i64 %tmp1
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Not guaranteed to be intra-cache-line

define i16 @unaligned2(i16* %a) {
entry:
  %tmp1 = load i16, i16* %a, align 1
  ret i16 %tmp1
; CHECK: %0 = bitcast i16* %a to i8*
; CHECK-NEXT: call void @__esan_unaligned_load2(i8* %0)
; CHECK-NEXT: %tmp1 = load i16, i16* %a, align 1
; CHECK-NEXT: ret i16 %tmp1
}

define i32 @unaligned4(i32* %a) {
entry:
  %tmp1 = load i32, i32* %a, align 2
  ret i32 %tmp1
; CHECK: %0 = bitcast i32* %a to i8*
; CHECK-NEXT: call void @__esan_unaligned_load4(i8* %0)
; CHECK-NEXT: %tmp1 = load i32, i32* %a, align 2
; CHECK-NEXT: ret i32 %tmp1
}

define i64 @unaligned8(i64* %a) {
entry:
  %tmp1 = load i64, i64* %a, align 4
  ret i64 %tmp1
; CHECK: %0 = bitcast i64* %a to i8*
; CHECK-NEXT: call void @__esan_unaligned_load8(i8* %0)
; CHECK-NEXT: %tmp1 = load i64, i64* %a, align 4
; CHECK-NEXT: ret i64 %tmp1
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Ensure that esan converts intrinsics to calls:

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)

define void @memCpyTest(i8* nocapture %x, i8* nocapture %y) {
entry:
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
  ret void
; CHECK: define void @memCpyTest
; CHECK: call i8* @memcpy
; CHECK: ret void
}

define void @memMoveTest(i8* nocapture %x, i8* nocapture %y) {
entry:
  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
  ret void
; CHECK: define void @memMoveTest
; CHECK: call i8* @memmove
; CHECK: ret void
}

define void @memSetTest(i8* nocapture %x) {
entry:
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 77, i64 16, i32 4, i1 false)
  ret void
; CHECK: define void @memSetTest
; CHECK: call i8* @memset
; CHECK: ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Top-level:

; CHECK: define internal void @esan.module_ctor()
; CHECK: call void @__esan_init(i32 2, i64 ptrtoint (i64* @0 to i64))