From bbccd958745de0b8d07d7b069e38925287ee2d3f Mon Sep 17 00:00:00 2001 From: Torok Edwin Date: Tue, 14 Jul 2009 18:44:28 +0000 Subject: [PATCH] Introduce a pointertracking pass. For now this only computes the allocated size of the memory pointed to by a pointer, and offset a pointer from allocated pointer. The actual checkLimits part will come later, after another round of review. llvm-svn: 75657 --- include/llvm/Analysis/PointerTracking.h | 132 ++++++++++++ include/llvm/LinkAllPasses.h | 2 + lib/Analysis/PointerTracking.cpp | 261 ++++++++++++++++++++++++ test/Analysis/PointerTracking/dg.exp | 3 + test/Analysis/PointerTracking/sizes.ll | 84 ++++++++ 5 files changed, 482 insertions(+) create mode 100644 include/llvm/Analysis/PointerTracking.h create mode 100644 lib/Analysis/PointerTracking.cpp create mode 100644 test/Analysis/PointerTracking/dg.exp create mode 100644 test/Analysis/PointerTracking/sizes.ll diff --git a/include/llvm/Analysis/PointerTracking.h b/include/llvm/Analysis/PointerTracking.h new file mode 100644 index 00000000000..2256839ec2a --- /dev/null +++ b/include/llvm/Analysis/PointerTracking.h @@ -0,0 +1,132 @@ +//===- PointerTracking.h - Pointer Bounds Tracking --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements tracking of pointer bounds. +// It knows that the libc functions "calloc" and "realloc" allocate memory, thus +// you should avoid using this pass if they mean something else for your +// language. +// +// All methods assume that the pointer is not NULL, if it is then the returned +// allocation size is wrong, and the result from checkLimits is wrong too. +// It also assumes that pointers are valid, and that it is not analyzing a +// use-after-free scenario. 
+// Due to these limitations the "size" returned by these methods should be +// considered as either 0 or the returned size. +// +// Another analysis pass should be used to find use-after-free/NULL dereference +// bugs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_POINTERTRACKING_H +#define LLVM_ANALYSIS_POINTERTRACKING_H + +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/PredIteratorCache.h" + +namespace llvm { + class DominatorTree; + class ScalarEvolution; + class SCEV; + class Loop; + class LoopInfo; + class TargetData; + + // Result from solver, assuming pointer is not NULL, + // and it is not a use-after-free situation. + enum SolverResult { + AlwaysFalse,// always false with above constraints + AlwaysTrue,// always true with above constraints + Unknown // it can sometimes be true, sometimes false, or it is undecided + }; + + class PointerTracking : public FunctionPass { + public: + typedef ICmpInst::Predicate Predicate; + static char ID; + PointerTracking(); + + virtual bool doInitialization(Module &M); + + // If this pointer directly points to an allocation, return + // the number of elements of P's pointee type allocated. + // Otherwise return CouldNotCompute. + // Since allocations can fail by returning NULL, the real element count + // for every allocation is either 0 or the value returned by this function. + const SCEV *getAllocationElementCount(Value *P) const; + + // Same as getAllocationElementCount() but returns size in bytes. + // We consider one byte as 8 bits. + const SCEV *getAllocationSizeInBytes(Value *V) const; + + // Given a Pointer, determine a base pointer of known size, and an offset + // therefrom. + // When unable to determine, sets Base to NULL, and BaseSize/Offset to + // CouldNotCompute.
+ // BaseSize, and Offset are in bytes: Pointer == Base + Offset + void getPointerOffset(Value *Pointer, Value *&Base, const SCEV *& BaseSize, + const SCEV *&Offset) const; + + // Compares the 2 scalar evolution expressions according to predicate, + // and if it can prove that the result is always true or always false + // return AlwaysTrue/AlwaysFalse. Otherwise it returns Unknown. + enum SolverResult compareSCEV(const SCEV *A, Predicate Pred, const SCEV *B, + const Loop *L); + + // Determines whether the condition LHS <Pred1> RHS is sufficient + // for the condition A <Pred2> B to hold. + // Currently only ULT/ULE is supported. + // This errs on the side of returning false. + bool conditionSufficient(const SCEV *LHS, Predicate Pred1, const SCEV *RHS, + const SCEV *A, Predicate Pred2, const SCEV *B, + const Loop *L); + + // Determines whether Offset is known to be always in [0, Limit) bounds. + // This errs on the side of returning Unknown. + enum SolverResult checkLimits(const SCEV *Offset, const SCEV *Limit, + BasicBlock *BB); + + virtual bool runOnFunction(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void print(raw_ostream &OS, const Module* = 0) const; + virtual void print(std::ostream &OS, const Module* = 0) const; + private: + Function *FF; + TargetData *TD; + ScalarEvolution *SE; + LoopInfo *LI; + DominatorTree *DT; + + Function *callocFunc; + Function *reallocFunc; + PredIteratorCache predCache; + + SmallPtrSet<const SCEV*, 1> analyzing; + + enum SolverResult isLoopGuardedBy(const Loop *L, Predicate Pred, + const SCEV *A, const SCEV *B) const; + static bool isMonotonic(const SCEV *S); + bool scevPositive(const SCEV *A, const Loop *L, bool strict=true) const; + bool conditionSufficient(Value *Cond, bool negated, + const SCEV *A, Predicate Pred, const SCEV *B); + Value *getConditionToReach(BasicBlock *A, + DomTreeNodeBase<BasicBlock> *B, + bool &negated); + Value *getConditionToReach(BasicBlock *A, + BasicBlock *B, + bool &negated); + const SCEV
+ *computeAllocationCount(Value *P, const Type *&Ty) const; + const SCEV *computeAllocationCountForType(Value *P, const Type *Ty) const; + }; +} +#endif + diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 4891f2446e3..c0cd766abca 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -20,6 +20,7 @@ #include "llvm/Analysis/IntervalPartition.h" #include "llvm/Analysis/LoopVR.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/PointerTracking.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Assembly/PrintModulePass.h" @@ -136,6 +137,7 @@ namespace { (void)new llvm::FindUsedTypes(); (void)new llvm::ScalarEvolution(); (void)new llvm::LoopVR(); + (void)new llvm::PointerTracking(); ((llvm::Function*)0)->viewCFGOnly(); llvm::AliasSetTracker X(*(llvm::AliasAnalysis*)0); X.add((llvm::Value*)0, 0); // for -print-alias-sets diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp new file mode 100644 index 00000000000..1ae2fe6910f --- /dev/null +++ b/lib/Analysis/PointerTracking.cpp @@ -0,0 +1,261 @@ +//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements tracking of pointer bounds.
+// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PointerTracking.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Value.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" + +namespace llvm { +char PointerTracking::ID=0; +PointerTracking::PointerTracking() : FunctionPass(&ID) {} + +bool PointerTracking::runOnFunction(Function &F) { + predCache.clear(); + assert(analyzing.empty()); + FF = &F; + TD = getAnalysisIfAvailable<TargetData>(); + SE = &getAnalysis<ScalarEvolution>(); + LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTree>(); + return false; +} + +void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredTransitive<DominatorTree>(); + AU.addRequiredTransitive<LoopInfo>(); + AU.addRequiredTransitive<ScalarEvolution>(); + AU.setPreservesAll(); +} + +bool PointerTracking::doInitialization(Module &M) { + const Type *PTy = PointerType::getUnqual(Type::Int8Ty); + + // Find calloc(i64, i64) or calloc(i32, i32). + callocFunc = M.getFunction("calloc"); + if (callocFunc) { + const FunctionType *Ty = callocFunc->getFunctionType(); + + std::vector<const Type*> args, args2; + args.push_back(Type::Int64Ty); + args.push_back(Type::Int64Ty); + args2.push_back(Type::Int32Ty); + args2.push_back(Type::Int32Ty); + const FunctionType *Calloc1Type = + FunctionType::get(PTy, args, false); + const FunctionType *Calloc2Type = + FunctionType::get(PTy, args2, false); + if (Ty != Calloc1Type && Ty != Calloc2Type) + callocFunc = 0; // Give up + } + + // Find realloc(i8*, i64) or realloc(i8*, i32).
+ reallocFunc = M.getFunction("realloc"); + if (reallocFunc) { + const FunctionType *Ty = reallocFunc->getFunctionType(); + std::vector<const Type*> args, args2; + args.push_back(PTy); + args.push_back(Type::Int64Ty); + args2.push_back(PTy); + args2.push_back(Type::Int32Ty); + + const FunctionType *Realloc1Type = + FunctionType::get(PTy, args, false); + const FunctionType *Realloc2Type = + FunctionType::get(PTy, args2, false); + if (Ty != Realloc1Type && Ty != Realloc2Type) + reallocFunc = 0; // Give up + } + return false; +} + +// Calculates the number of elements allocated for pointer P, +// the type of the element is stored in Ty. Ty is only meaningful when the result is not CouldNotCompute. +const SCEV *PointerTracking::computeAllocationCount(Value *P, + const Type *&Ty) const { + Value *V = P->stripPointerCasts(); + if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) { + Value *arraySize = AI->getArraySize(); + Ty = AI->getAllocatedType(); + // arraySize elements of type Ty. + return SE->getSCEV(arraySize); + } + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + if (GV->hasDefinitiveInitializer()) { + Constant *C = GV->getInitializer(); + if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { + Ty = ATy->getElementType(); + return SE->getConstant(Type::Int32Ty, ATy->getNumElements()); + } + } + Ty = GV->getType()->getElementType(); // one object of the global's value type; GV->getType() itself is the pointer type + return Ty->isSized() ? SE->getConstant(Type::Int32Ty, 1) : SE->getCouldNotCompute(); // unsized (opaque) globals have no computable size + //TODO: implement more tracking for globals + } + + if (CallInst *CI = dyn_cast<CallInst>(V)) { + CallSite CS(CI); + Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + const Loop *L = LI->getLoopFor(CI->getParent()); + if (F && F == callocFunc) { // F is null for indirect calls; must not match a null callocFunc + Ty = Type::Int8Ty; + // calloc allocates arg0*arg1 bytes. + return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)), + SE->getSCEV(CS.getArgument(1))), + L); + } else if (F && F == reallocFunc) { // same null guard as for calloc + Ty = Type::Int8Ty; + // realloc allocates arg1 bytes. + return SE->getSCEVAtScope(CS.getArgument(1), L); + } + } + + return SE->getCouldNotCompute(); +} + +// Calculates the number of elements of type Ty allocated for P.
+const SCEV *PointerTracking::computeAllocationCountForType(Value *P, + const Type *Ty) + const { + const Type *elementTy; + const SCEV *Count = computeAllocationCount(P, elementTy); + if (isa<SCEVCouldNotCompute>(Count)) + return Count; + if (elementTy == Ty) + return Count; + + if (!TD) // need TargetData from this point forward + return SE->getCouldNotCompute(); + + uint64_t elementSize = TD->getTypeAllocSize(elementTy); + uint64_t wantSize = TD->getTypeAllocSize(Ty); + if (elementSize == wantSize) + return Count; + if (wantSize == 0 || elementSize % wantSize) //zero-sized Ty (would divide by zero) or fractional counts not possible + return SE->getCouldNotCompute(); + return SE->getMulExpr(Count, SE->getConstant(Count->getType(), + elementSize/wantSize)); +} + +const SCEV *PointerTracking::getAllocationElementCount(Value *V) const { + // We only deal with pointers. + const PointerType *PTy = cast<PointerType>(V->getType()); + return computeAllocationCountForType(V, PTy->getElementType()); +} + +const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const { + return computeAllocationCountForType(V, Type::Int8Ty); +} + +// Helper for isLoopGuardedByCond that checks the swapped and inverted predicate too: AlwaysTrue if A Pred B is guarded, AlwaysFalse if its inverse is guarded +enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L, + Predicate Pred, + const SCEV *A, + const SCEV *B) const { + if (SE->isLoopGuardedByCond(L, Pred, A, B)) + return AlwaysTrue; + Pred = ICmpInst::getSwappedPredicate(Pred); + if (SE->isLoopGuardedByCond(L, Pred, B, A)) + return AlwaysTrue; + + Pred = ICmpInst::getInversePredicate(Pred); + if (SE->isLoopGuardedByCond(L, Pred, B, A)) + return AlwaysFalse; + Pred = ICmpInst::getSwappedPredicate(Pred); + if (SE->isLoopGuardedByCond(L, Pred, A, B)) + return AlwaysFalse; // Pred is still the inverted predicate here, so the original condition never holds + return Unknown; +} + +enum SolverResult PointerTracking::checkLimits(const SCEV *Offset, + const SCEV *Limit, + BasicBlock *BB) +{ + //FIXME: merge implementation + return Unknown; +} + +void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base, + const SCEV *&Limit, + const SCEV *&Offset) const +{ + Pointer =
+ Pointer->stripPointerCasts(); + Base = Pointer->getUnderlyingObject(); + Limit = getAllocationSizeInBytes(Base); + if (isa<SCEVCouldNotCompute>(Limit)) { + Base = 0; + Offset = Limit; + return; + } + + Offset = SE->getMinusSCEV(SE->getSCEV(Pointer), SE->getSCEV(Base)); + if (isa<SCEVCouldNotCompute>(Offset)) { + Base = 0; + Limit = Offset; + } +} + +void PointerTracking::print(raw_ostream &OS, const Module* M) const { + // Calling some PT methods may cause caches to be updated, however + // this should be safe for the same reason it's safe for SCEV. + PointerTracking &PT = *const_cast<PointerTracking*>(this); + for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) { + if (!isa<PointerType>(I->getType())) + continue; + Value *Base; + const SCEV *Limit, *Offset; + getPointerOffset(&*I, Base, Limit, Offset); + if (!Base) + continue; + + if (Base == &*I) { + const SCEV *S = getAllocationElementCount(Base); + OS << *Base << " ==> " << *S << " elements, "; + OS << *Limit << " bytes allocated\n"; + continue; + } + OS << *I << " -- base: " << *Base; + OS << " offset: " << *Offset; + + enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent()); + switch (res) { + case AlwaysTrue: + OS << " always safe\n"; + break; + case AlwaysFalse: + OS << " always unsafe\n"; + break; + case Unknown: + OS << " <<unknown>>\n"; + break; + } + } +} + +void PointerTracking::print(std::ostream &o, const Module* M) const { + raw_os_ostream OS(o); + print(OS, M); +} + +static RegisterPass<PointerTracking> X("pointertracking", + "Track pointer bounds", false, true); +} diff --git a/test/Analysis/PointerTracking/dg.exp b/test/Analysis/PointerTracking/dg.exp new file mode 100644 index 00000000000..f2005891a59 --- /dev/null +++ b/test/Analysis/PointerTracking/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] diff --git a/test/Analysis/PointerTracking/sizes.ll b/test/Analysis/PointerTracking/sizes.ll new file mode 100644 index 00000000000..5da4dcc6f86 --- /dev/null +++ b/test/Analysis/PointerTracking/sizes.ll
@@ -0,0 +1,84 @@ +; RUN: llvm-as < %s | opt -pointertracking -analyze | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" +@.str = internal constant [5 x i8] c"1234\00" ; <[5 x i8]*> [#uses=1] +@test1p = global i8* getelementptr ([5 x i8]* @.str, i32 0, i32 0), align 8 ; [#uses=1] +@test1a = global [5 x i8] c"1234\00", align 1 ; <[5 x i8]*> [#uses=1] +@test2a = global [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5], align 4 ; <[5 x i32]*> [#uses=2] +@test2p = global i32* getelementptr ([5 x i32]* @test2a, i32 0, i32 0), align 8 ; [#uses=1] +@test0p = common global i32* null, align 8 ; [#uses=1] +@test0i = common global i32 0, align 4 ; [#uses=1] + +define i32 @foo0() nounwind { +entry: + %tmp = load i32** @test0p ; [#uses=1] + %conv = bitcast i32* %tmp to i8* ; [#uses=1] + %call = tail call i32 @bar(i8* %conv) nounwind ; [#uses=1] + %tmp1 = load i8** @test1p ; [#uses=1] + %call2 = tail call i32 @bar(i8* %tmp1) nounwind ; [#uses=1] + %call3 = tail call i32 @bar(i8* getelementptr ([5 x i8]* @test1a, i32 0, i32 0)) nounwind ; [#uses=1] + %call5 = tail call i32 @bar(i8* bitcast ([5 x i32]* @test2a to i8*)) nounwind ; [#uses=1] + %tmp7 = load i32** @test2p ; [#uses=1] + %conv8 = bitcast i32* %tmp7 to i8* ; [#uses=1] + %call9 = tail call i32 @bar(i8* %conv8) nounwind ; [#uses=1] + %call11 = tail call i32 @bar(i8* bitcast (i32* @test0i to i8*)) nounwind ; [#uses=1] + %add = add i32 %call2, %call ; [#uses=1] + %add4 = add i32 %add, %call3 ; [#uses=1] + %add6 = add i32 %add4, %call5 ; [#uses=1] + %add10 = add i32 %add6, %call9 ; [#uses=1] + %add12 = add i32 %add10, %call11 ; [#uses=1] + ret i32 %add12 +} + +declare i32 @bar(i8*) + +define i32 @foo1(i32 %n) nounwind { +entry: +; CHECK: 'foo1': + %test4a = alloca [10 x i8], align 1 ; <[10 x i8]*> [#uses=1] +; CHECK: %test4a = +; CHECK: ==> 1 elements, 10 bytes allocated + 
%test6a = alloca [10 x i32], align 4 ; <[10 x i32]*> [#uses=1] +; CHECK: %test6a = +; CHECK: ==> 1 elements, 40 bytes allocated + %vla = alloca i8, i32 %n, align 1 ; [#uses=1] +; CHECK: %vla = +; CHECK: ==> %n elements, %n bytes allocated + %0 = shl i32 %n, 2 ; [#uses=1] + %vla7 = alloca i8, i32 %0, align 1 ; [#uses=1] +; CHECK: %vla7 = +; CHECK: ==> (4 * %n) elements, (4 * %n) bytes allocated + %call = call i32 @bar(i8* %vla) nounwind ; [#uses=1] + %arraydecay = getelementptr [10 x i8]* %test4a, i64 0, i64 0 ; [#uses=1] + %call10 = call i32 @bar(i8* %arraydecay) nounwind ; [#uses=1] + %call11 = call i32 @bar(i8* %vla7) nounwind ; [#uses=1] + %ptrconv14 = bitcast [10 x i32]* %test6a to i8* ; [#uses=1] + %call15 = call i32 @bar(i8* %ptrconv14) nounwind ; [#uses=1] + %add = add i32 %call10, %call ; [#uses=1] + %add12 = add i32 %add, %call11 ; [#uses=1] + %add16 = add i32 %add12, %call15 ; [#uses=1] + ret i32 %add16 +} + +define i32 @foo2(i32 %n) nounwind { +entry: + %call = malloc i8, i32 %n ; [#uses=1] +; CHECK: %call = +; CHECK: ==> %n elements, %n bytes allocated + %call2 = tail call i8* @calloc(i64 2, i64 4) nounwind ; [#uses=1] +; CHECK: %call2 = +; CHECK: ==> 8 elements, 8 bytes allocated + %call4 = tail call i8* @realloc(i8* null, i64 16) nounwind ; [#uses=1] +; CHECK: %call4 = +; CHECK: ==> 16 elements, 16 bytes allocated + %call6 = tail call i32 @bar(i8* %call) nounwind ; [#uses=1] + %call8 = tail call i32 @bar(i8* %call2) nounwind ; [#uses=1] + %call10 = tail call i32 @bar(i8* %call4) nounwind ; [#uses=1] + %add = add i32 %call8, %call6 ; [#uses=1] + %add11 = add i32 %add, %call10 ; [#uses=1] + ret i32 %add11 +} + +declare noalias i8* @calloc(i64, i64) nounwind + +declare noalias i8* @realloc(i8* nocapture, i64) nounwind