diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index efdd6a7ed6e..94f979a3002 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -574,6 +574,27 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
 /// isDereferenceablePointer - Test if this value is always a pointer to
 /// allocated and suitably aligned memory for a simple load or store.
 bool Value::isDereferenceablePointer(const DataLayout *DL) const {
+  // When dereferenceability information is provided by a dereferenceable
+  // attribute, we know exactly how many bytes are dereferenceable. If we can
+  // determine the exact offset to the attributed variable, we can use that
+  // information here.
+  Type *Ty = getType()->getPointerElementType();
+  if (Ty->isSized() && DL) {
+    APInt Offset(DL->getTypeStoreSizeInBits(getType()), 0);
+    const Value *BV = stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
+
+    APInt DerefBytes(Offset.getBitWidth(), 0);
+    if (const Argument *A = dyn_cast<Argument>(BV))
+      DerefBytes = A->getDereferenceableBytes();
+    else if (ImmutableCallSite CS = BV)
+      DerefBytes = CS.getDereferenceableBytes(0);
+
+    if (DerefBytes.getBoolValue() && Offset.isNonNegative()) {
+      if (DerefBytes.uge(Offset + DL->getTypeStoreSize(Ty)))
+        return true;
+    }
+  }
+
   SmallPtrSet<const Value *, 32> Visited;
   return ::isDereferenceablePointer(this, DL, Visited);
 }
diff --git a/test/Transforms/LICM/hoist-deref-load.ll b/test/Transforms/LICM/hoist-deref-load.ll
index 972d75fff35..c230d1dcd43 100644
--- a/test/Transforms/LICM/hoist-deref-load.ll
+++ b/test/Transforms/LICM/hoist-deref-load.ll
@@ -82,5 +82,87 @@ for.end:                                          ; preds = %for.inc, %entry
   ret void
 }
 
+; This test represents the following function:
+; void test3(int * restrict a, int * restrict b, int c[static 3], int n) {
+;   for (int i = 0; i < n; ++i)
+;     if (a[i] > 0)
+;       a[i] = c[2]*b[i];
+; }
+; and we want to hoist the load of c[2] out of the loop. This can be done only
+; because the dereferenceable attribute is on %c.
+
+; CHECK-LABEL: @test3
+; CHECK: load i32* %c2, align 4
+; CHECK: for.body:
+
+define void @test3(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable(12) %c, i32 %n) #0 {
+entry:
+  %cmp11 = icmp sgt i32 %n, 0
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %c2 = getelementptr inbounds i32* %c, i64 2 ; &c[2]: byte offset 8, so the 4-byte load ends at byte 12, exactly what dereferenceable(12) covers
+  %1 = load i32* %c2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; This is the same as @test3, but with a dereferenceable attribute on %c with a
+; size too small to cover c[2] (and so we should not hoist it).
+
+; CHECK-LABEL: @test4
+; CHECK: if.then:
+; CHECK: load i32* %c2, align 4
+
+define void @test4(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable(11) %c, i32 %n) #0 {
+entry:
+  %cmp11 = icmp sgt i32 %n, 0
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %c2 = getelementptr inbounds i32* %c, i64 2 ; &c[2]: byte offset 8, so the 4-byte load needs 12 bytes; dereferenceable(11) is one byte short
+  %1 = load i32* %c2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
 attributes #0 = { nounwind uwtable }
 