diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 19afe4157dc..a5b862bf930 100644
--- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -147,7 +147,7 @@ private:
   static const unsigned MaxDepth = 3;
 
   bool isConsecutiveAccess(Value *A, Value *B);
-  bool areConsecutivePointers(Value *PtrA, Value *PtrB, const APInt &PtrDelta,
+  bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta,
                               unsigned Depth = 0) const;
   bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta,
                                    unsigned Depth) const;
@@ -336,18 +336,29 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
 }
 
 bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB,
-                                        const APInt &PtrDelta,
-                                        unsigned Depth) const {
+                                        APInt PtrDelta, unsigned Depth) const {
   unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
   APInt OffsetA(PtrBitWidth, 0);
   APInt OffsetB(PtrBitWidth, 0);
   PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
   PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
 
-  if (DL.getTypeStoreSizeInBits(PtrA->getType()) != PtrBitWidth ||
-      DL.getTypeStoreSizeInBits(PtrB->getType()) != PtrBitWidth)
+  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
+
+  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
     return false;
 
+  // In case we have to shrink the pointer,
+  // stripAndAccumulateInBoundsConstantOffsets should properly handle a
+  // possible overflow and the value should fit into the smallest data type
+  // used in the cast/gep chain.
+  assert(OffsetA.getMinSignedBits() <= NewPtrBitWidth &&
+         OffsetB.getMinSignedBits() <= NewPtrBitWidth);
+
+  OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
+  OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
+  PtrDelta = PtrDelta.sextOrTrunc(NewPtrBitWidth);
+
   APInt OffsetDelta = OffsetB - OffsetA;
 
   // Check if they are based on the same pointer. That makes the offsets
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
index 1e9ffdba341..1cb8d14f177 100644
--- a/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
@@ -54,4 +54,43 @@ entry:
   ret void
 }
 
+; CHECK-LABEL: @ext_ptr
+; CHECK: load <2 x i32>
+define void @ext_ptr(i32 addrspace(5)* %p) {
+entry:
+  %gep1 = getelementptr inbounds i32, i32 addrspace(5)* %p, i64 0
+  %gep2 = getelementptr inbounds i32, i32 addrspace(5)* %p, i64 1
+  %a.ascast = addrspacecast i32 addrspace(5)* %gep1 to i32*
+  %b.ascast = addrspacecast i32 addrspace(5)* %gep2 to i32*
+  %tmp1 = load i32, i32* %a.ascast, align 8
+  %tmp2 = load i32, i32* %b.ascast, align 8
+  unreachable
+}
+
+; CHECK-LABEL: @shrink_ptr
+; CHECK: load <2 x i32>
+define void @shrink_ptr(i32* %p) {
+entry:
+  %gep1 = getelementptr inbounds i32, i32* %p, i64 0
+  %gep2 = getelementptr inbounds i32, i32* %p, i64 1
+  %a.ascast = addrspacecast i32* %gep1 to i32 addrspace(5)*
+  %b.ascast = addrspacecast i32* %gep2 to i32 addrspace(5)*
+  %tmp1 = load i32, i32 addrspace(5)* %a.ascast, align 8
+  %tmp2 = load i32, i32 addrspace(5)* %b.ascast, align 8
+  unreachable
+}
+
+; CHECK-LABEL: @ext_ptr_wrap
+; CHECK: load <2 x i8>
+define void @ext_ptr_wrap(i8 addrspace(5)* %p) {
+entry:
+  %gep1 = getelementptr inbounds i8, i8 addrspace(5)* %p, i64 0
+  %gep2 = getelementptr inbounds i8, i8 addrspace(5)* %p, i64 4294967295
+  %a.ascast = addrspacecast i8 addrspace(5)* %gep1 to i8*
+  %b.ascast = addrspacecast i8 addrspace(5)* %gep2 to i8*
+  %tmp1 = load i8, i8* %a.ascast, align 1
+  %tmp2 = load i8, i8* %b.ascast, align 1
+  unreachable
+}
+
 !0 = !{}
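
The width-normalization step above can be illustrated in isolation. The following is a minimal standalone sketch, not part of the patch: the 64-bit flat / 32-bit addrspace(5) widths, the concrete offset values, and the main() harness are illustrative assumptions. It shows why OffsetA, OffsetB, and PtrDelta are brought to the new pointer width with sextOrTrunc before the delta comparison: APInt arithmetic and comparison require operands of equal bit width, so offsets accumulated at the original (wider) pointer width must be truncated once the cast chain has been stripped down to a narrower pointer.

    // width_sketch.cpp -- illustrative only; mirrors the sextOrTrunc logic
    // added to Vectorizer::areConsecutivePointers.
    #include "llvm/ADT/APInt.h"
    #include <cassert>

    using llvm::APInt;

    int main() {
      // Offsets were accumulated at the width of the original pointer
      // (e.g. a 64-bit flat pointer), but after stripping the addrspacecast
      // the underlying pointer is 32 bits (e.g. AMDGPU addrspace(5)).
      const unsigned NewPtrBitWidth = 32;
      APInt OffsetA(64, 0);
      APInt OffsetB(64, 4);
      APInt PtrDelta(64, 4);

      // The accumulated in-bounds offsets are expected to fit into the
      // smallest pointer type in the cast/gep chain.
      assert(OffsetA.getMinSignedBits() <= NewPtrBitWidth &&
             OffsetB.getMinSignedBits() <= NewPtrBitWidth);

      // Normalize everything to the new width; APInt operator- and operator==
      // require both operands to have the same bit width.
      OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
      OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
      PtrDelta = PtrDelta.sextOrTrunc(NewPtrBitWidth);

      APInt OffsetDelta = OffsetB - OffsetA;
      // The accesses are consecutive iff the deltas match.
      return OffsetDelta == PtrDelta ? 0 : 1;
    }

The sketch only depends on LLVM's Support library; something like `clang++ width_sketch.cpp $(llvm-config --cxxflags --ldflags --libs support)` should build it against an installed LLVM.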