diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 020c60cd30c..8c66decaaf5 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -338,10 +338,20 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
   if (SrcSize < DestSize)
     return nullptr;
 
+  // Catch the obvious splat cases (since all-zeros can coerce non-integral
+  // pointers legally).
+  if (C->isNullValue() && !DestTy->isX86_MMXTy())
+    return Constant::getNullValue(DestTy);
+  if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() &&
+      !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
+    return Constant::getAllOnesValue(DestTy);
+
   // If the type sizes are the same and a cast is legal, just directly
   // cast the constant.
   // But be careful not to coerce non-integral pointers illegally.
-  if (SrcSize == DestSize) {
+  if (SrcSize == DestSize &&
+      DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
+          DL.isNonIntegralPointerType(DestTy->getScalarType())) {
     Instruction::CastOps Cast = Instruction::BitCast;
     // If we are going from a pointer to int or vice versa, we spell the cast
     // differently.
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index 75da481b8c1..6ff08cd2871 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -403,12 +403,6 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
   if (Offset == -1)
     return Offset;
 
-  // Don't coerce non-integral pointers to integers or vice versa, and the
-  // memtransfer is implicitly a raw byte code
-  if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
-    // TODO: Can allow nullptrs from constant zeros
-    return -1;
-
   unsigned AS = Src->getType()->getPointerAddressSpace();
   // Otherwise, see if we can constant fold a load from the constant with the
   // offset applied as appropriate.
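The two hunks above work together: ConstantFoldLoadThroughBitcast gains a fast path that folds an all-zero source constant directly to the destination type's null value (legal even for non-integral pointers, since null needs no bit-level reinterpretation), and an all-ones source to the destination's all-ones value for non-pointer destinations, while the same-size bitcast path now refuses to coerce a value between integral and non-integral pointer representations. With that guard in place, the blanket VNCoercion bail-out for non-integral loads can be dropped and the decision deferred to constant folding. Below is a minimal sketch of IR the null fast path can now fold; the datalayout string and the @Zeros/@fold_zero_to_null names are illustrative, not part of this patch:

; Illustrative module: "ni:4" marks addrspace(4) as non-integral.
target datalayout = "e-p:64:64:64-ni:4"

@Zeros = constant <4 x i64> zeroinitializer

define i8 addrspace(4)* @fold_zero_to_null() {
entry:
  ; Loading a non-integral pointer out of an all-zero constant is caught by
  ; the new splat fast path; with this patch, constant folding reduces the
  ; function body to:
  ;   ret i8 addrspace(4)* null
  %loc = bitcast <4 x i64>* @Zeros to i8 addrspace(4)**
  %ref = load i8 addrspace(4)*, i8 addrspace(4)** %loc
  ret i8 addrspace(4)* %ref
}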
diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
index 133bc71c9ce..a017dda926e 100644
--- a/test/Transforms/GVN/non-integral-pointers.ll
+++ b/test/Transforms/GVN/non-integral-pointers.ll
@@ -230,6 +230,21 @@ entry:
   ret i8 addrspace(4)* %ref
 }
 
+define i64 addrspace(4)* @forward_memcopy2(i64 addrspace(4)* addrspace(4)* %loc) {
+; CHECK-LABEL: @forward_memcopy2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false)
+; CHECK-NEXT:    ret i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)
+;
+entry:
+  %loc.bc = bitcast i64 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
+  %ref = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* %loc
+  ret i64 addrspace(4)* %ref
+}
+
 define <1 x i8 addrspace(4)*> @neg_forward_memcpy_vload(<1 x i8 addrspace(4)*> addrspace(4)* %loc) {
 ; CHECK-LABEL: @neg_forward_memcpy_vload(
 ; CHECK-NEXT:  entry:
@@ -278,15 +293,30 @@ entry:
   ret <4 x i64> %ref
 }
 
+define <1 x i64 addrspace(4)*> @forward_memcpy_vload3(<4 x i64 addrspace(4)*> addrspace(4)* %loc) {
+; CHECK-LABEL: @forward_memcpy_vload3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false)
+; CHECK-NEXT:    ret <1 x i64 addrspace(4)*>
+;
+entry:
+  %loc.bc = bitcast <4 x i64 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
+  %ref = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* %loc
+  %val = extractelement <4 x i64 addrspace(4)*> %ref, i32 0
+  %ret = insertelement <1 x i64 addrspace(4)*> undef, i64 addrspace(4)* %val, i32 0
+  ret <1 x i64 addrspace(4)*> %ret
+}
+
 ; Can forward since we can do so w/o breaking types
-; TODO: missed optimization
 define i8 addrspace(4)* @forward_memcpy_zero(i8 addrspace(4)* addrspace(4)* %loc) {
 ; CHECK-LABEL: @forward_memcpy_zero(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @ZeroConstant to i8*), i64 8, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], align 8
-; CHECK-NEXT:    ret i8 addrspace(4)* [[REF]]
+; CHECK-NEXT:    ret i8 addrspace(4)* null
 ;
 entry:
   %loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
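The updated forward_memcpy_zero checks exercise the null-splat fold end to end; the former TODO is now resolved. Note that the all-ones branch deliberately excludes pointer destinations (the "Don't get ones for ptr types!" guard): there is no meaningful all-ones pointer constant, and materializing one for a non-integral pointer would invent a bit representation. A hedged sketch under the same illustrative datalayout, with hypothetical @Ones/@fold_ones_to_int/@no_fold_ones_to_ptr names, showing which side of the guard folds:

; Illustrative module: "ni:4" marks addrspace(4) as non-integral.
target datalayout = "e-p:64:64:64-ni:4"

@Ones = constant <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>

define i64 @fold_ones_to_int() {
entry:
  ; Integer destination: the all-ones fast path applies, so this load
  ; folds to "ret i64 -1".
  %loc = bitcast <4 x i64>* @Ones to i64*
  %ref = load i64, i64* %loc
  ret i64 %ref
}

define i8 addrspace(4)* @no_fold_ones_to_ptr() {
entry:
  ; Pointer destination: isPtrOrPtrVectorTy() blocks the fast path, and the
  ; new non-integral check blocks the same-size cast, so the load stays as-is.
  %loc = bitcast <4 x i64>* @Ones to i8 addrspace(4)**
  %ref = load i8 addrspace(4)*, i8 addrspace(4)** %loc
  ret i8 addrspace(4)* %ref
}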