LoopVectorize: handle a loop index type narrower than the trip count type.

InnerLoopVectorizer::createEmptyLoop converts the expanded trip count
(Count) to the index type (IdxTy) whenever the two types differ. Before
this change the non-pointer case always used CreateZExtOrBitCast. This
patch adds a branch that compares scalar bit widths and emits
CreateTruncOrBitCast ("tr.cnt") when IdxTy is narrower than Count's type;
the pointer-cast path and the zero-extend path are left as they were.

NOTE(review): the existing context comment still says the index may need
to be "extended"; after this change it may also be truncated. The comment
is left untouched here because it is a context line of the hunk.

The new test, test/Transforms/LoopVectorize/i8-induction.ll, runs the
vectorizer (-force-vector-width=4, -force-vector-unroll=1) followed by
-dce and -instcombine over a loop whose phi %mul16 is marked as an i8
induction variable. The RUN line has no FileCheck stage, so the test
appears to verify only that opt completes successfully -- presumably it
guards against a crash or assertion in the cast above; confirm against
the original bug report.

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4bb8c436564..464ed97506f 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1033,11 +1033,14 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
 
   // We may need to extend the index in case there is a type mismatch.
   // We know that the count starts at zero and does not overflow.
+  unsigned IdxTyBW = IdxTy->getScalarSizeInBits();
   if (Count->getType() != IdxTy) {
     // The exit count can be of pointer type. Convert it to the correct
     // integer type.
     if (ExitCount->getType()->isPointerTy())
       Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc);
+    else if (IdxTyBW < Count->getType()->getScalarSizeInBits())
+      Count = CastInst::CreateTruncOrBitCast(Count, IdxTy, "tr.cnt", Loc);
     else
       Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc);
   }
diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll
new file mode 100644
index 00000000000..7759b7085a1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/i8-induction.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global i8 0, align 1
+@b = common global i8 0, align 1
+
+define void @f() nounwind uwtable ssp {
+scalar.ph:
+  store i8 0, i8* inttoptr (i64 1 to i8*), align 1, !tbaa !0
+  %0 = load i8* @a, align 1, !tbaa !0
+  br label %for.body
+
+for.body:
+  %mul16 = phi i8 [ 0, %scalar.ph ], [ %mul, %for.body ]  ; <------- i8 induction var.
+  %c.015 = phi i8 [ undef, %scalar.ph ], [ %conv8, %for.body ]
+  %conv2 = sext i8 %c.015 to i32
+  %tobool = icmp ne i8 %c.015, 0
+  %.sink = select i1 %tobool, i8 %c.015, i8 %0
+  %mul = mul i8 %mul16, %.sink
+  %add = add nsw i32 %conv2, 1
+  %conv8 = trunc i32 %add to i8
+  %sext = shl i32 %add, 24
+  %phitmp14 = icmp slt i32 %sext, 268435456
+  br i1 %phitmp14, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  store i8 %mul, i8* @b, align 1, !tbaa !0
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+