|
|
|
@ -195,23 +195,23 @@ define i32 @add_i32_i32(i32* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
|
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
|
|
|
|
|
; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
|
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -223,7 +223,7 @@ define i32 @add_i32_i32(i32* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !2
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -263,24 +263,24 @@ define i32 @add_i16_i32(i16* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0
|
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <4 x i16>*
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 2
|
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
|
|
|
|
|
; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[TMP4]]
|
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
|
|
|
|
|
; CHECK-NEXT: [[TMP6]] = add i32 [[TMP5]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
|
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -293,7 +293,7 @@ define i32 @add_i16_i32(i16* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !5
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -334,24 +334,24 @@ define i32 @add_i8_i32(i8* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
|
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
|
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
|
|
|
|
|
; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[TMP4]]
|
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
|
|
|
|
|
; CHECK-NEXT: [[TMP6]] = add i32 [[TMP5]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6
|
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -364,7 +364,7 @@ define i32 @add_i8_i32(i8* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !7
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -404,23 +404,23 @@ define signext i16 @add_i16_i16(i16* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0
|
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <8 x i16>*
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2
|
|
|
|
|
; CHECK-NEXT: [[TMP4]] = add <8 x i16> [[WIDE_LOAD]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[WIDE_LOAD]])
|
|
|
|
|
; CHECK-NEXT: [[TMP5]] = add i16 [[TMP4]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
|
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP4]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -432,7 +432,7 @@ define signext i16 @add_i16_i16(i16* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !9
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -472,24 +472,24 @@ define signext i16 @add_i8_i16(i8* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
|
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <8 x i8>*
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1
|
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16>
|
|
|
|
|
; CHECK-NEXT: [[TMP5]] = add <8 x i16> [[VEC_PHI]], [[TMP4]]
|
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP4]])
|
|
|
|
|
; CHECK-NEXT: [[TMP6]] = add i16 [[TMP5]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10
|
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP5]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -502,7 +502,7 @@ define signext i16 @add_i8_i16(i8* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !11
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -542,23 +542,23 @@ define zeroext i8 @add_i8_i8(i8* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i8 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
|
|
|
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1
|
|
|
|
|
; CHECK-NEXT: [[TMP4]] = add <16 x i8> [[WIDE_LOAD]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[WIDE_LOAD]])
|
|
|
|
|
; CHECK-NEXT: [[TMP5]] = add i8 [[TMP4]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
|
|
|
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP4]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -570,7 +570,7 @@ define zeroext i8 @add_i8_i8(i8* nocapture readonly %x, i32 %n) #0 {
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !13
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -815,7 +815,7 @@ define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y,
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
|
|
|
|
@ -826,17 +826,17 @@ define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y,
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
|
|
|
|
|
; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[TMP7]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
|
|
|
|
|
; CHECK-NEXT: [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
|
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -851,7 +851,7 @@ define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y,
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !15
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -893,7 +893,7 @@ define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y,
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0
|
|
|
|
@ -906,17 +906,17 @@ define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y,
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2
|
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
|
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]]
|
|
|
|
|
; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[TMP9]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
|
|
|
|
|
; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
|
|
|
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16
|
|
|
|
|
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -933,7 +933,7 @@ define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y,
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !17
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -977,7 +977,7 @@ define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
|
|
|
|
@ -990,17 +990,17 @@ define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1
|
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i32>
|
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw <4 x i32> [[TMP8]], [[TMP4]]
|
|
|
|
|
; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[TMP9]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
|
|
|
|
|
; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
|
|
|
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18
|
|
|
|
|
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -1017,7 +1017,7 @@ define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !19
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -1061,7 +1061,7 @@ define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture reado
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0
|
|
|
|
@ -1072,17 +1072,17 @@ define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture reado
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <8 x i16>*
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP6]], align 2
|
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i16> [[WIDE_LOAD1]], [[WIDE_LOAD]]
|
|
|
|
|
; CHECK-NEXT: [[TMP8]] = add <8 x i16> [[TMP7]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP7]])
|
|
|
|
|
; CHECK-NEXT: [[TMP9]] = add i16 [[TMP8]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
|
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP8]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_013:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -1097,7 +1097,7 @@ define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture reado
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !21
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -1139,7 +1139,7 @@ define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
|
|
|
|
@ -1152,17 +1152,17 @@ define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP7]], align 1
|
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD1]] to <8 x i16>
|
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw <8 x i16> [[TMP8]], [[TMP4]]
|
|
|
|
|
; CHECK-NEXT: [[TMP10]] = add <8 x i16> [[TMP9]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP9]])
|
|
|
|
|
; CHECK-NEXT: [[TMP11]] = add i16 [[TMP10]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
|
|
|
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !22
|
|
|
|
|
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !22
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP12:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP10]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_013:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -1179,7 +1179,7 @@ define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !23
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
@ -1223,7 +1223,7 @@ define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly %
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i8 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]]
|
|
|
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
|
|
|
|
@ -1234,17 +1234,17 @@ define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly %
|
|
|
|
|
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <16 x i8>*
|
|
|
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 1
|
|
|
|
|
; CHECK-NEXT: [[TMP7:%.*]] = mul <16 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD]]
|
|
|
|
|
; CHECK-NEXT: [[TMP8]] = add <16 x i8> [[TMP7]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP7]])
|
|
|
|
|
; CHECK-NEXT: [[TMP9]] = add i8 [[TMP8]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
|
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !24
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !24
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP8]])
|
|
|
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
|
|
|
; CHECK: scalar.ph:
|
|
|
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
|
|
|
; CHECK: for.body:
|
|
|
|
|
; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
|
|
@ -1259,7 +1259,7 @@ define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly %
|
|
|
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
|
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !25
|
|
|
|
|
; CHECK: for.cond.cleanup.loopexit:
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
|
|
|
|
; CHECK: for.cond.cleanup:
|
|
|
|
|
; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
|
|
|
|