From dd6ae694d679fdb80a408d4cef1af0e25a0dbdef Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 23 Jan 2012 21:01:15 +0000 Subject: [PATCH] Fix PR11829. PostRA LICM was too aggressive. This fixes a typo in r148589. llvm-svn: 148724 --- lib/CodeGen/MachineLICM.cpp | 8 +- test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll | 105 +++++++++++++++++++++ 2 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index b9028ec6818..49a109e252c 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -462,13 +462,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI, // register, then this is not safe. Two defs is indicated by setting a // PhysRegClobbers bit. for (const unsigned *AS = TRI->getOverlaps(Reg); *AS; ++AS) { - if (PhysRegDefs.test(Reg)) - PhysRegClobbers.set(Reg); - if (PhysRegClobbers.test(Reg)) + if (PhysRegDefs.test(*AS)) + PhysRegClobbers.set(*AS); + if (PhysRegClobbers.test(*AS)) // MI defined register is seen defined by another instruction in // the loop, it cannot be a LICM candidate. RuledOut = true; - PhysRegDefs.set(Reg); + PhysRegDefs.set(*AS); } } diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll new file mode 100644 index 00000000000..926daafbb7f --- /dev/null +++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll @@ -0,0 +1,105 @@ +; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs +; PR11829 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb + unreachable + +bb2: ; preds = %bb + br label %bb3 + +bb3: ; preds = %bb4, %bb2 + %tmp = icmp slt i32 undef, undef + br i1 %tmp, label %bb4, label %bb67 + +bb4: ; preds = %bb3 + %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0 + %tmp6 = and <4 x i32> %tmp5, + %tmp7 = or <4 x i32> %tmp6, + %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float> + %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> to i128) to i64) to i128)) to <4 x float>) + %tmp10 = fmul <4 x float> undef, %tmp9 + %tmp11 = fadd <4 x float> undef, %tmp10 + %tmp12 = bitcast <4 x float> zeroinitializer to i128 + %tmp13 = lshr i128 %tmp12, 64 + %tmp14 = trunc i128 %tmp13 to i64 + %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1 + %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind + %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind + %tmp18 = fmul <4 x float> %tmp17, %tmp16 + %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind + %tmp20 = fmul <4 x float> %tmp19, %tmp18 + %tmp21 = fmul <4 x float> %tmp20, zeroinitializer + %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind + call arm_aapcs_vfpcc void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind + %tmp23 = bitcast <4 x float> %tmp22 to i128 + %tmp24 = trunc i128 %tmp23 to i64 + %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0 + %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1 + %tmp27 = load float* undef, align 4, !tbaa !2 + %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3 + %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0 + %tmp30 = and <4 x i32> %tmp29, + %tmp31 = or <4 x i32> %tmp30, + %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float> + %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> to i128) to i64) to i128)) to <4 x float>) + %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind + %tmp35 = fmul <4 x float> %tmp34, undef + %tmp36 = fmul <4 x float> %tmp35, undef + %tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp38 = load float* undef, align 4, !tbaa !2 + %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0 + %tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp41 = load float* undef, align 4, !tbaa !2 + %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3 + %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer + %tmp44 = fmul <4 x float> %tmp33, %tmp43 + %tmp45 = fadd <4 x float> %tmp42, %tmp44 + %tmp46 = fsub <4 x float> %tmp45, undef + %tmp47 = fmul <4 x float> %tmp46, %tmp36 + %tmp48 = fadd <4 x float> undef, %tmp47 + %tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp50 = load float* undef, align 4, !tbaa !2 + %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3 + %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind + %tmp54 = load float* %tmp52, align 4, !tbaa !2 + %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3 + %tmp56 = fsub <4 x float> , %tmp22 + %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind + %tmp58 = fmul <4 x float> undef, %tmp57 + %tmp59 = fsub <4 x float> %tmp51, %tmp48 + %tmp60 = fsub <4 x float> , %tmp58 + %tmp61 = fmul <4 x float> %tmp59, %tmp60 + %tmp62 = fadd <4 x float> %tmp48, %tmp61 + call arm_aapcs_vfpcc void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef) + %tmp63 = bitcast <4 x float> %tmp62 to i128 + %tmp64 = lshr i128 %tmp63, 64 + %tmp65 = trunc i128 %tmp64 to i64 + %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1 + call arm_aapcs_vfpcc void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef) + br label %bb3 + +bb67: ; preds = %bb3 + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64]) + +declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2 + +declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2 + +declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} +!2 = metadata !{metadata !"float", metadata !0}