TwoAddressInstructionPass doesn't really know how to merge live intervals when lowering REG_SEQUENCE instructions.

Insert copies for REG_SEQUENCE sources that are not killed, to avoid breaking later passes. llvm-svn: 104146
2024-10-19 19:12:56 +02:00 · 2010-05-19 20:08:00 +00:00 · 2010-05-19 20:08:00 +00:00 · 6a2bfde3c8
commit 6a2bfde3c8
parent e3e13b67e2
2 changed files with 23 additions and 1 deletions
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@ -1275,9 +1275,10 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {

      if (!Seen.insert(SrcReg) ||
          MI->getParent() != DefMI->getParent() ||
+          !MI->getOperand(i).isKill() ||
          HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
        // REG_SEQUENCE cannot have duplicated operands, add a copy.
-        // Also add an copy if the source if live-in the block. We don't want
+        // Also add a copy if the source is live-in to the block. We don't want
        // to end up with a partial-redef of a livein, e.g.
        // BB0:
        // reg1051:10<def> =
@ -1287,6 +1288,10 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
        // BB2:
        // reg1051:9<def> =
        // LiveIntervalAnalysis won't like it.
+        //
+        // If the REG_SEQUENCE doesn't kill its source, keeping live variables
+        // correctly up to date becomes very difficult. Insert a copy.
+        //
        const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
        unsigned NewReg = MRI->createVirtualRegister(RC);
        MachineBasicBlock::iterator InsertLoc = MI;
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@ -302,6 +302,23 @@ bb14:                                             ; preds = %bb6
  ret i32 0
 }

+; This test used to crash the coalescer because live variables were not updated properly.
+define <8 x i8> @t11(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind {
+  %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+  %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1]
+  %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+  %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1]
+  %tmp2bd = add <8 x i8> zeroinitializer, %tmp2d  ; <<8 x i8>> [#uses=1]
+  %tmp2abcd = mul <8 x i8> zeroinitializer, %tmp2bd ; <<8 x i8>> [#uses=1]
+  %tmp2ef = sub <8 x i8> zeroinitializer, %tmp2f  ; <<8 x i8>> [#uses=1]
+  %tmp2efgh = mul <8 x i8> %tmp2ef, undef         ; <<8 x i8>> [#uses=2]
+  call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh)
+  %tmp2 = sub <8 x i8> %tmp2efgh, %tmp2abcd       ; <<8 x i8>> [#uses=1]
+  %tmp7 = mul <8 x i8> undef, %tmp2               ; <<8 x i8>> [#uses=1]
+  tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7)
+  ret <8 x i8> undef
+}
+
 declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly

 declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly