diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 14102d1f3dd..6144ecd002e 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -898,6 +898,90 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
       }
     }
   }
+
+  // If this is an instruction with a load folded into it, try unfolding
+  // the load, e.g. avoid this:
+  //   movq %rdx, %rcx
+  //   addq (%rax), %rcx
+  // in favor of this:
+  //   movq (%rax), %rcx
+  //   addq %rdx, %rcx
+  // because it's preferable to schedule a load rather than a register copy.
+  if (TID.mayLoad() && !regBKilled) {
+    // Determine if a load can be unfolded.
+    unsigned LoadRegIndex;
+    unsigned NewOpc =
+      TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+                                      /*UnfoldLoad=*/true,
+                                      /*UnfoldStore=*/false,
+                                      &LoadRegIndex);
+    if (NewOpc != 0) {
+      const TargetInstrDesc &UnfoldTID = TII->get(NewOpc);
+      if (UnfoldTID.getNumDefs() == 1) {
+        MachineFunction &MF = *mbbi->getParent();
+
+        // Unfold the load.
+        DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
+        const TargetRegisterClass *RC =
+          UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
+        unsigned Reg = MRI->createVirtualRegister(RC);
+        SmallVector<MachineInstr *, 2> NewMIs;
+        bool Success =
+          TII->unfoldMemoryOperand(MF, mi, Reg,
+                                   /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+                                   NewMIs);
+        (void)Success;
+        assert(Success &&
+               "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+               "succeeded!");
+        assert(NewMIs.size() == 2 &&
+               "Unfolded a load into multiple instructions!");
+        // The load was previously folded, so this is the only use.
+        NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+        // Tentatively insert the instructions into the block so that they
+        // look "normal" to the transformation logic.
+        mbbi->insert(mi, NewMIs[0]);
+        mbbi->insert(mi, NewMIs[1]);
+
+        DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+                     << "2addr: NEW INST: " << *NewMIs[1]);
+
+        // Transform the instruction, now that it no longer has a load.
+        unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+        unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+        MachineBasicBlock::iterator NewMI = NewMIs[1];
+        bool TransformSuccess =
+          TryInstructionTransform(NewMI, mi, mbbi,
+                                  NewSrcIdx, NewDstIdx, Dist);
+        if (TransformSuccess ||
+            NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+          // Success, or at least we made an improvement. Keep the unfolded
+          // instructions and discard the original.
+          if (LV) {
+            for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+              MachineOperand &MO = mi->getOperand(i);
+              if (MO.isReg() && MO.isUse() && MO.isKill())
+                LV->replaceKillInstruction(Reg, mi, NewMIs[0]);
+            }
+            LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+          }
+          mi->eraseFromParent();
+          mi = NewMIs[1];
+          if (TransformSuccess)
+            return true;
+        } else {
+          // Transforming didn't eliminate the tie and didn't lead to an
+          // improvement. Clean up the unfolded instructions and keep the
+          // original.
+          DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+          NewMIs[0]->eraseFromParent();
+          NewMIs[1]->eraseFromParent();
+        }
+      }
+    }
+  }
+
   return false;
 }
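To make the new code path concrete, here is a small function that should exercise it (a hypothetical example, not one of the tests updated below). The load of %y is foldable into the add during instruction selection, but %x stays live past the add, so the two-address pass previously had to copy %x into the destination register ahead of the folded add; with this change it can instead emit the load into the fresh destination register and commute the add.

  ; Hypothetical example, not taken from this patch's test suite.
  define i64 @unfold_example(i64 %x, i64* %p) nounwind {
  entry:
    %y = load i64* %p          ; folded into the add by isel
    %sum = add i64 %x, %y      ; two-address add on x86; tied to %x
    %prod = mul i64 %sum, %x   ; keeps %x live, so it is not killed at the add
    ret i64 %prod
  }

The test updates that follow all reflect the same before/after pattern: a register-to-register copy followed by a memory-operand instruction becomes a load followed by a register-to-register instruction.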
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index ef19d72150a..6f8b89c3240 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -11,12 +11,12 @@ define float @foo(float %x) nounwind {
   %tmp14 = fadd float %tmp12, %tmp7
   ret float %tmp14
 
-; CHECK: mulss LCPI0_0(%rip)
-; CHECK: mulss LCPI0_1(%rip)
+; CHECK: mulss
+; CHECK: mulss
 ; CHECK: addss
-; CHECK: mulss LCPI0_2(%rip)
+; CHECK: mulss
 ; CHECK: addss
-; CHECK: mulss LCPI0_3(%rip)
+; CHECK: mulss
 ; CHECK: addss
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 3f4f9ec1d13..b7e69b84bf8 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -465,14 +465,14 @@ bb5:                                              ; preds = %bb3, %entry
 
 ; And the one at %bb68, where we want to be sure to use superhero mode:
 ; CHECK: BB10_10:
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps 48(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps 32(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps 16(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps 16(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
 ; CHECK-NEXT: movaps %xmm{{.*}}, (%r{{[^,]*}})
 ; CHECK-NEXT: movaps %xmm{{.*}}, 16(%r{{[^,]*}})
 ; CHECK-NEXT: movaps %xmm{{.*}}, 32(%r{{[^,]*}})
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index 9506c9b5db1..d7b08052531 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -189,7 +189,7 @@ bb12:
 ; LINUX: call .L7$pb
 ; LINUX: .L7$pb:
 ; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
-; LINUX: addl .LJTI7_0@GOTOFF(
+; LINUX: .LJTI7_0@GOTOFF(
 ; LINUX: jmpl *
 
 ; LINUX: .LJTI7_0:
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 271ad1aad0b..8ca0b12b547 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -9,14 +9,15 @@
 target triple = "i686-apple-darwin8"
 
 define void @test({ double, double }* byval %z, double* %P) {
 entry:
-  %tmp = getelementptr { double, double }* %z, i32 0, i32 0    ; <double*> [#uses=1]
-  %tmp1 = load double* %tmp, align 8    ; <double> [#uses=1]
-  %tmp2 = tail call double @fabs( double %tmp1 )    ; <double> [#uses=1]
-  ; CHECK: andpd{{.*}}4(%esp), %xmm
   %tmp3 = load double* @G, align 16    ; <double> [#uses=1]
   %tmp4 = tail call double @fabs( double %tmp3 )    ; <double> [#uses=1]
+  volatile store double %tmp4, double* %P
+  %tmp = getelementptr { double, double }* %z, i32 0, i32 0    ; <double*> [#uses=1]
+  %tmp1 = volatile load double* %tmp, align 8    ; <double> [#uses=1]
+  %tmp2 = tail call double @fabs( double %tmp1 )    ; <double> [#uses=1]
+  ; CHECK: andpd{{.*}}4(%esp), %xmm
   %tmp6 = fadd double %tmp4, %tmp2    ; <double> [#uses=1]
-  store double %tmp6, double* %P, align 8
+  volatile store double %tmp6, double* %P, align 8
   ret void
 }
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index d05dff8928b..107bdf9de3e 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -2,9 +2,11 @@
 ; Check that lowered arguments on the stack do not overwrite each other.
-; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl %edi, %eax
+; Load %in1 from the stack into a temporary register (%eax).
+; CHECK: movl 32(%rsp), %eax
 ; Move param %in1 to temp register (%r10d).
 ; CHECK: movl 40(%rsp), %r10d
+; Add %p1 to %in1 in the temporary register (%eax).
+; CHECK: addl %edi, %eax
 ; Move param %in2 to stack.
 ; CHECK: movl %r10d, 32(%rsp)
 ; Move result of addition to stack.
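For context, the situation tailcallstack64.ll covers looks roughly like the following sketch (a hypothetical reduction; the actual test's types and parameter names may differ). %in1 and %in2 arrive on the caller's stack, and %in2's outgoing value is written over %in1's incoming slot, so %in1 must be read before that store. Because %p1 stays live into the tail call, the add's tied source is not killed, and the unfolded load of %in1 now appears ahead of the addl of %edi, exactly as the updated CHECK lines expect.

  ; Hypothetical reduction, not the verbatim test file.
  declare fastcc i32 @tailcallee(i32, i32, i32, i32, i32, i32, i32, i32)

  define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5,
                                i32 %p6, i32 %in1, i32 %in2) nounwind {
  entry:
    ; The load of stack argument %in1 is foldable into the add, but %p1 is
    ; still needed as an argument of the tail call, so the pass unfolds the
    ; load instead of copying %p1.
    %tmp = add i32 %in1, %p1
    %retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3,
                                               i32 %p4, i32 %p5, i32 %p6,
                                               i32 %in2, i32 %tmp)
    ret i32 %retval
  }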