From 0edabc8a6f1d05f8c4e73865d8405d3280ec1ea8 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 11 May 2009 18:02:53 +0000 Subject: [PATCH] Convert a subtract into a negate and an add when it helps x86 address folding. llvm-svn: 71446 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 70 ++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrInfo.td | 2 +- test/CodeGen/X86/lea-neg.ll | 25 +++++++++++ 3 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/X86/lea-neg.ll diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index f12b138abbf..bd1fea71a6d 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -868,6 +868,76 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, } break; + case ISD::SUB: { + // Given A-B, if A can be completely folded into the address while + // leaving the index field unused, use -B as the index. + // This is a win if A has multiple parts that can be folded into + // the address. Also, this saves a mov if the base register has + // other uses, since it avoids a two-address sub instruction; however, + // it costs an additional mov if the index register has other uses. + + // Test if the LHS of the sub can be folded. + X86ISelAddressMode Backup = AM; + if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) { + AM = Backup; + break; + } + // Test if the index field is free for use. + if (AM.IndexReg.getNode() || AM.isRIPRel) { + AM = Backup; + break; + } + int Cost = 0; + SDValue RHS = N.getNode()->getOperand(1); + // If the RHS involves a register with multiple uses, this + // transformation incurs an extra mov, due to the neg instruction + // clobbering its operand. 
+ if (!RHS.getNode()->hasOneUse() || + RHS.getNode()->getOpcode() == ISD::CopyFromReg || + RHS.getNode()->getOpcode() == ISD::TRUNCATE || + RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || + (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && + RHS.getNode()->getOperand(0).getValueType() == MVT::i32)) + ++Cost; + // If the base is a register with multiple uses, this + // transformation may save a mov. + if ((AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base.Reg.getNode() && + !AM.Base.Reg.getNode()->hasOneUse()) || + AM.BaseType == X86ISelAddressMode::FrameIndexBase) + --Cost; + // If the folded LHS was interesting, this transformation saves + // address arithmetic. + if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + + ((AM.Disp != 0) && (Backup.Disp == 0)) + + (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) + --Cost; + // If it doesn't look like it may be an overall win, don't do it. + if (Cost >= 0) { + AM = Backup; + break; + } + + // Ok, the transformation is legal and appears profitable. Go for it. + SDValue Zero = CurDAG->getConstant(0, N.getValueType()); + SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS); + AM.IndexReg = Neg; + AM.Scale = 1; + + // Insert the new nodes into the topological ordering. 
+ if (Zero.getNode()->getNodeId() == -1 || + Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) { + CurDAG->RepositionNode(N.getNode(), Zero.getNode()); + Zero.getNode()->setNodeId(N.getNode()->getNodeId()); + } + if (Neg.getNode()->getNodeId() == -1 || + Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) { + CurDAG->RepositionNode(N.getNode(), Neg.getNode()); + Neg.getNode()->setNodeId(N.getNode()->getNodeId()); + } + return false; + } + case ISD::ADD: { X86ISelAddressMode Backup = AM; if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) && diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0d3b6857e68..1f103dd2f30 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -217,7 +217,7 @@ def brtarget : Operand; // Define X86 specific addressing mode. def addr : ComplexPattern; def lea32addr : ComplexPattern; + [add, sub, mul, shl, or, frameindex], []>; //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. diff --git a/test/CodeGen/X86/lea-neg.ll b/test/CodeGen/X86/lea-neg.ll new file mode 100644 index 00000000000..61cd75c9cca --- /dev/null +++ b/test/CodeGen/X86/lea-neg.ll @@ -0,0 +1,25 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: grep negl %t | count 1 +; RUN: not grep sub %t +; RUN: grep mov %t | count 1 +; RUN: grep {leal -4(} %t | count 1 + +; ISel the add of -4 with a neg and use an lea for the rest of the +; arithmetic. + +define i32 @test(i32 %x_offs) nounwind readnone { +entry: + %t0 = icmp sgt i32 %x_offs, 4 ; [#uses=1] + br i1 %t0, label %bb.nph, label %bb2 + +bb.nph: ; preds = %entry + %tmp = add i32 %x_offs, -5 ; [#uses=1] + %tmp6 = lshr i32 %tmp, 2 ; [#uses=1] + %tmp7 = mul i32 %tmp6, -4 ; [#uses=1] + %tmp8 = add i32 %tmp7, %x_offs ; [#uses=1] + %tmp9 = add i32 %tmp8, -4 ; [#uses=1] + ret i32 %tmp9 + +bb2: ; preds = %entry + ret i32 %x_offs +}