1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

Convert a subtract into a negate and an add when it helps x86

address folding.

llvm-svn: 71446
This commit is contained in:
Dan Gohman 2009-05-11 18:02:53 +00:00
parent f5f153394d
commit 0edabc8a6f
3 changed files with 96 additions and 1 deletions

View File

@ -868,6 +868,76 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
}
break;
case ISD::SUB: {
// Address-match a subtraction A-B by folding A into the addressing mode and
// placing the negation of B (built as 0-B) in the index register.
//
// Given A-B, if A can be completely folded into the address while leaving
// the index field unused, use -B as the index.
// This is a win if A has multiple parts that can be folded into
// the address. Also, this saves a mov if the base register has
// other uses, since it avoids a two-address sub instruction; however,
// it costs an additional mov if the index register has other uses.
//
// Every bail-out path below restores AM from Backup before leaving the
// case, so a rejected attempt leaves the caller's addressing mode untouched.

// Test if the LHS of the sub can be folded.
// A true result from the recursive MatchAddress call is treated as
// "could not fold" (this case itself returns false once it succeeds).
X86ISelAddressMode Backup = AM;
if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) {
AM = Backup;
break;
}
// Test if the index field is free for use.
// The negated RHS needs the index slot, so give up if folding the LHS
// already claimed it, or if the address mode is flagged RIP-relative.
if (AM.IndexReg.getNode() || AM.isRIPRel) {
AM = Backup;
break;
}
// Heuristic cost of the transformation, in rough units of mov instructions:
// positive terms are extra work, negative terms are savings. The rewrite is
// only performed when the total ends up strictly negative.
int Cost = 0;
SDValue RHS = N.getNode()->getOperand(1);
// If the RHS involves a register with multiple uses, this
// transformation incurs an extra mov, due to the neg instruction
// clobbering its operand.
// NOTE(review): besides the use-count check, CopyFromReg, TRUNCATE,
// ANY_EXTEND and ZERO_EXTEND-from-i32 are penalized as well; presumably
// these are nodes that may not produce a fresh register of their own --
// confirm against the instruction patterns.
if (!RHS.getNode()->hasOneUse() ||
RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
(RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
++Cost;
// If the base is a register with multiple uses, this
// transformation may save a mov.
// A frame-index base is credited with the same saving.
if ((AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base.Reg.getNode() &&
!AM.Base.Reg.getNode()->hasOneUse()) ||
AM.BaseType == X86ISelAddressMode::FrameIndexBase)
--Cost;
// If the folded LHS was interesting, this transformation saves
// address arithmetic.
// "Interesting" means folding the LHS newly introduced at least two of:
// a symbolic displacement, a non-zero Disp, a segment. Each comparison
// yields 0 or 1, so the sum counts how many of the three became set
// relative to Backup.
if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
((AM.Disp != 0) && (Backup.Disp == 0)) +
(AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
--Cost;
// If it doesn't look like it may be an overall win, don't do it.
if (Cost >= 0) {
AM = Backup;
break;
}
// Ok, the transformation is legal and appears profitable. Go for it.
// Materialize -RHS as the DAG node (0 - RHS) and use it as the index with
// a scale of 1.
SDValue Zero = CurDAG->getConstant(0, N.getValueType());
SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
AM.IndexReg = Neg;
AM.Scale = 1;
// Insert the new nodes into the topological ordering.
// A node is repositioned relative to N and given N's node id when its id
// is -1 or greater than N's. Zero is handled before Neg, which uses it as
// an operand.
// NOTE(review): -1 presumably marks a node created after ids were
// assigned, and RepositionNode presumably places its second argument
// immediately before its first -- confirm against SelectionDAG.
if (Zero.getNode()->getNodeId() == -1 ||
Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
CurDAG->RepositionNode(N.getNode(), Zero.getNode());
Zero.getNode()->setNodeId(N.getNode()->getNodeId());
}
if (Neg.getNode()->getNodeId() == -1 ||
Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
CurDAG->RepositionNode(N.getNode(), Neg.getNode());
Neg.getNode()->setNodeId(N.getNode()->getNodeId());
}
// Report success; AM now holds the folded LHS plus the negated RHS as index.
return false;
}
case ISD::ADD: {
X86ISelAddressMode Backup = AM;
if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) &&

View File

@ -217,7 +217,7 @@ def brtarget : Operand<OtherVT>;
// Define X86 specific addressing mode.
def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
[add, mul, shl, or, frameindex], []>;
[add, sub, mul, shl, or, frameindex], []>;
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.

View File

@ -0,0 +1,25 @@
; RUN: llvm-as < %s | llc -march=x86-64 > %t
; RUN: grep negl %t | count 1
; RUN: not grep sub %t
; RUN: grep mov %t | count 1
; RUN: grep {leal -4(} %t | count 1
; ISel the add of -4 with a neg and use an lea for the rest of the
; arithmetic.
; Returns %x_offs unchanged when it is not greater than 4 (signed compare);
; otherwise returns x_offs - 4*((x_offs - 5) >> 2) - 4, where >> is a logical
; shift and all arithmetic is i32.
define i32 @test(i32 %x_offs) nounwind readnone {
entry:
%t0 = icmp sgt i32 %x_offs, 4 ; <i1> [#uses=1]
br i1 %t0, label %bb.nph, label %bb2
bb.nph: ; preds = %entry
%tmp = add i32 %x_offs, -5 ; <i32> [#uses=1]
%tmp6 = lshr i32 %tmp, 2 ; <i32> [#uses=1]
; Multiplying by -4 makes the following add act as a subtraction of
; 4 * %tmp6 from %x_offs.
%tmp7 = mul i32 %tmp6, -4 ; <i32> [#uses=1]
%tmp8 = add i32 %tmp7, %x_offs ; <i32> [#uses=1]
; The trailing add of -4 is the constant the RUN lines expect to see folded
; into a single "leal -4(" in the generated assembly.
%tmp9 = add i32 %tmp8, -4 ; <i32> [#uses=1]
ret i32 %tmp9
bb2: ; preds = %entry
ret i32 %x_offs
}