1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[Thumb] Select (CMPZ X, -C) -> (CMPZ (ADDS X, C), 0)

The CMPZ #0 disappears during peepholing, leaving just a tADDi3, tADDi8 or t2ADDri. This avoids having to materialize the expensive negative constant in Thumb-1, and allows a shrinking from a 32-bit CMN to a 16-bit ADDS in Thumb-2.

llvm-svn: 281040
This commit is contained in:
James Molloy 2016-09-09 12:52:24 +00:00
parent 80e498d6dd
commit 36bdf2dfda
6 changed files with 81 additions and 6 deletions

View File

@ -3126,6 +3126,48 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->RemoveDeadNode(N);
return;
}
case ARMISD::CMPZ: {
  // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
  //   This allows us to avoid materializing the expensive negative constant.
  //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
  //   for its glue output.
  SDValue X = N->getOperand(0);
  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
  if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
    int64_t Addend = -C->getSExtValue();

    SDNode *Add = nullptr;
    // ADDS can be better than CMN if the immediate fits in a 16-bit ADDS,
    // which means either [0,256) for tADDi8 or [0,8) for tADDi3.
    // Outside that range we leave Add null so the autogenerated patterns
    // handle the node (in T2 that is a CMN, which is 32-bit but has a 12-bit
    // immediate range).
    if (Addend < 1 << 8) {
      if (Subtarget->isThumb2()) {
        SDValue Ops[] = {X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                         CurDAG->getRegister(0, MVT::i32)};
        Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
      } else {
        // Thumb-1: choose the opcode from the immediate's range. The narrow
        // [0,8) range has to be tested first; checking [0,256) first would
        // swallow every small immediate and tADDi3 could never be selected.
        // Both opcodes take the same operand list, led by the CPSR def.
        unsigned Opc = (Addend < 1 << 3) ? ARM::tADDi3 : ARM::tADDi8;
        SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
                         CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
        Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
      }
    }
    if (Add) {
      // Re-point the existing CMPZ at the ADDS result compared against zero,
      // keeping only the glue output that users of N consume.
      SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
      CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
    }
  }
  // Other cases are autogenerated.
  break;
}
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s
; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s
; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T
; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-A
; LSR should compare against the post-incremented induction variable.
; In this case, the immediate value is -2 which requires a cmn instruction.
@ -7,7 +7,8 @@
; CHECK-LABEL: f:
; CHECK: %for.body
; CHECK: sub{{.*}}[[IV:r[0-9]+]], #2
; CHECK: cmn{{.*}}[[IV]], #2
; CHECK-T: adds{{.*}}[[IV]], #2
; CHECK-A: cmn{{.*}}[[IV]], #2
; CHECK: bne
define i32 @f(i32* nocapture %a, i32 %i) nounwind readonly ssp {
entry:

View File

@ -280,7 +280,7 @@ entry:
; ARM: and r0, {{r[0-9]+}}, {{r[0-9]+}}
; T2-LABEL: t18:
; T2: and.w r0, {{r[0-9]+}}
; T2: and{{s|.w}} r0, {{r[0-9]+}}
%cmp = icmp ne i32 %x, 0
%cond = select i1 %cmp, i32 5, i32 2
%cmp1 = icmp ne i32 %x, -1

View File

@ -0,0 +1,32 @@
; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=T1 %s
; RUN: llc -mtriple=thumbv7m-eabi -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=T2 %s
; addri1 adds 3 to %a, compares the sum with zero and branches on the result.
; The expectations below require a flag-setting `adds r0, #3` whose flags feed
; the branch directly: on the Thumb-1 run the conditional branch must be the
; very next line, and on the Thumb-2 run no separate compare may appear.
; The immediate 3 lies in the [0,8) range.
; NOTE(review): %b is never used in the body -- presumably only there to fix
; the signature; confirm it is intentional.
; CHECK-LABEL: addri1:
; CHECK: adds r0, #3
; T1-NEXT: b{{eq|ne}}
; T2-NOT: cmp
define i32 @addri1(i32 %a, i32 %b) {
%c = add i32 %a, 3
%d = icmp eq i32 %c, 0
br i1 %d, label %true, label %false
true:
ret i32 4
false:
ret i32 5
}
; addri2 is the same shape as the small-immediate case but adds 254, a value
; that is outside [0,8) yet still inside [0,256). It adds 254 to %a, compares
; the sum with zero and branches on the result.
; The expectations below require a flag-setting `adds r0, #254` with the
; conditional branch immediately after it on the Thumb-1 run, and no separate
; compare on the Thumb-2 run.
; NOTE(review): %b is never used in the body -- presumably only there to fix
; the signature; confirm it is intentional.
; CHECK-LABEL: addri2:
; CHECK: adds r0, #254
; T1-NEXT: b{{eq|ne}}
; T2-NOT: cmp
define i32 @addri2(i32 %a, i32 %b) {
%c = add i32 %a, 254
%d = icmp eq i32 %c, 0
br i1 %d, label %true, label %false
true:
ret i32 4
false:
ret i32 5
}

View File

@ -16,7 +16,7 @@ entry:
bb: ; preds = %bb, %entry
; CHECK: LBB0_1:
; CHECK: subs [[R2:r[0-9]+]], #1
; CHECK: cmp.w [[R2]], #-1
; CHECK: adds {{.*}}, [[R2]], #1
; CHECK: bne LBB0_1
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]

View File

@ -3,7 +3,7 @@
; -0x000000bb = 4294967109
define i1 @f1(i32 %a) {
; CHECK-LABEL: f1:
; CHECK: cmn.w {{r.*}}, #187
; CHECK: adds {{r.*}}, #187
%tmp = icmp ne i32 %a, 4294967109
ret i1 %tmp
}