1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[ARM] Lower "(x<<c) > 0x80000000U" to "lsls" on Thumb1.

This is extremely specific, but saves three instructions when it's
legal.  I don't think the code can be usefully generalized.

Differential Revision: https://reviews.llvm.org/D65351

llvm-svn: 367492
This commit is contained in:
Eli Friedman 2019-07-31 23:19:21 +00:00
parent c986dd14c4
commit 910badf6bd
6 changed files with 36 additions and 8 deletions

View File

@ -2298,6 +2298,7 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::tSUBSrr, ARM::tSUBrr},
{ARM::tSBCS, ARM::tSBC},
{ARM::tRSBS, ARM::tRSB},
{ARM::tLSLSri, ARM::tLSLri},
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},

View File

@ -1490,6 +1490,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::ADDE: return "ARMISD::ADDE";
case ARMISD::SUBC: return "ARMISD::SUBC";
case ARMISD::SUBE: return "ARMISD::SUBE";
case ARMISD::LSLS: return "ARMISD::LSLS";
case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
@ -4110,6 +4111,30 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
}
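// The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
// single "lsls x, c+1". After the shift, "C" holds bit 31 of (x<<c) and
// "Z" is set iff the remaining 31 bits are all zero, so the HI condition
// (C set and Z clear) is true exactly when the cmp-based HI would be.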
// FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
// some tweaks to the heuristics for the previous and->shift transform.
// FIXME: Optimize cases where the LHS isn't a shift.
if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(RHS) &&
cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
unsigned ShiftAmt =
cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
DAG.getVTList(MVT::i32, MVT::i32),
LHS.getOperand(0),
DAG.getConstant(ShiftAmt, dl, MVT::i32));
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
Shift.getValue(1), SDValue());
ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
return Chain.getValue(1);
}
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
// If the RHS is a constant zero then the V (overflow) flag will never be

View File

@ -103,6 +103,7 @@ class VectorType;
ADDE, // Add using carry
SUBC, // Sub with carry
SUBE, // Sub using carry
LSLS, // Shift left producing carry
VMOVRRD, // double to two gprs.
VMOVDRR, // Two gprs to double.

View File

@ -194,6 +194,7 @@ def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>;
// Left shift that additionally yields a flags result; uses the same
// SDTBinaryArithWithFlags type profile as ARMaddc/ARMsubc above.
def ARMlsls : SDNode<"ARMISD::LSLS", SDTBinaryArithWithFlags>;
def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>;
def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;

View File

@ -1362,6 +1362,12 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
[(set tGPR:$Rd, CPSR, (ARMsubc 0, tGPR:$Rn))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
// Thumb1-only pseudo-instruction: shift $Rn left by the immediate $imm5,
// writing the result to $Rd and defining CPSR. Selected from the ARMlsls
// node so that the flags produced by the shift can be consumed directly.
// NOTE(review): presumably converted to the real tLSLri by the post-isel
// hook through AddSubFlagsOpcodeMap -- confirm against ARMBaseInstrInfo.
def tLSLSri : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, imm0_31:$imm5),
2, IIC_iALUr,
[(set tGPR:$Rd, CPSR, (ARMlsls tGPR:$Rn, imm0_31:$imm5))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
}

View File

@ -31,10 +31,7 @@ define void @test2(i32 %x, void ()* %f) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: movs r2, #1
; CHECK-NEXT: lsls r2, r2, #31
; CHECK-NEXT: lsls r0, r0, #7
; CHECK-NEXT: cmp r0, r2
; CHECK-NEXT: lsls r0, r0, #8
; CHECK-NEXT: bhi .LBB1_2
; CHECK-NEXT: @ %bb.1: @ %if.then
; CHECK-NEXT: blx r1
@ -58,10 +55,7 @@ define void @test3(i32 %x, void ()* %f) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: movs r2, #1
; CHECK-NEXT: lsls r2, r2, #31
; CHECK-NEXT: lsls r0, r0, #2
; CHECK-NEXT: cmp r0, r2
; CHECK-NEXT: lsls r0, r0, #3
; CHECK-NEXT: bhi .LBB2_2
; CHECK-NEXT: @ %bb.1: @ %if.then
; CHECK-NEXT: blx r1