1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[ARM] Constant long shift combines

This changes the way that asrl and lsll intrinsics are lowered, going
via the ISEL ASRL and LSLL nodes instead of straight to machine nodes.
On top of that, it adds some constant folds for long shifts, in case it
turns out that the shift amount was either constant or 0.

Differential Revision: https://reviews.llvm.org/D75553
This commit is contained in:
David Green 2020-03-12 19:40:27 +00:00
parent 8574f053c4
commit 08ce568726
4 changed files with 78 additions and 90 deletions

View File

@ -4681,12 +4681,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_mve_sqrshrl:
SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
return;
case Intrinsic::arm_mve_lsll:
SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false);
return;
case Intrinsic::arm_mve_asrl:
SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false);
return;
case Intrinsic::arm_mve_vadc:
case Intrinsic::arm_mve_vadc_predicated:

View File

@ -3808,6 +3808,12 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
case Intrinsic::arm_mve_vreinterpretq:
return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
Op.getOperand(1));
case Intrinsic::arm_mve_lsll:
return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::arm_mve_asrl:
return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
}
@ -14138,6 +14144,34 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
/// Fold a long-shift node whose shift amount (operand 2) is a constant.
///
/// Two cases are handled:
///  * Amount == 0: every use of \p N is replaced by a merge of its first two
///    operands, and an empty SDValue is returned.
///  * Amount in [-32, -1]: the node is rebuilt with the opposite direction and
///    the negated (now positive) amount. LSLL becomes LSRL; any other opcode
///    reaching this function becomes LSLL. Uses of \p N are redirected to the
///    new node, which is also returned.
///
/// Any other shift amount (non-constant, positive, or below -32) is left
/// untouched and an empty SDValue is returned.
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
  // Nothing to fold unless the shift amount is a compile-time constant.
  auto *AmountNode = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!AmountNode)
    return SDValue();

  SDLoc Loc(N);
  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  int Amount = AmountNode->getSExtValue();

  // A shift by zero just forwards both input values unchanged.
  if (Amount == 0) {
    SDValue Forwarded = DAG.getMergeValues({Src0, Src1}, Loc);
    DAG.ReplaceAllUsesWith(N, Forwarded.getNode());
    return SDValue();
  }

  // Only small negative amounts are rewritten; these can come up from uses of
  // the intrinsics.
  if (Amount < -32 || Amount >= 0)
    return SDValue();

  // X << -C  ->  X >> C, and vice versa.
  unsigned FlippedOpcode = ARMISD::LSLL;
  if (N->getOpcode() == ARMISD::LSLL)
    FlippedOpcode = ARMISD::LSRL;

  SDValue PositiveAmount = DAG.getConstant(-Amount, Loc, MVT::i32);
  SDValue Flipped = DAG.getNode(FlippedOpcode, Loc, N->getVTList(), Src0, Src1,
                                PositiveAmount);
  DAG.ReplaceAllUsesWith(N, Flipped.getNode());
  return Flipped;
}
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
@ -15033,6 +15067,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformVCMPCombine(N, DCI, Subtarget);
case ISD::VECREDUCE_ADD:
return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
case ARMISD::ASRL:
case ARMISD::LSRL:
case ARMISD::LSLL:
return PerformLongShiftCombine(N, DCI.DAG);
case ARMISD::SMULWB: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);

View File

@ -7,8 +7,6 @@ declare {i32, i32} @llvm.arm.mve.lsll(i32, i32, i32)
define i64 @asrl_0(i64 %X) {
; CHECK-LABEL: asrl_0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -27,8 +25,7 @@ entry:
define i64 @asrl_23(i64 %X) {
; CHECK-LABEL: asrl_23:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #23
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: asrl r0, r1, #23
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -47,8 +44,7 @@ entry:
define i64 @asrl_32(i64 %X) {
; CHECK-LABEL: asrl_32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #32
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: asrl r0, r1, #32
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -127,8 +123,7 @@ entry:
define i64 @asrl_m2(i64 %X) {
; CHECK-LABEL: asrl_m2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #1
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #2
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -147,8 +142,7 @@ entry:
define i64 @asrl_m32(i64 %X) {
; CHECK-LABEL: asrl_m32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #31
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #32
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -210,8 +204,6 @@ entry:
define i64 @lsll_0(i64 %X) {
; CHECK-LABEL: lsll_0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -230,8 +222,7 @@ entry:
define i64 @lsll_23(i64 %X) {
; CHECK-LABEL: lsll_23:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #23
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #23
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -250,8 +241,7 @@ entry:
define i64 @lsll_32(i64 %X) {
; CHECK-LABEL: lsll_32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #32
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #32
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -330,8 +320,7 @@ entry:
define i64 @lsll_m2(i64 %X) {
; CHECK-LABEL: lsll_m2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #1
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsrl r0, r1, #2
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -350,8 +339,7 @@ entry:
define i64 @lsll_m32(i64 %X) {
; CHECK-LABEL: lsll_m32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #31
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32

View File

@ -7,8 +7,7 @@ declare {i32, i32} @llvm.arm.mve.lsll(i32, i32, i32)
define i32 @ashr_demand_bottom3(i64 %X) {
; CHECK-LABEL: ashr_demand_bottom3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: asrl r0, r1, #3
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -28,8 +27,7 @@ entry:
define i32 @lsll_demand_bottom3(i64 %X) {
; CHECK-LABEL: lsll_demand_bottom3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #3
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -49,8 +47,7 @@ entry:
define i32 @ashr_demand_bottomm3(i64 %X) {
; CHECK-LABEL: ashr_demand_bottomm3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #2
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #3
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -70,8 +67,7 @@ entry:
define i32 @lsll_demand_bottomm3(i64 %X) {
; CHECK-LABEL: lsll_demand_bottomm3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #2
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsrl r0, r1, #3
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -92,8 +88,7 @@ entry:
define i32 @ashr_demand_bottom31(i64 %X) {
; CHECK-LABEL: ashr_demand_bottom31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #31
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: asrl r0, r1, #31
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -113,8 +108,7 @@ entry:
define i32 @lsll_demand_bottom31(i64 %X) {
; CHECK-LABEL: lsll_demand_bottom31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #31
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #31
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -134,8 +128,7 @@ entry:
define i32 @ashr_demand_bottomm31(i64 %X) {
; CHECK-LABEL: ashr_demand_bottomm31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #30
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #31
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -155,8 +148,7 @@ entry:
define i32 @lsll_demand_bottomm31(i64 %X) {
; CHECK-LABEL: lsll_demand_bottomm31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #30
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsrl r0, r1, #31
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -177,8 +169,7 @@ entry:
define i32 @ashr_demand_bottom32(i64 %X) {
; CHECK-LABEL: ashr_demand_bottom32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #32
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: asrl r0, r1, #32
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -198,8 +189,7 @@ entry:
define i32 @lsll_demand_bottom32(i64 %X) {
; CHECK-LABEL: lsll_demand_bottom32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #32
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #32
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -219,8 +209,7 @@ entry:
define i32 @ashr_demand_bottomm32(i64 %X) {
; CHECK-LABEL: ashr_demand_bottomm32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #31
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #32
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -240,8 +229,7 @@ entry:
define i32 @lsll_demand_bottomm32(i64 %X) {
; CHECK-LABEL: lsll_demand_bottomm32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #31
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: bx lr
entry:
%0 = lshr i64 %X, 32
@ -352,8 +340,7 @@ entry:
define i32 @ashr_demand_top3(i64 %X) {
; CHECK-LABEL: ashr_demand_top3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: asrl r0, r1, #3
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -375,8 +362,7 @@ entry:
define i32 @lsll_demand_top3(i64 %X) {
; CHECK-LABEL: lsll_demand_top3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #3
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -398,8 +384,7 @@ entry:
define i32 @ashr_demand_topm3(i64 %X) {
; CHECK-LABEL: ashr_demand_topm3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #2
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #3
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -421,8 +406,7 @@ entry:
define i32 @lsll_demand_topm3(i64 %X) {
; CHECK-LABEL: lsll_demand_topm3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #2
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsrl r0, r1, #3
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -445,8 +429,7 @@ entry:
define i32 @ashr_demand_top31(i64 %X) {
; CHECK-LABEL: ashr_demand_top31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #31
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: asrl r0, r1, #31
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -468,8 +451,7 @@ entry:
define i32 @lsll_demand_top31(i64 %X) {
; CHECK-LABEL: lsll_demand_top31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #31
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #31
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -491,8 +473,7 @@ entry:
define i32 @ashr_demand_topm31(i64 %X) {
; CHECK-LABEL: ashr_demand_topm31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #30
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #31
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -514,8 +495,7 @@ entry:
define i32 @lsll_demand_topm31(i64 %X) {
; CHECK-LABEL: lsll_demand_topm31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #30
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsrl r0, r1, #31
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -538,8 +518,7 @@ entry:
define i32 @ashr_demand_top32(i64 %X) {
; CHECK-LABEL: ashr_demand_top32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #32
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: asrl r0, r1, #32
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -561,8 +540,7 @@ entry:
define i32 @lsll_demand_top32(i64 %X) {
; CHECK-LABEL: lsll_demand_top32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #32
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #32
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -584,8 +562,7 @@ entry:
define i32 @ashr_demand_topm32(i64 %X) {
; CHECK-LABEL: ashr_demand_topm32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #31
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #32
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -607,8 +584,7 @@ entry:
define i32 @lsll_demand_topm32(i64 %X) {
; CHECK-LABEL: lsll_demand_topm32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #31
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
@ -725,8 +701,7 @@ entry:
define i32 @ashr_demand_bottommask3(i64 %X) {
; CHECK-LABEL: ashr_demand_bottommask3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: asrl r0, r1, #3
; CHECK-NEXT: bic r0, r0, #1
; CHECK-NEXT: bx lr
entry:
@ -748,8 +723,7 @@ entry:
define i32 @lsll_demand_bottommask3(i64 %X) {
; CHECK-LABEL: lsll_demand_bottommask3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #3
; CHECK-NEXT: bic r0, r0, #1
; CHECK-NEXT: bx lr
entry:
@ -771,8 +745,7 @@ entry:
define i32 @ashr_demand_bottommaskm3(i64 %X) {
; CHECK-LABEL: ashr_demand_bottommaskm3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #2
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #3
; CHECK-NEXT: bic r0, r0, #1
; CHECK-NEXT: bx lr
entry:
@ -794,8 +767,7 @@ entry:
define i32 @lsll_demand_bottommaskm3(i64 %X) {
; CHECK-LABEL: lsll_demand_bottommaskm3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #2
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsrl r0, r1, #3
; CHECK-NEXT: bic r0, r0, #1
; CHECK-NEXT: bx lr
entry:
@ -818,8 +790,7 @@ entry:
define i32 @ashr_demand_bottommask32(i64 %X) {
; CHECK-LABEL: ashr_demand_bottommask32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #32
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: asrl r0, r1, #32
; CHECK-NEXT: bic r0, r0, #1
; CHECK-NEXT: bx lr
entry:
@ -841,8 +812,7 @@ entry:
define i32 @lsll_demand_bottommask32(i64 %X) {
; CHECK-LABEL: lsll_demand_bottommask32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #32
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #32
; CHECK-NEXT: bic r0, r0, #1
; CHECK-NEXT: bx lr
entry:
@ -864,8 +834,7 @@ entry:
define i32 @ashr_demand_bottommaskm32(i64 %X) {
; CHECK-LABEL: ashr_demand_bottommaskm32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #31
; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: lsll r0, r1, #32
; CHECK-NEXT: bic r0, r0, #1
; CHECK-NEXT: bx lr
entry:
@ -887,8 +856,7 @@ entry:
define i32 @lsll_demand_bottommaskm32(i64 %X) {
; CHECK-LABEL: lsll_demand_bottommaskm32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r2, #31
; CHECK-NEXT: lsll r0, r1, r2
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: bic r0, r0, #1
; CHECK-NEXT: bx lr
entry: