mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[DAGCombiner] Add decomposition patterns for Mul-by-Imm.
Summary: This patch is derived from D87384. In this patch we expand the existing decomposition of mul-by-constant to be more general by implementing 2 patterns: ``` mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M)) mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M)) ``` The conversion is triggered when the multiplier is a large constant that the target cannot handle with a single multiplication instruction. This is controlled by the hook `decomposeMulByConstant`. Moreover, the conversion benefits from improved ILP, since the generated instructions are independent of each other. A sequence like the following also benefits, since one shift instruction is saved. ``` *res1 = a * 0x8800; *res2 = a * 0x8080; ``` Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D88201
This commit is contained in:
parent
47f4e42b78
commit
725cbaf080
@ -3622,19 +3622,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
|
||||
getShiftAmountTy(N0.getValueType()))));
|
||||
}
|
||||
|
||||
// Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
|
||||
// Try to transform:
|
||||
// (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
|
||||
// mul x, (2^N + 1) --> add (shl x, N), x
|
||||
// mul x, (2^N - 1) --> sub (shl x, N), x
|
||||
// Examples: x * 33 --> (x << 5) + x
|
||||
// x * 15 --> (x << 4) - x
|
||||
// x * -33 --> -((x << 5) + x)
|
||||
// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
|
||||
// (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
|
||||
// mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
|
||||
// mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
|
||||
// Examples: x * 0x8800 --> (x << 15) + (x << 11)
|
||||
// x * 0xf800 --> (x << 16) - (x << 11)
|
||||
// x * -0x8800 --> -((x << 15) + (x << 11))
|
||||
// x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
|
||||
if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
|
||||
// TODO: We could handle more general decomposition of any constant by
|
||||
// having the target set a limit on number of ops and making a
|
||||
// callback to determine that sequence (similar to sqrt expansion).
|
||||
unsigned MathOp = ISD::DELETED_NODE;
|
||||
APInt MulC = ConstValue1.abs();
|
||||
// The constant `2` should be treated as (2^0 + 1).
|
||||
unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
|
||||
MulC.lshrInPlace(TZeros);
|
||||
if ((MulC - 1).isPowerOf2())
|
||||
MathOp = ISD::ADD;
|
||||
else if ((MulC + 1).isPowerOf2())
|
||||
@ -3643,12 +3654,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
|
||||
if (MathOp != ISD::DELETED_NODE) {
|
||||
unsigned ShAmt =
|
||||
MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
|
||||
ShAmt += TZeros;
|
||||
assert(ShAmt < VT.getScalarSizeInBits() &&
|
||||
"multiply-by-constant generated out of bounds shift");
|
||||
SDLoc DL(N);
|
||||
SDValue Shl =
|
||||
DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
|
||||
SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
|
||||
SDValue R =
|
||||
TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
|
||||
DAG.getNode(ISD::SHL, DL, VT, N0,
|
||||
DAG.getConstant(TZeros, DL, VT)))
|
||||
: DAG.getNode(MathOp, DL, VT, Shl, N0);
|
||||
if (ConstValue1.isNegative())
|
||||
R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
|
||||
return R;
|
||||
|
@ -16057,6 +16057,32 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Decide whether a multiplication of type \p VT by the constant \p C should
/// be decomposed into shift and add/sub operations.
///
/// \param Context Unused by this implementation.
/// \param VT      Type of the multiplication; only scalar integer types are
///                accepted.
/// \param C       The multiplier operand; only ConstantSDNode operands whose
///                value fits in a signed 64-bit integer are accepted.
/// \returns true when, after stripping trailing zero bits, the multiplier does
///          not fit in a signed 16-bit immediate and is (or its negation is)
///          one away from a power of two; false otherwise.
bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                               SDValue C) const {
  // Check integral scalar types.
  if (!VT.isScalarInteger())
    return false;

  auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode());
  if (!ConstNode)
    return false;
  if (!ConstNode->getAPIntValue().isSignedIntN(64))
    return false;

  // This transformation will generate >= 2 operations. But the following
  // cases will generate <= 2 instructions during ISEL. So exclude them.
  // 1. If the constant multiplier fits 16 bits, it can be handled by one
  //    HW instruction, i.e. MULLI.
  // 2. If the multiplier fits 16 bits once its trailing zeros are shifted
  //    out, only one extra shift instruction is needed compared with case 1,
  //    i.e. MULLI and RLDICR.
  int64_t Imm = ConstNode->getSExtValue();

  // A zero multiplier trivially fits 16 bits (case 1). Bail out before the
  // trailing-zero count is used as a shift amount: for a zero input that
  // count is expected to equal the full bit width, which would make the shift
  // below out of range.
  if (Imm == 0)
    return false;

  unsigned Shift = countTrailingZeros<uint64_t>(Imm);
  Imm >>= Shift;
  if (isInt<16>(Imm))
    return false;

  // Do the +/-1 arithmetic on an unsigned copy. Imm is odd here and may be
  // INT64_MAX or INT64_MIN + 1, for which the signed forms `Imm + 1` and
  // `1 - Imm` overflow (undefined behaviour). Unsigned arithmetic wraps and
  // yields the same bit patterns in every non-overflowing case.
  const uint64_t UImm = static_cast<uint64_t>(Imm);
  return isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
         isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm);
}
|
||||
|
||||
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const {
|
||||
return isFMAFasterThanFMulAndFAdd(
|
||||
|
@ -931,6 +931,9 @@ namespace llvm {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
|
||||
SDValue C) const override;
|
||||
|
||||
bool isDesirableToTransformToIntegerOp(unsigned Opc,
|
||||
EVT VT) const override {
|
||||
// Only handle float load/store pair because float(fpr) load/store
|
||||
|
@ -48,10 +48,9 @@ define i64 @test4(i64 %x) {
|
||||
define i64 @test5(i64 %x) {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lis 4, 16
|
||||
; CHECK-NEXT: ori 4, 4, 1
|
||||
; CHECK-NEXT: sldi 4, 4, 12
|
||||
; CHECK-NEXT: mulld 3, 3, 4
|
||||
; CHECK-NEXT: sldi 4, 3, 12
|
||||
; CHECK-NEXT: sldi 3, 3, 32
|
||||
; CHECK-NEXT: add 3, 3, 4
|
||||
; CHECK-NEXT: blr
|
||||
%y = mul i64 %x, 4294971392
|
||||
ret i64 %y
|
||||
@ -60,10 +59,10 @@ define i64 @test5(i64 %x) {
|
||||
define i64 @test6(i64 %x) {
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lis 4, -17
|
||||
; CHECK-NEXT: ori 4, 4, 65535
|
||||
; CHECK-NEXT: sldi 4, 4, 12
|
||||
; CHECK-NEXT: mulld 3, 3, 4
|
||||
; CHECK-NEXT: sldi 4, 3, 12
|
||||
; CHECK-NEXT: sldi 3, 3, 32
|
||||
; CHECK-NEXT: add 3, 3, 4
|
||||
; CHECK-NEXT: neg 3, 3
|
||||
; CHECK-NEXT: blr
|
||||
%y = mul i64 %x, -4294971392
|
||||
ret i64 %y
|
||||
@ -72,10 +71,9 @@ define i64 @test6(i64 %x) {
|
||||
define i64 @test7(i64 %x) {
|
||||
; CHECK-LABEL: test7:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lis 4, 31
|
||||
; CHECK-NEXT: ori 4, 4, 65535
|
||||
; CHECK-NEXT: sldi 4, 4, 13
|
||||
; CHECK-NEXT: mulld 3, 3, 4
|
||||
; CHECK-NEXT: sldi 4, 3, 34
|
||||
; CHECK-NEXT: sldi 3, 3, 13
|
||||
; CHECK-NEXT: sub 3, 4, 3
|
||||
; CHECK-NEXT: blr
|
||||
%y = mul i64 %x, 17179860992
|
||||
ret i64 %y
|
||||
@ -84,10 +82,9 @@ define i64 @test7(i64 %x) {
|
||||
define i64 @test8(i64 %x) {
|
||||
; CHECK-LABEL: test8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li 4, -4
|
||||
; CHECK-NEXT: sldi 4, 4, 32
|
||||
; CHECK-NEXT: ori 4, 4, 8192
|
||||
; CHECK-NEXT: mulld 3, 3, 4
|
||||
; CHECK-NEXT: sldi 4, 3, 13
|
||||
; CHECK-NEXT: sldi 3, 3, 34
|
||||
; CHECK-NEXT: sub 3, 4, 3
|
||||
; CHECK-NEXT: blr
|
||||
%y = mul i64 %x, -17179860992
|
||||
ret i64 %y
|
||||
@ -96,12 +93,11 @@ define i64 @test8(i64 %x) {
|
||||
define i64 @test9(i64 %x) {
|
||||
; CHECK-LABEL: test9:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lis 4, 16
|
||||
; CHECK-NEXT: sldi 4, 3, 12
|
||||
; CHECK-NEXT: sldi 5, 3, 32
|
||||
; CHECK-NEXT: add 4, 5, 4
|
||||
; CHECK-NEXT: li 5, 8193
|
||||
; CHECK-NEXT: ori 4, 4, 1
|
||||
; CHECK-NEXT: sldi 5, 5, 19
|
||||
; CHECK-NEXT: sldi 4, 4, 12
|
||||
; CHECK-NEXT: mulld 4, 3, 4
|
||||
; CHECK-NEXT: mulld 3, 3, 5
|
||||
; CHECK-NEXT: sub 3, 4, 3
|
||||
; CHECK-NEXT: blr
|
||||
@ -114,13 +110,8 @@ define i64 @test9(i64 %x) {
|
||||
define i64 @test10(i64 %x) {
|
||||
; CHECK-LABEL: test10:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lis 4, 31
|
||||
; CHECK-NEXT: lis 5, 16383
|
||||
; CHECK-NEXT: ori 4, 4, 65535
|
||||
; CHECK-NEXT: ori 5, 5, 57344
|
||||
; CHECK-NEXT: sldi 4, 4, 13
|
||||
; CHECK-NEXT: mulld 4, 3, 4
|
||||
; CHECK-NEXT: mulld 3, 3, 5
|
||||
; CHECK-NEXT: sldi 4, 3, 34
|
||||
; CHECK-NEXT: sldi 3, 3, 30
|
||||
; CHECK-NEXT: sub 3, 4, 3
|
||||
; CHECK-NEXT: blr
|
||||
%y = mul i64 %x, 17179860992
|
||||
|
Loading…
Reference in New Issue
Block a user