1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[PowerPC] Strength reduction of multiply by a constant by shift and add/sub in place

A shift plus add/sub sequence is faster than a multiply by a constant.
Because the cycle count or latency of a multiply is not huge, we only consider the
following patterns to be worthwhile.

```
(mul x, 2^N + 1) => (add (shl x, N), x)
(mul x, -(2^N + 1)) => -(add (shl x, N), x)
(mul x, 2^N - 1) => (sub (shl x, N), x)
(mul x, -(2^N - 1)) => (sub x, (shl x, N))
```

The cycle counts or latencies are subtarget-dependent, so we need to consider the
subtarget when deciding whether or not to do such a transformation.
The data type is also considered, because the cycles or latency of a multiply differ by type.

Differential Revision: https://reviews.llvm.org/D58950

llvm-svn: 357233
This commit is contained in:
Zi Xuan Wu 2019-03-29 03:08:39 +00:00
parent ab1afdd4e4
commit fe88774ae8
7 changed files with 640 additions and 24 deletions

View File

@ -1071,6 +1071,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
@ -12643,6 +12644,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return combineSRA(N, DCI);
case ISD::SRL:
return combineSRL(N, DCI);
case ISD::MUL:
return combineMUL(N, DCI);
case PPCISD::SHL:
if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
@ -14565,6 +14568,89 @@ SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
return SDValue();
}
/// Target DAG combine for ISD::MUL. Rewrites a multiply by a constant (or by a
/// splat of a constant) into a shift plus add/sub when the multiplier has the
/// form +/-(2^N + 1) or +/-(2^N - 1):
///
///   (mul x,   2^N + 1)  => (add x, (shl x, N))
///   (mul x, -(2^N + 1)) => (sub 0, (add x, (shl x, N)))
///   (mul x,   2^N - 1)  => (sub (shl x, N), x)
///   (mul x, -(2^N - 1)) => (sub x, (shl x, N))
///
/// \param N   the ISD::MUL node being combined.
/// \param DCI combiner state; only its SelectionDAG is used here.
/// \returns the replacement value, or an empty SDValue when the multiplier is
///          not a matching constant, when optimizing for minimum size with a
///          legal multiply, or when the subtarget cost model rejects the
///          rewrite.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  const EVT VT = N->getValueType(0);

  // Nothing to do unless the right-hand operand is a constant or a splat of one.
  ConstantSDNode *MulC = isConstOrConstSplat(N->getOperand(1));
  if (!MulC)
    return SDValue();

  // For a legal type a single multiply is usually smaller than the replacement
  // sequence, so keep it when the function is optimized for minimum size.
  if (DAG.getMachineFunction().getFunction().optForMinSize() &&
      isOperationLegal(ISD::MUL, VT))
    return SDValue();

  const APInt &MulAmt = MulC->getAPIntValue();
  const bool IsNeg = MulAmt.isNegative();
  const APInt MulAmtAbs = MulAmt.abs();

  // Classify |C|. The 2^N + 1 form is tested first and therefore wins when
  // both forms would match.
  const APInt AbsMinusOne = MulAmtAbs - 1;
  const APInt AbsPlusOne = MulAmtAbs + 1;
  const bool IsAddOne = AbsMinusOne.isPowerOf2();
  if (!IsAddOne && !AbsPlusOne.isPowerOf2())
    return SDValue();

  // Subtarget cost model, expressed as relative cycle ratios:
  //
  //             type     mul   add   shl
  //   PWR8     scalar     4     1     1
  //            vector     7     2     2
  //   PWR9     scalar     5     2     2
  //            vector     7     2     2
  bool Profitable = false;
  switch (Subtarget.getDarwinDirective()) {
  default:
    // TODO: enhance the condition for subtarget before pwr8
    break;
  case PPC::DIR_PWR8:
    Profitable = true;
    break;
  case PPC::DIR_PWR9:
    // The two-instruction sequences (shl + add/sub) cost 4 and always beat the
    // multiply. The three-instruction sequence needed for -(2^N + 1)
    // (shl + add + sub) costs 6, which only beats the vector multiply.
    Profitable = !(IsAddOne && IsNeg) || VT.isVector();
    break;
  }
  if (!Profitable)
    return SDValue();

  SDLoc DL(N);
  SDValue X = N->getOperand(0);

  // Both forms start from (shl x, N); only the source of N differs.
  const APInt &Pow2 = IsAddOne ? AbsMinusOne : AbsPlusOne;
  SDValue ShiftAmt = DAG.getConstant(Pow2.logBase2(), DL, VT);
  SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);

  if (IsAddOne) {
    SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, Shifted);
    if (!IsNeg)
      return Sum;
    // Negate the sum by subtracting it from zero.
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Sum);
  }

  // 2^N - 1 form: swapping the SUB operands absorbs the negation for free.
  if (IsNeg)
    return DAG.getNode(ISD::SUB, DL, VT, X, Shifted);
  return DAG.getNode(ISD::SUB, DL, VT, Shifted, X);
}
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())

View File

@ -1121,6 +1121,7 @@ namespace llvm {
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;

View File

@ -0,0 +1,92 @@
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=generic < %s -mtriple=ppc64-- | FileCheck %s -check-prefix=GENERIC-CHECK
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=pwr8 < %s -mtriple=ppc64-- | FileCheck %s -check-prefixes=PWR8-CHECK,CHECK
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=pwr9 < %s -mtriple=ppc64le-- | FileCheck %s -check-prefixes=PWR9-CHECK,CHECK
define i64 @foo(i64 %a) {
entry:
%mul = mul nsw i64 %a, 6
ret i64 %mul
}
; GENERIC-CHECK-LABEL: @foo
; GENERIC-CHECK: mulli r3, r3, 6
; GENERIC-CHECK: blr
define i64 @test1(i64 %a) {
%tmp.1 = mul nsw i64 %a, 16 ; <i64> [#uses=1]
ret i64 %tmp.1
}
; CHECK-LABEL: test1:
; CHECK-NOT: mul
; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
define i64 @test2(i64 %a) {
%tmp.1 = mul nsw i64 %a, 17 ; <i64> [#uses=1]
ret i64 %tmp.1
}
; CHECK-LABEL: test2:
; CHECK-NOT: mul
; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
; CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
define i64 @test3(i64 %a) {
%tmp.1 = mul nsw i64 %a, 15 ; <i64> [#uses=1]
ret i64 %tmp.1
}
; CHECK-LABEL: test3:
; CHECK-NOT: mul
; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
; CHECK-NEXT: sub r[[REG2:[0-9]+]], r[[REG1]], r3
; negative constant
define i64 @test4(i64 %a) {
%tmp.1 = mul nsw i64 %a, -16 ; <i64> [#uses=1]
ret i64 %tmp.1
}
; CHECK-LABEL: test4:
; CHECK-NOT: mul
; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
; CHECK-NEXT: neg r[[REG2:[0-9]+]], r[[REG1]]
define i64 @test5(i64 %a) {
%tmp.1 = mul nsw i64 %a, -17 ; <i64> [#uses=1]
ret i64 %tmp.1
}
; CHECK-LABEL: test5:
; PWR9-CHECK: mulli r[[REG1:[0-9]+]], r3, -17
; PWR8-CHECK-NOT: mul
; PWR8-CHECK: sldi r[[REG1:[0-9]+]], r3, 4
; PWR8-CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
; PWR8-CHECK-NEXT: neg r{{[0-9]+}}, r[[REG2]]
define i64 @test6(i64 %a) {
%tmp.1 = mul nsw i64 %a, -15 ; <i64> [#uses=1]
ret i64 %tmp.1
}
; CHECK-LABEL: test6:
; CHECK-NOT: mul
; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
; CHECK-NEXT: sub r[[REG2:[0-9]+]], r3, r[[REG1]]
; CHECK-NOT: neg
; boundary case
define i64 @test7(i64 %a) {
%tmp.1 = mul nsw i64 %a, -9223372036854775808 ; <i64> [#uses=1]
ret i64 %tmp.1
}
; CHECK-LABEL: test7:
; CHECK-NOT: mul
; CHECK: sldi r[[REG1:[0-9]+]], r3, 63
define i64 @test8(i64 %a) {
%tmp.1 = mul nsw i64 %a, 9223372036854775807 ; <i64> [#uses=1]
ret i64 %tmp.1
}
; CHECK-LABEL: test8:
; CHECK-NOT: mul
; CHECK: sldi r[[REG1:[0-9]+]], r3, 63
; CHECK-NEXT: sub r[[REG2:[0-9]+]], r[[REG1]], r3

View File

@ -0,0 +1,382 @@
; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=ppc64le-- -mcpu=pwr8 | FileCheck %s --check-prefixes=CHECK,CHECK-P8
; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=ppc64le-- -mcpu=pwr9 | FileCheck %s --check-prefixes=CHECK,CHECK-P9
define <16 x i8> @test1_v16i8(<16 x i8> %a) {
%tmp.1 = mul nsw <16 x i8> %a, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16> ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp.1
}
; CHECK-LABEL: test1_v16i8:
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
define <16 x i8> @test2_v16i8(<16 x i8> %a) {
%tmp.1 = mul nsw <16 x i8> %a, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17> ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp.1
}
; CHECK-LABEL: test2_v16i8:
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vaddubm v[[REG3:[0-9]+]], v2, v[[REG2]]
define <16 x i8> @test3_v16i8(<16 x i8> %a) {
%tmp.1 = mul nsw <16 x i8> %a, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp.1
}
; CHECK-LABEL: test3_v16i8:
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v[[REG2]], v2
; negative constant
define <16 x i8> @test4_v16i8(<16 x i8> %a) {
%tmp.1 = mul nsw <16 x i8> %a, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16> ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp.1
}
; CHECK-LABEL: test4_v16i8:
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslb v[[REG3:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
; CHECK-NEXT: vsububm v[[REG4:[0-9]+]], v[[REG2]], v[[REG3]]
define <16 x i8> @test5_v16i8(<16 x i8> %a) {
%tmp.1 = mul nsw <16 x i8> %a, <i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17> ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp.1
}
; CHECK-LABEL: test5_v16i8:
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslb v[[REG3:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vaddubm v[[REG4:[0-9]+]], v2, v[[REG3]]
; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
; CHECK-NEXT: vsububm v[[REG5:[0-9]+]], v[[REG2]], v[[REG4]]
define <16 x i8> @test6_v16i8(<16 x i8> %a) {
%tmp.1 = mul nsw <16 x i8> %a, <i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15> ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp.1
}
; CHECK-LABEL: test6_v16i8:
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v2, v[[REG2]]
; boundary case
define <16 x i8> @test7_v16i8(<16 x i8> %a) {
%tmp.1 = mul nsw <16 x i8> %a, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128> ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp.1
}
; CHECK-LABEL: test7_v16i8:
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 7
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 7
; CHECK-NOT: vmul
; CHECK-NEXT: vslb v[[REG5:[0-9]+]], v2, v[[REG1]]
define <16 x i8> @test8_v16i8(<16 x i8> %a) {
%tmp.1 = mul nsw <16 x i8> %a, <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127> ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp.1
}
; CHECK-LABEL: test8_v16i8:
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 7
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 7
; CHECK-NOT: vmul
; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v[[REG2]], v2
define <8 x i16> @test1_v8i16(<8 x i16> %a) {
%tmp.1 = mul nsw <8 x i16> %a, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp.1
}
; CHECK-LABEL: test1_v8i16:
; CHECK: vspltish v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
define <8 x i16> @test2_v8i16(<8 x i16> %a) {
%tmp.1 = mul nsw <8 x i16> %a, <i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17> ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp.1
}
; CHECK-LABEL: test2_v8i16:
; CHECK: vspltish v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vadduhm v[[REG3:[0-9]+]], v2, v[[REG2]]
define <8 x i16> @test3_v8i16(<8 x i16> %a) {
%tmp.1 = mul nsw <8 x i16> %a, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp.1
}
; CHECK-LABEL: test3_v8i16:
; CHECK: vspltish v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v[[REG2]], v2
; negative constant
define <8 x i16> @test4_v8i16(<8 x i16> %a) {
%tmp.1 = mul nsw <8 x i16> %a, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16> ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp.1
}
; CHECK-LABEL: test4_v8i16:
; CHECK: vspltish v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslh v[[REG3:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
; CHECK-NEXT: vsubuhm v[[REG4:[0-9]+]], v[[REG2]], v[[REG3]]
define <8 x i16> @test5_v8i16(<8 x i16> %a) {
%tmp.1 = mul nsw <8 x i16> %a, <i16 -17, i16 -17, i16 -17, i16 -17, i16 -17, i16 -17, i16 -17, i16 -17> ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp.1
}
; CHECK-LABEL: test5_v8i16:
; CHECK: vspltish v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslh v[[REG3:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vadduhm v[[REG4:[0-9]+]], v2, v[[REG3]]
; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
; CHECK-NEXT: vsubuhm v[[REG5:[0-9]+]], v[[REG2]], v[[REG4]]
define <8 x i16> @test6_v8i16(<8 x i16> %a) {
%tmp.1 = mul nsw <8 x i16> %a, <i16 -15, i16 -15, i16 -15, i16 -15, i16 -15, i16 -15, i16 -15, i16 -15> ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp.1
}
; CHECK-LABEL: test6_v8i16:
; CHECK: vspltish v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v2, v[[REG2]]
; boundary case
define <8 x i16> @test7_v8i16(<8 x i16> %a) {
%tmp.1 = mul nsw <8 x i16> %a, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768> ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp.1
}
; CHECK-LABEL: test7_v8i16:
; CHECK: vspltish v[[REG1:[0-9]+]], 15
; CHECK-NOT: vmul
; CHECK-NEXT: vslh v[[REG5:[0-9]+]], v2, v[[REG1]]
define <8 x i16> @test8_v8i16(<8 x i16> %a) {
%tmp.1 = mul nsw <8 x i16> %a, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp.1
}
; CHECK-LABEL: test8_v8i16:
; CHECK: vspltish v[[REG1:[0-9]+]], 15
; CHECK-NOT: vmul
; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v[[REG2]], v2
define <4 x i32> @test1_v4i32(<4 x i32> %a) {
%tmp.1 = mul nsw <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16> ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp.1
}
; CHECK-LABEL: test1_v4i32:
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
define <4 x i32> @test2_v4i32(<4 x i32> %a) {
%tmp.1 = mul nsw <4 x i32> %a, <i32 17, i32 17, i32 17, i32 17> ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp.1
}
; CHECK-LABEL: test2_v4i32:
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vadduwm v[[REG3:[0-9]+]], v2, v[[REG2]]
define <4 x i32> @test3_v4i32(<4 x i32> %a) {
%tmp.1 = mul nsw <4 x i32> %a, <i32 15, i32 15, i32 15, i32 15> ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp.1
}
; CHECK-LABEL: test3_v4i32:
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vsubuwm v[[REG3:[0-9]+]], v[[REG2]], v2
; negative constant
define <4 x i32> @test4_v4i32(<4 x i32> %a) {
%tmp.1 = mul nsw <4 x i32> %a, <i32 -16, i32 -16, i32 -16, i32 -16> ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp.1
}
; CHECK-LABEL: test4_v4i32:
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-P8-NEXT: xxlxor v[[REG3:[0-9]+]],
; CHECK-P8-NEXT: vsubuwm v{{[0-9]+}}, v[[REG3]], v[[REG2]]
; CHECK-P9-NEXT: vnegw v{{[0-9]+}}, v[[REG2]]
define <4 x i32> @test5_v4i32(<4 x i32> %a) {
%tmp.1 = mul nsw <4 x i32> %a, <i32 -17, i32 -17, i32 -17, i32 -17> ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp.1
}
; CHECK-LABEL: test5_v4i32:
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vadduwm v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]],
; CHECK-P8-NEXT: vsubuwm v{{[0-9]+}}, v[[REG4]], v[[REG3]]
; CHECK-P9-NEXT: vnegw v{{[0-9]+}}, v[[REG3]]
define <4 x i32> @test6_v4i32(<4 x i32> %a) {
%tmp.1 = mul nsw <4 x i32> %a, <i32 -15, i32 -15, i32 -15, i32 -15> ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp.1
}
; CHECK-LABEL: test6_v4i32:
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
; CHECK-NOT: vmul
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
; CHECK-NEXT: vsubuwm v[[REG3:[0-9]+]], v2, v[[REG2]]
; boundary case
define <4 x i32> @test7_v4i32(<4 x i32> %a) {
%tmp.1 = mul nsw <4 x i32> %a, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp.1
}
; CHECK-LABEL: test7_v4i32:
; CHECK-DAG: vspltisw v[[REG2:[0-9]+]], -16
; CHECK-DAG: vspltisw v[[REG3:[0-9]+]], 15
; CHECK-NEXT: vsubuwm v[[REG4:[0-9]+]], v[[REG3]], v[[REG2]]
; CHECK-NOT: vmul
; CHECK-NEXT: vslw v[[REG5:[0-9]+]], v2, v[[REG4]]
define <4 x i32> @test8_v4i32(<4 x i32> %a) {
%tmp.1 = mul nsw <4 x i32> %a, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp.1
}
; CHECK-LABEL: test8_v4i32:
; CHECK-DAG: vspltisw v[[REG2:[0-9]+]], -16
; CHECK-DAG: vspltisw v[[REG3:[0-9]+]], 15
; CHECK-NEXT: vsubuwm v[[REG4:[0-9]+]], v[[REG3]], v[[REG2]]
; CHECK-NOT: vmul
; CHECK-NEXT: vslw v[[REG5:[0-9]+]], v2, v[[REG4]]
; CHECK-NEXT: vsubuwm v[[REG6:[0-9]+]], v[[REG5]], v2
define <2 x i64> @test1_v2i64(<2 x i64> %a) {
%tmp.1 = mul nsw <2 x i64> %a, <i64 16, i64 16> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp.1
}
; CHECK-LABEL: test1_v2i64:
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v{{[0-9]+}}, v2, v[[REG2]]
define <2 x i64> @test2_v2i64(<2 x i64> %a) {
%tmp.1 = mul nsw <2 x i64> %a, <i64 17, i64 17> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp.1
}
; CHECK-LABEL: test2_v2i64:
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-NEXT: vaddudm v{{[0-9]+}}, v2, v[[REG3]]
define <2 x i64> @test3_v2i64(<2 x i64> %a) {
%tmp.1 = mul nsw <2 x i64> %a, <i64 15, i64 15> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp.1
}
; CHECK-LABEL: test3_v2i64:
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2
; negative constant
define <2 x i64> @test4_v2i64(<2 x i64> %a) {
%tmp.1 = mul nsw <2 x i64> %a, <i64 -16, i64 -16> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp.1
}
; CHECK-LABEL: test4_v2i64:
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]],
; CHECK-P8-NEXT: vsubudm v{{[0-9]+}}, v[[REG4]], v[[REG3]]
; CHECK-P9-NEXT: vnegd v[[REG4:[0-9]+]], v[[REG3]]
define <2 x i64> @test5_v2i64(<2 x i64> %a) {
%tmp.1 = mul nsw <2 x i64> %a, <i64 -17, i64 -17> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp.1
}
; CHECK-LABEL: test5_v2i64:
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-NEXT: vaddudm v[[REG4:[0-9]+]], v2, v[[REG3]]
; CHECK-P8-NEXT: xxlxor v[[REG5:[0-9]+]],
; CHECK-P8-NEXT: vsubudm v[[REG6:[0-9]+]], v[[REG5]], v[[REG4]]
; CHECK-P9-NEXT: vnegd v{{[0-9]+}}, v[[REG4]]
define <2 x i64> @test6_v2i64(<2 x i64> %a) {
%tmp.1 = mul nsw <2 x i64> %a, <i64 -15, i64 -15> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp.1
}
; CHECK-LABEL: test6_v2i64:
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-NEXT: vsubudm v{{[0-9]+}}, v2, v[[REG3]]
; boundary case
define <2 x i64> @test7_v2i64(<2 x i64> %a) {
%tmp.1 = mul nsw <2 x i64> %a, <i64 -9223372036854775808, i64 -9223372036854775808> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp.1
}
; CHECK-LABEL: test7_v2i64:
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG4:[0-9]+]], v2, v[[REG2]]
define <2 x i64> @test8_v2i64(<2 x i64> %a) {
%tmp.1 = mul nsw <2 x i64> %a, <i64 9223372036854775807, i64 9223372036854775807> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp.1
}
; CHECK-LABEL: test8_v2i64:
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2

View File

@ -0,0 +1,79 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=ppc64-- | FileCheck %s -check-prefixes=PWR8-CHECK,CHECK
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=ppc64le-- | FileCheck %s -check-prefixes=PWR9-CHECK,CHECK
define i32 @test1(i32 %a) {
%tmp.1 = mul nsw i32 %a, 16 ; <i32> [#uses=1]
ret i32 %tmp.1
}
; CHECK-LABEL: test1:
; CHECK-NOT: mul
; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
define i32 @test2(i32 %a) {
%tmp.1 = mul nsw i32 %a, 17 ; <i32> [#uses=1]
ret i32 %tmp.1
}
; CHECK-LABEL: test2:
; CHECK-NOT: mul
; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
; CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
define i32 @test3(i32 %a) {
%tmp.1 = mul nsw i32 %a, 15 ; <i32> [#uses=1]
ret i32 %tmp.1
}
; CHECK-LABEL: test3:
; CHECK-NOT: mul
; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
; CHECK-NEXT: subf r[[REG2:[0-9]+]], r3, r[[REG1]]
; negative constant
define i32 @test4(i32 %a) {
%tmp.1 = mul nsw i32 %a, -16 ; <i32> [#uses=1]
ret i32 %tmp.1
}
; CHECK-LABEL: test4:
; CHECK-NOT: mul
; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
; CHECK-NEXT: neg r[[REG2:[0-9]+]], r[[REG1]]
define i32 @test5(i32 %a) {
%tmp.1 = mul nsw i32 %a, -17 ; <i32> [#uses=1]
ret i32 %tmp.1
}
; CHECK-LABEL: test5:
; PWR9-CHECK: mulli r[[REG1:[0-9]+]], r3, -17
; PWR8-CHECK-NOT: mul
; PWR8-CHECK: slwi r[[REG1:[0-9]+]], r3, 4
; PWR8-CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
; PWR8-CHECK-NEXT: neg r{{[0-9]+}}, r[[REG2]]
define i32 @test6(i32 %a) {
%tmp.1 = mul nsw i32 %a, -15 ; <i32> [#uses=1]
ret i32 %tmp.1
}
; CHECK-LABEL: test6:
; CHECK-NOT: mul
; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
; CHECK-NEXT: subf r[[REG2:[0-9]+]], r[[REG1]], r3
; CHECK-NOT: neg
; boundary case
define i32 @test7(i32 %a) {
%tmp.1 = mul nsw i32 %a, -2147483648 ; <i32> [#uses=1]
ret i32 %tmp.1
}
; CHECK-LABEL: test7:
; CHECK-NOT: mul
; CHECK: slwi r[[REG1:[0-9]+]], r3, 31
define i32 @test8(i32 %a) {
%tmp.1 = mul nsw i32 %a, 2147483647 ; <i32> [#uses=1]
ret i32 %tmp.1
}
; CHECK-LABEL: test8:
; CHECK-NOT: mul
; CHECK: slwi r[[REG1:[0-9]+]], r3, 31
; CHECK-NEXT: subf r[[REG2:[0-9]+]], r3, r[[REG1]]

View File

@ -1,8 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | not grep mul
define i32 @test1(i32 %a) {
%tmp.1 = mul i32 %a, -2 ; <i32> [#uses=1]
%tmp.2 = add i32 %tmp.1, 63 ; <i32> [#uses=1]
ret i32 %tmp.2
}

View File

@ -1,16 +0,0 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
define i64 @foo(i64 %a) #0 {
entry:
%mul = mul nsw i64 %a, 3
ret i64 %mul
}
; CHECK-LABEL: @foo
; CHECK: mulli 3, 3, 3
; CHECK: blr
attributes #0 = { nounwind readnone }