mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[PowerPC]Exploit P9 vabsdu for unsigned vselect patterns
For types v4i32/v8i16/v16i8, do the following transforms: (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) -> (vabsd a, b); (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) -> (vabsd a, b); (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) -> (vabsd a, b); (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) -> (vabsd a, b). Differential Revision: https://reviews.llvm.org/D55812 llvm-svn: 349599
This commit is contained in:
parent
df742158c7
commit
f9689d0d71
@ -1085,6 +1085,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
||||
|
||||
if (Subtarget.hasP9Altivec()) {
|
||||
setTargetDAGCombine(ISD::ABS);
|
||||
setTargetDAGCombine(ISD::VSELECT);
|
||||
}
|
||||
|
||||
// Darwin long double math library functions have $LDBL128 appended.
|
||||
@ -13267,6 +13268,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
return DAGCombineBuildVector(N, DCI);
|
||||
case ISD::ABS:
|
||||
return combineABS(N, DCI);
|
||||
case ISD::VSELECT:
|
||||
return combineVSelect(N, DCI);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
@ -14597,3 +14600,65 @@ SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// For type v4i32/v8ii16/v16i8, transform
|
||||
// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
|
||||
// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
|
||||
// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
|
||||
// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
|
||||
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
|
||||
assert(Subtarget.hasP9Altivec() &&
|
||||
"Only combine this when P9 altivec supported!");
|
||||
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
SDLoc dl(N);
|
||||
SDValue Cond = N->getOperand(0);
|
||||
SDValue TrueOpnd = N->getOperand(1);
|
||||
SDValue FalseOpnd = N->getOperand(2);
|
||||
EVT VT = N->getOperand(1).getValueType();
|
||||
|
||||
if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
|
||||
FalseOpnd.getOpcode() != ISD::SUB)
|
||||
return SDValue();
|
||||
|
||||
// ABSD only available for type v4i32/v8i16/v16i8
|
||||
if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
|
||||
return SDValue();
|
||||
|
||||
// At least to save one more dependent computation
|
||||
if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
|
||||
return SDValue();
|
||||
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
|
||||
|
||||
// Can only handle unsigned comparison here
|
||||
switch (CC) {
|
||||
default:
|
||||
return SDValue();
|
||||
case ISD::SETUGT:
|
||||
case ISD::SETUGE:
|
||||
break;
|
||||
case ISD::SETULT:
|
||||
case ISD::SETULE:
|
||||
std::swap(TrueOpnd, FalseOpnd);
|
||||
break;
|
||||
}
|
||||
|
||||
SDValue CmpOpnd1 = Cond.getOperand(0);
|
||||
SDValue CmpOpnd2 = Cond.getOperand(1);
|
||||
|
||||
// SETCC CmpOpnd1 CmpOpnd2 cond
|
||||
// TrueOpnd = CmpOpnd1 - CmpOpnd2
|
||||
// FalseOpnd = CmpOpnd2 - CmpOpnd1
|
||||
if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
|
||||
TrueOpnd.getOperand(1) == CmpOpnd2 &&
|
||||
FalseOpnd.getOperand(0) == CmpOpnd2 &&
|
||||
FalseOpnd.getOperand(1) == CmpOpnd1) {
|
||||
return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
|
||||
CmpOpnd1, CmpOpnd2,
|
||||
DAG.getTargetConstant(0, dl, MVT::i32));
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -1118,6 +1118,7 @@ namespace llvm {
|
||||
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
||||
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
|
||||
/// SETCC with integer subtraction when (1) there is a legal way of doing it
|
||||
|
@ -526,9 +526,10 @@ define <4 x i32> @absd_int32_ugt(<4 x i32>, <4 x i32>) {
|
||||
%6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
|
||||
ret <4 x i32> %6
|
||||
; CHECK-LABEL: absd_int32_ugt
|
||||
; CHECK: vcmpgtuw
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtuw
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsduw v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int32_ugt
|
||||
; CHECK-PWR8: vcmpgtuw
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -542,9 +543,10 @@ define <4 x i32> @absd_int32_uge(<4 x i32>, <4 x i32>) {
|
||||
%6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
|
||||
ret <4 x i32> %6
|
||||
; CHECK-LABEL: absd_int32_uge
|
||||
; CHECK: vcmpgtuw
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtuw
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsduw v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int32_uge
|
||||
; CHECK-PWR8: vcmpgtuw
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -558,9 +560,10 @@ define <4 x i32> @absd_int32_ult(<4 x i32>, <4 x i32>) {
|
||||
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
|
||||
ret <4 x i32> %6
|
||||
; CHECK-LABEL: absd_int32_ult
|
||||
; CHECK: vcmpgtuw
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtuw
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsduw v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int32_ult
|
||||
; CHECK-PWR8: vcmpgtuw
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -574,9 +577,10 @@ define <4 x i32> @absd_int32_ule(<4 x i32>, <4 x i32>) {
|
||||
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
|
||||
ret <4 x i32> %6
|
||||
; CHECK-LABEL: absd_int32_ule
|
||||
; CHECK: vcmpgtuw
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtuw
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsduw v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int32_ule
|
||||
; CHECK-PWR8: vcmpgtuw
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -590,9 +594,10 @@ define <8 x i16> @absd_int16_ugt(<8 x i16>, <8 x i16>) {
|
||||
%6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
|
||||
ret <8 x i16> %6
|
||||
; CHECK-LABEL: absd_int16_ugt
|
||||
; CHECK: vcmpgtuh
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtuh
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsduh v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int16_ugt
|
||||
; CHECK-PWR8: vcmpgtuh
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -606,9 +611,10 @@ define <8 x i16> @absd_int16_uge(<8 x i16>, <8 x i16>) {
|
||||
%6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
|
||||
ret <8 x i16> %6
|
||||
; CHECK-LABEL: absd_int16_uge
|
||||
; CHECK: vcmpgtuh
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtuh
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsduh v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int16_uge
|
||||
; CHECK-PWR8: vcmpgtuh
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -622,9 +628,10 @@ define <8 x i16> @absd_int16_ult(<8 x i16>, <8 x i16>) {
|
||||
%6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
|
||||
ret <8 x i16> %6
|
||||
; CHECK-LABEL: absd_int16_ult
|
||||
; CHECK: vcmpgtuh
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtuh
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsduh v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int16_ult
|
||||
; CHECK-PWR8: vcmpgtuh
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -638,9 +645,10 @@ define <8 x i16> @absd_int16_ule(<8 x i16>, <8 x i16>) {
|
||||
%6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
|
||||
ret <8 x i16> %6
|
||||
; CHECK-LABEL: absd_int16_ule
|
||||
; CHECK: vcmpgtuh
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtuh
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsduh v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int16_ule
|
||||
; CHECK-PWR8: vcmpgtuh
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -654,9 +662,10 @@ define <16 x i8> @absd_int8_ugt(<16 x i8>, <16 x i8>) {
|
||||
%6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
|
||||
ret <16 x i8> %6
|
||||
; CHECK-LABEL: absd_int8_ugt
|
||||
; CHECK: vcmpgtub
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtub
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsdub v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int8_ugt
|
||||
; CHECK-PWR8: vcmpgtub
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -670,9 +679,10 @@ define <16 x i8> @absd_int8_uge(<16 x i8>, <16 x i8>) {
|
||||
%6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
|
||||
ret <16 x i8> %6
|
||||
; CHECK-LABEL: absd_int8_uge
|
||||
; CHECK: vcmpgtub
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtub
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsdub v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int8_uge
|
||||
; CHECK-PWR8: vcmpgtub
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -686,9 +696,10 @@ define <16 x i8> @absd_int8_ult(<16 x i8>, <16 x i8>) {
|
||||
%6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
|
||||
ret <16 x i8> %6
|
||||
; CHECK-LABEL: absd_int8_ult
|
||||
; CHECK: vcmpgtub
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtub
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsdub v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int8_ult
|
||||
; CHECK-PWR8: vcmpgtub
|
||||
; CHECK-PWR8: xxsel
|
||||
@ -702,15 +713,52 @@ define <16 x i8> @absd_int8_ule(<16 x i8>, <16 x i8>) {
|
||||
%6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
|
||||
ret <16 x i8> %6
|
||||
; CHECK-LABEL: absd_int8_ule
|
||||
; CHECK: vcmpgtub
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-NOT: vcmpgtub
|
||||
; CHECK-NOT: xxsel
|
||||
; CHECK: vabsdub v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-PWR8-LABEL: absd_int8_ule
|
||||
; CHECK-PWR8: vcmpgtub
|
||||
; CHECK-PWR8: xxsel
|
||||
; CHECK-PWR8: blr
|
||||
}
|
||||
|
||||
; Negative tests: patterns that must not be turned into vabsdu*.
|
||||
; Here the select arms are swapped relative to the ugt compare (the true arm is %1 - %0), so the checks below expect the compare + select to remain and no vabsduw.
|
||||
define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
|
||||
%3 = icmp ugt <4 x i32> %0, %1
|
||||
%4 = sub <4 x i32> %0, %1
|
||||
%5 = sub <4 x i32> %1, %0
|
||||
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
|
||||
ret <4 x i32> %6
|
||||
; CHECK-LABEL: absd_int32_ugt_opp
|
||||
; CHECK-NOT: vabsduw
|
||||
; CHECK: vcmpgtuw
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-PWR8-LABEL: absd_int32_ugt_opp
|
||||
; CHECK-PWR8: vcmpgtuw
|
||||
; CHECK-PWR8: xxsel
|
||||
; CHECK-PWR8: blr
|
||||
}
|
||||
|
||||
; Negative test: same compare/sub/select shape as the positive ugt tests, but on <2 x i64>; the checks below expect vcmpgtud + xxsel to remain and no vabsduw.
define <2 x i64> @absd_int64_ugt(<2 x i64>, <2 x i64>) {
|
||||
%3 = icmp ugt <2 x i64> %0, %1
|
||||
%4 = sub <2 x i64> %0, %1
|
||||
%5 = sub <2 x i64> %1, %0
|
||||
%6 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %5
|
||||
ret <2 x i64> %6
|
||||
; CHECK-LABEL: absd_int64_ugt
|
||||
; CHECK-NOT: vabsduw
|
||||
; CHECK: vcmpgtud
|
||||
; CHECK: xxsel
|
||||
; CHECK: blr
|
||||
; CHECK-PWR8-LABEL: absd_int64_ugt
|
||||
; CHECK-PWR8: vcmpgtud
|
||||
; CHECK-PWR8: xxsel
|
||||
; CHECK-PWR8: blr
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>)
|
||||
|
||||
declare <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16>, <8 x i16>)
|
||||
|
Loading…
Reference in New Issue
Block a user