mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
DAGCombiner: Canonicalize vector integer abs in the same way we do it for scalars.
This already helps SSE2 x86 a lot because it lacks an efficient way to represent a vector select. The long-term goal is to enable the backend to match a canonicalized pattern into a single instruction (e.g. vabs or pabs). llvm-svn: 180597
This commit is contained in:
parent
fe6c769d60
commit
7ce75fb032
@ -205,6 +205,7 @@ namespace {
|
||||
SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
|
||||
SDValue visitCTPOP(SDNode *N);
|
||||
SDValue visitSELECT(SDNode *N);
|
||||
SDValue visitVSELECT(SDNode *N);
|
||||
SDValue visitSELECT_CC(SDNode *N);
|
||||
SDValue visitSETCC(SDNode *N);
|
||||
SDValue visitSIGN_EXTEND(SDNode *N);
|
||||
@ -1126,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
|
||||
case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
|
||||
case ISD::CTPOP: return visitCTPOP(N);
|
||||
case ISD::SELECT: return visitSELECT(N);
|
||||
case ISD::VSELECT: return visitVSELECT(N);
|
||||
case ISD::SELECT_CC: return visitSELECT_CC(N);
|
||||
case ISD::SETCC: return visitSETCC(N);
|
||||
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
|
||||
@ -4162,6 +4164,46 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Try to combine a VSELECT node.
///
/// The single fold implemented here canonicalizes a vector integer absolute
/// value that is spelled as a select between X and (0 - X):
///   vselect (setg[te] X,  0),  X, -X
///   vselect (setgt    X, -1),  X, -X
///   vselect (setl[te] X,  0), -X,  X
/// Each of these is rewritten to
///   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
///
/// \param N the VSELECT node being visited.
/// \returns the replacement XOR node on a match, otherwise an empty SDValue.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue TrueVal = N->getOperand(1);
  SDValue FalseVal = N->getOperand(2);
  DebugLoc DL = N->getDebugLoc();

  // Only selects whose condition is a comparison can be an abs pattern.
  if (Cond.getOpcode() != ISD::SETCC)
    return SDValue();

  SDValue X = Cond.getOperand(0);
  SDValue CmpRHS = Cond.getOperand(1);
  ISD::CondCode Pred = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
  bool CmpWithZero = ISD::isBuildVectorAllZeros(CmpRHS.getNode());

  // Comparisons for which the true arm must be X: X > 0, X >= 0, X > -1.
  bool KeepsXWhenTrue =
      Pred == ISD::SETGT
          ? (CmpWithZero || ISD::isBuildVectorAllOnes(CmpRHS.getNode()))
          : (Pred == ISD::SETGE && CmpWithZero);
  // Comparisons for which the false arm must be X: X < 0, X <= 0.
  bool KeepsXWhenFalse =
      CmpWithZero && (Pred == ISD::SETLT || Pred == ISD::SETLE);

  // Pick the arm that has to be the negation of X; it stays empty when the
  // other arm is not X itself.
  SDValue Negated;
  if (KeepsXWhenTrue && TrueVal == X)
    Negated = FalseVal;
  else if (KeepsXWhenFalse && FalseVal == X)
    Negated = TrueVal;

  // The negated arm must be exactly (sub <all zeros>, X).
  if (!Negated.getNode() || Negated.getOpcode() != ISD::SUB ||
      Negated.getOperand(1) != X ||
      !ISD::isBuildVectorAllZeros(Negated.getOperand(0).getNode()))
    return SDValue();

  // Y is X arithmetically shifted right by (element width - 1).
  EVT VT = X.getValueType();
  SDValue ShiftAmt =
      DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT);
  SDValue SignMask = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, SignMask);
  AddToWorkList(SignMask.getNode());
  AddToWorkList(Sum.getNode());
  return DAG.getNode(ISD::XOR, DL, VT, Sum, SignMask);
}
|
||||
|
||||
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
|
66
test/CodeGen/X86/viabs.ll
Normal file
66
test/CodeGen/X86/viabs.ll
Normal file
@ -0,0 +1,66 @@
|
||||
; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2
|
||||
|
||||
; Integer abs written as: select (icmp sgt %a, <all -1>), %a, (0 - %a).
; The check lines below expect the shift/add/xor sequence
; (psrad $31, padd, pxor) in the generated code.
define <4 x i32> @test1(<4 x i32> %a) nounwind {
|
||||
; SSE2: test1:
|
||||
; SSE2: movdqa
|
||||
; SSE2-NEXT: psrad $31
|
||||
; SSE2-NEXT: padd
|
||||
; SSE2-NEXT: pxor
|
||||
; SSE2-NEXT: ret
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
|
||||
ret <4 x i32> %abs
|
||||
}
|
||||
|
||||
; Integer abs written as: select (icmp sge %a, zero), %a, (0 - %a).
; The check lines below expect the shift/add/xor sequence
; (psrad $31, padd, pxor) in the generated code.
define <4 x i32> @test2(<4 x i32> %a) nounwind {
|
||||
; SSE2: test2:
|
||||
; SSE2: movdqa
|
||||
; SSE2-NEXT: psrad $31
|
||||
; SSE2-NEXT: padd
|
||||
; SSE2-NEXT: pxor
|
||||
; SSE2-NEXT: ret
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp sge <4 x i32> %a, zeroinitializer
|
||||
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
|
||||
ret <4 x i32> %abs
|
||||
}
|
||||
|
||||
; Integer abs written as: select (icmp sgt %a, zero), %a, (0 - %a).
; The check lines below expect the shift/add/xor sequence
; (psrad $31, padd, pxor) in the generated code.
define <4 x i32> @test3(<4 x i32> %a) nounwind {
|
||||
; SSE2: test3:
|
||||
; SSE2: movdqa
|
||||
; SSE2-NEXT: psrad $31
|
||||
; SSE2-NEXT: padd
|
||||
; SSE2-NEXT: pxor
|
||||
; SSE2-NEXT: ret
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp sgt <4 x i32> %a, zeroinitializer
|
||||
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
|
||||
ret <4 x i32> %abs
|
||||
}
|
||||
|
||||
; Integer abs with the select arms swapped:
; select (icmp slt %a, zero), (0 - %a), %a.
; The check lines below expect the shift/add/xor sequence
; (psrad $31, padd, pxor) in the generated code.
define <4 x i32> @test4(<4 x i32> %a) nounwind {
|
||||
; SSE2: test4:
|
||||
; SSE2: movdqa
|
||||
; SSE2-NEXT: psrad $31
|
||||
; SSE2-NEXT: padd
|
||||
; SSE2-NEXT: pxor
|
||||
; SSE2-NEXT: ret
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp slt <4 x i32> %a, zeroinitializer
|
||||
%abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
|
||||
ret <4 x i32> %abs
|
||||
}
|
||||
|
||||
; Integer abs with the select arms swapped:
; select (icmp sle %a, zero), (0 - %a), %a.
; The check lines below expect the shift/add/xor sequence
; (psrad $31, padd, pxor) in the generated code.
define <4 x i32> @test5(<4 x i32> %a) nounwind {
|
||||
; SSE2: test5:
|
||||
; SSE2: movdqa
|
||||
; SSE2-NEXT: psrad $31
|
||||
; SSE2-NEXT: padd
|
||||
; SSE2-NEXT: pxor
|
||||
; SSE2-NEXT: ret
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp sle <4 x i32> %a, zeroinitializer
|
||||
%abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
|
||||
ret <4 x i32> %abs
|
||||
}
|
Loading…
Reference in New Issue
Block a user