1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[X86] Add two combine rules to simplify dag nodes introduced during type legalization when promoting nodes with illegal vector type.

This patch teaches the backend how to simplify/canonicalize dag node
sequences normally introduced by the backend when promoting certain dag nodes
with illegal vector type.

This patch adds two new combine rules:
1) fold (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
        (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)

2) fold (BINOP (shuffle (A, Undef, <Mask>)), (shuffle (B, Undef, <Mask>))) ->
        (shuffle (BINOP A, B), Undef, <Mask>).

Both rules are only triggered on the type-legalized DAG.
In particular, rule 1. is a target-specific combine rule that attempts
to sink a bitconvert into the operands of a binary operation.
Rule 2. is a target-independent rule that attempts to move a shuffle
immediately after a binary operation.

llvm-svn: 209930
This commit is contained in:
Andrea Di Biagio 2014-05-30 23:17:53 +00:00
parent 8abf11ea97
commit 3a03708285
4 changed files with 355 additions and 27 deletions

View File

@ -10801,6 +10801,27 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
}
// Type legalization might introduce new shuffles in the DAG.
// Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
// -> (shuffle (VBinOp (A, B)), Undef, Mask).
if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
if (SVN0->getMask().equals(SVN1->getMask())) {
EVT VT = N->getValueType(0);
SDValue UndefVector = LHS.getOperand(1);
SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
LHS.getOperand(0), RHS.getOperand(0));
AddUsersToWorkList(N);
return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
&SVN0->getMask()[0]);
}
}
return SDValue();
}

View File

@ -17495,6 +17495,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// Don't create instructions with illegal types after legalize types has run.
@ -17507,6 +17509,57 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
// During Type Legalization, when promoting illegal vector types,
// the backend might introduce new shuffle dag nodes and bitcasts.
//
// This code performs the following transformation:
// fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
// (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
//
// We do this only if both the bitcast and the BINOP dag nodes have
// one use. Also, perform this transformation only if the new binary
// operation is legal. This is to avoid introducing dag nodes that
// potentially need to be further expanded (or custom lowered) into a
// less optimal sequence of dag nodes.
if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() &&
N1.getOpcode() == ISD::UNDEF && N0.hasOneUse() &&
N0.getOpcode() == ISD::BITCAST) {
SDValue BC0 = N0.getOperand(0);
EVT SVT = BC0.getValueType();
unsigned Opcode = BC0.getOpcode();
unsigned NumElts = VT.getVectorNumElements();
if (BC0.hasOneUse() && SVT.isVector() &&
SVT.getVectorNumElements() * 2 == NumElts &&
TLI.isOperationLegal(Opcode, VT)) {
bool CanFold = false;
switch (Opcode) {
default : break;
case ISD::ADD :
case ISD::FADD :
case ISD::SUB :
case ISD::FSUB :
case ISD::MUL :
case ISD::FMUL :
CanFold = true;
}
unsigned SVTNumElts = SVT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
CanFold = SVOp->getMaskElt(i) < 0;
if (CanFold) {
SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0));
SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1));
SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]);
}
}
}
// Only handle 128 wide vector from here on.
if (!VT.is128BitVector())
return SDValue();

View File

@ -0,0 +1,273 @@
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
; Integer 'add' on illegal vector types (<2 x i32>, <4 x i16>, <8 x i8>)
; bitcast from/to double. The combines added by this commit should remove
; the shuffles introduced during type legalization, so each test lowers to
; a single vector add immediately followed by 'ret'.
define double @test1_add(double %A, double %B) {
%1 = bitcast double %A to <2 x i32>
%2 = bitcast double %B to <2 x i32>
%add = add <2 x i32> %1, %2
%3 = bitcast <2 x i32> %add to double
ret double %3
}
; CHECK-LABEL: test1_add
; SSE41: paddd
; AVX: vpaddd
; CHECK-NEXT: ret

; Same as test1_add, but with 16-bit elements (expects a single paddw).
define double @test2_add(double %A, double %B) {
%1 = bitcast double %A to <4 x i16>
%2 = bitcast double %B to <4 x i16>
%add = add <4 x i16> %1, %2
%3 = bitcast <4 x i16> %add to double
ret double %3
}
; CHECK-LABEL: test2_add
; SSE41: paddw
; AVX: vpaddw
; CHECK-NEXT: ret

; Same as test1_add, but with 8-bit elements (expects a single paddb).
define double @test3_add(double %A, double %B) {
%1 = bitcast double %A to <8 x i8>
%2 = bitcast double %B to <8 x i8>
%add = add <8 x i8> %1, %2
%3 = bitcast <8 x i8> %add to double
ret double %3
}
; CHECK-LABEL: test3_add
; SSE41: paddb
; AVX: vpaddb
; CHECK-NEXT: ret
; Integer 'sub' variants of the add tests above; each should lower to a
; single psub[dwb] with no surrounding shuffles.
define double @test1_sub(double %A, double %B) {
%1 = bitcast double %A to <2 x i32>
%2 = bitcast double %B to <2 x i32>
%sub = sub <2 x i32> %1, %2
%3 = bitcast <2 x i32> %sub to double
ret double %3
}
; CHECK-LABEL: test1_sub
; SSE41: psubd
; AVX: vpsubd
; CHECK-NEXT: ret

; 16-bit element variant (expects a single psubw).
define double @test2_sub(double %A, double %B) {
%1 = bitcast double %A to <4 x i16>
%2 = bitcast double %B to <4 x i16>
%sub = sub <4 x i16> %1, %2
%3 = bitcast <4 x i16> %sub to double
ret double %3
}
; CHECK-LABEL: test2_sub
; SSE41: psubw
; AVX: vpsubw
; CHECK-NEXT: ret

; 8-bit element variant (expects a single psubb).
define double @test3_sub(double %A, double %B) {
%1 = bitcast double %A to <8 x i8>
%2 = bitcast double %B to <8 x i8>
%sub = sub <8 x i8> %1, %2
%3 = bitcast <8 x i8> %sub to double
ret double %3
}
; CHECK-LABEL: test3_sub
; SSE41: psubb
; AVX: vpsubb
; CHECK-NEXT: ret
; Integer 'mul' variants. The fold only fires when the binary op is legal on
; the wider type, so the 8-bit case below deliberately does NOT fold.
define double @test1_mul(double %A, double %B) {
%1 = bitcast double %A to <2 x i32>
%2 = bitcast double %B to <2 x i32>
%mul = mul <2 x i32> %1, %2
%3 = bitcast <2 x i32> %mul to double
ret double %3
}
; CHECK-LABEL: test1_mul
; SSE41: pmulld
; AVX: vpmulld
; CHECK-NEXT: ret

; 16-bit element variant (expects a single pmullw).
define double @test2_mul(double %A, double %B) {
%1 = bitcast double %A to <4 x i16>
%2 = bitcast double %B to <4 x i16>
%mul = mul <4 x i16> %1, %2
%3 = bitcast <4 x i16> %mul to double
ret double %3
}
; CHECK-LABEL: test2_mul
; SSE41: pmullw
; AVX: vpmullw
; CHECK-NEXT: ret

; There is no legal ISD::MUL with type MVT::v8i16.
; NOTE(review): the type named above looks suspect — the mul that cannot be
; folded here is on the widened 8-bit vector (MVT::v16i8); the pmullw checked
; below runs on the promoted 16-bit elements. Confirm which type was meant.
define double @test3_mul(double %A, double %B) {
%1 = bitcast double %A to <8 x i8>
%2 = bitcast double %B to <8 x i8>
%mul = mul <8 x i8> %1, %2
%3 = bitcast <8 x i8> %mul to double
ret double %3
}
; CHECK-LABEL: test3_mul
; CHECK: pmullw
; CHECK-NEXT: pshufb
; CHECK-NEXT: ret
; Bitwise 'and' tests. Per the CHECK lines, all three element widths are
; expected to lower to the same single andps/vandps instruction.
define double @test1_and(double %A, double %B) {
%1 = bitcast double %A to <2 x i32>
%2 = bitcast double %B to <2 x i32>
%and = and <2 x i32> %1, %2
%3 = bitcast <2 x i32> %and to double
ret double %3
}
; CHECK-LABEL: test1_and
; SSE41: andps
; AVX: vandps
; CHECK-NEXT: ret

; 16-bit element variant (same expected lowering).
define double @test2_and(double %A, double %B) {
%1 = bitcast double %A to <4 x i16>
%2 = bitcast double %B to <4 x i16>
%and = and <4 x i16> %1, %2
%3 = bitcast <4 x i16> %and to double
ret double %3
}
; CHECK-LABEL: test2_and
; SSE41: andps
; AVX: vandps
; CHECK-NEXT: ret

; 8-bit element variant (same expected lowering).
define double @test3_and(double %A, double %B) {
%1 = bitcast double %A to <8 x i8>
%2 = bitcast double %B to <8 x i8>
%and = and <8 x i8> %1, %2
%3 = bitcast <8 x i8> %and to double
ret double %3
}
; CHECK-LABEL: test3_and
; SSE41: andps
; AVX: vandps
; CHECK-NEXT: ret
; Bitwise 'or' tests. As with 'and', every element width is expected to
; lower to a single orps/vorps.
define double @test1_or(double %A, double %B) {
%1 = bitcast double %A to <2 x i32>
%2 = bitcast double %B to <2 x i32>
%or = or <2 x i32> %1, %2
%3 = bitcast <2 x i32> %or to double
ret double %3
}
; CHECK-LABEL: test1_or
; SSE41: orps
; AVX: vorps
; CHECK-NEXT: ret

; 16-bit element variant (same expected lowering).
define double @test2_or(double %A, double %B) {
%1 = bitcast double %A to <4 x i16>
%2 = bitcast double %B to <4 x i16>
%or = or <4 x i16> %1, %2
%3 = bitcast <4 x i16> %or to double
ret double %3
}
; CHECK-LABEL: test2_or
; SSE41: orps
; AVX: vorps
; CHECK-NEXT: ret

; 8-bit element variant (same expected lowering).
define double @test3_or(double %A, double %B) {
%1 = bitcast double %A to <8 x i8>
%2 = bitcast double %B to <8 x i8>
%or = or <8 x i8> %1, %2
%3 = bitcast <8 x i8> %or to double
ret double %3
}
; CHECK-LABEL: test3_or
; SSE41: orps
; AVX: vorps
; CHECK-NEXT: ret
; Bitwise 'xor' tests. As with 'and'/'or', every element width is expected
; to lower to a single xorps/vxorps.
define double @test1_xor(double %A, double %B) {
%1 = bitcast double %A to <2 x i32>
%2 = bitcast double %B to <2 x i32>
%xor = xor <2 x i32> %1, %2
%3 = bitcast <2 x i32> %xor to double
ret double %3
}
; CHECK-LABEL: test1_xor
; SSE41: xorps
; AVX: vxorps
; CHECK-NEXT: ret

; 16-bit element variant (same expected lowering).
define double @test2_xor(double %A, double %B) {
%1 = bitcast double %A to <4 x i16>
%2 = bitcast double %B to <4 x i16>
%xor = xor <4 x i16> %1, %2
%3 = bitcast <4 x i16> %xor to double
ret double %3
}
; CHECK-LABEL: test2_xor
; SSE41: xorps
; AVX: vxorps
; CHECK-NEXT: ret

; 8-bit element variant (same expected lowering).
define double @test3_xor(double %A, double %B) {
%1 = bitcast double %A to <8 x i8>
%2 = bitcast double %B to <8 x i8>
%xor = xor <8 x i8> %1, %2
%3 = bitcast <8 x i8> %xor to double
ret double %3
}
; CHECK-LABEL: test3_xor
; SSE41: xorps
; AVX: vxorps
; CHECK-NEXT: ret
; Floating-point binary ops on the illegal <2 x float> type bitcast from/to
; double; each should lower to a single packed instruction followed by 'ret'.
define double @test_fadd(double %A, double %B) {
%1 = bitcast double %A to <2 x float>
%2 = bitcast double %B to <2 x float>
%add = fadd <2 x float> %1, %2
%3 = bitcast <2 x float> %add to double
ret double %3
}
; CHECK-LABEL: test_fadd
; SSE41: addps
; AVX: vaddps
; CHECK-NEXT: ret

; fsub variant (expects a single subps/vsubps).
define double @test_fsub(double %A, double %B) {
%1 = bitcast double %A to <2 x float>
%2 = bitcast double %B to <2 x float>
%sub = fsub <2 x float> %1, %2
%3 = bitcast <2 x float> %sub to double
ret double %3
}
; CHECK-LABEL: test_fsub
; SSE41: subps
; AVX: vsubps
; CHECK-NEXT: ret

; fmul variant (expects a single mulps/vmulps).
define double @test_fmul(double %A, double %B) {
%1 = bitcast double %A to <2 x float>
%2 = bitcast double %B to <2 x float>
%mul = fmul <2 x float> %1, %2
%3 = bitcast <2 x float> %mul to double
ret double %3
}
; CHECK-LABEL: test_fmul
; SSE41: mulps
; AVX: vmulps
; CHECK-NEXT: ret

View File

@ -14,7 +14,7 @@ define double @test1(double %A) {
; CHECK-LABEL: test1
; CHECK-NOT: movsd
; CHECK: pshufd
; CHECK-NEXT: paddq
; CHECK-NEXT: paddd
; CHECK-NEXT: pshufd
; CHECK-NEXT: ret
@ -26,16 +26,9 @@ define double @test2(double %A, double %B) {
%3 = bitcast <2 x i32> %add to double
ret double %3
}
; FIXME: Ideally we should be able to fold the entire body of @test2 into a
; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the
; sequence pshufd+pshufd+paddq+pshufd.
; CHECK-LABEL: test2
; CHECK-NOT: movsd
; CHECK: pshufd
; CHECK-NEXT: pshufd
; CHECK-NEXT: paddq
; CHECK-NEXT: pshufd
; CHECK: paddd
; CHECK-NEXT: ret
@ -91,7 +84,7 @@ define double @test6(double %A) {
; CHECK-LABEL: test6
; CHECK-NOT: movsd
; CHECK: punpcklwd
; CHECK-NEXT: paddd
; CHECK-NEXT: paddw
; CHECK-NEXT: pshufb
; CHECK-NEXT: ret
@ -103,16 +96,10 @@ define double @test7(double %A, double %B) {
%3 = bitcast <4 x i16> %add to double
ret double %3
}
; FIXME: Ideally we should be able to fold the entire body of @test7 into a
; single 'paddw %xmm1, %xmm0' instruction. At the moment we produce the
; sequence pshufd+pshufd+paddd+pshufd.
; CHECK-LABEL: test7
; CHECK-NOT: movsd
; CHECK: punpcklwd
; CHECK-NEXT: punpcklwd
; CHECK-NEXT: paddd
; CHECK-NEXT: pshufb
; CHECK-NOT: punpcklwd
; CHECK: paddw
; CHECK-NEXT: ret
@ -129,7 +116,7 @@ define double @test8(double %A) {
; CHECK-LABEL: test8
; CHECK-NOT: movsd
; CHECK: punpcklbw
; CHECK-NEXT: paddw
; CHECK-NEXT: paddb
; CHECK-NEXT: pshufb
; CHECK-NEXT: ret
@ -141,15 +128,9 @@ define double @test9(double %A, double %B) {
%3 = bitcast <8 x i8> %add to double
ret double %3
}
; FIXME: Ideally we should be able to fold the entire body of @test9 into a
; single 'paddb %xmm1, %xmm0' instruction. At the moment we produce the
; sequence pshufd+pshufd+paddw+pshufd.
; CHECK-LABEL: test9
; CHECK-NOT: movsd
; CHECK: punpcklbw
; CHECK-NEXT: punpcklbw
; CHECK-NEXT: paddw
; CHECK-NEXT: pshufb
; CHECK-NOT: punpcklbw
; CHECK: paddb
; CHECK-NEXT: ret