mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
This commit contains a few changes that had to go in together.
1. Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
   (and also scalar_to_vector).

2. Xor/and/or are indifferent to the swizzle operation (shuffle of one src).
   Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A, B))

3. Optimize swizzles of shuffles: shuff(shuff(x, y), undef) -> shuff(x, y).

4. Fix an X86ISelLowering optimization which was very bitcast-sensitive.

Code which was previously compiled to this:

  movd    (%rsi), %xmm0
  movdqa  .LCPI0_0(%rip), %xmm2
  pshufb  %xmm2, %xmm0
  movd    (%rdi), %xmm1
  pshufb  %xmm2, %xmm1
  pxor    %xmm0, %xmm1
  pshufb  .LCPI0_1(%rip), %xmm1
  movd    %xmm1, (%rdi)
  ret

Now compiles to this:

  movl    (%rsi), %eax
  xorl    %eax, (%rdi)
  ret

llvm-svn: 153848
This commit is contained in:
parent
44174d3b7a
commit
2729f54295
@ -2336,6 +2336,68 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
|
||||
ORNode, N0.getOperand(1));
|
||||
}
|
||||
|
||||
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
|
||||
// Only perform this optimization after type legalization and before
|
||||
// LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
|
||||
// adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
|
||||
// we don't want to undo this promotion.
|
||||
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
|
||||
// on scalars.
|
||||
if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
|
||||
&& Level == AfterLegalizeVectorOps) {
|
||||
SDValue In0 = N0.getOperand(0);
|
||||
SDValue In1 = N1.getOperand(0);
|
||||
EVT In0Ty = In0.getValueType();
|
||||
EVT In1Ty = In1.getValueType();
|
||||
// If both incoming values are integers, and the original types are the same.
|
||||
if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
|
||||
SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
|
||||
SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
|
||||
AddToWorkList(Op.getNode());
|
||||
return BC;
|
||||
}
|
||||
}
|
||||
|
||||
// Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
|
||||
// Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
|
||||
// If both shuffles use the same mask, and both shuffle within a single
|
||||
// vector, then it is worthwhile to move the swizzle after the operation.
|
||||
// The type-legalizer generates this pattern when loading illegal
|
||||
// vector types from memory. In many cases this allows additional shuffle
|
||||
// optimizations.
|
||||
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
|
||||
ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
|
||||
ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
|
||||
SDValue In0 = SVN0->getOperand(0);
|
||||
SDValue In1 = SVN1->getOperand(0);
|
||||
EVT In0Ty = In0.getValueType();
|
||||
EVT In1Ty = In1.getValueType();
|
||||
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
// Check that both shuffles are swizzles.
|
||||
bool SingleVecShuff = (N0.getOperand(1).getOpcode() == ISD::UNDEF &&
|
||||
N1.getOperand(1).getOpcode() == ISD::UNDEF);
|
||||
|
||||
// Check that both shuffles use the same mask. The masks are known to be of
|
||||
// the same length because the result vector type is the same.
|
||||
bool SameMask = true;
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
int Idx0 = SVN0->getMaskElt(i);
|
||||
int Idx1 = SVN1->getMaskElt(i);
|
||||
if (Idx0 != Idx1) {
|
||||
SameMask = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (SameMask && SingleVecShuff && In0Ty == In1Ty) {
|
||||
SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, In0, In1);
|
||||
SDValue Shuff = DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
|
||||
DAG.getUNDEF(VT), &SVN0->getMask()[0]);
|
||||
AddToWorkList(Op.getNode());
|
||||
return Shuff;
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
@ -7721,6 +7783,36 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
return N0;
|
||||
}
|
||||
}
|
||||
|
||||
// If this shuffle node is simply a swizzle of another shuffle node,
|
||||
// optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y).
|
||||
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
|
||||
N1.getOpcode() == ISD::UNDEF) {
|
||||
|
||||
SmallVector<int, 8> NewMask;
|
||||
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
|
||||
|
||||
EVT InVT = N0.getValueType();
|
||||
int InNumElts = InVT.getVectorNumElements();
|
||||
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
int Idx = SVN->getMaskElt(i);
|
||||
// If we access the second (undef) operand then this index can be
|
||||
// canonicalized to undef as well.
|
||||
if (Idx >= InNumElts)
|
||||
Idx = -1;
|
||||
// Next, this index comes from the first value, which is the incoming
|
||||
// shuffle. Adopt the incoming index.
|
||||
if (Idx >= 0)
|
||||
Idx = OtherSV->getMaskElt(Idx);
|
||||
|
||||
NewMask.push_back(Idx);
|
||||
}
|
||||
|
||||
return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0),
|
||||
OtherSV->getOperand(1), &NewMask[0]);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -14000,13 +14000,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
|
||||
// Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
|
||||
if (Mask.getOpcode() != ISD::BITCAST ||
|
||||
X.getOpcode() != ISD::BITCAST ||
|
||||
Y.getOpcode() != ISD::BITCAST)
|
||||
return SDValue();
|
||||
|
||||
// Look through mask bitcast.
|
||||
Mask = Mask.getOperand(0);
|
||||
if (Mask.getOpcode() == ISD::BITCAST)
|
||||
Mask = Mask.getOperand(0);
|
||||
if (X.getOpcode() == ISD::BITCAST)
|
||||
X = X.getOperand(0);
|
||||
if (Y.getOpcode() == ISD::BITCAST)
|
||||
Y = Y.getOperand(0);
|
||||
|
||||
EVT MaskVT = Mask.getValueType();
|
||||
|
||||
// Validate that the Mask operand is a vector sra node.
|
||||
@ -14027,8 +14028,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
|
||||
// Now we know we at least have a plendvb with the mask val. See if
|
||||
// we can form a psignb/w/d.
|
||||
// psign = x.type == y.type == mask.type && y = sub(0, x);
|
||||
X = X.getOperand(0);
|
||||
Y = Y.getOperand(0);
|
||||
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
|
||||
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
|
||||
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
|
||||
|
@ -273,7 +273,7 @@ define arm_aapcs_vfpcc i32 @t10() nounwind {
|
||||
entry:
|
||||
; CHECK: t10:
|
||||
; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000
|
||||
; CHECK: vmul.f32 q8, q8, d0[0]
|
||||
; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]]
|
||||
; CHECK: vadd.f32 q8, q8, q8
|
||||
%0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
|
||||
%1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -march=cellspu -o %t1.s
|
||||
; RUN: grep rot %t1.s | count 86
|
||||
; RUN: grep rot %t1.s | count 85
|
||||
; RUN: grep roth %t1.s | count 8
|
||||
; RUN: grep roti.*5 %t1.s | count 1
|
||||
; RUN: grep roti.*27 %t1.s | count 1
|
||||
|
@ -3,14 +3,14 @@
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
;CHECK: ltstore
|
||||
;CHECK: pshufd
|
||||
;CHECK: pshufd
|
||||
;CHECK: ret
|
||||
define void @ltstore() {
|
||||
;CHECK: movq
|
||||
;CHECK-NEXT: movq
|
||||
;CHECK-NEXT: ret
|
||||
define void @ltstore(<4 x i32>* %pIn, <2 x i32>* %pOut) {
|
||||
entry:
|
||||
%in = load <4 x i32>* undef
|
||||
%in = load <4 x i32>* %pIn
|
||||
%j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
store <2 x i32> %j, <2 x i32>* undef
|
||||
store <2 x i32> %j, <2 x i32>* %pOut
|
||||
ret void
|
||||
}
|
||||
|
||||
|
14
test/CodeGen/X86/SwizzleShuff.ll
Normal file
14
test/CodeGen/X86/SwizzleShuff.ll
Normal file
@ -0,0 +1,14 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
||||
|
||||
; Check that we perform a scalar XOR on i32.
|
||||
|
||||
; CHECK: pull_bitcast
|
||||
; CHECK: xorl
|
||||
; CHECK: ret
|
||||
define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
|
||||
%A = load <4 x i8>* %pA
|
||||
%B = load <4 x i8>* %pB
|
||||
%C = xor <4 x i8> %A, %B
|
||||
store <4 x i8> %C, <4 x i8>* %pA
|
||||
ret void
|
||||
}
|
@ -27,11 +27,11 @@ entry:
|
||||
define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
|
||||
entry:
|
||||
; CHECK: t02
|
||||
; CHECK: movaps
|
||||
; CHECK: shufps
|
||||
; CHECK: pshufd
|
||||
; CHECK: movq
|
||||
; CHECK: ret
|
||||
; CHECK: mov
|
||||
; CHECK-NEXT: mov
|
||||
; CHECK-NEXT: mov
|
||||
; CHECK-NEXT: mov
|
||||
; CHECK-NEXT: ret
|
||||
%0 = bitcast <8 x i32>* %source to <4 x i32>*
|
||||
%arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
|
||||
%tmp2 = load <4 x i32>* %arrayidx, align 16
|
||||
|
@ -33,7 +33,7 @@ entry:
|
||||
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
|
||||
entry:
|
||||
; CHECK: shuf3:
|
||||
; CHECK: shufps
|
||||
; CHECK: shufd
|
||||
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
%tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
Loading…
Reference in New Issue
Block a user