mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[DAGCombiner] Vector constant folding for comparisons
This patch adds support for vector constant folding of integer/float comparisons. This requires FoldConstantVectorArithmetic to support scalar constant operands (in this case ISD::CONDCODE). In the future, we should be able to support other scalar constant types as necessary (and possibly start calling FoldConstantVectorArithmetic for all node creations). Differential Revision: http://reviews.llvm.org/D14683 llvm-svn: 253504
This commit is contained in:
parent
cf7a7cf40d
commit
8ddf1acb2a
@ -3318,21 +3318,22 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL,
|
||||
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
||||
auto IsSameVectorSize = [&](const SDValue &Op) {
|
||||
return Op.getValueType().isVector() &&
|
||||
auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
|
||||
return !Op.getValueType().isVector() ||
|
||||
Op.getValueType().getVectorNumElements() == NumElts;
|
||||
};
|
||||
|
||||
auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
|
||||
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
|
||||
return (Op.getOpcode() == ISD::UNDEF) || (BV && BV->isConstant());
|
||||
return (Op.getOpcode() == ISD::UNDEF) ||
|
||||
(Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant());
|
||||
};
|
||||
|
||||
// All operands must be vector types with the same number of elements as
|
||||
// the result type and must be either UNDEF or a build vector of constant
|
||||
// or UNDEF scalars.
|
||||
if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) ||
|
||||
!std::all_of(Ops.begin(), Ops.end(), IsSameVectorSize))
|
||||
!std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize))
|
||||
return SDValue();
|
||||
|
||||
// Find legal integer scalar type for constant promotion and
|
||||
@ -3353,8 +3354,11 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL,
|
||||
EVT InSVT = Op.getValueType().getScalarType();
|
||||
BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
|
||||
if (!InBV) {
|
||||
// We've checked that this is UNDEF above.
|
||||
ScalarOps.push_back(getUNDEF(InSVT));
|
||||
// We've checked that this is UNDEF or a constant of some kind.
|
||||
if (Op.isUndef())
|
||||
ScalarOps.push_back(getUNDEF(InSVT));
|
||||
else
|
||||
ScalarOps.push_back(Op);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -3919,6 +3923,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
|
||||
// Use FoldSetCC to simplify SETCC's.
|
||||
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
|
||||
return V;
|
||||
// Vector constant folding.
|
||||
SDValue Ops[] = {N1, N2, N3};
|
||||
if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
|
||||
return V;
|
||||
break;
|
||||
}
|
||||
case ISD::SELECT:
|
||||
|
@ -41,24 +41,7 @@ entry:
|
||||
define void @mp_11193(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
|
||||
; CHECK-LABEL: mp_11193:
|
||||
; CHECK: # BB#0: # %allocas
|
||||
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00]
|
||||
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [9.000000e+00,1.000000e+00,9.000000e+00,1.000000e+00]
|
||||
; CHECK-NEXT: cmpltps %xmm0, %xmm1
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; CHECK-NEXT: pshufb %xmm2, %xmm1
|
||||
; CHECK-NEXT: movaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; CHECK-NEXT: cmpltps %xmm0, %xmm3
|
||||
; CHECK-NEXT: pshufb %xmm2, %xmm3
|
||||
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
|
||||
; CHECK-NEXT: psllw $15, %xmm3
|
||||
; CHECK-NEXT: psraw $15, %xmm3
|
||||
; CHECK-NEXT: pextrb $0, %xmm3, %eax
|
||||
; CHECK-NEXT: shlb $7, %al
|
||||
; CHECK-NEXT: sarb $7, %al
|
||||
; CHECK-NEXT: movsbl %al, %eax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2ssl %eax, %xmm0
|
||||
; CHECK-NEXT: movss %xmm0, (%rsi)
|
||||
; CHECK-NEXT: movl $-1082130432, (%rsi) # imm = 0xFFFFFFFFBF800000
|
||||
; CHECK-NEXT: retq
|
||||
allocas:
|
||||
%bincmp = fcmp olt <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 9.000000e+00, float 1.000000e+00, float 9.000000e+00, float 1.000000e+00> , <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
|
||||
|
@ -4,37 +4,12 @@
|
||||
; 0x1 means that we only look at the first bit.
|
||||
|
||||
define void @ui_to_fp_conv(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
|
||||
; CHECK: 0x1
|
||||
; CHECK-LABEL: ui_to_fp_conv:
|
||||
; CHECK: # BB#0: # %allocas
|
||||
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,3.000000e+00,3.000000e+00]
|
||||
; CHECK-NEXT: cmpltps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm1
|
||||
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; CHECK-NEXT: psllw $15, %xmm0
|
||||
; CHECK-NEXT: psraw $15, %xmm0
|
||||
; CHECK-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
|
||||
; CHECK-NEXT: pand %xmm2, %xmm1
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [1258291200,1258291200,1258291200,1258291200]
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm4
|
||||
; CHECK-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0],xmm3[1],xmm4[2],xmm3[3],xmm4[4],xmm3[5],xmm4[6],xmm3[7]
|
||||
; CHECK-NEXT: psrld $16, %xmm1
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
|
||||
; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm5[1],xmm1[2],xmm5[3],xmm1[4],xmm5[5],xmm1[6],xmm5[7]
|
||||
; CHECK-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
|
||||
; CHECK-NEXT: addps %xmm6, %xmm1
|
||||
; CHECK-NEXT: addps %xmm4, %xmm1
|
||||
; CHECK-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
|
||||
; CHECK-NEXT: pand %xmm2, %xmm0
|
||||
; CHECK-NEXT: pblendw {{.*#+}} xmm3 = xmm0[0],xmm3[1],xmm0[2],xmm3[3],xmm0[4],xmm3[5],xmm0[6],xmm3[7]
|
||||
; CHECK-NEXT: psrld $16, %xmm0
|
||||
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm5[1],xmm0[2],xmm5[3],xmm0[4],xmm5[5],xmm0[6],xmm5[7]
|
||||
; CHECK-NEXT: addps %xmm6, %xmm0
|
||||
; CHECK-NEXT: addps %xmm3, %xmm0
|
||||
; CHECK-NEXT: movups %xmm0, 16(%rsi)
|
||||
; CHECK-NEXT: movups %xmm1, (%rsi)
|
||||
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,0.000000e+00,0.000000e+00]
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: movups %xmm1, 16(%rsi)
|
||||
; CHECK-NEXT: movups %xmm0, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
allocas:
|
||||
%bincmp = fcmp olt <8 x float> <float 1.000000e+00, float 1.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> , <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user