1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[DAGCombiner] Add vector demanded elements support to ComputeNumSignBits

Currently ComputeNumSignBits returns the minimum number of sign bits for all elements of vector data, when we may only be interested in one/some of the elements.

This patch adds a DemandedElts argument that allows us to specify the elements we actually care about. The original ComputeNumSignBits implementation calls with a DemandedElts demanding all elements to match current behaviour. Scalar types set this to 1.

I've only added support for BUILD_VECTOR and EXTRACT_VECTOR_ELT so far, all others will default to demanding all elements but can be updated in due course.

Followup to D25691.

Differential Revision: https://reviews.llvm.org/D31311

llvm-svn: 299219
This commit is contained in:
Simon Pilgrim 2017-03-31 13:54:09 +00:00
parent 1397ecba80
commit 026e8c9b44
9 changed files with 65 additions and 24 deletions

View File

@ -1317,6 +1317,17 @@ public:
/// target nodes to be understood.
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth = 0) const;
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign
/// bit (itself), but other cases can give us information. For example,
/// immediately after an "SRA X, 2", we know that the top 3 bits are all equal
/// to each other, so we return 3. The DemandedElts argument allows
/// us to only collect the minimum sign bits of the requested vector elements.
/// Targets can implement the ComputeNumSignBitsForTarget method in the
/// TargetLowering class to allow target nodes to be understood.
unsigned ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned Depth = 0) const;
/// Return true if the specified operand is an ISD::ADD with a ConstantSDNode
/// on the right-hand side, or if it is an ISD::OR with a ConstantSDNode that
/// is guaranteed to have the same semantics as an ADD. This handles the

View File

@ -2432,8 +2432,11 @@ public:
unsigned Depth = 0) const;
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
/// information about sign bits to the DAG Combiner. The DemandedElts
/// argument allows us to only collect the minimum sign bits that are shared
/// by the requested vector elements.
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const;

View File

@ -2899,6 +2899,15 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
return ComputeNumSignBits(Op, DemandedElts, Depth);
}
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned Depth) const {
EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getScalarSizeInBits();
unsigned Tmp, Tmp2;
@ -2907,6 +2916,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
if (Depth == 6)
return 1; // Limit search depth.
if (!DemandedElts)
return 1; // No demanded elts, better to assume we don't know anything.
switch (Op.getOpcode()) {
default: break;
case ISD::AssertSext:
@ -2924,6 +2936,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
case ISD::BUILD_VECTOR:
Tmp = VTBits;
for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
if (!DemandedElts[i])
continue;
SDValue SrcOp = Op.getOperand(i);
Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
@ -3116,18 +3131,28 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
case ISD::EXTRACT_VECTOR_ELT: {
// At the moment we keep this simple and skip tracking the specific
// element. This way we get the lowest common denominator for all elements
// of the vector.
// TODO: get information for given vector element
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
// If BitWidth > EltBitWidth the value is anyext:ed, and we do not know
// anything about sign bits. But if the sizes match we can derive knowledge
// about sign bits from the vector operand.
if (BitWidth == EltBitWidth)
return ComputeNumSignBits(Op.getOperand(0), Depth+1);
break;
if (BitWidth != EltBitWidth)
break;
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
}
case ISD::EXTRACT_SUBVECTOR:
return ComputeNumSignBits(Op.getOperand(0), Depth + 1);
@ -3162,7 +3187,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) {
unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth);
unsigned NumBits =
TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
if (NumBits > 1)
FirstAnswer = std::max(FirstAnswer, NumBits);
}
@ -3170,7 +3196,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero, KnownOne;
computeKnownBits(Op, KnownZero, KnownOne, Depth);
computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
APInt Mask;
if (KnownZero.isNegative()) { // sign bit is 0

View File

@ -1338,6 +1338,7 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
const APInt &,
const SelectionDAG &,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||

View File

@ -3596,7 +3596,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
}
unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const SelectionDAG &DAG, unsigned Depth) const {
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
switch (Op.getOpcode()) {
case AMDGPUISD::BFE_I32: {
ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));

View File

@ -206,7 +206,8 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
/// \brief Helper function that adds Reg to the LiveIn list of the DAG's

View File

@ -26745,7 +26745,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const SelectionDAG &DAG, unsigned Depth) const {
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
unsigned VTBits = Op.getScalarValueSizeInBits();
unsigned Opcode = Op.getOpcode();
switch (Opcode) {

View File

@ -832,6 +832,7 @@ namespace llvm {
/// Determine the number of bits in the operation that are sign bits.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const override;

View File

@ -23,18 +23,14 @@ define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32: # BB#0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $16, %esp
; X32-NEXT: pushl %eax
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: fildll {{[0-9]+}}(%esp)
; X32-NEXT: fstps {{[0-9]+}}(%esp)
; X32-NEXT: flds {{[0-9]+}}(%esp)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: vmovd %xmm0, %eax
; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
@ -42,7 +38,7 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT: retq
%1 = and <2 x i64> %a0, <i64 65535, i64 -1>
%2 = extractelement <2 x i64> %1, i32 0