mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[DAGCombiner] Add vector demanded elements support to ComputeNumSignBits
Currently ComputeNumSignBits returns the minimum number of sign bits across all elements of a vector, even when we may only be interested in one or some of the elements. This patch adds a DemandedElts argument that allows us to specify the elements we actually care about. The original ComputeNumSignBits implementation now calls the new overload with a DemandedElts mask demanding all elements, to match current behaviour. Scalar types set this mask to 1. I've only added support for BUILD_VECTOR and EXTRACT_VECTOR_ELT so far; all other opcodes will default to demanding all elements but can be updated in due course. Followup to D25691. Differential Revision: https://reviews.llvm.org/D31311 llvm-svn: 299219
This commit is contained in:
parent
1397ecba80
commit
026e8c9b44
@ -1317,6 +1317,17 @@ public:
|
||||
/// target nodes to be understood.
|
||||
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth = 0) const;
|
||||
|
||||
/// Return the number of times the sign bit of the register is replicated into
|
||||
/// the other bits. We know that at least 1 bit is always equal to the sign
|
||||
/// bit (itself), but other cases can give us information. For example,
|
||||
/// immediately after an "SRA X, 2", we know that the top 3 bits are all equal
|
||||
/// to each other, so we return 3. The DemandedElts argument allows
|
||||
/// us to only collect the minimum sign bits of the requested vector elements.
|
||||
/// Targets can implement the ComputeNumSignBitsForTarget method in the
|
||||
/// TargetLowering class to allow target nodes to be understood.
|
||||
unsigned ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
|
||||
unsigned Depth = 0) const;
|
||||
|
||||
/// Return true if the specified operand is an ISD::ADD with a ConstantSDNode
|
||||
/// on the right-hand side, or if it is an ISD::OR with a ConstantSDNode that
|
||||
/// is guaranteed to have the same semantics as an ADD. This handles the
|
||||
|
@ -2432,8 +2432,11 @@ public:
|
||||
unsigned Depth = 0) const;
|
||||
|
||||
/// This method can be implemented by targets that want to expose additional
|
||||
/// information about sign bits to the DAG Combiner.
|
||||
/// information about sign bits to the DAG Combiner. The DemandedElts
|
||||
/// argument allows us to only collect the minimum sign bits that are shared
|
||||
/// by the requested vector elements.
|
||||
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
|
||||
const APInt &DemandedElts,
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth = 0) const;
|
||||
|
||||
|
@ -2899,6 +2899,15 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
|
||||
|
||||
/// Convenience overload of ComputeNumSignBits that does not take a
/// DemandedElts mask. It builds a mask and forwards \p Op, the mask and
/// \p Depth to the overload that takes a DemandedElts argument, returning
/// that overload's result unchanged.
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
|
||||
EVT VT = Op.getValueType();
|
||||
// Vector types get an all-ones mask with one bit per vector element;
// any other type gets a 1-bit mask whose single bit is set.
APInt DemandedElts = VT.isVector()
|
||||
? APInt::getAllOnesValue(VT.getVectorNumElements())
|
||||
: APInt(1, 1);
|
||||
// Delegate to the DemandedElts-aware overload.
return ComputeNumSignBits(Op, DemandedElts, Depth);
|
||||
}
|
||||
|
||||
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
|
||||
unsigned Depth) const {
|
||||
EVT VT = Op.getValueType();
|
||||
assert(VT.isInteger() && "Invalid VT!");
|
||||
unsigned VTBits = VT.getScalarSizeInBits();
|
||||
unsigned Tmp, Tmp2;
|
||||
@ -2907,6 +2916,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
|
||||
if (Depth == 6)
|
||||
return 1; // Limit search depth.
|
||||
|
||||
if (!DemandedElts)
|
||||
return 1; // No demanded elts, better to assume we don't know anything.
|
||||
|
||||
switch (Op.getOpcode()) {
|
||||
default: break;
|
||||
case ISD::AssertSext:
|
||||
@ -2924,6 +2936,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
|
||||
case ISD::BUILD_VECTOR:
|
||||
Tmp = VTBits;
|
||||
for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
|
||||
if (!DemandedElts[i])
|
||||
continue;
|
||||
|
||||
SDValue SrcOp = Op.getOperand(i);
|
||||
Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
|
||||
|
||||
@ -3116,18 +3131,28 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
|
||||
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
|
||||
}
|
||||
case ISD::EXTRACT_VECTOR_ELT: {
|
||||
// At the moment we keep this simple and skip tracking the specific
|
||||
// element. This way we get the lowest common denominator for all elements
|
||||
// of the vector.
|
||||
// TODO: get information for given vector element
|
||||
SDValue InVec = Op.getOperand(0);
|
||||
SDValue EltNo = Op.getOperand(1);
|
||||
EVT VecVT = InVec.getValueType();
|
||||
const unsigned BitWidth = Op.getValueSizeInBits();
|
||||
const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
|
||||
const unsigned NumSrcElts = VecVT.getVectorNumElements();
|
||||
|
||||
// If BitWidth > EltBitWidth the value is anyext:ed, and we do not know
|
||||
// anything about sign bits. But if the sizes match we can derive knowledge
|
||||
// about sign bits from the vector operand.
|
||||
if (BitWidth == EltBitWidth)
|
||||
return ComputeNumSignBits(Op.getOperand(0), Depth+1);
|
||||
break;
|
||||
if (BitWidth != EltBitWidth)
|
||||
break;
|
||||
|
||||
// If we know the element index, just demand that vector element, else for
|
||||
// an unknown element index, ignore DemandedElts and demand them all.
|
||||
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
|
||||
ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
|
||||
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
|
||||
DemandedSrcElts =
|
||||
APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
|
||||
|
||||
return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
|
||||
}
|
||||
case ISD::EXTRACT_SUBVECTOR:
|
||||
return ComputeNumSignBits(Op.getOperand(0), Depth + 1);
|
||||
@ -3162,7 +3187,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
|
||||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
|
||||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
|
||||
Op.getOpcode() == ISD::INTRINSIC_VOID) {
|
||||
unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth);
|
||||
unsigned NumBits =
|
||||
TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
|
||||
if (NumBits > 1)
|
||||
FirstAnswer = std::max(FirstAnswer, NumBits);
|
||||
}
|
||||
@ -3170,7 +3196,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
|
||||
// Finally, if we can prove that the top bits of the result are 0's or 1's,
|
||||
// use this information.
|
||||
APInt KnownZero, KnownOne;
|
||||
computeKnownBits(Op, KnownZero, KnownOne, Depth);
|
||||
computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
|
||||
|
||||
APInt Mask;
|
||||
if (KnownZero.isNegative()) { // sign bit is 0
|
||||
|
@ -1338,6 +1338,7 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
|
||||
/// This method can be implemented by targets that want to expose additional
|
||||
/// information about sign bits to the DAG Combiner.
|
||||
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
|
||||
const APInt &,
|
||||
const SelectionDAG &,
|
||||
unsigned Depth) const {
|
||||
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
|
||||
|
@ -3596,7 +3596,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
|
||||
}
|
||||
|
||||
unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
|
||||
SDValue Op, const SelectionDAG &DAG, unsigned Depth) const {
|
||||
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
|
||||
unsigned Depth) const {
|
||||
switch (Op.getOpcode()) {
|
||||
case AMDGPUISD::BFE_I32: {
|
||||
ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
|
||||
|
@ -206,7 +206,8 @@ public:
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth = 0) const override;
|
||||
|
||||
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
|
||||
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts,
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth = 0) const override;
|
||||
|
||||
/// \brief Helper function that adds Reg to the LiveIn list of the DAG's
|
||||
|
@ -26745,7 +26745,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
|
||||
}
|
||||
|
||||
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
|
||||
SDValue Op, const SelectionDAG &DAG, unsigned Depth) const {
|
||||
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
|
||||
unsigned Depth) const {
|
||||
unsigned VTBits = Op.getScalarValueSizeInBits();
|
||||
unsigned Opcode = Op.getOpcode();
|
||||
switch (Opcode) {
|
||||
|
@ -832,6 +832,7 @@ namespace llvm {
|
||||
|
||||
/// Determine the number of bits in the operation that are sign bits.
|
||||
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
|
||||
const APInt &DemandedElts,
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth) const override;
|
||||
|
||||
|
@ -23,18 +23,14 @@ define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
|
||||
define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
|
||||
; X32-LABEL: knownbits_mask_extract_uitofp:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: andl $-8, %esp
|
||||
; X32-NEXT: subl $16, %esp
|
||||
; X32-NEXT: pushl %eax
|
||||
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
|
||||
; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: fildll {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: fstps {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: vmovd %xmm0, %eax
|
||||
; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
|
||||
; X32-NEXT: vmovss %xmm0, (%esp)
|
||||
; X32-NEXT: flds (%esp)
|
||||
; X32-NEXT: popl %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: knownbits_mask_extract_uitofp:
|
||||
@ -42,7 +38,7 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
|
||||
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
|
||||
; X64-NEXT: vmovq %xmm0, %rax
|
||||
; X64-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
|
||||
; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%1 = and <2 x i64> %a0, <i64 65535, i64 -1>
|
||||
%2 = extractelement <2 x i64> %1, i32 0
|
||||
|
Loading…
Reference in New Issue
Block a user