llvm-mirror/include/llvm/Analysis/VectorUtils.h

//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"

namespace llvm {

// Forward declarations of types that the function declarations below only name
// in their signatures; complete definitions are not needed to declare those
// functions.
// NOTE(review): CallInst, Instruction and BasicBlock are also used below but
// are not forward-declared in this header; presumably they arrive transitively
// through the includes above -- confirm before trimming those includes.
template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
// Opaque declaration of the intrinsic ID enumeration with an explicit
// underlying type, so Intrinsic::ID can appear in the signatures below without
// the enumerator list being visible in this header.
enum ID : unsigned;
}

/// \brief Identify if the intrinsic is trivially vectorizable.
///
/// An intrinsic is trivially vectorizable when its argument types are all
/// scalars in the scalar form of the intrinsic and all vectors in the vector
/// form of the intrinsic.
///
/// \param ID the intrinsic to query.
/// \returns true if \p ID is trivially vectorizable in the sense above.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// \brief Identifies if the intrinsic has a scalar operand.
///
/// It checks for the special intrinsics ctlz, cttz and powi, which have an
/// argument that is scalar.
///
/// \param ID the intrinsic to query.
/// \param ScalarOpdIdx presumably the index of the operand being asked about
///        (TODO: confirm against the implementation).
///
/// NOTE: "Instrinsic" in the function name is a misspelling of "Intrinsic". It
/// is part of the public name, so it cannot be corrected in this declaration
/// alone.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

/// \brief Returns the intrinsic ID for a call.
///
/// For the given call instruction, finds the intrinsic it maps to and returns
/// that intrinsic's ID. If no mapping is found, returns not_intrinsic.
///
/// \param CI the call instruction to examine.
/// \param TLI target library information; presumably consulted when mapping
///        the callee to an intrinsic (TODO: confirm against the
///        implementation).
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// \brief Find the operand of the GEP that should be checked for consecutive
/// stores.
///
/// Trailing indices that have no effect on the final pointer are ignored.
///
/// \param Gep the getelementptr instruction to inspect.
/// \returns the selected operand as an unsigned number (presumably an index
///          into the operand list of \p Gep -- confirm against the
///          implementation).
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// \brief If the argument is a GEP, returns the operand identified by
/// getGEPInductionOperand.
///
/// However, if there is some other non-loop-invariant operand, that operand is
/// returned instead.
///
/// NOTE(review): the result when \p Ptr is not a GEP is not stated here;
/// confirm against the implementation.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// \brief If a value has only one user that is a CastInst, return it.
///
/// NOTE(review): what is returned when there is no such unique cast user, and
/// the roles of \p Lp and \p Ty, are not described here -- confirm against the
/// implementation.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// \brief Get the stride of a pointer access in a loop.
///
/// Looks for symbolic strides of the form "a[i*stride]".
///
/// \returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// \brief Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
///
/// \param V the vector value to look into.
/// \param EltNo the number of the element of interest within \p V.
///
/// NOTE(review): the result when no such scalar is available is not stated
/// here -- confirm against the implementation.
Value *findScalarElement(Value *V, unsigned EltNo);

/// \brief Get the splat value if the input is a splat vector.
///
/// The value may be extracted from a splat constant vector or from a sequence
/// of instructions that broadcast a single value into a vector.
///
/// \returns the splatted value, or nullptr if \p V is not a splat vector.
const Value *getSplatValue(const Value *V);

/// \brief Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However, InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remain.
///
/// Most SIMD ISAs (e.g. NEON), however, support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
///
/// \param Blocks the basic blocks whose instructions are analyzed.
/// \param DB demanded-bits analysis; presumably queried to decide how far a
///        value can be narrowed (TODO: confirm against the implementation).
/// \param TTI optional target information; may be null (the default).
/// \returns a map from instruction to its minimum size; in the example above
///          the sizes are bit widths.
MapVector<Instruction*, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
                         DemandedBits &DB,
                         const TargetTransformInfo *TTI=nullptr);

/// \brief Propagate metadata from the values in \p VL onto \p I.
///
/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal].  For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
///
/// NOTE(review): the returned Instruction is not described here; presumably it
/// is \p I -- confirm against the implementation.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

} // llvm namespace

#endif
SLPVectorizer: Only vectorize intrinsics whose operands are widened equally The vectorizer only knows how to vectorize intrinics by widening all operands by the same factor. Patch by Tyler Nowicki! llvm-svn: 205855 2014-04-09 16:20:47 +02:00			`//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -- C++ --=====//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This file defines some vectorizer utilities.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H`
			`#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H`

[LoopVectorize] Use MapVector rather than DenseMap for MinBWs. The order in which instructions are truncated in truncateToMinimalBitwidths effects code generation. Switch to a map with a determinisic order, since the iteration order over a DenseMap is not defined. This code is not hot, so the difference in container performance isn't interesting. Many thanks to David Blaikie for making me aware of MapVector! Fixes PR25490. Differential Revision: http://reviews.llvm.org/D14981 llvm-svn: 254179 2015-11-26 21:39:51 +01:00			`#include "llvm/ADT/MapVector.h"`
Re-sort #include lines using my handy dandy ./utils/sort_includes.py script. This is in preparation for changes to lots of include lines. llvm-svn: 229088 2015-02-13 10:09:03 +01:00			`#include "llvm/Analysis/TargetLibraryInfo.h"`
Add missing #include found by modules build. llvm-svn: 206726 2014-04-21 01:39:19 +02:00
SLPVectorizer: Only vectorize intrinsics whose operands are widened equally The vectorizer only knows how to vectorize intrinics by widening all operands by the same factor. Patch by Tyler Nowicki! llvm-svn: 205855 2014-04-09 16:20:47 +02:00			`namespace llvm {`

[NFC] Header cleanup Removed some unused headers, replaced some headers with forward class declarations. Found using simple scripts like this one: clear && ack --cpp -l '#include "llvm/ADT/IndexedMap.h"' \| xargs grep -L 'IndexedMap[<]' \| xargs grep -n --color=auto 'IndexedMap' Patch by Eugene Kosov <claprix@yandex.ru> Differential Revision: http://reviews.llvm.org/D19219 From: Mehdi Amini <mehdi.amini@apple.com> llvm-svn: 266595 2016-04-18 11:17:29 +02:00			`template <typename T> class ArrayRef;`
Port DemandedBits to the new pass manager. Differential Revision: http://reviews.llvm.org/D18679 llvm-svn: 266699 2016-04-19 01:55:01 +02:00			`class DemandedBits;`
Move getStrideFromPointer and friends from LoopVectorize to VectorUtils The following functions are moved from the LoopVectorizer to VectorUtils: - getGEPInductionOperand - stripGetElementPtr - getUniqueCastUse - getStrideFromPointer These used to be static functions in LoopVectorize, but will also be used by the upcoming loop versioning LICM transformation. Patch by Ashutosh Nema! llvm-svn: 241980 2015-07-11 12:52:42 +02:00			`class GetElementPtrInst;`
			`class Loop;`
			`class ScalarEvolution;`
[LoopVectorize] Shrink integer operations into the smallest type possible C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int type (e.g. i32) whenever arithmetic is performed on them. For targets with native i8 or i16 operations, usually InstCombine can shrink the arithmetic type down again. However InstCombine refuses to create illegal types, so for targets without i8 or i16 registers, the lengthening and shrinking remains. Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when their scalar equivalents do not, so during vectorization it is important to remove these lengthens and truncates when deciding the profitability of vectorization. The algorithm this uses starts at truncs and icmps, trawling their use-def chains until they terminate or instructions outside the loop are found (or unsafe instructions like inttoptr casts are found). If the use-def chains starting from different root instructions (truncs/icmps) meet, they are unioned. The demanded bits of each node in the graph are ORed together to form an overall mask of the demanded bits in the entire graph. The minimum bitwidth that graph can be truncated to is the bitwidth minus the number of leading zeroes in the overall mask. The intention is that this algorithm should "first do no harm", so it will never insert extra cast instructions. This is why the use-def graphs are unioned, so that subgraphs with different minimum bitwidths do not need casts inserted between them. This algorithm works hard to reduce compile time impact. DemandedBits are only queried if there are extends of illegal types and if a truncate to an illegal type is seen. In the general case, this results in a simple linear scan of the instructions in the loop. No non-noise compile time impact was seen on a clang bootstrap build. llvm-svn: 250032 2015-10-12 14:34:45 +02:00			`class TargetTransformInfo;`
Move getStrideFromPointer and friends from LoopVectorize to VectorUtils The following functions are moved from the LoopVectorizer to VectorUtils: - getGEPInductionOperand - stripGetElementPtr - getUniqueCastUse - getStrideFromPointer These used to be static functions in LoopVectorize, but will also be used by the upcoming loop versioning LICM transformation. Patch by Ashutosh Nema! llvm-svn: 241980 2015-07-11 12:52:42 +02:00			`class Type;`
			`class Value;`

[ValueTracking, VectorUtils] Refactor getIntrinsicIDForCall The functionality contained within getIntrinsicIDForCall is two-fold: it checks if a CallInst's callee is a vectorizable intrinsic. If it isn't an intrinsic, it attempts to map the call's target to a suitable intrinsic. Move the mapping functionality into getIntrinsicForCallSite and rename getIntrinsicIDForCall to getVectorIntrinsicIDForCall while reimplementing it in terms of getIntrinsicForCallSite. llvm-svn: 266801 2016-04-19 21:10:21 +02:00			`namespace Intrinsic {`
			`enum ID : unsigned;`
			`}`

SLPVectorizer: Only vectorize intrinsics whose operands are widened equally The vectorizer only knows how to vectorize intrinics by widening all operands by the same factor. Patch by Tyler Nowicki! llvm-svn: 205855 2014-04-09 16:20:47 +02:00			`/// \brief Identify if the intrinsic is trivially vectorizable.`
			`/// This method returns true if the intrinsic's argument types are all`
			`/// scalars for the scalar form of the intrinsic and all vectors for`
			`/// the vector form of the intrinsic.`
Fix ODR violation waiting to happen by making static function definitions in VectorUtils.h non-static and defined out of line Patch by Ashutosh Nema Differential Revision: http://reviews.llvm.org/D10682 llvm-svn: 240794 2015-06-26 18:57:30 +02:00			`bool isTriviallyVectorizable(Intrinsic::ID ID);`

			`/// \brief Identifies if the intrinsic has a scalar operand. It checks for`
			`/// ctlz,cttz and powi special intrinsics whose argument is scalar.`
			`bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);`

			`/// \brief Returns intrinsic ID for call.`
			`/// For the input call instruction it finds mapping intrinsic and returns`
			`/// its intrinsic ID, in case it does not found it return not_intrinsic.`
[ValueTracking, VectorUtils] Refactor getIntrinsicIDForCall The functionality contained within getIntrinsicIDForCall is two-fold: it checks if a CallInst's callee is a vectorizable intrinsic. If it isn't an intrinsic, it attempts to map the call's target to a suitable intrinsic. Move the mapping functionality into getIntrinsicForCallSite and rename getIntrinsicIDForCall to getVectorIntrinsicIDForCall while reimplementing it in terms of getIntrinsicForCallSite. llvm-svn: 266801 2016-04-19 21:10:21 +02:00			`Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,`
			`const TargetLibraryInfo *TLI);`
Vectorize intrinsic math function calls in SLPVectorizer. This patch adds support to recognize and vectorize intrinsic math functions in SLPVectorizer. Review: http://reviews.llvm.org/D3560 and http://reviews.llvm.org/D3559 llvm-svn: 207901 2014-05-03 11:59:54 +02:00
Move getStrideFromPointer and friends from LoopVectorize to VectorUtils The following functions are moved from the LoopVectorizer to VectorUtils: - getGEPInductionOperand - stripGetElementPtr - getUniqueCastUse - getStrideFromPointer These used to be static functions in LoopVectorize, but will also be used by the upcoming loop versioning LICM transformation. Patch by Ashutosh Nema! llvm-svn: 241980 2015-07-11 12:52:42 +02:00			`/// \brief Find the operand of the GEP that should be checked for consecutive`
			`/// stores. This ignores trailing indices that have no effect on the final`
			`/// pointer.`
			`unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);`

Whitespace. llvm-svn: 247543 2015-09-14 13:14:39 +02:00			`/// \brief If the argument is a GEP, then returns the operand identified by`
			`/// getGEPInductionOperand. However, if there is some other non-loop-invariant`
Move getStrideFromPointer and friends from LoopVectorize to VectorUtils The following functions are moved from the LoopVectorizer to VectorUtils: - getGEPInductionOperand - stripGetElementPtr - getUniqueCastUse - getStrideFromPointer These used to be static functions in LoopVectorize, but will also be used by the upcoming loop versioning LICM transformation. Patch by Ashutosh Nema! llvm-svn: 241980 2015-07-11 12:52:42 +02:00			`/// operand, it returns that instead.`
			`Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);`

			`/// \brief If a value has only one user that is a CastInst, return it.`
			`Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);`

			`/// \brief Get the stride of a pointer access in a loop. Looks for symbolic`
			`/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.`
			`Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);`

[InstSimplify] Teach InstSimplify how to simplify extractelement llvm-svn: 242008 2015-07-13 03:15:53 +02:00			`/// \brief Given a vector and an element number, see if the scalar value is`
			`/// already around as a register, for example if it were inserted then extracted`
			`/// from the vector.`
			`Value *findScalarElement(Value *V, unsigned EltNo);`

Revert "Revert "New interface function is added to VectorUtils Value getSplatValue(Value Val);"" This reverts commit r246379. It seems that the commit was not the culprit, and the bot will be investigated for instability. llvm-svn: 246380 2015-08-30 12:49:04 +02:00			`/// \brief Get splat value if the input is a splat vector or return nullptr.`
			`/// The value may be extracted from a splat constants vector or from`
			`/// a sequence of instructions that broadcast a single value into a vector.`
Fixed a failure in cost calculation for vector GEP Cost calculation for vector GEP failed with due to invalid cast to GEP index operand. The bug is fixed, added a test. http://reviews.llvm.org/D14976 llvm-svn: 254408 2015-12-01 13:08:36 +01:00			`const Value *getSplatValue(const Value *V);`
Revert "Revert "New interface function is added to VectorUtils Value getSplatValue(Value Val);"" This reverts commit r246379. It seems that the commit was not the culprit, and the bot will be investigated for instability. llvm-svn: 246380 2015-08-30 12:49:04 +02:00
[LoopVectorize] Shrink integer operations into the smallest type possible C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int type (e.g. i32) whenever arithmetic is performed on them. For targets with native i8 or i16 operations, usually InstCombine can shrink the arithmetic type down again. However InstCombine refuses to create illegal types, so for targets without i8 or i16 registers, the lengthening and shrinking remains. Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when their scalar equivalents do not, so during vectorization it is important to remove these lengthens and truncates when deciding the profitability of vectorization. The algorithm this uses starts at truncs and icmps, trawling their use-def chains until they terminate or instructions outside the loop are found (or unsafe instructions like inttoptr casts are found). If the use-def chains starting from different root instructions (truncs/icmps) meet, they are unioned. The demanded bits of each node in the graph are ORed together to form an overall mask of the demanded bits in the entire graph. The minimum bitwidth that graph can be truncated to is the bitwidth minus the number of leading zeroes in the overall mask. The intention is that this algorithm should "first do no harm", so it will never insert extra cast instructions. This is why the use-def graphs are unioned, so that subgraphs with different minimum bitwidths do not need casts inserted between them. This algorithm works hard to reduce compile time impact. DemandedBits are only queried if there are extends of illegal types and if a truncate to an illegal type is seen. In the general case, this results in a simple linear scan of the instructions in the loop. No non-noise compile time impact was seen on a clang bootstrap build. llvm-svn: 250032 2015-10-12 14:34:45 +02:00			`/// \brief Compute a map of integer instructions to their minimum legal type`
			`/// size.`
			`///`
			`/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int`
			`/// type (e.g. i32) whenever arithmetic is performed on them.`
			`///`
			`/// For targets with native i8 or i16 operations, usually InstCombine can shrink`
			`/// the arithmetic type down again. However InstCombine refuses to create`
			`/// illegal types, so for targets without i8 or i16 registers, the lengthening`
			`/// and shrinking remains.`
			`///`
			`/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when`
			`/// their scalar equivalents do not, so during vectorization it is important to`
			`/// remove these lengthens and truncates when deciding the profitability of`
			`/// vectorization.`
			`///`
			`/// This function analyzes the given range of instructions and determines the`
			`/// minimum type size each can be converted to. It attempts to remove or`
			`/// minimize type size changes across each def-use chain, so for example in the`
			`/// following code:`
			`///`
			`/// %1 = load i8, i8*`
			`/// %2 = add i8 %1, 2`
			`/// %3 = load i16, i16*`
			`/// %4 = zext i8 %2 to i32`
			`/// %5 = zext i16 %3 to i32`
			`/// %6 = add i32 %4, %5`
			`/// %7 = trunc i32 %6 to i16`
			`///`
			`/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes`
			`/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.`
			`///`
			`/// If the optional TargetTransformInfo is provided, this function tries harder`
			`/// to do less work by only looking at illegal types.`
[LoopVectorize] Use MapVector rather than DenseMap for MinBWs. The order in which instructions are truncated in truncateToMinimalBitwidths effects code generation. Switch to a map with a determinisic order, since the iteration order over a DenseMap is not defined. This code is not hot, so the difference in container performance isn't interesting. Many thanks to David Blaikie for making me aware of MapVector! Fixes PR25490. Differential Revision: http://reviews.llvm.org/D14981 llvm-svn: 254179 2015-11-26 21:39:51 +01:00			`MapVector<Instruction*, uint64_t>`
[LoopVectorize] Shrink integer operations into the smallest type possible C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int type (e.g. i32) whenever arithmetic is performed on them. For targets with native i8 or i16 operations, usually InstCombine can shrink the arithmetic type down again. However InstCombine refuses to create illegal types, so for targets without i8 or i16 registers, the lengthening and shrinking remains. Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when their scalar equivalents do not, so during vectorization it is important to remove these lengthens and truncates when deciding the profitability of vectorization. The algorithm this uses starts at truncs and icmps, trawling their use-def chains until they terminate or instructions outside the loop are found (or unsafe instructions like inttoptr casts are found). If the use-def chains starting from different root instructions (truncs/icmps) meet, they are unioned. The demanded bits of each node in the graph are ORed together to form an overall mask of the demanded bits in the entire graph. The minimum bitwidth that graph can be truncated to is the bitwidth minus the number of leading zeroes in the overall mask. The intention is that this algorithm should "first do no harm", so it will never insert extra cast instructions. This is why the use-def graphs are unioned, so that subgraphs with different minimum bitwidths do not need casts inserted between them. This algorithm works hard to reduce compile time impact. DemandedBits are only queried if there are extends of illegal types and if a truncate to an illegal type is seen. In the general case, this results in a simple linear scan of the instructions in the loop. No non-noise compile time impact was seen on a clang bootstrap build. llvm-svn: 250032 2015-10-12 14:34:45 +02:00			`computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,`
			`DemandedBits &DB,`
			`const TargetTransformInfo *TTI=nullptr);`
SLPVectorizer: Move propagateMetadata to VectorUtils This will be re-used by the LoadStoreVectorizer. Fix handling of range metadata and testcase by Justin Lebar. llvm-svn: 274281 2016-06-30 23:17:59 +02:00
			`/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,`
			`/// MD_nontemporal]. For K in Kinds, we get the MDNode for K from each of the`
			`/// elements of VL, compute their "intersection" (i.e., the most generic`
			`/// metadata value that covers all of the individual values), and set I's`
			`/// metadata for M equal to the intersection value.`
			`///`
			`/// This function always sets a (possibly null) value for each K in Kinds.`
			`Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);`

Revert r240137 (Fixed/added namespace ending comments using clang-tidy. NFC) Apparently, the style needs to be agreed upon first. llvm-svn: 240390 2015-06-23 11:49:53 +02:00			`} // llvm namespace`
SLPVectorizer: Only vectorize intrinsics whose operands are widened equally The vectorizer only knows how to vectorize intrinics by widening all operands by the same factor. Patch by Tyler Nowicki! llvm-svn: 205855 2014-04-09 16:20:47 +02:00
			`#endif`