//===- llvm/Attributes.h - Container for Attributes -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the simple types necessary to represent the
/// attributes associated with functions and their calls.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_ATTRIBUTES_H
#define LLVM_IR_ATTRIBUTES_H
#include "llvm-c/Types.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include <bitset>
#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
namespace llvm {
class AttrBuilder;
class AttributeImpl;
class AttributeListImpl;
class AttributeSetNode;
template<typename T> struct DenseMapInfo;
class FoldingSetNodeID;
class Function;
class LLVMContext;
class Type;
//===----------------------------------------------------------------------===//
/// \class
/// Functions, function parameters, and return types can have attributes
/// to indicate how they should be treated by optimizations and code
/// generation. This class represents one of those attributes. It's light-weight
/// and should be passed around by-value.
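///
/// A minimal usage sketch, purely for illustration (Ctx is assumed to be an
/// existing LLVMContext; it is not defined in this header):
/// \code
///   Attribute NoUnwind = Attribute::get(Ctx, Attribute::NoUnwind);
///   Attribute AlignAttr = Attribute::getWithAlignment(Ctx, Align(16));
///   bool IsEnum = NoUnwind.isEnumAttribute();   // true
///   MaybeAlign A = AlignAttr.getAlignment();    // Align(16)
/// \endcode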
class Attribute {
public:
/// This enumeration lists the attributes that can be associated with
/// parameters, function results, or the function itself.
///
/// Note: The `uwtable' attribute is about the ABI or the user mandating an
/// entry in the unwind table. The `nounwind' attribute is about an exception
/// passing by the function.
///
/// In a theoretical system that uses tables for profiling and SjLj for
/// exceptions, they would be fully independent. In a normal system that uses
/// tables for both, the semantics are:
///
/// nil = Needs an entry because an exception might pass by.
/// nounwind = No need for an entry
/// uwtable = Needs an entry because the ABI says so and because
/// an exception might pass by.
/// uwtable + nounwind = Needs an entry because the ABI says so.
enum AttrKind {
// IR-Level Attributes
None, ///< No attributes have been set
#define GET_ATTR_NAMES
#define ATTRIBUTE_ENUM(ENUM_NAME, OTHER) ENUM_NAME,
#include "llvm/IR/Attributes.inc"
EndAttrKinds, ///< Sentinel value useful for loops
EmptyKey, ///< Use as Empty key for DenseMap of AttrKind
TombstoneKey, ///< Use as Tombstone key for DenseMap of AttrKind
};
private:
AttributeImpl *pImpl = nullptr;
Attribute(AttributeImpl *A) : pImpl(A) {}
public:
Attribute() = default;
//===--------------------------------------------------------------------===//
// Attribute Construction
//===--------------------------------------------------------------------===//
/// Return a uniquified Attribute object.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0);
static Attribute get(LLVMContext &Context, StringRef Kind,
StringRef Val = StringRef());
static Attribute get(LLVMContext &Context, AttrKind Kind, Type *Ty);
/// Return a uniquified Attribute object that has the specific
/// alignment set.
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment);
static Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment);
static Attribute getWithDereferenceableBytes(LLVMContext &Context,
uint64_t Bytes);
static Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context,
uint64_t Bytes);
static Attribute getWithAllocSizeArgs(LLVMContext &Context,
unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg);
static Attribute getWithVScaleRangeArgs(LLVMContext &Context,
unsigned MinValue, unsigned MaxValue);
static Attribute getWithByValType(LLVMContext &Context, Type *Ty);
static Attribute getWithStructRetType(LLVMContext &Context, Type *Ty);
static Attribute getWithByRefType(LLVMContext &Context, Type *Ty);
static Attribute getWithPreallocatedType(LLVMContext &Context, Type *Ty);
static Attribute getWithInAllocaType(LLVMContext &Context, Type *Ty);
/// For a typed attribute, return the equivalent attribute with the type
/// changed to \p ReplacementTy.
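/// For example (illustrative only; Ctx, ATy, and BTy are assumed to be an
/// existing LLVMContext and two existing Type pointers):
/// \code
///   Attribute ByVal = Attribute::getWithByValType(Ctx, ATy); // byval(ATy)
///   Attribute Retyped = ByVal.getWithNewType(Ctx, BTy);      // byval(BTy)
/// \endcode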
Attribute getWithNewType(LLVMContext &Context, Type *ReplacementTy) {
assert(isTypeAttribute() && "this requires a typed attribute");
return get(Context, getKindAsEnum(), ReplacementTy);
}
static Attribute::AttrKind getAttrKindFromName(StringRef AttrName);
static StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind);
/// Return true if and only if the attribute has an Argument.
static bool doesAttrKindHaveArgument(Attribute::AttrKind AttrKind);
/// Return true if the provided string matches the IR name of an attribute.
/// example: "noalias" return true but not "NoAlias"
static bool isExistingAttribute(StringRef Name);
//===--------------------------------------------------------------------===//
// Attribute Accessors
//===--------------------------------------------------------------------===//
/// Return true if the attribute is an Attribute::AttrKind type.
bool isEnumAttribute() const;
/// Return true if the attribute is an integer attribute.
bool isIntAttribute() const;
/// Return true if the attribute is a string (target-dependent)
/// attribute.
bool isStringAttribute() const;
/// Return true if the attribute is a type attribute.
bool isTypeAttribute() const;
/// Return true if the attribute is any kind of attribute.
bool isValid() const { return pImpl; }
/// Return true if the attribute is present.
bool hasAttribute(AttrKind Val) const;
/// Return true if the target-dependent attribute is present.
bool hasAttribute(StringRef Val) const;
/// Return the attribute's kind as an enum (Attribute::AttrKind). This
/// requires the attribute to be an enum, integer, or type attribute.
Attribute::AttrKind getKindAsEnum() const;
/// Return the attribute's value as an integer. This requires that the
/// attribute be an integer attribute.
uint64_t getValueAsInt() const;
/// Return the attribute's kind as a string. This requires the
/// attribute to be a string attribute.
StringRef getKindAsString() const;
/// Return the attribute's value as a string. This requires the
/// attribute to be a string attribute.
StringRef getValueAsString() const;
/// Return the attribute's value as a Type. This requires the attribute to be
/// a type attribute.
Type *getValueAsType() const;
/// Returns the alignment field of an attribute as a byte alignment
/// value.
MaybeAlign getAlignment() const;
/// Returns the stack alignment field of an attribute as a byte
/// alignment value.
MaybeAlign getStackAlignment() const;
/// Returns the number of dereferenceable bytes from the
/// dereferenceable attribute.
uint64_t getDereferenceableBytes() const;
/// Returns the number of dereferenceable_or_null bytes from the
/// dereferenceable_or_null attribute.
uint64_t getDereferenceableOrNullBytes() const;
/// Returns the argument numbers for the allocsize attribute (or pair(0, 0)
/// if not known).
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
/// Returns the minimum and maximum values of the vscale_range attribute (or
/// pair(0, 0) if unknown).
std::pair<unsigned, unsigned> getVScaleRangeArgs() const;
/// Convert the attribute to its equivalent mnemonic string. This is,
/// presumably, for writing out the mnemonics for the assembly writer.
std::string getAsString(bool InAttrGrp = false) const;
/// Equality and non-equality operators.
bool operator==(Attribute A) const { return pImpl == A.pImpl; }
bool operator!=(Attribute A) const { return pImpl != A.pImpl; }
/// Less-than operator. Useful for sorting the attributes list.
bool operator<(Attribute A) const;
void Profile(FoldingSetNodeID &ID) const;
/// Return a raw pointer that uniquely identifies this attribute.
void *getRawPointer() const {
return pImpl;
}
/// Get an attribute from a raw pointer created by getRawPointer.
static Attribute fromRawPointer(void *RawPtr) {
return Attribute(reinterpret_cast<AttributeImpl*>(RawPtr));
}
};
// Specialized opaque value conversions.
inline LLVMAttributeRef wrap(Attribute Attr) {
return reinterpret_cast<LLVMAttributeRef>(Attr.getRawPointer());
}
// Specialized opaque value conversions.
inline Attribute unwrap(LLVMAttributeRef Attr) {
return Attribute::fromRawPointer(Attr);
}
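// For example, an attribute round-trips through the C API unchanged
// (illustrative sketch; A is assumed to be an existing Attribute):
//   LLVMAttributeRef Ref = wrap(A);   // hand off to C API clients
//   Attribute Same = unwrap(Ref);     // Same == A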
//===----------------------------------------------------------------------===//
/// \class
/// This class holds the attributes for a particular argument, parameter,
/// function, or return value. It is an immutable value type that is cheap to
/// copy. Adding and removing enum attributes is intended to be fast, but adding
/// and removing string or integer attributes involves a FoldingSet lookup.
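///
/// A minimal usage sketch (Ctx is assumed to be an existing LLVMContext):
/// \code
///   AttrBuilder B;
///   B.addAttribute(Attribute::NoAlias).addAlignmentAttr(Align(8));
///   AttributeSet AS = AttributeSet::get(Ctx, B);
///   bool HasNoAlias = AS.hasAttribute(Attribute::NoAlias); // true
/// \endcode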
class AttributeSet {
friend AttributeListImpl;
template <typename Ty> friend struct DenseMapInfo;
// TODO: Extract AvailableAttrs from AttributeSetNode and store them here.
// This will allow an efficient implementation of addAttribute and
// removeAttribute for enum attrs.
/// Private implementation pointer.
AttributeSetNode *SetNode = nullptr;
private:
explicit AttributeSet(AttributeSetNode *ASN) : SetNode(ASN) {}
public:
/// AttributeSet is a trivially copyable value type.
AttributeSet() = default;
AttributeSet(const AttributeSet &) = default;
~AttributeSet() = default;
static AttributeSet get(LLVMContext &C, const AttrBuilder &B);
static AttributeSet get(LLVMContext &C, ArrayRef<Attribute> Attrs);
bool operator==(const AttributeSet &O) const { return SetNode == O.SetNode; }
bool operator!=(const AttributeSet &O) const { return !(*this == O); }
/// Add an argument attribute. Returns a new set because attribute sets are
/// immutable.
LLVM_NODISCARD AttributeSet addAttribute(LLVMContext &C,
Attribute::AttrKind Kind) const;
/// Add a target-dependent attribute. Returns a new set because attribute sets
/// are immutable.
LLVM_NODISCARD AttributeSet addAttribute(LLVMContext &C, StringRef Kind,
StringRef Value = StringRef()) const;
/// Add attributes to the attribute set. Returns a new set because attribute
/// sets are immutable.
LLVM_NODISCARD AttributeSet addAttributes(LLVMContext &C,
AttributeSet AS) const;
/// Remove the specified attribute from this set. Returns a new set because
/// attribute sets are immutable.
LLVM_NODISCARD AttributeSet removeAttribute(LLVMContext &C,
Attribute::AttrKind Kind) const;
/// Remove the specified attribute from this set. Returns a new set because
/// attribute sets are immutable.
LLVM_NODISCARD AttributeSet removeAttribute(LLVMContext &C,
StringRef Kind) const;
/// Remove the specified attributes from this set. Returns a new set because
/// attribute sets are immutable.
LLVM_NODISCARD AttributeSet
removeAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const;
/// Return the number of attributes in this set.
unsigned getNumAttributes() const;
/// Return true if any attributes exist in this set.
bool hasAttributes() const { return SetNode != nullptr; }
/// Return true if the attribute exists in this set.
bool hasAttribute(Attribute::AttrKind Kind) const;
/// Return true if the attribute exists in this set.
bool hasAttribute(StringRef Kind) const;
/// Return the attribute object.
Attribute getAttribute(Attribute::AttrKind Kind) const;
/// Return the target-dependent attribute object.
Attribute getAttribute(StringRef Kind) const;
MaybeAlign getAlignment() const;
MaybeAlign getStackAlignment() const;
uint64_t getDereferenceableBytes() const;
uint64_t getDereferenceableOrNullBytes() const;
Type *getByValType() const;
Type *getStructRetType() const;
Type *getByRefType() const;
Type *getPreallocatedType() const;
Type *getInAllocaType() const;
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
std::pair<unsigned, unsigned> getVScaleRangeArgs() const;
std::string getAsString(bool InAttrGrp = false) const;
using iterator = const Attribute *;
iterator begin() const;
iterator end() const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() const;
#endif
};
//===----------------------------------------------------------------------===//
/// \class
/// Provide DenseMapInfo for AttributeSet.
template <> struct DenseMapInfo<AttributeSet> {
static AttributeSet getEmptyKey() {
auto Val = static_cast<uintptr_t>(-1);
Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
return AttributeSet(reinterpret_cast<AttributeSetNode *>(Val));
}
static AttributeSet getTombstoneKey() {
auto Val = static_cast<uintptr_t>(-2);
Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
return AttributeSet(reinterpret_cast<AttributeSetNode *>(Val));
}
static unsigned getHashValue(AttributeSet AS) {
return (unsigned((uintptr_t)AS.SetNode) >> 4) ^
(unsigned((uintptr_t)AS.SetNode) >> 9);
}
static bool isEqual(AttributeSet LHS, AttributeSet RHS) { return LHS == RHS; }
};
//===----------------------------------------------------------------------===//
/// \class
/// This class holds the attributes for a function, its return value, and
/// its parameters. You access the attributes for each of them via an index into
/// the AttributeList object. The function attributes are at index
/// `AttributeList::FunctionIndex', the return value is at index
/// `AttributeList::ReturnIndex', and the attributes for the parameters start at
/// index `AttributeList::FirstArgIndex'.
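///
/// For example (illustrative; F is assumed to be an existing Function):
/// \code
///   AttributeList AL = F.getAttributes();
///   bool NoUnwind = AL.hasFnAttribute(Attribute::NoUnwind);      // function
///   bool SExtRet = AL.hasAttribute(AttributeList::ReturnIndex,
///                                  Attribute::SExt);             // return value
///   bool NonNull0 = AL.hasParamAttr(0, Attribute::NonNull);      // 1st parameter
/// \endcode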
class AttributeList {
public:
enum AttrIndex : unsigned {
ReturnIndex = 0U,
FunctionIndex = ~0U,
FirstArgIndex = 1,
};
private:
friend class AttrBuilder;
friend class AttributeListImpl;
friend class AttributeSet;
friend class AttributeSetNode;
template <typename Ty> friend struct DenseMapInfo;
/// The attributes that we are managing. This can be null to represent
/// the empty attributes list.
AttributeListImpl *pImpl = nullptr;
public:
/// Create an AttributeList with the specified parameters in it.
static AttributeList get(LLVMContext &C,
ArrayRef<std::pair<unsigned, Attribute>> Attrs);
static AttributeList get(LLVMContext &C,
ArrayRef<std::pair<unsigned, AttributeSet>> Attrs);
/// Create an AttributeList from attribute sets for a function, its
/// return value, and all of its arguments.
static AttributeList get(LLVMContext &C, AttributeSet FnAttrs,
AttributeSet RetAttrs,
ArrayRef<AttributeSet> ArgAttrs);
private:
explicit AttributeList(AttributeListImpl *LI) : pImpl(LI) {}
static AttributeList getImpl(LLVMContext &C, ArrayRef<AttributeSet> AttrSets);
AttributeList setAttributes(LLVMContext &C, unsigned Index,
AttributeSet Attrs) const;
public:
AttributeList() = default;
//===--------------------------------------------------------------------===//
// AttributeList Construction and Mutation
//===--------------------------------------------------------------------===//
/// Return an AttributeList with the specified parameters in it.
static AttributeList get(LLVMContext &C, ArrayRef<AttributeList> Attrs);
static AttributeList get(LLVMContext &C, unsigned Index,
ArrayRef<Attribute::AttrKind> Kinds);
static AttributeList get(LLVMContext &C, unsigned Index,
ArrayRef<Attribute::AttrKind> Kinds,
ArrayRef<uint64_t> Values);
static AttributeList get(LLVMContext &C, unsigned Index,
ArrayRef<StringRef> Kind);
static AttributeList get(LLVMContext &C, unsigned Index,
const AttrBuilder &B);
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
Attribute::AttrKind Kind) const;
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList
addAttribute(LLVMContext &C, unsigned Index, StringRef Kind,
StringRef Value = StringRef()) const;
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
Attribute A) const;
/// Add attributes to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addAttributes(LLVMContext &C, unsigned Index,
const AttrBuilder &B) const;
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
LLVM_NODISCARD AttributeList addParamAttribute(
LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
return addAttribute(C, ArgNo + FirstArgIndex, Kind);
}
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
LLVM_NODISCARD AttributeList
addParamAttribute(LLVMContext &C, unsigned ArgNo, StringRef Kind,
StringRef Value = StringRef()) const {
return addAttribute(C, ArgNo + FirstArgIndex, Kind, Value);
}
/// Add an attribute to the attribute list at the given arg indices. Returns a
/// new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addParamAttribute(LLVMContext &C,
ArrayRef<unsigned> ArgNos,
Attribute A) const;
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
LLVM_NODISCARD AttributeList addParamAttributes(LLVMContext &C,
unsigned ArgNo,
const AttrBuilder &B) const {
return addAttributes(C, ArgNo + FirstArgIndex, B);
}
/// Remove the specified attribute at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
Attribute::AttrKind Kind) const;
/// Remove the specified attribute at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
StringRef Kind) const;
/// Remove the specified attributes at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeAttributes(
LLVMContext &C, unsigned Index, const AttrBuilder &AttrsToRemove) const;
/// Remove all attributes at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeAttributes(LLVMContext &C,
unsigned Index) const;
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeParamAttribute(
LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
}
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeParamAttribute(LLVMContext &C,
unsigned ArgNo,
StringRef Kind) const {
return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
}
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeParamAttributes(
LLVMContext &C, unsigned ArgNo, const AttrBuilder &AttrsToRemove) const {
return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove);
}
/// Remove the noundef attribute and other attributes that imply undefined
/// behavior if an `undef` or `poison` value is passed, from this attribute
/// list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList
removeParamUndefImplyingAttributes(LLVMContext &C, unsigned ArgNo) const;
/// Remove all attributes at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeParamAttributes(LLVMContext &C,
unsigned ArgNo) const {
return removeAttributes(C, ArgNo + FirstArgIndex);
}
/// Replace the type contained by attribute \p Kind at index \p ArgNo with
/// \p ReplacementTy, preserving all other attributes.
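/// For example (illustrative; Ctx, AL, and NewTy are assumed to be an existing
/// LLVMContext, AttributeList, and Type pointer, and the first parameter is
/// assumed to carry an sret attribute):
/// \code
///   AL = AL.replaceAttributeType(Ctx, AttributeList::FirstArgIndex,
///                                Attribute::StructRet, NewTy);
/// \endcode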
LLVM_NODISCARD AttributeList replaceAttributeType(LLVMContext &C,
unsigned ArgNo,
Attribute::AttrKind Kind,
Type *ReplacementTy) const {
Attribute Attr = getAttribute(ArgNo, Kind);
auto Attrs = removeAttribute(C, ArgNo, Kind);
return Attrs.addAttribute(C, ArgNo, Attr.getWithNewType(C, ReplacementTy));
}
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// index. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addDereferenceableAttr(LLVMContext &C,
unsigned Index,
uint64_t Bytes) const;
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// arg index. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addDereferenceableParamAttr(
LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes);
}
/// Add the dereferenceable_or_null attribute to the attribute set at
/// the given index. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addDereferenceableOrNullAttr(
LLVMContext &C, unsigned Index, uint64_t Bytes) const;
/// Add the dereferenceable_or_null attribute to the attribute set at
/// the given arg index. Returns a new list because attribute lists are
/// immutable.
LLVM_NODISCARD AttributeList addDereferenceableOrNullParamAttr(
LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
return addDereferenceableOrNullAttr(C, ArgNo + FirstArgIndex, Bytes);
}
/// Add the allocsize attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList
addAllocSizeAttr(LLVMContext &C, unsigned Index, unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg);
/// Add the allocsize attribute to the attribute set at the given arg index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList
addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo, unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg) {
return addAllocSizeAttr(C, ArgNo + FirstArgIndex, ElemSizeArg, NumElemsArg);
}
/// Add the vscale_range attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addVScaleRangeAttr(LLVMContext &C,
unsigned Index,
unsigned MinValue,
unsigned MaxValue);
//===--------------------------------------------------------------------===//
// AttributeList Accessors
//===--------------------------------------------------------------------===//
/// The attributes for the specified index are returned.
AttributeSet getAttributes(unsigned Index) const;
/// The attributes for the argument or parameter at the given index are
/// returned.
AttributeSet getParamAttributes(unsigned ArgNo) const;
/// The attributes for the ret value are returned.
AttributeSet getRetAttributes() const;
/// The function attributes are returned.
AttributeSet getFnAttributes() const;
/// Return true if the attribute exists at the given index.
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const;
/// Return true if the attribute exists at the given index.
bool hasAttribute(unsigned Index, StringRef Kind) const;
/// Return true if any attributes exist at the given index.
bool hasAttributes(unsigned Index) const;
/// Return true if the attribute exists for the given argument.
bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
return hasAttribute(ArgNo + FirstArgIndex, Kind);
}
/// Return true if the attribute exists for the given argument.
bool hasParamAttr(unsigned ArgNo, StringRef Kind) const {
return hasAttribute(ArgNo + FirstArgIndex, Kind);
}
/// Return true if any attributes exist for the given argument.
bool hasParamAttrs(unsigned ArgNo) const {
return hasAttributes(ArgNo + FirstArgIndex);
}
/// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
/// may be faster.
bool hasFnAttribute(Attribute::AttrKind Kind) const;
/// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
/// may be faster.
bool hasFnAttribute(StringRef Kind) const;
/// Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
/// Return true if the specified attribute is set for at least one
/// parameter or for the return value. If Index is not nullptr, the index
/// of a parameter with the specified attribute is provided.
bool hasAttrSomewhere(Attribute::AttrKind Kind,
unsigned *Index = nullptr) const;
/// Return the attribute object that exists at the given index.
Attribute getAttribute(unsigned Index, Attribute::AttrKind Kind) const;
/// Return the attribute object that exists at the given index.
Attribute getAttribute(unsigned Index, StringRef Kind) const;
/// Return the attribute object that exists at the arg index.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
return getAttribute(ArgNo + FirstArgIndex, Kind);
}
/// Return the attribute object that exists at the given index.
Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
return getAttribute(ArgNo + FirstArgIndex, Kind);
}
/// Return the alignment of the return value.
MaybeAlign getRetAlignment() const;
/// Return the alignment for the specified function parameter.
MaybeAlign getParamAlignment(unsigned ArgNo) const;
/// Return the byval type for the specified function parameter.
Type *getParamByValType(unsigned ArgNo) const;
/// Return the sret type for the specified function parameter.
Type *getParamStructRetType(unsigned ArgNo) const;
/// Return the byref type for the specified function parameter.
Type *getParamByRefType(unsigned ArgNo) const;
/// Return the preallocated type for the specified function parameter.
Type *getParamPreallocatedType(unsigned ArgNo) const;
/// Return the inalloca type for the specified function parameter.
Type *getParamInAllocaType(unsigned ArgNo) const;
/// Get the stack alignment.
MaybeAlign getStackAlignment(unsigned Index) const;
/// Get the number of dereferenceable bytes (or zero if unknown).
uint64_t getDereferenceableBytes(unsigned Index) const;
/// Get the number of dereferenceable bytes (or zero if unknown) of an
/// arg.
uint64_t getParamDereferenceableBytes(unsigned ArgNo) const {
return getDereferenceableBytes(ArgNo + FirstArgIndex);
}
/// Get the number of dereferenceable_or_null bytes (or zero if
/// unknown).
uint64_t getDereferenceableOrNullBytes(unsigned Index) const;
/// Get the number of dereferenceable_or_null bytes (or zero if
/// unknown) of an arg.
uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const {
return getDereferenceableOrNullBytes(ArgNo + FirstArgIndex);
}
/// Get the allocsize argument numbers (or pair(0, 0) if unknown).
std::pair<unsigned, Optional<unsigned>>
getAllocSizeArgs(unsigned Index) const;
/// Get the vscale_range argument numbers (or pair(0, 0) if unknown).
std::pair<unsigned, unsigned> getVScaleRangeArgs(unsigned Index) const;
/// Return the attributes at the index as a string.
std::string getAsString(unsigned Index, bool InAttrGrp = false) const;
//===--------------------------------------------------------------------===//
// AttributeList Introspection
//===--------------------------------------------------------------------===//
using iterator = const AttributeSet *;
iterator begin() const;
iterator end() const;
unsigned getNumAttrSets() const;
/// Use these to iterate over the valid attribute indices.
unsigned index_begin() const { return AttributeList::FunctionIndex; }
unsigned index_end() const { return getNumAttrSets() - 1; }
/// operator==/!= - Provide equality predicates.
bool operator==(const AttributeList &RHS) const { return pImpl == RHS.pImpl; }
bool operator!=(const AttributeList &RHS) const { return pImpl != RHS.pImpl; }
/// Return a raw pointer that uniquely identifies this attribute list.
void *getRawPointer() const {
return pImpl;
}
/// Return true if there are no attributes.
bool isEmpty() const { return pImpl == nullptr; }
void dump() const;
};
//===----------------------------------------------------------------------===//
/// \class
/// Provide DenseMapInfo for AttributeList.
template <> struct DenseMapInfo<AttributeList> {
static AttributeList getEmptyKey() {
auto Val = static_cast<uintptr_t>(-1);
Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
return AttributeList(reinterpret_cast<AttributeListImpl *>(Val));
}
static AttributeList getTombstoneKey() {
auto Val = static_cast<uintptr_t>(-2);
Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
return AttributeList(reinterpret_cast<AttributeListImpl *>(Val));
}
static unsigned getHashValue(AttributeList AS) {
return (unsigned((uintptr_t)AS.pImpl) >> 4) ^
(unsigned((uintptr_t)AS.pImpl) >> 9);
}
static bool isEqual(AttributeList LHS, AttributeList RHS) {
return LHS == RHS;
}
};
//===----------------------------------------------------------------------===//
/// \class
/// This class is used in conjunction with the Attribute::get method to
/// create an Attribute object. The object itself is uniquified. The Builder's
/// value, however, is not. So this can be used as a quick way to test for
/// equality, presence of attributes, etc.
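///
/// A minimal sketch of typical use (Ctx and STy are assumed to be an existing
/// LLVMContext and an existing Type pointer):
/// \code
///   AttrBuilder B;
///   B.addAttribute(Attribute::NoCapture)
///    .addDereferenceableAttr(32)
///    .addByValAttr(STy);
///   AttributeList AL = AttributeList::get(Ctx, AttributeList::FirstArgIndex, B);
/// \endcode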
class AttrBuilder {
std::bitset<Attribute::EndAttrKinds> Attrs;
std::map<SmallString<32>, SmallString<32>, std::less<>> TargetDepAttrs;
MaybeAlign Alignment;
MaybeAlign StackAlignment;
uint64_t DerefBytes = 0;
uint64_t DerefOrNullBytes = 0;
uint64_t AllocSizeArgs = 0;
uint64_t VScaleRangeArgs = 0;
Type *ByValType = nullptr;
Type *StructRetType = nullptr;
Type *ByRefType = nullptr;
Type *PreallocatedType = nullptr;
Type *InAllocaType = nullptr;
public:
AttrBuilder() = default;
AttrBuilder(const Attribute &A) {
addAttribute(A);
}
AttrBuilder(AttributeList AS, unsigned Idx);
AttrBuilder(AttributeSet AS);
void clear();
/// Add an attribute to the builder.
AttrBuilder &addAttribute(Attribute::AttrKind Val) {
assert((unsigned)Val < Attribute::EndAttrKinds &&
"Attribute out of range!");
assert(!Attribute::doesAttrKindHaveArgument(Val) &&
"Adding integer attribute without adding a value!");
Attrs[Val] = true;
return *this;
}
/// Add the Attribute object to the builder.
AttrBuilder &addAttribute(Attribute A);
/// Add the target-dependent attribute to the builder.
AttrBuilder &addAttribute(StringRef A, StringRef V = StringRef());
/// Remove an attribute from the builder.
AttrBuilder &removeAttribute(Attribute::AttrKind Val);
/// Remove the attributes at index \p WithoutIndex of \p A from the builder.
AttrBuilder &removeAttributes(AttributeList A, uint64_t WithoutIndex);
/// Remove the target-dependent attribute from the builder.
AttrBuilder &removeAttribute(StringRef A);
/// Add the attributes from \p B to this builder.
AttrBuilder &merge(const AttrBuilder &B);
/// Remove the attributes in \p B from this builder.
AttrBuilder &remove(const AttrBuilder &B);
/// Return true if the builder has any attribute that's in the
/// specified builder.
bool overlaps(const AttrBuilder &B) const;
/// Return true if the builder has the specified attribute.
bool contains(Attribute::AttrKind A) const {
assert((unsigned)A < Attribute::EndAttrKinds && "Attribute out of range!");
return Attrs[A];
}
/// Return true if the builder has the specified target-dependent
/// attribute.
bool contains(StringRef A) const;
/// Return true if the builder has IR-level attributes.
bool hasAttributes() const;
/// Return true if the builder has any attribute that is also present at index
/// \p Index of attribute list \p A.
bool hasAttributes(AttributeList A, uint64_t Index) const;
/// Return true if the builder has an alignment attribute.
bool hasAlignmentAttr() const;
/// Retrieve the alignment attribute, if it exists.
MaybeAlign getAlignment() const { return Alignment; }
/// Retrieve the stack alignment attribute, if it exists.
MaybeAlign getStackAlignment() const { return StackAlignment; }
/// Retrieve the number of dereferenceable bytes, if the
/// dereferenceable attribute exists (zero is returned otherwise).
uint64_t getDereferenceableBytes() const { return DerefBytes; }
/// Retrieve the number of dereferenceable_or_null bytes, if the
/// dereferenceable_or_null attribute exists (zero is returned otherwise).
uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
/// Retrieve the byval type.
Type *getByValType() const { return ByValType; }
/// Retrieve the sret type.
Type *getStructRetType() const { return StructRetType; }
/// Retrieve the byref type.
Type *getByRefType() const { return ByRefType; }
/// Retrieve the preallocated type.
Type *getPreallocatedType() const { return PreallocatedType; }
/// Retrieve the inalloca type.
Type *getInAllocaType() const { return InAllocaType; }
/// Retrieve the allocsize args, if the allocsize attribute exists. If it
/// doesn't exist, pair(0, 0) is returned.
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
/// Retrieve the vscale_range args, if the vscale_range attribute exists. If
/// it doesn't exist, pair(0, 0) is returned.
std::pair<unsigned, unsigned> getVScaleRangeArgs() const;
/// This turns an alignment into the form used internally in Attribute.
/// This call has no effect if Align is not set.
AttrBuilder &addAlignmentAttr(MaybeAlign Align);
/// This turns an int alignment (which must be a power of 2) into the
/// form used internally in Attribute.
/// This call has no effect if Align is 0.
/// Deprecated, use the overload taking a MaybeAlign.
inline AttrBuilder &addAlignmentAttr(unsigned Align) {
return addAlignmentAttr(MaybeAlign(Align));
}
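// Example (illustrative sketch, not part of this header): building an
// alignment attribute with the preferred MaybeAlign overload and attaching it
// to a function's first parameter. The name `F` and the use of
// Function::addParamAttrs are assumptions made for the example only.
//
//   AttrBuilder B;
//   B.addAlignmentAttr(MaybeAlign(16)); // preferred over the unsigned form
//   F.addParamAttrs(0, B);              // assumed llvm::Function helper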
/// This turns a stack alignment into the form used internally in Attribute.
/// This call has no effect if Align is not set.
AttrBuilder &addStackAlignmentAttr(MaybeAlign Align);
/// This turns an int stack alignment (which must be a power of 2) into
/// the form used internally in Attribute.
/// This call has no effect if Align is 0.
/// Deprecated, use the overload taking a MaybeAlign.
inline AttrBuilder &addStackAlignmentAttr(unsigned Align) {
return addStackAlignmentAttr(MaybeAlign(Align));
}
/// This turns the number of dereferenceable bytes into the form used
/// internally in Attribute.
AttrBuilder &addDereferenceableAttr(uint64_t Bytes);
/// This turns the number of dereferenceable_or_null bytes into the
/// form used internally in Attribute.
AttrBuilder &addDereferenceableOrNullAttr(uint64_t Bytes);
/// This turns one (or two) ints into the form used internally in Attribute.
AttrBuilder &addAllocSizeAttr(unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg);
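// Example (illustrative sketch): encoding allocsize(0, 1) (element size in
// argument 0, element count in argument 1) and reading it back; passing None
// selects the size-only form.
//
//   AttrBuilder B;
//   B.addAllocSizeAttr(0, 1);           // allocsize(0, 1)
//   auto Args = B.getAllocSizeArgs();   // {0, Optional<unsigned>(1)}
//   AttrBuilder C;
//   C.addAllocSizeAttr(0, None);        // allocsize(0)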
/// This turns two ints into the form used internally in Attribute.
AttrBuilder &addVScaleRangeAttr(unsigned MinValue, unsigned MaxValue);
/// This turns a byval type into the form used internally in Attribute.
AttrBuilder &addByValAttr(Type *Ty);
/// This turns a sret type into the form used internally in Attribute.
AttrBuilder &addStructRetAttr(Type *Ty);
/// This turns a byref type into the form used internally in Attribute.
AttrBuilder &addByRefAttr(Type *Ty);
/// This turns a preallocated type into the form used internally in Attribute.
AttrBuilder &addPreallocatedAttr(Type *Ty);
/// This turns an inalloca type into the form used internally in Attribute.
AttrBuilder &addInAllocaAttr(Type *Ty);
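// Example (illustrative sketch): recording the pointee type carried by the
// byval and byref attributes. `Int32Ty` stands in for any llvm::Type* and is
// an assumption of the example.
//
//   AttrBuilder ByValB, ByRefB;
//   ByValB.addByValAttr(Int32Ty);  // byval(i32): implies a callee stack copy
//   ByRefB.addByRefAttr(Int32Ty);  // byref(i32): in-memory type, no copy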
/// Add an allocsize attribute, using the representation returned by
/// Attribute.getIntValue().
AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr);
/// Add a vscale_range attribute, using the representation returned by
/// Attribute.getIntValue().
AttrBuilder &addVScaleRangeAttrFromRawRepr(uint64_t RawVScaleRangeRepr);
/// Return true if the builder contains no target-independent
/// attributes.
bool empty() const { return Attrs.none(); }
// Iterators for target-dependent attributes.
using td_type = decltype(TargetDepAttrs)::value_type;
using td_iterator = decltype(TargetDepAttrs)::iterator;
using td_const_iterator = decltype(TargetDepAttrs)::const_iterator;
using td_range = iterator_range<td_iterator>;
using td_const_range = iterator_range<td_const_iterator>;
td_iterator td_begin() { return TargetDepAttrs.begin(); }
td_iterator td_end() { return TargetDepAttrs.end(); }
td_const_iterator td_begin() const { return TargetDepAttrs.begin(); }
td_const_iterator td_end() const { return TargetDepAttrs.end(); }
td_range td_attrs() { return td_range(td_begin(), td_end()); }
td_const_range td_attrs() const {
return td_const_range(td_begin(), td_end());
}
bool td_empty() const { return TargetDepAttrs.empty(); }
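// Example (illustrative sketch): adding a target-dependent (string) attribute
// and iterating over all of them; the key and value types follow
// TargetDepAttrs, so both are string-like.
//
//   AttrBuilder B;
//   B.addAttribute("no-jump-tables", "true");
//   for (const auto &TD : B.td_attrs())
//     errs() << TD.first << " = " << TD.second << "\n";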
Make LLVM build in C++20 mode

Part of the <=> changes in C++20 makes certain patterns of writing equality operators ambiguous with themselves (sorry!). This patch goes through and adjusts all the comparison operators such that they should work in both C++17 and C++20 modes. It also makes two other small C++20-specific changes (adding a constructor to a type that ceases to be an aggregate, and adding casts from u8 literals which no longer have type const char*).

There were four categories of errors that this review fixes. Here are canonical examples of them, ordered from most to least common:

// 1) Missing const
namespace missing_const {
    struct A {
#ifndef FIXED
        bool operator==(A const&);
#else
        bool operator==(A const&) const;
#endif
    };

    bool a = A{} == A{}; // error
}

// 2) Type mismatch on CRTP
namespace crtp_mismatch {
    template <typename Derived>
    struct Base {
#ifndef FIXED
        bool operator==(Derived const&) const;
#else
        // in one case changed to taking Base const&
        friend bool operator==(Derived const&, Derived const&);
#endif
    };

    struct D : Base<D> { };

    bool b = D{} == D{}; // error
}

// 3) iterator/const_iterator with only mixed comparison
namespace iter_const_iter {
    template <bool Const>
    struct iterator {
        using const_iterator = iterator<true>;

        iterator();

        template <bool B, std::enable_if_t<(Const && !B), int> = 0>
        iterator(iterator<B> const&);

#ifndef FIXED
        bool operator==(const_iterator const&) const;
#else
        friend bool operator==(iterator const&, iterator const&);
#endif
    };

    bool c = iterator<false>{} == iterator<false>{} // error
          || iterator<false>{} == iterator<true>{}
          || iterator<true>{} == iterator<false>{}
          || iterator<true>{} == iterator<true>{};
}

// 4) Same-type comparison but only have mixed-type operator
namespace ambiguous_choice {
    enum Color { Red };

    struct C {
        C();
        C(Color);
        operator Color() const;
        bool operator==(Color) const;
        friend bool operator==(C, C);
    };

    bool c = C{} == C{}; // error
    bool d = C{} == Red;
}

Differential revision: https://reviews.llvm.org/D78938
2020-12-17 11:41:35 +01:00
bool operator==(const AttrBuilder &B) const;
bool operator!=(const AttrBuilder &B) const { return !(*this == B); }
};
namespace AttributeFuncs {
/// Which attributes cannot be applied to a type.
AttrBuilder typeIncompatible(Type *Ty);
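// Example (illustrative sketch): asking which attributes would be rejected
// for a given type, as the verifier does. `Ty` is assumed to be a non-pointer
// llvm::Type*.
//
//   AttrBuilder Incompat = AttributeFuncs::typeIncompatible(Ty);
//   bool CannotBeByVal = Incompat.contains(Attribute::ByVal); // pointer-only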
/// \returns True if the two functions have compatible target-independent
/// attributes for inlining purposes.
bool areInlineCompatible(const Function &Caller, const Function &Callee);
/// Checks if there are any incompatible function attributes between
/// \p A and \p B.
///
/// \param [in] A - The first function to be compared with.
/// \param [in] B - The second function to be compared with.
/// \returns true if the functions have compatible attributes.
bool areOutlineCompatible(const Function &A, const Function &B);
/// Merge caller's and callee's attributes.
void mergeAttributesForInlining(Function &Caller, const Function &Callee);
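// Example (illustrative sketch): the pattern an inlining client might follow,
// checking compatibility before merging the callee's attributes into the
// caller. `Caller` and `Callee` are assumed llvm::Function references.
//
//   if (AttributeFuncs::areInlineCompatible(Caller, Callee))
//     AttributeFuncs::mergeAttributesForInlining(Caller, Callee);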
/// Merges the function attributes from \p ToMerge into function \p Base.
///
/// \param [in,out] Base - The function being merged into.
/// \param [in] ToMerge - The function to merge attributes from.
void mergeAttributesForOutlining(Function &Base, const Function &ToMerge);
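// Example (illustrative sketch): the analogous flow for outlining, verifying
// the candidates' attributes are compatible before folding ToMerge's
// attributes into the outlined base function.
//
//   if (AttributeFuncs::areOutlineCompatible(Base, ToMerge))
//     AttributeFuncs::mergeAttributesForOutlining(Base, ToMerge);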
} // end namespace AttributeFuncs
} // end namespace llvm
#endif // LLVM_IR_ATTRIBUTES_H