//===- Function.cpp - Implement the Global object classes -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Function class for the IR library.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/Function.h"
|
2012-12-03 17:50:05 +01:00
|
|
|
#include "SymbolTableListTraitsImpl.h"
|
2017-05-11 01:41:30 +02:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
|
|
#include "llvm/ADT/DenseSet.h"
|
|
|
|
#include "llvm/ADT/None.h"
|
2017-06-06 13:49:48 +02:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2017-05-11 01:41:30 +02:00
|
|
|
#include "llvm/ADT/SmallString.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
2012-12-03 17:50:05 +01:00
|
|
|
#include "llvm/ADT/StringExtras.h"
|
2017-05-11 01:41:30 +02:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
2020-07-08 07:43:24 +02:00
|
|
|
#include "llvm/IR/AbstractCallSite.h"
|
2017-05-11 01:41:30 +02:00
|
|
|
#include "llvm/IR/Argument.h"
|
|
|
|
#include "llvm/IR/Attributes.h"
|
|
|
|
#include "llvm/IR/BasicBlock.h"
|
|
|
|
#include "llvm/IR/Constant.h"
|
2015-05-13 17:13:45 +02:00
|
|
|
#include "llvm/IR/Constants.h"
|
2013-01-02 12:36:10 +01:00
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
2017-05-11 01:41:30 +02:00
|
|
|
#include "llvm/IR/GlobalValue.h"
|
2014-03-04 11:30:26 +01:00
|
|
|
#include "llvm/IR/InstIterator.h"
|
2017-05-11 01:41:30 +02:00
|
|
|
#include "llvm/IR/Instruction.h"
|
|
|
|
#include "llvm/IR/Instructions.h"
|
2021-02-04 23:38:40 +01:00
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
2017-05-11 01:41:30 +02:00
|
|
|
#include "llvm/IR/Intrinsics.h"
|
2019-12-11 16:55:26 +01:00
|
|
|
#include "llvm/IR/IntrinsicsAArch64.h"
|
|
|
|
#include "llvm/IR/IntrinsicsAMDGPU.h"
|
|
|
|
#include "llvm/IR/IntrinsicsARM.h"
|
|
|
|
#include "llvm/IR/IntrinsicsBPF.h"
|
|
|
|
#include "llvm/IR/IntrinsicsHexagon.h"
|
|
|
|
#include "llvm/IR/IntrinsicsMips.h"
|
|
|
|
#include "llvm/IR/IntrinsicsNVPTX.h"
|
|
|
|
#include "llvm/IR/IntrinsicsPowerPC.h"
|
|
|
|
#include "llvm/IR/IntrinsicsR600.h"
|
|
|
|
#include "llvm/IR/IntrinsicsRISCV.h"
|
|
|
|
#include "llvm/IR/IntrinsicsS390.h"
|
2020-11-09 22:50:15 +01:00
|
|
|
#include "llvm/IR/IntrinsicsVE.h"
|
2019-12-11 16:55:26 +01:00
|
|
|
#include "llvm/IR/IntrinsicsWebAssembly.h"
|
|
|
|
#include "llvm/IR/IntrinsicsX86.h"
|
|
|
|
#include "llvm/IR/IntrinsicsXCore.h"
|
2013-01-02 12:36:10 +01:00
|
|
|
#include "llvm/IR/LLVMContext.h"
|
2015-05-13 17:13:45 +02:00
|
|
|
#include "llvm/IR/MDBuilder.h"
|
|
|
|
#include "llvm/IR/Metadata.h"
|
2013-01-02 12:36:10 +01:00
|
|
|
#include "llvm/IR/Module.h"
|
2021-02-05 00:35:43 +01:00
|
|
|
#include "llvm/IR/Operator.h"
|
2017-05-11 01:41:30 +02:00
|
|
|
#include "llvm/IR/SymbolTableListTraits.h"
|
|
|
|
#include "llvm/IR/Type.h"
|
|
|
|
#include "llvm/IR/Use.h"
|
|
|
|
#include "llvm/IR/User.h"
|
|
|
|
#include "llvm/IR/Value.h"
|
|
|
|
#include "llvm/IR/ValueSymbolTable.h"
|
|
|
|
#include "llvm/Support/Casting.h"
|
2021-05-27 06:01:20 +02:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2017-05-11 01:41:30 +02:00
|
|
|
#include "llvm/Support/Compiler.h"
|
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <cstring>
|
|
|
|
#include <string>
|
|
|
|
|
2003-11-21 21:23:48 +01:00
|
|
|
using namespace llvm;
|
2018-01-17 23:24:23 +01:00
|
|
|
using ProfileCount = Function::ProfileCount;

// Explicit instantiations of SymbolTableListTraits since some of the methods
// are not in the public header file...
template class llvm::SymbolTableListTraits<BasicBlock>;

static cl::opt<unsigned> NonGlobalValueMaxNameSize(
    "non-global-value-max-name-size", cl::Hidden, cl::init(1024),
    cl::desc("Maximum size for the name of non-global values."));

//===----------------------------------------------------------------------===//
// Argument Implementation
//===----------------------------------------------------------------------===//

Argument::Argument(Type *Ty, const Twine &Name, Function *Par, unsigned ArgNo)
    : Value(Ty, Value::ArgumentVal), Parent(Par), ArgNo(ArgNo) {
  setName(Name);
}

void Argument::setParent(Function *parent) {
  Parent = parent;
}
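
/// An argument is known to be non-null if it carries the nonnull attribute
/// (when AllowUndefOrPoison is false, noundef must be present as well), or
/// if it is dereferenceable and null is not a defined address in the
/// argument's address space.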
bool Argument::hasNonNullAttr(bool AllowUndefOrPoison) const {
  if (!getType()->isPointerTy()) return false;
  if (getParent()->hasParamAttribute(getArgNo(), Attribute::NonNull) &&
      (AllowUndefOrPoison ||
       getParent()->hasParamAttribute(getArgNo(), Attribute::NoUndef)))
    return true;
  else if (getDereferenceableBytes() > 0 &&
           !NullPointerIsDefined(getParent(),
                                 getType()->getPointerAddressSpace()))
    return true;
  return false;
}

bool Argument::hasByValAttr() const {
  if (!getType()->isPointerTy()) return false;
  return hasAttribute(Attribute::ByVal);
}
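
// Per the byref design notes, byref is a stripped-down byval: it conveys the
// in-memory type of the pointee for ABI purposes without implying a
// caller-made stack copy.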
bool Argument::hasByRefAttr() const {
  if (!getType()->isPointerTy())
    return false;
  return hasAttribute(Attribute::ByRef);
}

bool Argument::hasSwiftSelfAttr() const {
  return getParent()->hasParamAttribute(getArgNo(), Attribute::SwiftSelf);
}

bool Argument::hasSwiftErrorAttr() const {
  return getParent()->hasParamAttribute(getArgNo(), Attribute::SwiftError);
}

bool Argument::hasInAllocaAttr() const {
  if (!getType()->isPointerTy()) return false;
  return hasAttribute(Attribute::InAlloca);
}

bool Argument::hasPreallocatedAttr() const {
  if (!getType()->isPointerTy())
    return false;
  return hasAttribute(Attribute::Preallocated);
}
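
/// True if the pointee is passed by value via an in-memory copy: byval,
/// inalloca, and preallocated all have that effect.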
bool Argument::hasPassPointeeByValueCopyAttr() const {
  if (!getType()->isPointerTy()) return false;
  AttributeList Attrs = getParent()->getAttributes();
  return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) ||
         Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca) ||
         Attrs.hasParamAttribute(getArgNo(), Attribute::Preallocated);
}
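
/// Broader than hasPassPointeeByValueCopyAttr(): also true for sret and
/// byref, where the pointee lives in memory but is not necessarily a copy
/// made by the caller.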
bool Argument::hasPointeeInMemoryValueAttr() const {
  if (!getType()->isPointerTy())
    return false;
  AttributeList Attrs = getParent()->getAttributes();
  return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) ||
         Attrs.hasParamAttribute(getArgNo(), Attribute::StructRet) ||
         Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca) ||
         Attrs.hasParamAttribute(getArgNo(), Attribute::Preallocated) ||
         Attrs.hasParamAttribute(getArgNo(), Attribute::ByRef);
}

/// For a byval, byref, sret, inalloca, or preallocated parameter, get the
/// in-memory parameter type.
static Type *getMemoryParamAllocType(AttributeSet ParamAttrs, Type *ArgTy) {
  // FIXME: All the type carrying attributes are mutually exclusive, so there
  // should be a single query to get the stored type that handles any of them.
  if (Type *ByValTy = ParamAttrs.getByValType())
    return ByValTy;
  if (Type *ByRefTy = ParamAttrs.getByRefType())
    return ByRefTy;
  if (Type *PreAllocTy = ParamAttrs.getPreallocatedType())
    return PreAllocTy;
  if (Type *InAllocaTy = ParamAttrs.getInAllocaType())
    return InAllocaTy;
  if (Type *SRetTy = ParamAttrs.getStructRetType())
    return SRetTy;

  return nullptr;
}
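
// Illustrative IR (hypothetical %struct.S): for a parameter declared as
//   declare void @f(%struct.S* byval(%struct.S) align 8 %p)
// the in-memory parameter type recovered above is %struct.S.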

uint64_t Argument::getPassPointeeByValueCopySize(const DataLayout &DL) const {
  AttributeSet ParamAttrs =
      getParent()->getAttributes().getParamAttributes(getArgNo());
  if (Type *MemTy = getMemoryParamAllocType(ParamAttrs, getType()))
    return DL.getTypeAllocSize(MemTy);
  return 0;
}
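
// For example, given %struct.S = type { i32, i32 }, an argument declared
// %struct.S* byval(%struct.S) would report a copy size of 8 bytes under a
// typical data layout (DL.getTypeAllocSize on the in-memory type).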

Type *Argument::getPointeeInMemoryValueType() const {
  AttributeSet ParamAttrs =
      getParent()->getAttributes().getParamAttributes(getArgNo());
  return getMemoryParamAllocType(ParamAttrs, getType());
}

unsigned Argument::getParamAlignment() const {
  assert(getType()->isPointerTy() && "Only pointers have alignments");
  return getParent()->getParamAlignment(getArgNo());
}
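
// Prefer the MaybeAlign-returning accessor below over the unsigned variant
// above; the latter predates the Align transition (an inference from the
// parallel accessors, not a deprecation note in this file).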
MaybeAlign Argument::getParamAlign() const {
  assert(getType()->isPointerTy() && "Only pointers have alignments");
  return getParent()->getParamAlign(getArgNo());
}
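
// The stackalign attribute controls the alignment of an argument's stack
// slot when it is passed in memory, while align remains an optimizer hint
// about the pointee itself (see https://reviews.llvm.org/D98794).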
MaybeAlign Argument::getParamStackAlign() const {
  return getParent()->getParamStackAlign(getArgNo());
}

Type *Argument::getParamByValType() const {
  assert(getType()->isPointerTy() && "Only pointers have byval types");
  return getParent()->getParamByValType(getArgNo());
}

Type *Argument::getParamStructRetType() const {
  assert(getType()->isPointerTy() && "Only pointers have sret types");
  return getParent()->getParamStructRetType(getArgNo());
}
|
|
|
|
|
IR: Define byref parameter attribute
This allows tracking the in-memory type of a pointer argument to a
function for ABI purposes. This is essentially a stripped down version
of byval to remove some of the stack-copy implications in its
definition.
This includes the base IR changes, and some tests for places where it
should be treated similarly to byval. Codegen support will be in a
future patch.
My original attempt at solving some of these problems was to repurpose
byval with a different address space from the stack. However, it is
technically permitted for the callee to introduce a write to the
argument, although nothing does this in reality. There is also talk of
removing and replacing the byval attribute, so a new attribute would
need to take its place anyway.
This is intended to avoid some optimization issues with the current
handling of aggregate arguments, as well as to fix inflexibility in how
frontends can specify the kernel ABI. The most honest representation
of the amdgpu_kernel convention is to expose all kernel arguments as
loads from constant memory. Today, these are raw, SSA Argument values
and codegen is responsible for turning these into loads.
Background:
There currently isn't a satisfactory way to represent how arguments
for the amdgpu_kernel calling convention are passed. In reality,
arguments are passed in a single, flat, constant memory buffer
implicitly passed to the function. It is also illegal to call this
function in the IR, and this is only ever invoked by a driver of some
kind.
It does not make sense to have a stack passed parameter in this
context as is implied by byval. It is never valid to write to the
kernel arguments, as this would corrupt the inputs seen by other
dispatches of the kernel. These arguments are also not in the same
address space as the stack, so a copy is needed to an alloca. From a
source C-like language, the kernel parameters are invisible.
Semantically, a copy is always required from the constant argument
memory to a mutable variable.
The current clang calling convention lowering emits raw values,
including aggregates, into the function argument list, since using
byval would not make sense. This has some unfortunate consequences for
the optimizer. In the aggregate case, we end up with an aggregate
store to alloca, which both SROA and instcombine turn into a store of
each aggregate field. The optimizer never pieces this back together to
see that this is really just a copy from constant memory, so we end up
stuck with expensive stack usage.
This also means the backend dictates the alignment of arguments, and
arbitrarily picks the LLVM IR ABI type alignment. By allowing an
explicit alignment, frontends can make better decisions. For example,
there's no real advantage to an alignment higher than 4, so a frontend
could choose to compact the argument layout. Similarly, there is a
high penalty to using an alignment lower than 4, so a frontend could
opt into more padding for small arguments.
Another design consideration is when it is appropriate to expose the
fact that these arguments are all really passed in adjacent
memory. Currently we have a late IR optimization pass in codegen to
rewrite the kernel argument values into explicit loads to enable
vectorization. In most programs, unrelated argument loads can be
merged together. However, exposing this property directly from the
frontend has some disadvantages. We still need a way to track the
original argument sizes and alignments to report to the driver. I find
using some side-channel, metadata mechanism to track this
unappealing. If the kernel arguments were exposed as a single buffer
to begin with, alias analysis would be unaware that the padding bits
between arguments are meaningless. Another family of problems is that
there are still some gaps in replacing all of the available parameter
attributes with metadata equivalents once lowered to loads.
The immediate plan is to start using this new attribute to handle all
aggregate arguments for kernels. Long term, it makes sense to migrate
all kernel arguments, including scalars, to be passed indirectly in
the same manner.
Additional context is in D79744.
Type *Argument::getParamByRefType() const {
  assert(getType()->isPointerTy() && "Only pointers have byref types");
  return getParent()->getParamByRefType(getArgNo());
}
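// Illustrative sketch, assuming the byref attribute described above: a
// frontend could tag a kernel's pointer argument with an explicit in-memory
// type. Attribute::getWithByRefType is the factory for the attribute; the
// wrapper function itself is a hypothetical example.
static void markFirstArgByRef(Function *F, Type *InMemoryTy) {
  F->addParamAttr(0, Attribute::getWithByRefType(F->getContext(), InMemoryTy));
}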
Type *Argument::getParamInAllocaType() const {
  assert(getType()->isPointerTy() && "Only pointers have inalloca types");
  return getParent()->getParamInAllocaType(getArgNo());
}

uint64_t Argument::getDereferenceableBytes() const {
  assert(getType()->isPointerTy() &&
         "Only pointers have dereferenceable bytes");
  return getParent()->getParamDereferenceableBytes(getArgNo());
}

uint64_t Argument::getDereferenceableOrNullBytes() const {
  assert(getType()->isPointerTy() &&
         "Only pointers have dereferenceable bytes");
  return getParent()->getParamDereferenceableOrNullBytes(getArgNo());
}
bool Argument::hasNestAttr() const {
  if (!getType()->isPointerTy()) return false;
  return hasAttribute(Attribute::Nest);
}

bool Argument::hasNoAliasAttr() const {
  if (!getType()->isPointerTy()) return false;
  return hasAttribute(Attribute::NoAlias);
}

bool Argument::hasNoCaptureAttr() const {
  if (!getType()->isPointerTy()) return false;
  return hasAttribute(Attribute::NoCapture);
}

bool Argument::hasNoFreeAttr() const {
  if (!getType()->isPointerTy()) return false;
  return hasAttribute(Attribute::NoFree);
}

bool Argument::hasStructRetAttr() const {
  if (!getType()->isPointerTy()) return false;
  return hasAttribute(Attribute::StructRet);
}

bool Argument::hasInRegAttr() const {
  return hasAttribute(Attribute::InReg);
}

bool Argument::hasReturnedAttr() const {
  return hasAttribute(Attribute::Returned);
}

bool Argument::hasZExtAttr() const {
  return hasAttribute(Attribute::ZExt);
}

bool Argument::hasSExtAttr() const {
  return hasAttribute(Attribute::SExt);
}

bool Argument::onlyReadsMemory() const {
  AttributeList Attrs = getParent()->getAttributes();
  return Attrs.hasParamAttribute(getArgNo(), Attribute::ReadOnly) ||
         Attrs.hasParamAttribute(getArgNo(), Attribute::ReadNone);
}
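// Illustrative sketch combining the predicates above: an analysis might treat
// an argument as a safe, read-only, uncaptured pointer like this. The helper
// name is an example, not existing API.
static bool isReadOnlyUncapturedPointer(const Argument &A) {
  return A.getType()->isPointerTy() && A.onlyReadsMemory() &&
         A.hasNoCaptureAttr();
}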
void Argument::addAttrs(AttrBuilder &B) {
  AttributeList AL = getParent()->getAttributes();
  AL = AL.addParamAttributes(Parent->getContext(), getArgNo(), B);
  getParent()->setAttributes(AL);
}

void Argument::addAttr(Attribute::AttrKind Kind) {
  getParent()->addParamAttr(getArgNo(), Kind);
}

void Argument::addAttr(Attribute Attr) {
  getParent()->addParamAttr(getArgNo(), Attr);
}

void Argument::removeAttr(Attribute::AttrKind Kind) {
  getParent()->removeParamAttr(getArgNo(), Kind);
}

void Argument::removeAttrs(const AttrBuilder &B) {
  AttributeList AL = getParent()->getAttributes();
  AL = AL.removeParamAttributes(Parent->getContext(), getArgNo(), B);
  getParent()->setAttributes(AL);
}

bool Argument::hasAttribute(Attribute::AttrKind Kind) const {
  return getParent()->hasParamAttribute(getArgNo(), Kind);
}

Attribute Argument::getAttribute(Attribute::AttrKind Kind) const {
  return getParent()->getParamAttribute(getArgNo(), Kind);
}
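// Illustrative sketch: the Argument helpers above simply forward to the
// parent Function's per-parameter attribute list, so adds and removes are
// immediately visible through hasAttribute. Hypothetical example helper:
static void toggleNonNull(Argument &A) {
  A.addAttr(Attribute::NonNull);
  assert(A.hasAttribute(Attribute::NonNull));
  A.removeAttr(Attribute::NonNull);
}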
//===----------------------------------------------------------------------===//
// Helper Methods in Function
//===----------------------------------------------------------------------===//

LLVMContext &Function::getContext() const {
  return getType()->getContext();
}
unsigned Function::getInstructionCount() const {
  unsigned NumInstrs = 0;
  for (const BasicBlock &BB : BasicBlocks)
    NumInstrs += std::distance(BB.instructionsWithoutDebug().begin(),
                               BB.instructionsWithoutDebug().end());
  return NumInstrs;
}
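// Illustrative sketch: getInstructionCount skips debug intrinsics, so a size
// heuristic built on it behaves the same with and without -g. The threshold
// here is an arbitrary example value.
static bool isTinyFunction(const Function &F) {
  return F.getInstructionCount() <= 5;
}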
Function *Function::Create(FunctionType *Ty, LinkageTypes Linkage,
                           const Twine &N, Module &M) {
  return Create(Ty, Linkage, M.getDataLayout().getProgramAddressSpace(), N, &M);
}
[IR][sanitizer] Set nounwind on module ctor/dtor, additionally set uwtable if -fasynchronous-unwind-tables
On ELF targets, if a function has uwtable or personality, or does not have
nounwind (`needsUnwindTableEntry`), it marks that `.eh_frame` is needed in the module.
Then, a function gets `.eh_frame` if `needsUnwindTableEntry` or `-g[123]` is specified.
(i.e. if -g[123] is given, every function gets `.eh_frame`.
This behavior is strange but that is the status quo on GCC and Clang.)
Let's take asan as an example. Other sanitizers are similar.
`asan.module_[cd]tor` has no attributes. `needsUnwindTableEntry` returns true,
so every function gets `.eh_frame` if `-g[123]` is specified.
This is the root cause that
`-fno-exceptions -fno-asynchronous-unwind-tables -g` produces .debug_frame
while
`-fno-exceptions -fno-asynchronous-unwind-tables -g -fsanitize=address` produces .eh_frame.
This patch
* sets the nounwind attribute on sanitizer module ctor/dtor.
* lets Clang emit a module flag metadata "uwtable" for -fasynchronous-unwind-tables. If "uwtable" is set, sanitizer module ctor/dtor additionally get the uwtable attribute.
The "uwtable" mechanism is generic: synthesized functions not cloned/specialized
from existing ones should consider `Function::createWithDefaultAttr` instead of
`Function::create` if they want to pick up default attributes that carry
module-level semantics.
Other candidates: "frame-pointer" (https://github.com/ClangBuiltLinux/linux/issues/955
https://github.com/ClangBuiltLinux/linux/issues/1238), dso_local, etc.
Differential Revision: https://reviews.llvm.org/D100251
Function *Function::createWithDefaultAttr(FunctionType *Ty,
                                          LinkageTypes Linkage,
                                          unsigned AddrSpace, const Twine &N,
                                          Module *M) {
  auto *F = new Function(Ty, Linkage, AddrSpace, N, M);
  AttrBuilder B;
  if (M->getUwtable())
    B.addAttribute(Attribute::UWTable);
  switch (M->getFramePointer()) {
  case FramePointerKind::None:
    // 0 ("none") is the default.
    break;
  case FramePointerKind::NonLeaf:
    B.addAttribute("frame-pointer", "non-leaf");
    break;
  case FramePointerKind::All:
    B.addAttribute("frame-pointer", "all");
    break;
  }
  F->addAttributes(AttributeList::FunctionIndex, B);
  return F;
}
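// Illustrative sketch of the factory above: a pass synthesizing a module
// constructor can use createWithDefaultAttr so the new function picks up
// module-level defaults such as "uwtable" and "frame-pointer". The ctor name
// is an arbitrary example.
static Function *makeModuleCtorSkeleton(Module &M) {
  FunctionType *FTy =
      FunctionType::get(Type::getVoidTy(M.getContext()), /*isVarArg=*/false);
  return Function::createWithDefaultAttr(
      FTy, GlobalValue::InternalLinkage,
      M.getDataLayout().getProgramAddressSpace(), "module_ctor", &M);
}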
void Function::removeFromParent() {
  getParent()->getFunctionList().remove(getIterator());
}

void Function::eraseFromParent() {
  getParent()->getFunctionList().erase(getIterator());
}
//===----------------------------------------------------------------------===//
// Function Implementation
//===----------------------------------------------------------------------===//
static unsigned computeAddrSpace(unsigned AddrSpace, Module *M) {
  // If AS == -1 and we are passed a valid module pointer we place the function
  // in the program address space. Otherwise we default to AS0.
  if (AddrSpace == static_cast<unsigned>(-1))
    return M ? M->getDataLayout().getProgramAddressSpace() : 0;
  return AddrSpace;
}
Land the long talked about "type system rewrite" patch. This
patch brings numerous advantages to LLVM. One way to look at it
is through diffstat:
109 files changed, 3005 insertions(+), 5906 deletions(-)
Removing almost 3K lines of code is a good thing. Other advantages
include:
1. Value::getType() is a simple load that can be CSE'd, not a mutating
union-find operation.
2. Types are uniqued and never move once created, defining away PATypeHolder.
3. Structs can be "named" now, and their name is part of the identity that
uniques them. This means that the compiler doesn't merge them structurally,
which makes the IR much less confusing.
4. Now that there is no way to get a cycle in a type graph without a named
struct type, "upreferences" go away.
5. Type refinement is completely gone, which should make LTO much MUCH faster
in some common cases with C++ code.
6. Types are now generally immutable, so we can use "Type *" instead of
"const Type *" everywhere.
Downsides of this patch are that it removes some functions from the C API,
so people using those will have to upgrade to the (not yet added) new API.
"LLVM 3.0" is the right time to do this.
There are still some cleanups pending after this; this patch is large enough
as-is.
llvm-svn: 134829

Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
                   const Twine &name, Module *ParentModule)
    : GlobalObject(Ty, Value::FunctionVal,
                   OperandTraits<Function>::op_begin(this), 0, Linkage, name,
                   computeAddrSpace(AddrSpace, ParentModule)),
      NumArgs(Ty->getNumParams()) {
  assert(FunctionType::isValidReturnType(getReturnType()) &&
         "invalid return type");
  setGlobalObjectSubClassData(0);

  // We only need a symbol table for a function if the context keeps value names
  if (!getContext().shouldDiscardValueNames())
    SymTab = std::make_unique<ValueSymbolTable>(NonGlobalValueMaxNameSize);

  // If the function has arguments, mark them as lazily built.
  if (Ty->getNumParams())
    setValueSubclassData(1);   // Set the "has lazy arguments" bit.

  if (ParentModule)
    ParentModule->getFunctionList().push_back(this);

  HasLLVMReservedName = getName().startswith("llvm.");
  // Ensure intrinsics have the right parameter attributes.
  // Note, the IntID field will have been set in Value::setName if this function
  // name is a valid intrinsic ID.
  if (IntID)
    setAttributes(Intrinsic::getAttributes(getContext(), IntID));
}
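// Illustrative sketch of the constructor in action through the public
// factory: declaring "i32 @answer()" in a module. Names are example values.
static Function *declareAnswer(Module &M) {
  FunctionType *FTy =
      FunctionType::get(Type::getInt32Ty(M.getContext()), /*isVarArg=*/false);
  return Function::Create(FTy, GlobalValue::ExternalLinkage, "answer", M);
}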
Function::~Function() {
  dropAllReferences(); // After this it is safe to delete instructions.

  // Delete all of the method arguments and unlink from symbol table...
  if (Arguments)
    clearArguments();

  // Remove the function from the on-the-side GC table.
  clearGC();
}
void Function::BuildLazyArguments() const {
  // Create the arguments vector, all arguments start out unnamed.
  auto *FT = getFunctionType();
  if (NumArgs > 0) {
    Arguments = std::allocator<Argument>().allocate(NumArgs);
    for (unsigned i = 0, e = NumArgs; i != e; ++i) {
      Type *ArgTy = FT->getParamType(i);
      assert(!ArgTy->isVoidTy() && "Cannot have void typed arguments!");
      new (Arguments + i) Argument(ArgTy, "", const_cast<Function *>(this), i);
    }
  }

  // Clear the lazy arguments bit.
  unsigned SDC = getSubclassDataFromValue();
  SDC &= ~(1 << 0);
  const_cast<Function*>(this)->setValueSubclassData(SDC);
  assert(!hasLazyArguments());
}
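// Illustrative sketch: arguments are materialized on first access, so merely
// iterating F.args() forces BuildLazyArguments for a declaration. The helper
// is a hypothetical example.
static unsigned countPointerArgs(Function &F) {
  unsigned N = 0;
  for (Argument &A : F.args()) // Triggers lazy argument construction.
    if (A.getType()->isPointerTy())
      ++N;
  return N;
}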
static MutableArrayRef<Argument> makeArgArray(Argument *Args, size_t Count) {
  return MutableArrayRef<Argument>(Args, Count);
}
bool Function::isConstrainedFPIntrinsic() const {
  switch (getIntrinsicID()) {
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
  case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
    return true;
#undef INSTRUCTION
  default:
    return false;
  }
}
void Function::clearArguments() {
  for (Argument &A : makeArgArray(Arguments, NumArgs)) {
    A.setName("");
    A.~Argument();
  }
  std::allocator<Argument>().deallocate(Arguments, NumArgs);
  Arguments = nullptr;
}
void Function::stealArgumentListFrom(Function &Src) {
  assert(isDeclaration() && "Expected no references to current arguments");

  // Drop the current arguments, if any, and set the lazy argument bit.
  if (!hasLazyArguments()) {
    assert(llvm::all_of(makeArgArray(Arguments, NumArgs),
                        [](const Argument &A) { return A.use_empty(); }) &&
           "Expected arguments to be unused in declaration");
    clearArguments();
    setValueSubclassData(getSubclassDataFromValue() | (1 << 0));
  }

  // Nothing to steal if Src has lazy arguments.
  if (Src.hasLazyArguments())
    return;

  // Steal arguments from Src, and fix the lazy argument bits.
  assert(arg_size() == Src.arg_size());
  Arguments = Src.Arguments;
  Src.Arguments = nullptr;
  for (Argument &A : makeArgArray(Arguments, NumArgs)) {
    // FIXME: This does the work of transferNodesFromList inefficiently.
    SmallString<128> Name;
    if (A.hasName())
      Name = A.getName();
    if (!Name.empty())
      A.setName("");
    A.setParent(this);
    if (!Name.empty())
      A.setName(Name);
  }

  setValueSubclassData(getSubclassDataFromValue() & ~(1 << 0));
  assert(!hasLazyArguments());
  Src.setValueSubclassData(Src.getSubclassDataFromValue() | (1 << 0));
}
// dropAllReferences() - This function causes all the subinstructions to "let
// go" of all references that they are maintaining.  This allows one to
// 'delete' a whole class at a time, even though there may be circular
// references... first all references are dropped, and all use counts go to
// zero.  Then everything is deleted for real.  Note that no operations are
// valid on an object that has "dropped all references", except operator
// delete.
//
void Function::dropAllReferences() {
  setIsMaterializable(false);

  for (BasicBlock &BB : *this)
    BB.dropAllReferences();

  // Delete all basic blocks. They are now unused, except possibly by
  // blockaddresses, but BasicBlock's destructor takes care of those.
  while (!BasicBlocks.empty())
    BasicBlocks.begin()->eraseFromParent();

  // Drop uses of any optional data (real or placeholder).
  if (getNumOperands()) {
    User::dropAllReferences();
    setNumHungOffUseOperands(0);
    setValueSubclassData(getSubclassDataFromValue() & ~0xe);
  }

  // Metadata is stored in a side-table.
  clearMetadata();
}
Rename AttributeSet to AttributeList
Summary:
This class is a list of AttributeSetNodes corresponding to the function
prototype of a call or function declaration. This class used to be
called ParamAttrListPtr, then AttrListPtr, then AttributeSet. It is
typically accessed by parameter and return value index, so
"AttributeList" seems like a more intuitive name.
Rename AttributeSetImpl to AttributeListImpl to follow suit.
It's useful to rename this class so that we can rename AttributeSetNode
to AttributeSet later. AttributeSet is the set of attributes that apply
to a single function, argument, or return value.
Reviewers: sanjoy, javed.absar, chandlerc, pete
Reviewed By: pete
Subscribers: pete, jholewinski, arsenm, dschuff, mehdi_amini, jfb, nhaehnle, sbc100, void, llvm-commits
Differential Revision: https://reviews.llvm.org/D31102
llvm-svn: 298393

void Function::addAttribute(unsigned i, Attribute::AttrKind Kind) {
  AttributeList PAL = getAttributes();
  PAL = PAL.addAttribute(getContext(), i, Kind);
  setAttributes(PAL);
}
void Function::addAttribute(unsigned i, Attribute Attr) {
  AttributeList PAL = getAttributes();
  PAL = PAL.addAttribute(getContext(), i, Attr);
  setAttributes(PAL);
}
void Function::addAttributes(unsigned i, const AttrBuilder &Attrs) {
  AttributeList PAL = getAttributes();
  PAL = PAL.addAttributes(getContext(), i, Attrs);
  setAttributes(PAL);
}
void Function::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
  AttributeList PAL = getAttributes();
  PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind);
  setAttributes(PAL);
}

void Function::addParamAttr(unsigned ArgNo, Attribute Attr) {
  AttributeList PAL = getAttributes();
  PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr);
  setAttributes(PAL);
}

void Function::addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) {
  AttributeList PAL = getAttributes();
  PAL = PAL.addParamAttributes(getContext(), ArgNo, Attrs);
  setAttributes(PAL);
}
void Function::removeAttribute(unsigned i, Attribute::AttrKind Kind) {
  AttributeList PAL = getAttributes();
  PAL = PAL.removeAttribute(getContext(), i, Kind);
  setAttributes(PAL);
}
void Function::removeAttribute(unsigned i, StringRef Kind) {
  AttributeList PAL = getAttributes();
  PAL = PAL.removeAttribute(getContext(), i, Kind);
  setAttributes(PAL);
}
void Function::removeAttributes(unsigned i, const AttrBuilder &Attrs) {
  AttributeList PAL = getAttributes();
  PAL = PAL.removeAttributes(getContext(), i, Attrs);
  setAttributes(PAL);
}
void Function::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
  AttributeList PAL = getAttributes();
  PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
  setAttributes(PAL);
}

void Function::removeParamAttr(unsigned ArgNo, StringRef Kind) {
  AttributeList PAL = getAttributes();
  PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
  setAttributes(PAL);
}

void Function::removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) {
  AttributeList PAL = getAttributes();
  PAL = PAL.removeParamAttributes(getContext(), ArgNo, Attrs);
  setAttributes(PAL);
}
void Function::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
  AttributeList PAL = getAttributes();
  PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
  setAttributes(PAL);
}
void Function::addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes) {
  AttributeList PAL = getAttributes();
  PAL = PAL.addDereferenceableParamAttr(getContext(), ArgNo, Bytes);
  setAttributes(PAL);
}
void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
  AttributeList PAL = getAttributes();
  PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
  setAttributes(PAL);
}
void Function::addDereferenceableOrNullParamAttr(unsigned ArgNo,
                                                 uint64_t Bytes) {
  AttributeList PAL = getAttributes();
  PAL = PAL.addDereferenceableOrNullParamAttr(getContext(), ArgNo, Bytes);
  setAttributes(PAL);
}
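// Illustrative sketch using the helpers above: promise the optimizer that the
// first parameter points at at least 16 dereferenceable bytes. The byte count
// and helper name are example values.
static void promiseDereferenceable(Function *F) {
  F->addDereferenceableParamAttr(0, 16);
}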
DenormalMode Function::getDenormalMode(const fltSemantics &FPType) const {
  if (&FPType == &APFloat::IEEEsingle()) {
    Attribute Attr = getFnAttribute("denormal-fp-math-f32");
    StringRef Val = Attr.getValueAsString();
    if (!Val.empty())
      return parseDenormalFPAttribute(Val);

    // If the f32 variant of the attribute isn't specified, try to use the
    // generic one.
  }

  Attribute Attr = getFnAttribute("denormal-fp-math");
  return parseDenormalFPAttribute(Attr.getValueAsString());
}
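// Illustrative sketch, assuming the attribute fallback described above: query
// the effective f32 output denormal mode, which consults
// "denormal-fp-math-f32" before "denormal-fp-math". The helper is a
// hypothetical example.
static bool flushesF32OutputDenormals(const Function &F) {
  return F.getDenormalMode(APFloat::IEEEsingle()).Output != DenormalMode::IEEE;
}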
const std::string &Function::getGC() const {
  assert(hasGC() && "Function has no collector");
  return getContext().getGC(*this);
}

void Function::setGC(std::string Str) {
  setValueSubclassDataBit(14, !Str.empty());
  getContext().setGC(*this, std::move(Str));
}
void Function::clearGC() {
  if (!hasGC())
    return;
  getContext().deleteGC(*this);
  setValueSubclassDataBit(14, false);
}
bool Function::hasStackProtectorFnAttr() const {
  return hasFnAttribute(Attribute::StackProtect) ||
         hasFnAttribute(Attribute::StackProtectStrong) ||
         hasFnAttribute(Attribute::StackProtectReq);
}
/// Copy all additional attributes (those not needed to create a Function) from
/// the Function Src to this one.
void Function::copyAttributesFrom(const Function *Src) {
  GlobalObject::copyAttributesFrom(Src);
  setCallingConv(Src->getCallingConv());
  setAttributes(Src->getAttributes());
  if (Src->hasGC())
    setGC(Src->getGC());
  else
    clearGC();
  if (Src->hasPersonalityFn())
    setPersonalityFn(Src->getPersonalityFn());
  if (Src->hasPrefixData())
    setPrefixData(Src->getPrefixData());
  if (Src->hasPrologueData())
    setPrologueData(Src->getPrologueData());
}
/// Table of string intrinsic names indexed by enum value.
static const char * const IntrinsicNameTable[] = {
  "not_intrinsic",
#define GET_INTRINSIC_NAME_TABLE
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_INTRINSIC_NAME_TABLE
};
/// Table of per-target intrinsic name tables.
#define GET_INTRINSIC_TARGET_DATA
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_INTRINSIC_TARGET_DATA
bool Function::isTargetIntrinsic(Intrinsic::ID IID) {
  return IID > TargetInfos[0].Count;
}

bool Function::isTargetIntrinsic() const {
  return isTargetIntrinsic(IntID);
}
/// Find the segment of \c IntrinsicNameTable for intrinsics with the same
/// target as \c Name, or the generic table if \c Name is not target specific.
///
/// Returns the relevant slice of \c IntrinsicNameTable.
static ArrayRef<const char *> findTargetSubtable(StringRef Name) {
  assert(Name.startswith("llvm."));

  ArrayRef<IntrinsicTargetInfo> Targets(TargetInfos);
  // Drop "llvm." and take the first dotted component. That will be the target
  // if this is target specific.
  StringRef Target = Name.drop_front(5).split('.').first;
  auto It = partition_point(
      Targets, [=](const IntrinsicTargetInfo &TI) { return TI.Name < Target; });
  // We've either found the target or just fall back to the generic set, which
  // is always first.
  const auto &TI = It != Targets.end() && It->Name == Target ? *It : Targets[0];
  return makeArrayRef(&IntrinsicNameTable[1] + TI.Offset, TI.Count);
}
[MIRParser] Accept overloaded intrinsic names w/o type suffixes
Function::lookupIntrinsicID is somewhat forgiving when it comes to
overloaded intrinsics' names: it returns an ID as soon as the name
provided has a prefix that matches a registered intrinsic's name w/o
actually checking that the rest of the name encodes all the concrete arg
types, let alone that those types are compatible with the intrinsic's
definition.
That's probably fine and comes in handy in MIR serialization: we don't
care about IR types at MIR level and every intrinsic should be
selectable based on its ID and low-level types (LLTs) of its operands,
including the overloaded ones, so there is no point in serializing
mangled IR types as part of the intrinsic's name.
However, lookupIntrinsicID is somewhat inconsistent in its forgiveness:
if the name provided is actually an exact match, it will refuse to
return the ID if the intrinsic is overloaded. There is probably no
real reason for that, and it renders MIRParser incapable of
deserializing MIR that MIRPrinter serialized.
This commit fixes it.
Reviewers: rnk, aditya_nandakumar, qcolombet, thegameg, dsanders,
marcello.maggioni
Reviewed By: bogner
Subscribers: javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D43267
llvm-svn: 326387

/// This does the actual lookup of an intrinsic ID which
/// matches the given function name.
Intrinsic::ID Function::lookupIntrinsicID(StringRef Name) {
  ArrayRef<const char *> NameTable = findTargetSubtable(Name);
  int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name);
  if (Idx == -1)
    return Intrinsic::not_intrinsic;

  // Intrinsic IDs correspond to the location in IntrinsicNameTable, but we have
  // an index into a sub-table.
  int Adjust = NameTable.data() - IntrinsicNameTable;
  Intrinsic::ID ID = static_cast<Intrinsic::ID>(Idx + Adjust);

  // If the intrinsic is not overloaded, require an exact match. If it is
  // overloaded, require either exact or prefix match.
  const auto MatchSize = strlen(NameTable[Idx]);
  assert(Name.size() >= MatchSize && "Expected either exact or prefix match");
  bool IsExactMatch = Name.size() == MatchSize;
  return IsExactMatch || Intrinsic::isOverloaded(ID) ? ID
                                                     : Intrinsic::not_intrinsic;
}
void Function::recalculateIntrinsicID() {
  StringRef Name = getName();
  if (!Name.startswith("llvm.")) {
    HasLLVMReservedName = false;
    IntID = Intrinsic::not_intrinsic;
    return;
  }
  HasLLVMReservedName = true;
  IntID = lookupIntrinsicID(Name);
}
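// Illustrative sketch: lookupIntrinsicID accepts the mangled names of
// overloaded intrinsics, so a fully suffixed memcpy name resolves to
// Intrinsic::memcpy. The helper is a hypothetical example.
static bool isMemcpyName(StringRef Name) {
  return Function::lookupIntrinsicID(Name) == Intrinsic::memcpy;
}
// e.g. isMemcpyName("llvm.memcpy.p0i8.p0i8.i64") is expected to return true.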
[IR][BFloat] Add BFloat IR type
Summary:
The BFloat IR type is introduced to provide support for, initially, the BFloat16
datatype introduced with the Armv8.6 architecture (optional from Armv8.2
onwards). It has an 8-bit exponent and a 7-bit mantissa and behaves like an IEEE
754 floating point IR type.
This is part of a patch series upstreaming Armv8.6 features. Subsequent patches
will upstream intrinsics support and C-lang support for BFloat.
Reviewers: SjoerdMeijer, rjmccall, rsmith, liutianle, RKSimon, craig.topper, jfb, LukeGeeson, sdesmalen, deadalnix, ctetreau
Reviewed By: fpetrogalli
Subscribers: hiraditya, llvm-commits, danielkiss, arphaman, kristof.beyls, dexonsmith
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78190

/// Returns a stable mangling for the type specified for use in the name
/// mangling scheme used by 'any' types in intrinsic signatures.  The mangling
/// of named types is simply their name.  Manglings for unnamed types consist
/// of a prefix ('p' for pointers, 'a' for arrays, 'f_' for functions)
/// combined with the mangling of their component types.  A vararg function
/// type will have a suffix of 'vararg'.  Since function types can contain
/// other function types, we close a function type mangling with suffix 'f'
/// which can't be confused with its prefix.  This ensures we don't have
/// collisions between two unrelated function types.  Otherwise, you might
/// parse ffXX as f(fXX) or f(fX)X.  (X is a placeholder for any other type.)
///
/// The HasUnnamedType boolean is set if an unnamed type was encountered,
/// indicating that extra care must be taken to ensure a unique name.
static std::string getMangledTypeStr(Type *Ty, bool &HasUnnamedType) {
  std::string Result;
  if (PointerType *PTyp = dyn_cast<PointerType>(Ty)) {
    Result += "p" + utostr(PTyp->getAddressSpace());
    // Opaque pointer doesn't have pointee type information, so we just mangle
    // address space for opaque pointer.
    if (!PTyp->isOpaque())
      Result += getMangledTypeStr(PTyp->getElementType(), HasUnnamedType);
  } else if (ArrayType *ATyp = dyn_cast<ArrayType>(Ty)) {
    Result += "a" + utostr(ATyp->getNumElements()) +
              getMangledTypeStr(ATyp->getElementType(), HasUnnamedType);
  } else if (StructType *STyp = dyn_cast<StructType>(Ty)) {
    if (!STyp->isLiteral()) {
      Result += "s_";
      if (STyp->hasName())
        Result += STyp->getName();
      else
        HasUnnamedType = true;
    } else {
      Result += "sl_";
      for (auto Elem : STyp->elements())
        Result += getMangledTypeStr(Elem, HasUnnamedType);
    }
    // Ensure nested structs are distinguishable.
    Result += "s";
  } else if (FunctionType *FT = dyn_cast<FunctionType>(Ty)) {
    Result += "f_" + getMangledTypeStr(FT->getReturnType(), HasUnnamedType);
    for (size_t i = 0; i < FT->getNumParams(); i++)
      Result += getMangledTypeStr(FT->getParamType(i), HasUnnamedType);
    if (FT->isVarArg())
      Result += "vararg";
    // Ensure nested function types are distinguishable.
    Result += "f";
  } else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    ElementCount EC = VTy->getElementCount();
    if (EC.isScalable())
      Result += "nx";
    Result += "v" + utostr(EC.getKnownMinValue()) +
              getMangledTypeStr(VTy->getElementType(), HasUnnamedType);
  } else if (Ty) {
    switch (Ty->getTypeID()) {
    default: llvm_unreachable("Unhandled type");
    case Type::VoidTyID:      Result += "isVoid";   break;
    case Type::MetadataTyID:  Result += "Metadata"; break;
    case Type::HalfTyID:      Result += "f16";      break;
    case Type::BFloatTyID:    Result += "bf16";     break;
    case Type::FloatTyID:     Result += "f32";      break;
    case Type::DoubleTyID:    Result += "f64";      break;
    case Type::X86_FP80TyID:  Result += "f80";      break;
    case Type::FP128TyID:     Result += "f128";     break;
    case Type::PPC_FP128TyID: Result += "ppcf128";  break;
    case Type::X86_MMXTyID:   Result += "x86mmx";   break;
    case Type::X86_AMXTyID:   Result += "x86amx";   break;
    case Type::IntegerTyID:
      Result += "i" + utostr(cast<IntegerType>(Ty)->getBitWidth());
      break;
    }
  }
  return Result;
}
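// Worked example of the mangling scheme above (a sketch, assuming the
// overloaded llvm.umax intrinsic is available in this tree): instantiating it
// at <4 x i32> should yield the suffix "v4i32".
static std::string umaxNameForV4I32(Module &M) {
  Type *VecTy = FixedVectorType::get(Type::getInt32Ty(M.getContext()), 4);
  // Expected result: "llvm.umax.v4i32".
  return Intrinsic::getName(Intrinsic::umax, {VecTy}, &M, nullptr);
}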
StringRef Intrinsic::getBaseName(ID id) {
  assert(id < num_intrinsics && "Invalid intrinsic ID!");
  return IntrinsicNameTable[id];
}
StringRef Intrinsic::getName(ID id) {
  assert(id < num_intrinsics && "Invalid intrinsic ID!");
  assert(!Intrinsic::isOverloaded(id) &&
         "This version of getName does not support overloading");
  return getBaseName(id);
}
static std::string getIntrinsicNameImpl(Intrinsic::ID Id, ArrayRef<Type *> Tys,
                                        Module *M, FunctionType *FT,
                                        bool EarlyModuleCheck) {

  assert(Id < Intrinsic::num_intrinsics && "Invalid intrinsic ID!");
  assert((Tys.empty() || Intrinsic::isOverloaded(Id)) &&
         "This version of getName is for overloaded intrinsics only");
  (void)EarlyModuleCheck;
  assert((!EarlyModuleCheck || M ||
          !any_of(Tys, [](Type *T) { return isa<PointerType>(T); })) &&
         "Intrinsic overloading on pointer types need to provide a Module");
  bool HasUnnamedType = false;
  std::string Result(Intrinsic::getBaseName(Id));
  for (Type *Ty : Tys)
    Result += "." + getMangledTypeStr(Ty, HasUnnamedType);
  if (HasUnnamedType) {
    assert(M && "unnamed types need a module");
    if (!FT)
      FT = Intrinsic::getType(M->getContext(), Id, Tys);
    else
      assert((FT == Intrinsic::getType(M->getContext(), Id, Tys)) &&
             "Provided FunctionType must match arguments");
    return M->getUniqueIntrinsicName(Result, Id, FT);
  }
  return Result;
}
std::string Intrinsic::getName(ID Id, ArrayRef<Type *> Tys, Module *M,
                               FunctionType *FT) {
  assert(M && "We need to have a Module");
  return getIntrinsicNameImpl(Id, Tys, M, FT, true);
}

std::string Intrinsic::getNameNoUnnamedTypes(ID Id, ArrayRef<Type *> Tys) {
  return getIntrinsicNameImpl(Id, Tys, nullptr, nullptr, false);
}
2012-05-27 20:28:35 +02:00
|
|
|
/// IIT_Info - These are enumerators that describe the entries returned by the
|
|
|
|
/// getIntrinsicInfoTableEntries function.
|
|
|
|
///
|
|
|
|
/// NOTE: This must be kept in synch with the copy in TblGen/IntrinsicEmitter!
|
|
|
|
enum IIT_Info {
|
2014-10-20 21:25:05 +02:00
|
|
|
// Common values should be encoded with 0-15.
|
2012-05-27 20:28:35 +02:00
|
|
|
IIT_Done = 0,
|
|
|
|
IIT_I1 = 1,
|
|
|
|
IIT_I8 = 2,
|
|
|
|
IIT_I16 = 3,
|
|
|
|
IIT_I32 = 4,
|
|
|
|
IIT_I64 = 5,
|
2013-01-11 02:45:05 +01:00
|
|
|
IIT_F16 = 6,
|
|
|
|
IIT_F32 = 7,
|
|
|
|
IIT_F64 = 8,
|
|
|
|
IIT_V2 = 9,
|
|
|
|
IIT_V4 = 10,
|
|
|
|
IIT_V8 = 11,
|
|
|
|
IIT_V16 = 12,
|
|
|
|
IIT_V32 = 13,
|
2014-10-20 21:25:05 +02:00
|
|
|
IIT_PTR = 14,
|
|
|
|
IIT_ARG = 15,
|
2014-09-30 13:32:22 +02:00
|
|
|
|
2014-10-20 21:25:05 +02:00
|
|
|
// Values from 16+ are only encodable with the inefficient encoding.
|
|
|
|
IIT_V64 = 16,
|
2014-09-30 13:32:22 +02:00
|
|
|
IIT_MMX = 17,
|
2015-09-02 15:36:25 +02:00
|
|
|
IIT_TOKEN = 18,
|
|
|
|
IIT_METADATA = 19,
|
|
|
|
IIT_EMPTYSTRUCT = 20,
|
|
|
|
IIT_STRUCT2 = 21,
|
|
|
|
IIT_STRUCT3 = 22,
|
|
|
|
IIT_STRUCT4 = 23,
|
|
|
|
IIT_STRUCT5 = 24,
|
|
|
|
IIT_EXTEND_ARG = 25,
|
|
|
|
IIT_TRUNC_ARG = 26,
|
|
|
|
IIT_ANYPTR = 27,
|
|
|
|
IIT_V1 = 28,
|
|
|
|
IIT_VARARG = 29,
|
|
|
|
IIT_HALF_VEC_ARG = 30,
|
|
|
|
IIT_SAME_VEC_WIDTH_ARG = 31,
|
|
|
|
IIT_PTR_TO_ARG = 32,
|
2016-11-03 04:23:55 +01:00
|
|
|
IIT_PTR_TO_ELT = 33,
|
2017-05-03 14:28:54 +02:00
|
|
|
IIT_VEC_OF_ANYPTRS_TO_ELT = 34,
|
2016-11-03 04:23:55 +01:00
|
|
|
IIT_I128 = 35,
|
|
|
|
IIT_V512 = 36,
|
2017-10-12 19:40:00 +02:00
|
|
|
IIT_V1024 = 37,
|
|
|
|
IIT_STRUCT6 = 38,
|
|
|
|
IIT_STRUCT7 = 39,
|
2018-07-09 20:50:06 +02:00
|
|
|
IIT_STRUCT8 = 40,
|
2019-06-13 11:37:38 +02:00
|
|
|
IIT_F128 = 41,
|
2019-08-27 14:57:09 +02:00
|
|
|
IIT_VEC_ELEMENT = 42,
|
2019-09-20 11:48:21 +02:00
|
|
|
IIT_SCALABLE_VEC = 43,
|
|
|
|
IIT_SUBDIVIDE2_ARG = 44,
|
2019-10-02 11:25:02 +02:00
|
|
|
IIT_SUBDIVIDE4_ARG = 45,
|
2020-02-07 16:30:31 +01:00
|
|
|
IIT_VEC_OF_BITCASTS_TO_INT = 46,
|
[IR][BFloat] add BFloat IR intrinsics support
Summary:
This patch is part of a series that adds support for the Bfloat16 extension of
the Armv8.6-a architecture, as detailed here:
https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a
The bfloat type, and its properties are specified in the Arm Architecture
Reference Manual:
https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile
Reviewers: scanon, fpetrogalli, sdesmalen, craig.topper, LukeGeeson
Reviewed By: fpetrogalli
Subscribers: LukeGeeson, pbarrio, kristof.beyls, hiraditya, jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D79707
2020-05-27 15:00:33 +02:00
|
|
|
IIT_V128 = 47,
|
2020-09-30 12:01:15 +02:00
|
|
|
IIT_BF16 = 48,
|
2020-10-02 16:47:43 +02:00
|
|
|
IIT_STRUCT9 = 49,
|
2020-11-20 08:19:34 +01:00
|
|
|
IIT_V256 = 50,
|
|
|
|
IIT_AMX = 51
|
2012-05-27 20:28:35 +02:00
|
|
|
};

static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
                          IIT_Info LastInfo,
                          SmallVectorImpl<Intrinsic::IITDescriptor> &OutputTable) {
  using namespace Intrinsic;

  bool IsScalableVector = (LastInfo == IIT_SCALABLE_VEC);

  IIT_Info Info = IIT_Info(Infos[NextElt++]);
  unsigned StructElts = 2;

  switch (Info) {
  case IIT_Done:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Void, 0));
    return;
  case IIT_VARARG:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::VarArg, 0));
    return;
  case IIT_MMX:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0));
    return;
  case IIT_AMX:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::AMX, 0));
    return;
  case IIT_TOKEN:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Token, 0));
    return;
  case IIT_METADATA:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Metadata, 0));
    return;
  case IIT_F16:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Half, 0));
    return;
  case IIT_BF16:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::BFloat, 0));
    return;
  case IIT_F32:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Float, 0));
    return;
  case IIT_F64:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Double, 0));
    return;
  case IIT_F128:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Quad, 0));
    return;
  case IIT_I1:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1));
    return;
  case IIT_I8:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8));
    return;
  case IIT_I16:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 16));
    return;
  case IIT_I32:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 32));
    return;
  case IIT_I64:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64));
    return;
  case IIT_I128:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 128));
    return;
  case IIT_V1:
    OutputTable.push_back(IITDescriptor::getVector(1, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_V2:
    OutputTable.push_back(IITDescriptor::getVector(2, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_V4:
    OutputTable.push_back(IITDescriptor::getVector(4, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_V8:
    OutputTable.push_back(IITDescriptor::getVector(8, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_V16:
    OutputTable.push_back(IITDescriptor::getVector(16, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_V32:
    OutputTable.push_back(IITDescriptor::getVector(32, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_V64:
    OutputTable.push_back(IITDescriptor::getVector(64, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_V128:
    OutputTable.push_back(IITDescriptor::getVector(128, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_V256:
    OutputTable.push_back(IITDescriptor::getVector(256, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_V512:
    OutputTable.push_back(IITDescriptor::getVector(512, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_V1024:
    OutputTable.push_back(IITDescriptor::getVector(1024, IsScalableVector));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_PTR:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  case IIT_ANYPTR: { // [ANYPTR addrspace, subtype]
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer,
                                             Infos[NextElt++]));
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  }
  case IIT_ARG: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Argument, ArgInfo));
    return;
  }
  case IIT_EXTEND_ARG: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::ExtendArgument,
                                             ArgInfo));
    return;
  }
  case IIT_TRUNC_ARG: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::TruncArgument,
                                             ArgInfo));
    return;
  }
  case IIT_HALF_VEC_ARG: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::HalfVecArgument,
                                             ArgInfo));
    return;
  }
  case IIT_SAME_VEC_WIDTH_ARG: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::SameVecWidthArgument,
                                             ArgInfo));
    return;
  }
  case IIT_PTR_TO_ARG: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToArgument,
                                             ArgInfo));
    return;
  }
  case IIT_PTR_TO_ELT: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo));
    return;
  }
  case IIT_VEC_OF_ANYPTRS_TO_ELT: {
    unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(
        IITDescriptor::get(IITDescriptor::VecOfAnyPtrsToElt, ArgNo, RefNo));
    return;
  }
  case IIT_EMPTYSTRUCT:
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
    return;
  case IIT_STRUCT9: ++StructElts; LLVM_FALLTHROUGH;
  case IIT_STRUCT8: ++StructElts; LLVM_FALLTHROUGH;
  case IIT_STRUCT7: ++StructElts; LLVM_FALLTHROUGH;
  case IIT_STRUCT6: ++StructElts; LLVM_FALLTHROUGH;
  case IIT_STRUCT5: ++StructElts; LLVM_FALLTHROUGH;
  case IIT_STRUCT4: ++StructElts; LLVM_FALLTHROUGH;
  case IIT_STRUCT3: ++StructElts; LLVM_FALLTHROUGH;
  case IIT_STRUCT2: {
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, StructElts));

    for (unsigned i = 0; i != StructElts; ++i)
      DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  }
  case IIT_SUBDIVIDE2_ARG: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Subdivide2Argument,
                                             ArgInfo));
    return;
  }
  case IIT_SUBDIVIDE4_ARG: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Subdivide4Argument,
                                             ArgInfo));
    return;
  }
  case IIT_VEC_ELEMENT: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecElementArgument,
                                             ArgInfo));
    return;
  }
  case IIT_SCALABLE_VEC: {
    DecodeIITType(NextElt, Infos, Info, OutputTable);
    return;
  }
  case IIT_VEC_OF_BITCASTS_TO_INT: {
    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
    OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfBitcastsToInt,
                                             ArgInfo));
    return;
  }
  }
  llvm_unreachable("unhandled");
}
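
// For example (informal): the descriptor byte sequence {IIT_V4, IIT_I32}
// decodes to a Vector descriptor of width 4 followed by an Integer descriptor
// of width 32, which DecodeFixedType below reassembles into <4 x i32>.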

#define GET_INTRINSIC_GENERATOR_GLOBAL
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_INTRINSIC_GENERATOR_GLOBAL

void Intrinsic::getIntrinsicInfoTableEntries(ID id,
                                             SmallVectorImpl<IITDescriptor> &T) {
  // Check to see if the intrinsic's type was expressible by the table.
  unsigned TableVal = IIT_Table[id-1];

  // Decode the TableVal into an array of IITValues.
  SmallVector<unsigned char, 8> IITValues;
  ArrayRef<unsigned char> IITEntries;
  unsigned NextElt = 0;
  if ((TableVal >> 31) != 0) {
    // This is an offset into the IIT_LongEncodingTable.
    IITEntries = IIT_LongEncodingTable;

    // Strip sentinel bit.
    NextElt = (TableVal << 1) >> 1;
  } else {
    // Decode the TableVal into an array of IITValues. If the entry was encoded
    // into a single word in the table itself, decode it now.
    do {
      IITValues.push_back(TableVal & 0xF);
      TableVal >>= 4;
    } while (TableVal);

    IITEntries = IITValues;
    NextElt = 0;
  }

  // Okay, decode the table into the output vector of IITDescriptors.
  DecodeIITType(NextElt, IITEntries, IIT_Done, T);
  while (NextElt != IITEntries.size() && IITEntries[NextElt] != 0)
    DecodeIITType(NextElt, IITEntries, IIT_Done, T);
}
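
// As a hypothetical example of the compact encoding above: a table value of
// 0x451 unpacks low-nibble-first into {IIT_I1, IIT_I64, IIT_I32}, i.e. an i1
// return type followed by i64 and i32 parameters once DecodeFixedType has run.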

static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
                             ArrayRef<Type*> Tys, LLVMContext &Context) {
  using namespace Intrinsic;

  IITDescriptor D = Infos.front();
  Infos = Infos.slice(1);

  switch (D.Kind) {
  case IITDescriptor::Void: return Type::getVoidTy(Context);
  case IITDescriptor::VarArg: return Type::getVoidTy(Context);
  case IITDescriptor::MMX: return Type::getX86_MMXTy(Context);
  case IITDescriptor::AMX: return Type::getX86_AMXTy(Context);
  case IITDescriptor::Token: return Type::getTokenTy(Context);
  case IITDescriptor::Metadata: return Type::getMetadataTy(Context);
  case IITDescriptor::Half: return Type::getHalfTy(Context);
  case IITDescriptor::BFloat: return Type::getBFloatTy(Context);
  case IITDescriptor::Float: return Type::getFloatTy(Context);
  case IITDescriptor::Double: return Type::getDoubleTy(Context);
  case IITDescriptor::Quad: return Type::getFP128Ty(Context);

  case IITDescriptor::Integer:
    return IntegerType::get(Context, D.Integer_Width);
  case IITDescriptor::Vector:
    return VectorType::get(DecodeFixedType(Infos, Tys, Context),
                           D.Vector_Width);
  case IITDescriptor::Pointer:
    return PointerType::get(DecodeFixedType(Infos, Tys, Context),
                            D.Pointer_AddressSpace);
  case IITDescriptor::Struct: {
    SmallVector<Type *, 8> Elts;
    for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
      Elts.push_back(DecodeFixedType(Infos, Tys, Context));
    return StructType::get(Context, Elts);
  }
  case IITDescriptor::Argument:
    return Tys[D.getArgumentNumber()];
  case IITDescriptor::ExtendArgument: {
    Type *Ty = Tys[D.getArgumentNumber()];
    if (VectorType *VTy = dyn_cast<VectorType>(Ty))
      return VectorType::getExtendedElementVectorType(VTy);

    return IntegerType::get(Context, 2 * cast<IntegerType>(Ty)->getBitWidth());
  }
  case IITDescriptor::TruncArgument: {
    Type *Ty = Tys[D.getArgumentNumber()];
    if (VectorType *VTy = dyn_cast<VectorType>(Ty))
      return VectorType::getTruncatedElementVectorType(VTy);

    IntegerType *ITy = cast<IntegerType>(Ty);
    assert(ITy->getBitWidth() % 2 == 0);
    return IntegerType::get(Context, ITy->getBitWidth() / 2);
  }
  case IITDescriptor::Subdivide2Argument:
  case IITDescriptor::Subdivide4Argument: {
    Type *Ty = Tys[D.getArgumentNumber()];
    VectorType *VTy = dyn_cast<VectorType>(Ty);
    assert(VTy && "Expected an argument of Vector Type");
    int SubDivs = D.Kind == IITDescriptor::Subdivide2Argument ? 1 : 2;
    return VectorType::getSubdividedVectorType(VTy, SubDivs);
  }
  case IITDescriptor::HalfVecArgument:
    return VectorType::getHalfElementsVectorType(cast<VectorType>(
                                                  Tys[D.getArgumentNumber()]));
  case IITDescriptor::SameVecWidthArgument: {
    Type *EltTy = DecodeFixedType(Infos, Tys, Context);
    Type *Ty = Tys[D.getArgumentNumber()];
    if (auto *VTy = dyn_cast<VectorType>(Ty))
      return VectorType::get(EltTy, VTy->getElementCount());
    return EltTy;
  }
  case IITDescriptor::PtrToArgument: {
    Type *Ty = Tys[D.getArgumentNumber()];
    return PointerType::getUnqual(Ty);
  }
  case IITDescriptor::PtrToElt: {
    Type *Ty = Tys[D.getArgumentNumber()];
    VectorType *VTy = dyn_cast<VectorType>(Ty);
    if (!VTy)
      llvm_unreachable("Expected an argument of Vector Type");
    Type *EltTy = VTy->getElementType();
    return PointerType::getUnqual(EltTy);
  }
  case IITDescriptor::VecElementArgument: {
    Type *Ty = Tys[D.getArgumentNumber()];
    if (VectorType *VTy = dyn_cast<VectorType>(Ty))
      return VTy->getElementType();
    llvm_unreachable("Expected an argument of Vector Type");
  }
  case IITDescriptor::VecOfBitcastsToInt: {
    Type *Ty = Tys[D.getArgumentNumber()];
    VectorType *VTy = dyn_cast<VectorType>(Ty);
    assert(VTy && "Expected an argument of Vector Type");
    return VectorType::getInteger(VTy);
  }
  case IITDescriptor::VecOfAnyPtrsToElt:
    // Return the overloaded type (which determines the pointer's address
    // space).
    return Tys[D.getOverloadArgNumber()];
  }
  llvm_unreachable("unhandled");
}

FunctionType *Intrinsic::getType(LLVMContext &Context,
                                 ID id, ArrayRef<Type*> Tys) {
  SmallVector<IITDescriptor, 8> Table;
  getIntrinsicInfoTableEntries(id, Table);

  ArrayRef<IITDescriptor> TableRef = Table;
  Type *ResultTy = DecodeFixedType(TableRef, Tys, Context);

  SmallVector<Type*, 8> ArgTys;
  while (!TableRef.empty())
    ArgTys.push_back(DecodeFixedType(TableRef, Tys, Context));

  // DecodeFixedType returns Void for IITDescriptor::Void and
  // IITDescriptor::VarArg. If we see a void type as the type of the last
  // argument, this is a vararg intrinsic.
  if (!ArgTys.empty() && ArgTys.back()->isVoidTy()) {
    ArgTys.pop_back();
    return FunctionType::get(ResultTy, ArgTys, true);
  }
  return FunctionType::get(ResultTy, ArgTys, false);
}
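
// Illustrative use (a sketch; the Module M is an assumption): the overloaded
// llvm.umax intrinsic instantiated at <4 x i32> has the type
// "<4 x i32> (<4 x i32>, <4 x i32>)":
//
//   LLVMContext &Ctx = M.getContext();
//   FunctionType *FT = Intrinsic::getType(
//       Ctx, Intrinsic::umax,
//       {FixedVectorType::get(Type::getInt32Ty(Ctx), 4)});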

bool Intrinsic::isOverloaded(ID id) {
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
}

bool Intrinsic::isLeaf(ID id) {
  switch (id) {
  default:
    return true;

  case Intrinsic::experimental_gc_statepoint:
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    return false;
  }
}

/// This defines the "Intrinsic::getAttributes(ID id)" method.
#define GET_INTRINSIC_ATTRIBUTES
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_INTRINSIC_ATTRIBUTES

Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
  // There can never be multiple globals with the same name of different types,
  // because intrinsics must be a specific type.
  auto *FT = getType(M->getContext(), id, Tys);
  return cast<Function>(
      M->getOrInsertFunction(Tys.empty() ? getName(id)
                                         : getName(id, Tys, M, FT),
                             getType(M->getContext(), id, Tys))
          .getCallee());
}
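
// Illustrative use (a sketch; M and Ctx are assumptions): fetching the
// declaration of llvm.memcpy overloaded at (i8*, i8*, i64):
//
//   LLVMContext &Ctx = M.getContext();
//   Function *MemCpy = Intrinsic::getDeclaration(
//       &M, Intrinsic::memcpy,
//       {Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx),
//        Type::getInt64Ty(Ctx)});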

// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN

// This defines the "Intrinsic::getIntrinsicForMSBuiltin()" method.
#define GET_LLVM_INTRINSIC_FOR_MS_BUILTIN
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_LLVM_INTRINSIC_FOR_MS_BUILTIN

using DeferredIntrinsicMatchPair =
    std::pair<Type *, ArrayRef<Intrinsic::IITDescriptor>>;

static bool matchIntrinsicType(
    Type *Ty, ArrayRef<Intrinsic::IITDescriptor> &Infos,
    SmallVectorImpl<Type *> &ArgTys,
    SmallVectorImpl<DeferredIntrinsicMatchPair> &DeferredChecks,
    bool IsDeferredCheck) {
  using namespace Intrinsic;

  // If we ran out of descriptors, there are too many arguments.
  if (Infos.empty()) return true;

  // Do this before slicing off the 'front' part.
  auto InfosRef = Infos;
  auto DeferCheck = [&DeferredChecks, &InfosRef](Type *T) {
    DeferredChecks.emplace_back(T, InfosRef);
    return false;
  };

  IITDescriptor D = Infos.front();
  Infos = Infos.slice(1);

  switch (D.Kind) {
  case IITDescriptor::Void: return !Ty->isVoidTy();
  case IITDescriptor::VarArg: return true;
  case IITDescriptor::MMX: return !Ty->isX86_MMXTy();
  case IITDescriptor::AMX: return !Ty->isX86_AMXTy();
  case IITDescriptor::Token: return !Ty->isTokenTy();
  case IITDescriptor::Metadata: return !Ty->isMetadataTy();
  case IITDescriptor::Half: return !Ty->isHalfTy();
  case IITDescriptor::BFloat: return !Ty->isBFloatTy();
  case IITDescriptor::Float: return !Ty->isFloatTy();
  case IITDescriptor::Double: return !Ty->isDoubleTy();
  case IITDescriptor::Quad: return !Ty->isFP128Ty();
  case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
  case IITDescriptor::Vector: {
    VectorType *VT = dyn_cast<VectorType>(Ty);
    return !VT || VT->getElementCount() != D.Vector_Width ||
           matchIntrinsicType(VT->getElementType(), Infos, ArgTys,
                              DeferredChecks, IsDeferredCheck);
  }
  case IITDescriptor::Pointer: {
    PointerType *PT = dyn_cast<PointerType>(Ty);
    if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace)
      return true;
    if (!PT->isOpaque())
      return matchIntrinsicType(PT->getElementType(), Infos, ArgTys,
                                DeferredChecks, IsDeferredCheck);
    // If typed pointers are supported, do not allow using an opaque pointer in
    // place of a fixed pointer type. This would make the intrinsic signature
    // non-unique.
    if (Ty->getContext().supportsTypedPointers())
      return true;
    // Consume IIT descriptors relating to the pointer element type.
    while (Infos.front().Kind == IITDescriptor::Pointer)
      Infos = Infos.slice(1);
    Infos = Infos.slice(1);
    return false;
  }

  case IITDescriptor::Struct: {
    StructType *ST = dyn_cast<StructType>(Ty);
    if (!ST || ST->getNumElements() != D.Struct_NumElements)
      return true;

    for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
      if (matchIntrinsicType(ST->getElementType(i), Infos, ArgTys,
                             DeferredChecks, IsDeferredCheck))
        return true;
    return false;
  }

  case IITDescriptor::Argument:
    // If this is the second occurrence of an argument,
    // verify that the later instance matches the previous instance.
    if (D.getArgumentNumber() < ArgTys.size())
      return Ty != ArgTys[D.getArgumentNumber()];

    if (D.getArgumentNumber() > ArgTys.size() ||
        D.getArgumentKind() == IITDescriptor::AK_MatchType)
      return IsDeferredCheck || DeferCheck(Ty);

    assert(D.getArgumentNumber() == ArgTys.size() && !IsDeferredCheck &&
           "Table consistency error");
    ArgTys.push_back(Ty);

    switch (D.getArgumentKind()) {
    case IITDescriptor::AK_Any: return false; // Success
    case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy();
    case IITDescriptor::AK_AnyFloat: return !Ty->isFPOrFPVectorTy();
    case IITDescriptor::AK_AnyVector: return !isa<VectorType>(Ty);
    case IITDescriptor::AK_AnyPointer: return !isa<PointerType>(Ty);
    default: break;
    }
    llvm_unreachable("all argument kinds not covered");

  case IITDescriptor::ExtendArgument: {
    // If this is a forward reference, defer the check for later.
    if (D.getArgumentNumber() >= ArgTys.size())
      return IsDeferredCheck || DeferCheck(Ty);

    Type *NewTy = ArgTys[D.getArgumentNumber()];
    if (VectorType *VTy = dyn_cast<VectorType>(NewTy))
      NewTy = VectorType::getExtendedElementVectorType(VTy);
    else if (IntegerType *ITy = dyn_cast<IntegerType>(NewTy))
      NewTy = IntegerType::get(ITy->getContext(), 2 * ITy->getBitWidth());
    else
      return true;

    return Ty != NewTy;
  }
  case IITDescriptor::TruncArgument: {
    // If this is a forward reference, defer the check for later.
    if (D.getArgumentNumber() >= ArgTys.size())
      return IsDeferredCheck || DeferCheck(Ty);

    Type *NewTy = ArgTys[D.getArgumentNumber()];
    if (VectorType *VTy = dyn_cast<VectorType>(NewTy))
      NewTy = VectorType::getTruncatedElementVectorType(VTy);
    else if (IntegerType *ITy = dyn_cast<IntegerType>(NewTy))
      NewTy = IntegerType::get(ITy->getContext(), ITy->getBitWidth() / 2);
    else
      return true;

    return Ty != NewTy;
  }
  case IITDescriptor::HalfVecArgument:
    // If this is a forward reference, defer the check for later.
    if (D.getArgumentNumber() >= ArgTys.size())
      return IsDeferredCheck || DeferCheck(Ty);
    return !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
           VectorType::getHalfElementsVectorType(
               cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
  case IITDescriptor::SameVecWidthArgument: {
    if (D.getArgumentNumber() >= ArgTys.size()) {
      // Defer check and subsequent check for the vector element type.
      Infos = Infos.slice(1);
      return IsDeferredCheck || DeferCheck(Ty);
    }
    auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
    auto *ThisArgType = dyn_cast<VectorType>(Ty);
    // Both must be vectors of the same number of elements or neither.
    if ((ReferenceType != nullptr) != (ThisArgType != nullptr))
      return true;
    Type *EltTy = Ty;
    if (ThisArgType) {
      if (ReferenceType->getElementCount() !=
          ThisArgType->getElementCount())
        return true;
      EltTy = ThisArgType->getElementType();
    }
    return matchIntrinsicType(EltTy, Infos, ArgTys, DeferredChecks,
                              IsDeferredCheck);
  }
  case IITDescriptor::PtrToArgument: {
    if (D.getArgumentNumber() >= ArgTys.size())
      return IsDeferredCheck || DeferCheck(Ty);
    Type *ReferenceType = ArgTys[D.getArgumentNumber()];
    PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
    return (!ThisArgType || ThisArgType->getElementType() != ReferenceType);
  }
  case IITDescriptor::PtrToElt: {
    if (D.getArgumentNumber() >= ArgTys.size())
      return IsDeferredCheck || DeferCheck(Ty);
    VectorType *ReferenceType =
        dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
    PointerType *ThisArgType = dyn_cast<PointerType>(Ty);

    if (!ThisArgType || !ReferenceType)
      return true;
    if (!ThisArgType->isOpaque())
      return ThisArgType->getElementType() != ReferenceType->getElementType();
    // If typed pointers are supported, do not allow an opaque pointer, to
    // ensure uniqueness.
    return Ty->getContext().supportsTypedPointers();
  }
  case IITDescriptor::VecOfAnyPtrsToElt: {
    unsigned RefArgNumber = D.getRefArgNumber();
    if (RefArgNumber >= ArgTys.size()) {
      if (IsDeferredCheck)
        return true;
      // If forward referencing, already add the pointer-vector type and
      // defer the checks for later.
      ArgTys.push_back(Ty);
      return DeferCheck(Ty);
    }

    if (!IsDeferredCheck) {
      assert(D.getOverloadArgNumber() == ArgTys.size() &&
             "Table consistency error");
      ArgTys.push_back(Ty);
    }

    // Verify the overloaded type "matches" the Ref type: Ty must be a vector
    // with the same width as Ref, composed of pointers to the same element
    // type as Ref.
    auto *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
    auto *ThisArgVecTy = dyn_cast<VectorType>(Ty);
    if (!ThisArgVecTy || !ReferenceType ||
        (ReferenceType->getElementCount() != ThisArgVecTy->getElementCount()))
      return true;
    PointerType *ThisArgEltTy =
        dyn_cast<PointerType>(ThisArgVecTy->getElementType());
    if (!ThisArgEltTy)
      return true;
    return !ThisArgEltTy->isOpaqueOrPointeeTypeMatches(
        ReferenceType->getElementType());
  }
  case IITDescriptor::VecElementArgument: {
    if (D.getArgumentNumber() >= ArgTys.size())
      return IsDeferredCheck ? true : DeferCheck(Ty);
    auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
    return !ReferenceType || Ty != ReferenceType->getElementType();
  }
  case IITDescriptor::Subdivide2Argument:
  case IITDescriptor::Subdivide4Argument: {
    // If this is a forward reference, defer the check for later.
    if (D.getArgumentNumber() >= ArgTys.size())
      return IsDeferredCheck || DeferCheck(Ty);

    Type *NewTy = ArgTys[D.getArgumentNumber()];
    if (auto *VTy = dyn_cast<VectorType>(NewTy)) {
      int SubDivs = D.Kind == IITDescriptor::Subdivide2Argument ? 1 : 2;
      NewTy = VectorType::getSubdividedVectorType(VTy, SubDivs);
      return Ty != NewTy;
    }
    return true;
  }
  case IITDescriptor::VecOfBitcastsToInt: {
    if (D.getArgumentNumber() >= ArgTys.size())
      return IsDeferredCheck || DeferCheck(Ty);
    auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
    auto *ThisArgVecTy = dyn_cast<VectorType>(Ty);
    if (!ThisArgVecTy || !ReferenceType)
      return true;
    return ThisArgVecTy != VectorType::getInteger(ReferenceType);
  }
  }
  llvm_unreachable("unhandled");
}
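
// Note on deferred checks (informal): a forward reference arises when a
// descriptor refers to an overloaded argument that has not been recorded in
// ArgTys yet, e.g. a return type declared as LLVMMatchType<0> whose anchor is
// a later parameter. Such checks are pushed onto DeferredChecks and re-run by
// matchIntrinsicSignature below once every argument has been visited.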

Intrinsic::MatchIntrinsicTypesResult
Intrinsic::matchIntrinsicSignature(FunctionType *FTy,
                                   ArrayRef<Intrinsic::IITDescriptor> &Infos,
                                   SmallVectorImpl<Type *> &ArgTys) {
  SmallVector<DeferredIntrinsicMatchPair, 2> DeferredChecks;
  if (matchIntrinsicType(FTy->getReturnType(), Infos, ArgTys, DeferredChecks,
                         false))
    return MatchIntrinsicTypes_NoMatchRet;

  unsigned NumDeferredReturnChecks = DeferredChecks.size();

  for (auto Ty : FTy->params())
    if (matchIntrinsicType(Ty, Infos, ArgTys, DeferredChecks, false))
      return MatchIntrinsicTypes_NoMatchArg;

  for (unsigned I = 0, E = DeferredChecks.size(); I != E; ++I) {
    DeferredIntrinsicMatchPair &Check = DeferredChecks[I];
    if (matchIntrinsicType(Check.first, Check.second, ArgTys, DeferredChecks,
                           true))
      return I < NumDeferredReturnChecks ? MatchIntrinsicTypes_NoMatchRet
                                         : MatchIntrinsicTypes_NoMatchArg;
  }

  return MatchIntrinsicTypes_Match;
}

bool
Intrinsic::matchIntrinsicVarArg(bool isVarArg,
                                ArrayRef<Intrinsic::IITDescriptor> &Infos) {
  // If there are no descriptors left, then it can't be a vararg.
  if (Infos.empty())
    return isVarArg;

  // There should be only one descriptor remaining at this point.
  if (Infos.size() != 1)
    return true;

  // Check and verify the descriptor.
  IITDescriptor D = Infos.front();
  Infos = Infos.slice(1);
  if (D.Kind == IITDescriptor::VarArg)
    return !isVarArg;

  return true;
}

bool Intrinsic::getIntrinsicSignature(Function *F,
                                      SmallVectorImpl<Type *> &ArgTys) {
  Intrinsic::ID ID = F->getIntrinsicID();
  if (!ID)
    return false;

  SmallVector<Intrinsic::IITDescriptor, 8> Table;
  getIntrinsicInfoTableEntries(ID, Table);
  ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;

  if (Intrinsic::matchIntrinsicSignature(F->getFunctionType(), TableRef,
                                         ArgTys) !=
      Intrinsic::MatchIntrinsicTypesResult::MatchIntrinsicTypes_Match) {
    return false;
  }
  if (Intrinsic::matchIntrinsicVarArg(F->getFunctionType()->isVarArg(),
                                      TableRef))
    return false;
  return true;
}
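
// Illustrative use (a sketch): recovering the overloaded types a declaration
// was instantiated with. For <4 x i32> @llvm.umax.v4i32, ArgTys ends up
// holding the single type <4 x i32>:
//
//   SmallVector<Type *, 4> ArgTys;
//   if (Intrinsic::getIntrinsicSignature(F, ArgTys)) {
//     // F is a well-formed intrinsic; ArgTys holds its overloaded types.
//   }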

Optional<Function *> Intrinsic::remangleIntrinsicFunction(Function *F) {
  SmallVector<Type *, 4> ArgTys;
  if (!getIntrinsicSignature(F, ArgTys))
    return None;

  Intrinsic::ID ID = F->getIntrinsicID();
  StringRef Name = F->getName();
  std::string WantedName =
      Intrinsic::getName(ID, ArgTys, F->getParent(), F->getFunctionType());
  if (Name == WantedName)
    return None;

  Function *NewDecl = [&] {
    if (auto *ExistingGV = F->getParent()->getNamedValue(WantedName)) {
      if (auto *ExistingF = dyn_cast<Function>(ExistingGV))
        if (ExistingF->getFunctionType() == F->getFunctionType())
          return ExistingF;

      // The name already exists, but is not a function or has the wrong
      // prototype. Make room for the new one by renaming the old version.
      // Either this old version will be removed later on or the module is
      // invalid and we'll get an error.
      ExistingGV->setName(WantedName + ".renamed");
    }
    return Intrinsic::getDeclaration(F->getParent(), ID, ArgTys);
  }();

  NewDecl->setCallingConv(F->getCallingConv());
  assert(NewDecl->getFunctionType() == F->getFunctionType() &&
         "Shouldn't change the signature");
  return NewDecl;
}
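
// Typical use (a sketch; the follow-up is a hypothetical caller's job, not
// done here): after IR linking or upgrading changes the types an intrinsic
// declaration was created with, callers remangle and redirect uses:
//
//   if (Optional<Function *> Remangled =
//           Intrinsic::remangleIntrinsicFunction(F)) {
//     F->replaceAllUsesWith(*Remangled);
//     F->eraseFromParent();
//   }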

/// hasAddressTaken - returns true if there are any uses of this function
/// other than direct calls or invokes to it. Optionally ignores callback
/// uses, assume-like pointer annotation calls, and references in llvm.used
/// and llvm.compiler.used variables.
bool Function::hasAddressTaken(const User **PutOffender,
                               bool IgnoreCallbackUses,
                               bool IgnoreAssumeLikeCalls,
                               bool IgnoreLLVMUsed) const {
  for (const Use &U : uses()) {
    const User *FU = U.getUser();
    if (isa<BlockAddress>(FU))
      continue;

    if (IgnoreCallbackUses) {
      AbstractCallSite ACS(&U);
      if (ACS && ACS.isCallbackCall())
        continue;
    }

    const auto *Call = dyn_cast<CallBase>(FU);
    if (!Call) {
      if (IgnoreAssumeLikeCalls) {
        if (const auto *FI = dyn_cast<Instruction>(FU)) {
          if (FI->isCast() && !FI->user_empty() &&
              llvm::all_of(FU->users(), [](const User *U) {
                if (const auto *I = dyn_cast<IntrinsicInst>(U))
                  return I->isAssumeLikeIntrinsic();
                return false;
              }))
            continue;
        }
      }
      if (IgnoreLLVMUsed && !FU->user_empty()) {
        const User *FUU = FU;
        if (isa<BitCastOperator>(FU) && FU->hasOneUse() &&
            !FU->user_begin()->user_empty())
          FUU = *FU->user_begin();
        if (llvm::all_of(FUU->users(), [](const User *U) {
              if (const auto *GV = dyn_cast<GlobalVariable>(U))
                return GV->hasName() &&
                       (GV->getName().equals("llvm.compiler.used") ||
                        GV->getName().equals("llvm.used"));
              return false;
            }))
          continue;
      }
      if (PutOffender)
        *PutOffender = FU;
      return true;
    }
    if (!Call->isCallee(&U)) {
      if (PutOffender)
        *PutOffender = FU;
      return true;
    }
  }
  return false;
}
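
// For example (illustrative IR): given a function @f,
//
//   call void @f()                       ; direct call: not address-taken
//   store void ()* @f, void ()** @slot   ; escapes: address is taken
//
// and with IgnoreLLVMUsed=true an appearance of @f in the initializer of
// @llvm.used would be skipped as well.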

bool Function::isDefTriviallyDead() const {
  // Check the linkage.
  if (!hasLinkOnceLinkage() && !hasLocalLinkage() &&
      !hasAvailableExternallyLinkage())
    return false;

  // Check if the function is used by anything other than a blockaddress.
  for (const User *U : users())
    if (!isa<BlockAddress>(U))
      return false;

  return true;
}

/// callsFunctionThatReturnsTwice - Return true if the function has a call to
/// setjmp or other function that gcc recognizes as "returning twice".
bool Function::callsFunctionThatReturnsTwice() const {
  for (const Instruction &I : instructions(this))
    if (const auto *Call = dyn_cast<CallBase>(&I))
      if (Call->hasFnAttr(Attribute::ReturnsTwice))
        return true;

  return false;
}

Constant *Function::getPersonalityFn() const {
  assert(hasPersonalityFn() && getNumOperands());
  return cast<Constant>(Op<0>());
}

void Function::setPersonalityFn(Constant *Fn) {
  setHungoffOperand<0>(Fn);
  setValueSubclassDataBit(3, Fn != nullptr);
}

Constant *Function::getPrefixData() const {
  assert(hasPrefixData() && getNumOperands());
  return cast<Constant>(Op<1>());
}

void Function::setPrefixData(Constant *PrefixData) {
  setHungoffOperand<1>(PrefixData);
  setValueSubclassDataBit(1, PrefixData != nullptr);
}

Constant *Function::getPrologueData() const {
  assert(hasPrologueData() && getNumOperands());
  return cast<Constant>(Op<2>());
}

void Function::setPrologueData(Constant *PrologueData) {
  setHungoffOperand<2>(PrologueData);
  setValueSubclassDataBit(2, PrologueData != nullptr);
}

void Function::allocHungoffUselist() {
  // If we've already allocated a uselist, stop here.
  if (getNumOperands())
    return;

  allocHungoffUses(3, /*IsPhi=*/ false);
  setNumHungOffUseOperands(3);

  // Initialize the uselist with placeholder operands to allow traversal.
  auto *CPN = ConstantPointerNull::get(Type::getInt1PtrTy(getContext(), 0));
  Op<0>().set(CPN);
  Op<1>().set(CPN);
  Op<2>().set(CPN);
}

template <int Idx>
void Function::setHungoffOperand(Constant *C) {
  if (C) {
    allocHungoffUselist();
    Op<Idx>().set(C);
  } else if (getNumOperands()) {
    Op<Idx>().set(
        ConstantPointerNull::get(Type::getInt1PtrTy(getContext(), 0)));
  }
}

void Function::setValueSubclassDataBit(unsigned Bit, bool On) {
  assert(Bit < 16 && "SubclassData contains only 16 bits");
  if (On)
    setValueSubclassData(getSubclassDataFromValue() | (1 << Bit));
  else
    setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit));
}

void Function::setEntryCount(ProfileCount Count,
                             const DenseSet<GlobalValue::GUID> *S) {
  assert(Count.hasValue());
#if !defined(NDEBUG)
  auto PrevCount = getEntryCount();
  assert(!PrevCount.hasValue() || PrevCount.getType() == Count.getType());
#endif

  auto ImportGUIDs = getImportGUIDs();
  if (S == nullptr && ImportGUIDs.size())
    S = &ImportGUIDs;

  MDBuilder MDB(getContext());
  setMetadata(
      LLVMContext::MD_prof,
      MDB.createFunctionEntryCount(Count.getCount(), Count.isSynthetic(), S));
}

void Function::setEntryCount(uint64_t Count, Function::ProfileCountType Type,
                             const DenseSet<GlobalValue::GUID> *Imports) {
  setEntryCount(ProfileCount(Count, Type), Imports);
}

ProfileCount Function::getEntryCount(bool AllowSynthetic) const {
  MDNode *MD = getMetadata(LLVMContext::MD_prof);
  if (MD && MD->getOperand(0))
    if (MDString *MDS = dyn_cast<MDString>(MD->getOperand(0))) {
      if (MDS->getString().equals("function_entry_count")) {
        ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(1));
        uint64_t Count = CI->getValue().getZExtValue();
        // A value of -1 is used for SamplePGO when there were no samples.
        // Treat this the same as unknown.
        if (Count == (uint64_t)-1)
          return ProfileCount::getInvalid();
        return ProfileCount(Count, PCT_Real);
      } else if (AllowSynthetic &&
                 MDS->getString().equals("synthetic_function_entry_count")) {
        ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(1));
        uint64_t Count = CI->getValue().getZExtValue();
        return ProfileCount(Count, PCT_Synthetic);
      }
    }
  return ProfileCount::getInvalid();
}
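
// The entry-count metadata read above has the shape (sketch):
//
//   define void @f() !prof !0 { ... }
//   !0 = !{!"function_entry_count", i64 2500, i64 <imported GUID>, ...}
//
// where the optional trailing operands are the GUIDs collected by
// getImportGUIDs() below.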

DenseSet<GlobalValue::GUID> Function::getImportGUIDs() const {
  DenseSet<GlobalValue::GUID> R;
  if (MDNode *MD = getMetadata(LLVMContext::MD_prof))
    if (MDString *MDS = dyn_cast<MDString>(MD->getOperand(0)))
      if (MDS->getString().equals("function_entry_count"))
        for (unsigned i = 2; i < MD->getNumOperands(); i++)
          R.insert(mdconst::extract<ConstantInt>(MD->getOperand(i))
                       ->getValue()
                       .getZExtValue());
  return R;
}
|
|
|
|
|
2016-10-18 22:42:47 +02:00
|
|
|
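
// The operand layout this loop relies on matches createFunctionEntryCount:
// operand 0 is !"function_entry_count", operand 1 is the count, and operands
// 2..N-1 hold one GUID per imported function, e.g. (illustrative):
//
//   !0 = !{!"function_entry_count", i64 100, i64 -241522315550176126}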
void Function::setSectionPrefix(StringRef Prefix) {
  MDBuilder MDB(getContext());
  setMetadata(LLVMContext::MD_section_prefix,
              MDB.createFunctionSectionPrefix(Prefix));
}
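
// Usage sketch (prefix string illustrative): a profile-guided client can tag
// functions so the backend groups them into distinct text sections:
//
//   F->setSectionPrefix("hot");
//
// which attaches !section_prefix !{!"function_section_prefix", !"hot"}.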
Optional<StringRef> Function::getSectionPrefix() const {
  if (MDNode *MD = getMetadata(LLVMContext::MD_section_prefix)) {
    assert(cast<MDString>(MD->getOperand(0))
               ->getString()
               .equals("function_section_prefix") &&
           "Metadata does not match");
    return cast<MDString>(MD->getOperand(1))->getString();
  }
  return None;
}
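
// Round-trip sketch for the pair of accessors above (prefix value
// illustrative):
//
//   F->setSectionPrefix("hot");
//   if (Optional<StringRef> P = F->getSectionPrefix())
//     assert(P->equals("hot"));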

// Support for "-fno-delete-null-pointer-checks": the Linux kernel and
// similar low-level code require that null pointer dereference not be
// treated as undefined behavior, since code or data may legitimately live
// at address zero. This is modeled as a function attribute, and passes that
// would otherwise exploit null-dereference UB must check it first. See
// https://reviews.llvm.org/D47895 and https://lkml.org/lkml/2018/4/4/601.
bool Function::nullPointerIsDefined() const {
  return hasFnAttribute(Attribute::NullPointerIsValid);
}
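
// In textual IR the attribute queried above appears on the definition
// (current syntax shown; older releases used the string attribute
// "null-pointer-is-valid"="true"):
//
//   define void @f(i8* %p) null_pointer_is_valid {
//     %v = load i8, i8* %p   ; not UB even when %p is null
//     ret void
//   }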
bool llvm::NullPointerIsDefined(const Function *F, unsigned AS) {
  if (F && F->nullPointerIsDefined())
    return true;

  // Only address space 0 carries the assumption that nothing resides at the
  // null address; in any other address space a null pointer may be
  // dereferenceable.
  if (AS != 0)
    return true;

  return false;
}
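
// Caller sketch (illustrative): a pass deciding whether a reached load
// implies its pointer is non-null should consult both the function attribute
// and the pointer's address space:
//
//   if (!NullPointerIsDefined(LI->getFunction(),
//                             LI->getPointerAddressSpace()))
//     ; // null is unmapped here, so executing LI implies a non-null pointer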