
Add element atomic memset intrinsic

Summary: Continuing the work from https://reviews.llvm.org/D33240, this change introduces an element unordered-atomic memset intrinsic. This intrinsic is essentially memset with the implementation requirement that all stores used for the assignment are done with unordered-atomic stores of a given element size.

Reviewers: eli.friedman, reames, mkazantsev, skatkov

Reviewed By: reames

Subscribers: jfb, dschuff, sbc100, jgravelle-google, aheejin, efriedma, llvm-commits

Differential Revision: https://reviews.llvm.org/D34885

llvm-svn: 307854
Daniel Neilson 2017-07-12 21:57:23 +00:00
parent 7d0fdbaded
commit 84653da20b
10 changed files with 359 additions and 1 deletion


@@ -10339,6 +10339,8 @@ copies "len" bytes of memory over. If the argument is known to be
aligned to some boundary, this can be specified as the fourth argument,
otherwise it should be set to 0 or 1 (both meaning no alignment).
.. _int_memset:
'``llvm.memset.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -14258,3 +14260,76 @@ In the most general case call to the
actual element size.
The optimizer is allowed to inline the memory copy when it's profitable to do so.
.. _int_memset_element_unordered_atomic:
'``llvm.memset.element.unordered.atomic``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic. You can use ``llvm.memset.element.unordered.atomic`` on
any integer bit width and for different address spaces. Not all targets
support all bit widths however.
::
declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* <dest>,
                                                            i8 <value>,
                                                            i32 <len>,
                                                            i32 <element_size>)

declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* <dest>,
                                                            i8 <value>,
                                                            i64 <len>,
                                                            i32 <element_size>)
Overview:
"""""""""
The '``llvm.memset.element.unordered.atomic.*``' intrinsic is a specialization of the
'``llvm.memset.*``' intrinsic. It differs in that the ``dest`` is treated as an array
with elements that are exactly ``element_size`` bytes, and the assignment to that array
uses a sequence of :ref:`unordered atomic <ordering>` store operations
that are a positive integer multiple of the ``element_size`` in size.
Arguments:
""""""""""
The first three arguments are the same as they are in the :ref:`@llvm.memset <int_memset>`
intrinsic, with the added constraint that ``len`` is required to be a positive integer
multiple of the ``element_size``. If ``len`` is not a positive integer multiple of
``element_size``, then the behaviour of the intrinsic is undefined.
``element_size`` must be a compile-time constant positive power of two no greater than
the target-specific atomic access size limit.
The ``dest`` input pointer must have the ``align`` parameter attribute specified. It
must be a power of two no less than the ``element_size``. The caller guarantees that
the destination pointer is aligned to that boundary.
Semantics:
""""""""""
The '``llvm.memset.element.unordered.atomic.*``' intrinsic sets the ``len`` bytes of
memory starting at the destination location to the given ``value``. The memory is
set with a sequence of store operations where each access is guaranteed to be a
multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary.
The order of the assignment is unspecified. Only one write is issued to the
destination buffer per element. It is well defined to have concurrent reads and
writes to the destination provided those reads and writes are unordered atomic
when specified.
This intrinsic does not provide any additional ordering guarantees over those
provided by a set of unordered stores to the destination.
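As a rough reference model (a sketch only: the function below is invented for
illustration, and C++ ``memory_order_relaxed`` is a somewhat stronger ordering than
LLVM's ``unordered``), the semantics for an ``element_size`` of 4 resemble::

    #include <atomic>   // std::atomic_ref (C++20)
    #include <cstdint>
    #include <cstring>

    // Hypothetical reference behaviour: 'len' is in bytes and must be a
    // positive multiple of the 4-byte element size.
    void memset_element_atomic_4(uint32_t *dest, uint8_t value, uint64_t len) {
      uint32_t pattern;
      std::memset(&pattern, value, sizeof(pattern)); // splat the byte value
      for (uint64_t i = 0; i != len / 4; ++i)
        // Exactly one store per element; the order between elements is
        // unspecified.
        std::atomic_ref<uint32_t>(dest[i]).store(pattern,
                                                 std::memory_order_relaxed);
    }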
Lowering:
"""""""""
In the most general case, a call to the '``llvm.memset.element.unordered.atomic.*``'
intrinsic is lowered to a call to the symbol ``__llvm_memset_element_unordered_atomic_*``,
where '*' is replaced with the actual element size.
The optimizer is allowed to inline the memory assignment when it's profitable to do so.
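Judging from the argument list built by the SelectionDAG lowering in this change
(destination pointer, ``i8`` value, integer length), the runtime entry point for a
4-byte element size plausibly has a C prototype along these lines (an assumption,
not a documented ABI)::

    #include <cstddef> // size_t
    #include <cstdint> // uint8_t

    extern "C" void
    __llvm_memset_element_unordered_atomic_4(void *dest, uint8_t value,
                                             size_t len);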


@@ -346,6 +346,12 @@ namespace RTLIB {
MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8,
MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16,
MEMSET_ELEMENT_UNORDERED_ATOMIC_1,
MEMSET_ELEMENT_UNORDERED_ATOMIC_2,
MEMSET_ELEMENT_UNORDERED_ATOMIC_4,
MEMSET_ELEMENT_UNORDERED_ATOMIC_8,
MEMSET_ELEMENT_UNORDERED_ATOMIC_16,
// EXCEPTION HANDLING
UNWIND_RESUME,
@@ -526,6 +532,12 @@ namespace RTLIB {
/// MEMMOVE_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
/// UNKNOWN_LIBCALL if there is none.
Libcall getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
/// getMEMSET_ELEMENT_UNORDERED_ATOMIC - Return
/// MEMSET_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
/// UNKNOWN_LIBCALL if there is none.
Libcall getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
}
}


@@ -385,6 +385,86 @@ namespace llvm {
}
};
/// This class represents the atomic memset intrinsic.
/// TODO: Integrate this class into the MemIntrinsic hierarchy; for now it is
/// a copy-and-paste of the corresponding methods from that hierarchy.
class ElementUnorderedAtomicMemSetInst : public IntrinsicInst {
private:
enum { ARG_DEST = 0, ARG_VALUE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 };
public:
Value *getRawDest() const {
return const_cast<Value *>(getArgOperand(ARG_DEST));
}
const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); }
Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); }
Value *getValue() const { return const_cast<Value*>(getArgOperand(ARG_VALUE)); }
const Use &getValueUse() const { return getArgOperandUse(ARG_VALUE); }
Use &getValueUse() { return getArgOperandUse(ARG_VALUE); }
Value *getLength() const {
return const_cast<Value *>(getArgOperand(ARG_LENGTH));
}
const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); }
Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); }
bool isVolatile() const { return false; }
Value *getRawElementSizeInBytes() const {
return const_cast<Value *>(getArgOperand(ARG_ELEMENTSIZE));
}
ConstantInt *getElementSizeInBytesCst() const {
return cast<ConstantInt>(getRawElementSizeInBytes());
}
uint32_t getElementSizeInBytes() const {
return getElementSizeInBytesCst()->getZExtValue();
}
/// This is just like getRawDest, but it strips off any cast
/// instructions that feed it, giving the original input. The returned
/// value is guaranteed to be a pointer.
Value *getDest() const { return getRawDest()->stripPointerCasts(); }
unsigned getDestAddressSpace() const {
return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
}
/// Set the specified arguments of the instruction.
void setDest(Value *Ptr) {
assert(getRawDest()->getType() == Ptr->getType() &&
"setDest called with pointer of wrong type!");
setArgOperand(ARG_DEST, Ptr);
}
void setValue(Value *Val) {
assert(getValue()->getType() == Val->getType() &&
"setValue called with value of wrong type!");
setArgOperand(ARG_VALUE, Val);
}
void setLength(Value *L) {
assert(getLength()->getType() == L->getType() &&
"setLength called with value of wrong type!");
setArgOperand(ARG_LENGTH, L);
}
void setElementSizeInBytes(Constant *V) {
assert(V->getType() == Type::getInt32Ty(getContext()) &&
"setElementSizeInBytes called with value of wrong type!");
setArgOperand(ARG_ELEMENTSIZE, V);
}
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memset_element_unordered_atomic;
}
static inline bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
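// Usage sketch (hypothetical helper, not part of this patch): a pass can
// recognize the new intrinsic with dyn_cast<> and query it through the
// accessors defined above.
static bool isWideElementAtomicMemSet(const Instruction *I) {
  if (const auto *AMS = dyn_cast<ElementUnorderedAtomicMemSetInst>(I))
    return AMS->getElementSizeInBytes() >= 8; // constant, power of two
  return false;
}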
/// This is the common base class for memset/memcpy/memmove.
class MemIntrinsic : public IntrinsicInst {
public:


@@ -884,6 +884,10 @@ def int_memmove_element_unordered_atomic
ReadOnly<1>
]>;
// @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize)
def int_memset_element_unordered_atomic
: Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ],
[ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>;
//===------------------------ Reduction Intrinsics ------------------------===//
//


@@ -5032,6 +5032,45 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(CallResult.second);
return nullptr;
}
case Intrinsic::memset_element_unordered_atomic: {
auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
Entry.Node = Dst;
Args.push_back(Entry);
Entry.Ty = Type::getInt8Ty(*DAG.getContext());
Entry.Node = Val;
Args.push_back(Entry);
Entry.Ty = MI.getLength()->getType();
Entry.Node = Length;
Args.push_back(Entry);
uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
RTLIB::Libcall LibraryCall =
RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported element size");
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
TLI.getLibcallCallingConv(LibraryCall),
Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
TLI.getPointerTy(DAG.getDataLayout())),
std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
DAG.setRoot(CallResult.second);
return nullptr;
}
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
DILocalVariable *Variable = DI.getVariable();


@@ -394,6 +394,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
"__llvm_memmove_element_unordered_atomic_8";
Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] =
"__llvm_memmove_element_unordered_atomic_16";
Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] =
"__llvm_memset_element_unordered_atomic_1";
Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] =
"__llvm_memset_element_unordered_atomic_2";
Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] =
"__llvm_memset_element_unordered_atomic_4";
Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] =
"__llvm_memset_element_unordered_atomic_8";
Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] =
"__llvm_memset_element_unordered_atomic_16";
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
@@ -830,6 +840,23 @@ RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize)
}
}
RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
switch (ElementSize) {
case 1:
return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
case 2:
return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
case 4:
return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
case 8:
return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
case 16:
return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
default:
return UNKNOWN_LIBCALL;
}
}
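// Usage sketch (hypothetical helper mirroring the SelectionDAGBuilder change
// in this patch): map a constant element size to its libcall and symbol name.
static const char *getAtomicMemSetSymbol(const TargetLoweringBase &TLI,
                                         uint64_t ElementSize) {
  RTLIB::Libcall LC = RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSize);
  if (LC == RTLIB::UNKNOWN_LIBCALL)
    return nullptr; // element size has no runtime entry point
  return TLI.getLibcallName(LC); // e.g. "__llvm_memset_element_unordered_atomic_4"
}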
/// InitCmpLibcallCCs - Set default comparison libcall CC.
///
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {


@@ -4077,7 +4077,38 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
"incorrect alignment of the destination argument", CS);
Assert(IsValidAlignment(SrcAlignment),
"incorrect alignment of the source argument", CS);
break;
}
case Intrinsic::memset_element_unordered_atomic: {
auto *MI = cast<ElementUnorderedAtomicMemSetInst>(CS.getInstruction());
ConstantInt *ElementSizeCI =
dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes());
Assert(ElementSizeCI,
"element size of the element-wise unordered atomic memory "
"intrinsic must be a constant int",
CS);
const APInt &ElementSizeVal = ElementSizeCI->getValue();
Assert(ElementSizeVal.isPowerOf2(),
"element size of the element-wise atomic memory intrinsic "
"must be a power of 2",
CS);
if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) {
uint64_t Length = LengthCI->getZExtValue();
uint64_t ElementSize = MI->getElementSizeInBytes();
Assert((Length % ElementSize) == 0,
"constant length must be a multiple of the element size in the "
"element-wise atomic memory intrinsic",
CS);
}
auto IsValidAlignment = [&](uint64_t Alignment) {
return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
};
uint64_t DstAlignment = CS.getParamAlignment(0);
Assert(IsValidAlignment(DstAlignment),
"incorrect alignment of the destination argument", CS);
break;
}
case Intrinsic::gcroot:


@@ -405,6 +405,12 @@ RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = {
/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported,
/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported,
/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported,
/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
// EXCEPTION HANDLING
/* UNWIND_RESUME */ unsupported,
@@ -854,6 +860,11 @@ RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = {
/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr,
/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr,
/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
/* UNWIND_RESUME */ "_Unwind_Resume",
/* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1",
/* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2",


@@ -124,5 +124,66 @@ define void @test_memmove_args(i8** %Storage) {
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %Dst, i8* align 4 %Src, i32 4, i32 4)
ret void
}
define i8* @test_memset1(i8* %P, i8 %V) {
; CHECK: test_memset1
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 1)
ret i8* %P
; 3rd arg (%edx) -- length
; CHECK-DAG: movl $1, %edx
; CHECK: __llvm_memset_element_unordered_atomic_1
}
define i8* @test_memset2(i8* %P, i8 %V) {
; CHECK: test_memset2
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 2, i32 2)
ret i8* %P
; 3rd arg (%edx) -- length
; CHECK-DAG: movl $2, %edx
; CHECK: __llvm_memset_element_unordered_atomic_2
}
define i8* @test_memset4(i8* %P, i8 %V) {
; CHECK: test_memset4
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 4, i32 4)
ret i8* %P
; 3rd arg (%edx) -- length
; CHECK-DAG: movl $4, %edx
; CHECK: __llvm_memset_element_unordered_atomic_4
}
define i8* @test_memset8(i8* %P, i8 %V) {
; CHECK: test_memset8
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 8, i32 8)
ret i8* %P
; 3rd arg (%edx) -- length
; CHECK-DAG: movl $8, %edx
; CHECK: __llvm_memset_element_unordered_atomic_8
}
define i8* @test_memset16(i8* %P, i8 %V) {
; CHECK: test_memset16
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 16, i32 16)
ret i8* %P
; 3rd arg (%edx) -- length
; CHECK-DAG: movl $16, %edx
; CHECK: __llvm_memset_element_unordered_atomic_16
}
define void @test_memset_args(i8** %Storage, i8* %V) {
; CHECK: test_memset_args
%Dst = load i8*, i8** %Storage
%Val = load i8, i8* %V
; 1st arg (%rdi)
; CHECK-DAG: movq (%rdi), %rdi
; 2nd arg (%rsi)
; CHECK-DAG: movzbl (%rsi), %esi
; 3rd arg (%edx) -- length
; CHECK-DAG: movl $4, %edx
; CHECK: __llvm_memset_element_unordered_atomic_4
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %Dst, i8 %Val, i32 4, i32 4)
ret void
}
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind


@@ -46,4 +46,22 @@ define void @test_memmove(i8* %P, i8* %Q, i32 %A, i32 %E) {
}
declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
define void @test_memset(i8* %P, i8 %V, i32 %A, i32 %E) {
; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E)
; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 3)
; CHECK: constant length must be a multiple of the element size in the element-wise atomic memory intrinsic
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 7, i32 4)
; CHECK: incorrect alignment of the destination argument
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* %P, i8 %V, i32 1, i32 1)
; CHECK: incorrect alignment of the destination argument
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %P, i8 %V, i32 4, i32 4)
ret void
}
declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind
; CHECK: input module is broken!