diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index e8c2506c639..44efc149806 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -10339,6 +10339,8 @@ copies "len" bytes of memory over. If the argument is known to be
 aligned to some boundary, this can be specified as the fourth argument,
 otherwise it should be set to 0 or 1 (both meaning no alignment).
 
+.. _int_memset:
+
 '``llvm.memset.*``' Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -14258,3 +14260,76 @@ In the most general case call to the
 actual element size.
 
 The optimizer is allowed to inline the memory copy when it's profitable to do so.
+
+.. _int_memset_element_unordered_atomic:
+
+'``llvm.memset.element.unordered.atomic``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memset.element.unordered.atomic`` on
+any integer bit width and for different address spaces. Not all targets
+support all bit widths, however.
+
+::
+
+      declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* <dest>,
+                                                                  i8 <value>,
+                                                                  i32 <length>,
+                                                                  i32 <element_size>)
+      declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* <dest>,
+                                                                  i8 <value>,
+                                                                  i64 <length>,
+                                                                  i32 <element_size>)
+
+Overview:
+"""""""""
+
+The '``llvm.memset.element.unordered.atomic.*``' intrinsic is a specialization of the
+'``llvm.memset.*``' intrinsic. It differs in that ``dest`` is treated as an array
+with elements that are exactly ``element_size`` bytes, and the assignment to that array
+uses a sequence of :ref:`unordered atomic <ordering>` store operations
+that are a positive integer multiple of the ``element_size`` in size.
+
+Arguments:
+""""""""""
+
+The first three arguments are the same as they are in the :ref:`@llvm.memset <int_memset>`
+intrinsic, with the added constraint that ``len`` is required to be a positive integer
+multiple of the ``element_size``. If ``len`` is not a positive integer multiple of
+``element_size``, then the behaviour of the intrinsic is undefined.
+
+``element_size`` must be a compile-time constant positive power of two no greater than
+the target-specific atomic access size limit.
+
+The ``dest`` input pointer must have the ``align`` parameter attribute specified. That
+alignment must be a power of two no less than the ``element_size``. The caller guarantees
+that the destination pointer is aligned to that boundary.
+
+Semantics:
+""""""""""
+
+The '``llvm.memset.element.unordered.atomic.*``' intrinsic sets the ``len`` bytes of
+memory starting at the destination location to the given ``value``. The memory is
+set with a sequence of store operations where each access is guaranteed to be a
+multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary.
+
+The order of the assignment is unspecified. Only one write is issued to the
+destination buffer per element. It is well defined to have concurrent reads and
+writes to the destination provided those reads and writes are unordered atomic
+when specified.
+
+This intrinsic does not provide any additional ordering guarantees over those
+provided by a set of unordered stores to the destination.
+
+Lowering:
+"""""""""
+
+In the most general case, a call to '``llvm.memset.element.unordered.atomic.*``' is
+lowered to a call to the symbol ``__llvm_memset_element_unordered_atomic_*``, where
+'*' is replaced with the actual element size.
+
+The optimizer is allowed to inline the memory assignment when it's profitable to do so.
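[Reviewer note] For illustration only — this patch does not add a runtime
implementation of the ``__llvm_memset_element_unordered_atomic_*`` symbols. A
minimal sketch of what such a function could look like for element size 4 is
below. The ``(void *, uint8_t, size_t)`` signature is an assumption inferred
from the argument list built in SelectionDAGBuilder, and C++'s relaxed
ordering is used as a conservative stand-in for LLVM's weaker unordered
ordering::

    #include <stddef.h>
    #include <stdint.h>

    // Hypothetical lowering target for element size 4: every element is
    // written with exactly one 32-bit atomic store, in unspecified order.
    // The caller guarantees len is a positive multiple of 4 and that dst
    // is at least 4-byte aligned.
    extern "C" void __llvm_memset_element_unordered_atomic_4(void *dst,
                                                             uint8_t value,
                                                             size_t len) {
      uint32_t pattern = 0x01010101u * value; // splat the byte to 32 bits
      uint32_t *p = static_cast<uint32_t *>(dst);
      for (size_t i = 0; i != len / 4; ++i)
        __atomic_store_n(&p[i], pattern, __ATOMIC_RELAXED);
    }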
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index b35dded065c..08151be1108 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -346,6 +346,12 @@ namespace RTLIB {
     MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8,
     MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16,
 
+    MEMSET_ELEMENT_UNORDERED_ATOMIC_1,
+    MEMSET_ELEMENT_UNORDERED_ATOMIC_2,
+    MEMSET_ELEMENT_UNORDERED_ATOMIC_4,
+    MEMSET_ELEMENT_UNORDERED_ATOMIC_8,
+    MEMSET_ELEMENT_UNORDERED_ATOMIC_16,
+
     // EXCEPTION HANDLING
     UNWIND_RESUME,
 
@@ -526,6 +532,12 @@ namespace RTLIB {
   /// MEMMOVE_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
   /// UNKNOWN_LIBCALL if there is none.
   Libcall getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
+
+  /// getMEMSET_ELEMENT_UNORDERED_ATOMIC - Return
+  /// MEMSET_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
+  /// UNKNOWN_LIBCALL if there is none.
+  Libcall getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
+
 }
 }
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
index 30177b51adc..f55d17ec72c 100644
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -385,6 +385,86 @@ namespace llvm {
     }
   };
 
+  /// This class represents an atomic memset intrinsic.
+  /// TODO: Integrate this class into the MemIntrinsic hierarchy; for now this
+  /// is a copy-and-paste of all the methods from that hierarchy.
+  class ElementUnorderedAtomicMemSetInst : public IntrinsicInst {
+  private:
+    enum { ARG_DEST = 0, ARG_VALUE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 };
+
+  public:
+    Value *getRawDest() const {
+      return const_cast<Value *>(getArgOperand(ARG_DEST));
+    }
+    const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); }
+    Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); }
+
+    Value *getValue() const {
+      return const_cast<Value *>(getArgOperand(ARG_VALUE));
+    }
+    const Use &getValueUse() const { return getArgOperandUse(ARG_VALUE); }
+    Use &getValueUse() { return getArgOperandUse(ARG_VALUE); }
+
+    Value *getLength() const {
+      return const_cast<Value *>(getArgOperand(ARG_LENGTH));
+    }
+    const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); }
+    Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); }
+
+    bool isVolatile() const { return false; }
+
+    Value *getRawElementSizeInBytes() const {
+      return const_cast<Value *>(getArgOperand(ARG_ELEMENTSIZE));
+    }
+
+    ConstantInt *getElementSizeInBytesCst() const {
+      return cast<ConstantInt>(getRawElementSizeInBytes());
+    }
+
+    uint32_t getElementSizeInBytes() const {
+      return getElementSizeInBytesCst()->getZExtValue();
+    }
+
+    /// This is just like getRawDest, but it strips off any cast
+    /// instructions that feed it, giving the original input. The returned
+    /// value is guaranteed to be a pointer.
+    Value *getDest() const { return getRawDest()->stripPointerCasts(); }
+
+    unsigned getDestAddressSpace() const {
+      return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
+    }
+
+    /// Set the specified arguments of the instruction.
+    void setDest(Value *Ptr) {
+      assert(getRawDest()->getType() == Ptr->getType() &&
+             "setDest called with pointer of wrong type!");
+      setArgOperand(ARG_DEST, Ptr);
+    }
+
+    void setValue(Value *Val) {
+      assert(getValue()->getType() == Val->getType() &&
+             "setValue called with value of wrong type!");
+      setArgOperand(ARG_VALUE, Val);
+    }
+
+    void setLength(Value *L) {
+      assert(getLength()->getType() == L->getType() &&
+             "setLength called with value of wrong type!");
+      setArgOperand(ARG_LENGTH, L);
+    }
+
+    void setElementSizeInBytes(Constant *V) {
+      assert(V->getType() == Type::getInt8Ty(getContext()) &&
+             "setElementSizeInBytes called with value of wrong type!");
+      setArgOperand(ARG_ELEMENTSIZE, V);
+    }
+
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::memset_element_unordered_atomic;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
   /// This is the common base class for memset/memcpy/memmove.
   class MemIntrinsic : public IntrinsicInst {
   public:
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index eedd8333ed3..14c88e51943 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -884,6 +884,10 @@ def int_memmove_element_unordered_atomic
                 ReadOnly<1>
               ]>;
 
+// @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize)
+def int_memset_element_unordered_atomic
+    : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ],
+                [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>;
+
 //===------------------------ Reduction Intrinsics ------------------------===//
 //
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index da494a55855..28038484345 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5032,6 +5032,45 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(CallResult.second);
     return nullptr;
   }
+  case Intrinsic::memset_element_unordered_atomic: {
+    auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
+    SDValue Dst = getValue(MI.getRawDest());
+    SDValue Val = getValue(MI.getValue());
+    SDValue Length = getValue(MI.getLength());
+
+    // Emit a library call.
+    TargetLowering::ArgListTy Args;
+    TargetLowering::ArgListEntry Entry;
+    Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+    Entry.Node = Dst;
+    Args.push_back(Entry);
+
+    Entry.Ty = Type::getInt8Ty(*DAG.getContext());
+    Entry.Node = Val;
+    Args.push_back(Entry);
+
+    Entry.Ty = MI.getLength()->getType();
+    Entry.Node = Length;
+    Args.push_back(Entry);
+
+    uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
+    RTLIB::Libcall LibraryCall =
+        RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
+    if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+      report_fatal_error("Unsupported element size");
+
+    TargetLowering::CallLoweringInfo CLI(DAG);
+    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+        TLI.getLibcallCallingConv(LibraryCall),
+        Type::getVoidTy(*DAG.getContext()),
+        DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+                              TLI.getPointerTy(DAG.getDataLayout())),
+        std::move(Args));
+
+    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+    DAG.setRoot(CallResult.second);
+    return nullptr;
+  }
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
     DILocalVariable *Variable = DI.getVariable();
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 3e633322f13..3914ee51471 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -394,6 +394,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
       "__llvm_memmove_element_unordered_atomic_8";
   Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] =
       "__llvm_memmove_element_unordered_atomic_16";
+  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] =
+      "__llvm_memset_element_unordered_atomic_1";
+  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] =
+      "__llvm_memset_element_unordered_atomic_2";
+  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] =
+      "__llvm_memset_element_unordered_atomic_4";
+  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] =
+      "__llvm_memset_element_unordered_atomic_8";
+  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] =
+      "__llvm_memset_element_unordered_atomic_16";
   Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
   Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
   Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
@@ -830,6 +840,23 @@ RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize)
   }
 }
 
+RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
+  switch (ElementSize) {
+  case 1:
+    return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
+  case 2:
+    return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
+  case 4:
+    return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
+  case 8:
+    return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
+  case 16:
+    return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
+  default:
+    return UNKNOWN_LIBCALL;
+  }
+}
+
 /// InitCmpLibcallCCs - Set default comparison libcall CC.
 ///
 static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 1627a225435..454a56a7692 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -4077,7 +4077,38 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
            "incorrect alignment of the destination argument", CS);
     Assert(IsValidAlignment(SrcAlignment),
            "incorrect alignment of the source argument", CS);
-
     break;
   }
+  case Intrinsic::memset_element_unordered_atomic: {
+    auto *MI = cast<ElementUnorderedAtomicMemSetInst>(CS.getInstruction());
+
+    ConstantInt *ElementSizeCI =
+        dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes());
+    Assert(ElementSizeCI,
+           "element size of the element-wise unordered atomic memory "
+           "intrinsic must be a constant int",
+           CS);
+    const APInt &ElementSizeVal = ElementSizeCI->getValue();
+    Assert(ElementSizeVal.isPowerOf2(),
+           "element size of the element-wise atomic memory intrinsic "
+           "must be a power of 2",
+           CS);
+
+    if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) {
+      uint64_t Length = LengthCI->getZExtValue();
+      uint64_t ElementSize = MI->getElementSizeInBytes();
+      Assert((Length % ElementSize) == 0,
+             "constant length must be a multiple of the element size in the "
+             "element-wise atomic memory intrinsic",
+             CS);
+    }
+
+    auto IsValidAlignment = [&](uint64_t Alignment) {
+      return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
+    };
+    uint64_t DstAlignment = CS.getParamAlignment(0);
+    Assert(IsValidAlignment(DstAlignment),
+           "incorrect alignment of the destination argument", CS);
+    break;
+  }
   case Intrinsic::gcroot:
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index 426ae65b950..2599064334e 100644
--- a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -405,6 +405,12 @@ RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = {
 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
 
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
+
 // EXCEPTION HANDLING
 /* UNWIND_RESUME */ unsupported,
 
@@ -854,6 +860,11 @@ RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = {
 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
 /* UNWIND_RESUME */ "_Unwind_Resume",
 /* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1",
 /* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2",
diff --git a/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll b/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll
index 755c70865e0..88778b317b9 100644
--- a/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll
+++ b/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll
@@ -124,5 +124,66 @@ define void @test_memmove_args(i8** %Storage) {
   call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %Dst, i8* align 4 %Src, i32 4, i32 4)
   ret void
 }
+
+define i8* @test_memset1(i8* %P, i8 %V) {
+  ; CHECK: test_memset1
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 1)
+  ret i8* %P
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $1, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_1
+}
+
+define i8* @test_memset2(i8* %P, i8 %V) {
+  ; CHECK: test_memset2
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 2, i32 2)
+  ret i8* %P
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $2, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_2
+}
+
+define i8* @test_memset4(i8* %P, i8 %V) {
+  ; CHECK: test_memset4
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 4, i32 4)
+  ret i8* %P
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $4, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_4
+}
+
+define i8* @test_memset8(i8* %P, i8 %V) {
+  ; CHECK: test_memset8
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 8, i32 8)
+  ret i8* %P
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $8, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_8
+}
+
+define i8* @test_memset16(i8* %P, i8 %V) {
+  ; CHECK: test_memset16
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 16, i32 16)
+  ret i8* %P
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $16, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_16
+}
+
+define void @test_memset_args(i8** %Storage, i8* %V) {
+  ; CHECK: test_memset_args
+  %Dst = load i8*, i8** %Storage
+  %Val = load i8, i8* %V
+
+  ; 1st arg (%rdi)
+  ; CHECK-DAG: movq (%rdi), %rdi
+  ; 2nd arg (%rsi)
+  ; CHECK-DAG: movzbl (%rsi), %esi
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $4, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_4
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %Dst, i8 %Val, i32 4, i32 4)
+  ret void
+}
+
 declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind
diff --git a/test/Verifier/element-wise-atomic-memory-intrinsics.ll b/test/Verifier/element-wise-atomic-memory-intrinsics.ll
index a92b077049f..81c8ba16b97 100644
--- a/test/Verifier/element-wise-atomic-memory-intrinsics.ll
+++ b/test/Verifier/element-wise-atomic-memory-intrinsics.ll
@@ -46,4 +46,22 @@ define void @test_memmove(i8* %P, i8* %Q, i32 %A, i32 %E) {
 }
 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
 
+define void @test_memset(i8* %P, i8 %V, i32 %A, i32 %E) {
+  ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E)
+  ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 3)
+
+  ; CHECK: constant length must be a multiple of the element size in the element-wise atomic memory intrinsic
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 7, i32 4)
+
+  ; CHECK: incorrect alignment of the destination argument
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* %P, i8 %V, i32 1, i32 1)
+  ; CHECK: incorrect alignment of the destination argument
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %P, i8 %V, i32 4, i32 4)
+
+  ret void
+}
+declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind
+
 ; CHECK: input module is broken!
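[Reviewer note] As a usage sketch (not part of this patch): a front end or
pass could materialize the new intrinsic roughly as follows. This assumes the
circa-2017 C++ API; ``emitElementAtomicMemSet`` is a hypothetical helper, and
``addParamAttr``/``Attribute::getWithAlignment`` may need adjusting to the
attribute API of your tree::

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Builds a call to @llvm.memset.element.unordered.atomic.p0i8.iN at the
    // builder's insertion point. Dst must be an i8* that the caller knows is
    // aligned to at least ElementSize bytes (the verifier checks this).
    static CallInst *emitElementAtomicMemSet(IRBuilder<> &B, Module *M,
                                             Value *Dst, Value *Val,
                                             Value *Len, unsigned ElementSize,
                                             unsigned DstAlign) {
      // The intrinsic is overloaded on the pointer and length types.
      Type *Tys[] = {Dst->getType(), Len->getType()};
      Function *F = Intrinsic::getDeclaration(
          M, Intrinsic::memset_element_unordered_atomic, Tys);
      Value *Args[] = {Dst, Val, Len, B.getInt32(ElementSize)};
      CallInst *CI = B.CreateCall(F, Args);
      // The dest operand must carry the `align` parameter attribute.
      CI->addParamAttr(0, Attribute::getWithAlignment(B.getContext(), DstAlign));
      return CI;
    }

Such calls can then be matched with ``dyn_cast<ElementUnorderedAtomicMemSetInst>``
and queried through the accessors added above (``getLength()``,
``getElementSizeInBytes()``), exactly as the Verifier and SelectionDAGBuilder
changes in this patch do.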