diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 7be5442cc7e..08991d0f8f0 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -4421,6 +4421,50 @@ the loop identifier metadata node directly: The ``llvm.bitsets`` global metadata is used to implement :doc:`bitsets <BitSets>`. +'``invariant.group``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``invariant.group`` metadata may be attached to ``load``/``store`` instructions. +The existence of the ``invariant.group`` metadata on the instruction tells +the optimizer that every ``load`` and ``store`` to the same pointer operand +within the same invariant group can be assumed to load or store the same +value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects +when two pointers are considered the same). + +Examples: + +.. code-block:: llvm + + @unknownPtr = external global i8 + ... + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed + + %newPtr = call i8* @getPointer(i8* %ptr) + %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr + + %unknownValue = load i8, i8* @unknownPtr + store i8 %unknownValue, i8* %ptr, !invariant.group !0 ; Can assume that %unknownValue == 42 + + call void @foo(i8* %ptr) + %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr + + ... 
+ declare void @foo(i8*) + declare i8* @getPointer(i8*) + declare i8* @llvm.invariant.group.barrier(i8*) + + !0 = !{!"magic ptr"} + !1 = !{!"other ptr"} + + + Module Flags Metadata ===================== @@ -6768,8 +6812,8 @@ Syntax: :: - <result> = load [volatile] <ty>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.load !<index>][, !nonnull !<index>][, !dereferenceable !<index>][, !dereferenceable_or_null !<index>] - <result> = load atomic [volatile] <ty>* <pointer> [singlethread] <ordering>, align <alignment> + <result> = load [volatile] <ty>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.load !<index>][, !invariant.group !<index>][, !nonnull !<index>][, !dereferenceable !<index>][, !dereferenceable_or_null !<index>] + <result> = load atomic [volatile] <ty>* <pointer> [singlethread] <ordering>, align <alignment> [, !invariant.group !<index>] !<index> = !{ i32 1 } Overview: @@ -6825,6 +6869,9 @@ Being invariant does not imply that a location is dereferenceable, but it does imply that once the location is known dereferenceable its value is henceforth unchanging. +The optional ``!invariant.group`` metadata must reference a single metadata name + ``<index>`` corresponding to a metadata node. See ``invariant.group`` metadata. + The optional ``!nonnull`` metadata must reference a single metadata name ``<index>`` corresponding to a metadata node with no entries. The existence of the ``!nonnull`` metadata on the @@ -6882,8 +6929,8 @@ Syntax: :: - store [volatile] <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>] ; yields void - store atomic [volatile] <ty> <value>, <ty>* <pointer> [singlethread] <ordering>, align <alignment> ; yields void + store [volatile] <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.group !<index>] ; yields void + store atomic [volatile] <ty> <value>, <ty>* <pointer> [singlethread] <ordering>, align <alignment> [, !invariant.group !<index>] ; yields void Overview: """"""""" @@ -6930,6 +6977,9 @@ be reused in the cache. The code generator may select special instructions to save cache bandwidth, such as the MOVNT instruction on x86. +The optional ``!invariant.group`` metadata must reference a +single metadata name ``<index>``. See ``invariant.group`` metadata. + Semantics: """""""""" @@ -11465,6 +11515,36 @@ Semantics: This intrinsic indicates that the memory is mutable again. 
+'``llvm.invariant.group.barrier``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.invariant.group.barrier(i8* <ptr>) + +Overview: +""""""""" + +The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant +established by invariant.group metadata no longer holds, to obtain a new pointer +value that does not carry the invariant information. + + +Arguments: +"""""""""" + +The ``llvm.invariant.group.barrier`` takes only one argument, which is +the pointer to the memory for which the ``invariant.group`` no longer holds. + +Semantics: +"""""""""" + +Returns another pointer that aliases its argument but which is considered different +for the purposes of ``load``/``store`` ``invariant.group`` metadata. + General Intrinsics ------------------ diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index 8795addd1a5..d46514614e4 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -24,6 +24,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" @@ -1634,6 +1635,39 @@ public: Name); } + /// \brief Create a call to the invariant.group.barrier intrinsic, which + /// stops the optimizer from propagating equality through invariant.group + /// metadata. + /// + /// If the type of \p Ptr is not i8*, \p Ptr is bitcast to i8* before the + /// call and the result is bitcast back to the original type afterwards. + /// + /// \returns the barrier's result, with the same type as \p Ptr. 
+ Value *CreateInvariantGroupBarrier(Value *Ptr) { + assert(Ptr->getType()->isPointerTy() && + "invariant.group.barrier only applies to pointers"); + Module *M = BB->getParent()->getParent(); + Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration(M, + Intrinsic::invariant_group_barrier); + + Type *ArgumentAndReturnType = FnInvariantGroupBarrier->getReturnType(); + assert(ArgumentAndReturnType == + FnInvariantGroupBarrier->getFunctionType()->getParamType(0) && + "InvariantGroupBarrier should take and return the same type"); + Type *PtrType = Ptr->getType(); + + // Bitcast around the call only if Ptr is not already of that type. + bool PtrTypeConversionNeeded = PtrType != ArgumentAndReturnType; + if (PtrTypeConversionNeeded) + Ptr = CreateBitCast(Ptr, ArgumentAndReturnType); + + CallInst *BarrierCall = CreateCall(FnInvariantGroupBarrier, {Ptr}); + + if (PtrTypeConversionNeeded) + return CreateBitCast(BarrierCall, PtrType); + return BarrierCall; + } + /// \brief Return a vector value that contains \arg V broadcasted to \p /// NumElts elements. Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") { diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 8c74b1b12e7..acee3b2897f 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -319,7 +319,7 @@ def int_instrprof_increment : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; - + //===------------------- Standard C Library Intrinsics --------------------===// // @@ -530,6 +530,10 @@ def int_invariant_end : Intrinsic<[], llvm_ptr_ty], [IntrReadWriteArgMem, NoCapture<2>]>; +def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty], + [llvm_ptr_ty], + [IntrNoMem]>; + //===------------------------ Stackmap Intrinsics -------------------------===// // def int_experimental_stackmap : Intrinsic<[], diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 9ed15fb7753..d0c8fb95d48 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -187,6 +187,7 @@ class TypePromotionTransaction; unsigned CreatedInstCost); bool splitBranchCondition(Function &F); bool 
simplifyOffsetableRelocate(Instruction &I); + void stripInvariantGroupMetadata(Instruction &I); }; } @@ -1411,6 +1412,10 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { InsertedInsts.insert(ExtVal); return true; } + case Intrinsic::invariant_group_barrier: + II->replaceAllUsesWith(II->getArgOperand(0)); + II->eraseFromParent(); + return true; } if (TLI) { @@ -4421,6 +4426,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { return OptimizeCmpExpression(CI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + stripInvariantGroupMetadata(*LI); if (TLI) { unsigned AS = LI->getPointerAddressSpace(); return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); @@ -4429,6 +4435,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + stripInvariantGroupMetadata(*SI); if (TLI) { unsigned AS = SI->getPointerAddressSpace(); return OptimizeMemoryInst(I, SI->getOperand(1), @@ -4827,3 +4834,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } return MadeChange; } + +/// Remove the !invariant.group attachment from \p I, if present. Only that +/// attachment is erased; all other metadata on the instruction is preserved. +void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) { + if (I.getMetadata("invariant.group")) + I.setMetadata("invariant.group", nullptr); +} diff --git a/test/Transforms/CodeGenPrepare/invariant.group.ll b/test/Transforms/CodeGenPrepare/invariant.group.ll new file mode 100644 index 00000000000..e8f1e42ddcb --- /dev/null +++ b/test/Transforms/CodeGenPrepare/invariant.group.ll @@ -0,0 +1,23 @@ +; RUN: opt -codegenprepare -S < %s | FileCheck %s + +@tmp = global i8 0 + +; CHECK-LABEL: define void @foo() { +define void @foo() { +enter: + ; CHECK-NOT: !invariant.group + ; CHECK-NOT: @llvm.invariant.group.barrier( + ; CHECK: %val = load i8, i8* @tmp + %val = load i8, i8* @tmp, !invariant.group !0 + %ptr = call i8* @llvm.invariant.group.barrier(i8* @tmp) + + ; CHECK: store i8 42, i8* @tmp + store i8 42, i8* %ptr, !invariant.group !0 + + ret void +} +; CHECK-LABEL: } 
+ +declare i8* @llvm.invariant.group.barrier(i8*) + +!0 = !{!"something"} \ No newline at end of file diff --git a/test/Transforms/GlobalOpt/invariant.group.barrier.ll b/test/Transforms/GlobalOpt/invariant.group.barrier.ll new file mode 100644 index 00000000000..54d91d40801 --- /dev/null +++ b/test/Transforms/GlobalOpt/invariant.group.barrier.ll @@ -0,0 +1,79 @@ +; RUN: opt -S -globalopt < %s | FileCheck %s + +; This test is a hint about what GlobalOpt could optimize and what it can't. +; FIXME: @tmp and @tmp2 can be safely set to 42 +; CHECK: @tmp = global i32 0 +; CHECK: @tmp2 = global i32 0 +; CHECK: @tmp3 = global i32 0 + +@tmp = global i32 0 +@tmp2 = global i32 0 +@tmp3 = global i32 0 +@ptrToTmp3 = global i32* null + +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }] + +define i32 @TheAnswerToLifeTheUniverseAndEverything() { + ret i32 42 +} + +define void @_GLOBAL__I_a() { +enter: + call void @_optimizable() + call void @_not_optimizable() + ret void +} + +define void @_optimizable() { +enter: + %valptr = alloca i32 + + %val = call i32 @TheAnswerToLifeTheUniverseAndEverything() + store i32 %val, i32* @tmp + store i32 %val, i32* %valptr + + %0 = bitcast i32* %valptr to i8* + %barr = call i8* @llvm.invariant.group.barrier(i8* %0) + %1 = bitcast i8* %barr to i32* + + %val2 = load i32, i32* %1 + store i32 %val2, i32* @tmp2 + ret void +} + +; We can't step through invariant.group.barrier here, because that would change +; this load in @usage_of_globals() +; %val = load i32, i32* %ptrVal, !invariant.group !0 +; into +; %val = load i32, i32* @tmp3, !invariant.group !0 +; and then we could assume that %val and %val2 are the same, which could be +; false, because @changeTmp3ValAndCallBarrierInside() may change the value +; of @tmp3. 
+define void @_not_optimizable() { +enter: + store i32 13, i32* @tmp3, !invariant.group !0 + + %0 = bitcast i32* @tmp3 to i8* + %barr = call i8* @llvm.invariant.group.barrier(i8* %0) + %1 = bitcast i8* %barr to i32* + + store i32* %1, i32** @ptrToTmp3 + store i32 42, i32* %1, !invariant.group !0 + + ret void +} +define void @usage_of_globals() { +entry: + %ptrVal = load i32*, i32** @ptrToTmp3 + %val = load i32, i32* %ptrVal, !invariant.group !0 + + call void @changeTmp3ValAndCallBarrierInside() + %val2 = load i32, i32* @tmp3, !invariant.group !0 + ret void; +} + +declare void @changeTmp3ValAndCallBarrierInside() + +declare i8* @llvm.invariant.group.barrier(i8*) + +!0 = !{!"something"}