From 0f9f9d79b62efc5fbc59cd274b42e2b01e82de06 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 24 Aug 2018 11:40:04 +0000 Subject: [PATCH] [Local] Make DoesKMove required for combineMetadata. This patch makes the DoesKMove argument non-optional, to force people to think about it. Most cases where it is false are either code hoisting or code sinking, where we pick one instruction from a set of equal instructions among different code paths. Reviewers: dberlin, nlopes, efriedma, davide Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D47475 llvm-svn: 340606 --- include/llvm/Transforms/Utils/Local.h | 9 +- .../InstCombineLoadStoreAlloca.cpp | 2 +- lib/Transforms/InstCombine/InstCombinePHI.cpp | 2 +- lib/Transforms/Scalar/GVNHoist.cpp | 2 +- lib/Transforms/Scalar/GVNSink.cpp | 2 +- lib/Transforms/Scalar/JumpThreading.cpp | 4 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 2 +- lib/Transforms/Utils/Local.cpp | 5 +- lib/Transforms/Utils/SimplifyCFG.cpp | 4 +- test/Transforms/GVNHoist/hoist-md.ll | 23 ++++ .../GVNSink/sink-combine-metadata.ll | 55 ++++++++ .../load-combine-metadata-dominance.ll | 44 +++++++ .../phi-load-metadata-dominance.ll | 26 ++++ .../JumpThreading/combine-metadata.ll | 122 ++++++++++++++++++ 14 files changed, 288 insertions(+), 14 deletions(-) create mode 100644 test/Transforms/GVNSink/sink-combine-metadata.ll create mode 100644 test/Transforms/InstCombine/load-combine-metadata-dominance.ll create mode 100644 test/Transforms/InstCombine/phi-load-metadata-dominance.ll create mode 100644 test/Transforms/JumpThreading/combine-metadata.ll diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 004b1c19680..8ec689e723c 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -391,13 +391,16 @@ bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr, /// /// Metadata not listed as known via KnownIDs is removed void combineMetadata(Instruction *K, const Instruction *J, - ArrayRef KnownIDs, bool DoesKMove = true); + ArrayRef KnownIDs, bool DoesKMove); /// Combine the metadata of two instructions so that K can replace J. This -/// specifically handles the case of CSE-like transformations. +/// specifically handles the case of CSE-like transformations. Some +/// metadata can only be kept if K dominates J. For this to be correct, +/// K cannot be hoisted. /// /// Unknown metadata is removed. -void combineMetadataForCSE(Instruction *K, const Instruction *J); +void combineMetadataForCSE(Instruction *K, const Instruction *J, + bool DoesKMove); /// Patch the replacement so that it is not more restrictive than the value /// being replaced. It assumes that the replacement does not get moved from diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5f0931ead49..1ff7037c8d2 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1026,7 +1026,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (Value *AvailableVal = FindAvailableLoadedValue( &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) { if (IsLoadCSE) - combineMetadataForCSE(cast(AvailableVal), &LI); + combineMetadataForCSE(cast(AvailableVal), &LI, false); return replaceInstUsesWith( LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(), diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index e54a1dd05a2..911ece347d3 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -616,7 +616,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // Add all operands to the new PHI and combine TBAA metadata. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = cast(PN.getIncomingValue(i)); - combineMetadata(NewLI, LI, KnownIDs); + combineMetadata(NewLI, LI, KnownIDs, true); Value *NewInVal = LI->getOperand(0); if (NewInVal != InVal) InVal = nullptr; diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp index 6d2b25cf601..00409ac035d 100644 --- a/lib/Transforms/Scalar/GVNHoist.cpp +++ b/lib/Transforms/Scalar/GVNHoist.cpp @@ -247,7 +247,7 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) { LLVMContext::MD_noalias, LLVMContext::MD_range, LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group}; - combineMetadata(ReplInst, I, KnownIDs); + combineMetadata(ReplInst, I, KnownIDs, true); } // This pass hoists common computations across branches sharing common diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp index 8959038de59..3737831e59b 100644 --- a/lib/Transforms/Scalar/GVNSink.cpp +++ b/lib/Transforms/Scalar/GVNSink.cpp @@ -859,7 +859,7 @@ void GVNSink::sinkLastInstruction(ArrayRef Blocks, // Update metadata and IR flags. for (auto *I : Insts) if (I != I0) { - combineMetadataForCSE(I0, I); + combineMetadataForCSE(I0, I, true); I0->andIRFlags(I); } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 7b738fd5086..45e73e98b4c 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1300,7 +1300,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) { if (IsLoadCSE) { LoadInst *NLoadI = cast(AvailableVal); - combineMetadataForCSE(NLoadI, LoadI); + combineMetadataForCSE(NLoadI, LoadI, false); }; // If the returned value is the load itself, replace with an undef. This can @@ -1490,7 +1490,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) { } for (LoadInst *PredLoadI : CSELoads) { - combineMetadataForCSE(PredLoadI, LoadI); + combineMetadataForCSE(PredLoadI, LoadI, true); } LoadI->replaceAllUsesWith(PN); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 3b74421a47a..0e430eab6b2 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -994,7 +994,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, LLVMContext::MD_invariant_group}; - combineMetadata(C, cpy, KnownIDs); + combineMetadata(C, cpy, KnownIDs, true); // Remove the memcpy. MD->removeInstruction(cpy); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index b8aa21be5c5..5ac66742c7c 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -2354,7 +2354,8 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(LLVMContext::MD_invariant_group, JMD); } -void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) { +void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, + bool KDominatesJ) { unsigned KnownIDs[] = { LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, LLVMContext::MD_range, @@ -2362,7 +2363,7 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) { LLVMContext::MD_invariant_group, LLVMContext::MD_align, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null}; - combineMetadata(K, J, KnownIDs); + combineMetadata(K, J, KnownIDs, KDominatesJ); } void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) { diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 4dbecf97794..28ed77adb18 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1316,7 +1316,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_mem_parallel_loop_access}; - combineMetadata(I1, I2, KnownIDs); + combineMetadata(I1, I2, KnownIDs, true); // I1 and I2 are being combined into a single instruction. Its debug // location is the merged locations of the original instructions. @@ -1582,7 +1582,7 @@ static bool sinkLastInstruction(ArrayRef Blocks) { // However, as N-way merge for CallInst is rare, so we use simplified API // instead of using complex API for N-way merge. I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc()); - combineMetadataForCSE(I0, I); + combineMetadataForCSE(I0, I, true); I0->andIRFlags(I); } diff --git a/test/Transforms/GVNHoist/hoist-md.ll b/test/Transforms/GVNHoist/hoist-md.ll index 902bbb7b269..8a7a6a426d4 100644 --- a/test/Transforms/GVNHoist/hoist-md.ll +++ b/test/Transforms/GVNHoist/hoist-md.ll @@ -93,6 +93,29 @@ return: ; preds = %if.end, %if.then ; CHECK: %[[phi:.*]] = phi i32 [ %[[load]], %{{.*}} ], [ %[[load]], %{{.*}} ] ; CHECK: ret i32 %[[phi]] +define i32* @test5(i1 %b, i32** %y) { +entry: + br i1 %b, label %if.then, label %if.end + +if.then: ; preds = %entry + %0 = load i32*, i32** %y, align 4, !nonnull !9 + br label %return + +if.end: ; preds = %entry + %1 = load i32*, i32** %y, align 4 + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32* [ %0, %if.then ], [ %1, %if.end ] + ret i32* %retval.0 +} +; CHECK-LABEL: define i32* @test5( +; CHECK: %[[load:.*]] = load i32*, i32** %y, align 4 +; CHECK-NOT: !nonnull +; CHECK: %[[phi:.*]] = phi i32* [ %[[load]], %{{.*}} ], [ %[[load]], %{{.*}} ] +; CHECK: ret i32* %[[phi]] + !7 = !{i32 0, i32 2} !8 = !{i32 3, i32 4} +!9 = !{} ; CHECK: ![[range_md]] = !{i32 0, i32 2, i32 3, i32 4} diff --git a/test/Transforms/GVNSink/sink-combine-metadata.ll b/test/Transforms/GVNSink/sink-combine-metadata.ll new file mode 100644 index 00000000000..97ee8cf7161 --- /dev/null +++ b/test/Transforms/GVNSink/sink-combine-metadata.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -gvn-sink -S | FileCheck %s + +; Check that nonnull metadata for non-dominating loads is not propagated. +; CHECK-LABEL: @test1( +; CHECK-LABEL: if.end: +; CHECK: %[[ptr:.*]] = phi i32** +; CHECK: %[[load:.*]] = load i32*, i32** %[[ptr]] +; CHECK-NOT: !nonnull +; CHECK: ret i32* %[[load]] +define i32* @test1(i1 zeroext %flag, i32*** %p) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = load i32**, i32*** %p + %aa = load i32*, i32** %a, !nonnull !0 + br label %if.end + +if.else: + %b = load i32**, i32*** %p + %bb= load i32*, i32** %b + br label %if.end + +if.end: + %c = phi i32* [ %aa, %if.then ], [ %bb, %if.else ] + ret i32* %c +} + +; CHECK-LABEL: @test2( +; CHECK-LABEL: if.end: +; CHECK: %[[ptr:.*]] = phi i32** +; CHECK: %[[load:.*]] = load i32*, i32** %[[ptr]] +; CHECK-NOT: !nonnull +; CHECK: ret i32* %[[load]] +define i32* @test2(i1 zeroext %flag, i32*** %p) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = load i32**, i32*** %p + %aa = load i32*, i32** %a + br label %if.end + +if.else: + %b = load i32**, i32*** %p + %bb= load i32*, i32** %b, !nonnull !0 + br label %if.end + +if.end: + %c = phi i32* [ %aa, %if.then ], [ %bb, %if.else ] + ret i32* %c +} + + +!0 = !{} diff --git a/test/Transforms/InstCombine/load-combine-metadata-dominance.ll b/test/Transforms/InstCombine/load-combine-metadata-dominance.ll new file mode 100644 index 00000000000..25e352bdb7b --- /dev/null +++ b/test/Transforms/InstCombine/load-combine-metadata-dominance.ll @@ -0,0 +1,44 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; Check that nonnull metadata is propagated from dominating load. +; CHECK-LABEL: @combine_metadata_dominance1( +; CHECK-LABEL: bb1: +; CHECK: load i32*, i32** %p, align 8, !nonnull !0 +; CHECK-NOT: load i32*, i32** %p +define void @combine_metadata_dominance1(i32** %p) { +entry: + %a = load i32*, i32** %p, !nonnull !0 + br label %bb1 + +bb1: + %b = load i32*, i32** %p + store i32 0, i32* %a + store i32 0, i32* %b + ret void +} + +declare i32 @use(i32*, i32) readonly + +; Check that nonnull from the dominated load does not get propagated. +; There are some cases where it would be safe to keep it. +; CHECK-LABEL: @combine_metadata_dominance2( +; CHECK-NOT: nonnull +define void @combine_metadata_dominance2(i32** %p) { +entry: + %a = load i32*, i32** %p + br i1 undef, label %bb1, label %bb2 + +bb1: + %b = load i32*, i32** %p, !nonnull !0 + store i32 0, i32* %a + store i32 0, i32* %b + ret void + +bb2: + ret void +} + + +!0 = !{} diff --git a/test/Transforms/InstCombine/phi-load-metadata-dominance.ll b/test/Transforms/InstCombine/phi-load-metadata-dominance.ll new file mode 100644 index 00000000000..0c5aab85890 --- /dev/null +++ b/test/Transforms/InstCombine/phi-load-metadata-dominance.ll @@ -0,0 +1,26 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +declare void @bar() +declare void @baz() + +; Check that nonnull metadata is from non-dominating loads is not propagated. +; CHECK-LABEL: cont: +; CHECK-NOT: !nonnull +define i32* @test_combine_metadata_dominance(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) { + br i1 %c, label %t, label %f +t: + call void @bar() + %v1 = load i32*, i32** %p1, align 8, !nonnull !0 + br label %cont + +f: + call void @baz() + %v2 = load i32*, i32** %p2, align 8 + br label %cont + +cont: + %res = phi i32* [ %v1, %t ], [ %v2, %f ] + ret i32* %res +} + +!0 = !{} diff --git a/test/Transforms/JumpThreading/combine-metadata.ll b/test/Transforms/JumpThreading/combine-metadata.ll new file mode 100644 index 00000000000..6351236aebb --- /dev/null +++ b/test/Transforms/JumpThreading/combine-metadata.ll @@ -0,0 +1,122 @@ +; RUN: opt < %s -jump-threading -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +declare void @use(i32 *) + +; Check that we propagate nonnull to dominated loads, when we find an available +; loaded value. +; CHECK-LABEL: @test1( +; CHECK-LABEL: ret1: +; CHECK-NEXT: %[[p1:.*]] = load i32*, i32** %ptr +; CHECK-NOT: !nonnull +; CHECK-NEXT: store i32 1, i32* %[[p1]] +; CHECK-NEXT: tail call void @use(i32* null) +; CHECK-NEXT: ret void + +; CHECK-LABEL: ret2: +; CHECK-NEXT: %[[p2:.*]] = load i32*, i32** %ptr, !nonnull !0 +; CHECK: tail call void @use(i32* %[[p2]]) +; CHECK-NEXT: ret void +define void @test1(i32** %ptr, i1 %c) { + br i1 %c, label %d1, label %d2 + +d1: + %p1 = load i32*, i32** %ptr, !nonnull !0 + br label %d3 + +d2: + br label %d3 + +d3: + %pm = phi i32* [ null, %d2 ], [ %p1, %d1 ] + %p2 = load i32*, i32** %ptr + store i32 1, i32* %p2 + %c2 = icmp eq i32* %pm, null + br i1 %c2, label %ret1, label %ret2 + +ret1: + tail call void @use(i32* %pm) nounwind + ret void + +ret2: + tail call void @use(i32* %pm) nounwind + ret void +} + +; Check that we propagate nonnull to dominated loads, when we find an available +; loaded value. +; CHECK-LABEL: @test2( +; CHECK-LABEL: d3.thread: +; CHECK-NEXT: %[[p1:.*]] = load i32*, i32** %ptr, !nonnull !0 +; CHECK-NEXT: store i32 1, i32* %[[p1]] +; CHECK-NEXT: br label %ret1 + +; CHECK-LABEL: d3: +; CHECK-NEXT: %[[p_cmp:.*]] = load i32*, i32** %ptr +; CHECK-NEXT: %[[p2:.*]] = load i32*, i32** %ptr, !nonnull !0 +; CHECK-NEXT: store i32 1, i32* %[[p2]] +; CHECK-NEXT: icmp eq i32* %[[p_cmp]], null +define void @test2(i32** %ptr, i1 %c) { + br i1 %c, label %d1, label %d2 + +d1: + %p1 = load i32*, i32** %ptr + br label %d3 + +d2: + br label %d3 + +d3: + %pm = phi i32* [ null, %d2 ], [ %p1, %d1 ] + %p2 = load i32*, i32** %ptr, !nonnull !0 + store i32 1, i32* %p2 + %c2 = icmp eq i32* %pm, null + br i1 %c2, label %ret1, label %ret2 + +ret1: + tail call void @use(i32* %pm) nounwind + ret void + +ret2: + tail call void @use(i32* %pm) nounwind + ret void +} + +; Check that we do not propagate nonnull to loads predecessors that are combined +; to a PHI node. +; CHECK-LABEL: @test3( +; CHECK-LABEL: d1: +; CHECK-NEXT: %[[p1:.*]] = load i32*, i32** %ptr +; CHECK-NOT: !nonnull + +; CHECK-LABEL: d2: +; CHECK-NEXT: %[[p2:.*]] = load i32*, i32** %ptr +; CHECK-NOT: !nonnull + +; CHECK-LABEL: d3: +; CHECK-NEXT: phi i32* [ %[[p2]], %d2 ], [ %[[p1]], %d1 ] +define void @test3(i32** %ptr) { +d1: + %x = load i32*, i32** %ptr, !nonnull !0 + br label %d3 + +d2: + br label %d3 + +d3: + %y = load i32*, i32** %ptr + store i32 1, i32* %y + %c2 = icmp eq i32* %y, null + br i1 %c2, label %ret1, label %ret2 + +ret1: + ret void + +ret2: + ret void +} + + +!0 = !{}