From c1758eb14de7e804a903d5bfd413312ce9ebf9aa Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 31 Jul 2019 12:06:38 +0000 Subject: [PATCH] [DivRemPairs] Avoid RAUW pitfalls (PR42823) Summary: `DivRemPairs` internally creates two maps: * {sign, divident, divisor} -> div instruction * {sign, divident, divisor} -> rem instruction Then it iterates over rem map, and looks if there is an entry in div map with the same key. Then depending on some internal logic it may RAUW rem instruction with something else. But if that rem instruction is an input to other div/rem, then it was used as a key in these maps, so the old value (used in key) is now dandling, because RAUW didn't update those maps. And we can't even RAUW map keys in general, there's `ValueMap`, but we don't have a single `Value` as key... The bug was discovered via D65298, and the test there exists. Now, i'm not sure how to expose this issue in trunk. The bug is clearly there if i change the map keys to be `AssertingVH`/`PoisoningVH`, but i guess this didn't miscompiled anything thus far? I really don't think this is benin without that patch. The fix is actually rather straight-forward - instead of trying to somehow shoe-horn `ValueMap` here (doesn't fit, key isn't just `Value`), or writing a new `ValueMap` with key being a struct of `Value`s, we can just have an intermediate data structure - a vector, each entry containing matching `Div, Rem` pair, and pre-filling it before doing any modifications. This way we won't need to query map after doing RAUW, so no bug is possible. Reviewers: spatel, bogner, RKSimon, craig.topper Reviewed By: spatel Subscribers: hiraditya, hans, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65451 llvm-svn: 367417 --- .../Transforms/Utils/BypassSlowDivision.h | 11 +- lib/Transforms/Scalar/DivRemPairs.cpp | 114 ++++++++++++++---- .../PowerPC/div-expanded-rem-pair.ll | 10 +- .../DivRemPairs/PowerPC/div-rem-pairs.ll | 38 +++--- .../DivRemPairs/X86/div-rem-pairs.ll | 4 +- 5 files changed, 121 insertions(+), 56 deletions(-) diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h index 471055921fa..994b6ec9c22 100644 --- a/include/llvm/Transforms/Utils/BypassSlowDivision.h +++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/IR/ValueHandle.h" #include namespace llvm { @@ -28,8 +29,8 @@ class Value; struct DivRemMapKey { bool SignedOp; - Value *Dividend; - Value *Divisor; + AssertingVH Dividend; + AssertingVH Divisor; DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor) : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} @@ -50,8 +51,10 @@ template <> struct DenseMapInfo { } static unsigned getHashValue(const DivRemMapKey &Val) { - return (unsigned)(reinterpret_cast(Val.Dividend) ^ - reinterpret_cast(Val.Divisor)) ^ + return (unsigned)(reinterpret_cast( + static_cast(Val.Dividend)) ^ + reinterpret_cast( + static_cast(Val.Divisor))) ^ (unsigned)Val.SignedOp; } }; diff --git a/lib/Transforms/Scalar/DivRemPairs.cpp b/lib/Transforms/Scalar/DivRemPairs.cpp index 876681b4f9d..e64651d9749 100644 --- a/lib/Transforms/Scalar/DivRemPairs.cpp +++ b/lib/Transforms/Scalar/DivRemPairs.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/DebugCounter.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" + using namespace llvm; #define DEBUG_TYPE "div-rem-pairs" @@ -32,24 +33,44 @@ STATISTIC(NumDecomposed, "Number of instructions decomposed"); DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform", "Controls transformations in div-rem-pairs pass"); -/// Find matching pairs of integer div/rem ops (they have the same numerator, -/// denominator, and signedness). If they exist in different basic blocks, bring -/// them together by hoisting or replace the common division operation that is -/// implicit in the remainder: -/// X % Y <--> X - ((X / Y) * Y). -/// -/// We can largely ignore the normal safety and cost constraints on speculation -/// of these ops when we find a matching pair. This is because we are already -/// guaranteed that any exceptions and most cost are already incurred by the -/// first member of the pair. -/// -/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or -/// SimplifyCFG, but it's split off on its own because it's different enough -/// that it doesn't quite match the stated objectives of those passes. -static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, - const DominatorTree &DT) { - bool Changed = false; +/// A thin wrapper to store two values that we matched as div-rem pair. +/// We want this extra indirection to avoid dealing with RAUW'ing the map keys. +struct DivRemPairWorklistEntry { + /// The actual udiv/sdiv instruction. Source of truth. + AssertingVH DivInst; + /// The instruction that we have matched as a remainder instruction. + /// Should only be used as Value, don't introspect it. + AssertingVH RemInst; + + DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_) + : DivInst(DivInst_), RemInst(RemInst_) { + assert((DivInst->getOpcode() == Instruction::UDiv || + DivInst->getOpcode() == Instruction::SDiv) && + "Not a division."); + assert(DivInst->getType() == RemInst->getType() && "Types should match."); + // We can't check anything else about remainder instruction, + // it's not strictly required to be a urem/srem. + } + + /// The type for this pair, identical for both the div and rem. + Type *getType() const { return DivInst->getType(); } + + /// Is this pair signed or unsigned? + bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; } + + /// In this pair, what are the divident and divisor? + Value *getDividend() const { return DivInst->getOperand(0); } + Value *getDivisor() const { return DivInst->getOperand(1); } +}; +using DivRemWorklistTy = SmallVector; + +/// Find matching pairs of integer div/rem ops (they have the same numerator, +/// denominator, and signedness). Place those pairs into a worklist for further +/// processing. This indirection is needed because we have to use TrackingVH<> +/// because we will be doing RAUW, and if one of the rem instructions we change +/// happens to be an input to another div/rem in the maps, we'd have problems. +static DivRemWorklistTy getWorklist(Function &F) { // Insert all divide and remainder instructions into maps keyed by their // operands and opcode (signed or unsigned). DenseMap DivMap; @@ -69,6 +90,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, } } + // We'll accumulate the matching pairs of div-rem instructions here. + DivRemWorklistTy Worklist; + // We can iterate over either map because we are only looking for matched // pairs. Choose remainders for efficiency because they are usually even more // rare than division. @@ -78,12 +102,45 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, if (!DivInst) continue; - // We have a matching pair of div/rem instructions. If one dominates the - // other, hoist and/or replace one. + // We have a matching pair of div/rem instructions. NumPairs++; Instruction *RemInst = RemPair.second; - bool IsSigned = DivInst->getOpcode() == Instruction::SDiv; - bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned); + + // Place it in the worklist. + Worklist.emplace_back(DivInst, RemInst); + } + + return Worklist; +} + +/// Find matching pairs of integer div/rem ops (they have the same numerator, +/// denominator, and signedness). If they exist in different basic blocks, bring +/// them together by hoisting or replace the common division operation that is +/// implicit in the remainder: +/// X % Y <--> X - ((X / Y) * Y). +/// +/// We can largely ignore the normal safety and cost constraints on speculation +/// of these ops when we find a matching pair. This is because we are already +/// guaranteed that any exceptions and most cost are already incurred by the +/// first member of the pair. +/// +/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or +/// SimplifyCFG, but it's split off on its own because it's different enough +/// that it doesn't quite match the stated objectives of those passes. +static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, + const DominatorTree &DT) { + bool Changed = false; + + // Get the matching pairs of div-rem instructions. We want this extra + // indirection to avoid dealing with having to RAUW the keys of the maps. + DivRemWorklistTy Worklist = getWorklist(F); + + // Process each entry in the worklist. + for (DivRemPairWorklistEntry &E : Worklist) { + bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned()); + + auto &DivInst = E.DivInst; + auto &RemInst = E.RemInst; // If the target supports div+rem and the instructions are in the same block // already, there's nothing to do. The backend should handle this. If the @@ -110,8 +167,8 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // The target does not have a single div/rem operation. Decompose the // remainder calculation as: // X % Y --> X - ((X / Y) * Y). - Value *X = RemInst->getOperand(0); - Value *Y = RemInst->getOperand(1); + Value *X = E.getDividend(); + Value *Y = E.getDivisor(); Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y); Instruction *Sub = BinaryOperator::CreateSub(X, Mul); @@ -152,8 +209,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // Now kill the explicit remainder. We have replaced it with: // (sub X, (mul (div X, Y), Y) - RemInst->replaceAllUsesWith(Sub); - RemInst->eraseFromParent(); + Sub->setName(RemInst->getName() + ".decomposed"); + Instruction *OrigRemInst = RemInst; + // Update AssertingVH<> with new instruction so it doesn't assert. + RemInst = Sub; + // And replace the original instruction with the new one. + OrigRemInst->replaceAllUsesWith(Sub); + OrigRemInst->eraseFromParent(); NumDecomposed++; } Changed = true; @@ -188,7 +250,7 @@ struct DivRemPairsLegacyPass : public FunctionPass { return optimizeDivRem(F, TTI, DT); } }; -} +} // namespace char DivRemPairsLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs", diff --git a/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll b/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll index 8c1482039f0..04a8a7721e9 100644 --- a/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll +++ b/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll @@ -103,12 +103,12 @@ define i32 @srem_of_srem_unexpanded(i32 %X, i32 %Y, i32 %Z) { ; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[X:%.*]], [[T0]] ; CHECK-NEXT: [[T2:%.*]] = mul nsw i32 [[T0]], [[T1]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[T1]], [[T0]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[X]], [[TMP1]] -; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[TMP2]], [[Y]] +; CHECK-NEXT: [[T3_DECOMPOSED:%.*]] = sub i32 [[X]], [[TMP1]] +; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3_DECOMPOSED]], [[Y]] ; CHECK-NEXT: [[T5:%.*]] = mul nsw i32 [[T4]], [[Y]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[T4]], [[Y]] -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: ret i32 [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[T4]], [[Y]] +; CHECK-NEXT: [[T6_DECOMPOSED:%.*]] = sub i32 [[T3_DECOMPOSED]], [[TMP2]] +; CHECK-NEXT: ret i32 [[T6_DECOMPOSED]] ; %t0 = mul nsw i32 %Z, %Y %t1 = sdiv i32 %X, %t0 diff --git a/test/Transforms/DivRemPairs/PowerPC/div-rem-pairs.ll b/test/Transforms/DivRemPairs/PowerPC/div-rem-pairs.ll index 4e004389c99..4e95e0e399e 100644 --- a/test/Transforms/DivRemPairs/PowerPC/div-rem-pairs.ll +++ b/test/Transforms/DivRemPairs/PowerPC/div-rem-pairs.ll @@ -7,8 +7,8 @@ define void @decompose_illegal_srem_same_block(i32 %a, i32 %b) { ; CHECK-LABEL: @decompose_illegal_srem_same_block( ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[DIV]], [[B]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[A]], [[TMP1]] -; CHECK-NEXT: call void @foo(i32 [[TMP2]], i32 [[DIV]]) +; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i32 [[A]], [[TMP1]] +; CHECK-NEXT: call void @foo(i32 [[REM_DECOMPOSED]], i32 [[DIV]]) ; CHECK-NEXT: ret void ; %rem = srem i32 %a, %b @@ -21,8 +21,8 @@ define void @decompose_illegal_urem_same_block(i32 %a, i32 %b) { ; CHECK-LABEL: @decompose_illegal_urem_same_block( ; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[DIV]], [[B]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[A]], [[TMP1]] -; CHECK-NEXT: call void @foo(i32 [[TMP2]], i32 [[DIV]]) +; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i32 [[A]], [[TMP1]] +; CHECK-NEXT: call void @foo(i32 [[REM_DECOMPOSED]], i32 [[DIV]]) ; CHECK-NEXT: ret void ; %div = udiv i32 %a, %b @@ -39,8 +39,8 @@ define i32 @hoist_sdiv(i32 %a, i32 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[DIV]], [[B]] -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A]], [[TMP0]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 42 +; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i32 [[A]], [[TMP0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM_DECOMPOSED]], 42 ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]] ; CHECK: if: ; CHECK-NEXT: br label [[END]] @@ -69,8 +69,8 @@ define i64 @hoist_udiv(i64 %a, i64 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[DIV]], [[B]] -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[A]], [[TMP0]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], 42 +; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i64 [[A]], [[TMP0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[REM_DECOMPOSED]], 42 ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]] ; CHECK: if: ; CHECK-NEXT: br label [[END]] @@ -102,10 +102,10 @@ define i16 @hoist_srem(i16 %a, i16 %b) { ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]] ; CHECK: if: ; CHECK-NEXT: [[TMP0:%.*]] = mul i16 [[DIV]], [[B]] -; CHECK-NEXT: [[TMP1:%.*]] = sub i16 [[A]], [[TMP0]] +; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i16 [[A]], [[TMP0]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[RET:%.*]] = phi i16 [ [[TMP1]], [[IF]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RET:%.*]] = phi i16 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i16 [[RET]] ; entry: @@ -132,10 +132,10 @@ define i8 @hoist_urem(i8 %a, i8 %b) { ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]] ; CHECK: if: ; CHECK-NEXT: [[TMP0:%.*]] = mul i8 [[DIV]], [[B]] -; CHECK-NEXT: [[TMP1:%.*]] = sub i8 [[A]], [[TMP0]] +; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i8 [[A]], [[TMP0]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[RET:%.*]] = phi i8 [ [[TMP1]], [[IF]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RET:%.*]] = phi i8 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 [[RET]] ; entry: @@ -160,12 +160,12 @@ define i32 @srem_of_srem_unexpanded(i32 %X, i32 %Y, i32 %Z) { ; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[X:%.*]], [[T0]] ; CHECK-NEXT: [[T2:%.*]] = mul nsw i32 [[T0]], [[T1]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[T1]], [[T0]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[X]], [[TMP1]] -; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[TMP2]], [[Y]] +; CHECK-NEXT: [[T3_DECOMPOSED:%.*]] = sub i32 [[X]], [[TMP1]] +; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3_DECOMPOSED]], [[Y]] ; CHECK-NEXT: [[T5:%.*]] = mul nsw i32 [[T4]], [[Y]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[T4]], [[Y]] -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: ret i32 [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[T4]], [[Y]] +; CHECK-NEXT: [[T6_DECOMPOSED:%.*]] = sub i32 [[T3_DECOMPOSED]], [[TMP2]] +; CHECK-NEXT: ret i32 [[T6_DECOMPOSED]] ; %t0 = mul nsw i32 %Z, %Y %t1 = sdiv i32 %X, %t0 @@ -294,10 +294,10 @@ define i128 @dont_hoist_urem(i128 %a, i128 %b) { ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]] ; CHECK: if: ; CHECK-NEXT: [[TMP0:%.*]] = mul i128 [[DIV]], [[B]] -; CHECK-NEXT: [[TMP1:%.*]] = sub i128 [[A]], [[TMP0]] +; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i128 [[A]], [[TMP0]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[RET:%.*]] = phi i128 [ [[TMP1]], [[IF]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RET:%.*]] = phi i128 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i128 [[RET]] ; entry: diff --git a/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll b/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll index 987ffceb157..711790004c1 100644 --- a/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll +++ b/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll @@ -286,10 +286,10 @@ define i128 @dont_hoist_urem(i128 %a, i128 %b) { ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]] ; CHECK: if: ; CHECK-NEXT: [[TMP0:%.*]] = mul i128 [[DIV]], [[B]] -; CHECK-NEXT: [[TMP1:%.*]] = sub i128 [[A]], [[TMP0]] +; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i128 [[A]], [[TMP0]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[RET:%.*]] = phi i128 [ [[TMP1]], [[IF]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RET:%.*]] = phi i128 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i128 [[RET]] ; entry: