mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[SeparateConstOffsetFromGEP] Fix: sext(a) + sext(b) -> sext(a + b) matches add and sub instructions with one another
During the SeparateConstOffsetFromGEP pass, signed extensions are distributed to the values that feed into them and are later recombined. The recombination stage is problematic: in some instances it does not distinguish add instructions from sub instructions when matching the sext(a) +/- sext(b) -> sext(a +/- b) pattern.

For example, suppose the IR contains:
  %unextendedA
  %unextendedB
  %subuAuB = unextendedA - unextendedB
  %extA = extend A
  %extB = extend B
  %addeAeB = extA + extB

The problematic optimization transforms that into:
  %unextendedA
  %unextendedB
  %subuAuB = unextendedA - unextendedB
  %extA = extend A
  %extB = extend B
  %addeAeB = extend subuAuB ; Obviously not semantically equivalent to the IR input.

This patch fixes that by tracking dominating add and sub instructions separately, so that an add of extensions is only ever matched against a dominating add, and a sub of extensions only against a dominating sub.

Patch by Drew Wock <drew.wock@sas.com>

Differential Revision: https://reviews.llvm.org/D65967
This commit is contained in:
parent
68481bfa6d
commit
161d927dda
@ -431,8 +431,10 @@ private:
|
||||
bool reuniteExts(Instruction *I);
|
||||
|
||||
/// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
|
||||
Instruction *findClosestMatchingDominator(const SCEV *Key,
|
||||
Instruction *Dominatee);
|
||||
Instruction *findClosestMatchingDominator(
|
||||
const SCEV *Key, Instruction *Dominatee,
|
||||
DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs);
|
||||
|
||||
/// Verify F is free of dead code.
|
||||
void verifyNoDeadCode(Function &F);
|
||||
|
||||
@ -456,7 +458,8 @@ private:
|
||||
/// multiple GEPs with a single index.
|
||||
bool LowerGEP;
|
||||
|
||||
DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
|
||||
DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingAdds;
|
||||
DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingSubs;
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
@ -1141,7 +1144,8 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
|
||||
}
|
||||
|
||||
Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
|
||||
const SCEV *Key, Instruction *Dominatee) {
|
||||
const SCEV *Key, Instruction *Dominatee,
|
||||
DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs) {
|
||||
auto Pos = DominatingExprs.find(Key);
|
||||
if (Pos == DominatingExprs.end())
|
||||
return nullptr;
|
||||
@ -1169,12 +1173,23 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
|
||||
// If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
|
||||
// TODO: handle zext
|
||||
Value *LHS = nullptr, *RHS = nullptr;
|
||||
if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
|
||||
match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
|
||||
if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
|
||||
if (LHS->getType() == RHS->getType()) {
|
||||
const SCEV *Key =
|
||||
SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
|
||||
if (auto *Dom = findClosestMatchingDominator(Key, I)) {
|
||||
if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
|
||||
Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
|
||||
NewSExt->takeName(I);
|
||||
I->replaceAllUsesWith(NewSExt);
|
||||
RecursivelyDeleteTriviallyDeadInstructions(I);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
|
||||
if (LHS->getType() == RHS->getType()) {
|
||||
const SCEV *Key =
|
||||
SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
|
||||
if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingSubs)) {
|
||||
Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
|
||||
NewSExt->takeName(I);
|
||||
I->replaceAllUsesWith(NewSExt);
|
||||
@ -1185,12 +1200,17 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
|
||||
}
|
||||
|
||||
// Add I to DominatingExprs if it's an add/sub that can't sign overflow.
|
||||
if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
|
||||
match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
|
||||
if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
|
||||
if (programUndefinedIfFullPoison(I)) {
|
||||
const SCEV *Key =
|
||||
SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
|
||||
DominatingExprs[Key].push_back(I);
|
||||
DominatingAdds[Key].push_back(I);
|
||||
}
|
||||
} else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
|
||||
if (programUndefinedIfFullPoison(I)) {
|
||||
const SCEV *Key =
|
||||
SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
|
||||
DominatingSubs[Key].push_back(I);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
@ -1198,7 +1218,8 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
|
||||
|
||||
bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
|
||||
bool Changed = false;
|
||||
DominatingExprs.clear();
|
||||
DominatingAdds.clear();
|
||||
DominatingSubs.clear();
|
||||
for (const auto Node : depth_first(DT)) {
|
||||
BasicBlock *BB = Node->getBlock();
|
||||
for (auto I = BB->begin(); I != BB->end(); ) {
|
||||
|
@ -0,0 +1,31 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -separate-const-offset-from-gep < %s | FileCheck %s
|
||||
|
||||
define void @matchingExtensions(i32* %ap, i32* %bp, i64* %result) {
|
||||
; CHECK-LABEL: @matchingExtensions(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[AP:%.*]]
|
||||
; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[BP:%.*]]
|
||||
; CHECK-NEXT: [[EB:%.*]] = sext i32 [[B]] to i64
|
||||
; CHECK-NEXT: [[SUBAB:%.*]] = sub nsw i32 [[A]], [[B]]
|
||||
; CHECK-NEXT: [[EA:%.*]] = sext i32 [[A]] to i64
|
||||
; CHECK-NEXT: [[ADDEAEB:%.*]] = add nsw i64 [[EA]], [[EB]]
|
||||
; CHECK-NEXT: [[EXTSUB:%.*]] = sext i32 [[SUBAB]] to i64
|
||||
; CHECK-NEXT: [[IDX:%.*]] = getelementptr i32, i32* [[AP]], i64 [[EXTSUB]]
|
||||
; CHECK-NEXT: store i64 [[ADDEAEB]], i64* [[RESULT:%.*]]
|
||||
; CHECK-NEXT: store i32 [[SUBAB]], i32* [[IDX]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = load i32, i32* %ap
|
||||
%b = load i32, i32* %bp
|
||||
%eb = sext i32 %b to i64
|
||||
%subab = sub nsw i32 %a, %b
|
||||
%ea = sext i32 %a to i64
|
||||
%addeaeb = add nsw i64 %ea, %eb
|
||||
%extsub = sext i32 %subab to i64
|
||||
%idx = getelementptr i32, i32* %ap, i64 %extsub
|
||||
store i64 %addeaeb, i64* %result
|
||||
store i32 %subab, i32* %idx
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user