From 6c391c66193c5e6cc30b2eb972f77c4a86ef9bfc Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Thu, 28 May 2015 04:56:52 +0000 Subject: [PATCH] [NaryReassociate] Run EarlyCSE after NaryReassociate Summary: This patch made two improvements to NaryReassociate and the NVPTX pipeline 1. Run EarlyCSE/GVN after NaryReassociate to get rid of redundant common expressions. 2. When adding an instruction to SeenExprs, maps both the SCEV before and after reassociation to that instruction. Test Plan: updated @reassociate_gep_nsw in nary-gep.ll Reviewers: meheff, broune Reviewed By: broune Subscribers: dberlin, jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D9947 llvm-svn: 238396 --- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 3 ++ lib/Transforms/Scalar/NaryReassociate.cpp | 24 +++++++++++++- .../NaryReassociate/NVPTX/nary-gep.ll | 31 +++++++++++++------ 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index ac27c30aaba..c839ef0289b 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -181,6 +181,9 @@ void NVPTXPassConfig::addIRPasses() { addPass(createEarlyCSEPass()); // Run NaryReassociate after EarlyCSE/GVN to be more effective. addPass(createNaryReassociatePass()); + // NaryReassociate on GEPs creates redundant common expressions, so run + // EarlyCSE after it. + addPass(createEarlyCSEPass()); } bool NVPTXPassConfig::addInstSelector() { diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index 5b370e04088..6ac5ff85e32 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -234,6 +234,7 @@ bool NaryReassociate::doOneIteration(Function &F) { BasicBlock *BB = Node->getBlock(); for (auto I = BB->begin(); I != BB->end(); ++I) { if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) { + const SCEV *OldSCEV = SE->getSCEV(I); if (Instruction *NewI = tryReassociate(I)) { Changed = true; SE->forgetValue(I); @@ -243,7 +244,28 @@ bool NaryReassociate::doOneIteration(Function &F) { } // Add the rewritten instruction to SeenExprs; the original instruction // is deleted. - SeenExprs[SE->getSCEV(I)].push_back(I); + const SCEV *NewSCEV = SE->getSCEV(I); + SeenExprs[NewSCEV].push_back(I); + // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I) + // is equivalent to I. However, ScalarEvolution::getSCEV may + // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose + // we reassociate + // I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4 + // to + // NewI = &a[sext(i)] + sext(j). + // + // ScalarEvolution computes + // getSCEV(I) = a + 4 * sext(i + j) + // getSCEV(newI) = a + 4 * sext(i) + 4 * sext(j) + // which are different SCEVs. + // + // To alleviate this issue of ScalarEvolution not always capturing + // equivalence, we add I to SeenExprs[OldSCEV] as well so that we can + // map both SCEV before and after tryReassociate(I) to I. + // + // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll. + if (NewSCEV != OldSCEV) + SeenExprs[OldSCEV].push_back(I); } } } diff --git a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll index a620c98ec18..d08c6f60c04 100644 --- a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll +++ b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -nary-reassociate -S | FileCheck %s +; RUN: opt < %s -nary-reassociate -early-cse -S | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-unknown-unknown" @@ -27,24 +27,37 @@ define void @reassociate_gep(float* %a, i64 %i, i64 %j) { ; foo(&a[sext(j)]); ; foo(&a[sext(i +nsw j)]); +; foo(&a[sext((i +nsw j) +nsw i)]); ; => -; t = &a[sext(j)]; -; foo(t); -; foo(t + sext(i)); +; t1 = &a[sext(j)]; +; foo(t1); +; t2 = t1 + sext(i); +; foo(t2); +; t3 = t2 + sext(i); // sext(i) should be GVN'ed. +; foo(t3); define void @reassociate_gep_nsw(float* %a, i32 %i, i32 %j) { ; CHECK-LABEL: @reassociate_gep_nsw( - %1 = add nsw i32 %i, %j - %idxprom.1 = sext i32 %1 to i64 %idxprom.j = sext i32 %j to i64 - %2 = getelementptr float, float* %a, i64 %idxprom.j + %1 = getelementptr float, float* %a, i64 %idxprom.j ; CHECK: [[t1:[^ ]+]] = getelementptr float, float* %a, i64 %idxprom.j - call void @foo(float* %2) + call void @foo(float* %1) ; CHECK: call void @foo(float* [[t1]]) - %3 = getelementptr float, float* %a, i64 %idxprom.1 + + %2 = add nsw i32 %i, %j + %idxprom.2 = sext i32 %2 to i64 + %3 = getelementptr float, float* %a, i64 %idxprom.2 ; CHECK: [[sexti:[^ ]+]] = sext i32 %i to i64 ; CHECK: [[t2:[^ ]+]] = getelementptr float, float* [[t1]], i64 [[sexti]] call void @foo(float* %3) ; CHECK: call void @foo(float* [[t2]]) + + %4 = add nsw i32 %2, %i + %idxprom.4 = sext i32 %4 to i64 + %5 = getelementptr float, float* %a, i64 %idxprom.4 +; CHECK: [[t3:[^ ]+]] = getelementptr float, float* [[t2]], i64 [[sexti]] + call void @foo(float* %5) +; CHECK: call void @foo(float* [[t3]]) + ret void }