[GVN] Don't perform scalar PRE on GEPs

Partial Redundancy Elimination of GEPs prevents CodeGenPrepare from sinking the addressing mode computation of memory instructions back to its uses. The problem comes from the insertion of PHIs, which confuse CGP and make it bail. I've autogenerated the check lines of an existing test and added a store instruction to demonstrate the motivation behind this change. The store is now using the gep instead of a phi. Differential Revision: https://reviews.llvm.org/D55009 llvm-svn: 348496
2024-11-22 18:54:02 +01:00 · 2018-12-06 16:11:58 +00:00 · 2018-12-06 16:11:58 +00:00 · 9775e7e456
commit 9775e7e456
parent 52ecd327f2
2 changed files with 71 additions and 13 deletions
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@ -2156,6 +2156,16 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
  if (isa<CmpInst>(CurInst))
    return false;

+  // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
+  // sinking the addressing mode computation back to its uses. Extending the
+  // GEP's live range increases the register pressure, and therefore it can
+  // introduce unnecessary spills.
+  //
+  // This doesn't prevent Load PRE. PHI translation will make the GEP available
+  // to the load by moving it to the predecessor block if necessary.
+  if (isa<GetElementPtrInst>(CurInst))
+    return false;
+
  // We don't currently value number ANY inline asm calls.
  if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
    if (CallI->isInlineAsm())
--- a/test/Transforms/GVN/PRE/pre-gep-load.ll
+++ b/test/Transforms/GVN/PRE/pre-gep-load.ll
@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes=gvn -enable-load-pre -S | FileCheck %s

@ -6,11 +7,49 @@ target triple = "aarch64--linux-gnu"

 define double @foo(i32 %stat, i32 %i, double** %p) {
 ; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[STAT:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB]]
+; CHECK-NEXT:    i32 2, label [[ENTRY_SW_BB2_CRIT_EDGE:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       entry.sw.bb2_crit_edge:
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load double*, double** [[P:%.*]], align 8
+; CHECK-NEXT:    [[DOTPRE1:%.*]] = sext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds double, double* [[DOTPRE]], i64 [[DOTPRE1]]
+; CHECK-NEXT:    [[DOTPRE2:%.*]] = load double, double* [[ARRAYIDX5_PHI_TRANS_INSERT]], align 8
+; CHECK-NEXT:    br label [[SW_BB2:%.*]]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = load double*, double** [[P]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[SUB]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[RETURN:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[SW_BB2]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[DOTPRE2]], [[ENTRY_SW_BB2_CRIT_EDGE]] ], [ [[TMP1]], [[IF_END]] ]
+; CHECK-NEXT:    [[IDXPROM3_PRE_PHI:%.*]] = phi i64 [ [[DOTPRE1]], [[ENTRY_SW_BB2_CRIT_EDGE]] ], [ [[IDXPROM]], [[IF_END]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi double* [ [[DOTPRE]], [[ENTRY_SW_BB2_CRIT_EDGE]] ], [ [[TMP0]], [[IF_END]] ]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[IDXPROM3_PRE_PHI]]
+; CHECK-NEXT:    [[SUB6:%.*]] = fsub double 3.000000e+00, [[TMP2]]
+; CHECK-NEXT:    store double [[SUB6]], double* [[ARRAYIDX5]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi double [ 0.000000e+00, [[SW_DEFAULT]] ], [ [[SUB6]], [[SW_BB2]] ], [ [[SUB]], [[IF_THEN]] ]
+; CHECK-NEXT:    ret double [[RETVAL_0]]
+;
 entry:
  switch i32 %stat, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb2
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb
+  i32 2, label %sw.bb2
  ]

 sw.bb:                                            ; preds = %entry, %entry
@ -35,11 +74,8 @@ sw.bb2:                                           ; preds = %if.end, %entry
  %2 = load double*, double** %arrayidx4, align 8
  %arrayidx5 = getelementptr inbounds double, double* %2, i64 %idxprom3
  %3 = load double, double* %arrayidx5, align 8
-; CHECK: sw.bb2:
-; CHECK-NOT: sext
-; CHECK: phi double [
-; CHECK-NOT: load
  %sub6 = fsub double 3.000000e+00, %3
+  store double %sub6, double* %arrayidx5
  br label %return

 sw.default:                                       ; preds = %entry
@ -55,12 +91,24 @@ return:                                           ; preds = %sw.default, %sw.bb2
 ; actually processed. Make sure we can deal with the situation.

 define void @test_shortcut_safe(i1 %tst, i32 %p1, i32* %a) {
-; CHECK-LABEL: define void @test_shortcut_safe
-; CHECK: [[SEXT1:%.*]] = sext i32 %p1 to i64
-; CHECK: [[PHI1:%.*]] = phi i64 [ [[SEXT1]], {{%.*}} ], [ [[PHI2:%.*]], {{%.*}} ]
-; CHECK: [[SEXT2:%.*]] = sext i32 %p1 to i64
-; CHECK: [[PHI2]] = phi i64 [ [[SEXT2]], {{.*}} ], [ [[PHI1]], {{%.*}} ]
-; CHECK: getelementptr inbounds i32, i32* %a, i64 [[PHI2]]
+; CHECK-LABEL: @test_shortcut_safe(
+; CHECK-NEXT:    br i1 [[TST:%.*]], label [[SEXT1:%.*]], label [[DOTPRE_DEST_CRIT_EDGE:%.*]]
+; CHECK:       .pre.dest_crit_edge:
+; CHECK-NEXT:    [[DOTPRE1:%.*]] = sext i32 [[P1:%.*]] to i64
+; CHECK-NEXT:    br label [[PRE_DEST:%.*]]
+; CHECK:       pre.dest:
+; CHECK-NEXT:    [[DOTPRE_PRE_PHI:%.*]] = phi i64 [ [[DOTPRE1]], [[DOTPRE_DEST_CRIT_EDGE]] ], [ [[IDXPROM2_PRE_PHI:%.*]], [[SEXT_USE:%.*]] ]
+; CHECK-NEXT:    br label [[SEXT_USE]]
+; CHECK:       sext1:
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[P1]] to i64
+; CHECK-NEXT:    br label [[SEXT_USE]]
+; CHECK:       sext.use:
+; CHECK-NEXT:    [[IDXPROM2_PRE_PHI]] = phi i64 [ [[IDXPROM]], [[SEXT1]] ], [ [[DOTPRE_PRE_PHI]], [[PRE_DEST]] ]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM2_PRE_PHI]]
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    tail call void @g(i32 [[VAL]])
+; CHECK-NEXT:    br label [[PRE_DEST]]
+;

  br i1 %tst, label %sext1, label %pre.dest