mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 13:11:39 +01:00
cfc48bb77f
Summary: GVNHoist performs all the optimizations that MLSM does to loads, in a more general way, and in a faster time bound (MLSM is N^3 in most cases, N^4 in a few edge cases). This disables the load portion. Note that the way ld_hoist_st_sink.ll is written makes one think that the loads should be moved to the while.preheader block, but 1. Neither MLSM nor GVNHoist does it (they both move them to identical places). 2. MLSM couldn't possibly do it anyway, as the while.preheader block is not the head of the diamond, while.body is. (GVNHoist could do it if it was legal). 3. At a glance, it's not legal anyway because the in-loop loads conflict with the in-loop store, so the loads must stay in-loop. I am happy to update the test to use update_test_checks so that checking is tighter; I was just going to do it as a followup. Note that I can find no particular benefit to the store portion on any real testcase/benchmark I have (even size-wise). If we really still want it, I am happy to commit to writing a targeted store sinker, just taking the code from the MemorySSA port of MergedLoadStoreMotion (which is N^2 worst case, and N most of the time). We can do what it does in a much better time bound. We also should be both hoisting and sinking stores, not just sinking them, anyway, since whether we should hoist or sink to merge depends basically on luck of the draw of where the blockers are placed. Nonetheless, I have left it alone for now. Reviewers: chandlerc, davide Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D29079 llvm-svn: 292971
65 lines
2.5 KiB
LLVM
65 lines
2.5 KiB
LLVM
; Test load hoist
; The function below loads the same %in element in both arms of an if/else
; inside a loop; the checks expect a single load of it in the block that
; branches to those arms.
; RUN: opt -gvn-hoist -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; NOTE(review): the triple uses an underscore ("pc_linux") rather than the
; usual dash-separated vendor-os form; kept as-is, confirm it is intentional.
target triple = "x86_64-pc_linux"

; Function Attrs: nounwind uwtable
;
; @foo(%in, %out, %size, %trigger) returns %out.
;   - Returns immediately when %size == 0.
;   - Otherwise loops with a 64-bit index starting at 0 and leaves the loop
;     once the index, truncated to i32, equals %size - 1.
;   - Each iteration branches on %trigger[i] > 0:
;       if.then: out[i] = (float)in[i] + 0.5
;       if.else: out[i] = out[i] / 3.0, then out[i] = that quotient + (float)in[i]
;   - Both arms load in[i] from the same address expression, which is the
;     redundancy the pass under test is expected to merge into for.body.
define float* @foo(i32* noalias nocapture readonly %in, float* noalias %out, i32 %size, i32* nocapture readonly %trigger) {
entry:
  %cmp11 = icmp eq i32 %size, 0
  br i1 %cmp11, label %for.end, label %for.body.lr.ph

for.body.lr.ph:                                   ; preds = %entry
  ; Last index value; compared against the truncated induction variable in for.inc.
  %0 = add i32 %size, -1
  br label %for.body

; The two label patterns in this file end with a colon so they match the block
; label definitions themselves, not branch operands or %for.body.lr.ph.
; Expected output in for.body: the existing %trigger load (%1), followed by a
; fresh address computation and load of %in (numbered %2 and %3).
; CHECK-LABEL: for.body:
; CHECK: load
; CHECK: %2 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
; CHECK: %3 = load i32, i32* %2, align 4

for.body:                                         ; preds = %for.body.lr.ph, %for.inc
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp sgt i32 %1, 0
  br i1 %cmp1, label %if.then, label %if.else

; CHECK-LABEL: if.then:
if.then:                                          ; preds = %for.body
; This load should be hoisted
  %arrayidx3 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
  %2 = load i32, i32* %arrayidx3, align 4
  %conv = sitofp i32 %2 to float
  %add = fadd float %conv, 5.000000e-01
  %arrayidx5 = getelementptr inbounds float, float* %out, i64 %indvars.iv
  store float %add, float* %arrayidx5, align 4
  br label %for.inc

if.else:                                          ; preds = %for.body
  %arrayidx7 = getelementptr inbounds float, float* %out, i64 %indvars.iv
  %3 = load float, float* %arrayidx7, align 4
  %div = fdiv float %3, 3.000000e+00
  store float %div, float* %arrayidx7, align 4
; This load should be hoisted in spite of store
; (the preceding store writes through %out, while this load reads through
; %in; both parameters are declared noalias)
  %arrayidx9 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
  %4 = load i32, i32* %arrayidx9, align 4
  %conv10 = sitofp i32 %4 to float
  %add13 = fadd float %div, %conv10
  store float %add13, float* %arrayidx7, align 4
  br label %for.inc

for.inc:                                          ; preds = %if.then, %if.else
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test uses the current index (not the incremented one) against %size - 1.
  %lftr.wideiv = trunc i64 %indvars.iv to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %0
  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.inc
  br label %for.end

for.end:                                          ; preds = %entry, %for.cond.for.end_crit_edge
  ret float* %out
}