mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-31 16:02:52 +01:00
09d73ea35b
After collecting chains, check if any should be materialized. If so, hide the chained IV users from the LSR solver. LSR will only solve for the head of the chain. GenerateIVChains will then materialize the chained IV users by computing the IV relative to its previous value in the chain. In theory, chained IV users could be exposed to LSR's solver. This would be considerably complicated to implement and I'm not aware of a case where we need it. In practice it's more important to intelligently prune the search space of nontrivial loops before running the solver, otherwise the solver is often forced to prune the most optimal solutions. Hiding the chained users does this well, so that LSR is more likely to find the best IV for the chain as a whole. llvm-svn: 147801
97 lines
3.5 KiB
LLVM
97 lines
3.5 KiB
LLVM
; REQUIRES: asserts
|
|
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64
|
|
; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32
|
|
|
|
; @sharedidx is an unrolled variant of this loop:
|
|
; for (unsigned long i = 0; i < len; i += s) {
|
|
; c[i] = a[i] + b[i];
|
|
; }
|
|
; where 's' cannot be folded into the addressing mode.
|
|
;
|
|
; This is not quite profitable to chain. But with -stress-ivchain, we
|
|
; can form three address chains in place of the shared induction
|
|
; variable.
|
|
|
|
; X64: sharedidx:
|
|
; X64: %for.body.preheader
|
|
; X64-NOT: leal ({{.*}},4)
|
|
; X64: %for.body.1
|
|
|
|
; X32: sharedidx:
|
|
; X32: %for.body.2
|
|
; X32: add
|
|
; X32: add
|
|
; X32: add
|
|
; X32: add
|
|
; X32: add
|
|
; X32: %for.body.3
|
|
; @sharedidx(a, b, c, s, len): four-way unrolled form of a byte-wise loop that
; stores a[i] + b[i] into c[i] and advances i by %s after every element.
; Every unrolled copy indexes %a, %b and %c with the same index value.
; NOTE: the X64/X32 CHECK lines in this file match the block labels
; (%for.body.1, %for.body.2, %for.body.3), so the labels must not be renamed.
define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
|
|
entry:
|
|
; Skip the loop entirely when %len is zero.
%cmp8 = icmp eq i32 %len, 0
|
|
br i1 %cmp8, label %for.end, label %for.body
|
|
|
|
; First unrolled copy.
for.body: ; preds = %entry, %for.body.3
|
|
; %i.09 is the shared index: 0 when coming from %entry, %add5.3 when looping
; back from %for.body.3.
%i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
|
|
; Load a[i] as a byte and zero-extend it to i32.
%arrayidx = getelementptr inbounds i8* %a, i32 %i.09
|
|
%0 = load i8* %arrayidx, align 1
|
|
%conv6 = zext i8 %0 to i32
|
|
; Load b[i] as a byte and zero-extend it to i32.
%arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
|
|
%1 = load i8* %arrayidx1, align 1
|
|
%conv27 = zext i8 %1 to i32
|
|
; Add in i32, truncate the sum back to i8 and store it to c[i].
%add = add nsw i32 %conv27, %conv6
|
|
%conv3 = trunc i32 %add to i8
|
|
%arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
|
|
store i8 %conv3, i8* %arrayidx4, align 1
|
|
; Advance the index by %s; the unsigned compare against %len decides whether
; the next unrolled copy runs or the function exits.
%add5 = add i32 %i.09, %s
|
|
%cmp = icmp ult i32 %add5, %len
|
|
br i1 %cmp, label %for.body.1, label %for.end
|
|
|
|
; Common exit, reached from %entry and from each of the four unrolled copies.
for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
|
|
ret void
|
|
|
|
; Second unrolled copy: same computation as %for.body, indexed by %add5.
for.body.1: ; preds = %for.body
|
|
%arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
|
|
%2 = load i8* %arrayidx.1, align 1
|
|
%conv6.1 = zext i8 %2 to i32
|
|
%arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
|
|
%3 = load i8* %arrayidx1.1, align 1
|
|
%conv27.1 = zext i8 %3 to i32
|
|
%add.1 = add nsw i32 %conv27.1, %conv6.1
|
|
%conv3.1 = trunc i32 %add.1 to i8
|
|
%arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
|
|
store i8 %conv3.1, i8* %arrayidx4.1, align 1
|
|
; Next index is %add5 + %s; continue to the third copy only if it is below %len.
%add5.1 = add i32 %add5, %s
|
|
%cmp.1 = icmp ult i32 %add5.1, %len
|
|
br i1 %cmp.1, label %for.body.2, label %for.end
|
|
|
|
; Third unrolled copy: same computation, indexed by %add5.1.
for.body.2: ; preds = %for.body.1
|
|
%arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
|
|
%4 = load i8* %arrayidx.2, align 1
|
|
%conv6.2 = zext i8 %4 to i32
|
|
%arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
|
|
%5 = load i8* %arrayidx1.2, align 1
|
|
%conv27.2 = zext i8 %5 to i32
|
|
%add.2 = add nsw i32 %conv27.2, %conv6.2
|
|
%conv3.2 = trunc i32 %add.2 to i8
|
|
%arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
|
|
store i8 %conv3.2, i8* %arrayidx4.2, align 1
|
|
; Next index is %add5.1 + %s; continue to the fourth copy only if it is below %len.
%add5.2 = add i32 %add5.1, %s
|
|
%cmp.2 = icmp ult i32 %add5.2, %len
|
|
br i1 %cmp.2, label %for.body.3, label %for.end
|
|
|
|
; Fourth unrolled copy: same computation, indexed by %add5.2.
for.body.3: ; preds = %for.body.2
|
|
%arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
|
|
%6 = load i8* %arrayidx.3, align 1
|
|
%conv6.3 = zext i8 %6 to i32
|
|
%arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
|
|
%7 = load i8* %arrayidx1.3, align 1
|
|
%conv27.3 = zext i8 %7 to i32
|
|
%add.3 = add nsw i32 %conv27.3, %conv6.3
|
|
%conv3.3 = trunc i32 %add.3 to i8
|
|
%arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
|
|
store i8 %conv3.3, i8* %arrayidx4.3, align 1
|
|
; %add5.3 feeds the %i.09 phi in %for.body; loop back while it is below %len
; (unsigned), otherwise exit.
%add5.3 = add i32 %add5.2, %s
|
|
%cmp.3 = icmp ult i32 %add5.3, %len
|
|
br i1 %cmp.3, label %for.body, label %for.end
|
|
|
}
|