; Mirror of https://github.com/RPCS3/llvm-mirror.git
; (synced 2024-10-30 23:42:52 +01:00), commit 3b65689ab9.
;
; Commit message:
;   misched used GetUnderlyingObject in order to break false load/store
;   dependencies, and the -enable-aa-sched-mi feature similarly relied on
;   GetUnderlyingObject in order to ensure it is safe to use the aliasing
;   analysis. Unfortunately, GetUnderlyingObject does not recurse through phi
;   nodes, and so (especially due to LSR) all of these mechanisms failed for
;   induction-variable-dependent loads and stores inside loops. This change
;   replaces uses of GetUnderlyingObject with GetUnderlyingObjects (which will
;   recurse through phi and select instructions) in misched. Andy reviewed,
;   tested and simplified this patch; Thanks! llvm-svn: 169744
;
; File size as listed by the mirror: 102 lines, 5.4 KiB, LLVM.
; Target configuration for this test module.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"

; Both llc invocations enable the machine scheduler (-enable-misched); the
; second one additionally passes -enable-aa-sched-mi. The same FileCheck
; expectations are applied to the output of each invocation.
; RUN: llc < %s -enable-misched -march=ppc64 -mcpu=a2 | FileCheck %s
; RUN: llc < %s -enable-misched -enable-aa-sched-mi -march=ppc64 -mcpu=a2 | FileCheck %s

; External data referenced by @s000. In the inner loop @Y is loaded from and
; @X is stored to; @X, @Y, @Z, @U, @V, @aa, @bb and @cc are all passed to
; @dummy. @.str1 is the argument to @init and @.str137 is the format argument
; to @printf.
@aa = external global [256 x [256 x double]], align 32
@bb = external global [256 x [256 x double]], align 32
@cc = external global [256 x [256 x double]], align 32
@.str1 = external hidden unnamed_addr constant [6 x i8], align 1
@X = external global [16000 x double], align 32
@Y = external global [16000 x double], align 32
@Z = external global [16000 x double], align 32
@U = external global [16000 x double], align 32
@V = external global [16000 x double], align 32
@.str137 = external hidden unnamed_addr constant [14 x i8], align 1

; External functions called from @s000.
declare void @check(i32 signext) nounwind

declare signext i32 @printf(i8* nocapture, ...) nounwind

declare signext i32 @init(i8*) nounwind

; Test kernel: an outer loop of 400000 iterations around an inner loop that,
; for each index, loads a value from @Y, adds 1.0 and stores it to @X. The
; inner loop handles four indices per iteration (iv, iv|4, iv|8, iv|12) and
; advances the induction variable by 16. The FileCheck directives after the
; inner loop verify how the scheduler orders the resulting loads and stores.
define signext i32 @s000() nounwind {
entry:
  %call = tail call signext i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0))
  %call1 = tail call i64 @clock() nounwind
  br label %for.cond2.preheader

; CHECK: @s000

for.cond2.preheader: ; preds = %for.end, %entry
  ; Outer-loop counter; incremented in %for.end.
  %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
  br label %for.body4

for.body4: ; preds = %for.body4, %for.cond2.preheader
  ; Inner-loop induction variable: starts at 0 and steps by 16, so the `or`
  ; with 4, 8 and 12 below yields the same index an add would.
  %indvars.iv = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next.15, %for.body4 ]

  ; Group 0: X[iv] = Y[iv] + 1.0
  %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
  %arrayidx6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
  %0 = bitcast double* %arrayidx to <1 x double>*
  %1 = load <1 x double>* %0, align 32, !tbaa !0
  %add = fadd <1 x double> %1, <double 1.000000e+00>
  %2 = bitcast double* %arrayidx6 to <1 x double>*
  store <1 x double> %add, <1 x double>* %2, align 32, !tbaa !0

  ; Group 1: X[iv|4] = Y[iv|4] + 1.0
  %indvars.iv.next.322 = or i64 %indvars.iv, 4
  %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.322
  %arrayidx6.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.322
  %3 = bitcast double* %arrayidx.4 to <1 x double>*
  %4 = load <1 x double>* %3, align 32, !tbaa !0
  %add.4 = fadd <1 x double> %4, <double 1.000000e+00>
  %5 = bitcast double* %arrayidx6.4 to <1 x double>*
  store <1 x double> %add.4, <1 x double>* %5, align 32, !tbaa !0

  ; Group 2: X[iv|8] = Y[iv|8] + 1.0
  %indvars.iv.next.726 = or i64 %indvars.iv, 8
  %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.726
  %arrayidx6.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.726
  %6 = bitcast double* %arrayidx.8 to <1 x double>*
  %7 = load <1 x double>* %6, align 32, !tbaa !0
  %add.8 = fadd <1 x double> %7, <double 1.000000e+00>
  %8 = bitcast double* %arrayidx6.8 to <1 x double>*
  store <1 x double> %add.8, <1 x double>* %8, align 32, !tbaa !0

  ; Group 3: X[iv|12] = Y[iv|12] + 1.0
  %indvars.iv.next.1130 = or i64 %indvars.iv, 12
  %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1130
  %arrayidx6.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1130
  %9 = bitcast double* %arrayidx.12 to <1 x double>*
  %10 = load <1 x double>* %9, align 32, !tbaa !0
  %add.12 = fadd <1 x double> %10, <double 1.000000e+00>
  %11 = bitcast double* %arrayidx6.12 to <1 x double>*
  store <1 x double> %add.12, <1 x double>* %11, align 32, !tbaa !0

  ; Advance by 16 and leave the inner loop once the truncated index is 16000.
  %indvars.iv.next.15 = add i64 %indvars.iv, 16
  %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
  %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000
  br i1 %exitcond.15, label %for.end, label %for.body4

; All of the loads should come before all of the stores.
; (Between the mtctr and the bdnz, no lfd may follow the first stfd.)
; CHECK: mtctr
; CHECK: stfd
; CHECK-NOT: lfd
; CHECK: bdnz

for.end: ; preds = %for.body4
  %call7 = tail call signext i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
  ; Outer loop terminates after 400000 iterations.
  %inc9 = add nsw i32 %nl.018, 1
  %exitcond = icmp eq i32 %inc9, 400000
  br i1 %exitcond, label %for.end10, label %for.cond2.preheader

for.end10: ; preds = %for.end
  ; Difference of the two @clock results, divided by 1e6, is passed to @printf.
  %call11 = tail call i64 @clock() nounwind
  %sub = sub nsw i64 %call11, %call1
  %conv = sitofp i64 %sub to double
  %div = fdiv double %conv, 1.000000e+06
  %call12 = tail call signext i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str137, i64 0, i64 0), double %div) nounwind
  tail call void @check(i32 signext 1)
  ret i32 0
}

; External functions called from @s000: @clock is called before and after the
; loops, and @dummy is called once per outer-loop iteration with pointers to
; the global arrays.
declare i64 @clock() nounwind

declare signext i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double)

; TBAA type nodes attached to the loads and stores in @s000 via !tbaa !0:
; "double" (!0) -> "omnipotent char" (!1) -> "Simple C/C++ TBAA" (!2).
!0 = metadata !{metadata !"double", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA"}