mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
87ee2e6422
PowerPC has its own custom scheduler heuristic. It calls the parent classes' tryCandidate from the overriding version, but that function returns void, so this approach doesn't actually help. This patch duplicates the code from the base scheduler into the PPC machine scheduler class, which does what we wanted. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D94464
259 lines
13 KiB
LLVM
259 lines
13 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
|
|
|
; void foo(float *data, float d) {
|
|
; long i;
|
|
; for (i = 0; i < 8000; i++)
|
|
; data[i] = d;
|
|
; }
|
|
;
|
|
; This loop will be unrolled by 96 and vectorized on power9.
|
|
; The icmp between the loop iteration index and the loop trip count (384) has an LSRUse for 'reg({0,+,384})'.
|
|
; Make sure the above icmp does not prevent LSR from choosing the best formulae set, which is based on 'reg({(192 + %0),+,384})'.
|
|
|
|
; @foo stores %d to all 8000 floats of %data (elements 0..7999):
;   entry:       builds twelve identical <4 x float> splats of %d.
;   vector.body: 83 iterations, each storing 24 vectors (96 floats, 384 bytes).
;   for.body:    32 scalar stores covering the remaining elements 7968..7999.
; The expected pwr9 output below addresses every vector store in the loop from
; a single pointer register (register 4 = register 3 + 192, bumped by 384 per
; iteration) with displacements -192..176, and counts the 83 iterations in CTR.
define void @foo(float* nocapture %data, float %d) {
|
|
; CHECK-LABEL: foo:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: xscvdpspn 0, 1
|
|
; CHECK-NEXT: li 5, 83
|
|
; CHECK-NEXT: addi 4, 3, 192
|
|
; CHECK-NEXT: mtctr 5
|
|
; CHECK-NEXT: xxspltw 0, 0, 0
|
|
; CHECK-NEXT: .p2align 4
|
|
; CHECK-NEXT: .LBB0_1: # %vector.body
|
|
; CHECK-NEXT: #
|
|
; CHECK-NEXT: stxv 0, -192(4)
|
|
; CHECK-NEXT: stxv 0, -176(4)
|
|
; CHECK-NEXT: stxv 0, -160(4)
|
|
; CHECK-NEXT: stxv 0, -144(4)
|
|
; CHECK-NEXT: stxv 0, -128(4)
|
|
; CHECK-NEXT: stxv 0, -112(4)
|
|
; CHECK-NEXT: stxv 0, -96(4)
|
|
; CHECK-NEXT: stxv 0, -80(4)
|
|
; CHECK-NEXT: stxv 0, -64(4)
|
|
; CHECK-NEXT: stxv 0, -48(4)
|
|
; CHECK-NEXT: stxv 0, -32(4)
|
|
; CHECK-NEXT: stxv 0, -16(4)
|
|
; CHECK-NEXT: stxv 0, 0(4)
|
|
; CHECK-NEXT: stxv 0, 16(4)
|
|
; CHECK-NEXT: stxv 0, 32(4)
|
|
; CHECK-NEXT: stxv 0, 48(4)
|
|
; CHECK-NEXT: stxv 0, 64(4)
|
|
; CHECK-NEXT: stxv 0, 80(4)
|
|
; CHECK-NEXT: stxv 0, 96(4)
|
|
; CHECK-NEXT: stxv 0, 112(4)
|
|
; CHECK-NEXT: stxv 0, 128(4)
|
|
; CHECK-NEXT: stxv 0, 144(4)
|
|
; CHECK-NEXT: stxv 0, 160(4)
|
|
; CHECK-NEXT: stxv 0, 176(4)
|
|
; CHECK-NEXT: addi 4, 4, 384
|
|
; CHECK-NEXT: bdnz .LBB0_1
|
|
; CHECK-NEXT: # %bb.2: # %for.body
|
|
; CHECK-NEXT: stfs 1, 31872(3)
|
|
; CHECK-NEXT: stfs 1, 31876(3)
|
|
; CHECK-NEXT: stfs 1, 31880(3)
|
|
; CHECK-NEXT: stfs 1, 31884(3)
|
|
; CHECK-NEXT: stfs 1, 31888(3)
|
|
; CHECK-NEXT: stfs 1, 31892(3)
|
|
; CHECK-NEXT: stfs 1, 31896(3)
|
|
; CHECK-NEXT: stfs 1, 31900(3)
|
|
; CHECK-NEXT: stfs 1, 31904(3)
|
|
; CHECK-NEXT: stfs 1, 31908(3)
|
|
; CHECK-NEXT: stfs 1, 31912(3)
|
|
; CHECK-NEXT: stfs 1, 31916(3)
|
|
; CHECK-NEXT: stfs 1, 31920(3)
|
|
; CHECK-NEXT: stfs 1, 31924(3)
|
|
; CHECK-NEXT: stfs 1, 31928(3)
|
|
; CHECK-NEXT: stfs 1, 31932(3)
|
|
; CHECK-NEXT: stfs 1, 31936(3)
|
|
; CHECK-NEXT: stfs 1, 31940(3)
|
|
; CHECK-NEXT: stfs 1, 31944(3)
|
|
; CHECK-NEXT: stfs 1, 31948(3)
|
|
; CHECK-NEXT: stfs 1, 31952(3)
|
|
; CHECK-NEXT: stfs 1, 31956(3)
|
|
; CHECK-NEXT: stfs 1, 31960(3)
|
|
; CHECK-NEXT: stfs 1, 31964(3)
|
|
; CHECK-NEXT: stfs 1, 31968(3)
|
|
; CHECK-NEXT: stfs 1, 31972(3)
|
|
; CHECK-NEXT: stfs 1, 31976(3)
|
|
; CHECK-NEXT: stfs 1, 31980(3)
|
|
; CHECK-NEXT: stfs 1, 31984(3)
|
|
; CHECK-NEXT: stfs 1, 31988(3)
|
|
; CHECK-NEXT: stfs 1, 31992(3)
|
|
; CHECK-NEXT: stfs 1, 31996(3)
|
|
; CHECK-NEXT: blr
|
|
|
|
; All twelve splats below hold the same value: %d is inserted into lane 0 and
; then broadcast to every lane by a zeroinitializer shuffle mask.
entry:
|
|
%broadcast.splatinsert16 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat17 = shufflevector <4 x float> %broadcast.splatinsert16, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert18 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat19 = shufflevector <4 x float> %broadcast.splatinsert18, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert20 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat21 = shufflevector <4 x float> %broadcast.splatinsert20, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert22 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat23 = shufflevector <4 x float> %broadcast.splatinsert22, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert24 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat25 = shufflevector <4 x float> %broadcast.splatinsert24, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert26 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat27 = shufflevector <4 x float> %broadcast.splatinsert26, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert28 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat29 = shufflevector <4 x float> %broadcast.splatinsert28, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert30 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat31 = shufflevector <4 x float> %broadcast.splatinsert30, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert32 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat33 = shufflevector <4 x float> %broadcast.splatinsert32, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert34 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat35 = shufflevector <4 x float> %broadcast.splatinsert34, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert36 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat37 = shufflevector <4 x float> %broadcast.splatinsert36, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert38 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat39 = shufflevector <4 x float> %broadcast.splatinsert38, <4 x float> undef, <4 x i32> zeroinitializer
|
|
br label %vector.body
|
|
|
|
; First half of the iteration: twelve 16-byte stores at float offsets
; 0, 4, ..., 44 from %0 = &data[%index].
vector.body: ; preds = %vector.body, %entry
|
|
%index = phi i64 [ 0, %entry ], [ %index.next.1, %vector.body ]
|
|
%0 = getelementptr inbounds float, float* %data, i64 %index
|
|
%1 = bitcast float* %0 to <4 x float>*
|
|
store <4 x float> %broadcast.splat17, <4 x float>* %1, align 4
|
|
%2 = getelementptr inbounds float, float* %0, i64 4
|
|
%3 = bitcast float* %2 to <4 x float>*
|
|
store <4 x float> %broadcast.splat19, <4 x float>* %3, align 4
|
|
%4 = getelementptr inbounds float, float* %0, i64 8
|
|
%5 = bitcast float* %4 to <4 x float>*
|
|
store <4 x float> %broadcast.splat21, <4 x float>* %5, align 4
|
|
%6 = getelementptr inbounds float, float* %0, i64 12
|
|
%7 = bitcast float* %6 to <4 x float>*
|
|
store <4 x float> %broadcast.splat23, <4 x float>* %7, align 4
|
|
%8 = getelementptr inbounds float, float* %0, i64 16
|
|
%9 = bitcast float* %8 to <4 x float>*
|
|
store <4 x float> %broadcast.splat25, <4 x float>* %9, align 4
|
|
%10 = getelementptr inbounds float, float* %0, i64 20
|
|
%11 = bitcast float* %10 to <4 x float>*
|
|
store <4 x float> %broadcast.splat27, <4 x float>* %11, align 4
|
|
%12 = getelementptr inbounds float, float* %0, i64 24
|
|
%13 = bitcast float* %12 to <4 x float>*
|
|
store <4 x float> %broadcast.splat29, <4 x float>* %13, align 4
|
|
%14 = getelementptr inbounds float, float* %0, i64 28
|
|
%15 = bitcast float* %14 to <4 x float>*
|
|
store <4 x float> %broadcast.splat31, <4 x float>* %15, align 4
|
|
%16 = getelementptr inbounds float, float* %0, i64 32
|
|
%17 = bitcast float* %16 to <4 x float>*
|
|
store <4 x float> %broadcast.splat33, <4 x float>* %17, align 4
|
|
%18 = getelementptr inbounds float, float* %0, i64 36
|
|
%19 = bitcast float* %18 to <4 x float>*
|
|
store <4 x float> %broadcast.splat35, <4 x float>* %19, align 4
|
|
%20 = getelementptr inbounds float, float* %0, i64 40
|
|
%21 = bitcast float* %20 to <4 x float>*
|
|
store <4 x float> %broadcast.splat37, <4 x float>* %21, align 4
|
|
%22 = getelementptr inbounds float, float* %0, i64 44
|
|
%23 = bitcast float* %22 to <4 x float>*
|
|
store <4 x float> %broadcast.splat39, <4 x float>* %23, align 4
|
|
; Second half of the iteration: the same twelve stores, now relative to
; %24 = &data[%index + 48].
%index.next = add nuw nsw i64 %index, 48
|
|
%24 = getelementptr inbounds float, float* %data, i64 %index.next
|
|
%25 = bitcast float* %24 to <4 x float>*
|
|
store <4 x float> %broadcast.splat17, <4 x float>* %25, align 4
|
|
%26 = getelementptr inbounds float, float* %24, i64 4
|
|
%27 = bitcast float* %26 to <4 x float>*
|
|
store <4 x float> %broadcast.splat19, <4 x float>* %27, align 4
|
|
%28 = getelementptr inbounds float, float* %24, i64 8
|
|
%29 = bitcast float* %28 to <4 x float>*
|
|
store <4 x float> %broadcast.splat21, <4 x float>* %29, align 4
|
|
%30 = getelementptr inbounds float, float* %24, i64 12
|
|
%31 = bitcast float* %30 to <4 x float>*
|
|
store <4 x float> %broadcast.splat23, <4 x float>* %31, align 4
|
|
%32 = getelementptr inbounds float, float* %24, i64 16
|
|
%33 = bitcast float* %32 to <4 x float>*
|
|
store <4 x float> %broadcast.splat25, <4 x float>* %33, align 4
|
|
%34 = getelementptr inbounds float, float* %24, i64 20
|
|
%35 = bitcast float* %34 to <4 x float>*
|
|
store <4 x float> %broadcast.splat27, <4 x float>* %35, align 4
|
|
%36 = getelementptr inbounds float, float* %24, i64 24
|
|
%37 = bitcast float* %36 to <4 x float>*
|
|
store <4 x float> %broadcast.splat29, <4 x float>* %37, align 4
|
|
%38 = getelementptr inbounds float, float* %24, i64 28
|
|
%39 = bitcast float* %38 to <4 x float>*
|
|
store <4 x float> %broadcast.splat31, <4 x float>* %39, align 4
|
|
%40 = getelementptr inbounds float, float* %24, i64 32
|
|
%41 = bitcast float* %40 to <4 x float>*
|
|
store <4 x float> %broadcast.splat33, <4 x float>* %41, align 4
|
|
%42 = getelementptr inbounds float, float* %24, i64 36
|
|
%43 = bitcast float* %42 to <4 x float>*
|
|
store <4 x float> %broadcast.splat35, <4 x float>* %43, align 4
|
|
%44 = getelementptr inbounds float, float* %24, i64 40
|
|
%45 = bitcast float* %44 to <4 x float>*
|
|
store <4 x float> %broadcast.splat37, <4 x float>* %45, align 4
|
|
%46 = getelementptr inbounds float, float* %24, i64 44
|
|
%47 = bitcast float* %46 to <4 x float>*
|
|
store <4 x float> %broadcast.splat39, <4 x float>* %47, align 4
|
|
; Advance by the full 96 floats written this iteration; the loop exits once
; the index reaches 7968 (83 iterations).
%index.next.1 = add nuw nsw i64 %index, 96
|
|
%48 = icmp eq i64 %index.next.1, 7968
|
|
br i1 %48, label %for.body, label %vector.body
|
|
|
|
; Remainder: elements 7968..7999 are stored one float at a time.
for.body: ; preds = %vector.body
|
|
%arrayidx = getelementptr inbounds float, float* %data, i64 7968
|
|
store float %d, float* %arrayidx, align 4
|
|
%arrayidx.1 = getelementptr inbounds float, float* %data, i64 7969
|
|
store float %d, float* %arrayidx.1, align 4
|
|
%arrayidx.2 = getelementptr inbounds float, float* %data, i64 7970
|
|
store float %d, float* %arrayidx.2, align 4
|
|
%arrayidx.3 = getelementptr inbounds float, float* %data, i64 7971
|
|
store float %d, float* %arrayidx.3, align 4
|
|
%arrayidx.4 = getelementptr inbounds float, float* %data, i64 7972
|
|
store float %d, float* %arrayidx.4, align 4
|
|
%arrayidx.5 = getelementptr inbounds float, float* %data, i64 7973
|
|
store float %d, float* %arrayidx.5, align 4
|
|
%arrayidx.6 = getelementptr inbounds float, float* %data, i64 7974
|
|
store float %d, float* %arrayidx.6, align 4
|
|
%arrayidx.7 = getelementptr inbounds float, float* %data, i64 7975
|
|
store float %d, float* %arrayidx.7, align 4
|
|
%arrayidx.8 = getelementptr inbounds float, float* %data, i64 7976
|
|
store float %d, float* %arrayidx.8, align 4
|
|
%arrayidx.9 = getelementptr inbounds float, float* %data, i64 7977
|
|
store float %d, float* %arrayidx.9, align 4
|
|
%arrayidx.10 = getelementptr inbounds float, float* %data, i64 7978
|
|
store float %d, float* %arrayidx.10, align 4
|
|
%arrayidx.11 = getelementptr inbounds float, float* %data, i64 7979
|
|
store float %d, float* %arrayidx.11, align 4
|
|
%arrayidx.12 = getelementptr inbounds float, float* %data, i64 7980
|
|
store float %d, float* %arrayidx.12, align 4
|
|
%arrayidx.13 = getelementptr inbounds float, float* %data, i64 7981
|
|
store float %d, float* %arrayidx.13, align 4
|
|
%arrayidx.14 = getelementptr inbounds float, float* %data, i64 7982
|
|
store float %d, float* %arrayidx.14, align 4
|
|
%arrayidx.15 = getelementptr inbounds float, float* %data, i64 7983
|
|
store float %d, float* %arrayidx.15, align 4
|
|
%arrayidx.16 = getelementptr inbounds float, float* %data, i64 7984
|
|
store float %d, float* %arrayidx.16, align 4
|
|
%arrayidx.17 = getelementptr inbounds float, float* %data, i64 7985
|
|
store float %d, float* %arrayidx.17, align 4
|
|
%arrayidx.18 = getelementptr inbounds float, float* %data, i64 7986
|
|
store float %d, float* %arrayidx.18, align 4
|
|
%arrayidx.19 = getelementptr inbounds float, float* %data, i64 7987
|
|
store float %d, float* %arrayidx.19, align 4
|
|
%arrayidx.20 = getelementptr inbounds float, float* %data, i64 7988
|
|
store float %d, float* %arrayidx.20, align 4
|
|
%arrayidx.21 = getelementptr inbounds float, float* %data, i64 7989
|
|
store float %d, float* %arrayidx.21, align 4
|
|
%arrayidx.22 = getelementptr inbounds float, float* %data, i64 7990
|
|
store float %d, float* %arrayidx.22, align 4
|
|
%arrayidx.23 = getelementptr inbounds float, float* %data, i64 7991
|
|
store float %d, float* %arrayidx.23, align 4
|
|
%arrayidx.24 = getelementptr inbounds float, float* %data, i64 7992
|
|
store float %d, float* %arrayidx.24, align 4
|
|
%arrayidx.25 = getelementptr inbounds float, float* %data, i64 7993
|
|
store float %d, float* %arrayidx.25, align 4
|
|
%arrayidx.26 = getelementptr inbounds float, float* %data, i64 7994
|
|
store float %d, float* %arrayidx.26, align 4
|
|
%arrayidx.27 = getelementptr inbounds float, float* %data, i64 7995
|
|
store float %d, float* %arrayidx.27, align 4
|
|
%arrayidx.28 = getelementptr inbounds float, float* %data, i64 7996
|
|
store float %d, float* %arrayidx.28, align 4
|
|
%arrayidx.29 = getelementptr inbounds float, float* %data, i64 7997
|
|
store float %d, float* %arrayidx.29, align 4
|
|
%arrayidx.30 = getelementptr inbounds float, float* %data, i64 7998
|
|
store float %d, float* %arrayidx.30, align 4
|
|
%arrayidx.31 = getelementptr inbounds float, float* %data, i64 7999
|
|
store float %d, float* %arrayidx.31, align 4
|
|
ret void
|
|
}
|