mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
3f23d4b8c3
tryLatency compares two sched candidates. For the top zone it prefers the one with lesser depth, but only if that depth is greater than the total latency of the instructions we've already scheduled -- otherwise its latency would be hidden and there would be no stall. Unfortunately it only tests the depth of one of the candidates. This can lead to situations where the TopDepthReduce heuristic does not kick in, but a lower priority heuristic chooses the other candidate, whose depth *is* greater than the already scheduled latency, which causes a stall. The fix is to apply the heuristic if the depth of *either* candidate is greater than the already scheduled latency. All this also applies to the BotHeightReduce heuristic in the bottom zone. Differential Revision: https://reviews.llvm.org/D72392
135 lines
4.4 KiB
LLVM
135 lines
4.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:    -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
; RUN:    -check-prefix=P9BE
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:    -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
; RUN:    -check-prefix=P9LE
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:    -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
; RUN:    -check-prefix=P8BE
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:    -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
; RUN:    -check-prefix=P8LE
; test1: take elements 0 and 1 of the <8 x i16> argument, convert each to
; double with uitofp, and return them as lanes 0 and 1 of a <2 x double>.
; The assertions below are autogenerated and use -NEXT matching, so they pin
; the exact instruction order for each of the four RUN configurations
; (pwr9/pwr8, big/little endian). Regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
define dso_local <2 x double> @test1(<8 x i16> %a) {
; P9BE-LABEL: test1:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    xscvuxddp f0, f0
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mtfprwz f1, r3
; P9BE-NEXT:    xscvuxddp f1, f1
; P9BE-NEXT:    xxmrghd v2, vs0, vs1
; P9BE-NEXT:    blr
;
; P9LE-LABEL: test1:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    xscvuxddp f0, f0
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mtfprwz f1, r3
; P9LE-NEXT:    xscvuxddp f1, f1
; P9LE-NEXT:    xxmrghd v2, vs1, vs0
; P9LE-NEXT:    blr
;
; P8BE-LABEL: test1:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    mfvsrd r3, v2
; P8BE-NEXT:    rldicl r4, r3, 16, 48
; P8BE-NEXT:    rldicl r3, r3, 32, 48
; P8BE-NEXT:    clrlwi r4, r4, 16
; P8BE-NEXT:    clrlwi r3, r3, 16
; P8BE-NEXT:    mtfprwz f0, r4
; P8BE-NEXT:    mtfprwz f1, r3
; P8BE-NEXT:    xscvuxddp f0, f0
; P8BE-NEXT:    xscvuxddp f1, f1
; P8BE-NEXT:    xxmrghd v2, vs0, vs1
; P8BE-NEXT:    blr
;
; P8LE-LABEL: test1:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    mffprd r3, f0
; P8LE-NEXT:    clrldi r4, r3, 48
; P8LE-NEXT:    rldicl r3, r3, 48, 48
; P8LE-NEXT:    clrlwi r4, r4, 16
; P8LE-NEXT:    clrlwi r3, r3, 16
; P8LE-NEXT:    mtfprwz f0, r4
; P8LE-NEXT:    mtfprwz f1, r3
; P8LE-NEXT:    xscvuxddp f0, f0
; P8LE-NEXT:    xscvuxddp f1, f1
; P8LE-NEXT:    xxmrghd v2, vs1, vs0
; P8LE-NEXT:    blr
entry:
  %vecext = extractelement <8 x i16> %a, i32 0
  %conv = uitofp i16 %vecext to double
  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
  %vecext1 = extractelement <8 x i16> %a, i32 1
  %conv2 = uitofp i16 %vecext1 to double
  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
  ret <2 x double> %vecinit3
}
|
|
|
|
; test2: take element 0 of %a and element 1 of %b (both <4 x i32>), convert
; each to double with uitofp, and return them as lanes 0 and 1 of a
; <2 x double>. The assertions below are autogenerated and use -NEXT
; matching, so they pin the exact instruction order for each of the four RUN
; configurations (pwr9/pwr8, big/little endian). Regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) {
; P9BE-LABEL: test2:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    xxextractuw f0, v2, 0
; P9BE-NEXT:    xxextractuw f1, v3, 4
; P9BE-NEXT:    xscvuxddp f0, f0
; P9BE-NEXT:    xscvuxddp f1, f1
; P9BE-NEXT:    xxmrghd v2, vs0, vs1
; P9BE-NEXT:    blr
;
; P9LE-LABEL: test2:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    xxextractuw f0, v2, 12
; P9LE-NEXT:    xxextractuw f1, v3, 8
; P9LE-NEXT:    xscvuxddp f0, f0
; P9LE-NEXT:    xscvuxddp f1, f1
; P9LE-NEXT:    xxmrghd v2, vs1, vs0
; P9LE-NEXT:    blr
;
; P8BE-LABEL: test2:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    xxsldwi vs0, v2, v2, 3
; P8BE-NEXT:    mfvsrwz r4, v3
; P8BE-NEXT:    mtfprwz f1, r4
; P8BE-NEXT:    mffprwz r3, f0
; P8BE-NEXT:    xscvuxddp f1, f1
; P8BE-NEXT:    mtfprwz f0, r3
; P8BE-NEXT:    xscvuxddp f0, f0
; P8BE-NEXT:    xxmrghd v2, vs0, vs1
; P8BE-NEXT:    blr
;
; P8LE-LABEL: test2:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    xxsldwi vs1, v3, v3, 1
; P8LE-NEXT:    mffprwz r3, f0
; P8LE-NEXT:    mffprwz r4, f1
; P8LE-NEXT:    mtfprwz f0, r3
; P8LE-NEXT:    mtfprwz f1, r4
; P8LE-NEXT:    xscvuxddp f0, f0
; P8LE-NEXT:    xscvuxddp f1, f1
; P8LE-NEXT:    xxmrghd v2, vs1, vs0
; P8LE-NEXT:    blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 0
  %conv = uitofp i32 %vecext to double
  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
  %vecext1 = extractelement <4 x i32> %b, i32 1
  %conv2 = uitofp i32 %vecext1 to double
  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
  ret <2 x double> %vecinit3
}
|