1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[PowerPC] Exploit VSX rounding instrs for rint

Exploit the native VSX rounding instructions, x(v|s)r(d|s)pic, which round
to an integral value using the current rounding mode.

According to the C standard library specification, rint may raise the
INEXACT exception, whereas nearbyint never does.

Reviewed By: lkail

Differential Revision: https://reviews.llvm.org/D72685
This commit is contained in:
Qiu Chaofan 2020-02-13 20:59:50 +08:00
parent b6fc689526
commit a735dedfe5
6 changed files with 186 additions and 333 deletions

View File

@ -799,12 +799,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);

View File

@ -2563,6 +2563,14 @@ def : Pat<(f32 (fceil f32:$S)),
def : Pat<(f32 (ftrunc f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIZ
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
// Select frint (rint) to the VSX x(v|s)r(d|s)pic instructions, which round
// using the current rounding mode.
// Scalar f32: the operand is copied into VSFRC, rounded with XSRDPIC, and the
// result is copied into VSSRC (same shape as the f32 ftrunc pattern).
def : Pat<(f32 (frint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
// Vector v4f32 maps directly onto XVRSPIC.
def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
// Rounding for double precision.
// Scalar f64 uses XSRDPIC and vector v2f64 uses XVRDPIC, with no register
// class copies.
def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
}
// Materialize a zero-vector of long long

View File

@ -11,118 +11,34 @@
define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind {
; CHECK-LE-LABEL: splat_swap:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: mflr 0
; CHECK-LE-NEXT: std 0, 16(1)
; CHECK-LE-NEXT: stdu 1, -80(1)
; CHECK-LE-NEXT: li 3, 64
; CHECK-LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; CHECK-LE-NEXT: xvadddp 63, 34, 35
; CHECK-LE-NEXT: xxlor 1, 63, 63
; CHECK-LE-NEXT: bl rint
; CHECK-LE-NEXT: nop
; CHECK-LE-NEXT: xxswapd 0, 63
; CHECK-LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-LE-NEXT: li 3, 48
; CHECK-LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; CHECK-LE-NEXT: fmr 1, 0
; CHECK-LE-NEXT: bl rint
; CHECK-LE-NEXT: nop
; CHECK-LE-NEXT: li 3, 48
; CHECK-LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
; CHECK-LE-NEXT: li 3, 64
; CHECK-LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; CHECK-LE-NEXT: xxmrghd 0, 0, 1
; CHECK-LE-NEXT: xvadddp 0, 34, 35
; CHECK-LE-NEXT: xvrdpic 0, 0
; CHECK-LE-NEXT: xxswapd 1, 0
; CHECK-LE-NEXT: xssubdp 1, 1, 0
; CHECK-LE-NEXT: addi 1, 1, 80
; CHECK-LE-NEXT: ld 0, 16(1)
; CHECK-LE-NEXT: mtlr 0
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: splat_swap:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: mflr 0
; CHECK-BE-NEXT: std 0, 16(1)
; CHECK-BE-NEXT: stdu 1, -160(1)
; CHECK-BE-NEXT: li 3, 144
; CHECK-BE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; CHECK-BE-NEXT: xvadddp 63, 34, 35
; CHECK-BE-NEXT: xxlor 1, 63, 63
; CHECK-BE-NEXT: bl rint
; CHECK-BE-NEXT: nop
; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-BE-NEXT: li 3, 128
; CHECK-BE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; CHECK-BE-NEXT: xxswapd 1, 63
; CHECK-BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; CHECK-BE-NEXT: bl rint
; CHECK-BE-NEXT: nop
; CHECK-BE-NEXT: li 3, 128
; CHECK-BE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-BE-NEXT: li 3, 144
; CHECK-BE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; CHECK-BE-NEXT: xxmrghd 0, 0, 1
; CHECK-BE-NEXT: xvadddp 0, 34, 35
; CHECK-BE-NEXT: xvrdpic 0, 0
; CHECK-BE-NEXT: xxswapd 1, 0
; CHECK-BE-NEXT: xssubdp 1, 0, 1
; CHECK-BE-NEXT: addi 1, 1, 160
; CHECK-BE-NEXT: ld 0, 16(1)
; CHECK-BE-NEXT: mtlr 0
; CHECK-BE-NEXT: blr
;
; CHECK-P9LE-LABEL: splat_swap:
; CHECK-P9LE: # %bb.0:
; CHECK-P9LE-NEXT: mflr 0
; CHECK-P9LE-NEXT: std 0, 16(1)
; CHECK-P9LE-NEXT: stdu 1, -64(1)
; CHECK-P9LE-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
; CHECK-P9LE-NEXT: xvadddp 63, 34, 35
; CHECK-P9LE-NEXT: xscpsgndp 1, 63, 63
; CHECK-P9LE-NEXT: bl rint
; CHECK-P9LE-NEXT: nop
; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9LE-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
; CHECK-P9LE-NEXT: xxswapd 1, 63
; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; CHECK-P9LE-NEXT: bl rint
; CHECK-P9LE-NEXT: nop
; CHECK-P9LE-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9LE-NEXT: xxmrghd 0, 0, 1
; CHECK-P9LE-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; CHECK-P9LE-NEXT: xvadddp 0, 34, 35
; CHECK-P9LE-NEXT: xvrdpic 0, 0
; CHECK-P9LE-NEXT: xxswapd 1, 0
; CHECK-P9LE-NEXT: xssubdp 1, 1, 0
; CHECK-P9LE-NEXT: addi 1, 1, 64
; CHECK-P9LE-NEXT: ld 0, 16(1)
; CHECK-P9LE-NEXT: mtlr 0
; CHECK-P9LE-NEXT: blr
;
; CHECK-P9BE-LABEL: splat_swap:
; CHECK-P9BE: # %bb.0:
; CHECK-P9BE-NEXT: mflr 0
; CHECK-P9BE-NEXT: std 0, 16(1)
; CHECK-P9BE-NEXT: stdu 1, -144(1)
; CHECK-P9BE-NEXT: stxv 63, 128(1) # 16-byte Folded Spill
; CHECK-P9BE-NEXT: xvadddp 63, 34, 35
; CHECK-P9BE-NEXT: xscpsgndp 1, 63, 63
; CHECK-P9BE-NEXT: bl rint
; CHECK-P9BE-NEXT: nop
; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9BE-NEXT: stxv 1, 112(1) # 16-byte Folded Spill
; CHECK-P9BE-NEXT: xxswapd 1, 63
; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; CHECK-P9BE-NEXT: bl rint
; CHECK-P9BE-NEXT: nop
; CHECK-P9BE-NEXT: lxv 0, 112(1) # 16-byte Folded Reload
; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9BE-NEXT: xxmrghd 0, 0, 1
; CHECK-P9BE-NEXT: lxv 63, 128(1) # 16-byte Folded Reload
; CHECK-P9BE-NEXT: xvadddp 0, 34, 35
; CHECK-P9BE-NEXT: xvrdpic 0, 0
; CHECK-P9BE-NEXT: xxswapd 1, 0
; CHECK-P9BE-NEXT: xssubdp 1, 0, 1
; CHECK-P9BE-NEXT: addi 1, 1, 144
; CHECK-P9BE-NEXT: ld 0, 16(1)
; CHECK-P9BE-NEXT: mtlr 0
; CHECK-P9BE-NEXT: blr
%added = fadd <2 x double> %x, %y
%call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone

View File

@ -559,3 +559,47 @@ entry:
}
declare float @llvm.ceil.f32(float)
; Scalar f64 rint. Every check prefix used for this function (BE, CHECK and
; FAST) expects the llvm.rint.f64 call to be emitted as a single xsrdpic on
; f1 followed by blr, i.e. with no call to an external rint routine.
define dso_local double @test_rint(double %d) local_unnamed_addr {
; BE-LABEL: test_rint:
; BE: # %bb.0: # %entry
; BE-NEXT: xsrdpic f1, f1
; BE-NEXT: blr
;
; CHECK-LABEL: test_rint:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xsrdpic f1, f1
; CHECK-NEXT: blr
;
; FAST-LABEL: test_rint:
; FAST: # %bb.0: # %entry
; FAST-NEXT: xsrdpic f1, f1
; FAST-NEXT: blr
entry:
%0 = tail call double @llvm.rint.f64(double %d)
ret double %0
}
declare double @llvm.rint.f64(double)
; Scalar f32 rint. Every check prefix used for this function (BE, CHECK and
; FAST) expects the llvm.rint.f32 call to be emitted as a single xsrdpic on
; f1 followed by blr; note that the expected instruction is xsrdpic even
; though the IR type is float.
define dso_local float @test_rintf(float %f) local_unnamed_addr {
; BE-LABEL: test_rintf:
; BE: # %bb.0: # %entry
; BE-NEXT: xsrdpic f1, f1
; BE-NEXT: blr
;
; CHECK-LABEL: test_rintf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xsrdpic f1, f1
; CHECK-NEXT: blr
;
; FAST-LABEL: test_rintf:
; FAST: # %bb.0: # %entry
; FAST-NEXT: xsrdpic f1, f1
; FAST-NEXT: blr
entry:
%0 = tail call float @llvm.rint.f32(float %f)
ret float %0
}
declare float @llvm.rint.f32(float)

View File

@ -4748,34 +4748,20 @@ entry:
define <1 x float> @constrained_vector_rint_v1f32() #0 {
; PC64LE-LABEL: constrained_vector_rint_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: std 0, 16(1)
; PC64LE-NEXT: stdu 1, -32(1)
; PC64LE-NEXT: addis 3, 2, .LCPI75_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI75_0@toc@l(3)
; PC64LE-NEXT: bl rintf
; PC64LE-NEXT: nop
; PC64LE-NEXT: xscvdpspn 0, 1
; PC64LE-NEXT: lfs 0, .LCPI75_0@toc@l(3)
; PC64LE-NEXT: xsrdpic 0, 0
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: addi 1, 1, 32
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_rint_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: std 0, 16(1)
; PC64LE9-NEXT: stdu 1, -32(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI75_0@toc@ha
; PC64LE9-NEXT: lfs 1, .LCPI75_0@toc@l(3)
; PC64LE9-NEXT: bl rintf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: lfs 0, .LCPI75_0@toc@l(3)
; PC64LE9-NEXT: xsrdpic 0, 0
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: addi 1, 1, 32
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
entry:
%rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
@ -4788,50 +4774,19 @@ entry:
define <2 x double> @constrained_vector_rint_v2f64() #0 {
; PC64LE-LABEL: constrained_vector_rint_v2f64:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: std 0, 16(1)
; PC64LE-NEXT: stdu 1, -64(1)
; PC64LE-NEXT: addis 3, 2, .LCPI76_0@toc@ha
; PC64LE-NEXT: lfd 1, .LCPI76_0@toc@l(3)
; PC64LE-NEXT: bl rint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI76_1@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI76_1@toc@l(3)
; PC64LE-NEXT: bl rint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: xxmrghd 34, 1, 0
; PC64LE-NEXT: addi 1, 1, 64
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: addi 3, 3, .LCPI76_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: xxswapd 0, 0
; PC64LE-NEXT: xvrdpic 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_rint_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: std 0, 16(1)
; PC64LE9-NEXT: stdu 1, -48(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI76_0@toc@ha
; PC64LE9-NEXT: lfd 1, .LCPI76_0@toc@l(3)
; PC64LE9-NEXT: bl rint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI76_1@toc@ha
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
; PC64LE9-NEXT: lfs 1, .LCPI76_1@toc@l(3)
; PC64LE9-NEXT: bl rint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 1, 0
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: addi 3, 3, .LCPI76_0@toc@l
; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: xvrdpic 34, 0
; PC64LE9-NEXT: blr
entry:
%rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
@ -4844,80 +4799,50 @@ entry:
define <3 x float> @constrained_vector_rint_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_rint_v3f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: stfd 30, -16(1) # 8-byte Folded Spill
; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
; PC64LE-NEXT: std 0, 16(1)
; PC64LE-NEXT: stdu 1, -48(1)
; PC64LE-NEXT: addis 3, 2, .LCPI77_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI77_0@toc@l(3)
; PC64LE-NEXT: bl rintf
; PC64LE-NEXT: nop
; PC64LE-NEXT: addis 3, 2, .LCPI77_1@toc@ha
; PC64LE-NEXT: fmr 31, 1
; PC64LE-NEXT: lfs 1, .LCPI77_1@toc@l(3)
; PC64LE-NEXT: bl rintf
; PC64LE-NEXT: nop
; PC64LE-NEXT: addis 3, 2, .LCPI77_2@toc@ha
; PC64LE-NEXT: fmr 30, 1
; PC64LE-NEXT: lfs 1, .LCPI77_2@toc@l(3)
; PC64LE-NEXT: bl rintf
; PC64LE-NEXT: nop
; PC64LE-NEXT: xscvdpspn 0, 30
; PC64LE-NEXT: addis 4, 2, .LCPI77_1@toc@ha
; PC64LE-NEXT: lfs 0, .LCPI77_2@toc@l(3)
; PC64LE-NEXT: lfs 1, .LCPI77_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI77_0@toc@ha
; PC64LE-NEXT: xsrdpic 0, 0
; PC64LE-NEXT: lfs 2, .LCPI77_0@toc@l(3)
; PC64LE-NEXT: addis 3, 2, .LCPI77_3@toc@ha
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xsrdpic 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI77_3@toc@l
; PC64LE-NEXT: xsrdpic 2, 2
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE-NEXT: xscvdpspn 0, 31
; PC64LE-NEXT: xscvdpspn 0, 2
; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE-NEXT: vmrglw 2, 2, 3
; PC64LE-NEXT: vmrglw 2, 3, 2
; PC64LE-NEXT: lvx 3, 0, 3
; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
; PC64LE-NEXT: lfd 30, -16(1) # 8-byte Folded Reload
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_rint_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: stfd 30, -16(1) # 8-byte Folded Spill
; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
; PC64LE9-NEXT: std 0, 16(1)
; PC64LE9-NEXT: stdu 1, -48(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI77_0@toc@ha
; PC64LE9-NEXT: lfs 1, .LCPI77_0@toc@l(3)
; PC64LE9-NEXT: bl rintf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: lfs 0, .LCPI77_0@toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI77_1@toc@ha
; PC64LE9-NEXT: fmr 31, 1
; PC64LE9-NEXT: lfs 1, .LCPI77_1@toc@l(3)
; PC64LE9-NEXT: bl rintf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI77_2@toc@ha
; PC64LE9-NEXT: fmr 30, 1
; PC64LE9-NEXT: lfs 1, .LCPI77_2@toc@l(3)
; PC64LE9-NEXT: bl rintf
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: xscvdpspn 0, 1
; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
; PC64LE9-NEXT: xscvdpspn 0, 30
; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1
; PC64LE9-NEXT: xscvdpspn 0, 31
; PC64LE9-NEXT: xsrdpic 0, 0
; PC64LE9-NEXT: lfs 2, .LCPI77_2@toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI77_3@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI77_3@toc@l
; PC64LE9-NEXT: xsrdpic 1, 1
; PC64LE9-NEXT: xsrdpic 2, 2
; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xscvdpspn 1, 1
; PC64LE9-NEXT: xscvdpspn 2, 2
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
; PC64LE9-NEXT: vmrglw 2, 3, 2
; PC64LE9-NEXT: lxvx 35, 0, 3
; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
; PC64LE9-NEXT: vperm 2, 4, 2, 3
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
; PC64LE9-NEXT: lfd 30, -16(1) # 8-byte Folded Reload
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
entry:
%rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
@ -4930,72 +4855,31 @@ define <3 x float> @constrained_vector_rint_v3f32() #0 {
define <3 x double> @constrained_vector_rint_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_rint_v3f64:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: std 0, 16(1)
; PC64LE-NEXT: stdu 1, -80(1)
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI78_1@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI78_1@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI78_0@toc@ha
; PC64LE-NEXT: lfd 1, .LCPI78_0@toc@l(3)
; PC64LE-NEXT: bl rint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI78_1@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI78_1@toc@l(3)
; PC64LE-NEXT: bl rint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: addis 3, 2, .LCPI78_2@toc@ha
; PC64LE-NEXT: xxmrghd 63, 0, 1
; PC64LE-NEXT: lfd 1, .LCPI78_2@toc@l(3)
; PC64LE-NEXT: bl rint
; PC64LE-NEXT: nop
; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: xxswapd 0, 0
; PC64LE-NEXT: xsrdpic 3, 1
; PC64LE-NEXT: xvrdpic 2, 0
; PC64LE-NEXT: xxswapd 1, 2
; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_rint_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: std 0, 16(1)
; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI78_0@toc@ha
; PC64LE9-NEXT: lfd 1, .LCPI78_0@toc@l(3)
; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
; PC64LE9-NEXT: bl rint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: lfd 0, .LCPI78_0@toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI78_1@toc@ha
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
; PC64LE9-NEXT: lfs 1, .LCPI78_1@toc@l(3)
; PC64LE9-NEXT: bl rint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
; PC64LE9-NEXT: addis 3, 2, .LCPI78_2@toc@ha
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 0, 1
; PC64LE9-NEXT: lfd 1, .LCPI78_2@toc@l(3)
; PC64LE9-NEXT: bl rint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: addi 3, 3, .LCPI78_1@toc@l
; PC64LE9-NEXT: xsrdpic 3, 0
; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: xvrdpic 2, 0
; PC64LE9-NEXT: xxswapd 1, 2
; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PC64LE9-NEXT: blr
entry:
%rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
@ -5008,86 +4892,28 @@ entry:
define <4 x double> @constrained_vector_rint_v4f64() #0 {
; PC64LE-LABEL: constrained_vector_rint_v4f64:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: std 0, 16(1)
; PC64LE-NEXT: stdu 1, -80(1)
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI79_0@toc@ha
; PC64LE-NEXT: lfd 1, .LCPI79_0@toc@l(3)
; PC64LE-NEXT: bl rint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI79_1@toc@ha
; PC64LE-NEXT: lfd 1, .LCPI79_1@toc@l(3)
; PC64LE-NEXT: bl rint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: addis 3, 2, .LCPI79_2@toc@ha
; PC64LE-NEXT: xxmrghd 63, 1, 0
; PC64LE-NEXT: lfd 1, .LCPI79_2@toc@l(3)
; PC64LE-NEXT: bl rint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI79_3@toc@ha
; PC64LE-NEXT: lfd 1, .LCPI79_3@toc@l(3)
; PC64LE-NEXT: bl rint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: vmr 2, 31
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: xxmrghd 35, 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: addis 4, 2, .LCPI79_1@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI79_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addi 3, 4, .LCPI79_1@toc@l
; PC64LE-NEXT: lxvd2x 1, 0, 3
; PC64LE-NEXT: xxswapd 0, 0
; PC64LE-NEXT: xxswapd 1, 1
; PC64LE-NEXT: xvrdpic 34, 0
; PC64LE-NEXT: xvrdpic 35, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_rint_v4f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: std 0, 16(1)
; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI79_0@toc@ha
; PC64LE9-NEXT: lfd 1, .LCPI79_0@toc@l(3)
; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
; PC64LE9-NEXT: bl rint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addi 3, 3, .LCPI79_0@toc@l
; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI79_1@toc@ha
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
; PC64LE9-NEXT: lfd 1, .LCPI79_1@toc@l(3)
; PC64LE9-NEXT: bl rint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
; PC64LE9-NEXT: addis 3, 2, .LCPI79_2@toc@ha
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 0
; PC64LE9-NEXT: lfd 1, .LCPI79_2@toc@l(3)
; PC64LE9-NEXT: bl rint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI79_3@toc@ha
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
; PC64LE9-NEXT: lfd 1, .LCPI79_3@toc@l(3)
; PC64LE9-NEXT: bl rint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
; PC64LE9-NEXT: vmr 2, 31
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 1, 0
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: addi 3, 3, .LCPI79_1@toc@l
; PC64LE9-NEXT: xvrdpic 34, 0
; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: xvrdpic 35, 0
; PC64LE9-NEXT: blr
entry:
%rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(

View File

@ -0,0 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \
; RUN: FileCheck %s --check-prefix=P9
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \
; RUN: FileCheck %s
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \
; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST
; Vector <2 x double> rint. All three RUN configurations of this file (pwr9
; with the P9 prefix, pwr8 with the default prefix, and pwr8 with
; --enable-unsafe-fp-math under the FAST prefix) expect a single
; xvrdpic v2, v2 followed by blr.
define dso_local <2 x double> @test_rint_v2f64(<2 x double> %d) local_unnamed_addr {
; P9-LABEL: test_rint_v2f64:
; P9: # %bb.0: # %entry
; P9-NEXT: xvrdpic v2, v2
; P9-NEXT: blr
;
; CHECK-LABEL: test_rint_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvrdpic v2, v2
; CHECK-NEXT: blr
;
; FAST-LABEL: test_rint_v2f64:
; FAST: # %bb.0: # %entry
; FAST-NEXT: xvrdpic v2, v2
; FAST-NEXT: blr
entry:
%0 = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %d)
ret <2 x double> %0
}
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
; Vector <4 x float> rint. All three RUN configurations of this file (pwr9
; with the P9 prefix, pwr8 with the default prefix, and pwr8 with
; --enable-unsafe-fp-math under the FAST prefix) expect a single
; xvrspic v2, v2 followed by blr.
define dso_local <4 x float> @test_rint_v4f32(<4 x float> %d) local_unnamed_addr {
; P9-LABEL: test_rint_v4f32:
; P9: # %bb.0: # %entry
; P9-NEXT: xvrspic v2, v2
; P9-NEXT: blr
;
; CHECK-LABEL: test_rint_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvrspic v2, v2
; CHECK-NEXT: blr
;
; FAST-LABEL: test_rint_v4f32:
; FAST: # %bb.0: # %entry
; FAST-NEXT: xvrspic v2, v2
; FAST-NEXT: blr
entry:
%0 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %d)
ret <4 x float> %0
}
declare <4 x float> @llvm.rint.v4f32(<4 x float>)