1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[PowerPC] Fix STRICT_FRINT/STRICT_FNEARBYINT lowering

In the standard C library, both rint and nearbyint return the result
rounded according to the current rounding mode, but nearbyint never
raises the inexact exception. On PowerPC, x(v|s)r(d|s)pic may modify
FPSCR XX, raising the inexact exception, so we can't select constrained
fnearbyint into these instructions (e.g. xvrdpic).

The one exception is xsrqpi, which does not raise the inexact exception,
so selecting constrained fnearbyint for f128 into it remains correct.

Reviewed By: uweigand

Differential Revision: https://reviews.llvm.org/D87220
This commit is contained in:
Qiu Chaofan 2020-09-09 22:38:58 +08:00
parent d81eb83109
commit 6053db5a2e
4 changed files with 347 additions and 55 deletions

View File

@ -316,8 +316,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
if (Subtarget.hasVSX())
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal);
if (Subtarget.hasVSX()) {
setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
}
if (Subtarget.hasFSQRT()) {
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
@ -1059,7 +1061,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
@ -1073,7 +1075,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);

View File

@ -890,15 +890,15 @@ let hasSideEffects = 0 in {
def XSRDPIC : XX2Form<60, 107,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpic $XT, $XB", IIC_VecFP,
[(set f64:$XT, (any_fnearbyint f64:$XB))]>;
[(set f64:$XT, (fnearbyint f64:$XB))]>;
def XVRDPIC : XX2Form<60, 235,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpic $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (any_fnearbyint v2f64:$XB))]>;
[(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
def XVRSPIC : XX2Form<60, 171,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspic $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (any_fnearbyint v4f32:$XB))]>;
[(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
// Max/Min Instructions
let isCommutable = 1 in {
def XSMAXDP : XX3Form<60, 160,
@ -2681,7 +2681,7 @@ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(f32 (any_fround f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPI
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (any_fnearbyint f32:$S)),
def : Pat<(f32 (fnearbyint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (any_ffloor f32:$S)),
@ -2696,11 +2696,11 @@ def : Pat<(f32 (any_ftrunc f32:$S)),
def : Pat<(f32 (any_frint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
// Rounding for double precision.
def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>;
def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
// Materialize a zero-vector of long long
def : Pat<(v2i64 immAllZerosV),

View File

@ -170,12 +170,30 @@ define <2 x double> @floor_v2f64(<2 x double> %vf1) {
define double @nearbyint_f64(double %f1, double %f2) {
; P8-LABEL: nearbyint_f64:
; P8: # %bb.0:
; P8-NEXT: xsrdpic f1, f1
; P8-NEXT: mflr r0
; P8-NEXT: std r0, 16(r1)
; P8-NEXT: stdu r1, -112(r1)
; P8-NEXT: .cfi_def_cfa_offset 112
; P8-NEXT: .cfi_offset lr, 16
; P8-NEXT: bl nearbyint
; P8-NEXT: nop
; P8-NEXT: addi r1, r1, 112
; P8-NEXT: ld r0, 16(r1)
; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; P9-LABEL: nearbyint_f64:
; P9: # %bb.0:
; P9-NEXT: xsrdpic f1, f1
; P9-NEXT: mflr r0
; P9-NEXT: std r0, 16(r1)
; P9-NEXT: stdu r1, -32(r1)
; P9-NEXT: .cfi_def_cfa_offset 32
; P9-NEXT: .cfi_offset lr, 16
; P9-NEXT: bl nearbyint
; P9-NEXT: nop
; P9-NEXT: addi r1, r1, 32
; P9-NEXT: ld r0, 16(r1)
; P9-NEXT: mtlr r0
; P9-NEXT: blr
%res = call double @llvm.experimental.constrained.nearbyint.f64(
double %f1,
@ -187,12 +205,104 @@ define double @nearbyint_f64(double %f1, double %f2) {
define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) {
; P8-LABEL: nearbyint_v4f32:
; P8: # %bb.0:
; P8-NEXT: xvrspic v2, v2
; P8-NEXT: mflr r0
; P8-NEXT: std r0, 16(r1)
; P8-NEXT: stdu r1, -176(r1)
; P8-NEXT: .cfi_def_cfa_offset 176
; P8-NEXT: .cfi_offset lr, 16
; P8-NEXT: .cfi_offset v30, -32
; P8-NEXT: .cfi_offset v31, -16
; P8-NEXT: xxsldwi vs0, v2, v2, 3
; P8-NEXT: li r3, 144
; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
; P8-NEXT: li r3, 160
; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; P8-NEXT: vmr v31, v2
; P8-NEXT: xscvspdpn f1, vs0
; P8-NEXT: bl nearbyintf
; P8-NEXT: nop
; P8-NEXT: xxsldwi vs0, v31, v31, 1
; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-NEXT: li r3, 128
; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill
; P8-NEXT: xscvspdpn f1, vs0
; P8-NEXT: bl nearbyintf
; P8-NEXT: nop
; P8-NEXT: li r3, 128
; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload
; P8-NEXT: xxmrghd vs0, vs1, vs0
; P8-NEXT: xscvspdpn f1, v31
; P8-NEXT: xvcvdpsp v30, vs0
; P8-NEXT: bl nearbyintf
; P8-NEXT: nop
; P8-NEXT: xxswapd vs0, v31
; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-NEXT: li r3, 128
; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill
; P8-NEXT: xscvspdpn f1, vs0
; P8-NEXT: bl nearbyintf
; P8-NEXT: nop
; P8-NEXT: li r3, 128
; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload
; P8-NEXT: li r3, 160
; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; P8-NEXT: li r3, 144
; P8-NEXT: xxmrghd vs0, vs0, vs1
; P8-NEXT: xvcvdpsp v2, vs0
; P8-NEXT: vmrgew v2, v2, v30
; P8-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
; P8-NEXT: addi r1, r1, 176
; P8-NEXT: ld r0, 16(r1)
; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; P9-LABEL: nearbyint_v4f32:
; P9: # %bb.0:
; P9-NEXT: xvrspic v2, v2
; P9-NEXT: mflr r0
; P9-NEXT: std r0, 16(r1)
; P9-NEXT: stdu r1, -80(r1)
; P9-NEXT: .cfi_def_cfa_offset 80
; P9-NEXT: .cfi_offset lr, 16
; P9-NEXT: .cfi_offset v30, -32
; P9-NEXT: .cfi_offset v31, -16
; P9-NEXT: xxsldwi vs0, v2, v2, 3
; P9-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
; P9-NEXT: xscvspdpn f1, vs0
; P9-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
; P9-NEXT: vmr v31, v2
; P9-NEXT: bl nearbyintf
; P9-NEXT: nop
; P9-NEXT: xxsldwi vs0, v31, v31, 1
; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill
; P9-NEXT: xscvspdpn f1, vs0
; P9-NEXT: bl nearbyintf
; P9-NEXT: nop
; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload
; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9-NEXT: xxmrghd vs0, vs1, vs0
; P9-NEXT: xscvspdpn f1, v31
; P9-NEXT: xvcvdpsp v30, vs0
; P9-NEXT: bl nearbyintf
; P9-NEXT: nop
; P9-NEXT: xxswapd vs0, v31
; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill
; P9-NEXT: xscvspdpn f1, vs0
; P9-NEXT: bl nearbyintf
; P9-NEXT: nop
; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload
; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
; P9-NEXT: xxmrghd vs0, vs0, vs1
; P9-NEXT: xvcvdpsp v2, vs0
; P9-NEXT: vmrgew v2, v2, v30
; P9-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
; P9-NEXT: addi r1, r1, 80
; P9-NEXT: ld r0, 16(r1)
; P9-NEXT: mtlr r0
; P9-NEXT: blr
%res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
<4 x float> %vf1,
@ -204,12 +314,62 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) {
define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) {
; P8-LABEL: nearbyint_v2f64:
; P8: # %bb.0:
; P8-NEXT: xvrdpic v2, v2
; P8-NEXT: mflr r0
; P8-NEXT: std r0, 16(r1)
; P8-NEXT: stdu r1, -160(r1)
; P8-NEXT: .cfi_def_cfa_offset 160
; P8-NEXT: .cfi_offset lr, 16
; P8-NEXT: .cfi_offset v31, -16
; P8-NEXT: li r3, 144
; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
; P8-NEXT: vmr v31, v2
; P8-NEXT: xxlor f1, v31, v31
; P8-NEXT: bl nearbyint
; P8-NEXT: nop
; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-NEXT: li r3, 128
; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill
; P8-NEXT: xxswapd vs1, v31
; P8-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; P8-NEXT: bl nearbyint
; P8-NEXT: nop
; P8-NEXT: li r3, 128
; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload
; P8-NEXT: li r3, 144
; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; P8-NEXT: xxmrghd v2, vs0, vs1
; P8-NEXT: addi r1, r1, 160
; P8-NEXT: ld r0, 16(r1)
; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; P9-LABEL: nearbyint_v2f64:
; P9: # %bb.0:
; P9-NEXT: xvrdpic v2, v2
; P9-NEXT: mflr r0
; P9-NEXT: std r0, 16(r1)
; P9-NEXT: stdu r1, -64(r1)
; P9-NEXT: .cfi_def_cfa_offset 64
; P9-NEXT: .cfi_offset lr, 16
; P9-NEXT: .cfi_offset v31, -16
; P9-NEXT: stxv v31, 48(r1) # 16-byte Folded Spill
; P9-NEXT: vmr v31, v2
; P9-NEXT: xscpsgndp f1, v31, v31
; P9-NEXT: bl nearbyint
; P9-NEXT: nop
; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill
; P9-NEXT: xxswapd vs1, v31
; P9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; P9-NEXT: bl nearbyint
; P9-NEXT: nop
; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload
; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload
; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9-NEXT: xxmrghd v2, vs0, vs1
; P9-NEXT: addi r1, r1, 64
; P9-NEXT: ld r0, 16(r1)
; P9-NEXT: mtlr r0
; P9-NEXT: blr
%res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
<2 x double> %vf1,

View File

@ -4899,19 +4899,50 @@ entry:
define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
; PC64LE-LABEL: constrained_vector_nearbyint_v2f64:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: std 0, 16(1)
; PC64LE-NEXT: stdu 1, -64(1)
; PC64LE-NEXT: addis 3, 2, .LCPI81_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI81_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: xxswapd 0, 0
; PC64LE-NEXT: xvrdpic 34, 0
; PC64LE-NEXT: lfd 1, .LCPI81_0@toc@l(3)
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI81_1@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI81_1@toc@l(3)
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: xxmrghd 34, 1, 0
; PC64LE-NEXT: addi 1, 1, 64
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: std 0, 16(1)
; PC64LE9-NEXT: stdu 1, -48(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI81_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI81_0@toc@l
; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: xvrdpic 34, 0
; PC64LE9-NEXT: lfd 1, .LCPI81_0@toc@l(3)
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI81_1@toc@ha
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
; PC64LE9-NEXT: lfs 1, .LCPI81_1@toc@l(3)
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 1, 0
; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
entry:
%nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
@ -5010,31 +5041,72 @@ entry:
define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_nearby_v3f64:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI83_1@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: std 0, 16(1)
; PC64LE-NEXT: stdu 1, -80(1)
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI83_0@toc@ha
; PC64LE-NEXT: lfd 1, .LCPI83_0@toc@l(3)
; PC64LE-NEXT: xxswapd 0, 0
; PC64LE-NEXT: xsrdpic 3, 1
; PC64LE-NEXT: xvrdpic 2, 0
; PC64LE-NEXT: xxswapd 1, 2
; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI83_1@toc@l(3)
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: addis 3, 2, .LCPI83_2@toc@ha
; PC64LE-NEXT: xxmrghd 63, 0, 1
; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3)
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_nearby_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: std 0, 16(1)
; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI83_0@toc@ha
; PC64LE9-NEXT: lfd 0, .LCPI83_0@toc@l(3)
; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
; PC64LE9-NEXT: lfd 1, .LCPI83_0@toc@l(3)
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI83_1@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI83_1@toc@l
; PC64LE9-NEXT: xsrdpic 3, 0
; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: xvrdpic 2, 0
; PC64LE9-NEXT: xxswapd 1, 2
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
; PC64LE9-NEXT: lfs 1, .LCPI83_1@toc@l(3)
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
; PC64LE9-NEXT: addis 3, 2, .LCPI83_2@toc@ha
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 0, 1
; PC64LE9-NEXT: lfd 1, .LCPI83_2@toc@l(3)
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
entry:
%nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
@ -5047,28 +5119,86 @@ entry:
define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
; PC64LE-LABEL: constrained_vector_nearbyint_v4f64:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: std 0, 16(1)
; PC64LE-NEXT: stdu 1, -80(1)
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI84_0@toc@ha
; PC64LE-NEXT: addis 4, 2, .LCPI84_1@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI84_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addi 3, 4, .LCPI84_1@toc@l
; PC64LE-NEXT: lxvd2x 1, 0, 3
; PC64LE-NEXT: xxswapd 0, 0
; PC64LE-NEXT: xxswapd 1, 1
; PC64LE-NEXT: xvrdpic 35, 0
; PC64LE-NEXT: xvrdpic 34, 1
; PC64LE-NEXT: lfd 1, .LCPI84_0@toc@l(3)
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI84_1@toc@ha
; PC64LE-NEXT: lfd 1, .LCPI84_1@toc@l(3)
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: addis 3, 2, .LCPI84_2@toc@ha
; PC64LE-NEXT: xxmrghd 63, 1, 0
; PC64LE-NEXT: lfd 1, .LCPI84_2@toc@l(3)
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI84_3@toc@ha
; PC64LE-NEXT: lfd 1, .LCPI84_3@toc@l(3)
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: vmr 2, 31
; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: xxmrghd 35, 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: std 0, 16(1)
; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI84_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI84_0@toc@l
; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
; PC64LE9-NEXT: lfd 1, .LCPI84_0@toc@l(3)
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI84_1@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI84_1@toc@l
; PC64LE9-NEXT: xvrdpic 35, 0
; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: xvrdpic 34, 0
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
; PC64LE9-NEXT: lfd 1, .LCPI84_1@toc@l(3)
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
; PC64LE9-NEXT: addis 3, 2, .LCPI84_2@toc@ha
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 0
; PC64LE9-NEXT: lfd 1, .LCPI84_2@toc@l(3)
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI84_3@toc@ha
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
; PC64LE9-NEXT: lfd 1, .LCPI84_3@toc@l(3)
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
; PC64LE9-NEXT: vmr 2, 31
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 1, 0
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
entry:
%nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(