llvm-mirror/test/CodeGen/PowerPC/fp-strict.ll
Qiu Chaofan c9d96287d5 [SPE] Support constrained float operations on SPE
This patch enables support for constrained arithmetic and comparison
operations on SPE. This fixes Bugzilla 50070.

One thing not covered is fcmp vs. fcmps on SPE. Some condition codes
generate signaling comparisons while others do not. In this patch, all
are treated as signaling, so there may still be some issues when
compiling from C code.
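
For reference, the quiet and signaling variants are selected by intrinsic
name, not by the condition-code operand; a minimal IR sketch (illustrative,
not part of this test file):

  %quiet = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict")  ; quiet compare (fcmp)
  %sig = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict")   ; signaling compare (fcmps)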

Reviewed By: jhibbits

Differential Revision: https://reviews.llvm.org/D101282
2021-04-29 16:34:10 +08:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc-unknown-linux -mattr=spe | FileCheck %s -check-prefix=SPE
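; The RUN lines above cover three configurations: 64-bit targets with VSX
; (CHECK, pwr8 and pwr9), a pwr8 target with VSX disabled (NOVSX), and a
; 32-bit target with the SPE embedded FPU (SPE).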
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
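; Each constrained intrinsic takes the regular operands of the operation plus
; two metadata arguments: the rounding mode (round.dynamic here) and the
; exception behavior (fpexcept.strict here).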
define float @fadd_f32(float %f1, float %f2) #0 {
; CHECK-LABEL: fadd_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xsaddsp f1, f1, f2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fadd_f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fadds f1, f1, f2
; NOVSX-NEXT: blr
;
; SPE-LABEL: fadd_f32:
; SPE: # %bb.0:
; SPE-NEXT: efsadd r3, r3, r4
; SPE-NEXT: blr
%res = call float @llvm.experimental.constrained.fadd.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %res
}
define double @fadd_f64(double %f1, double %f2) #0 {
; CHECK-LABEL: fadd_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xsadddp f1, f1, f2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fadd_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fadd f1, f1, f2
; NOVSX-NEXT: blr
;
; SPE-LABEL: fadd_f64:
; SPE: # %bb.0:
; SPE-NEXT: evmergelo r5, r5, r6
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: efdadd r4, r3, r5
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: blr
%res = call double @llvm.experimental.constrained.fadd.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %res
}
define <4 x float> @fadd_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fadd_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvaddsp v2, v2, v3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fadd_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: addi r4, r1, -48
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: stvx v2, 0, r4
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -20(r1)
; NOVSX-NEXT: lfs f1, -36(r1)
; NOVSX-NEXT: fadds f0, f1, f0
; NOVSX-NEXT: lfs f1, -40(r1)
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -24(r1)
; NOVSX-NEXT: fadds f0, f1, f0
; NOVSX-NEXT: lfs f1, -44(r1)
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -28(r1)
; NOVSX-NEXT: fadds f0, f1, f0
; NOVSX-NEXT: lfs f1, -48(r1)
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -32(r1)
; NOVSX-NEXT: fadds f0, f1, f0
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fadd_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: efsadd r6, r6, r10
; SPE-NEXT: efsadd r5, r5, r9
; SPE-NEXT: efsadd r4, r4, r8
; SPE-NEXT: efsadd r3, r3, r7
; SPE-NEXT: blr
%res = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(
<4 x float> %vf1, <4 x float> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x float> %res
}
define <2 x double> @fadd_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fadd_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvadddp v2, v2, v3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fadd_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fadd f2, f2, f4
; NOVSX-NEXT: fadd f1, f1, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fadd_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: evldd r4, 8(r1)
; SPE-NEXT: evmergelo r7, r7, r8
; SPE-NEXT: evmergelo r8, r9, r10
; SPE-NEXT: li r9, 8
; SPE-NEXT: evmergelo r5, r5, r6
; SPE-NEXT: efdadd r4, r7, r4
; SPE-NEXT: evstddx r4, r3, r9
; SPE-NEXT: efdadd r4, r5, r8
; SPE-NEXT: evstdd r4, 0(r3)
; SPE-NEXT: blr
%res = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
<2 x double> %vf1, <2 x double> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %res
}
define float @fsub_f32(float %f1, float %f2) #0 {
; CHECK-LABEL: fsub_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xssubsp f1, f1, f2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsub_f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fsubs f1, f1, f2
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsub_f32:
; SPE: # %bb.0:
; SPE-NEXT: efssub r3, r3, r4
; SPE-NEXT: blr
%res = call float @llvm.experimental.constrained.fsub.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %res
}
define double @fsub_f64(double %f1, double %f2) #0 {
; CHECK-LABEL: fsub_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xssubdp f1, f1, f2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsub_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fsub f1, f1, f2
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsub_f64:
; SPE: # %bb.0:
; SPE-NEXT: evmergelo r5, r5, r6
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: efdsub r4, r3, r5
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: blr
%res = call double @llvm.experimental.constrained.fsub.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %res
}
define <4 x float> @fsub_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fsub_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvsubsp v2, v2, v3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsub_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: addi r4, r1, -48
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: stvx v2, 0, r4
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -20(r1)
; NOVSX-NEXT: lfs f1, -36(r1)
; NOVSX-NEXT: fsubs f0, f1, f0
; NOVSX-NEXT: lfs f1, -40(r1)
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -24(r1)
; NOVSX-NEXT: fsubs f0, f1, f0
; NOVSX-NEXT: lfs f1, -44(r1)
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -28(r1)
; NOVSX-NEXT: fsubs f0, f1, f0
; NOVSX-NEXT: lfs f1, -48(r1)
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -32(r1)
; NOVSX-NEXT: fsubs f0, f1, f0
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsub_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: efssub r6, r6, r10
; SPE-NEXT: efssub r5, r5, r9
; SPE-NEXT: efssub r4, r4, r8
; SPE-NEXT: efssub r3, r3, r7
; SPE-NEXT: blr
%res = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(
<4 x float> %vf1, <4 x float> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x float> %res
}
define <2 x double> @fsub_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fsub_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvsubdp v2, v2, v3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsub_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fsub f2, f2, f4
; NOVSX-NEXT: fsub f1, f1, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsub_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: evldd r4, 8(r1)
; SPE-NEXT: evmergelo r7, r7, r8
; SPE-NEXT: evmergelo r8, r9, r10
; SPE-NEXT: li r9, 8
; SPE-NEXT: evmergelo r5, r5, r6
; SPE-NEXT: efdsub r4, r7, r4
; SPE-NEXT: evstddx r4, r3, r9
; SPE-NEXT: efdsub r4, r5, r8
; SPE-NEXT: evstdd r4, 0(r3)
; SPE-NEXT: blr
%res = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
<2 x double> %vf1, <2 x double> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %res
}
define float @fmul_f32(float %f1, float %f2) #0 {
; CHECK-LABEL: fmul_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xsmulsp f1, f1, f2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmul_f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fmuls f1, f1, f2
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmul_f32:
; SPE: # %bb.0:
; SPE-NEXT: efsmul r3, r3, r4
; SPE-NEXT: blr
%res = call float @llvm.experimental.constrained.fmul.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %res
}
define double @fmul_f64(double %f1, double %f2) #0 {
; CHECK-LABEL: fmul_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xsmuldp f1, f1, f2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmul_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fmul f1, f1, f2
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmul_f64:
; SPE: # %bb.0:
; SPE-NEXT: evmergelo r5, r5, r6
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: efdmul r4, r3, r5
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: blr
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %res
}
define <4 x float> @fmul_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fmul_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvmulsp v2, v2, v3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmul_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: addi r4, r1, -48
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: stvx v2, 0, r4
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -20(r1)
; NOVSX-NEXT: lfs f1, -36(r1)
; NOVSX-NEXT: fmuls f0, f1, f0
; NOVSX-NEXT: lfs f1, -40(r1)
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -24(r1)
; NOVSX-NEXT: fmuls f0, f1, f0
; NOVSX-NEXT: lfs f1, -44(r1)
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -28(r1)
; NOVSX-NEXT: fmuls f0, f1, f0
; NOVSX-NEXT: lfs f1, -48(r1)
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -32(r1)
; NOVSX-NEXT: fmuls f0, f1, f0
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmul_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: efsmul r6, r6, r10
; SPE-NEXT: efsmul r5, r5, r9
; SPE-NEXT: efsmul r4, r4, r8
; SPE-NEXT: efsmul r3, r3, r7
; SPE-NEXT: blr
%res = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(
<4 x float> %vf1, <4 x float> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x float> %res
}
define <2 x double> @fmul_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fmul_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvmuldp v2, v2, v3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmul_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fmul f2, f2, f4
; NOVSX-NEXT: fmul f1, f1, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmul_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: evldd r4, 8(r1)
; SPE-NEXT: evmergelo r7, r7, r8
; SPE-NEXT: evmergelo r8, r9, r10
; SPE-NEXT: li r9, 8
; SPE-NEXT: evmergelo r5, r5, r6
; SPE-NEXT: efdmul r4, r7, r4
; SPE-NEXT: evstddx r4, r3, r9
; SPE-NEXT: efdmul r4, r5, r8
; SPE-NEXT: evstdd r4, 0(r3)
; SPE-NEXT: blr
%res = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
<2 x double> %vf1, <2 x double> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %res
}
define float @fdiv_f32(float %f1, float %f2) #0 {
; CHECK-LABEL: fdiv_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xsdivsp f1, f1, f2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fdiv_f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fdivs f1, f1, f2
; NOVSX-NEXT: blr
;
; SPE-LABEL: fdiv_f32:
; SPE: # %bb.0:
; SPE-NEXT: efsdiv r3, r3, r4
; SPE-NEXT: blr
%res = call float @llvm.experimental.constrained.fdiv.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %res
}
define double @fdiv_f64(double %f1, double %f2) #0 {
; CHECK-LABEL: fdiv_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xsdivdp f1, f1, f2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fdiv_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fdiv f1, f1, f2
; NOVSX-NEXT: blr
;
; SPE-LABEL: fdiv_f64:
; SPE: # %bb.0:
; SPE-NEXT: evmergelo r5, r5, r6
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: efddiv r4, r3, r5
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: blr
%res = call double @llvm.experimental.constrained.fdiv.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %res
}
define <4 x float> @fdiv_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fdiv_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvdivsp v2, v2, v3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fdiv_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: addi r4, r1, -48
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: stvx v2, 0, r4
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -20(r1)
; NOVSX-NEXT: lfs f1, -36(r1)
; NOVSX-NEXT: fdivs f0, f1, f0
; NOVSX-NEXT: lfs f1, -40(r1)
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -24(r1)
; NOVSX-NEXT: fdivs f0, f1, f0
; NOVSX-NEXT: lfs f1, -44(r1)
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -28(r1)
; NOVSX-NEXT: fdivs f0, f1, f0
; NOVSX-NEXT: lfs f1, -48(r1)
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -32(r1)
; NOVSX-NEXT: fdivs f0, f1, f0
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fdiv_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: efsdiv r6, r6, r10
; SPE-NEXT: efsdiv r5, r5, r9
; SPE-NEXT: efsdiv r4, r4, r8
; SPE-NEXT: efsdiv r3, r3, r7
; SPE-NEXT: blr
%res = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(
<4 x float> %vf1, <4 x float> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x float> %res
}
define <2 x double> @fdiv_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fdiv_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvdivdp v2, v2, v3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fdiv_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fdiv f2, f2, f4
; NOVSX-NEXT: fdiv f1, f1, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fdiv_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: evldd r4, 8(r1)
; SPE-NEXT: evmergelo r7, r7, r8
; SPE-NEXT: evmergelo r8, r9, r10
; SPE-NEXT: evmergelo r5, r5, r6
; SPE-NEXT: efddiv r4, r7, r4
; SPE-NEXT: li r7, 8
; SPE-NEXT: evstddx r4, r3, r7
; SPE-NEXT: efddiv r4, r5, r8
; SPE-NEXT: evstdd r4, 0(r3)
; SPE-NEXT: blr
%res = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
<2 x double> %vf1, <2 x double> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %res
}
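; The two constrained operations below must stay separate: fusing them into a
; single FMA would skip the intermediate rounding step, which is observable
; under round.dynamic/fpexcept.strict.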
define double @no_fma_fold(double %f1, double %f2, double %f3) #0 {
; CHECK-LABEL: no_fma_fold:
; CHECK: # %bb.0:
; CHECK-NEXT: xsmuldp f0, f1, f2
; CHECK-NEXT: xsadddp f1, f0, f3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: no_fma_fold:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fmul f0, f1, f2
; NOVSX-NEXT: fadd f1, f0, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: no_fma_fold:
; SPE: # %bb.0:
; SPE-NEXT: evmergelo r7, r7, r8
; SPE-NEXT: evmergelo r5, r5, r6
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: efdmul r3, r3, r5
; SPE-NEXT: efdadd r4, r3, r7
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: blr
%mul = call double @llvm.experimental.constrained.fmul.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%add = call double @llvm.experimental.constrained.fadd.f64(
double %mul, double %f3,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %add
}
define float @fmadd_f32(float %f0, float %f1, float %f2) #0 {
; CHECK-LABEL: fmadd_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xsmaddasp f3, f1, f2
; CHECK-NEXT: fmr f1, f3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmadd_f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fmadds f1, f1, f2, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmadd_f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: bl fmaf
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%res = call float @llvm.experimental.constrained.fma.f32(
float %f0, float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %res
}
define double @fmadd_f64(double %f0, double %f1, double %f2) #0 {
; CHECK-LABEL: fmadd_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xsmaddadp f3, f1, f2
; CHECK-NEXT: fmr f1, f3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmadd_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fmadd f1, f1, f2, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmadd_f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: evmergelo r8, r7, r8
; SPE-NEXT: evmergelo r6, r5, r6
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: evmergehi r5, r6, r6
; SPE-NEXT: evmergehi r7, r8, r8
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6
; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%res = call double @llvm.experimental.constrained.fma.f64(
double %f0, double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %res
}
define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fmadd_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvmaddasp v4, v2, v3
; CHECK-NEXT: vmr v2, v4
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmadd_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: addi r4, r1, -48
; NOVSX-NEXT: stvx v4, 0, r3
; NOVSX-NEXT: addi r3, r1, -64
; NOVSX-NEXT: stvx v3, 0, r4
; NOVSX-NEXT: stvx v2, 0, r3
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -20(r1)
; NOVSX-NEXT: lfs f1, -36(r1)
; NOVSX-NEXT: lfs f2, -52(r1)
; NOVSX-NEXT: fmadds f0, f2, f1, f0
; NOVSX-NEXT: lfs f1, -40(r1)
; NOVSX-NEXT: lfs f2, -56(r1)
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -24(r1)
; NOVSX-NEXT: fmadds f0, f2, f1, f0
; NOVSX-NEXT: lfs f1, -44(r1)
; NOVSX-NEXT: lfs f2, -60(r1)
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -28(r1)
; NOVSX-NEXT: fmadds f0, f2, f1, f0
; NOVSX-NEXT: lfs f1, -48(r1)
; NOVSX-NEXT: lfs f2, -64(r1)
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -32(r1)
; NOVSX-NEXT: fmadds f0, f2, f1, f0
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmadd_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -144(r1)
; SPE-NEXT: .cfi_def_cfa_offset 144
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r21, -44
; SPE-NEXT: .cfi_offset r22, -40
; SPE-NEXT: .cfi_offset r23, -36
; SPE-NEXT: .cfi_offset r24, -32
; SPE-NEXT: .cfi_offset r25, -28
; SPE-NEXT: .cfi_offset r26, -24
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: .cfi_offset r21, -136
; SPE-NEXT: .cfi_offset r22, -128
; SPE-NEXT: .cfi_offset r23, -120
; SPE-NEXT: .cfi_offset r24, -112
; SPE-NEXT: .cfi_offset r25, -104
; SPE-NEXT: .cfi_offset r26, -96
; SPE-NEXT: .cfi_offset r27, -88
; SPE-NEXT: .cfi_offset r28, -80
; SPE-NEXT: .cfi_offset r29, -72
; SPE-NEXT: .cfi_offset r30, -64
; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r27, r5
; SPE-NEXT: lwz r5, 164(r1)
; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r25, r3
; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r26, r4
; SPE-NEXT: mr r3, r6
; SPE-NEXT: mr r4, r10
; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r28, r7
; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r29, r8
; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r30, r9
; SPE-NEXT: lwz r24, 152(r1)
; SPE-NEXT: lwz r23, 156(r1)
; SPE-NEXT: lwz r22, 160(r1)
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r21, r3
; SPE-NEXT: mr r3, r27
; SPE-NEXT: mr r4, r30
; SPE-NEXT: mr r5, r22
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r30, r3
; SPE-NEXT: mr r3, r26
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r5, r23
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r29, r3
; SPE-NEXT: mr r3, r25
; SPE-NEXT: mr r4, r28
; SPE-NEXT: mr r5, r24
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r5, r30
; SPE-NEXT: mr r6, r21
; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 148(r1)
; SPE-NEXT: addi r1, r1, 144
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x float> %res
}
define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fmadd_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvmaddadp v4, v2, v3
; CHECK-NEXT: vmr v2, v4
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmadd_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fmadd f2, f2, f4, f6
; NOVSX-NEXT: fmadd f1, f1, f3, f5
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmadd_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -96(r1)
; SPE-NEXT: .cfi_def_cfa_offset 96
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r26, -24
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: .cfi_offset r26, -80
; SPE-NEXT: .cfi_offset r27, -72
; SPE-NEXT: .cfi_offset r28, -64
; SPE-NEXT: .cfi_offset r29, -56
; SPE-NEXT: .cfi_offset r30, -48
; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill
; SPE-NEXT: evmergelo r27, r7, r8
; SPE-NEXT: evmergelo r9, r9, r10
; SPE-NEXT: evmergelo r4, r5, r6
; SPE-NEXT: mr r30, r3
; SPE-NEXT: evldd r8, 112(r1)
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: evmergehi r5, r9, r9
; SPE-NEXT: mr r6, r9
; SPE-NEXT: evldd r29, 120(r1)
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: evmergehi r7, r8, r8
; SPE-NEXT: evldd r28, 104(r1)
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r26, r3, r4
; SPE-NEXT: evmergehi r3, r27, r27
; SPE-NEXT: evmergehi r5, r28, r28
; SPE-NEXT: evmergehi r7, r29, r29
; SPE-NEXT: mr r4, r27
; SPE-NEXT: mr r6, r28
; SPE-NEXT: mr r8, r29
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: bl fma
; SPE-NEXT: li r5, 8
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: evstddx r3, r30, r5
; SPE-NEXT: evstdd r26, 0(r30)
; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 100(r1)
; SPE-NEXT: addi r1, r1, 96
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %res
}
define float @fmsub_f32(float %f0, float %f1, float %f2) #0 {
; CHECK-LABEL: fmsub_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xsmsubasp f3, f1, f2
; CHECK-NEXT: fmr f1, f3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmsub_f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fmsubs f1, f1, f2, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmsub_f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: efsneg r5, r5
; SPE-NEXT: bl fmaf
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%neg = fneg float %f2
%res = call float @llvm.experimental.constrained.fma.f32(
float %f0, float %f1, float %neg,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %res
}
define double @fmsub_f64(double %f0, double %f1, double %f2) #0 {
; CHECK-LABEL: fmsub_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xsmsubadp f3, f1, f2
; CHECK-NEXT: fmr f1, f3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmsub_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fmsub f1, f1, f2, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmsub_f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: evmergelo r6, r5, r6
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergelo r3, r7, r8
; SPE-NEXT: efdneg r8, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: evmergehi r5, r6, r6
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6
; SPE-NEXT: evmergehi r7, r8, r8
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%neg = fneg double %f2
%res = call double @llvm.experimental.constrained.fma.f64(
double %f0, double %f1, double %neg,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %res
}
define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fmsub_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvmsubasp v4, v2, v3
; CHECK-NEXT: vmr v2, v4
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmsub_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: vspltisb v5, -1
; NOVSX-NEXT: addi r3, r1, -48
; NOVSX-NEXT: addi r4, r1, -64
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: stvx v2, 0, r4
; NOVSX-NEXT: vslw v5, v5, v5
; NOVSX-NEXT: vsubfp v4, v5, v4
; NOVSX-NEXT: stvx v4, 0, r3
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -36(r1)
; NOVSX-NEXT: lfs f1, -52(r1)
; NOVSX-NEXT: lfs f2, -20(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: lfs f1, -56(r1)
; NOVSX-NEXT: lfs f2, -24(r1)
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -40(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: lfs f1, -60(r1)
; NOVSX-NEXT: lfs f2, -28(r1)
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -44(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: lfs f1, -64(r1)
; NOVSX-NEXT: lfs f2, -32(r1)
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -48(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmsub_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -144(r1)
; SPE-NEXT: .cfi_def_cfa_offset 144
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r21, -44
; SPE-NEXT: .cfi_offset r22, -40
; SPE-NEXT: .cfi_offset r23, -36
; SPE-NEXT: .cfi_offset r24, -32
; SPE-NEXT: .cfi_offset r25, -28
; SPE-NEXT: .cfi_offset r26, -24
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: .cfi_offset r21, -136
; SPE-NEXT: .cfi_offset r22, -128
; SPE-NEXT: .cfi_offset r23, -120
; SPE-NEXT: .cfi_offset r24, -112
; SPE-NEXT: .cfi_offset r25, -104
; SPE-NEXT: .cfi_offset r26, -96
; SPE-NEXT: .cfi_offset r27, -88
; SPE-NEXT: .cfi_offset r28, -80
; SPE-NEXT: .cfi_offset r29, -72
; SPE-NEXT: .cfi_offset r30, -64
; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r25, r3
; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r26, r4
; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r27, r5
; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r28, r7
; SPE-NEXT: lwz r3, 160(r1)
; SPE-NEXT: lwz r4, 152(r1)
; SPE-NEXT: lwz r5, 156(r1)
; SPE-NEXT: lwz r7, 164(r1)
; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: efsneg r22, r3
; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: efsneg r23, r5
; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: efsneg r24, r4
; SPE-NEXT: efsneg r5, r7
; SPE-NEXT: mr r3, r6
; SPE-NEXT: mr r4, r10
; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r29, r8
; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r30, r9
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r21, r3
; SPE-NEXT: mr r3, r27
; SPE-NEXT: mr r4, r30
; SPE-NEXT: mr r5, r22
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r30, r3
; SPE-NEXT: mr r3, r26
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r5, r23
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r29, r3
; SPE-NEXT: mr r3, r25
; SPE-NEXT: mr r4, r28
; SPE-NEXT: mr r5, r24
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r5, r30
; SPE-NEXT: mr r6, r21
; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 148(r1)
; SPE-NEXT: addi r1, r1, 144
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%neg = fneg <4 x float> %vf2
%res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
<4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x float> %res
}
define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fmsub_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvmsubadp v4, v2, v3
; CHECK-NEXT: vmr v2, v4
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fmsub_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fmsub f2, f2, f4, f6
; NOVSX-NEXT: fmsub f1, f1, f3, f5
; NOVSX-NEXT: blr
;
; SPE-LABEL: fmsub_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -96(r1)
; SPE-NEXT: .cfi_def_cfa_offset 96
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r26, -24
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: .cfi_offset r26, -80
; SPE-NEXT: .cfi_offset r27, -72
; SPE-NEXT: .cfi_offset r28, -64
; SPE-NEXT: .cfi_offset r29, -56
; SPE-NEXT: .cfi_offset r30, -48
; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r30, r3
; SPE-NEXT: evldd r3, 112(r1)
; SPE-NEXT: evldd r11, 120(r1)
; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: efdneg r27, r11
; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
; SPE-NEXT: evmergelo r29, r7, r8
; SPE-NEXT: evmergelo r9, r9, r10
; SPE-NEXT: evmergelo r4, r5, r6
; SPE-NEXT: efdneg r8, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: evmergehi r5, r9, r9
; SPE-NEXT: evmergehi r7, r8, r8
; SPE-NEXT: mr r6, r9
; SPE-NEXT: evldd r28, 104(r1)
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r26, r3, r4
; SPE-NEXT: evmergehi r3, r29, r29
; SPE-NEXT: evmergehi r5, r28, r28
; SPE-NEXT: evmergehi r7, r27, r27
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r6, r28
; SPE-NEXT: mr r8, r27
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: bl fma
; SPE-NEXT: li r5, 8
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: evstddx r3, r30, r5
; SPE-NEXT: evstdd r26, 0(r30)
; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 100(r1)
; SPE-NEXT: addi r1, r1, 96
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%neg = fneg <2 x double> %vf2
%res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
<2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %res
}
define float @fnmadd_f32(float %f0, float %f1, float %f2) #0 {
; CHECK-LABEL: fnmadd_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xsnmaddasp f3, f1, f2
; CHECK-NEXT: fmr f1, f3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmadd_f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fnmadds f1, f1, f2, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmadd_f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: bl fmaf
; SPE-NEXT: efsneg r3, r3
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%fma = call float @llvm.experimental.constrained.fma.f32(
float %f0, float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%res = fneg float %fma
ret float %res
}
define double @fnmadd_f64(double %f0, double %f1, double %f2) #0 {
; CHECK-LABEL: fnmadd_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xsnmaddadp f3, f1, f2
; CHECK-NEXT: fmr f1, f3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmadd_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fnmadd f1, f1, f2, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmadd_f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: evmergelo r8, r7, r8
; SPE-NEXT: evmergelo r6, r5, r6
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: evmergehi r5, r6, r6
; SPE-NEXT: evmergehi r7, r8, r8
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6
; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: efdneg r4, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%fma = call double @llvm.experimental.constrained.fma.f64(
double %f0, double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%res = fneg double %fma
ret double %res
}
define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fnmadd_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvmaddasp v4, v2, v3
; CHECK-NEXT: xvnegsp v2, v4
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmadd_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: addi r4, r1, -48
; NOVSX-NEXT: stvx v4, 0, r3
; NOVSX-NEXT: addi r3, r1, -64
; NOVSX-NEXT: stvx v3, 0, r4
; NOVSX-NEXT: stvx v2, 0, r3
; NOVSX-NEXT: vspltisb v2, -1
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -20(r1)
; NOVSX-NEXT: lfs f1, -36(r1)
; NOVSX-NEXT: lfs f2, -52(r1)
; NOVSX-NEXT: vslw v2, v2, v2
; NOVSX-NEXT: fmadds f0, f2, f1, f0
; NOVSX-NEXT: lfs f1, -40(r1)
; NOVSX-NEXT: lfs f2, -56(r1)
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -24(r1)
; NOVSX-NEXT: fmadds f0, f2, f1, f0
; NOVSX-NEXT: lfs f1, -44(r1)
; NOVSX-NEXT: lfs f2, -60(r1)
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -28(r1)
; NOVSX-NEXT: fmadds f0, f2, f1, f0
; NOVSX-NEXT: lfs f1, -48(r1)
; NOVSX-NEXT: lfs f2, -64(r1)
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -32(r1)
; NOVSX-NEXT: fmadds f0, f2, f1, f0
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v3, 0, r3
; NOVSX-NEXT: vsubfp v2, v2, v3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmadd_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -144(r1)
; SPE-NEXT: .cfi_def_cfa_offset 144
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r21, -44
; SPE-NEXT: .cfi_offset r22, -40
; SPE-NEXT: .cfi_offset r23, -36
; SPE-NEXT: .cfi_offset r24, -32
; SPE-NEXT: .cfi_offset r25, -28
; SPE-NEXT: .cfi_offset r26, -24
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: .cfi_offset r21, -136
; SPE-NEXT: .cfi_offset r22, -128
; SPE-NEXT: .cfi_offset r23, -120
; SPE-NEXT: .cfi_offset r24, -112
; SPE-NEXT: .cfi_offset r25, -104
; SPE-NEXT: .cfi_offset r26, -96
; SPE-NEXT: .cfi_offset r27, -88
; SPE-NEXT: .cfi_offset r28, -80
; SPE-NEXT: .cfi_offset r29, -72
; SPE-NEXT: .cfi_offset r30, -64
; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r27, r5
; SPE-NEXT: lwz r5, 164(r1)
; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r25, r3
; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r26, r4
; SPE-NEXT: mr r3, r6
; SPE-NEXT: mr r4, r10
; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r28, r7
; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r29, r8
; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r30, r9
; SPE-NEXT: lwz r24, 152(r1)
; SPE-NEXT: lwz r23, 156(r1)
; SPE-NEXT: lwz r22, 160(r1)
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r21, r3
; SPE-NEXT: mr r3, r27
; SPE-NEXT: mr r4, r30
; SPE-NEXT: mr r5, r22
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r30, r3
; SPE-NEXT: mr r3, r26
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r5, r23
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r29, r3
; SPE-NEXT: mr r3, r25
; SPE-NEXT: mr r4, r28
; SPE-NEXT: mr r5, r24
; SPE-NEXT: bl fmaf
; SPE-NEXT: efsneg r4, r29
; SPE-NEXT: efsneg r5, r30
; SPE-NEXT: efsneg r6, r21
; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload
; SPE-NEXT: efsneg r3, r3
; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 148(r1)
; SPE-NEXT: addi r1, r1, 144
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%res = fneg <4 x float> %fma
ret <4 x float> %res
}
define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fnmadd_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvnmaddadp v4, v2, v3
; CHECK-NEXT: vmr v2, v4
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmadd_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fnmadd f2, f2, f4, f6
; NOVSX-NEXT: fnmadd f1, f1, f3, f5
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmadd_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -96(r1)
; SPE-NEXT: .cfi_def_cfa_offset 96
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r26, -24
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: .cfi_offset r26, -80
; SPE-NEXT: .cfi_offset r27, -72
; SPE-NEXT: .cfi_offset r28, -64
; SPE-NEXT: .cfi_offset r29, -56
; SPE-NEXT: .cfi_offset r30, -48
; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill
; SPE-NEXT: evmergelo r27, r7, r8
; SPE-NEXT: evmergelo r9, r9, r10
; SPE-NEXT: evmergelo r4, r5, r6
; SPE-NEXT: mr r30, r3
; SPE-NEXT: evldd r8, 112(r1)
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: evmergehi r5, r9, r9
; SPE-NEXT: mr r6, r9
; SPE-NEXT: evldd r29, 120(r1)
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: evmergehi r7, r8, r8
; SPE-NEXT: evldd r28, 104(r1)
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r26, r3, r4
; SPE-NEXT: evmergehi r3, r27, r27
; SPE-NEXT: evmergehi r5, r28, r28
; SPE-NEXT: evmergehi r7, r29, r29
; SPE-NEXT: mr r4, r27
; SPE-NEXT: mr r6, r28
; SPE-NEXT: mr r8, r29
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: li r5, 8
; SPE-NEXT: efdneg r3, r3
; SPE-NEXT: evstddx r3, r30, r5
; SPE-NEXT: efdneg r3, r26
; SPE-NEXT: evstdd r3, 0(r30)
; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 100(r1)
; SPE-NEXT: addi r1, r1, 96
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%res = fneg <2 x double> %fma
ret <2 x double> %res
}
define float @fnmsub_f32(float %f0, float %f1, float %f2) #0 {
; CHECK-LABEL: fnmsub_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xsnmsubasp f3, f1, f2
; CHECK-NEXT: fmr f1, f3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmsub_f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fnmsubs f1, f1, f2, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmsub_f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: efsneg r5, r5
; SPE-NEXT: bl fmaf
; SPE-NEXT: efsneg r3, r3
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%neg = fneg float %f2
%fma = call float @llvm.experimental.constrained.fma.f32(
float %f0, float %f1, float %neg,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%res = fneg float %fma
ret float %res
}
define double @fnmsub_f64(double %f0, double %f1, double %f2) #0 {
; CHECK-LABEL: fnmsub_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xsnmsubadp f3, f1, f2
; CHECK-NEXT: fmr f1, f3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmsub_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fnmsub f1, f1, f2, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmsub_f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: evmergelo r6, r5, r6
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergelo r3, r7, r8
; SPE-NEXT: efdneg r8, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: evmergehi r5, r6, r6
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6
; SPE-NEXT: evmergehi r7, r8, r8
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: efdneg r4, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%neg = fneg double %f2
%fma = call double @llvm.experimental.constrained.fma.f64(
double %f0, double %f1, double %neg,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%res = fneg double %fma
ret double %res
}
define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fnmsub_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvnmsubasp v4, v2, v3
; CHECK-NEXT: vmr v2, v4
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmsub_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: vspltisb v5, -1
; NOVSX-NEXT: addi r3, r1, -48
; NOVSX-NEXT: addi r4, r1, -64
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: stvx v2, 0, r4
; NOVSX-NEXT: vslw v5, v5, v5
; NOVSX-NEXT: vsubfp v4, v5, v4
; NOVSX-NEXT: stvx v4, 0, r3
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -36(r1)
; NOVSX-NEXT: lfs f1, -52(r1)
; NOVSX-NEXT: lfs f2, -20(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: lfs f1, -56(r1)
; NOVSX-NEXT: lfs f2, -24(r1)
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -40(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: lfs f1, -60(r1)
; NOVSX-NEXT: lfs f2, -28(r1)
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -44(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: lfs f1, -64(r1)
; NOVSX-NEXT: lfs f2, -32(r1)
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -48(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
; NOVSX-NEXT: vsubfp v2, v5, v2
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmsub_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -144(r1)
; SPE-NEXT: .cfi_def_cfa_offset 144
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r21, -44
; SPE-NEXT: .cfi_offset r22, -40
; SPE-NEXT: .cfi_offset r23, -36
; SPE-NEXT: .cfi_offset r24, -32
; SPE-NEXT: .cfi_offset r25, -28
; SPE-NEXT: .cfi_offset r26, -24
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: .cfi_offset r21, -136
; SPE-NEXT: .cfi_offset r22, -128
; SPE-NEXT: .cfi_offset r23, -120
; SPE-NEXT: .cfi_offset r24, -112
; SPE-NEXT: .cfi_offset r25, -104
; SPE-NEXT: .cfi_offset r26, -96
; SPE-NEXT: .cfi_offset r27, -88
; SPE-NEXT: .cfi_offset r28, -80
; SPE-NEXT: .cfi_offset r29, -72
; SPE-NEXT: .cfi_offset r30, -64
; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r25, r3
; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r26, r4
; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r27, r5
; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r28, r7
; SPE-NEXT: lwz r3, 160(r1)
; SPE-NEXT: lwz r4, 152(r1)
; SPE-NEXT: lwz r5, 156(r1)
; SPE-NEXT: lwz r7, 164(r1)
; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: efsneg r22, r3
; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: efsneg r23, r5
; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: efsneg r24, r4
; SPE-NEXT: efsneg r5, r7
; SPE-NEXT: mr r3, r6
; SPE-NEXT: mr r4, r10
; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r29, r8
; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r30, r9
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r21, r3
; SPE-NEXT: mr r3, r27
; SPE-NEXT: mr r4, r30
; SPE-NEXT: mr r5, r22
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r30, r3
; SPE-NEXT: mr r3, r26
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r5, r23
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r29, r3
; SPE-NEXT: mr r3, r25
; SPE-NEXT: mr r4, r28
; SPE-NEXT: mr r5, r24
; SPE-NEXT: bl fmaf
; SPE-NEXT: efsneg r4, r29
; SPE-NEXT: efsneg r5, r30
; SPE-NEXT: efsneg r6, r21
; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload
; SPE-NEXT: efsneg r3, r3
; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 148(r1)
; SPE-NEXT: addi r1, r1, 144
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%neg = fneg <4 x float> %vf2
%fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
<4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%res = fneg <4 x float> %fma
ret <4 x float> %res
}
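; fnmsub on v2f64, as the checks below show: VSX folds the negated
; multiply-subtract into a single xvnmsubadp, and the plain FPU keeps one
; scalar fnmsub per element. SPE has no double-precision fma instruction,
; so each element becomes a call to the fma libcall, with efdneg applied
; to the addend before the call and to the result after it.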
define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fnmsub_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvnmsubadp v4, v2, v3
; CHECK-NEXT: vmr v2, v4
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmsub_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fnmsub f2, f2, f4, f6
; NOVSX-NEXT: fnmsub f1, f1, f3, f5
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmsub_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -96(r1)
; SPE-NEXT: .cfi_def_cfa_offset 96
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r26, -24
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: .cfi_offset r26, -80
; SPE-NEXT: .cfi_offset r27, -72
; SPE-NEXT: .cfi_offset r28, -64
; SPE-NEXT: .cfi_offset r29, -56
; SPE-NEXT: .cfi_offset r30, -48
; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r30, r3
; SPE-NEXT: evldd r3, 112(r1)
; SPE-NEXT: evldd r11, 120(r1)
; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: efdneg r27, r11
; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
; SPE-NEXT: evmergelo r29, r7, r8
; SPE-NEXT: evmergelo r9, r9, r10
; SPE-NEXT: evmergelo r4, r5, r6
; SPE-NEXT: efdneg r8, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: evmergehi r5, r9, r9
; SPE-NEXT: evmergehi r7, r8, r8
; SPE-NEXT: mr r6, r9
; SPE-NEXT: evldd r28, 104(r1)
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r26, r3, r4
; SPE-NEXT: evmergehi r3, r29, r29
; SPE-NEXT: evmergehi r5, r28, r28
; SPE-NEXT: evmergehi r7, r27, r27
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r6, r28
; SPE-NEXT: mr r8, r27
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: li r5, 8
; SPE-NEXT: efdneg r3, r3
; SPE-NEXT: evstddx r3, r30, r5
; SPE-NEXT: efdneg r3, r26
; SPE-NEXT: evstdd r3, 0(r30)
; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 100(r1)
; SPE-NEXT: addi r1, r1, 96
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%neg = fneg <2 x double> %vf2
%fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
<2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%res = fneg <2 x double> %fma
ret <2 x double> %res
}
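; Strict square root. The constrained sqrt intrinsics below must preserve
; rounding-mode and exception behavior; targets with hardware sqrt use a
; single instruction, while SPE lowers to the sqrtf/sqrt libcalls.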
define float @fsqrt_f32(float %f1) #0 {
; CHECK-LABEL: fsqrt_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xssqrtsp f1, f1
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsqrt_f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fsqrts f1, f1
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsqrt_f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: bl sqrtf
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%res = call float @llvm.experimental.constrained.sqrt.f32(
float %f1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %res
}
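; The f64 variant: xssqrtdp with VSX, the classic fsqrt without it, and a
; sqrt libcall on SPE, where the double is split across a GPR pair (hence
; the evmergehi/evmergelo shuffling around the call).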
define double @fsqrt_f64(double %f1) #0 {
; CHECK-LABEL: fsqrt_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xssqrtdp f1, f1
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsqrt_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fsqrt f1, f1
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsqrt_f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: bl sqrt
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%res = call double @llvm.experimental.constrained.sqrt.f64(
double %f1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %res
}
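; Vector single-precision sqrt: one xvsqrtsp with VSX. Without VSX the
; vector is spilled to the stack and each lane goes through scalar fsqrts;
; SPE makes four sqrtf libcalls, one per element.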
define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) #0 {
; CHECK-LABEL: fsqrt_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvsqrtsp v2, v2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsqrt_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: stvx v2, 0, r3
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -20(r1)
; NOVSX-NEXT: fsqrts f0, f0
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -24(r1)
; NOVSX-NEXT: fsqrts f0, f0
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -28(r1)
; NOVSX-NEXT: fsqrts f0, f0
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -32(r1)
; NOVSX-NEXT: fsqrts f0, f0
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsqrt_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -80(r1)
; SPE-NEXT: .cfi_def_cfa_offset 80
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: .cfi_offset r27, -72
; SPE-NEXT: .cfi_offset r28, -64
; SPE-NEXT: .cfi_offset r29, -56
; SPE-NEXT: .cfi_offset r30, -48
; SPE-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r28, r3
; SPE-NEXT: mr r3, r6
; SPE-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r27, 8(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r29, r4
; SPE-NEXT: evstdd r30, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: mr r30, r5
; SPE-NEXT: bl sqrtf
; SPE-NEXT: mr r27, r3
; SPE-NEXT: mr r3, r30
; SPE-NEXT: bl sqrtf
; SPE-NEXT: mr r30, r3
; SPE-NEXT: mr r3, r29
; SPE-NEXT: bl sqrtf
; SPE-NEXT: mr r29, r3
; SPE-NEXT: mr r3, r28
; SPE-NEXT: bl sqrtf
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r5, r30
; SPE-NEXT: mr r6, r27
; SPE-NEXT: evldd r30, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r27, 8(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 84(r1)
; SPE-NEXT: addi r1, r1, 80
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
<4 x float> %vf1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x float> %res
}
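; Vector double-precision sqrt: xvsqrtdp with VSX, two scalar fsqrt without
; it, and two sqrt libcalls on SPE, whose results are stored through the
; indirect-return pointer passed in r3.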
define <2 x double> @fsqrt_v2f64(<2 x double> %vf1) #0 {
; CHECK-LABEL: fsqrt_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvsqrtdp v2, v2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsqrt_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fsqrt f2, f2
; NOVSX-NEXT: fsqrt f1, f1
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsqrt_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stw r0, 4(r1)
; SPE-NEXT: stwu r1, -64(r1)
; SPE-NEXT: .cfi_def_cfa_offset 64
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: .cfi_offset r28, -48
; SPE-NEXT: .cfi_offset r29, -40
; SPE-NEXT: .cfi_offset r30, -32
; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill
; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r30, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: evmergelo r29, r7, r8
; SPE-NEXT: evmergelo r4, r5, r6
; SPE-NEXT: mr r30, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: bl sqrt
; SPE-NEXT: evmergelo r28, r3, r4
; SPE-NEXT: evmergehi r3, r29, r29
; SPE-NEXT: mr r4, r29
; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT: bl sqrt
; SPE-NEXT: li r5, 8
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: evstddx r3, r30, r5
; SPE-NEXT: evstdd r28, 0(r30)
; SPE-NEXT: evldd r30, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 68(r1)
; SPE-NEXT: addi r1, r1, 64
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
%res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
<2 x double> %vf1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %res
}
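; All functions above are marked strictfp so the constrained intrinsics keep
; their rounding-mode and exception side effects and are not relaxed into
; ordinary FP operations.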
attributes #0 = { strictfp }