1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/PowerPC/repeated-fp-divisors.ll
Qiu Chaofan 1223486e31 [PowerPC] Require nsz flag for c-a*b to FNMSUB
On PowerPC, FNMSUB (both VSX and non-VSX version) means -(a*b-c). But
the backend used to generate these instructions regardless whether nsz
flag exists or not. If a*b-c==0, such transformation changes sign of
zero.

This patch introduces PPC specific FNMSUB ISD opcode, which may help
improving combined FMA code sequence.

Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D76585
2020-06-04 16:41:27 +08:00

28 lines
1.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- < %s | FileCheck %s
define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
; CHECK-LABEL: repeated_fp_divisor:
; CHECK: # %bb.0:
; CHECK-NEXT: xscvdpspn 0, 1
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
; CHECK-NEXT: lvx 3, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
; CHECK-NEXT: lvx 4, 0, 3
; CHECK-NEXT: xxspltw 0, 0, 0
; CHECK-NEXT: xvresp 1, 0
; CHECK-NEXT: xvnmsubasp 35, 0, 1
; CHECK-NEXT: xvmulsp 0, 34, 36
; CHECK-NEXT: xvmaddasp 1, 1, 35
; CHECK-NEXT: xvmulsp 34, 0, 1
; CHECK-NEXT: blr
%ins = insertelement <4 x float> undef, float %a, i32 0
%splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
%t1 = fmul reassoc <4 x float> %b, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0x3FF028F5C0000000>
%mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat
ret <4 x float> %mul
}