1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[PowerPC] Exploit vnmsubfp instruction

On PowerPC, we have the vnmsubfp Altivec instruction for the fnmsub operation
on the v4f32 type. The default pattern for this instruction never works, since
we don't have a legal fneg for v4f32 when VSX is disabled.

Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D80617
This commit is contained in:
Qiu Chaofan 2020-06-14 23:19:17 +08:00
parent e59e06d663
commit d9107e9132
4 changed files with 6 additions and 10 deletions

View File

@ -16287,8 +16287,7 @@ SDValue PPCTargetLowering::combineFMALike(SDNode *N,
SDLoc Loc(N);
// TODO: QPX subtarget is deprecated. No transformation here.
if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT) ||
(VT.isVector() && !Subtarget.hasVSX()))
if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT))
return SDValue();
// Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0

View File

@ -1024,6 +1024,9 @@ def : Pat<(fmul v4f32:$vA, v4f32:$vB),
(VMADDFP $vA, $vB,
(v4i32 (VSLW (v4i32 (V_SETALLONES)), (v4i32 (V_SETALLONES)))))>;
def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C),
(VNMSUBFP $A, $B, $C)>;
def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
(VMADDFP $A, $B, $C)>;
def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),

View File

@ -304,10 +304,7 @@ define <4 x float> @test_fast_neg_fma_v4f32(<4 x float> %a, <4 x float> %b,
;
; NO-VSX-LABEL: test_fast_neg_fma_v4f32:
; NO-VSX: # %bb.0: # %entry
; NO-VSX-NEXT: vspltisb 5, -1
; NO-VSX-NEXT: vslw 5, 5, 5
; NO-VSX-NEXT: vsubfp 2, 5, 2
; NO-VSX-NEXT: vmaddfp 2, 2, 3, 4
; NO-VSX-NEXT: vnmsubfp 2, 2, 3, 4
; NO-VSX-NEXT: blr
<4 x float> %c) {
entry:

View File

@ -679,12 +679,9 @@ define <4 x float> @hoo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-P7: # %bb.0:
; CHECK-P7-NEXT: vspltisw 4, -1
; CHECK-P7-NEXT: vrefp 5, 3
; CHECK-P7-NEXT: vspltisb 0, -1
; CHECK-P7-NEXT: vslw 0, 0, 0
; CHECK-P7-NEXT: vslw 4, 4, 4
; CHECK-P7-NEXT: vsubfp 3, 0, 3
; CHECK-P7-NEXT: vmaddfp 4, 2, 5, 4
; CHECK-P7-NEXT: vmaddfp 2, 3, 4, 2
; CHECK-P7-NEXT: vnmsubfp 2, 3, 4, 2
; CHECK-P7-NEXT: vmaddfp 2, 5, 2, 4
; CHECK-P7-NEXT: blr
;