Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2025-01-31 12:41:49 +01:00

[RISCV] Add patterns for scalable-vector fabs & fcopysign

The patterns mostly follow the scalar counterparts, save for some extra
optimizations to match the vector/scalar forms.

The patch adds a DAGCombine for ISD::FCOPYSIGN to try and reorder
ISD::FNEG around any ISD::FP_EXTEND or ISD::FP_TRUNC of the second
operand. This helps us achieve better codegen to match vfsgnjn.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D96028
This commit is contained in:
Fraser Cormack 2021-02-04 12:07:59 +00:00
parent cc791cffbb
commit 7d1ce513fb
4 changed files with 1699 additions and 1 deletion

View File

@ -497,6 +497,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
};
if (Subtarget.hasStdExtZfh())
@ -604,6 +605,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZbp()) {
setTargetDAGCombine(ISD::OR);
}
if (Subtarget.hasStdExtV())
setTargetDAGCombine(ISD::FCOPYSIGN);
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
@ -2966,6 +2969,30 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
case ISD::FCOPYSIGN: {
EVT VT = N->getValueType(0);
// This combine only applies to vector FCOPYSIGN nodes.
if (!VT.isVector())
break;
// There is a form of VFSGNJ (vfsgnjn) which injects the negated sign of its
// second operand. Try and bubble any FNEG up past the FP_EXTEND/FP_ROUND of
// the sign operand so that form can be matched. Only a non-truncating
// FP_ROUND (trunc flag == 0) is handled; truncating rounds (TRUNC=1) are
// left unmodified.
SDValue In2 = N->getOperand(1);
// Avoid cases where the extend/round has multiple uses, as duplicating
// those is typically more expensive than removing a fneg.
if (!In2.hasOneUse())
break;
if (In2.getOpcode() != ISD::FP_EXTEND &&
(In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
break;
In2 = In2.getOperand(0);
// Only rewrite when the extended/rounded value is itself a negation.
if (In2.getOpcode() != ISD::FNEG)
break;
SDLoc DL(N);
// Rebuild the extend/round at the result type, then re-apply the FNEG on
// top of it:
//   fcopysign X, (fpext/fpround (fneg Y))
//     -> fcopysign X, (fneg (fpext/fpround Y))
SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
}
}
return SDValue();

View File

@ -642,11 +642,32 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>("PseudoVFSQRT_V_"# vti.LMul.MX)
vti.RegClass:$rs2, vti.AVL, vti.SEW)>;
// 14.12. Vector Floating-Point Sign-Injection Instructions
// fabs: vfsgnjx with the same vector for both operands XORs the sign bit
// with itself, clearing it.
def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
(!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX)
vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.SEW)>;
// Handle fneg with VFSGNJN using the same input for both operands.
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.SEW)>;
// fcopysign: plain vector/vector sign injection.
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.SEW)>;
// fcopysign with a splatted scalar sign source: use the vector/scalar form
// to avoid materializing the splat.
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (splat_vector vti.ScalarRegClass:$rs2)))),
(!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.SEW)>;
// fcopysign of a negated sign source: fold the fneg into VFSGNJN.
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg vti.RegClass:$rs2)))),
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.SEW)>;
// Same fneg fold for the splatted-scalar sign source (vector/scalar VFSGNJN).
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg (splat_vector vti.ScalarRegClass:$rs2))))),
(!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.SEW)>;
}
// 14.11. Vector Floating-Point Compare Instructions

View File

@ -0,0 +1,185 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s

; These tests check that scalable-vector llvm.fabs.* intrinsics lower to a
; single vfsgnjx.vv (sign-injection XOR of a value with itself, which clears
; the sign bit) on both RV32 and RV64, across the element type / LMUL
; combinations below.

; --- f16 element types (nxv1..nxv32) ---
declare <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half>)
define <vscale x 1 x half> @vfabs_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-LABEL: vfabs_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half> %v)
ret <vscale x 1 x half> %r
}
declare <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half>)
define <vscale x 2 x half> @vfabs_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-LABEL: vfabs_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %v)
ret <vscale x 2 x half> %r
}
declare <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half>)
define <vscale x 4 x half> @vfabs_nxv4f16(<vscale x 4 x half> %v) {
; CHECK-LABEL: vfabs_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %v)
ret <vscale x 4 x half> %r
}
declare <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half>)
define <vscale x 8 x half> @vfabs_nxv8f16(<vscale x 8 x half> %v) {
; CHECK-LABEL: vfabs_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %v)
ret <vscale x 8 x half> %r
}
declare <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half>)
define <vscale x 16 x half> @vfabs_nxv16f16(<vscale x 16 x half> %v) {
; CHECK-LABEL: vfabs_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> %v)
ret <vscale x 16 x half> %r
}
declare <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half>)
define <vscale x 32 x half> @vfabs_nxv32f16(<vscale x 32 x half> %v) {
; CHECK-LABEL: vfabs_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half> %v)
ret <vscale x 32 x half> %r
}

; --- f32 element types (nxv1..nxv16) ---
declare <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float>)
define <vscale x 1 x float> @vfabs_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: vfabs_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float> %v)
ret <vscale x 1 x float> %r
}
declare <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float>)
define <vscale x 2 x float> @vfabs_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: vfabs_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %v)
ret <vscale x 2 x float> %r
}
declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
define <vscale x 4 x float> @vfabs_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: vfabs_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %v)
ret <vscale x 4 x float> %r
}
declare <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float>)
define <vscale x 8 x float> @vfabs_nxv8f32(<vscale x 8 x float> %v) {
; CHECK-LABEL: vfabs_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float> %v)
ret <vscale x 8 x float> %r
}
declare <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float>)
define <vscale x 16 x float> @vfabs_nxv16f32(<vscale x 16 x float> %v) {
; CHECK-LABEL: vfabs_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float> %v)
ret <vscale x 16 x float> %r
}

; --- f64 element types (nxv1..nxv8) ---
declare <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double>)
define <vscale x 1 x double> @vfabs_nxv1f64(<vscale x 1 x double> %v) {
; CHECK-LABEL: vfabs_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double> %v)
ret <vscale x 1 x double> %r
}
declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
define <vscale x 2 x double> @vfabs_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: vfabs_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %v)
ret <vscale x 2 x double> %r
}
declare <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double>)
define <vscale x 4 x double> @vfabs_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: vfabs_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %v)
ret <vscale x 4 x double> %r
}
declare <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double>)
define <vscale x 8 x double> @vfabs_nxv8f64(<vscale x 8 x double> %v) {
; CHECK-LABEL: vfabs_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
; CHECK-NEXT: ret
%r = call <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double> %v)
ret <vscale x 8 x double> %r
}

File diff suppressed because it is too large Load Diff