mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[RISCV] Add patterns for scalable-vector fabs & fcopysign
The patterns mostly follow the scalar counterparts, save for some extra optimizations to match the vector/scalar forms. The patch adds a DAGCombine for ISD::FCOPYSIGN to try to reorder ISD::FNEG around any ISD::FP_EXTEND or ISD::FP_ROUND of the second operand. This helps us achieve better codegen to match vfsgnjn. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D96028
This commit is contained in:
parent
cc791cffbb
commit
7d1ce513fb
@ -497,6 +497,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
|
||||
|
||||
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
|
||||
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
|
||||
};
|
||||
|
||||
if (Subtarget.hasStdExtZfh())
|
||||
@ -604,6 +605,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
|
||||
if (Subtarget.hasStdExtZbp()) {
|
||||
setTargetDAGCombine(ISD::OR);
|
||||
}
|
||||
if (Subtarget.hasStdExtV())
|
||||
setTargetDAGCombine(ISD::FCOPYSIGN);
|
||||
}
|
||||
|
||||
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
|
||||
@ -2966,6 +2969,30 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ISD::FCOPYSIGN: {
|
||||
EVT VT = N->getValueType(0);
|
||||
if (!VT.isVector())
|
||||
break;
|
||||
// There is a form of VFSGNJ which injects the negated sign of its second
|
||||
// operand. Try and bubble any FNEG up after the extend/round to produce
|
||||
// this optimized pattern. Avoid modifying FP_ROUND nodes that have
|
||||
// TRUNC=1.
|
||||
SDValue In2 = N->getOperand(1);
|
||||
// Avoid cases where the extend/round has multiple uses, as duplicating
|
||||
// those is typically more expensive than removing a fneg.
|
||||
if (!In2.hasOneUse())
|
||||
break;
|
||||
if (In2.getOpcode() != ISD::FP_EXTEND &&
|
||||
(In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
|
||||
break;
|
||||
In2 = In2.getOperand(0);
|
||||
if (In2.getOpcode() != ISD::FNEG)
|
||||
break;
|
||||
SDLoc DL(N);
|
||||
SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
|
||||
return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
|
||||
DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@ -642,11 +642,32 @@ foreach vti = AllFloatVectors in {
|
||||
(!cast<Instruction>("PseudoVFSQRT_V_"# vti.LMul.MX)
|
||||
vti.RegClass:$rs2, vti.AVL, vti.SEW)>;
|
||||
|
||||
// 14.10. Vector Floating-Point Sign-Injection Instructions
|
||||
// 14.12. Vector Floating-Point Sign-Injection Instructions
|
||||
def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
|
||||
(!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX)
|
||||
vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.SEW)>;
|
||||
// Handle fneg with VFSGNJN using the same input for both operands.
|
||||
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
|
||||
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
|
||||
vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.SEW)>;
|
||||
|
||||
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
|
||||
(vti.Vector vti.RegClass:$rs2))),
|
||||
(!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
|
||||
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.SEW)>;
|
||||
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
|
||||
(vti.Vector (splat_vector vti.ScalarRegClass:$rs2)))),
|
||||
(!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
|
||||
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.SEW)>;
|
||||
|
||||
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
|
||||
(vti.Vector (fneg vti.RegClass:$rs2)))),
|
||||
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
|
||||
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.SEW)>;
|
||||
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
|
||||
(vti.Vector (fneg (splat_vector vti.ScalarRegClass:$rs2))))),
|
||||
(!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
|
||||
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.SEW)>;
|
||||
}
|
||||
|
||||
// 14.11. Vector Floating-Point Compare Instructions
|
||||
|
185
test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
Normal file
185
test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
Normal file
@ -0,0 +1,185 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
|
||||
; RUN: -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
|
||||
; RUN: -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
declare <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half>)
|
||||
|
||||
define <vscale x 1 x half> @vfabs_nxv1f16(<vscale x 1 x half> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv1f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half> %v)
|
||||
ret <vscale x 1 x half> %r
|
||||
}
|
||||
|
||||
declare <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half>)
|
||||
|
||||
define <vscale x 2 x half> @vfabs_nxv2f16(<vscale x 2 x half> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv2f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %v)
|
||||
ret <vscale x 2 x half> %r
|
||||
}
|
||||
|
||||
declare <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half>)
|
||||
|
||||
define <vscale x 4 x half> @vfabs_nxv4f16(<vscale x 4 x half> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv4f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %v)
|
||||
ret <vscale x 4 x half> %r
|
||||
}
|
||||
|
||||
declare <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half>)
|
||||
|
||||
define <vscale x 8 x half> @vfabs_nxv8f16(<vscale x 8 x half> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %v)
|
||||
ret <vscale x 8 x half> %r
|
||||
}
|
||||
|
||||
declare <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half>)
|
||||
|
||||
define <vscale x 16 x half> @vfabs_nxv16f16(<vscale x 16 x half> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv16f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> %v)
|
||||
ret <vscale x 16 x half> %r
|
||||
}
|
||||
|
||||
declare <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half>)
|
||||
|
||||
define <vscale x 32 x half> @vfabs_nxv32f16(<vscale x 32 x half> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv32f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half> %v)
|
||||
ret <vscale x 32 x half> %r
|
||||
}
|
||||
|
||||
declare <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float>)
|
||||
|
||||
define <vscale x 1 x float> @vfabs_nxv1f32(<vscale x 1 x float> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv1f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float> %v)
|
||||
ret <vscale x 1 x float> %r
|
||||
}
|
||||
|
||||
declare <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float>)
|
||||
|
||||
define <vscale x 2 x float> @vfabs_nxv2f32(<vscale x 2 x float> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv2f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %v)
|
||||
ret <vscale x 2 x float> %r
|
||||
}
|
||||
|
||||
declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
|
||||
|
||||
define <vscale x 4 x float> @vfabs_nxv4f32(<vscale x 4 x float> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %v)
|
||||
ret <vscale x 4 x float> %r
|
||||
}
|
||||
|
||||
declare <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float>)
|
||||
|
||||
define <vscale x 8 x float> @vfabs_nxv8f32(<vscale x 8 x float> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv8f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float> %v)
|
||||
ret <vscale x 8 x float> %r
|
||||
}
|
||||
|
||||
declare <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float>)
|
||||
|
||||
define <vscale x 16 x float> @vfabs_nxv16f32(<vscale x 16 x float> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv16f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float> %v)
|
||||
ret <vscale x 16 x float> %r
|
||||
}
|
||||
|
||||
declare <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double>)
|
||||
|
||||
define <vscale x 1 x double> @vfabs_nxv1f64(<vscale x 1 x double> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv1f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double> %v)
|
||||
ret <vscale x 1 x double> %r
|
||||
}
|
||||
|
||||
declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
|
||||
|
||||
define <vscale x 2 x double> @vfabs_nxv2f64(<vscale x 2 x double> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %v)
|
||||
ret <vscale x 2 x double> %r
|
||||
}
|
||||
|
||||
declare <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double>)
|
||||
|
||||
define <vscale x 4 x double> @vfabs_nxv4f64(<vscale x 4 x double> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv4f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %v)
|
||||
ret <vscale x 4 x double> %r
|
||||
}
|
||||
|
||||
declare <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double>)
|
||||
|
||||
define <vscale x 8 x double> @vfabs_nxv8f64(<vscale x 8 x double> %v) {
|
||||
; CHECK-LABEL: vfabs_nxv8f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
|
||||
; CHECK-NEXT: vfsgnjx.vv v8, v8, v8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double> %v)
|
||||
ret <vscale x 8 x double> %r
|
||||
}
|
1465
test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
Normal file
1465
test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user