1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[SelectionDAG] Support scalable splats in U(ADD|SUB)SAT combines

This patch builds on D106575, which added support for scalable-vector
splats to `ISD::matchBinaryPredicate`. It teaches the DAGCombiner to
perform a variety of the pre-existing saturating add/sub combines on
scalable-vector types.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D106652
This commit is contained in:
Fraser Cormack 2021-07-23 11:13:08 +01:00
parent 5afacc5171
commit cf6bdfc026
2 changed files with 34 additions and 56 deletions

View File

@ -10025,10 +10025,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// If it's on the left side invert the predicate to simplify logic below.
SDValue Other;
ISD::CondCode SatCC = CC;
if (ISD::isBuildVectorAllOnes(N1.getNode())) {
if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
Other = N2;
SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
} else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
} else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
Other = N1;
}
@ -10049,7 +10049,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
(OpLHS == CondLHS || OpRHS == CondLHS))
return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
(OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
CondLHS == OpLHS) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
@ -10070,10 +10072,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// the left side invert the predicate to simplify logic below.
SDValue Other;
ISD::CondCode SatCC = CC;
if (ISD::isBuildVectorAllZeros(N1.getNode())) {
if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
Other = N2;
SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
} else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
} else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
Other = N1;
}
@ -10102,8 +10104,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
if (isa<BuildVectorSDNode>(CondRHS)) {
if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > C-1 ? x+-C : 0 --> usubsat x, C
@ -10125,13 +10129,14 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// FIXME: Would it be better to use computeKnownBits to determine
// whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> usubsat x, C
if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
APInt SplatValue;
if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
OpRHSConst->getAPIntValue().isSignMask()) {
ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
SplatValue.isSignMask()) {
// Note that we have to rebuild the RHS constant here to
// ensure we don't rely on particular values of undef lanes.
OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
OpRHS = DAG.getConstant(SplatValue, DL, VT);
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
}
@ -10140,7 +10145,6 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
}
}
}
}
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.

View File

@ -101,10 +101,7 @@ define <vscale x 2 x i64> @vselect_sub_nxv2i64(<vscale x 2 x i64> %a0, <vscale x
; CHECK-LABEL: vselect_sub_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
; CHECK-NEXT: vmsleu.vv v0, v10, v8
; CHECK-NEXT: vsub.vv v26, v8, v10
; CHECK-NEXT: vmv.v.i v28, 0
; CHECK-NEXT: vmerge.vvm v8, v28, v26, v0
; CHECK-NEXT: vssubu.vv v8, v8, v10
; CHECK-NEXT: ret
%cmp = icmp uge <vscale x 2 x i64> %a0, %a1
%v1 = sub <vscale x 2 x i64> %a0, %a1
@ -131,9 +128,7 @@ define <vscale x 8 x i16> @vselect_sub_2_nxv8i16(<vscale x 8 x i16> %x, i16 zero
; CHECK-LABEL: vselect_sub_2_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; CHECK-NEXT: vmsltu.vx v0, v8, a0
; CHECK-NEXT: vsub.vx v26, v8, a0
; CHECK-NEXT: vmerge.vim v8, v26, 0, v0
; CHECK-NEXT: vssubu.vx v8, v8, a0
; CHECK-NEXT: ret
entry:
%0 = insertelement <vscale x 8 x i16> undef, i16 %w, i32 0
@ -163,11 +158,9 @@ define <2 x i64> @vselect_add_const_v2i64(<2 x i64> %a0) {
define <vscale x 2 x i64> @vselect_add_const_nxv2i64(<vscale x 2 x i64> %a0) {
; CHECK-LABEL: vselect_add_const_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
; CHECK-NEXT: vadd.vi v26, v8, -6
; CHECK-NEXT: vmsgtu.vi v0, v8, 5
; CHECK-NEXT: vmv.v.i v28, 0
; CHECK-NEXT: vmerge.vvm v8, v28, v26, v0
; CHECK-NEXT: addi a0, zero, 6
; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, mu
; CHECK-NEXT: vssubu.vx v8, v8, a0
; CHECK-NEXT: ret
%cm1 = insertelement <vscale x 2 x i64> poison, i64 -6, i32 0
%splatcm1 = shufflevector <vscale x 2 x i64> %cm1, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@ -205,27 +198,17 @@ define <vscale x 2 x i16> @vselect_add_const_signbit_nxv2i16(<vscale x 2 x i16>
; RV32-LABEL: vselect_add_const_signbit_nxv2i16:
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -2
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, mu
; RV32-NEXT: vmsgtu.vx v0, v8, a0
; RV32-NEXT: lui a0, 1048568
; RV32-NEXT: addi a0, a0, 1
; RV32-NEXT: vadd.vx v25, v8, a0
; RV32-NEXT: vmv.v.i v26, 0
; RV32-NEXT: vmerge.vvm v8, v26, v25, v0
; RV32-NEXT: vssubu.vx v8, v8, a0
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_add_const_signbit_nxv2i16:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -2
; RV64-NEXT: addiw a0, a0, -1
; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, mu
; RV64-NEXT: vmsgtu.vx v0, v8, a0
; RV64-NEXT: lui a0, 1048568
; RV64-NEXT: addiw a0, a0, 1
; RV64-NEXT: vadd.vx v25, v8, a0
; RV64-NEXT: vmv.v.i v26, 0
; RV64-NEXT: vmerge.vvm v8, v26, v25, v0
; RV64-NEXT: vssubu.vx v8, v8, a0
; RV64-NEXT: ret
%cm1 = insertelement <vscale x 2 x i16> poison, i16 32766, i32 0
%splatcm1 = shufflevector <vscale x 2 x i16> %cm1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@ -255,12 +238,9 @@ define <2 x i16> @vselect_xor_const_signbit_v2i16(<2 x i16> %a0) {
define <vscale x 2 x i16> @vselect_xor_const_signbit_nxv2i16(<vscale x 2 x i16> %a0) {
; CHECK-LABEL: vselect_xor_const_signbit_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: lui a0, 1048568
; CHECK-NEXT: vxor.vx v26, v8, a0
; CHECK-NEXT: vmerge.vvm v8, v25, v26, v0
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu
; CHECK-NEXT: vssubu.vx v8, v8, a0
; CHECK-NEXT: ret
%cmp = icmp slt <vscale x 2 x i16> %a0, zeroinitializer
%ins = insertelement <vscale x 2 x i16> poison, i16 -32768, i32 0
@ -291,10 +271,7 @@ define <vscale x 2 x i64> @vselect_add_nxv2i64(<vscale x 2 x i64> %a0, <vscale x
; CHECK-LABEL: vselect_add_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
; CHECK-NEXT: vadd.vv v26, v8, v10
; CHECK-NEXT: vmsleu.vv v0, v8, v26
; CHECK-NEXT: vmv.v.i v28, -1
; CHECK-NEXT: vmerge.vvm v8, v28, v26, v0
; CHECK-NEXT: vsaddu.vv v8, v8, v10
; CHECK-NEXT: ret
%v1 = add <vscale x 2 x i64> %a0, %a1
%cmp = icmp ule <vscale x 2 x i64> %a0, %v1
@ -323,10 +300,7 @@ define <vscale x 2 x i64> @vselect_add_const_2_nxv2i64(<vscale x 2 x i64> %a0) {
; CHECK-LABEL: vselect_add_const_2_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
; CHECK-NEXT: vadd.vi v26, v8, 6
; CHECK-NEXT: vmsleu.vi v0, v8, -7
; CHECK-NEXT: vmv.v.i v28, -1
; CHECK-NEXT: vmerge.vvm v8, v28, v26, v0
; CHECK-NEXT: vsaddu.vi v8, v8, 6
; CHECK-NEXT: ret
%cm1 = insertelement <vscale x 2 x i64> poison, i64 6, i32 0
%splatcm1 = shufflevector <vscale x 2 x i64> %cm1, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer