1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[Hexagon] Handle selection between HVX vector predicates

Make sure that (select i1 q0 q1) is handled properly.
This commit is contained in:
Krzysztof Parzyszek 2020-10-23 18:05:06 -05:00
parent 77ffdb3e3c
commit 28d3c031c0
5 changed files with 336 additions and 18 deletions

View File

@ -477,6 +477,7 @@ private:
SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const;

View File

@ -94,6 +94,7 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::MUL, T, Legal);
setOperationAction(ISD::CTPOP, T, Legal);
setOperationAction(ISD::CTLZ, T, Legal);
setOperationAction(ISD::SELECT, T, Legal);
setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
if (T != ByteV) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
@ -211,6 +212,7 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom);
setOperationAction(ISD::SELECT, BoolV, Custom);
setOperationAction(ISD::AND, BoolV, Legal);
setOperationAction(ISD::OR, BoolV, Legal);
setOperationAction(ISD::XOR, BoolV, Legal);
@ -1619,6 +1621,26 @@ HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(0));
}
// Custom lowering for a SELECT whose result is a vector of i1.
//
// Both value operands are converted with Q2V into an integer vector type that
// has one lane per predicate lane, the SELECT is re-issued on that type using
// the original condition operand, and the chosen vector is converted back to
// the predicate type with V2Q. A SELECT whose result element type is not i1
// is returned unchanged.
SDValue
HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
  MVT PredTy = ty(Op);
  if (PredTy.getVectorElementType() != MVT::i1)
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned VecLen = PredTy.getVectorNumElements();
  assert(HwLen % VecLen == 0);

  // Each integer lane is (HwLen / VecLen) * 8 bits wide.
  unsigned LaneBits = (HwLen / VecLen) * 8;
  MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(LaneBits), VecLen);

  // The two Q2V nodes are intentionally created inside the argument list so
  // that their creation order stays whatever the compiler picks for call
  // arguments, rather than being pinned by named temporaries.
  SDValue Chosen = DAG.getNode(
      ISD::SELECT, dl, WideTy, Op.getOperand(0),
      DAG.getNode(HexagonISD::Q2V, dl, WideTy, Op.getOperand(1)),
      DAG.getNode(HexagonISD::Q2V, dl, WideTy, Op.getOperand(2)));

  return DAG.getNode(HexagonISD::V2Q, dl, PredTy, Chosen);
}
SDValue
HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
if (SDValue S = getVectorShiftByInt(Op, DAG))
@ -2031,6 +2053,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
case ISD::SELECT: return LowerHvxSelect(Op, DAG);
case ISD::SRA:
case ISD::SHL:
case ISD::SRL: return LowerHvxShift(Op, DAG);
@ -2143,27 +2166,41 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
if (DCI.isBeforeLegalizeOps())
return SDValue();
SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end());
switch (Opc) {
case ISD::VSELECT: {
// (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
SDValue Cond = Op.getOperand(0);
SDValue Cond = Ops[0];
if (Cond->getOpcode() == ISD::XOR) {
SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
if (C1->getOpcode() == HexagonISD::QTRUE)
return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
Op.getOperand(2), Op.getOperand(1));
return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
}
break;
}
case HexagonISD::V2Q:
if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
return C->isNullValue() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
: DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
}
break;
case HexagonISD::Q2V:
if (Ops[0].getOpcode() == HexagonISD::QTRUE)
return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
DAG.getConstant(-1, dl, MVT::i32));
if (Ops[0].getOpcode() == HexagonISD::QFALSE)
return getZero(dl, ty(Op), DAG);
break;
case HexagonISD::VINSERTW0:
if (isUndef(Op.getOperand(1)))
return Op.getOperand(0);
if (isUndef(Ops[1]))
return Ops[0];;
break;
case HexagonISD::VROR: {
SDValue Op0 = Op.getOperand(0);
if (Op0.getOpcode() == HexagonISD::VROR) {
SDValue Vec = Op0.getOperand(0);
SDValue Rot0 = Op.getOperand(1), Rot1 = Op0.getOperand(1);
if (Ops[0].getOpcode() == HexagonISD::VROR) {
SDValue Vec = Ops[0].getOperand(0);
SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
}

View File

@ -172,16 +172,19 @@ let Predicates = [UseHVX] in {
}
let Predicates = [UseHVX] in {
def: Pat<(VecI8 vzero), (V6_vd0)>;
def: Pat<(VecI16 vzero), (V6_vd0)>;
def: Pat<(VecI32 vzero), (V6_vd0)>;
def: Pat<(VecPI8 vzero), (PS_vdd0)>;
def: Pat<(VecPI16 vzero), (PS_vdd0)>;
def: Pat<(VecPI32 vzero), (PS_vdd0)>;
let AddedComplexity = 100 in {
// These should be preferred over a vsplat of 0.
def: Pat<(VecI8 vzero), (V6_vd0)>;
def: Pat<(VecI16 vzero), (V6_vd0)>;
def: Pat<(VecI32 vzero), (V6_vd0)>;
def: Pat<(VecPI8 vzero), (PS_vdd0)>;
def: Pat<(VecPI16 vzero), (PS_vdd0)>;
def: Pat<(VecPI32 vzero), (PS_vdd0)>;
def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>;
def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>;
def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>;
def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>;
}
def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
(Combinev HvxVR:$Vt, HvxVR:$Vs)>;

View File

@ -0,0 +1,40 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; REQUIRES: asserts
; Check that this doesn't crash.
; CHECK: vand
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"
%s.0 = type { [4 x <32 x i32>] }
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #0
declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32) #0
declare <64 x i32> @llvm.hexagon.V6.vdealvdd.128B(<32 x i32>, <32 x i32>, i32) #0
declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #0
; Function Attrs: nounwind
; Never returns: block %b1 branches only to itself. On every iteration a
; scalar i1 (low bit of the loop counter equal to zero) selects between an
; all-zero <128 x i1> and the predicate returned by vandvrt, i.e. a select
; with an i1 condition and vector-predicate operands. The chosen predicate is
; consumed by vmux, whose result goes through two vdealvdd/hi steps and is
; then stored.
define void @f0() local_unnamed_addr #1 {
b0:
%v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> undef, i32 16843009)
%v1 = getelementptr inbounds %s.0, %s.0* null, i32 0, i32 0, i32 3
br label %b1
b1: ; preds = %b1, %b0
%v2 = phi i32 [ 0, %b0 ], [ %v11, %b1 ]
%v3 = and i32 %v2, 1
%v4 = icmp eq i32 %v3, 0
%v5 = select i1 %v4, <128 x i1> zeroinitializer, <128 x i1> %v0
%v6 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v5, <32 x i32> undef, <32 x i32> undef)
%v7 = tail call <64 x i32> @llvm.hexagon.V6.vdealvdd.128B(<32 x i32> undef, <32 x i32> %v6, i32 -32)
%v8 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v7)
%v9 = tail call <64 x i32> @llvm.hexagon.V6.vdealvdd.128B(<32 x i32> undef, <32 x i32> %v8, i32 -32)
%v10 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v9)
store <32 x i32> %v10, <32 x i32>* %v1, align 128
%v11 = add nuw nsw i32 %v2, 1
br label %b1
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" }

View File

@ -0,0 +1,237 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that selection (based on i1) between vector predicates works.
; Byte lanes (<128 x i8>): two lane-wise signed compares (%a0 > %a1 and
; %a1 > %a2) each yield a <128 x i1> predicate. A scalar i1 (%a4 > 0) picks
; one of the two predicates, and the picked predicate drives the final
; lane-wise select between %a1 and %a3.
define <128 x i8> @f0(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, i32 %a4) #0 {
; CHECK-LABEL: f0:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.b,v1.b)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q1 = vcmp.gt(v1.b,v2.b)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r2 = #-1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = cmp.gt(r0,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vand(q1,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2 = vand(q0,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (p0) v0 = v2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q3 = vand(v0,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q3,v1,v3)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%v0 = icmp sgt <128 x i8> %a0, %a1
%v1 = icmp sgt <128 x i8> %a1, %a2
%v2 = icmp sgt i32 %a4, 0
%v3 = select i1 %v2, <128 x i1> %v0, <128 x i1> %v1
%v4 = select <128 x i1> %v3, <128 x i8> %a1, <128 x i8> %a3
ret <128 x i8> %v4
}
; Halfword lanes (<64 x i16>): two lane-wise signed compares (%a0 > %a1 and
; %a1 > %a2) each yield a <64 x i1> predicate. A scalar i1 (%a4 > 0) picks
; one of the two predicates, and the picked predicate drives the final
; lane-wise select between %a1 and %a3.
define <64 x i16> @f1(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %a2, <64 x i16> %a3, i32 %a4) #0 {
; CHECK-LABEL: f1:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.h,v1.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q1 = vcmp.gt(v1.h,v2.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r2 = #-1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = cmp.gt(r0,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vand(q1,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2 = vand(q0,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (p0) v0 = v2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q3 = vand(v0,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q3,v1,v3)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%v0 = icmp sgt <64 x i16> %a0, %a1
%v1 = icmp sgt <64 x i16> %a1, %a2
%v2 = icmp sgt i32 %a4, 0
%v3 = select i1 %v2, <64 x i1> %v0, <64 x i1> %v1
%v4 = select <64 x i1> %v3, <64 x i16> %a1, <64 x i16> %a3
ret <64 x i16> %v4
}
; Word lanes (<32 x i32>): two lane-wise signed compares (%a0 > %a1 and
; %a1 > %a2) each yield a <32 x i1> predicate. A scalar i1 (%a4 > 0) picks
; one of the two predicates, and the picked predicate drives the final
; lane-wise select between %a1 and %a3.
define <32 x i32> @f2(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %a2, <32 x i32> %a3, i32 %a4) #0 {
; CHECK-LABEL: f2:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.w,v1.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q1 = vcmp.gt(v1.w,v2.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r2 = #-1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = cmp.gt(r0,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vand(q1,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2 = vand(q0,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (p0) v0 = v2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q3 = vand(v0,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q3,v1,v3)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%v0 = icmp sgt <32 x i32> %a0, %a1
%v1 = icmp sgt <32 x i32> %a1, %a2
%v2 = icmp sgt i32 %a4, 0
%v3 = select i1 %v2, <32 x i1> %v0, <32 x i1> %v1
%v4 = select <32 x i1> %v3, <32 x i32> %a0, <32 x i32> %a3
ret <32 x i32> %v4
}
; Selection of vector predicates first converts them into regular vectors.
; Check that all-true and all-false bool vectors are optimized into splat(-1)
; and vxor(v,v).
; Byte lanes (<128 x i8>): an all-true <128 x i1> is built as a splat of
; `i1 true` (insertelement into lane 0, then shufflevector with an all-zero
; mask). A scalar i1 (%a2 > 0) picks between that and the all-false
; predicate (zeroinitializer); the result drives the lane-wise select between
; %a0 and %a1.
define <128 x i8> @f3(<128 x i8> %a0, <128 x i8> %a1, i32 %a2) #0 {
; CHECK-LABEL: f3:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r2 = #-1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = cmp.gt(r0,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2 = vxor(v2,v2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v3 = vsplat(r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (p0) v2 = v3
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%v0 = insertelement <128 x i1> undef, i1 true, i32 0
%v1 = shufflevector <128 x i1> %v0, <128 x i1> undef, <128 x i32> zeroinitializer
%v2 = icmp sgt i32 %a2, 0
%v3 = select i1 %v2, <128 x i1> %v1, <128 x i1> zeroinitializer
%v4 = select <128 x i1> %v3, <128 x i8> %a0, <128 x i8> %a1
ret <128 x i8> %v4
}
; Halfword lanes (<64 x i16>): an all-true <64 x i1> is built as a splat of
; `i1 true` (insertelement into lane 0, then shufflevector with an all-zero
; mask). A scalar i1 (%a2 > 0) picks between that and the all-false
; predicate (zeroinitializer); the result drives the lane-wise select between
; %a0 and %a1.
define <64 x i16> @f4(<64 x i16> %a0, <64 x i16> %a1, i32 %a2) #0 {
; CHECK-LABEL: f4:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r2 = #-1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = cmp.gt(r0,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2 = vxor(v2,v2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v3 = vsplat(r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (p0) v2 = v3
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%v0 = insertelement <64 x i1> undef, i1 true, i32 0
%v1 = shufflevector <64 x i1> %v0, <64 x i1> undef, <64 x i32> zeroinitializer
%v2 = icmp sgt i32 %a2, 0
%v3 = select i1 %v2, <64 x i1> %v1, <64 x i1> zeroinitializer
%v4 = select <64 x i1> %v3, <64 x i16> %a0, <64 x i16> %a1
ret <64 x i16> %v4
}
; Word lanes (<32 x i32>): an all-true <32 x i1> is built as a splat of
; `i1 true` (insertelement into lane 0, then shufflevector with an all-zero
; mask). A scalar i1 (%a2 > 0) picks between that and the all-false
; predicate (zeroinitializer); the result drives the lane-wise select between
; %a0 and %a1.
define <32 x i32> @f5(<32 x i32> %a0, <32 x i32> %a1, i32 %a2) #0 {
; CHECK-LABEL: f5:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r2 = #-1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = cmp.gt(r0,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2 = vxor(v2,v2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v3 = vsplat(r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (p0) v2 = v3
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%v0 = insertelement <32 x i1> undef, i1 true, i32 0
%v1 = shufflevector <32 x i1> %v0, <32 x i1> undef, <32 x i32> zeroinitializer
%v2 = icmp sgt i32 %a2, 0
%v3 = select i1 %v2, <32 x i1> %v1, <32 x i1> zeroinitializer
%v4 = select <32 x i1> %v3, <32 x i32> %a0, <32 x i32> %a1
ret <32 x i32> %v4
}
attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b,-packets" }