[Hexagon] Handle widening of vector truncate

2025-02-01 05:01:59 +01:00 · 2020-09-04 19:33:14 -05:00 · 2020-09-04 19:33:14 -05:00 · 00ccf90843
commit 00ccf90843
parent 4af70f1516
7 changed files with 99 additions and 22 deletions
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@ -1862,6 +1862,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
  case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
  case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
  case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
+  case HexagonISD::VPACKL:        return "HexagonISD::VPACKL";
  case HexagonISD::OP_END:        break;
  }
  return nullptr;
@ -3014,7 +3015,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
    return LowerINLINEASM(Op, DAG);

-  if (isHvxOperation(Op.getNode())) {
+  if (isHvxOperation(Op.getNode(), DAG)) {
    // If HVX lowering returns nothing, try the default lowering.
    if (SDValue V = LowerHvxOperation(Op, DAG))
      return V;
@ -3075,7 +3076,7 @@ void
 HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
-  if (isHvxOperation(N)) {
+  if (isHvxOperation(N, DAG)) {
    LowerHvxOperationWrapper(N, Results, DAG);
    if (!Results.empty())
      return;
@ -3094,7 +3095,7 @@ void
 HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
-  if (isHvxOperation(N)) {
+  if (isHvxOperation(N, DAG)) {
    ReplaceHvxNodeResults(N, Results, DAG);
    if (!Results.empty())
      return;
@ -3123,7 +3124,7 @@ HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
      const {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();
-  if (isHvxOperation(N)) {
+  if (isHvxOperation(N, DCI.DAG)) {
    if (SDValue V = PerformHvxDAGCombine(N, DCI))
      return V;
    return SDValue();
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@ -88,6 +88,11 @@ enum NodeType : unsigned {
               // been loaded from address in Op2.
  VALIGNADDR,  // Align vector address: Op0 & -Op1, except when it is
               // an address in a vector load, then it's a no-op.
+  VPACKL,      // Pack low parts of the input vector to the front of the
+               // output. For example v64i16 VPACKL(v32i32) will pick
+               // the low halfwords and pack them into the first 32
+               // halfwords of the output. The rest of the output is
+               // unspecified.
  OP_END
 };

@ -476,12 +481,13 @@ private:
  SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const;
+  SDValue WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const;

  std::pair<const TargetRegisterClass*, uint8_t>
  findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
      const override;

-  bool isHvxOperation(SDNode *N) const;
+  bool isHvxOperation(SDNode *N, SelectionDAG &DAG) const;
  SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const;
  void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                SelectionDAG &DAG) const;
--- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@ -233,8 +233,10 @@ HexagonTargetLowering::initializeHVXLowering() {
    for (int N = 2; N < MaxElems; N *= 2) {
      MVT VecTy = MVT::getVectorVT(ElemTy, N);
      auto Action = getPreferredVectorAction(VecTy);
-      if (Action == TargetLoweringBase::TypeWidenVector)
+      if (Action == TargetLoweringBase::TypeWidenVector) {
        setOperationAction(ISD::STORE, VecTy, Custom);
+        setOperationAction(ISD::TRUNCATE, VecTy, Custom);
+      }
    }
  }

@ -1912,6 +1914,21 @@ HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
                            MOp, ISD::UNINDEXED, false, false);
 }

+SDValue  // Lowers a short-vector truncate to a VPACKL node of HwWidth bits.
+HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
+  const SDLoc &dl(Op);
+  MVT ResTy = ty(Op);  // type of Op's own result
+  unsigned HwWidth = 8*Subtarget.getVectorLength();  // presumably the HVX vector width in bits (length looks to be in bytes) -- confirm
+  unsigned ResWidth = ResTy.getSizeInBits();
+  assert(HwWidth % ResWidth == 0);  // the scaling below needs ResWidth to divide HwWidth evenly
+
+  unsigned WideNumElem = ResTy.getVectorNumElements() * (HwWidth / ResWidth);  // element count that makes WideTy HwWidth bits wide
+  MVT WideTy = MVT::getVectorVT(ResTy.getVectorElementType(), WideNumElem);  // same element type as ResTy, more elements
+  SDValue WideOp = DAG.getNode(HexagonISD::VPACKL, dl, WideTy,
+                               Op.getOperand(0));  // operand 0 of Op: the value being truncated when Op is ISD::TRUNCATE
+  return WideOp;  // note: the returned value has type WideTy, not ResTy
+}
+
 SDValue
 HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
@ -2020,7 +2037,14 @@ void
 HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
      SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  unsigned Opc = N->getOpcode();
+  SDValue Op(N, 0);
  switch (Opc) {
+    case ISD::TRUNCATE:
+      if (!Subtarget.isHVXVectorType(ty(Op), false)) {
+        SDValue T = WidenHvxTruncate(Op, DAG);
+        Results.push_back(T);
+      }
+      break;
    case ISD::BITCAST:
      if (isHvxBoolTy(ty(N->getOperand(0)))) {
        SDValue Op(N, 0);
@ -2058,25 +2082,38 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
 }

 bool
-HexagonTargetLowering::isHvxOperation(SDNode *N) const {
-  if (N->getOpcode() == ISD::STORE) {
-    // If it's a store-to-be-widened, treat it as an HVX operation.
-    SDValue Val = cast<StoreSDNode>(N)->getValue();
-    MVT ValTy = ty(Val);
-    if (ValTy.isVector()) {
-      auto Action = getPreferredVectorAction(ValTy);
-      if (Action == TargetLoweringBase::TypeWidenVector)
-        return true;
-    }
-  }
+HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {  // DAG is used only for its LLVMContext (see getTypeToTransformTo below)
  // If the type of any result, or any operand type are HVX vector types,
  // this is an HVX operation.
-  auto IsHvxTy = [this] (EVT Ty) {
+  auto IsHvxTy = [this](EVT Ty) {
    return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
  };
  auto IsHvxOp = [this](SDValue Op) {
    return Op.getValueType().isSimple() &&
           Subtarget.isHVXVectorType(ty(Op), true);
  };
-  return llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp);
+  if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
+    return true;  // some result or operand already has an HVX vector type
+
+  // Check if this could be an HVX operation after type widening.
+  auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
+    if (!Op.getValueType().isSimple())
+      return false;  // non-simple type: skipped before ty(Op) is taken as an MVT below
+    MVT ValTy = ty(Op);
+    if (ValTy.isVector()) {
+      auto Action = getPreferredVectorAction(ValTy);
+      if (Action == TargetLoweringBase::TypeWidenVector) {  // only vector types whose preferred action is widening qualify
+        EVT WideTy = getTypeToTransformTo(*DAG.getContext(), ValTy);
+        assert(WideTy.isSimple());
+        return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true);  // HVX operation iff the transformed-to type is an HVX type
+      }
+    }
+    return false;
+  };
+
+  for (int i = 0, e = N->getNumValues(); i != e; ++i) {  // apply the widening check to each result value of N ...
+    if (IsWidenedToHvx(SDValue(N, i)))
+      return true;
+  }
+  return llvm::any_of(N->ops(), IsWidenedToHvx);  // ... and then to each operand
 }
--- a/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/lib/Target/Hexagon/HexagonPatternsHVX.td
@ -1,3 +1,6 @@
+def SDTVecUnaryOp:
+  SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;  // 1 result, 1 operand; both constrained to be vectors
+
 def SDTVecBinOp:
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>;

@ -37,6 +40,7 @@ def HexagonVZERO:      SDNode<"HexagonISD::VZERO",      SDTVecLeaf>;
 def HexagonQCAT:       SDNode<"HexagonISD::QCAT",       SDTVecBinOp>;
 def HexagonQTRUE:      SDNode<"HexagonISD::QTRUE",      SDTVecLeaf>;
 def HexagonQFALSE:     SDNode<"HexagonISD::QFALSE",     SDTVecLeaf>;
+def HexagonVPACKL:     SDNode<"HexagonISD::VPACKL",     SDTVecUnaryOp>;

 def vzero:  PatFrag<(ops), (HexagonVZERO)>;
 def qtrue:  PatFrag<(ops), (HexagonQTRUE)>;
@ -44,7 +48,8 @@ def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
 def qcat:   PatFrag<(ops node:$Qs, node:$Qt),
                    (HexagonQCAT node:$Qs, node:$Qt)>;

-def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
+def qnot:   PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
+def vpackl: PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;

 def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb  $Vs)>;
 def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh  $Vs)>;
@ -401,6 +406,10 @@ let Predicates = [UseHVX] in {
  def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>;

+  def: Pat<(VecI8  (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>;
+  def: Pat<(VecI8  (vpackl HVI32:$Vs)), (V6_vdealb4w HvxVR:$Vs, (IMPLICIT_DEF))>;
+  def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
+
  def: Pat<(VecI16 (bswap HVI16:$Vs)),
           (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>;
  def: Pat<(VecI32 (bswap HVI32:$Vs)),
--- a/test/CodeGen/Hexagon/autohvx/isel-truncate.ll
+++ b/test/CodeGen/Hexagon/autohvx/isel-truncate.ll
@ -1,7 +1,7 @@
 ; RUN: llc -march=hexagon < %s | FileCheck %s

 ; Check that this compiles successfully.
-; CHECK: vpacke
+; CHECK: vdeal

 target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
 target triple = "hexagon"
@ -17,4 +17,4 @@ b0:
  ret void
 }

-attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
+attributes #0 = { norecurse nounwind "target-cpu"="hexagonv65" "target-features"="+hvx-length64b,+hvxv65" }
--- a/test/CodeGen/Hexagon/autohvx/short-store-widen.ll
+++ b/test/CodeGen/Hexagon/autohvx/short-store-widen.ll
--- a/test/CodeGen/Hexagon/autohvx/isel-widen-truncate.ll
+++ b/test/CodeGen/Hexagon/autohvx/isel-widen-truncate.ll
@ -0,0 +1,24 @@
+; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s
+
+; Check for successful compilation.
+; Expect that the truncate to v32i8 is lowered to vdeale.
+
+; CHECK-LABEL: f0:
+; CHECK: vdeale
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define dllexport void @f0(<32 x i32> %a0) local_unnamed_addr #0 {
+b0:
+  %v0 = trunc <32 x i32> %a0 to <32 x i8>
+  %v1 = shufflevector <32 x i8> %v0, <32 x i8> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v1, <128 x i8>* undef, i32 128, <128 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32 immarg, <128 x i1>) #1
+
+attributes #0 = { "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" }
+attributes #1 = { argmemonly nounwind willreturn }