[LegalizeTypes] Teach PromoteIntRes_BITCAST to better handle a bitcast with vector output type and a vector input type that needs to be widened

Summary: Previously if we had a bitcast vector output type that needs promotion and a vector input type that needs widening we would just do a stack store and load to handle the conversion. We can do a little better if we can widen the bitcast to a legal vector type the same size as the widened input type. Then we can do the bitcast between this widened type and the widened input type. Afterwards we can extract_subvector back to the original output and any_extend that. Type legalization will then circle back and handle promotion of the extract_subvector and the any_extend will just be removed. This will avoid going through the stack and allows us to remove a custom version of this legalization from X86. Reviewers: efriedma, RKSimon Reviewed By: efriedma Subscribers: javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D53229 llvm-svn: 345567
2024-11-22 10:42:39 +01:00 · 2018-10-30 03:27:15 +00:00 · 2018-10-30 03:27:15 +00:00 · e3e2aba431
commit e3e2aba431
parent d7e457ed36
3 changed files with 27 additions and 17 deletions
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@ -310,6 +310,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
    // make us bitcast between two vectors which are legalized in different ways.
    if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
      return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+    // If the output type is also a vector and widening it to the same size
+    // as the widened input type would be a legal type, we can widen the bitcast
+    // and handle the promotion after.
+    if (NOutVT.isVector()) {
+      unsigned WidenInSize = NInVT.getSizeInBits();
+      unsigned OutSize = OutVT.getSizeInBits();
+      if (WidenInSize % OutSize == 0) {
+        unsigned Scale = WidenInSize / OutSize;
+        EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(),
+                                         OutVT.getVectorElementType(),
+                                         OutVT.getVectorNumElements() * Scale);
+        if (isTypeLegal(WideOutVT)) {
+          InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
+          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+          InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
+                             DAG.getConstant(0, dl, IdxTy));
+          return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp);
+        }
+      }
+    }
  }

  return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -26338,7 +26338,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
      return;
    }

-    if ((SrcVT != MVT::f64 && SrcVT != MVT::v2f32) ||
+    if (SrcVT != MVT::f64 ||
        (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8) ||
        getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector)
      return;
@ -26347,13 +26347,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
    EVT SVT = DstVT.getVectorElementType();
    EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
    SDValue Res;
-    if (SrcVT == MVT::f64)
-      Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
-                             MVT::v2f64, N->getOperand(0));
-    else
-      Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, N->getOperand(0),
-                        DAG.getUNDEF(MVT::v2f32));
-
+    Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0));
    Res = DAG.getBitcast(WiderVT, Res);
    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
                      DAG.getIntPtrConstant(0, dl));
--- a/test/CodeGen/AArch64/bitcast-promote-widen.ll
+++ b/test/CodeGen/AArch64/bitcast-promote-widen.ll
@ -6,16 +6,12 @@
 define <2 x i16> @bitcast_v2i16_v2f16(<2 x half> %x) {
 ; CHECK-LABEL: bitcast_v2i16_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16 // =16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    str s0, [sp, #12]
-; CHECK-NEXT:    ldrh w8, [sp, #12]
-; CHECK-NEXT:    ldrh w9, [sp, #14]
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    add sp, sp, #16 // =16
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    fmov s1, w8
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    mov v1.s[1], w8
+; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
  %y = bitcast <2 x half> %x to <2 x i16>
  ret <2 x i16> %y