DAG: Fix extract_subvector combine for a single element

Previously this combine would fail because 1x vectors aren't legal, so use the scalar type instead. This avoids regressions in a future AMDGPU commit that adds v4i16/v4f16 as legal types. The test update covers the only in-tree test that this currently triggers on; it wasn't checking anything before. The result is completely changed since the selects are eliminated. Not sure if it's considered better or not. llvm-svn: 334440
2025-01-31 20:51:52 +01:00 · 2018-06-11 21:27:41 +00:00 · 2018-06-11 21:27:41 +00:00 · a27dc5bdab
commit a27dc5bdab
parent 799395800d
2 changed files with 27 additions and 5 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -15968,13 +15968,22 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
      // Only do this if we won't split any elements.
      if (ExtractSize % EltSize == 0) {
        unsigned NumElems = ExtractSize / EltSize;
-        EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
-                                         InVT.getVectorElementType(), NumElems);
+        EVT EltVT = InVT.getVectorElementType();
+        EVT ExtractVT = NumElems == 1 ? EltVT :
+          EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
        if ((Level < AfterLegalizeDAG ||
-             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
+             (NumElems == 1 ||
+              TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
            (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
          unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
                            EltSize;
+          if (NumElems == 1) {
+            SDValue Src = V->getOperand(IdxVal);
+            if (EltVT != Src.getValueType())
+              Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
+
+            return DAG.getBitcast(NVT, Src);
+          }

          // Extract the pieces from the original build_vector.
          SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
--- a/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll
+++ b/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll
@ -1,7 +1,20 @@
-; RUN: llc < %s
-target triple = "armv7-none-linux-gnueabi"
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s

 define <3 x i64> @shuffle(i1 %dec1, i1 %dec0, <3 x i64> %b) {
+; CHECK-LABEL: shuffle:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    tst r1, #1
+; CHECK-NEXT:    moveq r1, #0
+; CHECK-NEXT:    vmoveq d16, r1, r1
+; CHECK-NEXT:    vldrne d16, [sp]
+; CHECK-NEXT:    tst r2, #1
+; CHECK-NEXT:    moveq r1, #0
+; CHECK-NEXT:    vmoveq d18, r1, r1
+; CHECK-NEXT:    vldrne d18, [sp, #8]
+; CHECK-NEXT:    vorr d17, d18, d18
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; CHECK-NEXT:    bx lr
 entry:
  %.sink = select i1 %dec1, <3 x i64> %b, <3 x i64> zeroinitializer
  %.sink15 = select i1 %dec0, <3 x i64> %b, <3 x i64> zeroinitializer