Revert "[AArch64] Coalesce Copy Zero during instruction selection"

This reverts commit d8f57105010cc7e78026e511d5def873fc91e0e7. Original Commit: Author: Haicheng Wu <haicheng@codeaurora.org> Date: Sun Feb 18 13:51:33 2018 +0000 [AArch64] Coalesce Copy Zero during instruction selection Add special case for copy of zero to avoid a double copy. Differential Revision: https://reviews.llvm.org/D36104 The author's intention is to remove a BB that has one mov instruction. In order to do that, d8f571050 pessimizes MachineSinking by introducing a copy, such that the mov instruction is NOT moved to the BB. Optimization downstream gets rid of the BB with only the mov instruction. This works well if we have only one fall-through branch, as there is only one "extra" mov instruction. If we have multiple fall-throughs, we will have a lot of redundant movs. In such a case, it's better to have this BB which has one mov instruction. This is causing degradation in jpeg, fft and other codebases. I believe if we want to remove a BB with only one branch instruction, we should not pessimize MachineSinking at all, and should find some other solution. llvm-svn: 335251
2024-11-22 10:42:39 +01:00 · 2018-06-21 16:05:24 +00:00 · 2018-06-21 16:05:24 +00:00 · 8aef1e988f
commit 8aef1e988f
parent 32a95830b7
5 changed files with 3 additions and 79 deletions
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@ -2892,35 +2892,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
    }
    break;
  }
-  case ISD::CopyToReg: {
-    // Special case for copy of zero to avoid a double copy.
-    SDNode *CopyVal = Node->getOperand(2).getNode();
-    ConstantSDNode *CopyValConst = dyn_cast<ConstantSDNode>(CopyVal);
-    if (!CopyValConst || !CopyValConst->isNullValue())
-      break;
-    const SDValue &Dest = Node->getOperand(1);
-    if (!TargetRegisterInfo::isVirtualRegister(
-            cast<RegisterSDNode>(Dest)->getReg()))
-      break;
-    unsigned ZeroReg;
-    EVT ZeroVT = CopyValConst->getValueType(0);
-    if (ZeroVT == MVT::i32)
-      ZeroReg = AArch64::WZR;
-    else if (ZeroVT == MVT::i64)
-      ZeroReg = AArch64::XZR;
-    else
-      break;
-    unsigned NumOperands = Node->getNumOperands();
-    SDValue ZeroRegVal = CurDAG->getRegister(ZeroReg, ZeroVT);
-    // Replace the source operand (#0) with ZeroRegVal.
-    SDValue Ops[] = {Node->getOperand(0), Node->getOperand(1), ZeroRegVal,
-                     (NumOperands == 4) ? Node->getOperand(3) : SDValue()};
-    SDValue New =
-        CurDAG->getNode(ISD::CopyToReg, SDLoc(Node), Node->getVTList(),
-                        makeArrayRef(Ops, NumOperands));
-    ReplaceNode(Node, New.getNode());
-    return;
-  }
+
  case ISD::FrameIndex: {
    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
--- a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
+++ b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@ -28,7 +28,6 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
 ; Next BB
 ; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2]
 ; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2]
-; CHECK-NEXT: mov w0, wzr
 ; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]]
 entry:
  %idxprom = sext i32 %i1 to i64
--- a/test/CodeGen/AArch64/arm64-cse.ll
+++ b/test/CodeGen/AArch64/arm64-cse.ll
@ -10,7 +10,7 @@ entry:
 ; CHECK: subs
 ; CHECK-NOT: cmp
 ; CHECK-NOT: sub
-; CHECK: b.lt
+; CHECK: b.ge
 ; CHECK: sub
 ; CHECK: sub
 ; CHECK-NOT: sub
--- a/test/CodeGen/AArch64/copy-zero-reg.ll
+++ b/test/CodeGen/AArch64/copy-zero-reg.ll
@ -1,47 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s
-
-; Verify there is no tiny block having only one mov wzr instruction between for.body.lr.ph and sw.epilog.loopexit
-define void @unroll_by_2(i32 %trip_count, i32* %p) {
-; CHECK-LABEL: unroll_by_2
-; CHECK: // %for.body.lr.ph
-; CHECK:     mov   w{{[0-9]+}}, wzr
-; CHECK:     b.eq
-; CHECK-NOT: mov   w{{[0-9]+}}, wzr
-; CHECK: // %for.body.lr.ph.new
-; CHECK: // %for.body
-; CHECK: // %sw.epilog.loopexit
-; CHECK: // %for.body.epil
-; CHECK: // %exit
-; CHECK-NEXT:   ret
-for.body.lr.ph:
-  %xtraiter = and i32 %trip_count, 1
-  %cmp = icmp eq i32 %trip_count, 1
-  br i1 %cmp, label %sw.epilog.loopexit, label %for.body.lr.ph.new
-
-for.body.lr.ph.new:
-  %unroll_iter = sub nsw i32 %trip_count, %xtraiter
-  br label %for.body
-
-for.body:
-  %indvars = phi i32 [ 0, %for.body.lr.ph.new ], [ %indvars.next, %for.body ]
-  %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [ %niter.nsub, %for.body ]
-  %array = getelementptr inbounds i32, i32 * %p, i32 %indvars
-  store  i32 %niter, i32* %array
-  %indvars.next = add i32 %indvars, 2
-  %niter.nsub = add i32 %niter, -2
-  %niter.ncmp = icmp eq i32 %niter.nsub, 0
-  br i1 %niter.ncmp, label %sw.epilog.loopexit, label %for.body
-
-sw.epilog.loopexit:
-  %indvars.unr = phi i32 [ 0, %for.body.lr.ph ], [ %indvars.next, %for.body ]
-  %lcmp.mod = icmp eq i32 %xtraiter, 0
-  br i1 %lcmp.mod, label %exit, label %for.body.epil
-
-for.body.epil:
-  %array.epil = getelementptr inbounds i32, i32* %p, i32 %indvars.unr
-  store  i32 %indvars.unr, i32* %array.epil
-  br label %exit
-
-exit:
-  ret void
-}
--- a/test/CodeGen/AArch64/i128-fast-isel-fallback.ll
+++ b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll
@ -10,7 +10,7 @@ define void @test1() {
 ; registers that make up the i128 pair

 ; CHECK:  mov  x0, xzr
-; CHECK:  mov  x1, xzr 
+; CHECK:  mov  x1, x0
 ; CHECK:  bl  _test2

 }