[SystemZ] SystemZElimCompare pass improved.

Compare elimination has been extended to recognize load-and-test instructions used for comparison and to eliminate them the same way as compare instructions. Test case fp-cmp-05.ll has been updated to expect optimized results for z13 as well. The order of the instruction shortening and compare elimination passes has been changed so that opcodes do not have to be handled in both passes. Reviewed by Ulrich Weigand. llvm-svn: 249666
2025-01-31 20:51:52 +01:00 · 2015-10-08 07:40:23 +00:00 · 2015-10-08 07:40:23 +00:00 · d41b87b65d
commit d41b87b65d
parent 0a12c89155
3 changed files with 52 additions and 18 deletions
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@ -155,6 +155,30 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
  return Ref;
 }

+// Return true if MI is a load-and-test instruction that can be optimized
+// the same way as a compare instruction.
+static bool isLoadAndTestAsCmp(MachineInstr *MI) {
+  // If isel used a load-and-test as a compare with 0, the def operand
+  // (operand 0) is dead.
+  return ((MI->getOpcode() == SystemZ::LTEBR ||
+	   MI->getOpcode() == SystemZ::LTDBR ||
+	   MI->getOpcode() == SystemZ::LTXBR) &&
+	  MI->getOperand(0).isDead());
+}
+
+// Return the register holding the value tested by Compare: operand 0 of
+// a compare instruction, or operand 1 of a load-and-test used as one.
+static unsigned getCompareSourceReg(MachineInstr *Compare) {
+  unsigned reg = 0;
+  if (Compare->isCompare())
+    reg = Compare->getOperand(0).getReg();
+  else if (isLoadAndTestAsCmp(Compare))
+    reg = Compare->getOperand(1).getReg();
+  assert (reg); // reg stays 0 if Compare matched neither form above.
+
+  return reg;
+}
+
 // Compare compares the result of MI against zero.  If MI is an addition
 // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
 // and convert the branch to a BRCT(G).  Return true on success.
@ -185,7 +209,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
  // We already know that there are no references to the register between
  // MI and Compare.  Make sure that there are also no references between
  // Compare and Branch.
-  unsigned SrcReg = Compare->getOperand(0).getReg();
+  unsigned SrcReg = getCompareSourceReg(Compare);
  MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
  for (++MBBI; MBBI != MBBE; ++MBBI)
    if (getRegReferences(MBBI, SrcReg))
@ -305,6 +329,10 @@ static bool isCompareZero(MachineInstr *Compare) {
    return true;

  default:
+
+    if (isLoadAndTestAsCmp(Compare))
+      return true;
+
    return (Compare->getNumExplicitOperands() == 2 &&
            Compare->getOperand(1).isImm() &&
            Compare->getOperand(1).getImm() == 0);
@ -322,7 +350,7 @@ optimizeCompareZero(MachineInstr *Compare,
    return false;

  // Search back for CC results that are based on the first operand.
-  unsigned SrcReg = Compare->getOperand(0).getReg();
+  unsigned SrcReg = getCompareSourceReg(Compare);
  MachineBasicBlock &MBB = *Compare->getParent();
  MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin();
  Reference CCRefs;
@ -431,7 +459,7 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
  while (MBBI != MBB.begin()) {
    MachineInstr *MI = --MBBI;
    if (CompleteCCUsers &&
-        MI->isCompare() &&
+        (MI->isCompare() || isLoadAndTestAsCmp(MI)) &&
        (optimizeCompareZero(MI, CCUsers) ||
         fuseCompareAndBranch(MI, CCUsers))) {
      ++MBBI;
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@ -130,6 +130,13 @@ void SystemZPassConfig::addPreSched2() {
 }

 void SystemZPassConfig::addPreEmitPass() {
+
+  // Do instruction shortening before compare elimination because some
+  // vector instructions will be shortened into opcodes that compare
+  // elimination recognizes.
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
+
  // We eliminate comparisons here rather than earlier because some
  // transformations can change the set of available CC values and we
  // generally want those transformations to have priority.  This is
@ -155,8 +162,6 @@ void SystemZPassConfig::addPreEmitPass() {
  // preventing that would be a win or not.
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
-  if (getOptLevel() != CodeGenOpt::None)
-    addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
  addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
 }

--- a/test/CodeGen/SystemZ/fp-cmp-05.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-05.ll
@ -4,14 +4,15 @@
 ; handled by SystemZElimcompare, so for Z13 this is currently
 ; unimplemented.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -check-prefix=CHECK-Z10
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s

 ; Load complement (sign-bit flipped).
 ; Test f32
 define float @f1(float %a, float %b, float %f) {
 ; CHECK-LABEL: f1:
-; CHECK-Z10: lcebr
-; CHECK-Z10-NEXT: je
+; CHECK: lcebr
+; CHECK-NEXT: je
  %neg = fsub float -0.0, %f
  %cond = fcmp oeq float %neg, 0.0
  %res = select i1 %cond, float %a, float %b
@ -21,8 +22,8 @@ define float @f1(float %a, float %b, float %f) {
 ; Test f64
 define double @f2(double %a, double %b, double %f) {
 ; CHECK-LABEL: f2:
-; CHECK-Z10: lcdbr
-; CHECK-Z10-NEXT: je
+; CHECK: lcdbr
+; CHECK-NEXT: je
  %neg = fsub double -0.0, %f
  %cond = fcmp oeq double %neg, 0.0
  %res = select i1 %cond, double %a, double %b
@ -34,8 +35,8 @@ define double @f2(double %a, double %b, double %f) {
 declare float @llvm.fabs.f32(float %f)
 define float @f3(float %a, float %b, float %f) {
 ; CHECK-LABEL: f3:
-; CHECK-Z10: lnebr
-; CHECK-Z10-NEXT: je
+; CHECK: lnebr
+; CHECK-NEXT: je
  %abs = call float @llvm.fabs.f32(float %f)
  %neg = fsub float -0.0, %abs
  %cond = fcmp oeq float %neg, 0.0
@ -47,8 +48,8 @@ define float @f3(float %a, float %b, float %f) {
 declare double @llvm.fabs.f64(double %f)
 define double @f4(double %a, double %b, double %f) {
 ; CHECK-LABEL: f4:
-; CHECK-Z10: lndbr
-; CHECK-Z10-NEXT: je
+; CHECK: lndbr
+; CHECK-NEXT: je
  %abs = call double @llvm.fabs.f64(double %f)
  %neg = fsub double -0.0, %abs
  %cond = fcmp oeq double %neg, 0.0
@ -60,8 +61,8 @@ define double @f4(double %a, double %b, double %f) {
 ; Test f32
 define float @f5(float %a, float %b, float %f) {
 ; CHECK-LABEL: f5:
-; CHECK-Z10: lpebr
-; CHECK-Z10-NEXT: je
+; CHECK: lpebr
+; CHECK-NEXT: je
  %abs = call float @llvm.fabs.f32(float %f)
  %cond = fcmp oeq float %abs, 0.0
  %res = select i1 %cond, float %a, float %b
@ -71,8 +72,8 @@ define float @f5(float %a, float %b, float %f) {
 ; Test f64
 define double @f6(double %a, double %b, double %f) {
 ; CHECK-LABEL: f6:
-; CHECK-Z10: lpdbr
-; CHECK-Z10-NEXT: je
+; CHECK: lpdbr
+; CHECK-NEXT: je
  %abs = call double @llvm.fabs.f64(double %f)
  %cond = fcmp oeq double %abs, 0.0
  %res = select i1 %cond, double %a, double %b