diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index ada17fc6308..d5f11eb9005 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -155,6 +155,30 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) { return Ref; } +// Return true if this is a load and test which can be optimized the +// same way as a compare instruction. +static bool isLoadAndTestAsCmp(MachineInstr *MI) { + // If isel used a load-and-test as a compare with 0, the def operand + // is dead. + return ((MI->getOpcode() == SystemZ::LTEBR || + MI->getOpcode() == SystemZ::LTDBR || + MI->getOpcode() == SystemZ::LTXBR) && + MI->getOperand(0).isDead()); +} + +// Return the source register of Compare, which is the unknown value +// being tested. +static unsigned getCompareSourceReg(MachineInstr *Compare) { + unsigned Reg = 0; + if (Compare->isCompare()) + Reg = Compare->getOperand(0).getReg(); + else if (isLoadAndTestAsCmp(Compare)) + Reg = Compare->getOperand(1).getReg(); + assert(Reg && "Expected a compare or a load-and-test used as a compare"); + + return Reg; +} + // Compare compares the result of MI against zero. If MI is an addition // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition // and convert the branch to a BRCT(G). Return true on success. @@ -185,7 +209,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, // We already know that there are no references to the register between // MI and Compare. Make sure that there are also no references between // Compare and Branch. 
- unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcReg = getCompareSourceReg(Compare); MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; for (++MBBI; MBBI != MBBE; ++MBBI) if (getRegReferences(MBBI, SrcReg)) @@ -305,6 +329,10 @@ static bool isCompareZero(MachineInstr *Compare) { return true; default: + + if (isLoadAndTestAsCmp(Compare)) + return true; + return (Compare->getNumExplicitOperands() == 2 && Compare->getOperand(1).isImm() && Compare->getOperand(1).getImm() == 0); @@ -322,7 +350,7 @@ optimizeCompareZero(MachineInstr *Compare, return false; // Search back for CC results that are based on the first operand. - unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcReg = getCompareSourceReg(Compare); MachineBasicBlock &MBB = *Compare->getParent(); MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin(); Reference CCRefs; @@ -431,7 +459,7 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { while (MBBI != MBB.begin()) { MachineInstr *MI = --MBBI; if (CompleteCCUsers && - MI->isCompare() && + (MI->isCompare() || isLoadAndTestAsCmp(MI)) && (optimizeCompareZero(MI, CCUsers) || fuseCompareAndBranch(MI, CCUsers))) { ++MBBI; diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index efa3f82cec4..22beaad2ab7 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -130,6 +130,13 @@ void SystemZPassConfig::addPreSched2() { } void SystemZPassConfig::addPreEmitPass() { + + // Do instruction shortening before compare elimination because some + // vector instructions will be shortened into opcodes that compare + // elimination recognizes. 
+ if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false); + // We eliminate comparisons here rather than earlier because some // transformations can change the set of available CC values and we // generally want those transformations to have priority. This is @@ -155,8 +162,6 @@ void SystemZPassConfig::addPreEmitPass() { // preventing that would be a win or not. if (getOptLevel() != CodeGenOpt::None) addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false); - if (getOptLevel() != CodeGenOpt::None) - addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false); addPass(createSystemZLongBranchPass(getSystemZTargetMachine())); } diff --git a/test/CodeGen/SystemZ/fp-cmp-05.ll b/test/CodeGen/SystemZ/fp-cmp-05.ll index 1d71a0fcec5..7e937d1adae 100644 --- a/test/CodeGen/SystemZ/fp-cmp-05.ll +++ b/test/CodeGen/SystemZ/fp-cmp-05.ll @@ -4,14 +4,15 @@ ; handled by SystemZElimcompare, so for Z13 this is currently ; unimplemented. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -check-prefix=CHECK-Z10 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Load complement (sign-bit flipped). 
; Test f32 define float @f1(float %a, float %b, float %f) { ; CHECK-LABEL: f1: -; CHECK-Z10: lcebr -; CHECK-Z10-NEXT: je +; CHECK: lcebr +; CHECK-NEXT: je %neg = fsub float -0.0, %f %cond = fcmp oeq float %neg, 0.0 %res = select i1 %cond, float %a, float %b @@ -21,8 +22,8 @@ define float @f1(float %a, float %b, float %f) { ; Test f64 define double @f2(double %a, double %b, double %f) { ; CHECK-LABEL: f2: -; CHECK-Z10: lcdbr -; CHECK-Z10-NEXT: je +; CHECK: lcdbr +; CHECK-NEXT: je %neg = fsub double -0.0, %f %cond = fcmp oeq double %neg, 0.0 %res = select i1 %cond, double %a, double %b @@ -34,8 +35,8 @@ define double @f2(double %a, double %b, double %f) { declare float @llvm.fabs.f32(float %f) define float @f3(float %a, float %b, float %f) { ; CHECK-LABEL: f3: -; CHECK-Z10: lnebr -; CHECK-Z10-NEXT: je +; CHECK: lnebr +; CHECK-NEXT: je %abs = call float @llvm.fabs.f32(float %f) %neg = fsub float -0.0, %abs %cond = fcmp oeq float %neg, 0.0 @@ -47,8 +48,8 @@ define float @f3(float %a, float %b, float %f) { declare double @llvm.fabs.f64(double %f) define double @f4(double %a, double %b, double %f) { ; CHECK-LABEL: f4: -; CHECK-Z10: lndbr -; CHECK-Z10-NEXT: je +; CHECK: lndbr +; CHECK-NEXT: je %abs = call double @llvm.fabs.f64(double %f) %neg = fsub double -0.0, %abs %cond = fcmp oeq double %neg, 0.0 @@ -60,8 +61,8 @@ define double @f4(double %a, double %b, double %f) { ; Test f32 define float @f5(float %a, float %b, float %f) { ; CHECK-LABEL: f5: -; CHECK-Z10: lpebr -; CHECK-Z10-NEXT: je +; CHECK: lpebr +; CHECK-NEXT: je %abs = call float @llvm.fabs.f32(float %f) %cond = fcmp oeq float %abs, 0.0 %res = select i1 %cond, float %a, float %b @@ -71,8 +72,8 @@ define float @f5(float %a, float %b, float %f) { ; Test f64 define double @f6(double %a, double %b, double %f) { ; CHECK-LABEL: f6: -; CHECK-Z10: lpdbr -; CHECK-Z10-NEXT: je +; CHECK: lpdbr +; CHECK-NEXT: je %abs = call double @llvm.fabs.f64(double %f) %cond = fcmp oeq double %abs, 0.0 %res = select i1 %cond, double %a, 
double %b