From 54552bfee66e4253d0652c6499c3ca91740f873d Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Thu, 27 Oct 2016 05:17:58 +0000 Subject: [PATCH] [PowerPC] - No SExt/ZExt needed for count trailing zeros This patch corresponds to review: https://reviews.llvm.org/D25896 It just eliminates the redundant ZExt after a count trailing zeros instruction. llvm-svn: 285267 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 6 ++- .../PowerPC/no-ext-with-count-zeros.ll | 54 +++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/PowerPC/no-ext-with-count-zeros.ll diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 1395a4b4099..cf8b858c6a7 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4041,8 +4041,9 @@ static bool PeepholePPC64ZExtGather(SDValue Op32, return true; } - // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended. - if (Op32.getMachineOpcode() == PPC::CNTLZW) { + // CNT[LT]ZW always produce a 64-bit value in [0,32], so it is zero extended. 
+ if (Op32.getMachineOpcode() == PPC::CNTLZW || + Op32.getMachineOpcode() == PPC::CNTTZW) { ToPromote.insert(Op32.getNode()); return true; } @@ -4237,6 +4238,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break; + case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break; case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; case PPC::OR: NewOpcode = PPC::OR8; break; case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; diff --git a/test/CodeGen/PowerPC/no-ext-with-count-zeros.ll b/test/CodeGen/PowerPC/no-ext-with-count-zeros.ll new file mode 100644 index 00000000000..4d58e74f7b8 --- /dev/null +++ b/test/CodeGen/PowerPC/no-ext-with-count-zeros.ll @@ -0,0 +1,54 @@ +; Function Attrs: nounwind readnone +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ +; RUN: -mcpu=pwr9 < %s | FileCheck %s + +define signext i32 @ctw(i32 signext %a) { +entry: + %0 = tail call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %0 +; CHECK-LABEL: ctw +; CHECK: cnttzw 3, 3 +; CHECK-NEXT: blr +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.cttz.i32(i32, i1) + +; Function Attrs: nounwind readnone +define signext i32 @clw(i32 signext %a) { +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) + ret i32 %0 +; CHECK-LABEL: clw +; CHECK: cntlzw 3, 3 +; CHECK-NEXT: blr +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.ctlz.i32(i32, i1) + +; Function Attrs: nounwind readnone +define i64 @ctd(i64 %a) { +entry: + %0 = tail call i64 @llvm.cttz.i64(i64 %a, i1 false) + ret i64 %0 +; CHECK-LABEL: ctd +; CHECK: cnttzd 3, 3 +; CHECK-NEXT: blr +} + +; Function Attrs: nounwind readnone +declare i64 @llvm.cttz.i64(i64, i1) + +; Function Attrs: nounwind readnone +define i64 @cld(i64 %a) { +entry: + %0 = tail call i64 @llvm.ctlz.i64(i64 %a, i1 false) + ret i64 %0 +; CHECK-LABEL: cld +; CHECK: cntlzd 3, 3 +; CHECK-NEXT: blr +} 
+ +; Function Attrs: nounwind readnone +declare i64 @llvm.ctlz.i64(i64, i1)