From 57400949409f32798caef59becc7ee8d30c79b89 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 24 Jul 2021 12:58:02 +0100
Subject: [PATCH] [CGP] despeculateCountZeros - Don't create is-zero branch if
 cttz/ctlz source is known non-zero

If value tracking can confirm that the cttz/ctlz source is known non-zero, then we don't need to create the is-zero compare and branch (which SelectionDAG will struggle to recover from).

Differential Revision: https://reviews.llvm.org/D106685
---
 lib/CodeGen/CodeGenPrepare.cpp |  4 ++++
 test/CodeGen/X86/clz.ll        | 22 ----------------------
 2 files changed, 4 insertions(+), 22 deletions(-)

diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 1c0932ee40e..77ce3d2fb56 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -2041,6 +2041,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
   if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
     return false;
 
+  // Bail if the value is never zero.
+  if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL))
+    return false;
+
   // The intrinsic will be sunk behind a compare against zero and branch.
   BasicBlock *StartBlock = CountZeros->getParent();
   BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index 0c12135c127..755f6b43d09 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -704,33 +704,21 @@ define i64 @cttz_i64_zero_test(i64 %n) {
 ; Don't generate the cmovne when the source is known non-zero (and bsr would
 ; not set ZF).
 ; rdar://9490949
-; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
-;        codegen doesn't know how to delete the movl and je.
 define i32 @ctlz_i32_fold_cmov(i32 %n) {
 ; X86-LABEL: ctlz_i32_fold_cmov:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl $1, %eax
-; X86-NEXT:    je .LBB16_1
-; X86-NEXT:  # %bb.2: # %cond.false
 ; X86-NEXT:    bsrl %eax, %eax
 ; X86-NEXT:    xorl $31, %eax
 ; X86-NEXT:    retl
-; X86-NEXT:  .LBB16_1:
-; X86-NEXT:    movl $32, %eax
-; X86-NEXT:    retl
 ;
 ; X64-LABEL: ctlz_i32_fold_cmov:
 ; X64:       # %bb.0:
 ; X64-NEXT:    orl $1, %edi
-; X64-NEXT:    je .LBB16_1
-; X64-NEXT:  # %bb.2: # %cond.false
 ; X64-NEXT:    bsrl %edi, %eax
 ; X64-NEXT:    xorl $31, %eax
 ; X64-NEXT:    retq
-; X64-NEXT:  .LBB16_1:
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    retq
 ;
 ; X86-CLZ-LABEL: ctlz_i32_fold_cmov:
 ; X86-CLZ:       # %bb.0:
@@ -953,14 +941,9 @@ define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
 ; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
 ; X64:       # %bb.0:
 ; X64-NEXT:    orq $1, %rdi
-; X64-NEXT:    je .LBB21_1
-; X64-NEXT:  # %bb.2: # %cond.false
 ; X64-NEXT:    bsrq %rdi, %rax
 ; X64-NEXT:    xorq $63, %rax
 ; X64-NEXT:    retq
-; X64-NEXT:  .LBB21_1:
-; X64-NEXT:    movl $64, %eax
-; X64-NEXT:    retq
 ;
 ; X86-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
 ; X86-CLZ:       # %bb.0:
@@ -1026,13 +1009,8 @@ define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
 ; X64-NEXT:    orq %rdi, %rax
-; X64-NEXT:    je .LBB22_1
-; X64-NEXT:  # %bb.2: # %cond.false
 ; X64-NEXT:    bsfq %rax, %rax
 ; X64-NEXT:    retq
-; X64-NEXT:  .LBB22_1:
-; X64-NEXT:    movl $64, %eax
-; X64-NEXT:    retq
 ;
 ; X86-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
 ; X86-CLZ:       # %bb.0: