From 57400949409f32798caef59becc7ee8d30c79b89 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 24 Jul 2021 12:58:02 +0100 Subject: [PATCH] [CGP] despeculateCountZeros - Don't create is-zero branch if cttz/ctlz source is known non-zero If value tracking can confirm that the cttz/ctlz source is known non-zero then we don't need to create a branch (which DAG will struggle to recover from). Differential Revision: https://reviews.llvm.org/D106685 --- lib/CodeGen/CodeGenPrepare.cpp | 4 ++++ test/CodeGen/X86/clz.ll | 22 ---------------------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 1c0932ee40e..77ce3d2fb56 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -2041,6 +2041,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) return false; + // Bail if the value is never zero. + if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL)) + return false; + // The intrinsic will be sunk behind a compare against zero and branch. BasicBlock *StartBlock = CountZeros->getParent(); BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll index 0c12135c127..755f6b43d09 100644 --- a/test/CodeGen/X86/clz.ll +++ b/test/CodeGen/X86/clz.ll @@ -704,33 +704,21 @@ define i64 @cttz_i64_zero_test(i64 %n) { ; Don't generate the cmovne when the source is known non-zero (and bsr would ; not set ZF). ; rdar://9490949 -; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and -; codegen doesn't know how to delete the movl and je. define i32 @ctlz_i32_fold_cmov(i32 %n) { ; X86-LABEL: ctlz_i32_fold_cmov: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl $1, %eax -; X86-NEXT: je .LBB16_1 -; X86-NEXT: # %bb.2: # %cond.false ; X86-NEXT: bsrl %eax, %eax ; X86-NEXT: xorl $31, %eax ; X86-NEXT: retl -; X86-NEXT: .LBB16_1: -; X86-NEXT: movl $32, %eax -; X86-NEXT: retl ; ; X64-LABEL: ctlz_i32_fold_cmov: ; X64: # %bb.0: ; X64-NEXT: orl $1, %edi -; X64-NEXT: je .LBB16_1 -; X64-NEXT: # %bb.2: # %cond.false ; X64-NEXT: bsrl %edi, %eax ; X64-NEXT: xorl $31, %eax ; X64-NEXT: retq -; X64-NEXT: .LBB16_1: -; X64-NEXT: movl $32, %eax -; X64-NEXT: retq ; ; X86-CLZ-LABEL: ctlz_i32_fold_cmov: ; X86-CLZ: # %bb.0: @@ -953,14 +941,9 @@ define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) { ; X64-LABEL: ctlz_i64_zero_test_knownneverzero: ; X64: # %bb.0: ; X64-NEXT: orq $1, %rdi -; X64-NEXT: je .LBB21_1 -; X64-NEXT: # %bb.2: # %cond.false ; X64-NEXT: bsrq %rdi, %rax ; X64-NEXT: xorq $63, %rax ; X64-NEXT: retq -; X64-NEXT: .LBB21_1: -; X64-NEXT: movl $64, %eax -; X64-NEXT: retq ; ; X86-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero: ; X86-CLZ: # %bb.0: @@ -1026,13 +1009,8 @@ define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) { ; X64: # %bb.0: ; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 ; X64-NEXT: orq %rdi, %rax -; X64-NEXT: je .LBB22_1 -; X64-NEXT: # %bb.2: # %cond.false ; X64-NEXT: bsfq %rax, %rax ; X64-NEXT: retq -; X64-NEXT: .LBB22_1: -; X64-NEXT: movl $64, %eax -; X64-NEXT: retq ; ; X86-CLZ-LABEL: cttz_i64_zero_test_knownneverzero: ; X86-CLZ: # %bb.0: