[SimplifyLibCalls] Correctly set the is_zero_undef flag for llvm.cttz

If <src> is non-zero we can safely set the flag to true, and this results in less code generated for, e.g. ffs(x) + 1 on FreeBSD. Thanks to majnemer for suggesting the fix and reviewing. Code generated before the patch was applied: 0: 0f bc c7 bsf %edi,%eax 3: b9 20 00 00 00 mov $0x20,%ecx 8: 0f 45 c8 cmovne %eax,%ecx b: 83 c1 02 add $0x2,%ecx e: b8 01 00 00 00 mov $0x1,%eax 13: 85 ff test %edi,%edi 15: 0f 45 c1 cmovne %ecx,%eax 18: c3 retq Code generated after the patch was applied: 0: 0f bc cf bsf %edi,%ecx 3: 83 c1 02 add $0x2,%ecx 6: 85 ff test %edi,%edi 8: b8 01 00 00 00 mov $0x1,%eax d: 0f 45 c1 cmovne %ecx,%eax 10: c3 retq It seems we can still use cmove and save another 'test' instruction, but that can be tackled separately. Differential Revision: http://reviews.llvm.org/D11989 llvm-svn: 244947
2024-11-23 11:13:28 +01:00 · 2015-08-13 20:34:26 +00:00 · 2015-08-13 20:34:26 +00:00 · 3672dcbb24
commit 3672dcbb24
parent 2a7ee13679
2 changed files with 4 additions and 4 deletions
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@ -1436,7 +1436,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
  Type *ArgType = Op->getType();
  Value *F =
      Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
-  Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz");
+  Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
  V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
  V = B.CreateIntCast(V, B.getInt32Ty(), false);

--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@ -102,7 +102,7 @@ define i32 @test_simplify12() {
 define i32 @test_simplify13(i32 %x) {
 ; CHECK-LABEL: @test_simplify13(
  %ret = call i32 @ffs(i32 %x)
-; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
 ; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
 ; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
@ -113,7 +113,7 @@ define i32 @test_simplify13(i32 %x) {
 define i32 @test_simplify14(i32 %x) {
 ; CHECK-LINUX-LABEL: @test_simplify14(
  %ret = call i32 @ffsl(i32 %x)
-; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
 ; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
 ; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
 ; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
@ -124,7 +124,7 @@ define i32 @test_simplify14(i32 %x) {
 define i32 @test_simplify15(i64 %x) {
 ; CHECK-LINUX-LABEL: @test_simplify15(
  %ret = call i32 @ffsll(i64 %x)
-; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
 ; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1
 ; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
 ; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0