From 3672dcbb245fdb46707f7c51b84dfa941e170879 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Thu, 13 Aug 2015 20:34:26 +0000 Subject: [PATCH] [SimplifyLibCalls] Correctly set the is_zero_undef flag for llvm.cttz If the operand is non-zero we can safely set the flag to true, and this results in less code generated for, e.g. ffs(x) + 1 on FreeBSD. Thanks to majnemer for suggesting the fix and reviewing. Code generated before the patch was applied: 0: 0f bc c7 bsf %edi,%eax 3: b9 20 00 00 00 mov $0x20,%ecx 8: 0f 45 c8 cmovne %eax,%ecx b: 83 c1 02 add $0x2,%ecx e: b8 01 00 00 00 mov $0x1,%eax 13: 85 ff test %edi,%edi 15: 0f 45 c1 cmovne %ecx,%eax 18: c3 retq Code generated after the patch was applied: 0: 0f bc cf bsf %edi,%ecx 3: 83 c1 02 add $0x2,%ecx 6: 85 ff test %edi,%edi 8: b8 01 00 00 00 mov $0x1,%eax d: 0f 45 c1 cmovne %ecx,%eax 10: c3 retq It seems we can still use cmove and save another 'test' instruction, but that can be tackled separately. Differential Revision: http://reviews.llvm.org/D11989 llvm-svn: 244947 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 2 +- test/Transforms/InstCombine/ffs-1.ll | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 83839e962e1..bb03c98b492 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1436,7 +1436,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { Type *ArgType = Op->getType(); Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType); - Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz"); + Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); V = B.CreateIntCast(V, B.getInt32Ty(), false); diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll index c8763dc199a..30002581cf5 100644 --- 
a/test/Transforms/InstCombine/ffs-1.ll +++ b/test/Transforms/InstCombine/ffs-1.ll @@ -102,7 +102,7 @@ define i32 @test_simplify12() { define i32 @test_simplify13(i32 %x) { ; CHECK-LABEL: @test_simplify13( %ret = call i32 @ffs(i32 %x) -; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false) +; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true) ; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0 ; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0 @@ -113,7 +113,7 @@ define i32 @test_simplify13(i32 %x) { define i32 @test_simplify14(i32 %x) { ; CHECK-LINUX-LABEL: @test_simplify14( %ret = call i32 @ffsl(i32 %x) -; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false) +; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true) ; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1 ; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0 ; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0 @@ -124,7 +124,7 @@ define i32 @test_simplify14(i32 %x) { define i32 @test_simplify15(i64 %x) { ; CHECK-LINUX-LABEL: @test_simplify15( %ret = call i32 @ffsll(i64 %x) -; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false) +; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 true) ; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1 ; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32 ; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0