mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[SimplifyLibCalls] Correctly set the is_zero_undef flag for llvm.cttz
If <src> is non-zero we can safely set the flag to true, and this results in less code being generated for, e.g., ffs(x) + 1 on FreeBSD. Thanks to majnemer for suggesting the fix and reviewing.

Code generated before the patch was applied:

   0: 0f bc c7          bsf    %edi,%eax
   3: b9 20 00 00 00    mov    $0x20,%ecx
   8: 0f 45 c8          cmovne %eax,%ecx
   b: 83 c1 02          add    $0x2,%ecx
   e: b8 01 00 00 00    mov    $0x1,%eax
  13: 85 ff             test   %edi,%edi
  15: 0f 45 c1          cmovne %ecx,%eax
  18: c3                retq

Code generated after the patch was applied:

   0: 0f bc cf          bsf    %edi,%ecx
   3: 83 c1 02          add    $0x2,%ecx
   6: 85 ff             test   %edi,%edi
   8: b8 01 00 00 00    mov    $0x1,%eax
   d: 0f 45 c1          cmovne %ecx,%eax
  10: c3                retq

It seems we can still use cmove and save another 'test' instruction, but that can be tackled separately.

Differential Revision: http://reviews.llvm.org/D11989
llvm-svn: 244947
This commit is contained in:
parent
2a7ee13679
commit
3672dcbb24
@ -1436,7 +1436,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
|
||||
Type *ArgType = Op->getType();
|
||||
Value *F =
|
||||
Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
|
||||
Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz");
|
||||
Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
|
||||
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
|
||||
V = B.CreateIntCast(V, B.getInt32Ty(), false);
|
||||
|
||||
|
@ -102,7 +102,7 @@ define i32 @test_simplify12() {
|
||||
define i32 @test_simplify13(i32 %x) {
|
||||
; CHECK-LABEL: @test_simplify13(
|
||||
%ret = call i32 @ffs(i32 %x)
|
||||
; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
|
||||
; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
|
||||
; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
|
||||
; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
|
||||
; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
|
||||
@ -113,7 +113,7 @@ define i32 @test_simplify13(i32 %x) {
|
||||
define i32 @test_simplify14(i32 %x) {
|
||||
; CHECK-LINUX-LABEL: @test_simplify14(
|
||||
%ret = call i32 @ffsl(i32 %x)
|
||||
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
|
||||
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
|
||||
; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
|
||||
; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
|
||||
; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
|
||||
@ -124,7 +124,7 @@ define i32 @test_simplify14(i32 %x) {
|
||||
define i32 @test_simplify15(i64 %x) {
|
||||
; CHECK-LINUX-LABEL: @test_simplify15(
|
||||
%ret = call i32 @ffsll(i64 %x)
|
||||
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
|
||||
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
|
||||
; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1
|
||||
; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
|
||||
; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
|
||||
|
Loading…
Reference in New Issue
Block a user