it turns out that when ".with.overflow" intrinsics were added to the X86
backend that they were all implemented except umul. This one fell back
to the default implementation that did a hi/lo multiply and compared the
top. Fix this to check the overflow flag that the 'mul' instruction
sets, so we can avoid an explicit test. Now we compile:
void *func(long count) {
return new int[count];
}
into:
__Z4funcl: ## @_Z4funcl
movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00]
movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
mulq %rcx ## encoding: [0x48,0xf7,0xe1]
seto %cl ## encoding: [0x0f,0x90,0xc1]
testb %cl, %cl ## encoding: [0x84,0xc9]
movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8]
jmp __Znam ## TAILCALL
instead of:
__Z4funcl: ## @_Z4funcl
movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00]
movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
mulq %rcx ## encoding: [0x48,0xf7,0xe1]
testq %rdx, %rdx ## encoding: [0x48,0x85,0xd2]
movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8]
jmp __Znam ## TAILCALL
Other than the silly seto+test, this is using the o bit directly, so it's going in the right
direction.
llvm-svn: 120935
2010-12-05 08:30:36 +01:00
|
|
|
; RUN: llc < %s -march=x86 | FileCheck %s
|
2009-06-16 08:58:29 +02:00
|
|
|
|
|
|
|
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
|
2011-06-17 05:14:27 +02:00
|
|
|
define zeroext i1 @a(i32 %x) nounwind {
|
2009-06-16 08:58:29 +02:00
|
|
|
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
|
|
|
|
%obil = extractvalue {i32, i1} %res, 1
|
|
|
|
ret i1 %obil
|
it turns out that when ".with.overflow" intrinsics were added to the X86
backend that they were all implemented except umul. This one fell back
to the default implementation that did a hi/lo multiply and compared the
top. Fix this to check the overflow flag that the 'mul' instruction
sets, so we can avoid an explicit test. Now we compile:
void *func(long count) {
return new int[count];
}
into:
__Z4funcl: ## @_Z4funcl
movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00]
movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
mulq %rcx ## encoding: [0x48,0xf7,0xe1]
seto %cl ## encoding: [0x0f,0x90,0xc1]
testb %cl, %cl ## encoding: [0x84,0xc9]
movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8]
jmp __Znam ## TAILCALL
instead of:
__Z4funcl: ## @_Z4funcl
movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00]
movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
mulq %rcx ## encoding: [0x48,0xf7,0xe1]
testq %rdx, %rdx ## encoding: [0x48,0x85,0xd2]
movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8]
jmp __Znam ## TAILCALL
Other than the silly seto+test, this is using the o bit directly, so it's going in the right
direction.
llvm-svn: 120935
2010-12-05 08:30:36 +01:00
|
|
|
|
|
|
|
; CHECK: a:
|
|
|
|
; CHECK: mull
|
|
|
|
; CHECK: seto %al
|
2011-04-14 03:46:37 +02:00
|
|
|
; CHECK: movzbl %al, %eax
|
it turns out that when ".with.overflow" intrinsics were added to the X86
backend that they were all implemented except umul. This one fell back
to the default implementation that did a hi/lo multiply and compared the
top. Fix this to check the overflow flag that the 'mul' instruction
sets, so we can avoid an explicit test. Now we compile:
void *func(long count) {
return new int[count];
}
into:
__Z4funcl: ## @_Z4funcl
movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00]
movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
mulq %rcx ## encoding: [0x48,0xf7,0xe1]
seto %cl ## encoding: [0x0f,0x90,0xc1]
testb %cl, %cl ## encoding: [0x84,0xc9]
movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8]
jmp __Znam ## TAILCALL
instead of:
__Z4funcl: ## @_Z4funcl
movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00]
movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
mulq %rcx ## encoding: [0x48,0xf7,0xe1]
testq %rdx, %rdx ## encoding: [0x48,0x85,0xd2]
movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8]
jmp __Znam ## TAILCALL
Other than the silly seto+test, this is using the o bit directly, so it's going in the right
direction.
llvm-svn: 120935
2010-12-05 08:30:36 +01:00
|
|
|
; CHECK: ret
|
2009-06-16 08:58:29 +02:00
|
|
|
}
|
2011-05-21 20:31:55 +02:00
|
|
|
|
|
|
|
define i32 @test2(i32 %a, i32 %b) nounwind readnone {
|
|
|
|
entry:
|
|
|
|
%tmp0 = add i32 %b, %a
|
|
|
|
%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
|
|
|
|
%tmp2 = extractvalue { i32, i1 } %tmp1, 0
|
|
|
|
ret i32 %tmp2
|
|
|
|
; CHECK: test2:
|
|
|
|
; CHECK: addl
|
|
|
|
; CHECK-NEXT: addl
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
}
|
|
|
|
|
|
|
|
define i32 @test3(i32 %a, i32 %b) nounwind readnone {
|
|
|
|
entry:
|
|
|
|
%tmp0 = add i32 %b, %a
|
|
|
|
%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
|
|
|
|
%tmp2 = extractvalue { i32, i1 } %tmp1, 0
|
|
|
|
ret i32 %tmp2
|
|
|
|
; CHECK: test3:
|
|
|
|
; CHECK: addl
|
|
|
|
; CHECK: mull
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
}
|