; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
; A pair of DAG combines turns mulhi/mullo into a wider multiply when the
; wider type is legal (llvm-svn: 121696). This lets us compile @test1 below
; (udiv of an i16 by 33) into:
;   test1:                        # @test1
;     movzwl 4(%esp), %eax
;     imull  $63551, %eax, %eax   # imm = 0xF83F
;     shrl   $21, %eax
;     ret
; instead of:
;   test1:                        # @test1
;     movw   $-1985, %ax          # imm = 0xFFFFFFFFFFFFF83F
;     mulw   4(%esp)
;     andl   $65504, %edx         # imm = 0xFFE0
;     movl   %edx, %eax
;     shrl   $5, %eax
;     ret
; Implements rdar://8760399 and example #4 from
; http://blog.regehr.org/archives/320. The same combine should eventually be
; added for [su]mul_hilo as well.
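;
; A minimal IR-level sketch of the widened form the combine aims for (the
; function name @udiv33_widen_sketch is assumed for illustration and is not
; checked by FileCheck): zero-extend to i32, multiply by the magic constant
; 63551 = ceil(2^21 / 33), and shift right by 21. Since 63551 * 33 = 2^21 + 31
; and 31 < 2^5, (x * 63551) >> 21 equals x / 33 for every 16-bit unsigned x.
define zeroext i16 @udiv33_widen_sketch(i16 zeroext %x) nounwind {
entry:
  %ext = zext i16 %x to i32     ; widen to the legal type
  %mul = mul i32 %ext, 63551    ; 63551 = ceil(2^21 / 33)
  %shr = lshr i32 %mul, 21      ; floor(x * 63551 / 2^21) == floor(x / 33)
  %res = trunc i32 %shr to i16
  ret i16 %res
}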
define zeroext i16 @test1(i16 zeroext %x) nounwind {
entry:
%div = udiv i16 %x, 33
ret i16 %div
; CHECK: test1:
; CHECK: imull $63551, %eax, %eax
; CHECK-NEXT: shrl $21, %eax
; CHECK-NEXT: ret
}
define zeroext i16 @test2(i8 signext %x, i16 zeroext %c) nounwind readnone ssp noredzone {
entry:
%div = udiv i16 %c, 3
ret i16 %div
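; 43691 = ceil(2^17 / 3); since 43691 * 3 = 2^17 + 1, (c * 43691) >> 17 equals
; c / 3 for every 16-bit unsigned c, which is the imull/shrl pair checked below.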
; CHECK: test2:
; CHECK: imull $43691, %eax, %eax
; CHECK-NEXT: shrl $17, %eax
; CHECK-NEXT: ret
}
define zeroext i8 @test3(i8 zeroext %x, i8 zeroext %c) nounwind readnone ssp noredzone {
entry:
%div = udiv i8 %c, 3
ret i8 %div
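; 171 = ceil(2^9 / 3); since 171 * 3 = 2^9 + 1, (c * 171) >> 9 equals c / 3 for
; every 8-bit unsigned c once c has been zero-extended into a 32-bit register.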
; CHECK: test3:
; CHECK: movzbl 8(%esp), %eax
; CHECK-NEXT: imull $171, %eax, %eax
; CHECK-NEXT: shrl $9, %eax
; CHECK-NEXT: ret
}
define signext i16 @test4(i16 signext %x) nounwind {
entry:
%div = sdiv i16 %x, 33 ; <i16> [#uses=1]
ret i16 %div
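; Signed case: 1986 = ceil(2^16 / 33). The quotient is the high 16 bits of the
; 32-bit product x * 1986, corrected by one for negative x so that the
; division truncates toward zero.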
; CHECK: test4:
; CHECK: imull $1986, %eax, %
}
define i32 @test5(i32 %A) nounwind {
%tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1]
ret i32 %tmp1
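; 365384439 is the magic multiplier chosen for the divisor 1577682821; the
; quotient is derived from the high half of the 64-bit product, which mull
; leaves in %edx.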
; CHECK: test5:
; CHECK: movl $365384439, %eax
; CHECK: mull 4(%esp)
}
; DAGCombine folds (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1+c2))
; when c1 equals the number of bits that are truncated off (llvm-svn: 124559).
; This happens all the time when an smul is promoted to a larger type. On
; x86-64, "int test(int x) { return x/10; }" now compiles to
;   movslq %edi, %rax
;   imulq  $1717986919, %rax, %rax
;   movq   %rax, %rcx
;   shrq   $63, %rcx
;   sarq   $34, %rax            <- used to be "shrq $32, %rax; sarl $2, %eax"
;   addl   %ecx, %eax
; This fires 96 times in gcc.c on x86-64.
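;
; A minimal IR-level sketch of the pattern the fold targets (the function name
; is assumed for illustration and is not checked by FileCheck; the i64/i32
; widths are taken from the x86-64 example above):
define i32 @sra_trunc_srl_sketch(i64 %x) nounwind {
entry:
  %hi  = lshr i64 %x, 32        ; c1 = 32, exactly the bits truncated off below
  %t   = trunc i64 %hi to i32
  %res = ashr i32 %t, 2         ; c2 = 2
  ; the combine rewrites the equivalent DAG as (trunc (sra x, 34)), c1 + c2 = 34
  ret i32 %res
}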
define signext i16 @test6(i16 signext %x) nounwind {
entry:
%div = sdiv i16 %x, 10
ret i16 %div
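; 26215 = ceil(2^18 / 10): x is widened to i32 and multiplied by 26215, then
; the sign bit (shrl $31) is added to the arithmetically shifted product
; (sarl $18) so that the division truncates toward zero.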
; CHECK: test6:
; CHECK: imull $26215, %eax, %eax
; CHECK: shrl $31, %ecx
; CHECK: sarl $18, %eax
}
; BuildUDIV: if the divisor is even we can simplify the fixup of the
; multiplied value by introducing an early shift (llvm-svn: 127829). On x86-64
; this allows us to compile "unsigned foo(unsigned x) { return x/28; }" into
;   shrl   $2, %edi
;   imulq  $613566757, %rdi, %rax
;   shrq   $32, %rax
;   ret
; instead of:
;   movl   %edi, %eax
;   imulq  $613566757, %rax, %rcx
;   shrq   $32, %rcx
;   subl   %ecx, %eax
;   shrl   %eax
;   addl   %ecx, %eax
;   shrl   $4, %eax
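;
; A minimal IR-level sketch of that strategy for @test7 below (the function
; name is assumed for illustration and is not checked by FileCheck): since
; 28 = 4 * 7, shift out the power of two first, then divide by 7 with the
; magic constant 613566757 = ceil(2^32 / 7). Because 613566757 * 7 = 2^32 + 3,
; the high half of the 64-bit product is the exact quotient for any value
; that fits in 30 bits after the pre-shift.
define i32 @udiv28_sketch(i32 %x) nounwind {
entry:
  %q4   = lshr i32 %x, 2              ; x / 4, leaves at most 30 significant bits
  %wide = zext i32 %q4 to i64
  %mul  = mul i64 %wide, 613566757    ; 613566757 = ceil(2^32 / 7)
  %hi   = lshr i64 %mul, 32           ; high half of the product = (x / 4) / 7
  %res  = trunc i64 %hi to i32
  ret i32 %res
}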
define i32 @test7(i32 %x) nounwind {
%div = udiv i32 %x, 28
ret i32 %div
; CHECK: test7:
; CHECK: shrl $2
; CHECK: movl $613566757
; CHECK: mull
; CHECK-NOT: shrl
; CHECK: ret
}