2011-01-07 12:35:21 +01:00
|
|
|
; RUN: llc -mtriple=i386-apple-darwin -mcpu=yonah < %s | FileCheck %s
|
2008-08-21 23:00:15 +02:00
|
|
|
|
|
|
|
; NOTE(review): none of the functions visible in this file call
; @llvm.memset.i32; they all call @llvm.memset.p0i8.i32 instead. This
; declaration looks like a leftover -- confirm it is unused before removing it.
declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
|
|
|
|
|
2010-04-01 20:19:11 +02:00
|
|
|
; t1: memset of 188 bytes with the constant fill value 0 through a null
; destination pointer. The FileCheck directives in the body expect llc to emit
; this as a library call (calll _memset).
; NOTE(review): presumably 188 bytes exceeds the inline-expansion limit for
; this triple/CPU -- confirm against the X86 memset lowering thresholds.
define fastcc void @t1() nounwind {
|
2008-08-21 23:00:15 +02:00
|
|
|
entry:
|
2013-07-14 08:24:09 +02:00
|
|
|
; CHECK-LABEL: t1:
|
2011-01-07 20:35:30 +01:00
|
|
|
; CHECK: calll _memset
|
2011-06-18 08:05:24 +02:00
|
|
|
call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 188, i32 1, i1 false)
|
2010-04-01 08:04:33 +02:00
|
|
|
unreachable
|
2008-08-21 23:00:15 +02:00
|
|
|
}
|
2010-04-01 20:19:11 +02:00
|
|
|
|
|
|
|
; t2: memset of 76 bytes whose fill value is the run-time argument %c and
; whose destination is undef. As in t1, the FileCheck directives in the body
; expect a library call (calll _memset) in the generated code.
; NOTE(review): presumably this covers the non-constant fill value case of the
; same lowering path as t1 -- confirm against the original commit.
define fastcc void @t2(i8 signext %c) nounwind {
|
|
|
|
entry:
|
2013-07-14 08:24:09 +02:00
|
|
|
; CHECK-LABEL: t2:
|
2010-09-22 07:49:14 +02:00
|
|
|
; CHECK: calll _memset
|
2011-06-18 08:05:24 +02:00
|
|
|
call void @llvm.memset.p0i8.i32(i8* undef, i8 %c, i32 76, i32 1, i1 false)
|
2010-04-01 20:19:11 +02:00
|
|
|
unreachable
|
|
|
|
}
|
Lower the i8 extension in memset to a multiply instead of a potentially long series of shifts and ors.
We could implement a DAGCombine to turn x * 0x0101 back into logic operations
on targets that don't support the multiply or where it is slow (p4), if someone cares
enough.
Example code:
void test(char *s, int a) {
__builtin_memset(s, a, 4);
}
before:
_test: ## @test
movzbl 8(%esp), %eax
movl %eax, %ecx
shll $8, %ecx
orl %eax, %ecx
movl %ecx, %eax
shll $16, %eax
orl %ecx, %eax
movl 4(%esp), %ecx
movl %eax, 4(%ecx)
movl %eax, (%ecx)
ret
after:
_test: ## @test
movzbl 8(%esp), %eax
imull $16843009, %eax, %eax ## imm = 0x1010101
movl 4(%esp), %ecx
movl %eax, 4(%ecx)
movl %eax, (%ecx)
ret
llvm-svn: 122707
2011-01-02 20:44:58 +01:00
|
|
|
|
|
|
|
; Declaration of the memset intrinsic called by the test functions in this
; file. Operands, per the LLVM LangRef for this form of the intrinsic (TODO
; confirm for the LLVM version in use): destination pointer, fill byte,
; length in bytes, alignment, isvolatile flag.
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
|
|
|
|
|
|
|
|
; t3: memset of 8 bytes at %s with the run-time fill value %a. The FileCheck
; directives in the body expect the generated code to contain
; "imull $16843009" (0x1010101), i.e. the fill byte is widened with a multiply
; rather than a series of shifts and ors.
define void @t3(i8* nocapture %s, i8 %a) nounwind {
|
|
|
|
entry:
|
|
|
|
tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false)
|
|
|
|
ret void
|
2013-07-14 08:24:09 +02:00
|
|
|
; CHECK-LABEL: t3:
|
Lower the i8 extension in memset to a multiply instead of a potentially long series of shifts and ors.
We could implement a DAGCombine to turn x * 0x0101 back into logic operations
on targets that doesn't support the multiply or it is slow (p4) if someone cares
enough.
Example code:
void test(char *s, int a) {
__builtin_memset(s, a, 4);
}
before:
_test: ## @test
movzbl 8(%esp), %eax
movl %eax, %ecx
shll $8, %ecx
orl %eax, %ecx
movl %ecx, %eax
shll $16, %eax
orl %ecx, %eax
movl 4(%esp), %ecx
movl %eax, 4(%ecx)
movl %eax, (%ecx)
ret
after:
_test: ## @test
movzbl 8(%esp), %eax
imull $16843009, %eax, %eax ## imm = 0x1010101
movl 4(%esp), %ecx
movl %eax, 4(%ecx)
movl %eax, (%ecx)
ret
llvm-svn: 122707
2011-01-02 20:44:58 +01:00
|
|
|
; CHECK: imull $16843009
|
|
|
|
}
|
|
|
|
|
2011-01-02 20:57:05 +01:00
|
|
|
; t4: memset of 15 bytes at %s with the run-time fill value %a. The FileCheck
; directives in the body expect one "imull $16843009" and then no further
; "imul" text before the "ret", i.e. the multiply is emitted only once.
; NOTE(review): presumably 15 was chosen so the store sequence mixes several
; store widths that all reuse the one widened value -- confirm against the
; original commit.
define void @t4(i8* nocapture %s, i8 %a) nounwind {
|
|
|
|
entry:
|
|
|
|
tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
|
|
|
|
ret void
|
2013-07-14 08:24:09 +02:00
|
|
|
; CHECK-LABEL: t4:
|
2011-01-02 20:57:05 +01:00
|
|
|
; CHECK: imull $16843009
|
|
|
|
; CHECK-NOT: imul
|
|
|
|
; CHECK: ret
|
|
|
|
}
|