Implement rdar://7860110 (also in target/readme.txt): narrow
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this already triggers a few dozen times
on 176.gcc as compiled by llvm-gcc.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
; Regression test for narrowing a load/and/or/store sequence into a smaller
; store. The same IR is compiled twice by the two llc invocations below: once
; with the module's own target triple (output checked with the X64 prefix) and
; once with -march=x86 (output checked with the X32 prefix).
; rdar://7860110
|
2010-04-15 07:40:59 +02:00
|
|
|
; RUN: llc < %s | FileCheck %s -check-prefix=X64
|
|
|
|
; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32
|
Implement rdar://7860110 (also in target/readme.txt): narrow
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this already triggers a few dozen times
on 176.gcc as compiled by llvm-gcc.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
; Module-level target settings. The layout string starts with "e"
; (little-endian) and uses 64-bit pointers (p:64:64:64); the byte offsets
; expected by the checks in this file (e.g. bits 8-15 landing at offset 1)
; follow from that little-endian layout.
; The triple is 64-bit Darwin; the llc invocation that passes -march=x86
; overrides the architecture to get 32-bit output from the same IR.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
|
|
|
target triple = "x86_64-apple-darwin10.2"
|
|
|
|
|
|
|
|
; @test1 -- replace the low byte of the i32 at %a0 with %a1.
; The mask -256 (0xFFFFFF00) keeps bits 8-31 of the loaded word, and the
; zero-extended i8 argument is OR'd into bits 0-7, so only one byte of the
; stored value differs from what was loaded.
; The X64 and X32 check lines expect a byte store (movb) to offset 0 of the
; pointer; on X32 the argument is first read from 8(%esp).
; Note that the 'or' here takes %C as its first operand, whereas the other
; tests in this file put the masked load first.
define void @test1(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i32* %a0, align 4
|
|
|
|
%B = and i32 %A, -256 ; 0xFFFFFF00
|
|
|
|
%C = zext i8 %a1 to i32
|
|
|
|
%D = or i32 %C, %B
|
|
|
|
store i32 %D, i32* %a0, align 4
|
|
|
|
ret void
|
|
|
|
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test1:
|
|
|
|
; X64: movb %sil, (%rdi)
|
|
|
|
|
|
|
|
; X32: test1:
|
|
|
|
; X32: movb 8(%esp), %al
|
|
|
|
; X32: movb %al, (%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
; @test2 -- replace bits 8-15 of the i32 at %a0 with %a1.
; The mask -65281 (0xFFFF00FF) clears only the second byte of the loaded
; word; the zero-extended i8 argument is shifted left by 8 and OR'd into
; that hole before the word is stored back.
; The X64 and X32 check lines expect a byte store (movb) to offset 1 of the
; pointer; on X32 the argument is first read from 8(%esp).
define void @test2(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i32* %a0, align 4
|
|
|
|
%B = and i32 %A, -65281 ; 0xFFFF00FF
|
|
|
|
%C = zext i8 %a1 to i32
|
|
|
|
%CS = shl i32 %C, 8
|
|
|
|
%D = or i32 %B, %CS
|
|
|
|
store i32 %D, i32* %a0, align 4
|
|
|
|
ret void
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test2:
|
|
|
|
; X64: movb %sil, 1(%rdi)
|
|
|
|
|
|
|
|
; X32: test2:
|
|
|
|
; X32: movb 8(%esp), %al
|
|
|
|
; X32: movb %al, 1(%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
; @test3 -- replace the low 16 bits of the i32 at %a0 with %a1.
; The mask -65536 (0xFFFF0000) keeps bits 16-31 of the loaded word and the
; zero-extended i16 argument is OR'd into bits 0-15 (no shift needed).
; The X64 and X32 check lines expect a 16-bit store (movw) to offset 0 of
; the pointer; on X32 the argument is first read from 8(%esp).
define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i32* %a0, align 4
|
|
|
|
%B = and i32 %A, -65536 ; 0xFFFF0000
|
|
|
|
%C = zext i16 %a1 to i32
|
|
|
|
%D = or i32 %B, %C
|
|
|
|
store i32 %D, i32* %a0, align 4
|
|
|
|
ret void
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test3:
|
|
|
|
; X64: movw %si, (%rdi)
|
|
|
|
|
|
|
|
; X32: test3:
|
|
|
|
; X32: movw 8(%esp), %ax
|
|
|
|
; X32: movw %ax, (%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
; @test4 -- replace the high 16 bits of the i32 at %a0 with %a1.
; The mask 65535 (0x0000FFFF) keeps bits 0-15 of the loaded word; the
; zero-extended i16 argument is shifted left by 16 and OR'd into bits 16-31.
; The X64 and X32 check lines expect a 16-bit store (movw) to offset 2 of
; the pointer. On X32 the argument is expected to be fetched with a
; zero-extending movzwl from 8(%esp) before its low half is stored.
define void @test4(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i32* %a0, align 4
|
|
|
|
%B = and i32 %A, 65535 ; 0x0000FFFF
|
|
|
|
%C = zext i16 %a1 to i32
|
|
|
|
%CS = shl i32 %C, 16
|
|
|
|
%D = or i32 %B, %CS
|
|
|
|
store i32 %D, i32* %a0, align 4
|
|
|
|
ret void
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test4:
|
|
|
|
; X64: movw %si, 2(%rdi)
|
|
|
|
|
|
|
|
; X32: test4:
|
2010-04-28 10:30:49 +02:00
|
|
|
; X32: movzwl 8(%esp), %eax
|
2010-04-15 07:40:59 +02:00
|
|
|
; X32: movw %ax, 2(%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
; @test5 -- replace bits 16-31 of the i64 at %a0 with %a1.
; Same pattern as the i32 tests but on a 64-bit word: the mask -4294901761
; (0xFFFFFFFF0000FFFF) clears only bits 16-31 of the loaded value, and the
; zero-extended i16 argument is shifted left by 16 and OR'd into that hole.
; The i64 load and store are both marked align 4.
; The X64 and X32 check lines expect a 16-bit store (movw) to offset 2 of
; the pointer. On X32 the argument is expected to be fetched with a
; zero-extending movzwl from 8(%esp) before its low half is stored.
define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i64* %a0, align 4
|
|
|
|
%B = and i64 %A, -4294901761 ; 0xFFFFFFFF0000FFFF
|
|
|
|
%C = zext i16 %a1 to i64
|
|
|
|
%CS = shl i64 %C, 16
|
|
|
|
%D = or i64 %B, %CS
|
|
|
|
store i64 %D, i64* %a0, align 4
|
|
|
|
ret void
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test5:
|
|
|
|
; X64: movw %si, 2(%rdi)
|
|
|
|
|
|
|
|
; X32: test5:
|
2010-04-28 10:30:49 +02:00
|
|
|
; X32: movzwl 8(%esp), %eax
|
2010-04-15 07:40:59 +02:00
|
|
|
; X32: movw %ax, 2(%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
; @test6 -- replace bits 40-47 of the i64 at %a0 with %a1.
; The mask -280375465082881 (0xFFFF00FFFFFFFFFF) clears a single byte in the
; upper half of the loaded 64-bit value; the zero-extended i8 argument is
; shifted left by 40 and OR'd into that hole before the value is stored back.
; The i64 load and store are both marked align 4.
; The X64 and X32 check lines expect a byte store (movb) to offset 5 of the
; pointer; on X32 the argument is first read from 8(%esp).
define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i64* %a0, align 4
|
|
|
|
%B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF
|
|
|
|
%C = zext i8 %a1 to i64
|
|
|
|
%CS = shl i64 %C, 40
|
|
|
|
%D = or i64 %B, %CS
|
|
|
|
store i64 %D, i64* %a0, align 4
|
|
|
|
ret void
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test6:
|
|
|
|
; X64: movb %sil, 5(%rdi)
|
|
|
|
|
|
|
|
|
|
|
|
; X32: test6:
|
|
|
|
; X32: movb 8(%esp), %al
|
|
|
|
; X32: movb %al, 5(%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
Enhance the load/store narrowing optimization to handle a
TokenFactor in between the load and the store. This allows us to
optimize test7 into:
_test7: ## @test7
## BB#0: ## %entry
movl (%rdx), %eax
## kill: SIL<def> ESI<kill>
movb %sil, 5(%rdi)
ret
instead of:
_test7: ## @test7
## BB#0: ## %entry
movl 4(%esp), %ecx
movl $-65281, %eax ## imm = 0xFFFFFFFFFFFF00FF
andl 4(%ecx), %eax
movzbl 8(%esp), %edx
shll $8, %edx
addl %eax, %edx
movl 12(%esp), %eax
movl (%eax), %eax
movl %edx, 4(%ecx)
ret
llvm-svn: 101355
2010-04-15 08:10:49 +02:00
|
|
|
|
|
|
|
; @test7 -- same byte replacement as @test6 (bits 40-47 of the i64 at %a0,
; mask 0xFFFF00FFFFFFFFFF, argument shifted left by 40), but with an extra,
; unrelated i32 load from %P2 placed before the load of %a0. The value of
; that extra load is what the function returns. Unlike the other tests this
; function returns i32 and is declared without 'ssp'.
; NOTE(review): per the commit note that precedes this function in this view,
; the extra load is there to put a TokenFactor between the narrowed load and
; store -- confirm against the DAG combiner change it accompanies.
; The X64 and X32 check lines expect a byte store (movb) to offset 5 of the
; pointer; on X32 the argument is read from 8(%esp) into %cl rather than %al.
define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
|
|
|
|
entry:
|
|
|
|
%OtherLoad = load i32 *%P2
|
|
|
|
%A = load i64* %a0, align 4
|
|
|
|
%B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF
|
|
|
|
%C = zext i8 %a1 to i64
|
|
|
|
%CS = shl i64 %C, 40
|
|
|
|
%D = or i64 %B, %CS
|
|
|
|
store i64 %D, i64* %a0, align 4
|
|
|
|
ret i32 %OtherLoad
|
|
|
|
; X64: test7:
|
|
|
|
; X64: movb %sil, 5(%rdi)
|
|
|
|
|
|
|
|
|
|
|
|
; X32: test7:
|
|
|
|
; X32: movb 8(%esp), %cl
|
|
|
|
; X32: movb %cl, 5(%{{.*}})
|
|
|
|
}
|
|
|
|
|