Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
; rdar://7860110
|
2010-09-02 23:18:42 +02:00
|
|
|
; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=X64
|
|
|
|
; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s -check-prefix=X32
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
|
|
|
target triple = "x86_64-apple-darwin10.2"
|
|
|
|
|
|
|
|
define void @test1(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i32* %a0, align 4
|
|
|
|
%B = and i32 %A, -256 ; 0xFFFFFF00
|
|
|
|
%C = zext i8 %a1 to i32
|
|
|
|
%D = or i32 %C, %B
|
|
|
|
store i32 %D, i32* %a0, align 4
|
|
|
|
ret void
|
|
|
|
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test1:
|
|
|
|
; X64: movb %sil, (%rdi)
|
|
|
|
|
|
|
|
; X32: test1:
|
|
|
|
; X32: movb 8(%esp), %al
|
|
|
|
; X32: movb %al, (%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
define void @test2(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i32* %a0, align 4
|
|
|
|
%B = and i32 %A, -65281 ; 0xFFFF00FF
|
|
|
|
%C = zext i8 %a1 to i32
|
|
|
|
%CS = shl i32 %C, 8
|
|
|
|
%D = or i32 %B, %CS
|
|
|
|
store i32 %D, i32* %a0, align 4
|
|
|
|
ret void
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test2:
|
|
|
|
; X64: movb %sil, 1(%rdi)
|
|
|
|
|
|
|
|
; X32: test2:
|
|
|
|
; X32: movb 8(%esp), %al
|
|
|
|
; X32: movb %al, 1(%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i32* %a0, align 4
|
|
|
|
%B = and i32 %A, -65536 ; 0xFFFF0000
|
|
|
|
%C = zext i16 %a1 to i32
|
|
|
|
%D = or i32 %B, %C
|
|
|
|
store i32 %D, i32* %a0, align 4
|
|
|
|
ret void
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test3:
|
|
|
|
; X64: movw %si, (%rdi)
|
|
|
|
|
|
|
|
; X32: test3:
|
|
|
|
; X32: movw 8(%esp), %ax
|
|
|
|
; X32: movw %ax, (%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
define void @test4(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i32* %a0, align 4
|
|
|
|
%B = and i32 %A, 65535 ; 0x0000FFFF
|
|
|
|
%C = zext i16 %a1 to i32
|
|
|
|
%CS = shl i32 %C, 16
|
|
|
|
%D = or i32 %B, %CS
|
|
|
|
store i32 %D, i32* %a0, align 4
|
|
|
|
ret void
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test4:
|
|
|
|
; X64: movw %si, 2(%rdi)
|
|
|
|
|
|
|
|
; X32: test4:
|
2010-06-24 16:30:44 +02:00
|
|
|
; X32: movl 8(%esp), %eax
|
2010-04-15 07:40:59 +02:00
|
|
|
; X32: movw %ax, 2(%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i64* %a0, align 4
|
|
|
|
%B = and i64 %A, -4294901761 ; 0xFFFFFFFF0000FFFF
|
|
|
|
%C = zext i16 %a1 to i64
|
|
|
|
%CS = shl i64 %C, 16
|
|
|
|
%D = or i64 %B, %CS
|
|
|
|
store i64 %D, i64* %a0, align 4
|
|
|
|
ret void
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test5:
|
|
|
|
; X64: movw %si, 2(%rdi)
|
|
|
|
|
|
|
|
; X32: test5:
|
2010-04-28 10:30:49 +02:00
|
|
|
; X32: movzwl 8(%esp), %eax
|
2010-04-15 07:40:59 +02:00
|
|
|
; X32: movw %ax, 2(%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%A = load i64* %a0, align 4
|
|
|
|
%B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF
|
|
|
|
%C = zext i8 %a1 to i64
|
|
|
|
%CS = shl i64 %C, 40
|
|
|
|
%D = or i64 %B, %CS
|
|
|
|
store i64 %D, i64* %a0, align 4
|
|
|
|
ret void
|
2010-04-15 07:40:59 +02:00
|
|
|
; X64: test6:
|
|
|
|
; X64: movb %sil, 5(%rdi)
|
|
|
|
|
|
|
|
|
|
|
|
; X32: test6:
|
|
|
|
; X32: movb 8(%esp), %al
|
|
|
|
; X32: movb %al, 5(%{{.*}})
|
Implement rdar://7860110 (also in target/readme.txt) narrowing
a load/or/and/store sequence into a narrower store when it is
safe. Daniel tells me that clang will start producing this sort
of thing with bitfields, and this does trigger a few dozen times
on 176.gcc produced by llvm-gcc even now.
This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:
movl %eax, 36(%rdi)
instead of:
movl $4294967295, %eax ## imm = 0xFFFFFFFF
andq 32(%rdi), %rax
shlq $32, %rcx
addq %rax, %rcx
movq %rcx, 32(%rdi)
and each of the testcases into a single store. Each of them used
to compile into craziness like this:
_test4:
movl $65535, %eax ## imm = 0xFFFF
andl (%rdi), %eax
shll $16, %esi
addl %eax, %esi
movl %esi, (%rdi)
ret
llvm-svn: 101343
2010-04-15 06:48:01 +02:00
|
|
|
}
|
enhance the load/store narrowing optimization to handle a
tokenfactor in between the load/store. This allows us to
optimize test7 into:
_test7: ## @test7
## BB#0: ## %entry
movl (%rdx), %eax
## kill: SIL<def> ESI<kill>
movb %sil, 5(%rdi)
ret
instead of:
_test7: ## @test7
## BB#0: ## %entry
movl 4(%esp), %ecx
movl $-65281, %eax ## imm = 0xFFFFFFFFFFFF00FF
andl 4(%ecx), %eax
movzbl 8(%esp), %edx
shll $8, %edx
addl %eax, %edx
movl 12(%esp), %eax
movl (%eax), %eax
movl %edx, 4(%ecx)
ret
llvm-svn: 101355
2010-04-15 08:10:49 +02:00
|
|
|
|
|
|
|
define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
|
|
|
|
entry:
|
|
|
|
%OtherLoad = load i32 *%P2
|
|
|
|
%A = load i64* %a0, align 4
|
|
|
|
%B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF
|
|
|
|
%C = zext i8 %a1 to i64
|
|
|
|
%CS = shl i64 %C, 40
|
|
|
|
%D = or i64 %B, %CS
|
|
|
|
store i64 %D, i64* %a0, align 4
|
|
|
|
ret i32 %OtherLoad
|
|
|
|
; X64: test7:
|
|
|
|
; X64: movb %sil, 5(%rdi)
|
|
|
|
|
|
|
|
|
|
|
|
; X32: test7:
|
|
|
|
; X32: movb 8(%esp), %cl
|
|
|
|
; X32: movb %cl, 5(%{{.*}})
|
|
|
|
}
|
|
|
|
|
2010-09-02 23:18:42 +02:00
|
|
|
; PR7833
|
|
|
|
|
|
|
|
@g_16 = internal global i32 -1
|
|
|
|
|
|
|
|
; X64: test8:
|
|
|
|
; X64-NEXT: movl _g_16(%rip), %eax
|
|
|
|
; X64-NEXT: movl $0, _g_16(%rip)
|
|
|
|
; X64-NEXT: orl $1, %eax
|
|
|
|
; X64-NEXT: movl %eax, _g_16(%rip)
|
|
|
|
; X64-NEXT: ret
|
|
|
|
define void @test8() nounwind {
|
|
|
|
%tmp = load i32* @g_16
|
|
|
|
store i32 0, i32* @g_16
|
|
|
|
%or = or i32 %tmp, 1
|
|
|
|
store i32 %or, i32* @g_16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; X64: test9:
|
|
|
|
; X64-NEXT: orb $1, _g_16(%rip)
|
|
|
|
; X64-NEXT: ret
|
|
|
|
define void @test9() nounwind {
|
|
|
|
%tmp = load i32* @g_16
|
|
|
|
%or = or i32 %tmp, 1
|
|
|
|
store i32 %or, i32* @g_16
|
|
|
|
ret void
|
|
|
|
}
|
2010-10-01 07:36:09 +02:00
|
|
|
|
|
|
|
; rdar://8494845 + PR8244
|
|
|
|
; X64: test10:
|
|
|
|
; X64-NEXT: movsbl (%rdi), %eax
|
|
|
|
; X64-NEXT: shrl $8, %eax
|
|
|
|
; X64-NEXT: ret
|
|
|
|
define i8 @test10(i8* %P) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%tmp = load i8* %P, align 1
|
|
|
|
%conv = sext i8 %tmp to i32
|
|
|
|
%shr3 = lshr i32 %conv, 8
|
|
|
|
%conv2 = trunc i32 %shr3 to i8
|
|
|
|
ret i8 %conv2
|
|
|
|
}
|