2009-09-20 21:03:47 +02:00
|
|
|
; RUN: opt < %s -gvn -S | FileCheck %s
|
|
|
|
|
|
|
|
; 32-bit little endian target.
|
|
|
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
|
|
|
|
|
|
|
;; Trivial RLE test.
|
|
|
|
;; Baseline case: an i32 is stored through %P and immediately reloaded through
;; the same pointer with the same type. The expected output returns %V
;; directly instead of the reloaded value %A.
define i32 @test0(i32 %V, i32* %P) {
|
|
|
|
store i32 %V, i32* %P
|
|
|
|
|
|
|
|
%A = load i32* %P
|
|
|
|
ret i32 %A
|
|
|
|
; CHECK: @test0
|
|
|
|
; CHECK: ret i32 %V
|
|
|
|
}
|
|
|
|
|
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
;; Store -> Load and Load -> Load forwarding where src and dst are different
|
|
|
|
;; types, but where the base pointer is a must alias.
|
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
|
|
|
|
;; i32 -> f32 forwarding.
|
|
|
|
;; Stores an i32 through %P, then reloads the same address as a float via a
;; bitcast of the pointer. The expected output contains no load between the
;; function name and the float return.
define float @coerce_mustalias1(i32 %V, i32* %P) {
|
|
|
|
store i32 %V, i32* %P
|
|
|
|
|
|
|
|
;; %P2 is the same address as %P, viewed as a float pointer.
%P2 = bitcast i32* %P to float*
|
|
|
|
|
|
|
|
%A = load float* %P2
|
|
|
|
ret float %A
|
|
|
|
; CHECK: @coerce_mustalias1
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret float
|
|
|
|
}
|
|
|
|
|
|
|
|
;; i32* -> float forwarding.
|
|
|
|
;; Stores a pointer value (i32*) through %P, then reloads the same address as
;; a float. The datalayout above declares 32-bit pointers (p:32:32:32). The
;; expected output contains no load before the float return.
define float @coerce_mustalias2(i32* %V, i32** %P) {
|
|
|
|
store i32* %V, i32** %P
|
|
|
|
|
|
|
|
;; %P2 is the same address as %P, viewed as a float pointer.
%P2 = bitcast i32** %P to float*
|
|
|
|
|
|
|
|
%A = load float* %P2
|
|
|
|
ret float %A
|
|
|
|
; CHECK: @coerce_mustalias2
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret float
|
|
|
|
}
|
|
|
|
|
|
|
|
;; float -> i32* forwarding.
|
|
|
|
;; Reverse direction of coerce_mustalias2: stores a float through %P, then
;; reloads the same address as an i32* pointer value. The expected output
;; contains no load before the pointer return.
define i32* @coerce_mustalias3(float %V, float* %P) {
|
|
|
|
store float %V, float* %P
|
|
|
|
|
|
|
|
;; %P2 is the same address as %P, viewed as a pointer to i32*.
%P2 = bitcast float* %P to i32**
|
|
|
|
|
|
|
|
%A = load i32** %P2
|
|
|
|
ret i32* %A
|
|
|
|
; CHECK: @coerce_mustalias3
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i32*
|
|
|
|
}
|
|
|
|
|
|
|
|
;; i32 -> f32 load forwarding.
|
|
|
|
;; Load -> load case: %A loads an i32 from %P and %B loads a float from the
;; same address through a bitcast pointer. Both values are used, one on each
;; side of the branch on %cond. The expected output keeps the first load
;; (%A) and contains no further load before the first float return.
define float @coerce_mustalias4(i32* %P, i1 %cond) {
|
|
|
|
%A = load i32* %P
|
2009-09-20 22:09:34 +02:00
|
|
|
|
2009-09-20 21:03:47 +02:00
|
|
|
%P2 = bitcast i32* %P to float*
|
|
|
|
%B = load float* %P2
|
2009-09-20 22:09:34 +02:00
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
2009-09-20 21:03:47 +02:00
|
|
|
;; True path returns the value of the second (float) load.
ret float %B
|
|
|
|
|
|
|
|
F:
|
|
|
|
;; False path returns the first (i32) load reinterpreted as a float.
%X = bitcast i32 %A to float
|
|
|
|
ret float %X
|
|
|
|
|
|
|
|
; CHECK: @coerce_mustalias4
|
|
|
|
; CHECK: %A = load i32* %P
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret float
|
|
|
|
; CHECK: F:
|
|
|
|
}
|
|
|
|
|
|
|
|
;; i32 -> i8 forwarding
|
|
|
|
;; Stores an i32 through %P, then reloads a single i8 from the same address
;; (a narrower load starting at the store's base pointer). The expected
;; output contains no load before the i8 return.
define i8 @coerce_mustalias5(i32 %V, i32* %P) {
|
|
|
|
store i32 %V, i32* %P
|
|
|
|
|
|
|
|
;; %P2 is the same address as %P, viewed as an i8 pointer.
%P2 = bitcast i32* %P to i8*
|
|
|
|
|
|
|
|
%A = load i8* %P2
|
|
|
|
ret i8 %A
|
|
|
|
; CHECK: @coerce_mustalias5
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8
|
|
|
|
}
|
|
|
|
|
|
|
|
;; i64 -> float forwarding
|
|
|
|
;; Stores an i64 through %P, then reloads a float from the same address
;; (a narrower load of a different type starting at the store's base
;; pointer). The expected output contains no load before the float return.
define float @coerce_mustalias6(i64 %V, i64* %P) {
|
|
|
|
store i64 %V, i64* %P
|
|
|
|
|
|
|
|
;; %P2 is the same address as %P, viewed as a float pointer.
%P2 = bitcast i64* %P to float*
|
|
|
|
|
|
|
|
%A = load float* %P2
|
|
|
|
ret float %A
|
|
|
|
; CHECK: @coerce_mustalias6
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret float
|
|
|
|
}
|
|
|
|
|
|
|
|
;; i64 -> i8* (32-bit) forwarding
|
|
|
|
;; Stores an i64 through %P, then reloads an i8* pointer value from the same
;; address. The datalayout above declares 32-bit pointers (p:32:32:32), so
;; the reloaded value is narrower than the stored one. The expected output
;; contains no load before the pointer return.
define i8* @coerce_mustalias7(i64 %V, i64* %P) {
|
|
|
|
store i64 %V, i64* %P
|
|
|
|
|
|
|
|
;; %P2 is the same address as %P, viewed as a pointer to i8*.
%P2 = bitcast i64* %P to i8**
|
|
|
|
|
|
|
|
%A = load i8** %P2
|
|
|
|
ret i8* %A
|
|
|
|
; CHECK: @coerce_mustalias7
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8*
|
|
|
|
}
|
|
|
|
|
2009-09-20 22:09:34 +02:00
|
|
|
;; non-local i32/float -> i8 load forwarding.
|
|
|
|
;; Non-local case: each predecessor of Cont stores to the address %P with a
;; different type (an i32 constant in T, a float constant in F), and Cont
;; reloads an i8 from that address. Both pointer casts are computed in the
;; entry block. The expected output defines %A in Cont as an i8 phi, with no
;; load between the phi and the return.
define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
|
|
|
|
%P2 = bitcast i32* %P to float*
|
|
|
|
%P3 = bitcast i32* %P to i8*
|
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
|
|
|
store i32 42, i32* %P
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
F:
|
|
|
|
store float 1.0, float* %P2
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
Cont:
|
|
|
|
%A = load i8* %P3
|
|
|
|
ret i8 %A
|
|
|
|
|
|
|
|
; CHECK: @coerce_mustalias_nonlocal0
|
|
|
|
; CHECK: Cont:
|
|
|
|
; CHECK: %A = phi i8 [
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8 %A
|
|
|
|
}
|
|
|
|
|
Improve GVN to be able to forward substitute a small load
from a piece of a large store when both are in the same block.
This allows clang to compile the testcase in PR4216 to this code:
_test_bitfield:
movl 4(%esp), %eax
movl %eax, %ecx
andl $-65536, %ecx
orl $32962, %eax
andl $40186, %eax
orl %ecx, %eax
ret
This is not ideal, but is a whole lot better than the code produced
by llvm-gcc:
_test_bitfield:
movw $-32574, %ax
orw 4(%esp), %ax
andw $-25350, %ax
movw %ax, 4(%esp)
movw 7(%esp), %cx
shlw $8, %cx
movzbl 6(%esp), %edx
orw %cx, %dx
movzwl %dx, %ecx
shll $16, %ecx
movzwl %ax, %eax
orl %ecx, %eax
ret
and dramatically better than that produced by gcc 4.2:
_test_bitfield:
pushl %ebx
call L3
"L00000000001$pb":
L3:
popl %ebx
movl 8(%esp), %eax
leal 0(,%eax,4), %edx
sarb $7, %dl
movl %eax, %ecx
andl $7168, %ecx
andl $-7201, %ebx
movzbl %dl, %edx
andl $1, %edx
sall $5, %edx
orl %ecx, %ebx
orl %edx, %ebx
andl $24, %eax
andl $-58336, %ebx
orl %eax, %ebx
orl $32962, %ebx
movl %ebx, %eax
popl %ebx
ret
llvm-svn: 82439
2009-09-21 07:57:11 +02:00
|
|
|
;; non-local i32/float -> i8 load forwarding. This also tests that the "P3"
|
|
|
|
;; bitcast equivalence can be properly phi translated.
|
|
|
|
;; Variant of the non-local case in which the i8 pointer cast %P3 is computed
;; in Cont, next to the load, rather than in the entry block. T stores an i32
;; constant and F stores a float constant to the address %P before branching
;; to Cont. The directives at the end of this function use the prefix "HECK",
;; which does not match the CHECK prefix used elsewhere in this file; the
;; FIXME below says this was done deliberately to disable them.
define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {
|
|
|
|
%P2 = bitcast i32* %P to float*
|
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
|
|
|
store i32 42, i32* %P
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
F:
|
|
|
|
store float 1.0, float* %P2
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
Cont:
|
|
|
|
%P3 = bitcast i32* %P to i8*
|
|
|
|
%A = load i8* %P3
|
|
|
|
ret i8 %A
|
|
|
|
|
|
|
|
;; FIXME: This is disabled because this caused a miscompile in the llvm-gcc
|
|
|
|
;; bootstrap, see r82411
|
|
|
|
;
|
|
|
|
; HECK: @coerce_mustalias_nonlocal1
|
|
|
|
; HECK: Cont:
|
|
|
|
; HECK: %A = phi i8 [
|
|
|
|
; HECK-NOT: load
|
|
|
|
; HECK: ret i8 %A
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-09-20 22:09:34 +02:00
|
|
|
;; non-local i32 -> i8 partial redundancy load forwarding.
|
|
|
|
;; Partially redundant case: only the T predecessor of Cont stores to %P; the
;; F predecessor does not. Cont reloads an i8 from the same address. The
;; expected output has a load of %P3 after the F label, defines %A in Cont as
;; an i8 phi, and has no load between the phi and the return.
define i8 @coerce_mustalias_pre0(i32* %P, i1 %cond) {
|
|
|
|
%P3 = bitcast i32* %P to i8*
|
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
|
|
|
store i32 42, i32* %P
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
F:
|
|
|
|
;; No store on this path.
br label %Cont
|
|
|
|
|
|
|
|
Cont:
|
|
|
|
%A = load i8* %P3
|
|
|
|
ret i8 %A
|
|
|
|
|
|
|
|
; CHECK: @coerce_mustalias_pre0
|
|
|
|
; CHECK: F:
|
|
|
|
; CHECK: load i8* %P3
|
|
|
|
; CHECK: Cont:
|
|
|
|
; CHECK: %A = phi i8 [
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8 %A
|
|
|
|
}
|
|
|
|
|
Improve GVN to be able to forward substitute a small load
from a piece of a large store when both are in the same block.
This allows clang to compile the testcase in PR4216 to this code:
_test_bitfield:
movl 4(%esp), %eax
movl %eax, %ecx
andl $-65536, %ecx
orl $32962, %eax
andl $40186, %eax
orl %ecx, %eax
ret
This is not ideal, but is a whole lot better than the code produced
by llvm-gcc:
_test_bitfield:
movw $-32574, %ax
orw 4(%esp), %ax
andw $-25350, %ax
movw %ax, 4(%esp)
movw 7(%esp), %cx
shlw $8, %cx
movzbl 6(%esp), %edx
orw %cx, %dx
movzwl %dx, %ecx
shll $16, %ecx
movzwl %ax, %eax
orl %ecx, %eax
ret
and dramatically better than that produced by gcc 4.2:
_test_bitfield:
pushl %ebx
call L3
"L00000000001$pb":
L3:
popl %ebx
movl 8(%esp), %eax
leal 0(,%eax,4), %edx
sarb $7, %dl
movl %eax, %ecx
andl $7168, %ecx
andl $-7201, %ebx
movzbl %dl, %edx
andl $1, %edx
sall $5, %edx
orl %ecx, %ebx
orl %edx, %ebx
andl $24, %eax
andl $-58336, %ebx
orl %eax, %ebx
orl $32962, %ebx
movl %ebx, %eax
popl %ebx
ret
llvm-svn: 82439
2009-09-21 07:57:11 +02:00
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
;; Store -> Load and Load -> Load forwarding where src and dst are different
|
|
|
|
;; types, and the reload is an offset from the store pointer.
|
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
|
|
|
|
;; i32 -> i8 forwarding.
|
2009-09-21 07:57:47 +02:00
|
|
|
;; PR4216
|
Improve GVN to be able to forward substitute a small load
from a piece of a large store when both are in the same block.
This allows clang to compile the testcase in PR4216 to this code:
_test_bitfield:
movl 4(%esp), %eax
movl %eax, %ecx
andl $-65536, %ecx
orl $32962, %eax
andl $40186, %eax
orl %ecx, %eax
ret
This is not ideal, but is a whole lot better than the code produced
by llvm-gcc:
_test_bitfield:
movw $-32574, %ax
orw 4(%esp), %ax
andw $-25350, %ax
movw %ax, 4(%esp)
movw 7(%esp), %cx
shlw $8, %cx
movzbl 6(%esp), %edx
orw %cx, %dx
movzwl %dx, %ecx
shll $16, %ecx
movzwl %ax, %eax
orl %ecx, %eax
ret
and dramatically better than that produced by gcc 4.2:
_test_bitfield:
pushl %ebx
call L3
"L00000000001$pb":
L3:
popl %ebx
movl 8(%esp), %eax
leal 0(,%eax,4), %edx
sarb $7, %dl
movl %eax, %ecx
andl $7168, %ecx
andl $-7201, %ebx
movzbl %dl, %edx
andl $1, %edx
sall $5, %edx
orl %ecx, %ebx
orl %edx, %ebx
andl $24, %eax
andl $-58336, %ebx
orl %eax, %ebx
orl $32962, %ebx
movl %ebx, %eax
popl %ebx
ret
llvm-svn: 82439
2009-09-21 07:57:11 +02:00
|
|
|
;; Stores an i32 through %P, then reloads a single i8 located 2 bytes past
;; the stored address (the reload lies inside the stored value rather than
;; at its base). The expected output contains no load before the i8 return.
define i8 @coerce_offset0(i32 %V, i32* %P) {
|
|
|
|
store i32 %V, i32* %P
|
|
|
|
|
|
|
|
%P2 = bitcast i32* %P to i8*
|
|
|
|
;; Advance the byte pointer by 2 elements of i8, i.e. 2 bytes.
%P3 = getelementptr i8* %P2, i32 2
|
|
|
|
|
|
|
|
%A = load i8* %P3
|
|
|
|
ret i8 %A
|
|
|
|
; CHECK: @coerce_offset0
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|