
[Tests] More unordered atomic lowering tests

This time, focused on narrowing and widening transformations.  Also, include a few simple memory optimization tests to highlight missed opportunities.  This is part of building up the test base for D57601.

llvm-svn: 353972
Philip Reames 2019-02-13 19:49:17 +00:00
commit 9b9fbf59c5 (parent b784551971)


@ -117,10 +117,12 @@ define void @store_i64(i64* %ptr, i64 %v) {
ret void
}
;; The tests in the rest of this file are intended to show transforms which we
;; either *can't* do for legality reasons, or don't currently implement.  The latter
;; are noted carefully where relevant.
;; Start w/some clearly illegal ones.
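;; For illustration, a hypothetical byte-width rewrite of
;; @narrow_writeback_or is sketched below (the @..._sketch name is ours,
;; and the body assumes the test does an i64 load/or/store on a
;; little-endian target).  This form is *not* a legal transform: the i8
;; store no longer publishes the full i64 atomically, so a racing i64
;; store could mix byte-wise with it, leaving a value no thread wrote.
define void @narrow_writeback_or_sketch(i64* %ptr) {
  %p8 = bitcast i64* %ptr to i8*
  ; Only the low byte is read, modified, and written back.
  %v = load atomic i8, i8* %p8 unordered, align 8
  %or = or i8 %v, 1
  store atomic i8 %or, i8* %p8 unordered, align 8
  ret void
}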
; Must use a full width op, not a byte op
define void @narrow_writeback_or(i64* %ptr) {
; CHECK-O0-LABEL: narrow_writeback_or:
@ -181,6 +183,17 @@ define void @narrow_writeback_xor(i64* %ptr) {
ret void
}
;; The next batch of tests exercises cases where store widening would
;; improve code generation.  Note that widening is only legal if the
;; resulting wider operation is still atomic.  Each test has a well-aligned
;; and an unaligned variant to ensure we get correct codegen for both.
;;
;; Note: It's not a legality issue, but there's a gotcha here to be aware
;; of.  Once we widen a pair of atomic stores, we lose the information
;; that the original atomicity requirement was half the width.  Given that,
;; we can't then split the store again.  This challenges our usual iterative
;; approach to incremental improvement.
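;; As a sketch of the widening itself (illustrative only; the @..._sketch
;; name is ours, and we assume the well-aligned variant stores with
;; align 8 on a little-endian target), @widen_store's two i32 stores
;; would merge into a single atomic i64 store.  This is legal precisely
;; because the wider store is itself atomic and, at this alignment,
;; cannot cross a cache line.
define void @widen_store_sketch(i32* %p0, i32 %v1, i32 %v2) {
  ; Merge the two 32-bit values into one i64, with %v2 in the high half.
  %z1 = zext i32 %v1 to i64
  %z2 = zext i32 %v2 to i64
  %hi = shl i64 %z2, 32
  %merged = or i64 %z1, %hi
  %p64 = bitcast i32* %p0 to i64*
  store atomic i64 %merged, i64* %p64 unordered, align 8
  ret void
}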
; Legal if wider type is also atomic (TODO)
define void @widen_store(i32* %p0, i32 %v1, i32 %v2) {
; CHECK-O0-LABEL: widen_store:
@ -200,6 +213,27 @@ define void @widen_store(i32* %p0, i32 %v1, i32 %v2) {
ret void
}
; This one is *NOT* legal to widen.  With the weaker alignment, the
; wider access might cross a cache line boundary and violate the
; atomicity requirement.
define void @widen_store_unaligned(i32* %p0, i32 %v1, i32 %v2) {
; CHECK-O0-LABEL: widen_store_unaligned:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movl %esi, (%rdi)
; CHECK-O0-NEXT: movl %edx, 4(%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: widen_store_unaligned:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movl %esi, (%rdi)
; CHECK-O3-NEXT: movl %edx, 4(%rdi)
; CHECK-O3-NEXT: retq
%p1 = getelementptr i32, i32* %p0, i64 1
store atomic i32 %v1, i32* %p0 unordered, align 4
store atomic i32 %v2, i32* %p1 unordered, align 4
ret void
}
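;; To make the alignment hazard concrete: with only align 4, a merged
;; i64 store could begin at byte 60 of a typical 64-byte cache line and
;; span bytes 60-67, straddling the boundary at byte 64.  Split across
;; two lines, the hardware no longer guarantees the store is performed
;; atomically, so widening must be rejected here.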
; Legal if wider type is also atomic (TODO)
define void @widen_broadcast(i32* %p0, i32 %v) {
; CHECK-O0-LABEL: widen_broadcast:
@ -219,6 +253,25 @@ define void @widen_broadcast(i32* %p0, i32 %v) {
ret void
}
; Not legal to widen due to alignment restriction
define void @widen_broadcast_unaligned(i32* %p0, i32 %v) {
; CHECK-O0-LABEL: widen_broadcast_unaligned:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movl %esi, (%rdi)
; CHECK-O0-NEXT: movl %esi, 4(%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: widen_broadcast_unaligned:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movl %esi, (%rdi)
; CHECK-O3-NEXT: movl %esi, 4(%rdi)
; CHECK-O3-NEXT: retq
%p1 = getelementptr i32, i32* %p0, i64 1
store atomic i32 %v, i32* %p0 unordered, align 4
store atomic i32 %v, i32* %p1 unordered, align 4
ret void
}
; Legal if wider type is also atomic (TODO)
define void @vec_store(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: vec_store:
@ -246,6 +299,34 @@ define void @vec_store(i32* %p0, <2 x i32> %vec) {
ret void
}
; Not legal to widen due to alignment restriction
define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: vec_store_unaligned:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movd %xmm0, %eax
; CHECK-O0-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-O0-NEXT: movd %xmm0, %ecx
; CHECK-O0-NEXT: movl %eax, (%rdi)
; CHECK-O0-NEXT: movl %ecx, 4(%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: vec_store_unaligned:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movd %xmm0, %eax
; CHECK-O3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-O3-NEXT: movd %xmm0, %ecx
; CHECK-O3-NEXT: movl %eax, (%rdi)
; CHECK-O3-NEXT: movl %ecx, 4(%rdi)
; CHECK-O3-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%v2 = extractelement <2 x i32> %vec, i32 1
%p1 = getelementptr i32, i32* %p0, i64 1
store atomic i32 %v1, i32* %p0 unordered, align 4
store atomic i32 %v2, i32* %p1 unordered, align 4
ret void
}
; Legal if wider type is also atomic (TODO)
; Also, can avoid register move from xmm to eax (TODO)
@ -270,6 +351,27 @@ define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) {
ret void
}
; Not legal to widen due to alignment restriction
define void @widen_broadcast2_unaligned(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: widen_broadcast2_unaligned:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movd %xmm0, %eax
; CHECK-O0-NEXT: movl %eax, (%rdi)
; CHECK-O0-NEXT: movl %eax, 4(%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: widen_broadcast2_unaligned:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movd %xmm0, %eax
; CHECK-O3-NEXT: movl %eax, (%rdi)
; CHECK-O3-NEXT: movl %eax, 4(%rdi)
; CHECK-O3-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%p1 = getelementptr i32, i32* %p0, i64 1
store atomic i32 %v1, i32* %p0 unordered, align 4
store atomic i32 %v1, i32* %p1 unordered, align 4
ret void
}
; Legal if wider type is also atomic (TODO)
define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) {
@ -292,6 +394,30 @@ define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) {
ret void
}
; Not legal to widen due to alignment restriction
define void @widen_zero_init_unaligned(i32* %p0, i32 %v1, i32 %v2) {
; CHECK-O0-LABEL: widen_zero_init_unaligned:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movl $0, (%rdi)
; CHECK-O0-NEXT: movl $0, 4(%rdi)
; CHECK-O0-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-O0-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: widen_zero_init_unaligned:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movl $0, (%rdi)
; CHECK-O3-NEXT: movl $0, 4(%rdi)
; CHECK-O3-NEXT: retq
%p1 = getelementptr i32, i32* %p0, i64 1
store atomic i32 0, i32* %p0 unordered, align 4
store atomic i32 0, i32* %p1 unordered, align 4
ret void
}
;; The next batch of tests stresses load folding.  Folding is legal
;; on x86, so these are simply checking optimization quality.
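;; "Folding" here means using the atomic load directly as a memory
;; operand of its user, e.g. (a sketch, for a load feeding an add of a
;; register value):
;;   movq %rsi, %rax
;;   addq (%rdi), %rax
;; rather than first loading into a register.  This is fine on x86
;; since the folded memory read is still a single full-width load.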
; Legal, as expected
define i64 @load_fold_add1(i64* %p) {
; CHECK-O0-LABEL: load_fold_add1:
@ -1196,6 +1322,12 @@ define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
ret i1 %ret
}
;; The next batch of tests checks for read-modify-write patterns.
;; Legally, it's okay to use a memory operand here as long as the access
;; is well aligned (i.e. doesn't cross a cache line boundary).  We are
;; required not to narrow the store though!
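;; For example, a folded RMW such as
;;   orq $1, (%rdi)
;; is fine for an aligned i64, but shrinking it to
;;   orb $1, (%rdi)
;; would not be: the store half of the RMW must still write the full
;; width atomically (compare @narrow_writeback_or above).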
; Legal, as expected
define void @rmw_fold_add1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_add1:
@ -1811,5 +1943,196 @@ define void @rmw_fold_xor2(i64* %p, i64 %v) {
ret void
}
;; The next batch tests truncations, in combination w/operations which
;; could be folded against the memory operation.
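;; A sketch of the reduced-width form of @fold_trunc (the @..._sketch
;; name is ours; assumes little-endian, so the low 32 bits sit at
;; offset 0).  Narrowing an atomic *load* is fine, since reading a
;; subset of atomically written bytes cannot observe a torn value:
define i32 @fold_trunc_sketch(i64* %p) {
  %p32 = bitcast i64* %p to i32*
  %v = load atomic i32, i32* %p32 unordered, align 8
  ret i32 %v
}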
; Legal to reduce the load width (TODO)
define i32 @fold_trunc(i64* %p) {
; CHECK-O0-LABEL: fold_trunc:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rdi
; CHECK-O0-NEXT: movl %edi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = trunc i64 %v to i32
ret i32 %ret
}
; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_add(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_add:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rdi
; CHECK-O0-NEXT: movl %edi, %eax
; CHECK-O0-NEXT: addl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_add:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: addl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%trunc = trunc i64 %v to i32
%ret = add i32 %trunc, %v2
ret i32 %ret
}
; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_and(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_and:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rdi
; CHECK-O0-NEXT: movl %edi, %eax
; CHECK-O0-NEXT: andl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_and:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: andl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%trunc = trunc i64 %v to i32
%ret = and i32 %trunc, %v2
ret i32 %ret
}
; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_or(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_or:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rdi
; CHECK-O0-NEXT: movl %edi, %eax
; CHECK-O0-NEXT: orl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_or:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: orl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%trunc = trunc i64 %v to i32
%ret = or i32 %trunc, %v2
ret i32 %ret
}
; It's tempting to split the wide load into two byte loads to reduce
; memory traffic, but this would be illegal for an atomic load: the two
; narrow loads could observe pieces of two different racing stores,
; producing a combined value no thread ever wrote.
define i32 @split_load(i64* %p) {
; CHECK-O0-LABEL: split_load:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rdi
; CHECK-O0-NEXT: movb %dil, %al
; CHECK-O0-NEXT: shrq $32, %rdi
; CHECK-O0-NEXT: movb %dil, %cl
; CHECK-O0-NEXT: orb %cl, %al
; CHECK-O0-NEXT: movzbl %al, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: split_load:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: orl %eax, %ecx
; CHECK-O3-NEXT: movzbl %cl, %eax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%b1 = trunc i64 %v to i8
%v.shift = lshr i64 %v, 32
%b2 = trunc i64 %v.shift to i8
%or = or i8 %b1, %b2
%ret = zext i8 %or to i32
ret i32 %ret
}
;; A collection of simple memory forwarding tests.  Nothing particularly
;; interesting semantics-wise, just demonstrating obvious missed transforms.
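;; E.g. for @load_forwarding below, the second load could simply reuse
;; the first load's value (a sketch; the @..._sketch name is ours):
define i64 @load_forwarding_sketch(i64* %p) {
  %v = load atomic i64, i64* %p unordered, align 8
  ; Second load forwarded from the first; "or x, x" is then just x.
  %ret = or i64 %v, %v
  ret i64 %ret
}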
; Legal to forward and fold (TODO)
define i64 @load_forwarding(i64* %p) {
; CHECK-O0-LABEL: load_forwarding:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq (%rdi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_forwarding:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: orq %rcx, %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%v2 = load atomic i64, i64* %p unordered, align 8
%ret = or i64 %v, %v2
ret i64 %ret
}
; Legal to forward (TODO)
define i64 @store_forward(i64* %p, i64 %v) {
; CHECK-O0-LABEL: store_forward:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq %rsi, (%rdi)
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_forward:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq %rsi, (%rdi)
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: retq
store atomic i64 %v, i64* %p unordered, align 8
%ret = load atomic i64, i64* %p unordered, align 8
ret i64 %ret
}
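;; A sketch of the forwarded form of @store_forward above (the
;; @..._sketch name is ours): the load is replaced by the value just
;; stored to the same location.
define i64 @store_forward_sketch(i64* %p, i64 %v) {
  store atomic i64 %v, i64* %p unordered, align 8
  ret i64 %v
}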
; Legal to kill (TODO)
define void @dead_writeback(i64* %p) {
; CHECK-O0-LABEL: dead_writeback:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: dead_writeback:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
store atomic i64 %v, i64* %p unordered, align 8
ret void
}
; Legal to kill (TODO)
define void @dead_store(i64* %p, i64 %v) {
; CHECK-O0-LABEL: dead_store:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq $0, (%rdi)
; CHECK-O0-NEXT: movq %rsi, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: dead_store:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq $0, (%rdi)
; CHECK-O3-NEXT: movq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
store atomic i64 0, i64* %p unordered, align 8
store atomic i64 %v, i64* %p unordered, align 8
ret void
}
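;; A sketch of @dead_store after eliminating the dead first store (the
;; @..._sketch name is ours): the zero store is immediately overwritten
;; by a same-width atomic store, so only the final value need be written.
define void @dead_store_sketch(i64* %p, i64 %v) {
  store atomic i64 %v, i64* %p unordered, align 8
  ret void
}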