1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-23 21:13:02 +02:00
llvm-mirror/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
Chandler Carruth 7b066daf55 [SDAG] Revert r296476 (and r296486, r296668, r296690).
This patch causes compile times for some patterns to explode. I have
a (large, unreduced) test case that slows down by more than 20x and
several test cases slow down by 2x. I'm sending some of the test cases
directly to Nirav and following up with more details in the review log,
but this should unblock anyone else hitting this.

llvm-svn: 296862
2017-03-03 10:02:25 +00:00

58 lines
2.1 KiB
LLVM

; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
; Make sure we don't break load/store ordering when turning an extractelement
; into loads, off the stack or a previous store.
; Be very explicit about the ordering/stack offsets.
; CHECK-LABEL: test_extractelement_legalization_storereuse:
; CHECK: # BB#0
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl 16(%esp), %eax
; CHECK-NEXT: movl 24(%esp), %ecx
; CHECK-NEXT: movl 20(%esp), %edx
; CHECK-NEXT: paddd (%edx), %xmm0
; CHECK-NEXT: movdqa %xmm0, (%edx)
; CHECK-NEXT: movl (%edx), %esi
; CHECK-NEXT: movl 12(%edx), %edi
; CHECK-NEXT: movl 8(%edx), %ebx
; CHECK-NEXT: movl 4(%edx), %edx
; CHECK-NEXT: shll $4, %ecx
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
; CHECK-NEXT: movl %edx, (%eax,%ecx)
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
; CHECK-NEXT: movl %edi, 4(%eax,%ecx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
entry:
%0 = bitcast i32* %y to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 16
%am = add <4 x i32> %a, %1
store <4 x i32> %am, <4 x i32>* %0, align 16
%ip0 = shl nsw i32 %i, 2
%ip1 = or i32 %ip0, 1
%ip2 = or i32 %ip0, 2
%ip3 = or i32 %ip0, 3
%vecext = extractelement <4 x i32> %am, i32 %ip0
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %ip3
store i32 %vecext, i32* %arrayidx, align 4
%vecext5 = extractelement <4 x i32> %am, i32 %ip1
%arrayidx8 = getelementptr inbounds i32, i32* %x, i32 %ip0
store i32 %vecext5, i32* %arrayidx8, align 4
%vecext11 = extractelement <4 x i32> %am, i32 %ip2
%arrayidx14 = getelementptr inbounds i32, i32* %x, i32 %ip2
store i32 %vecext11, i32* %arrayidx14, align 4
%vecext17 = extractelement <4 x i32> %am, i32 %ip3
%arrayidx20 = getelementptr inbounds i32, i32* %x, i32 %ip1
store i32 %vecext17, i32* %arrayidx20, align 4
ret void
}
attributes #0 = { nounwind }