mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-23 21:13:02 +02:00
7b066daf55
This patch causes compile times for some patterns to explode. I have a (large, unreduced) test case that slows down by more than 20x and several test cases slow down by 2x. I'm sending some of the test cases directly to Nirav and following up with more details in the review log, but this should unblock anyone else hitting this. llvm-svn: 296862
58 lines
2.1 KiB
LLVM
58 lines
2.1 KiB
LLVM
; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s

target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"

; Make sure we don't break load/store ordering when turning an extractelement
; into loads, off the stack or a previous store.
; Be very explicit about the ordering/stack offsets.
;
; The expected sequence below stores the vector sum back through %edx first,
; then reloads the four 32-bit lanes from that same address (offsets 0, 12, 8
; and 4) and only afterwards performs the four scalar stores relative to
; (%eax,%ecx).

; CHECK-LABEL: test_extractelement_legalization_storereuse:
; CHECK: # BB#0
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl 16(%esp), %eax
; CHECK-NEXT: movl 24(%esp), %ecx
; CHECK-NEXT: movl 20(%esp), %edx
; CHECK-NEXT: paddd (%edx), %xmm0
; CHECK-NEXT: movdqa %xmm0, (%edx)
; CHECK-NEXT: movl (%edx), %esi
; CHECK-NEXT: movl 12(%edx), %edi
; CHECK-NEXT: movl 8(%edx), %ebx
; CHECK-NEXT: movl 4(%edx), %edx
; CHECK-NEXT: shll $4, %ecx
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
; CHECK-NEXT: movl %edx, (%eax,%ecx)
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
; CHECK-NEXT: movl %edi, 4(%eax,%ecx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
; Adds %a to the <4 x i32> loaded through %y and stores the sum back through
; %y. It then extracts four lanes of the sum using the variable indices
; %ip0..%ip3 (derived from %i) and stores each one into %x, deliberately using
; a different index for the destination than for the extracted lane:
;   lane %ip0 -> %x[%ip3]
;   lane %ip1 -> %x[%ip0]
;   lane %ip2 -> %x[%ip2]
;   lane %ip3 -> %x[%ip1]
define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
entry:
  ; View %y as a pointer to a 16-byte-aligned <4 x i32>.
  %0 = bitcast i32* %y to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 16
  %am = add <4 x i32> %a, %1
  ; Write the sum back to the location it was loaded from; the extracts below
  ; read lanes of this same value.
  store <4 x i32> %am, <4 x i32>* %0, align 16

  ; %ip0 = %i << 2; %ip1..%ip3 set the low two bits to 1, 2 and 3.
  %ip0 = shl nsw i32 %i, 2
  %ip1 = or i32 %ip0, 1
  %ip2 = or i32 %ip0, 2
  %ip3 = or i32 %ip0, 3

  ; Lane %ip0 of the sum goes to %x[%ip3].
  %vecext = extractelement <4 x i32> %am, i32 %ip0
  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %ip3
  store i32 %vecext, i32* %arrayidx, align 4

  ; Lane %ip1 of the sum goes to %x[%ip0].
  %vecext5 = extractelement <4 x i32> %am, i32 %ip1
  %arrayidx8 = getelementptr inbounds i32, i32* %x, i32 %ip0
  store i32 %vecext5, i32* %arrayidx8, align 4

  ; Lane %ip2 of the sum goes to %x[%ip2].
  %vecext11 = extractelement <4 x i32> %am, i32 %ip2
  %arrayidx14 = getelementptr inbounds i32, i32* %x, i32 %ip2
  store i32 %vecext11, i32* %arrayidx14, align 4

  ; Lane %ip3 of the sum goes to %x[%ip1].
  %vecext17 = extractelement <4 x i32> %am, i32 %ip3
  %arrayidx20 = getelementptr inbounds i32, i32* %x, i32 %ip1
  store i32 %vecext17, i32* %arrayidx20, align 4
  ret void
}

; Attribute group #0, referenced by @test_extractelement_legalization_storereuse.
attributes #0 = { nounwind }