mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
eca5c8ffba
Unoptimized IR can have linear sequences of stores to an array, where the initial GEP for the first store is formed from the pointer to the array, and the GEP for each store after the first is formed from the previous GEP with some offset in an inductive fashion. The (large) resulting DAG when analyzed by DAGCombine undergoes an excessive number of combines as each store node is examined every time its' offset node is combined with any child of the offset. One of the transformations is findBetterNeighborChains which assists MergeConsecutiveStores. The former relies on repeated chain walking to do its' work, however MergeConsecutiveStores is disabled at O0 which makes the transformation redundant. Any optimization level other than O0 would invoke InstCombine which would resolve the chain of GEPs into flat base + offset GEP for each store which does not exhibit the repeated examination of each store to the array. Disabling this optimization fixes an excessive compile time issue (30~ minutes for the test case provided) at O0. Reviewers: niravd, craig.topper, t.p.northover Differential Revision: https://reviews.llvm.org/D40193 llvm-svn: 319142
83 lines
3.7 KiB
LLVM
83 lines
3.7 KiB
LLVM
; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s
|
|
|
|
; rdar://12713765
|
|
; When realign-stack is set to false, make sure we are not creating stack
|
|
; objects that are assumed to be 64-byte aligned.
|
|
@T3_retval = common global <16 x float> zeroinitializer, align 16
|
|
|
|
define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
|
|
entry:
|
|
; CHECK-LABEL: test1:
|
|
; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]]
|
|
; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
|
|
; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
|
|
; CHECK: add r[[R3:[0-9]+]], r[[R1]], #32
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
|
|
; CHECK: add r[[R3:[0-9]+]], r[[R1]], #48
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
|
|
; CHECK: mov r[[R2:[0-9]+]], sp
|
|
; CHECK: add r[[R3:[0-9]+]], r[[R2]], #48
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
|
|
; CHECK: add r[[R4:[0-9]+]], r[[R2]], #32
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R4]]:128]
|
|
; CHECK: mov r[[R5:[0-9]+]], r[[R2]]
|
|
; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]!
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R4]]:128]
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
|
|
; CHECK: add r[[R1:[0-9]+]], r0, #48
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
|
|
; CHECK: add r[[R1:[0-9]+]], r0, #32
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
|
|
; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]!
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
|
|
%retval = alloca <16 x float>, align 16
|
|
%0 = load <16 x float>, <16 x float>* @T3_retval, align 16
|
|
store <16 x float> %0, <16 x float>* %retval
|
|
%1 = load <16 x float>, <16 x float>* %retval
|
|
store <16 x float> %1, <16 x float>* %agg.result, align 16
|
|
ret void
|
|
}
|
|
|
|
define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
|
|
entry:
|
|
; CHECK-LABEL: test2:
|
|
; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]]
|
|
; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
|
|
; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
|
|
; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]!
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
|
|
; CHECK: mov r[[R1:[0-9]+]], sp
|
|
; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #16
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
|
|
; CHECK: mov r[[R3:[0-9]+]], #32
|
|
; CHECK: mov r[[R9:[0-9]+]], r[[R1]]
|
|
; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R9]]:128], r[[R3]]
|
|
; CHECK: mov r[[R3:[0-9]+]], r[[R9]]
|
|
; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]!
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R9]]:128]
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
|
|
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
|
|
; CHECK: add r[[R1:[0-9]+]], r0, #48
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
|
|
; CHECK: add r[[R1:[0-9]+]], r0, #32
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
|
|
; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]!
|
|
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
|
|
|
|
|
|
%retval = alloca <16 x float>, align 16
|
|
%0 = load <16 x float>, <16 x float>* @T3_retval, align 16
|
|
store <16 x float> %0, <16 x float>* %retval
|
|
%1 = load <16 x float>, <16 x float>* %retval
|
|
store <16 x float> %1, <16 x float>* %agg.result, align 16
|
|
ret void
|
|
}
|