llvm-mirror/test/CodeGen/ARM/alloc-no-stack-realign.ll
Simon Dardis eca5c8ffba [DAGCombine] Disable finding better chains for stores at O0
Unoptimized IR can contain linear sequences of stores to an array, where the
GEP for the first store is formed from the pointer to the array, and the GEP
for each subsequent store is formed from the previous GEP plus some offset, in
an inductive fashion (sketched below).

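As a rough illustration (hypothetical IR, not the reporter's test case), -O0
front-end output for a run of consecutive array stores tends to look like the
following, with each GEP chained off the previous one:

; Each GEP below is formed from the previous GEP rather than the array
; base, so every store's address node hangs off the preceding address.
define void @fill(i32* %arr) {
entry:
  store i32 0, i32* %arr, align 4
  %p1 = getelementptr inbounds i32, i32* %arr, i32 1
  store i32 1, i32* %p1, align 4
  %p2 = getelementptr inbounds i32, i32* %p1, i32 1
  store i32 2, i32* %p2, align 4
  %p3 = getelementptr inbounds i32, i32* %p2, i32 1
  store i32 3, i32* %p3, align 4
  ret void
}
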
When DAGCombine analyzes the (large) resulting DAG, it performs an excessive
number of combines, because each store node is re-examined every time its
offset node is combined with any child of that offset. One of the
transformations involved is findBetterNeighborChains, which assists
MergeConsecutiveStores. The former relies on repeated chain walking to do its
work; however, MergeConsecutiveStores is disabled at O0, which makes the
transformation redundant there.

Any optimization level other than O0 invokes InstCombine, which resolves the
chain of GEPs into a flat base + offset GEP for each store (contrasted in the
sketch below); that form does not exhibit the repeated examination of each
store to the array.
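
For contrast, a sketch of the same hypothetical function after InstCombine's
canonicalization, with every address computed as a flat offset from the base
pointer:

; Each GEP now indexes directly off %arr, so the stores no longer form
; an address chain that DAGCombine must re-walk after every combine.
define void @fill(i32* %arr) {
entry:
  store i32 0, i32* %arr, align 4
  %p1 = getelementptr inbounds i32, i32* %arr, i32 1
  store i32 1, i32* %p1, align 4
  %p2 = getelementptr inbounds i32, i32* %arr, i32 2
  store i32 2, i32* %p2, align 4
  %p3 = getelementptr inbounds i32, i32* %arr, i32 3
  store i32 3, i32* %p3, align 4
  ret void
}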

Disabling this transformation at O0 fixes an excessive compile-time issue
(roughly 30 minutes for the test case provided).

Reviewers: niravd, craig.topper, t.p.northover

Differential Revision: https://reviews.llvm.org/D40193

llvm-svn: 319142
2017-11-28 04:07:59 +00:00


; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s
; rdar://12713765
; When realign-stack is set to false, make sure we are not creating stack
; objects that are assumed to be 64-byte aligned.
@T3_retval = common global <16 x float> zeroinitializer, align 16
define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
entry:
; CHECK-LABEL: test1:
; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]]
; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
; CHECK: add r[[R3:[0-9]+]], r[[R1]], #32
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
; CHECK: add r[[R3:[0-9]+]], r[[R1]], #48
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
; CHECK: mov r[[R2:[0-9]+]], sp
; CHECK: add r[[R3:[0-9]+]], r[[R2]], #48
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
; CHECK: add r[[R4:[0-9]+]], r[[R2]], #32
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R4]]:128]
; CHECK: mov r[[R5:[0-9]+]], r[[R2]]
; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]!
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R4]]:128]
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
; CHECK: add r[[R1:[0-9]+]], r0, #48
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
; CHECK: add r[[R1:[0-9]+]], r0, #32
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]!
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
%retval = alloca <16 x float>, align 16
%0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
%1 = load <16 x float>, <16 x float>* %retval
store <16 x float> %1, <16 x float>* %agg.result, align 16
ret void
}
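; test2 has the same body as test1 but lacks the "no-realign-stack"
; attribute, so stack realignment is permitted for its stack objects.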
define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
entry:
; CHECK-LABEL: test2:
; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]]
; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]!
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
; CHECK: mov r[[R1:[0-9]+]], sp
; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #16
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
; CHECK: mov r[[R3:[0-9]+]], #32
; CHECK: mov r[[R9:[0-9]+]], r[[R1]]
; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R9]]:128], r[[R3]]
; CHECK: mov r[[R3:[0-9]+]], r[[R9]]
; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]!
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R9]]:128]
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
; CHECK: add r[[R1:[0-9]+]], r0, #48
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
; CHECK: add r[[R1:[0-9]+]], r0, #32
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]!
; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
%retval = alloca <16 x float>, align 16
%0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
%1 = load <16 x float>, <16 x float>* %retval
store <16 x float> %1, <16 x float>* %agg.result, align 16
ret void
}